###############################################################################
# NOTE
# Before running this program we need to pre-download the packages "forecast"
# and "urca".
# This will enable the plotting of the forecast confidence intervals,
# conducting the Dickey-Fuller Unit Root test, and running the function
# "auto.arima".
# Go to the "Packages" icon in the R GUI and then choose "Load Package". Seek
# out the "forecast" package and select "OK" after highlighting it. After
# doing this, do the same with the selection of the "urca" package.
# After these packages have been pre-loaded you can proceed to run the program.
# It will probably be the case that you will have to import the "forecast"
# and "ucra" packages from one of the R-mirrors and take it to your R library
# on your computer before you can proceed. I will try to talk through these
# processes in class.
###############################################################################
# Read in the Lead Production Data
leaddata<-read.table("e:\\E5375s13\\ARIMA\\leaddata.txt",header=T)
# Print out the headers
names(leaddata)
# Attach the data to R
attach(leaddata)
# Print out the data to make sure everything looks OK
leaddata[,,]
# Define the vector (variable) "obs"
obs<-leaddata[,1]
# Define the vector (variable) "leadprod"
leadprod<-leaddata[,2]
# Plot the Leadprod time series
ts.plot(leadprod)
# Keep the Graph windows open
windows()
# Plot the ACF and PACF side-by-side
par(mfrow=c(1,2))
acf(leadprod)
pacf(leadprod)
windows()
# auto.arima(leadprod)
# Define the various models to be estimated in the P-Q Box
model00<-arima(leadprod,order=c(0,0,0))
model10<-arima(leadprod,order=c(1,0,0))
model20<-arima(leadprod,order=c(2,0,0))
model01<-arima(leadprod,order=c(0,0,1))
model02<-arima(leadprod,order=c(0,0,2))
model11<-arima(leadprod,order=c(1,0,1))
# Get the AIC Goodness-of-Fit measures for the competing models
AIC(model00,model10,model20,model01,model02,model11)
# The "Winning" model is the (1,0,0) ARIMA model. This matches what is
# implied by the ACF and PACF functions
# Get the ARIMA(1,0,0)'s coefficient estimates and their standard errors
# Apart from the intercept, are the coefficients of the model statistically
# significant? Hopefully, they are.
coef(model10)
vcov(model10)
# Let's Check to see if the residuals of our chosen model are white noise
# using the Box-Ljung Portmonteu test for autocorrelation at lag = 24
resid10<-residuals(model10)
Box.test(resid10,lag=24,type="Ljung")
# Get the coefficient estimates and their standard errors for the
# Overfitting models ARIMA(2,0,0) and ARIMA(1,0,1)
# If the ovefitting parameters of these overfitting models are
# statistically insignificant then we go with the ARIMA(1,0,0) model
coef(model20)
vcov(model20)
coef(model11)
vcov(model11)
# produce 12 step-ahead-forecasts of ARIMA(1,0,0) model for lead production
# and plot the forecasts with 80% and 95% confidence intervals
par(mfrow=c(1,1))
forecast(model10,h=12)
plot(forecast(model10,h=12))
#########################################################################
# Let's do some extra mile stuff here. Let's test the Leadprod data
# for a "unit root" (to see if the data needs to be differenced first).
# We are going to apply the Dickey-Fuller Unit root test and two
# separate goodness-of-fit criteria (AIC and BIC) to choose the
# number of augmenting terms for the DF test equation. We get the
# same answer (stationarity) regardless of which criteria we use.
#########################################################################
lp.df<-ur.df(y=leadprod, type = 'drift', selectlags = 'AIC')
summary(lp.df)
lp.df<-ur.df(y=leadprod, type = 'drift', selectlags = 'BIC')
summary(lp.df)
#########################################################################
# We are going to use Rob Hyndman's "auto.arima" function to shop over
# some plausible ARIMA models using the information criteria AIC, AICC,
# and BIC. Again, the ARIMA(1,0,0)model is chosen but we gave the search
# some very useful information, namely that the data is stationary and
# that we didn't want to consider very large values of p and q.
#########################################################################
auto.arima(leadprod, d = 0, max.p = 2, max.q=2, stationary = TRUE, ic = c("aic"), allowdrift=TRUE)