/* This program runs a horse race between the Box-Jenkins model
   ARIMA(1,1,1) for y and a RESTRICTED equal-lag-length VAR (p = 8).
   The forecast horizon is one step ahead, and the hold-out sample data
   set is observations 121 - 150.  The Restricted VAR does a little
   better than the Unrestricted VAR, so we conclude that imposing
   one-way causality (as indicated by the Granger causality test) is a
   useful thing to do. */

OPTIONS PAGESIZE=60 LINESIZE=72 NODATE;

data lead;
   input obs x y;     /* obs   x = lead   y = sales */
   datalines;
1 10.01 200.1
2 10.07 199.5
3 10.32 199.4
4 9.75 198.9
5 10.33 199.0
6 10.13 200.2
7 10.36 198.6
8 10.32 200.0
9 10.13 200.3
10 10.16 201.2
11 10.58 201.6
12 10.62 201.5
13 10.86 201.5
14 11.20 203.5
15 10.74 204.9
16 10.56 207.1
17 10.48 210.5
18 10.77 210.5
19 11.33 209.8
20 10.96 208.8
21 11.16 209.5
22 11.70 213.2
23 11.39 213.7
24 11.42 215.1
25 11.94 218.7
26 11.24 219.8
27 11.59 220.5
28 10.96 223.8
29 11.40 222.8
30 11.02 223.8
31 11.01 221.7
32 11.23 222.3
33 11.33 220.8
34 10.83 219.4
35 10.84 220.1
36 11.14 220.6
37 10.38 218.9
38 10.90 217.8
39 11.05 217.7
40 11.11 215.0
41 11.01 215.3
42 11.22 215.9
43 11.21 216.7
44 11.91 216.7
45 11.69 217.7
46 10.93 218.7
47 10.99 222.9
48 11.01 224.9
49 10.84 222.2
50 10.76 220.7
51 10.77 220.0
52 10.88 218.7
53 10.49 217.0
54 10.50 215.9
55 11.00 215.8
56 10.98 214.1
57 10.61 212.3
58 10.48 213.9
59 10.53 214.6
60 11.07 213.6
61 10.61 212.1
62 10.86 211.4
63 10.34 213.1
64 10.78 212.9
65 10.80 213.3
66 10.33 211.5
67 10.44 212.3
68 10.50 213.0
69 10.75 211.0
70 10.40 210.7
71 10.40 210.1
72 10.34 211.4
73 10.55 210.0
74 10.46 209.7
75 10.82 208.8
76 10.91 208.8
77 10.87 208.8
78 10.67 210.6
79 11.11 211.9
80 10.88 212.8
81 11.28 212.5
82 11.27 214.8
83 11.44 215.3
84 11.52 217.5
85 12.10 218.8
86 11.83 220.7
87 12.62 222.2
88 12.41 226.7
89 12.43 228.4
90 12.73 233.2
91 13.01 235.7
92 12.74 237.1
93 12.73 240.6
94 12.76 243.8
95 12.92 245.3
96 12.64 246.0
97 12.79 246.3
98 13.05 247.7
99 12.69 247.6
100 13.01 247.8
101 12.90 249.4
102 13.12 249.0
103 12.47 249.9
104 12.47 250.5
105 12.94 251.5
106 13.10 249.0
107 12.91 247.6
108 13.39 248.8
109 13.13 250.4
110 13.34 250.7
111 13.34 253.0
112 13.14 253.7
113 13.49 255.0
114 13.87 256.2
115 13.39 256.0
116 13.59 257.4
117 13.27 260.4
118 13.70 260.0
119 13.20 261.3
120 13.32 260.4
121 13.15 261.6
122 13.30 260.8
123 12.94 259.8
124 13.29 259.0
125 13.26 258.9
126 13.08 257.4
127 13.24 257.7
128 13.31 257.9
129 13.52 257.4
130 13.02 257.3
131 13.25 257.6
132 13.12 258.9
133 13.26 257.8
134 13.11 257.7
135 13.30 257.2
136 13.06 257.5
137 13.32 256.8
138 13.10 257.5
139 13.27 257.0
140 13.64 257.6
141 13.58 257.3
142 13.87 257.5
143 13.53 259.6
144 13.41 261.1
145 13.25 262.9
146 13.50 263.3
147 13.58 262.8
148 13.51 261.8
149 13.77 262.2
150 13.40 262.7
;
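/* Optional pretest (a sketch, not part of the original program): the
   header comment refers to a Granger causality test that motivates the
   one-way-causality restriction imposed on the RVAR below.  One way to
   run such a pretest is with the CAUSAL statement of PROC VARMAX, whose
   null hypothesis is that the GROUP1 variable is influenced only by
   itself and not by the GROUP2 variable.  The use of the first 120
   observations, the lag length p = 8, and the directions tested here
   are assumptions, not taken from the original program. */
proc varmax data=lead(obs=120);
   model y x / p=8 dify(1);
   causal group1=(x) group2=(y);   /* null: y does not Granger-cause x */
   causal group1=(y) group2=(x);   /* null: x does not Granger-cause y */
run;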
/* Roll the ARIMA(1,1,1) Box-Jenkins model for y through the
   out-of-sample data, forecasting one period ahead each time.
   This is the benchmark forecast. */

%macro bj(num);
proc arima data=lead(obs=&num) out=result;
   identify var=y(1) noprint;
   estimate p=1 q=1 method=ml noprint;
   forecast lead=1 nooutall noprint;
proc append base=collect1 data=result;
%mend;

%macro doit;
%do ii=120 %to 149;
   %bj(&ii);
%end;
%mend;
%doit;

data bjf;
   set collect1;
   f1=forecast;
   drop forecast y std l95 u95 residual;

data actual;
   set lead;
   if 120 < _n_;
   actual=y;
   keep actual;

data merge1;
   merge actual bjf;

/* Roll the Restricted Equal Lag Length VAR (p = 8) for y and x through
   the hold-out sample data (observations 121 - 150), forecasting one
   period ahead each time. */

data yseries;
   set lead;
   y1=y;
   y2=x;

%macro var(num);
proc varmax data=yseries(obs=&num) noprint;
   model y1-y2 / dify(1) p=8;
   restrict ar(1,2,1)=0 ar(2,2,1)=0 ar(3,2,1)=0 ar(4,2,1)=0
            ar(5,2,1)=0 ar(6,2,1)=0 ar(7,2,1)=0 ar(8,2,1)=0;
   output out=result lead=1;
data result;
   set result;
   if _n_ eq &num + 1;
proc append base=collect2 data=result;
%mend;

%macro doit;
%do ii=120 %to 149;
   %var(&ii);
%end;
%mend;
%doit;

data collect2;
   set collect2;
   f2=for1;
   keep f2;

data compare;
   merge merge1 collect2;

data errors;
   set compare;
   e1=actual - f1;
   e2=actual - f2;
   abse1=abs(e1);
   abse2=abs(e2);
   e12=e1**2.0;
   e22=e2**2.0;

/* Calculate the MAEs and MSEs of the Box-Jenkins Model and the VAR Model
   for y over the hold-out sample data set (observations 121 - 150).
   MAE1 and MSE1 are for the BJ Model, while MAE2 and MSE2 are for the
   RVAR Model for y = y1.  Is the RVAR model significantly better than
   the BJ forecasting model?  If it is, then we have evidence that the
   proposed supplementary variable (x) is a useful one. */

proc univariate data=errors noprint;
   var abse1 e12 abse2 e22;
   output mean=mae1 mse1 mae2 mse2 out=results;

proc print data=results;
   var mae1 mse1 mae2 mse2;
   title1 'Forecasting Accuracy Measures of BJ (Method 1) and RVAR (Method 2)';
   title3 'Forecast Horizon = 1';
run;

proc print data=compare;
   title 'Actual vs. Method 1 = BJ, Method 2 = RVAR';

proc plot data=compare;
   plot actual*f1;
   title 'Prediction-Realization Diagram for BJ Method';

proc plot data=compare;
   plot actual*f2;
   title 'Prediction-Realization Diagram for RVAR Method';

data errors;
   set compare;
   e1=actual - f1;
   e2=actual - f2;
   abse1=abs(e1);
   abse2=abs(e2);
   e12=e1**2.0;
   e22=e2**2.0;
   ae1mae2 = abse1 - abse2;
   e12me22 = e12 - e22;

/* Here we produce the forecasting accuracy measures for the competing
   methods:  Method 1 = the BJ model, Method 2 = the RVAR model. */

proc univariate data=errors noprint;
   var abse1 e12 abse2 e22;
   output mean=mae1 mse1 mae2 mse2 out=results;

proc print data=results;
   var mae1 mse1 mae2 mse2;
   title 'Forecasting Accuracy Measures of Competing Methods';
   title2 'Method 1 = BJ, Method 2 = RVAR, Forecast Horizon = 1';

/* Here we test for a significant difference in the MAEs of the two
   competing models using the Diebold-Mariano ARMA approach.  This
   approach DOES NOT require that the competing forecasting methods be
   unbiased.  It is just a matter of testing the significance of the
   sample mean of an appropriate ARMA model.  As it turns out, the
   p = 0, q = 0 model is the one to use for the Diebold-Mariano test.
   Look at the t-statistic on the mean (MU). */

proc arima data=errors;
   identify var=ae1mae2;
   estimate p=0 q=0;
   estimate p=1 q=0;
   estimate p=0 q=1;
   title1 'Test for Significant Difference in MAEs';
   title2 'Using the Diebold-Mariano ARMA approach';
run;
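/* Cross-check (a sketch, not part of the original program): the
   Diebold-Mariano statistic for the MAE comparison can also be computed
   directly as dbar / (s_d / sqrt(n)), where d_t = |e1_t| - |e2_t| is the
   loss differential.  With no serial correlation in d_t (the p = 0,
   q = 0 case above), this should essentially reproduce the t-statistic
   on MU.  The data set and variable names (dmstat, dbar, sd, t_dm) are
   illustrative only. */
proc means data=errors noprint;
   var ae1mae2;
   output out=dmstat mean=dbar std=sd n=n;
run;
data dmstat;
   set dmstat;
   t_dm = dbar / (sd / sqrt(n));   /* DM statistic for horizon = 1 */
run;
proc print data=dmstat;
   var dbar sd n t_dm;
   title 'Hand-Computed Diebold-Mariano Statistic for the MAE Comparison';
run;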
/* Here we test for a significant difference in the MSEs of the two
   competing models using the Diebold-Mariano ARMA approach.  This
   approach DOES NOT require that the competing forecasting methods be
   unbiased.  It is just a matter of testing the significance of the
   sample mean of an appropriate ARMA model.  As it turns out, the
   p = 0, q = 0 model is the one to use for the Diebold-Mariano test.
   Look at the t-statistic on the mean (MU). */

title1 'Test for Significant Difference in MSEs';
title2 'Using the Diebold-Mariano ARMA approach';

proc arima data=errors;
   identify var=e12me22;
   estimate p=0 q=0;
   estimate p=1 q=0;
   estimate p=0 q=1;
run;
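/* For reference (a sketch, not executed as part of the horse race): the
   Unrestricted VAR mentioned in the header comment differs from the
   %var macro above only in that the RESTRICT statement is dropped, so
   all eight lags of both variables enter both equations.  It would be
   rolled through the hold-out sample with the same %do ii=120 %to 149
   loop used above.  The names uvar, uresult, and collect3 are
   illustrative only. */
%macro uvar(num);
proc varmax data=yseries(obs=&num) noprint;
   model y1-y2 / dify(1) p=8;
   output out=uresult lead=1;
data uresult;
   set uresult;
   if _n_ eq &num + 1;
proc append base=collect3 data=uresult;
%mend;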