/* This program runs a horse race between the Box-Jenkins model
   ARIMA(1,1,1) for the y series and an Equal Lag Length VAR (L=8).
   The forecast horizon is one step ahead while the hold-out sample
   data set is observations 121 - 150.  The VAR model (which
   incorporates the leading X series) produces more accurate
   forecasts of the y series than the Box-Jenkins model.  Thus, X
   looks to be a useful leading indicator of y.                     */

OPTIONS PAGESIZE=60 LINESIZE=72 NODATE;

/* Forecast origins for the rolling one-step-ahead exercise.  Each
   model is fitted on observations 1..origin and forecasts
   observation origin + 1, so origins 120..149 cover the hold-out
   observations 121..150.                                           */
%let first_origin = 120;
%let last_origin  = 149;

/* In-stream data: one observation per record. */
data lead;
  input obs x y;   /* obs  lead  sales */
  datalines;
1 10.01 200.1
2 10.07 199.5
3 10.32 199.4
4 9.75 198.9
5 10.33 199.0
6 10.13 200.2
7 10.36 198.6
8 10.32 200.0
9 10.13 200.3
10 10.16 201.2
11 10.58 201.6
12 10.62 201.5
13 10.86 201.5
14 11.20 203.5
15 10.74 204.9
16 10.56 207.1
17 10.48 210.5
18 10.77 210.5
19 11.33 209.8
20 10.96 208.8
21 11.16 209.5
22 11.70 213.2
23 11.39 213.7
24 11.42 215.1
25 11.94 218.7
26 11.24 219.8
27 11.59 220.5
28 10.96 223.8
29 11.40 222.8
30 11.02 223.8
31 11.01 221.7
32 11.23 222.3
33 11.33 220.8
34 10.83 219.4
35 10.84 220.1
36 11.14 220.6
37 10.38 218.9
38 10.90 217.8
39 11.05 217.7
40 11.11 215.0
41 11.01 215.3
42 11.22 215.9
43 11.21 216.7
44 11.91 216.7
45 11.69 217.7
46 10.93 218.7
47 10.99 222.9
48 11.01 224.9
49 10.84 222.2
50 10.76 220.7
51 10.77 220.0
52 10.88 218.7
53 10.49 217.0
54 10.50 215.9
55 11.00 215.8
56 10.98 214.1
57 10.61 212.3
58 10.48 213.9
59 10.53 214.6
60 11.07 213.6
61 10.61 212.1
62 10.86 211.4
63 10.34 213.1
64 10.78 212.9
65 10.80 213.3
66 10.33 211.5
67 10.44 212.3
68 10.50 213.0
69 10.75 211.0
70 10.40 210.7
71 10.40 210.1
72 10.34 211.4
73 10.55 210.0
74 10.46 209.7
75 10.82 208.8
76 10.91 208.8
77 10.87 208.8
78 10.67 210.6
79 11.11 211.9
80 10.88 212.8
81 11.28 212.5
82 11.27 214.8
83 11.44 215.3
84 11.52 217.5
85 12.10 218.8
86 11.83 220.7
87 12.62 222.2
88 12.41 226.7
89 12.43 228.4
90 12.73 233.2
91 13.01 235.7
92 12.74 237.1
93 12.73 240.6
94 12.76 243.8
95 12.92 245.3
96 12.64 246.0
97 12.79 246.3
98 13.05 247.7
99 12.69 247.6
100 13.01 247.8
101 12.90 249.4
102 13.12 249.0
103 12.47 249.9
104 12.47 250.5
105 12.94 251.5
106 13.10 249.0
107 12.91 247.6
108 13.39 248.8
109 13.13 250.4
110 13.34 250.7
111 13.34 253.0
112 13.14 253.7
113 13.49 255.0
114 13.87 256.2
115 13.39 256.0
116 13.59 257.4
117 13.27 260.4
118 13.70 260.0
119 13.20 261.3
120 13.32 260.4
121 13.15 261.6
122 13.30 260.8
123 12.94 259.8
124 13.29 259.0
125 13.26 258.9
126 13.08 257.4
127 13.24 257.7
128 13.31 257.9
129 13.52 257.4
130 13.02 257.3
131 13.25 257.6
132 13.12 258.9
133 13.26 257.8
134 13.11 257.7
135 13.30 257.2
136 13.06 257.5
137 13.32 256.8
138 13.10 257.5
139 13.27 257.0
140 13.64 257.6
141 13.58 257.3
142 13.87 257.5
143 13.53 259.6
144 13.41 261.1
145 13.25 262.9
146 13.50 263.3
147 13.58 262.8
148 13.51 261.8
149 13.77 262.2
150 13.40 262.7
;
run;

/* PROC APPEND adds to whatever is already stored in the base data
   set, so forecasts left over from an earlier run in the same SAS
   session would otherwise pile up in COLLECT1 / COLLECT2 and
   misalign the one-to-one merges below.  Start from empty
   collectors; NOWARN keeps the first run (nothing to delete yet)
   quiet.                                                           */
proc datasets library=work nolist nowarn;
  delete collect1 collect2;
quit;

/* Roll the Box-Jenkins model for y through the out-of-sample data
   forecasting one period ahead.  This is the benchmark forecast.   */

/* %bj(num): fit ARIMA(1,1,1) to y by maximum likelihood using the
   first &num observations of LEAD, then append the one-step-ahead
   forecast record to COLLECT1.                                     */
%macro bj(num);
  proc arima data=lead(obs=&num) out=result;
    identify var=y(1) noprint;
    estimate p=1 q=1 method=ml noprint;
    /* NOOUTALL: write only the forecast record to OUT=. */
    forecast lead=1 nooutall noprint;
  run;
  quit;

  proc append base=collect1 data=result;
  run;
%mend bj;

/* %doit (Box-Jenkins pass): call %bj once per forecast origin. */
%macro doit;
  %local ii;
  %do ii = &first_origin %to &last_origin;
    %bj(&ii);
  %end;
%mend doit;

%doit;

/* Keep just the Box-Jenkins forecast, renamed f1. */
data bjf;
  set collect1;
  f1 = forecast;
  drop forecast y std l95 u95 residual;
run;

/* Realised y over the hold-out sample (observations after the first
   forecast origin).                                                */
data actual;
  set lead;
  if _n_ > &first_origin;
  actual = y;
  keep actual;
run;

/* One-to-one merge (no BY statement): row k of ACTUAL is paired
   with row k of BJF.                                               */
data merge1;
  merge actual bjf;
run;

/* Roll the Equal Lag Length VAR (p = 8) for y and x through the
   hold-out sample data (observations 121 - 150) forecasting one
   period ahead each time.                                          */

/* PROC VARMAX is given the series as y1 (= y) and y2 (= x). */
data yseries;
  set lead;
  y1 = y;
  y2 = x;
run;

/* %var(num): fit a VAR(8) to the first differences of y1 and y2
   using the first &num observations of YSERIES, then append the
   one-step-ahead forecast of y1 (as f2) to COLLECT2.               */
%macro var(num);
  proc varmax data=yseries(obs=&num);
    model y1-y2 / dify(1) p=8 q=0 noprint;
    output out=result lead=1 noprint;
  run;

  /* Keep only row &num + 1 of the OUTPUT data set -- presumably
     the first row past the estimation sample, i.e. the
     one-step-ahead forecast -- and only the y1 forecast FOR1.      */
  data result;
    set result;
    f2 = for1;
    if _n_ = &num + 1;
    keep f2;
  run;

  proc append base=collect2 data=result;
  run;
%mend var;

/* %doit (VAR pass): deliberately redefines %doit so that it now
   calls %var once per forecast origin.                             */
%macro doit;
  %local ii;
  %do ii = &first_origin %to &last_origin;
    %var(&ii);
  %end;
%mend doit;

%doit;

/* Line up actuals, BJ forecasts (f1) and VAR forecasts (f2) row by
   row.                                                             */
data compare;
  merge merge1 collect2;
run;

/* Forecast errors with their absolute values and squares, plus the
   sum and difference of the two error series.                      */
data errors;
  set compare;
  e1    = actual - f1;
  e2    = actual - f2;
  abse1 = abs(e1);
  abse2 = abs(e2);
  e12   = e1**2.0;
  e22   = e2**2.0;
  e1pe2 = e1 + e2;
  e1me2 = e1 - e2;
run;

/* Calculate the MAEs and MSEs of the Box-Jenkins Model and the VAR
   Model for y over the hold-out sample data set (observations
   121 - 150).  MAE1 and MSE1 are for the BJ Model while MAE2 and
   MSE2 are for the VAR Model for y = y1.  Is the VAR model
   significantly better than the BJ forecasting model?  If it is,
   then we have evidence that the proposed leading indicator (x) is
   a useful one.                                                    */
proc univariate data=errors noprint;
  var abse1 e12 abse2 e22;
  /* Means are assigned positionally, in the order of the VAR list. */
  output mean = mae1 mse1 mae2 mse2 out=results;
run;

proc print data=results;
  var mae1 mse1 mae2 mse2;
  title1 'Forecasting Accuracy Measures of BJ (Method 1) and VAR (Method 2)';
  title3 'Forecast Horizon = 1';
run;