/*-------------------------------------------------------------------------
  Temporary-intervention example.

  Source of the data: Stephen A. DeLurgio, "Forecasting Principles and
  Applications", Irwin/McGraw-Hill, 1998, p. 508.

  Columns : week   - week index (n = 110)
            kgals  - weekly demand for bottled water, thousands of gallons
            interv - 1 during the flood (weeks 75 through 84), 0 otherwise
-------------------------------------------------------------------------*/
data water;
  input week kgals interv;
  datalines;
1 89.5460 0.0
2 92.1816 0.0
3 96.3044 0.0
4 96.6500 0.0
5 99.9868 0.0
6 95.9396 0.0
7 98.3144 0.0
8 95.9932 0.0
9 100.1812 0.0
10 108.2292 0.0
11 97.4724 0.0
12 103.6488 0.0
13 110.7996 0.0
14 94.5796 0.0
15 98.4340 0.0
16 96.6144 0.0
17 101.8856 0.0
18 100.8232 0.0
19 109.0104 0.0
20 110.2688 0.0
21 104.6388 0.0
22 102.3228 0.0
23 103.4800 0.0
24 113.0104 0.0
25 114.9948 0.0
26 108.7712 0.0
27 105.9812 0.0
28 104.5748 0.0
29 112.2724 0.0
30 110.1060 0.0
31 105.1716 0.0
32 115.1820 0.0
33 116.2084 0.0
34 110.5996 0.0
35 110.4940 0.0
36 113.0444 0.0
37 116.1608 0.0
38 110.0724 0.0
39 117.8328 0.0
40 120.7360 0.0
41 113.6464 0.0
42 115.0988 0.0
43 119.2856 0.0
44 119.5848 0.0
45 110.8000 0.0
46 122.0040 0.0
47 119.6072 0.0
48 119.7196 0.0
49 115.8604 0.0
50 119.8252 0.0
51 120.5808 0.0
52 116.4580 0.0
53 123.4388 0.0
54 121.4192 0.0
55 124.2264 0.0
56 124.4972 0.0
57 125.1768 0.0
58 130.8900 0.0
59 121.7976 0.0
60 124.4820 0.0
61 126.5196 0.0
62 131.9668 0.0
63 128.0204 0.0
64 129.6864 0.0
65 122.3484 0.0
66 123.2304 0.0
67 133.5664 0.0
68 127.4280 0.0
69 126.9304 0.0
70 126.9304 0.0
71 127.4828 0.0
72 134.2140 0.0
73 130.8760 0.0
74 129.7768 0.0
75 154.3632 1.0
76 155.0004 1.0
77 161.4332 1.0
78 160.3660 1.0
79 160.7872 1.0
80 157.6016 1.0
81 158.4992 1.0
82 155.0712 1.0
83 162.9236 1.0
84 167.0836 1.0
85 140.0000 0.0
86 140.9540 0.0
87 140.2560 0.0
88 129.1956 0.0
89 138.2160 0.0
90 137.8500 0.0
91 134.1180 0.0
92 136.8724 0.0
93 140.2644 0.0
94 144.9196 0.0
95 143.8944 0.0
96 145.2768 0.0
97 143.5000 0.0
98 141.9732 0.0
99 144.6516 0.0
100 137.7968 0.0
101 147.4736 0.0
102 148.8548 0.0
103 146.9712 0.0
104 142.4760 0.0
105 149.5192 0.0
106 152.4736 0.0
107 151.1600 0.0
108 149.0000 0.0
109 148.4820 0.0
110 149.6440 0.0
;
run;

/*-------------------------------------------------------------------------
  Derived variables.

  dkgals : first difference of weekly demand.
  pulse1 : 1 in week 75 only (first flood week).
  pulse2 : 1 in week 85 only (first week after the flood).

  The second pulse sits at week 85 rather than 84 because the model is fit
  to the differenced (stationary) series, not to the level.  In differences
  the two pulses are therefore 10 periods apart; had the level itself been
  stationary they would have been 9 periods apart.
-------------------------------------------------------------------------*/
data water;
  set water;
  dkgals = dif(kgals);
  pulse1 = (week = 75);
  pulse2 = (week = 85);
run;

/*-------------------------------------------------------------------------
  Step 1: identify the systematic dynamics of the pre-intervention data
  only (observations 1 - 74) by trying several low-order candidates for
  the first-differenced series.
-------------------------------------------------------------------------*/
proc arima data = water (obs=74);
  identify var = kgals(1);
  estimate p = 1;
  estimate p = 2;
  estimate q = 1;
  estimate q = 2;
  estimate p = 1 q = 1;
  /* Author's conclusion from this output: an MA(1) for kgals(1) fits the
     pre-intervention data best. */
run;
quit;

/*-------------------------------------------------------------------------
  Step 2: fit the intervention model on the full sample, using the two
  pulse dummies as inputs to the differenced series.
-------------------------------------------------------------------------*/
proc arima data = water;
  identify var = kgals(1) crosscorr = (pulse1 pulse2) noprint;

  /* Preferred specification.  Author's note: this model looks good and
     its residuals are white noise. */
  estimate q = 1 input = (pulse1 pulse2);

  /* Deliberate overfits of the error process, for comparison only. */
  estimate q = 2 input = (pulse1 pulse2);
  estimate p=1 q=1 input = (pulse1 pulse2);
run;
quit;
/* Author's conclusion: the extra terms in the overfits are statistically
   insignificant, so the MA(1)-plus-pulses model is retained. */

/*-------------------------------------------------------------------------
  Step 3: forecast the next 16 weeks (weeks 111 - 126) under two scenarios
  and compare them.

  Scenario 1 - a flood starts in the second out-of-sample week (112) and
               interv stays at 1 through week 120, i.e. 9 weeks.  (The
               in-sample flood covered weeks 75 through 84.)
  Scenario 2 - no flood occurs at all.
-------------------------------------------------------------------------*/

/* ---- Scenario 1: flood in the forecast horizon ---- */

/* In-sample rows, without the unused differenced column. */
data base1 (keep = week kgals interv pulse1 pulse2);
  set water;
run;

/* Future rows: demand is missing (to be forecast); the pulses mark the
   first flood week (112) and the first post-flood week (121). */
data pred1;
  input week kgals interv;
  pulse1 = (week=112);
  pulse2 = (week=121);
  datalines;
111 . 0
112 . 1
113 . 1
114 . 1
115 . 1
116 . 1
117 . 1
118 . 1
119 . 1
120 . 1
121 . 0
122 . 0
123 . 0
124 . 0
125 . 0
126 . 0
;
run;

proc append base=base1 data=pred1;
run;

proc arima data = base1 out=fore1;
  identify var = kgals(1) crosscorr = (pulse1 pulse2) noprint;
  estimate q = 1 input = (pulse1 pulse2);
  forecast lead = 16;
run;
quit;

/* forecast1 can be opened in SAS INSIGHT to view the predictions under
   the flood scenario.  The merge is positional (no BY statement); the
   FORECAST column is stored under the name forecast1. */
data forecast1 (keep = week kgals forecast1);
  merge base1 fore1;
  forecast1 = forecast;
run;

/* ---- Scenario 2: no flood in the forecast horizon ----
   Same model and same 16-week horizon as scenario 1, but both pulse
   inputs are held at zero over weeks 111 - 126.  The aim is to compare
   predicted demand (kgals) with and without a flood. */

data base2 (keep = week kgals interv pulse1 pulse2);
  set water;
run;

data pred2;
  input week kgals interv;
  pulse1 = 0;
  pulse2 = 0;
  datalines;
111 . 0
112 . 0
113 . 0
114 . 0
115 . 0
116 . 0
117 . 0
118 . 0
119 . 0
120 . 0
121 . 0
122 . 0
123 . 0
124 . 0
125 . 0
126 . 0
;
run;

proc append base=base2 data=pred2;
run;

proc arima data = base2 out=fore2;
  identify var = kgals(1) crosscorr = (pulse1 pulse2) noprint;
  estimate q = 1 input = (pulse1 pulse2);
  forecast lead = 16;
run;
quit;

/* forecast2 can be opened in SAS INSIGHT to view the predictions under
   the no-flood scenario. */
data forecast2 (keep = week kgals forecast2);
  merge base2 fore2;
  forecast2 = forecast;
run;

/*-------------------------------------------------------------------------
  Step 4: put the two sets of forecasts side by side, keeping only the
  rows after the first 110 (the out-of-sample weeks), so the difference
  between the flood and no-flood forecasts can be plotted, e.g. in
  SAS INSIGHT.
-------------------------------------------------------------------------*/
data forecasts;
  merge forecast1 forecast2;
  if _n_ > 110;
run;

proc print data = forecasts;
run;