/* This SAS program examines the incidence of unit roots in the Plano
   Sales Tax Revenue data.  Data provided by Reginald Gray. */

/* References:
   Hasza, David P. and Fuller, Wayne A. "Testing for Nonstationary
   Parameter Specifications in Seasonal Time Series Models," Annals of
   Statistics, Vol. 10, No. 4 (Dec., 1982), 1209-1216.

   Dickey, D.A., Hasza D.P., Fuller, W.A. "Testing for Unit Roots in
   Seasonal Time Series," Journal of the American Statistical
   Association, Vol. 79, No. 386 (Jun., 1984), 355-367. */

/* Read the monthly revenue series (Feb 1990 - Nov 2005, in dollars).
   The trailing @@ holds the input line across iterations of the DATA
   step so that several (date, rev) pairs can share one data line. */
data Plano;
   input date :monyy5. rev @@;
   format date monyy5.;
   title 'Plano Sales Tax Revenue Data';
   title2 'By Month';
datalines;
Feb90 2068592 Mar90 867387 Apr90 791878 May90 1731316
Jun90 911839 Jul90 909258 Aug90 1826999 Sep90 964868
Oct90 1020941 Nov90 1881435 Dec90 1075607 Jan91 964977
Feb91 2699324 Mar91 884494 Apr91 1035007 May91 1930143
Jun91 1124814 Jul91 1098136 Aug91 1812798 Sep91 1095294
Oct91 1163039 Nov91 1920424 Dec91 1000743 Jan92 1075763
Feb92 2341127 Mar92 1062449 Apr92 1120898 May92 1939866
Jun92 1316907 Jul92 1284888 Aug92 2098891 Sep92 1375423
Oct92 1201251 Nov92 2165295 Dec92 1301110 Jan93 1251165
Feb93 2986796 Mar93 1271028 Apr93 1228055 May93 2349629
Jun93 1385267 Jul93 1537452 Aug93 2576586 Sep93 1642938
Oct93 1577049 Nov93 2765401 Dec93 1940847 Jan94 1640531
Feb94 3271545 Mar94 1383909 Apr94 1495825 May94 2772734
Jun94 1592051 Jul94 1560732 Aug94 2773904 Sep94 1523255
Oct94 2013622 Nov94 2957306 Dec94 1789103 Jan95 1848972
Feb95 3507801 Mar95 1821378 Apr95 1930585 May95 2823010
Jun95 1970356 Jul95 1970534 Aug95 2982305 Sep95 1795240
Oct95 2145180 Nov95 3021075 Dec95 1908781 Jan96 1957956
Feb96 3955970 Mar96 2119970 Apr96 2208176 May96 3063504
Jun96 2190613 Jul96 2197082 Aug96 3085586 Sep96 2642591
Oct96 2550586 Nov96 3230872 Dec96 2482466 Jan97 2315274
Feb97 4388396 Mar97 2335249 Apr97 1956240 May97 3183566
Jun97 2421722 Jul97 1879301 Aug97 3094563 Sep97 2599894
Oct97 2320012 Nov97 3518486 Dec97 2407487 Jan98 2291118
Feb98 4813948 Mar98 2380134 Apr98 2223477 May98 3378416
Jun98 2876314 Jul98 2650942 Aug98 3788448 Sep98 2651506
Oct98 2450710 Nov98 4118992 Dec98 2434040 Jan99 2763878
Feb99 5227962 Mar99 2762093 Apr99 2528931 May99 4040412
Jun99 2883152 Jul99 3100274 Aug99 4149743 Sep99 3061236
Oct99 2805394 Nov99 3962285 Dec99 3197688 Jan00 3149649
Feb00 5401137 Mar00 3393528 Apr00 2852524 May00 4708691
Jun00 3567883 Jul00 3405732 Aug00 4885709 Sep00 4142396
Oct00 3564755 Nov00 4794159 Dec00 3459785 Jan01 3600702
Feb01 5789400 Mar01 3283596 Apr01 3411052 May01 4783941
Jun01 3706871 Jul01 3756080 Aug01 4318154 Sep01 3201376
Oct01 3502712 Nov01 4864603 Dec01 3108517 Jan02 3357796
Feb02 5904823 Mar02 2951480 Apr02 3185525 May02 4729624
Jun02 3282329 Jul02 3271971 Aug02 4559047 Sep02 3350292
Oct02 3286394 Nov02 4566940 Dec02 2863028 Jan03 3049842
Feb03 5780438 Mar03 3286533 Apr03 3016081 May03 4533575
Jun03 3296881 Jul03 3535071 Aug03 5290070 Sep03 3323063
Oct03 3318144 Nov03 5206490 Dec03 3240679 Jan04 3673046
Feb04 6166054 Mar04 3573983 Apr04 2999256 May04 5177550
Jun04 3845943 Jul04 3492933 Aug04 4975878 Sep04 3531498
Oct04 3611446 Nov04 5145814 Dec04 3260597 Jan05 3715755
Feb05 6239931 Mar05 3730730 Apr05 3431157 May05 5404423
Jun05 4049371 Jul05 3648390 Aug05 5394527 Sep05 3968853
Oct05 3970771 Nov05 5384216
;
run;

/* ------------------------------------------------------------------ */
/* Time-series plot of the level of the series.                        */
/* ------------------------------------------------------------------ */
goptions cback=white colors=(black) border reset=(axis symbol);

title 'Plano Sales Tax Revenue';
axis1 offset=(1 cm) label=('Year') minor=none
      order=('01jan90'd to '01nov06'd by year);
axis2 label=(angle=90 'Plano Sales Tax Revenue')
      order=(0 to 7000000 by 1000000);
symbol1 i=join;

proc gplot data=plano;
   format date year4.;
   plot rev*date / haxis=axis1 vaxis=axis2 vminor=1;
run;
quit;

/* ------------------------------------------------------------------ */
/* Time-series plot of the natural log of the series.                  */
/* ------------------------------------------------------------------ */
title 'Log of Plano Sales Tax Revenue';

data plano;
   set plano;
   lrev = log(rev);
run;

goptions cback=white colors=(black) border reset=(axis symbol);
axis1 offset=(1 cm) label=('Year') minor=none
      order=('01jan90'd to '01nov06'd by year);
axis2 label=(angle=90 'Log of Plano Sales Tax Revenue')
      order=(12 to 18 by 1);
symbol1 i=join;

proc gplot data=plano;
   format date year4.;
   plot lrev*date / haxis=axis1 vaxis=axis2 vminor=1;
run;
quit;

/* Given the above plots, it appears that the log transformation of the
   data is not necessary.  See the later %logtest results that support
   this decision. */

/* The ACF of the first differences of the data (below) has very slowly
   damping autocorrelations at the seasonal lags of 12, 24, 36, and 48,
   hinting that there is substantial seasonal variation in the data.
   We may need to take the first and seasonal span difference, or at
   least the seasonal span difference, of the data in order to make it
   stationary and amenable to Box-Jenkins analysis. */

title 'Examination of Autocorrelation Function for Seasonality';
proc arima data=plano;
   identify var=rev(1) nlag=48;
run;
quit;

/* Let us first look at plots of the first differenced data, the
   seasonal span differenced data and the (1,12) differenced data. */

data plano;
   set plano;
   drev   = rev  - lag(rev);      /* first difference            */
   rev12  = rev  - lag12(rev);    /* seasonal (span 12) difference */
   rev112 = drev - lag12(drev);   /* (1,12) difference           */
run;

title 'Difference in Plano Sales Tax Revenue';
axis1 offset=(1 cm) label=('Year') minor=none
      order=('01jan90'd to '01nov06'd by year);
axis2 label=(angle=90 'Dif in Plano Sales Tax Revenue')
      order=(-3000000 to 3000000 by 1000000);
symbol1 i=join;

proc gplot data=plano;
   format date year4.;
   plot drev*date / haxis=axis1 vaxis=axis2 vminor=1;
run;
quit;

/* The vertical axis spans +/- 1.5 million: the Sep00 seasonal
   difference (4142396 - 3061236 = 1081160) exceeds 1 million and would
   fall outside a +/- 1 million axis. */
title 'Seasonal Difference in Plano Sales Tax Revenue';
axis1 offset=(1 cm) label=('Year') minor=none
      order=('01jan90'd to '01nov06'd by year);
axis2 label=(angle=90 'Seas Dif in Plano Sales Tax Revenue')
      order=(-1500000 to 1500000 by 500000);
symbol1 i=join;

proc gplot data=plano;
   format date year4.;
   plot rev12*date / haxis=axis1 vaxis=axis2 vminor=1;
run;
quit;

title 'First and Seasonal Difference in Plano Sales Tax Revenue';
axis1 offset=(1 cm) label=('Year') minor=none
      order=('01jan90'd to '01nov06'd by year);
axis2 label=(angle=90 '1st and Seas Dif in Plano Sales Tax Revenue')
      order=(-1000000 to 1000000 by 500000);
symbol1 i=join;

proc gplot data=plano;
   format date year4.;
   plot rev112*date / haxis=axis1 vaxis=axis2 vminor=1;
run;
quit;

/* In the code below we conduct the Hasza and Fuller (1982) test for
   the first and seasonal span difference in the data.  Notice that we
   rescale the data to be in millions of dollars rather than dollars.
   This rescaling makes it easier on the computer in terms of matrix
   inversion and the calculation of the requisite F-statistic for the
   Hasza-Fuller test.  If you apply the test statement on the raw data,
   the computer returns a message that it cannot compute the statistic.

   The previously computed difference series (drev, rev12, rev112) are
   rescaled together with rev so that every regressor and dependent
   variable used below is in the same units.  The F- and t-statistics
   are invariant to this common rescaling.

   NOTE: this step overwrites rev in place; re-running it on an already
   rescaled data set would divide by one million a second time.

   The appropriate table to use is Table 5.1, p. 1214, and in
   particular the second block of numbers in the table designated with
   Phi(3,n-d-4).  The appropriate critical value at the 5% level is
   14.78.  Obviously, the observed F-statistic of 1.91 is less than
   this critical value, thus we accept the null hypothesis that the
   (1,12) differencing is appropriate for the data. */

data plano;
   set plano;

   /* dollars -> millions of dollars, applied to every level/difference */
   rev    = rev    / 1000000;
   drev   = drev   / 1000000;
   rev12  = rev12  / 1000000;
   rev112 = rev112 / 1000000;

   /* Hasza-Fuller regressors: under H0 the coefficients are (1, 0, 1) */
   x1 = lag(rev);
   x2 = lag(rev)   - lag13(rev);
   x3 = lag12(rev) - lag13(rev);

   /* augmentation terms: lags 1-4 of the (1,12) difference */
   rev112_1 = lag(rev112);
   rev112_2 = lag2(rev112);
   rev112_3 = lag3(rev112);
   rev112_4 = lag4(rev112);
run;

title 'Hasza-Fuller Test for (1,12) Differencing';
proc reg data=plano;
   model rev = x1 x2 x3 rev112_1 rev112_2 rev112_3 rev112_4 / noint;
   test x1 = 1, x2 = 0, x3 = 1;
run;
quit;

/* The above Hasza-Fuller test indicates that the (1,12) differencing
   of the Plano Tax Revenue Data is appropriate. */

/* Just "for practice" I am going to conduct the Dickey/Hasza/Fuller
   (1984) test of the hypothesis that the seasonal span difference of
   the data is sufficient to render the Plano Data stationary.  You can
   read the t-statistic associated with the variable rev_12.  The
   appropriate table is Table 3, p. 359.  The appropriate critical
   value at the 5% level is -2.09.  The observed t-statistic is 1.71
   and therefore the null hypothesis that the seasonal span difference
   is needed is supported by the data.  However, we have seen from the
   previous Hasza-Fuller test that the (1,12) differencing is supported
   by the data. */

data plano;
   set plano;
   rev_12  = lag12(rev);     /* level at the seasonal lag          */
   rev12_1 = lag(rev12);     /* lags 1-4 of the seasonal difference */
   rev12_2 = lag2(rev12);
   rev12_3 = lag3(rev12);
   rev12_4 = lag4(rev12);
run;

title 'The Dickey-Hasza-Fuller test for Seasonal span difference';
proc reg data=plano;
   model rev12 = rev_12 rev12_1 rev12_2 rev12_3 rev12_4 / noint;
run;
quit;

/* Here we use the %logtest macro to see if the log transformation is
   needed.  Given the test results, it seems that the log
   transformation is not needed.  We check the robustness of the
   results over the different choices of AR lag lengths 4 and 12. */

title1 'Using SAS %logtest macro to test level versus log specifications';
title2 'With AR=4 lag length';
%logtest(plano, rev, ar=4, dif=(1,12), print=yes);
run;

title1 'Using SAS %logtest macro to test level versus log specifications';
title2 'With AR=12 lag length';
%logtest(plano, rev, ar=12, dif=(1,12), print=yes);
run;

/* Let's generate the ACF and PACF for the (1,12) data so that we can
   make a tentative identification of a reasonable multiplicative,
   seasonal B-J model for the Plano data. */

title1 'Tentative Identification of Multiplicative , Seasonal B-J Model';
title2 'for the Plano Tax Revenue Data';
proc arima data=plano;
   identify var=rev(1,12) nlag=48;
run;
quit;

title 'Estimation of the "Airline" Model';
proc arima data=plano;
   identify var=rev(1,12) noprint;
   estimate q=(1)(12);
run;
quit;

title 'Estimation of some reasonable Seasonal B-J models';
proc arima data=plano;
   identify var=rev(1,12) noprint;
   estimate q=(1,2,3)(12);
   estimate p=(1) q=(1,2,3)(12);
   estimate p=(24) q=(1,2,3)(12);
run;
quit;

/* Forecast 13 months ahead with the chosen model.  ID=/INTERVAL= label
   each forecast with its calendar month instead of only an observation
   number. */
title1 'Forecasting with a Chosen Seasonal B-J model';
title2 'Note: Forecasts are in millions of dollars';
proc arima data=plano;
   identify var=rev(1,12) noprint;
   estimate p=(24) q=(1,2,3)(12) noprint;
   forecast lead=13 id=date interval=month;
run;
quit;