/*
   This Monte Carlo program generates an ARIMA(0,1,0) series in logs.
   The SAS %logtest macro is then used to choose between two competing
   models: an ARIMA(0,1,0) in levels or an ARIMA(0,1,0) in logs.

   We trace how the test performs over the first 50 observations, then
   the first 75 observations, and finally all 100 observations of the
   Monte Carlo data set.

   At the end of the program we look at how the %logtest macro performs
   with an incorrect lag length (AR=5, the default, instead of AR=0).
   As one might expect, the "power" of the test is somewhat mitigated
   by superfluous lags.
*/

options pagesize=60 linesize=74 nodate;

goptions device=win gsfname=plot rotate=landscape
         gsfmode=append target=hpljs3;
* border;

title1 'Simulated ARIMA(0,1,0) in logs';

/*
   Here is where the ARIMA(0,1,0) in logs data is generated.
   X is the logged data and Y is the levels data.

   The loop runs from i = -50 to 100 but only rows with i > 0 are
   written, so the first 51 iterations act as a burn-in and the output
   data set holds 100 observations.
*/
data a;
   x1 = 3;                       /* previous value of X (start value)   */
   a1 = 0;                       /* previous shock; not used in the     */
                                 /* recursion below but kept so the     */
                                 /* data set keeps the same columns     */
   do i = -50 to 100;
      a = rannor( 32565 );       /* standard normal shock, fixed seed   */
      x = x1 + .03 + .02*a;      /* random walk with drift .03          */
      if i > 0 then output;      /* written BEFORE the lags are updated */
      a1 = a;
      x1 = x;
   end;
run;

/* Add first differences, the levels series Y = exp(X), and an
   observation counter used as the horizontal plot axis. */
data a;
   set a;
   dx  = x - lag(x);
   y   = exp(x);
   dy  = y - lag(y);
   obs = _n_;
run;

proc print data=a;
run;

proc means data=a;
   var x y dx dy;
run;

/*
   Plot the X and Y series and the DX and DY series in various
   snapshots, i.e. for obs = 50, 75, and 100.

   Notice, as described by Ermini and Granger (1993), "Some
   Generalizations on the Algebra of I(1) Processes," JOURNAL OF
   ECONOMETRICS, Aug., Volume 58, No. 3, pp. 369 - 384, and Ermini and
   Hendry (1991), "Log Income Vs. Linear Income: An Application of the
   Encompassing Principle," Working Paper No. 91-11, April, 1991, the
   differences in levels should have drift and variance that
   exponentially increases with time.

   For additional discussion see Banerjee, Dolado, Galbraith, and
   Hendry (1993), CO-INTEGRATION, ERROR-CORRECTION, AND THE ECONOMETRIC
   ANALYSIS OF NON-STATIONARY DATA (Oxford University Press: Oxford,
   UK), pp. 192 - 199.
*/

/*
   plot_one: draw one series from data set A against OBS.

     nobs   = number of leading observations of A to plot
     var    = variable plotted on the vertical axis
     what   = text used in the title ("Plot of <what>") and as the
              vertical axis label
     vorder = ORDER= list for the vertical axis, without parentheses

   TITLE1 always carries the plot name and TITLE2 the observation
   count. The order matters: a TITLE1 statement cancels every
   higher-numbered title, so issuing TITLE2 before TITLE1 would lose
   the TITLE2 text.
*/
%macro plot_one(nobs=, var=, what=, vorder=);
   proc gplot data=a(obs=&nobs);
      title1 height=4 "Plot of &what";
      title2 height=3 "&nobs Observations";
      axis1 order=(0 to 100 by 10) label=(f=duplex 'Obs');
      axis2 order=(&vorder) label=(f=duplex "&what");
      symbol i=join;
      plot &var*obs=1 / vaxis=axis2 haxis=axis1;
   run;
   quit;
%mend plot_one;

/* plot_all: the four plots (X, Y, DX, DY) for one snapshot size.
   The axis ranges are the same for every snapshot so the plots can be
   compared directly. */
%macro plot_all(nobs);
   %plot_one(nobs=&nobs, var=x,  what=Log(Y),
             vorder=4 to 8 by 0.5)
   %plot_one(nobs=&nobs, var=y,  what=Y,
             vorder=0 to 2200 by 200)
   %plot_one(nobs=&nobs, var=dx, what=Change in Log(Y),
             vorder=-.03 to .12 by .03)
   %plot_one(nobs=&nobs, var=dy, what=Change in Y,
             vorder=-5 to 105 by 10)
%mend plot_all;

%plot_all(50)
%plot_all(75)
%plot_all(100)

/* Subsets holding the first 50 and the first 75 observations. */
data a50;
   set a(obs=50);
run;

data a75;
   set a(obs=75);
run;

/*
   Here we use the %logtest macro to determine whether the data should
   be analysed in levels or logs. We are using the parsimonious lag
   length AR=0 to reflect the true data generating process.

   The %logtest is first applied to the first 50 observations, then
   the first 75, and finally all of the data. The favored
   transformation will have the largest log likelihood (LOGLIK), and
   the smaller RMSE, AIC, and SBC.

   The macro seems to work pretty well in favoring the log
   specification over the levels specification.
*/
title 'logtest for first 50 observations, AR=0';
%logtest(a50,y,dif=(1),print=yes,ar=0);

title 'logtest for first 75 observations, AR=0';
%logtest(a75,y,dif=(1),print=yes,ar=0);

title 'logtest for all observations, AR=0';
%logtest(a,y,dif=(1),print=yes,ar=0);

/*
   Here we use the %logtest macro to determine whether the data should
   be analysed in levels or logs, but we use a non-parsimonious lag
   length AR=5 (the default of %logtest).

   The %logtest is first applied to the first 50 observations, then
   the first 75, and finally all of the data. Even with this
   non-parsimonious specification the macro seems to work pretty well
   in favoring the log specification over the levels specification.
   The exception is at obs=50, where the RMSE criterion inappropriately
   favors the levels specification. Also note that at obs=100 the AR=5
   specification is so non-parsimonious that the various measures
   cannot be calculated for the levels specification.
*/
title 'logtest for first 50 observations, AR=5, the default';
%logtest(a50,y,dif=(1),print=yes);

title 'logtest for first 75 observations, AR=5, the default';
%logtest(a75,y,dif=(1),print=yes);

title 'logtest for all observations, AR=5, the default';
%logtest(a,y,dif=(1),print=yes);

run;