/* This monte carlo program generates an ARIMA(0,1,0) series in logs.
   Then the SAS %logtest macro is used to determine which of two competing
   models to choose from: an ARIMA(0,1,0) in levels or an ARIMA(0,1,0)
   in logs.  We trace how the test performs over the first 50 observations,
   then the first 75 observations, and finally the entire 100
   observations of the monte carlo data set.  At the end of the
   program we look at how the %logtest macro performs with an incorrect
   lag length (AR=5, the default, instead of AR=0).  As one might expect,
   the "power" of the test is somewhat reduced by superfluous lags. */


/* Listing layout: no date stamp, 74 columns wide, 60 lines per page. */
options nodate linesize=74 pagesize=60;

/* Graphics options.  GSFNAME= names a fileref PLOT that is not assigned
   in the visible part of this program -- presumably it is set up
   elsewhere (TODO confirm).  The BORDER option is left disabled. */
goptions device=win target=hpljs3 rotate=landscape
         gsfname=plot gsfmode=append;
* border;

title1 'Simulated ARIMA(0,1,0) in logs';

/* Here is where the ARIMA(0,1,0) in logs data is generated.  X is
   the logged data and Y is the levels data.  */

/* Simulate X as a random walk with drift: each step adds 0.03 plus
   0.02 times a standard normal draw.  Iterations with i <= 0 are run
   but not written out, so the data set holds the 100 rows i = 1..100.
   A1 keeps the previous shock; nothing in this step reads it, but it is
   retained because it is a column of the output data set. */
data a;
  x1 = 3;
  a1 = 0;
  do i = -50 to 100;
    a = rannor(32565);
    x = x1 + .03 + .02*a;
    if i > 0 then output;
    a1 = a;
    x1 = x;
  end;
run;

/* Add the levels series and first differences to data set A:
     DX  = change in the log series X
     Y   = exp(X), the series in levels
     DY  = change in Y
     OBS = observation counter used as the plot x-axis.
   The assignment order is kept because it fixes the column order. */
data a;
  set a;
  dx  = x - lag1(x);
  y   = exp(x);
  dy  = y - lag1(y);
  obs = _n_;
run;

/* List every observation of the simulated data set. */
proc print data=a;
run;

/* Summary statistics for the log series, the levels series, and their
   first differences. */
proc means data=a;
  var x y dx dy;
run;

/* Plot the X and Y series and the DX and DY series in various
   snapshots, i.e. for obs = 50, 75, and 100.  Notice, as described
   by Ermini and Granger (1993), "Some Generalizations on the Algebra
   of I(1) Processes," JOURNAL OF ECONOMETRICS, Aug., Volume 58, No. 3,
   pp. 369 - 384, and Ermini and Hendry (1991), "Log Income Vs. Linear
   Income: An Application of the Encompassing Principle," Working Paper
   No. 91-11, April, 1991, the differences in levels should have
   drift and variance that increase exponentially with time.  For
   additional discussion see Banerjee, Dolado, Galbraith, and Hendry
   (1993), CO-INTEGRATION, ERROR-CORRECTION, AND THE ECONOMETRIC
   ANALYSIS OF NON-STATIONARY DATA (Oxford University Press: Oxford, UK),
   pp. 192 - 199.  */

/* Log series X against observation number, first 50 observations.
   The horizontal axis spans 0-100 here as in the other snapshots. */
proc gplot data=a(obs=50);
  title1 height=4 'Plot of Log(Y)';
  title2 height=3 '50 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Log(Y)')
        order=(4 to 8 by 0.5);

  plot x*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Levels series Y against observation number, first 50 observations. */
proc gplot data=a(obs=50);
  title1 height=4 'Plot of Y';
  title2 height=3 '50 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Y')
        order=(0 to 2200 by 200);

  plot y*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Change in the log series (DX) against observation number, first 50
   observations.

   TITLE1 must be issued before TITLE2: a TITLE1 statement cancels every
   higher-numbered title, so the previous TITLE2-then-TITLE1 order wiped
   out the 'Plot of Change in Log(Y)' heading and left only the
   observation count.  The main heading is now TITLE1 (height 4) and the
   observation count TITLE2 (height 3), as in the other plots. */
proc gplot data=a(obs=50);
  title1 height=4 'Plot of Change in Log(Y)';
  title2 height=3 '50 Observations';
  axis2 order=(-.03 to .12 by .03)
        label=(f=duplex 'Change in Log(Y)');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot dx*obs=1/vaxis=axis2 haxis=axis1;
run;

/* Change in the levels series (DY) against observation number, first 50
   observations. */
proc gplot data=a(obs=50);
  title1 height=4 'Plot of Change in Y';
  title2 height=3 '50 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Change in Y')
        order=(-5 to 105 by 10);

  plot dy*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Log series X against observation number, first 75 observations. */
proc gplot data=a(obs=75);
  title1 height=4 'Plot of Log(Y)';
  title2 height=3 '75 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Log(Y)')
        order=(4 to 8 by 0.5);

  plot x*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Levels series Y against observation number, first 75 observations. */
proc gplot data=a(obs=75);
  title1 height=4 'Plot of Y';
  title2 height=3 '75 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Y')
        order=(0 to 2200 by 200);

  plot y*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Change in the log series (DX) against observation number, first 75
   observations. */
proc gplot data=a(obs=75);
  title1 height=4 'Plot of Change in Log(Y)';
  title2 height=3 '75 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Change in Log(Y)')
        order=(-.03 to .12 by .03);

  plot dx*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Change in the levels series (DY) against observation number, first 75
   observations. */
proc gplot data=a(obs=75);
  title1 height=4 'Plot of Change in Y';
  title2 height=3 '75 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Change in Y')
        order=(-5 to 105 by 10);

  plot dy*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Log series X against observation number, all 100 observations. */
proc gplot data=a;
  title1 height=4 'Plot of Log(Y)';
  title2 height=3 '100 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Log(Y)')
        order=(4 to 8 by 0.5);

  plot x*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Levels series Y against observation number, all 100 observations. */
proc gplot data=a;
  title1 height=4 'Plot of Y';
  title2 height=3 '100 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Y')
        order=(0 to 2200 by 200);

  plot y*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Change in the log series (DX) against observation number, all 100
   observations. */
proc gplot data=a;
  title1 height=4 'Plot of Change in Log(Y)';
  title2 height=3 '100 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Change in Log(Y)')
        order=(-.03 to .12 by .03);

  plot dx*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Change in the levels series (DY) against observation number, all 100
   observations. */
proc gplot data=a;
  title1 height=4 'Plot of Change in Y';
  title2 height=3 '100 Observations';
  symbol1 interpol=join;
  axis1 label=(font=duplex 'Obs')
        order=(0 to 100 by 10);
  axis2 label=(font=duplex 'Change in Y')
        order=(-5 to 105 by 10);

  plot dy*obs=1 / haxis=axis1 vaxis=axis2;
run;

/* Sub-samples passed to %logtest: A50 keeps the first 50 rows of A and
   A75 the first 75. */
data a50;
  set a;
  if _n_ <= 50;
run;

data a75;
  set a;
  if _n_ <= 75;
run;

/* Here we use the %logtest macro to determine whether the data
   should be analysed in levels or logs.  We are using the
   parsimonious lag length AR=0 to reflect the true data generating
   process.  The %logtest is first applied to the first 50
   observations, then the first 75, and finally all of the data.
   The favored transformation will have the largest Log likelihood
   (LOGLIK), and the smaller RMSE, AIC, and SBC. The macro seems to
   work pretty well in favoring the log specification over the levels
   specification. */

/* %logtest on Y with first differencing and AR=0, run on the 50-row,
   75-row and full samples in turn.  Each call is preceded by a title
   naming the sample. */
title1 'logtest for first 50 observations, AR=0';
%logtest(a50,y,dif=(1),ar=0,print=yes);

title1 'logtest for first 75 observations, AR=0';
%logtest(a75,y,dif=(1),ar=0,print=yes);

title1 'logtest for all observations, AR=0';
%logtest(a,y,dif=(1),ar=0,print=yes);

/* Here we use the %logtest macro to determine whether the data
   should be analysed in levels or logs but we use a non-
   parsimonious lag length AR=5 (the default of %logtest).
   The %logtest is first applied to the first 50 observations,
   then the first 75, and finally all of the data.  Even with this
   nonparsimonious specification the macro seems to work pretty well
   in favoring the log specification over the levels specification.
   The exception is at obs=50 where the RMSE criterion inappropriately
   favors the levels specification.  Also note that at obs=100 the AR=5
   specification is so nonparsimonious that the various measures cannot
   be calculated for the levels specification. */

/* Same three samples again, this time leaving AR= unspecified so that
   %logtest falls back to its own default lag length (the titles state
   this is AR=5). */
title1 'logtest for first 50 observations, AR=5, the default';
%logtest(a50,y,print=yes,dif=(1));

title1 'logtest for first 75 observations, AR=5, the default';
%logtest(a75,y,print=yes,dif=(1));

title1 'logtest for all observations, AR=5, the default';
%logtest(a,y,print=yes,dif=(1));

run;