/* This monte carlo program generates an ARIMA(0,1,0) series in levels.
   Then the SAS %logtest macro is used to determine which of two competing
   models to choose from: an ARIMA(0,1,0) in levels or an ARIMA(0,1,0)
   in logs.  We trace how the test performs over the first 50 observations,
   then the first 75 observations, and finally the entire 100
   observations of the monte carlo data set.  At the end of the
   program we look at how the %logtest macro performs with an incorrect
   lag length (AR=5, the default, instead of AR=0).  As one might expect,
   the "power" of the test is somewhat mitigated by superfluous lags. */

options pagesize=60 linesize=74 nodate;
goptions device=win
         gsfname=plot
         rotate=landscape
         gsfmode=append
         target=hpljs3;
*        border;

title1 'Simulated ARIMA(0,1,0) in levels';

/* Here is where the ARIMA(0,1,0) in levels data is generated.  X is
   the logged data and Y is the levels data.  */

data a;
  y1 = 0; a1 = 0;
  do i = -50 to 100;
     a = rannor( 32565 );
     y = y1 + 1.0 + a;
     if i > 0 then output;
     a1 = a;
     y1 = y;
     end;

data a;
  set a;
  x=log(y);
  dx=x-lag(x);
  dy=y-lag(y);
  obs=_n_;

proc print data=a;

proc means data=a;
  var x y dx dy;

/* Plot the X and Y series and the DX and DY series in various
   snapshots, i.e. for the first 50, 75, and 100 observations.  When
   the true model is a Log ARIMA(0,1,0), the differences of the levels
   series have an exponential drift and variance (as documented by
   Ermini and Granger (1993) and Ermini and Hendry (1991)).  The
   reverse case, i.e. the behavior of the trend and variance of the
   log series when the true model is a Levels ARIMA(0,1,0), has not
   been documented in the literature.  Let's see how the %logtest
   macro does in this case.  */

proc gplot data=a(obs=50);
  title1 height=4 'Plot of Log(Y)';
  title2 height=3 '50 Observations';
  axis2 order=(3 to 6 by 0.5)
        label=(f=duplex 'Log(Y)');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot x*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a(obs=50);
  title1 height=4 'Plot of Y';
  title2 height=3 '50 Observations';
  axis2 order=(40 to 170 by 10)
        label=(f=duplex 'Y');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot y*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a(obs=50);
  title1 height=4 'Plot of Change in Log(Y)';
  title2 height=3 '50 Observations';
  axis2 order=(-.03 to .09 by .03)
        label=(f=duplex 'Change in Log(Y)');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot dx*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a(obs=50);
  title1 height=4 'Plot of Change in Y';
  title2 height=3 '50 Observations';
  axis2 order=(-2 to 5 by 0.5)
        label=(f=duplex 'Change in Y');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot dy*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a(obs=75);
  title1 height=4 'Plot of Log(Y)';
  title2 height=3 '75 Observations';
  axis2 order=(3 to 6 by 0.5)
        label=(f=duplex 'Log(Y)');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot x*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a(obs=75);
  title1 height=4 'Plot of Y';
  title2 height=3 '75 Observations';
  axis2 order=(40 to 170 by 10)
        label=(f=duplex 'Y');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot y*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a(obs=75);
  title1 height=4 'Plot of Change in Log(Y)';
  title2 height=3 '75 Observations'; 
  axis2 order=(-.03 to .09 by .03)
        label=(f=duplex 'Change in Log(Y)');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot dx*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a(obs=75);
  title1 height=4 'Plot of Change in Y';
  title2 height=3 '75 Observations';
  axis2 order=(-2 to 5 by 0.5)
        label=(f=duplex 'Change in Y');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot dy*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a;
  title1 height=4 'Plot of Log(Y)';
  title2 height=3 '100 Observations';
  axis2 order=(3 to 6 by 0.5)
        label=(f=duplex 'Log(Y)');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot x*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a;
  title1 height=4 'Plot of Y';
  title2 height=3 '100 Observations';
  axis2 order=(40 to 170 by 10)
        label=(f=duplex 'Y');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot y*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a;
  title1 height=4 'Plot of Change in Log(Y)';
  title2 height=3 '100 Observations';
  axis2 order=(-.03 to .09 by .03)
        label=(f=duplex 'Change in Log(Y)');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot dx*obs=1/vaxis=axis2 haxis=axis1;

proc gplot data=a;
  title1 height=4 'Plot of Change in Y';
  title2 height=3 '100 Observations';
  axis2 order=(-2 to 5 by 0.5)
        label=(f=duplex 'Change in Y');
  axis1 order=(0 to 100 by 10)
        label=(f=duplex 'Obs');
  symbol i=join;

  plot dy*obs=1/vaxis=axis2 haxis=axis1;


data a50;
  set a;
  if _n_ < 51;

data a75;
  set a;
  if _n_ < 76;

/* Here we use the %logtest macro to determine whether the data
   should be analysed in levels or logs.  We are using the
   parsimonious lag length AR=0 to reflect the true data generating
   process.  The %logtest is first applied to the first 50
   observations, then the first 75, and finally all of the data.
   The favored transformation will have the largest Log likelihood
   (LOGLIK), and the smaller RMSE, AIC, and SBC. The macro seems to
   work pretty well in favoring the level specification over the log
   specification. */

title 'logtest for first 50 observations, AR=0';
%logtest(a50,y,dif=(1),print=yes,ar=0);

title 'logtest for first 75 observations, AR=0';
%logtest(a75,y,dif=(1),print=yes,ar=0);

title 'logtest for all observations, AR=0';
%logtest(a,y,dif=(1),print=yes,ar=0);

/* Here we use the %logtest macro to determine whether the data
   should be analysed in levels or logs but we use a non-
   parsimonious lag length AR=5 (the default of %logtest).
   The %logtest is first applied to the first 50 observations,
   then the first 75, and finally all of the data.  Even with this
   nonparsimonious specification the macro seems to work pretty well
   in favoring the level specification over the log specification
   although you can tell that the differences in the various criteria
   have narrowed (thus implying lower power of the test statistics). */

title 'logtest for first 50 observations, AR=5, the default';
%logtest(a50,y,dif=(1),print=yes);

title 'logtest for first 75 observations, AR=5, the default';
%logtest(a75,y,dif=(1),print=yes);

title 'logtest for all observations, AR=5, the default';
%logtest(a,y,dif=(1),print=yes);

run;