/*Jacob Williamson*/

/*
This program's purpose is to visually demonstrate the distribution of beta0
(intercept) and beta1 (slope) utilizing the Ordinary Least Squares estimation
methodology and the Extreme Value estimation methodology. Both methodologies,
when taken on a large enough number of samples, arrive at the same B0 and B1
(true intercept and slope of "Mother Nature's" line). However, the variance of
the OLS Estimates is always less than the variance of the EV Estimates,
illustrating the Gauss-Markov theorem.

The program calls for six input values from the user (which are detailed
below).
*/

******************************************************************************
******************************************************************************
******************************************************************************;

/*User Inputs*/

/*Please enter the number of samples for the test*/
%let n = 100;

/*Please enter the sample size of each test*/
/*Note that the larger you make the sample size of each test, the larger the
  variance is on the EV Estimators*/
%let q = 20;

/*Please enter the slope of "Mother Nature's" line*/
%let m = 2;

/*Please enter the intercept of "Mother Nature's" line*/
%let b = 20;

/*Please enter the mu and sigma values of the normal distribution curve to
  calculate the random, normally-distributed error terms.*/
%let mu = 0;
%let sigma = 3;

******************************************************************************
******************************************************************************
******************************************************************************;

/*
Macro ITERATION: draws one random sample and estimates the line two ways.

Parameters (positional):
  it         - number of observations in the sample
  sl         - true slope used to generate y
  int        - true intercept used to generate y
  s_mean     - mean of the normally distributed error term
  s_variance - spread of the error term. It is passed as the third argument
               of RAND('NORMAL', ...), which SAS interprets as a STANDARD
               DEVIATION, so despite the name this is sigma, not sigma
               squared.

Outputs (WORK data sets, overwritten on every call):
  ols_estimate - OUTEST= data set from PROC REG for the model y = x
  ev_estimate  - one row with SLOPE and INTERCEPT of the line through the
                 observations with the smallest and the largest x
*/
%macro iteration(it, sl, int, s_mean, s_variance);

    /* Draw the regressor from a standard normal and the error term from a
       normal with the requested mean and spread, one row per observation. */
    data load_in (drop = i);
        do i = 1 to &it;
            random_number_x = rand('NORMAL', 0, 1);
            random_number_y = rand('NORMAL', &s_mean, &s_variance);
            output;
        end;
    run;

    /* Build the observed (x, y) pairs: y = slope * x + intercept + error. */
    data load_in (drop = random_number_x random_number_y);
        set load_in;
        x = random_number_x;
        y = (&sl.*x) + (&int. + random_number_y);
    run;

    /* OLS fit. The listing destination is closed around PROC REG so the
       regression report is not printed for every sample. */
    ods listing close;
    proc reg data=load_in outest=ols_estimate;
        model y = x;
    run;
    quit;
    ods listing;

    /* Order by x so that the first row is the minimum-x observation and the
       last row (row number = sample size) is the maximum-x observation. */
    proc sort data=load_in;
        by x;
    run;

    data min_obs max_obs;
        set load_in;
        rank = _n_;
        if rank = 1 then output min_obs;
        else if rank = &it then output max_obs;
    run;

    /* Both inputs hold a single row, so the cross join yields one row that
       carries the two extreme points side by side. */
    proc sql;
        create table load_in2 as
        select a.x as min_x,
               a.y as min_y,
               b.x as max_x,
               b.y as max_y
        from min_obs a, max_obs b;
    quit;

    /* Extreme Value estimate: the line through the two extreme points. */
    data ev_estimate (keep = slope intercept);
        set load_in2;
        slope = (max_y-min_y)/(max_x-min_x);
        intercept = max_y - slope*max_x;
    run;

%mend iteration;

/*
Macro LOOPER: runs ITERATION repeatedly and stacks the per-sample estimates.

Parameters (positional):
  sample_count - number of samples to draw
  sample_size  - number of observations in each sample
  beta1        - true slope
  beta0        - true intercept
  smean        - mean of the error term
  svar         - spread of the error term, forwarded to the s_variance
                 parameter of ITERATION

Outputs (WORK data sets):
  ols_final - one row per sample, stacked copies of ols_estimate
  ev_final  - one row per sample, stacked copies of ev_estimate
*/
%macro looper(sample_count, sample_size, beta1, beta0, smean, svar);

    /* Keep the loop counter out of any enclosing or global symbol table. */
    %local j;

    %do j = 1 %to &sample_count;

        %iteration(&sample_size, &beta1, &beta0, &smean, &svar);

        /* The first pass creates (or replaces) the accumulators; every
           later pass appends the new estimates to them. */
        %if &j=1 %then %do;
            data ols_final;
                set ols_estimate;
            run;

            data ev_final;
                set ev_estimate;
            run;
        %end;
        %else %do;
            data ols_final;
                set ols_final ols_estimate;
            run;

            data ev_final;
                set ev_final ev_estimate;
            run;
        %end;

    %end;

%mend looper;

/* Run the simulation with the user inputs defined at the top of the file. */
%looper(&n, &q, &m, &b, &mu, &sigma);

/* In the OUTEST= data set the coefficient of x is stored in a column named
   x; rename it to SLOPE so the OLS and EV tables share the same layout. */
data ols_final (keep = intercept slope);
    set ols_final (rename = (x=slope));
run;

/* Remove the per-iteration work tables. */
proc delete data=ev_estimate load_in load_in2 max_obs min_obs ols_estimate;
run;

/* Stack both sets of estimates and label each row with its method. */
data est_final;
    set ols_final (in=a) ev_final (in=b);
    format regression_type $15.;
    if a=1 then regression_type = "OLS Estimators";
    else if b=1 then regression_type = "EV Estimators";
run;

/* Slope: one histogram panel per estimation method (two rows), with a
   fitted normal curve and an inset showing the mean and variance. The ODS
   SELECT name matches the NAME= given on the HISTOGRAM statement. */
ods select est_sl ;
title "Distribution of Beta1 (slope)";
proc univariate data=est_final;
    class regression_type;
    var slope;
    histogram / normal (color=yellow w=2)
                cfill=ligb
                cframe=ligr
                cframeside=ligr
                intertile=1
                nrows=2
                name="est_sl";
    inset mean var="Variance" / pos=ne format=6.3 cfill=ywh;
run;

/* Intercept: same layout as the slope plot. */
ods select est_int ;
title "Distribution of Beta0 (intercept)";
proc univariate data=est_final;
    class regression_type;
    var intercept;
    histogram / normal (color=yellow w=2)
                cfill=ligb
                cframe=ligr
                cframeside=ligr
                intertile=1
                nrows=2
                name="est_int";
    inset mean var="Variance" / pos=ne format=6.3 cfill=ywh;
run;