***************************************************************************; ** PROGRAM: boot_2MRCV.sas **; ** AUTHOR: Christopher R. Bilder **; ** Department of Statistics **; ** University of Nebraska-Lincoln **; ** chris@chrisbilder.com **; ** DATE: 3-23-06 **; ** PURPOSE: Fit a generalized loglinear model via the marginal modeling **; ** method and use the bootstrap and Rao-Scott approximations **; ** to find a p-value for testing the model of interest vs. the **; ** saturated model. Modifications can be made to the program **; ** to use an alternative hypothesis model other than the **; ** saturated model. **; ** NOTES: **; ** 1) Do not distribute copies of this program without the permission of**; ** the author. **; ** 2) Copyright 2006 Christopher R. Bilder **; ** 3) See control_2MRCV.sas for more information **; ***************************************************************************; **********************************************************************; ** **; ** NAME: MACRO TIME **; ** PURPOSE: Write time to OUTPUT window **; ** NOTES: **; ** VARIABLES: title = begin or end of program **; ** **; **********************************************************************; %MACRO TIME(title); data time; time = hour(time()); minute = minute(time()); second = second(time()); month = month(today()); day = day(today()); year = year(today()); run; proc print data=time; title2 "&title"; run; %MEND TIME; **********************************************************************; ** **; ** NAME: MACRO NUMOBS **; ** PURPOSE: Find number of observations in a data set **; ** NOTES: Adapted from a SAS website sample program **; ** VARIABLES: dsn = Data set **; ** name = Number of observations put in here **; ** **; **********************************************************************; %MACRO NUMOBS(dsn, name); %global &name; data _null_; call symput("&name", left(put(count2,8.))); stop; set &dsn nobs=count2; run; %MEND NUMOBS; **********************************************************************; ** **; ** NAME: MACRO G_H **; ** PURPOSE: Creates G and H matrices needed for PROC IML **; ** NOTES: See Appendix A of paper and Bilder and Loughin **; ** (Biometrics, 2004) **; ** VARIABLES: numb_items = number of items **; ** matrix = G or H **; ** **; **********************************************************************; %MACRO G_H(numb_items, matrix); *From data generation program - iterate out all possible combinations; %let X = do item1 = 0 to 1%str(;); %let endit = end%str(;); *Generalize; %do j = 2 %to &numb_items; %let X = &X.do item&j=0 to 1%str(;); %let endit = &endit end%str(;); %end; *Create matrix of all items; data itemvec; &X; output; &endit; run; *Transpose the matrix to be in the correct form; proc transpose data=itemvec out=&matrix.itemvec2; var item1-item&numb_items; run; %MEND G_H; **********************************************************************; ** **; ** NAME: MACRO REVERSE **; ** PURPOSE: Bilder and Loughin (Biometrics, 2004) and simulations **; ** for this paper used a reverse definition of **; ** Y and W so reverse here to match; **; ** NOTES: This MACRO is not used in this program. It is here only **; ** to inform a user who may want to duplicate the simulations**; ** in Bilder and Loughin (Biometrics, 2004) and this paper **; ** VARIABLES: data_set = contains generated data **; ** n = sample size **; ** **; **********************************************************************; %MACRO REVERSE(data_set, n); data &data_set; set sortset4; iter = floor((_n_-0.1)/&n+1); array rownumb {&nrow2} w1-w&nrow2; array colnumb {&ncol2} y1-y&ncol2; array names {%eval(&nrow2*&ncol2)} item1-item%eval(&nrow2*&ncol2); do j = 1 to &ncol2; colnumb{j} = names{j}; end; do j=1 to &nrow2; rownumb{j} = names{j+&ncol2}; end; dummy=1; keep y1-y&ncol2 w1-w&nrow2 iter dummy; run; %MEND REVERSE; **********************************************************************; ** **; ** NAME: MACRO GENERALIZE_FREQ **; ** PURPOSE: Write part of a TABLES statement in PROC FREQ **; ** Finds multinomial counts for data set **; ** VARIABLES: data_set = contains data **; ** out_data = name of data set to produce **; ** **; **********************************************************************; %MACRO GENERALIZE_FREQ(data, out_data); %let W = W1 *; %do i = 2 %to &nrow2; %let W = &W.W&i.*; %end; %let Y = Y1; %do j = 2 %to &ncol2; %let Y = &Y.*Y&j; %end; proc freq data=&data noprint; table &W &Y / sparse out=&out_data; by iter; run; %MEND GENERALIZE_FREQ; **********************************************************************; ** **; ** NAME: MACRO FIND_SUBTABLE_WY **; ** PURPOSE: Finds sub-tables **; ** VARIABLES: data_set = contains data **; ** save_data = name of data set to produce **; ** **; **********************************************************************; %MACRO FIND_SUBTABLE_WY(data, save_data); %do i=1 %to &nrow2; %do j=1 %to &ncol2; proc freq data=&data noprint; tables W&i*Y&j / sparse out=out_sub; by iter; weight count; run; data out_sub2; set out_sub; W = &i; Y = &j; wi = w&i; yj = y&j; drop percent w&i y&j; run; data &save_data; set &save_data out_sub2; run; %end; %end; %MEND FIND_SUBTABLE_WY; **********************************************************************; ** **; ** NAME: MACRO SIM_LOOP **; ** PURPOSE: Looping structure for simulations **; ** NOTES: Just use sim_numb = 1 %to 1 for one example data set **; ** VARIABLES: sim_data = name of data set for analysis **; ** seed_start = seed for bootstrap resamples **; ** n = sample size **; ** **; **********************************************************************; %MACRO SIM_LOOP(sim_data, seed_start, n); *Could change upper bound for sim_numb if actually doing simulations; %do sim_numb = 1 %to 1; *Only needed for multiple data sets in a simulation; %let seed = %eval(&seed_start+&sim_numb); %let dataset = &sim_numb; *******************************************************************************************; * Do some summary calculations for the observed data; data data2x2; set &sim_data; if iter = &dataset; *drop dummy; run; *Find the multinomial counts; %GENERALIZE_FREQ(data2x2, freq_set); *This data set contains the observed multinomial counts and proportions; data freq_set; set freq_set; percent = percent*0.01; run; data save_sub_WY_obs; set _null_; run; *Find sub-table counts; %FIND_SUBTABLE_WY(freq_set, save_sub_WY_obs); *Add 0.5 to a sub-table cell if the cell is 0.; *In order to account for the lagoon and salt item pair (see Section 4.1), this; * DATA STEP is used with the; * if w=3 and y=1 then w3y1=1; * else w3y1=0; * code; data set2_orig; set save_sub_WY_obs(drop=iter); if count = 0 then count = &constant_zero; if w=3 and y=1 then w3y1=1; else w3y1=0; run; *******************************************************************************************; *Calculate the model for the observed data; *TURN OFF PRINTING; proc printto print="nul:"; run; *Fit the model; proc genmod data=set2_orig; class &class ; model count = &model / dist=poi link=log obstats noint; ods output obstats=obstats_orig modelfit=modfit_orig ParameterEstimates=est_orig; run; *TURN ON PRINTING; proc printto; run; *Find model predicted ORs for the model; proc freq data = obstats_orig noprint; tables wi*yj / measures; weight pred; output out = or(drop=L_RROR U_RROR rename=(_RROR_=OR)) or; by W Y; run; *Sort for a merge later; proc sort data=modfit_orig; by criterion; run; *Using the observed data and the predicted data, create a vector of multinomial probabilities; * under the Ho model; *Uses the Gange program; %DO_IT_GANGE; ********************************************************************************************************; *Find the resamples; *Change result format of Gange program in order to generate multinomial counts under the Ho model; data set1_tau2; set set1_tau; array item {&ncol} item1-item&ncol; array COLNUMB {&ncol} COL1-COL&ncol; do j = 1 to &ncol; item{j} = colnumb{j};; end; p = col%eval(&ncol+1); keep item1-item&ncol p; run; *Find the resamples from the data_gen_resample.sas program; %R_DO_IT_DATA_GEN(set1_tau2, &seed, &n, &totsim, &numbsim, gendata); *save the number of resampled data sets lost due to marginal table not being the observed size; data numb_it_item; set numb_it; *rename n=lostset; sim_numb = &sim_numb; keep sim_numb n; run; data save_numb_it_item; set save_numb_it_item numb_it_item; run; *Finds multinomial counts for each resample; %GENERALIZE_FREQ(gendata, freq_set_gendata); *Find sub-tables for each resample; data save_sub_WY; set _null_; run; %FIND_SUBTABLE_WY(freq_set_gendata, save_sub_WY); proc sort data=save_sub_WY; by iter; run; *Add 0.5 to a sub-table cell if the cell is 0.; *In order to account for the lagoon and salt item pair (see Section 4.1), this; * DATA STEP is used with the; * if w=3 and y=1 then w3y1=1; * else w3y1=0; * code; data resample2; set save_sub_WY; if count = 0 then count = &constant_zero; if w=3 and y=1 then w3y1=1; else w3y1=0; run; *TURN OFF PRINTING; proc printto print="nul:"; run; proc genmod data=resample2; class &class; model count = &model / dist=poi link=log obstats noint; ods output obstats=obstats modelfit=mod_fit_boot; by iter; run; *TURN ON PRINTING; proc printto; run; ********************************************************************************************; *Combine results and calculate p-value; *Sort for a merge coming up; proc sort data=mod_fit_boot; by criterion; run; *Merge the model fit values for the resampled and observed data; data mod_fit_all; merge mod_fit_boot(rename=(value=value_star) drop=ValueDF DF) modfit_orig(rename=(value=value_orig) drop=ValueDF DF); by Criterion; if Criterion = "Log Likelihood" or Criterion = "Scaled Deviance" or Criterion="Scaled Pearson X2" then delete; if value_star > value_orig then reject = 1; else reject =0; run; title2 "The p-values for &model versus the saturated"; footnote1; proc means data=mod_fit_all mean noprint; class criterion; var reject; output out=out_rej mean=p_value; run; data out_rej2; set out_rej; if _TYPE_ ne 0; drop _TYPE_ _FREQ_; sim_numb = &sim_numb; run; *put into save data set; data save_rej; set save_rej out_rej2; run; *Find model predicted ORs for each resample; proc freq data = obstats noprint; tables wi*yj / measures; weight pred; output out = or_star(drop=L_RROR U_RROR rename=(_RROR_=OR)) or; by iter W Y; run; %end; %MEND SIM_LOOP; **********************************************************************; ** **; ** NAME: MACRO STAND_RES_CALC **; ** PURPOSE: Use to calculate the standardized Pearson residuals **; ** and RS results **; ** NOTES: This is meant to be used one data set at a time **; ** In particular, when using the program for actually **; ** observed data **; ** Uses some of the results from MACRO SIM_LOOP **; ** VARIABLES: n = sample size **; ** **; **********************************************************************; %MACRO STAND_RES_CALC(n); title2; *TURN OFF PRINTING; proc printto print="nul:"; run; *Find X matrix; proc glmmod data=set2_orig OUTDESIGN=X_set; class &class; model count = &model / noint; run; *TURN ON PRINTING; proc printto; run; *Count the columns in X_set by looking at the est_orig data set; * ncol(X_set) = n_row(est_orig) - 2; %NUMOBS(est_orig, col_in_X); %put &col_in_X; *Transpose for merge; proc transpose data=X_set out=Xtran name=col label=label; var col1-col%eval(&col_in_X-2); *-2 since there will be intercept and scale rows in est_orig; run; *Merge with parameter estimates to see which columns of X used; data merg_set; merge est_orig Xtran; if DF ne 0; run; *Transpose back; proc transpose data=merg_set out=X_mat(drop=_name_); var col1-col%eval(4*&nrow2*&ncol2); run; *Get in order needed for PROC IML; data merg_set2; merge obstats_orig X_mat; run; *Get # of parameters estimate - correct # of columns in the X matrix; %NUMOBS(merg_set, row_in_merg_set); %put &row_in_merg_set; proc sort data=merg_set2 out=merg_set3(keep= col1-col&row_in_merg_set); by descending wi descending yj; run; *Get in order needed for PROC IML; proc sort data=obstats_orig out=obstats_orig2; by descending wi descending yj; run; *Find G and H matrices; %G_H(&nrow2, G); %G_H(&ncol2, H); *Find the standardized residuals and RS adjustments; proc iml; *Get X; use merg_set3; read all into X; *print X; use obstats_orig2; read all var{pred} into mu_hat; *print mu_hat; use obstats_orig2; read all var{count} into m; *Get tau_hat - this is vector of multinomial probabilities; use freq_set; read all var{percent} into tau; *print tau; use Gitemvec2; read all into G; use Hitemvec2; read all into H; *print G, H; *Matrices of ones; Jr = repeat(1, &nrow2, 2**&nrow2); Jc = repeat(1, &ncol2, 2**&ncol2); *see Appendix A - called B;; tau_to_pi = G@H // G@(Jc-H) // (Jr-G)@H // (Jr-G)@(Jc-H); *Usual asymptotic covariance matrix for multinomials; cov_tau = 1/&n * (Diag(tau) - tau*t(tau)); *Put the covariance matrix in terms of the marginal probabilities; cov_pi = tau_to_pi*cov_tau*t(tau_to_pi); *Covariance for m; cov_m = &n**2 * cov_pi; *Covariance for beta_hat; A = Diag(sqrt(mu_hat))*X; cov_beta = inv(t(A)*A) * t(A) * Diag(1/sqrt(mu_hat)) * cov_m * t(inv(t(A)*A) * t(A) * Diag(1/sqrt(mu_hat))); *Covariance for mu_hat; cov_mu = Diag(mu_hat)*X * cov_beta * t(X)*Diag(mu_hat); *Covariance for residual; cov_e = (I(4*&nrow2*&ncol2) - Diag(mu_hat)*X*inv(t(X)*Diag(mu_hat)*X)*t(X)) * cov_m * t(I(4*&nrow2*&ncol2) - Diag(mu_hat)*X*inv(t(X)*Diag(mu_hat)*X)*t(X)); *Modified Pearson; e = m - mu_hat; Xsq_M = t(e) * Diag(1/mu_hat) * e; *print Xsq_M; *Pearson residuals; stand_err = repeat(0, 4*&nrow2*&ncol2, 1); do i = 1 to 4*&nrow2*&ncol2; stand_err[i,1] = sqrt(cov_e[i,i]); end; stand_resid = e/stand_err; pear_resid = e/sqrt(mu_hat); *print m mu_hat stand_err stand_resid pear_resid; send_out = stand_resid || stand_err|| pear_resid || mu_hat ; col = {"stand_resid" "stand_err" "pear_resid " "mu_hat"}; create stand_resid from send_out[colname=col]; append from send_out; *Find eigenvalues for RS adjustment; mat = Diag(1/mu_hat) * cov_e; eig_mat = eigval(mat); *print eig_mat; sum_eig = trace(mat); sum_eig_sq = sum(eig_mat[,1]##2); send_out2 = sum_eig || sum_eig_sq; *print sum_eig sum_eig_sq; col2 = {"sum_eig" "sum_eig_sq"}; create sat_adj from send_out2[colname=col2]; append from send_out2; *Find ORs and their CIs; alpha = α *Find the derivative needed for the delta method; save_logOR_der = repeat(0, &nrow2*&ncol2, 4*&nrow2*&ncol2); do i = 1 to &nrow2*&ncol2; delta = repeat(0, 1, i-1) || 1/mu_hat[i,1] || repeat(0, 1, &nrow2*&ncol2-1) || -1/mu_hat[&nrow2*&ncol2+i, 1] || repeat(0, 1, &nrow2*&ncol2-1) || -1/mu_hat[2*&nrow2*&ncol2+i, 1] || repeat(0, 1, &nrow2*&ncol2-1) || 1/mu_hat[3*&nrow2*&ncol2+i, 1] || repeat(0, 1, &nrow2*&ncol2-1-(i-1)); save_logOR_der[i,] = delta; end; *print save_logOR_der; var_LogOR = save_logOR_der*cov_mu*t(save_logOR_der); *print var_LogOR; LogOR = repeat(0, &nrow2*&ncol2, 3); LogOR_obs = repeat(0, &nrow2*&ncol2, 3); var_LogOR_save = repeat(0, &nrow2*&ncol2, 1); var_LogOR_obs_save = repeat(0, &nrow2*&ncol2, 1); do i = 1 to &nrow2*&ncol2; LogOR[i,1] = log(mu_hat[i,1]) - log(mu_hat[&nrow2*&ncol2+i, 1]) - log(mu_hat[2*&nrow2*&ncol2+i, 1]) + log(mu_hat[3*&nrow2*&ncol2+i, 1]); var_LogOR_save[i,1] = var_LogOR[i,i]; LogOR[i,2] = LogOR[i,1] - probit(1-alpha/2)*sqrt(var_LogOR[i,i]); LogOR[i,3] = LogOR[i,1] + probit(1-alpha/2)*sqrt(var_LogOR[i,i]); LogOR_obs[i,1] = log(m[i,1]) - log(m[&nrow2*&ncol2+i, 1]) - log(m[2*&nrow2*&ncol2+i, 1]) + log(m[3*&nrow2*&ncol2+i, 1]); var_LogOR_obs_save[i,1] = 1/m[i,1] + 1/m[&nrow2*&ncol2+i, 1] + 1/m[2*&nrow2*&ncol2+i, 1] + 1/m[3*&nrow2*&ncol2+i, 1]; LogOR_obs[i,2] = LogOR_obs[i,1] - probit(1-alpha/2)*sqrt(var_LogOR_obs_save[i,1]); LogOR_obs[i,3] = LogOR_obs[i,1] + probit(1-alpha/2)*sqrt(var_LogOR_obs_save[i,1]); end; OR = exp(LogOR); OR_obs = exp(LogOR_obs); save_indices = repeat(0, &nrow2*&ncol2, 2); count = 1; do i = 1 to &nrow2; do j = 1 to &ncol2; save_indices[count,] = i || j; count = count + 1; end; end; print 'save_indices: col. #1 = W item #, col. #2 = Y item #', "OR: col. #1 = model predicted OR, col. #2-#3 = (1-alpha)100% C.I.", "OR_obs: col. #1 = observed OR, col. #2-#3 = (1-alpha)100% C.I."; print save_indices OR OR_obs; quit; *Do this merge in order to get W Y Wi and Yj in the same data set - need for next sort; data stand_resid2; merge stand_resid obstats_orig2; run; *Reorder back to way it was before; proc sort data=stand_resid2 out=stand_resid3(keep=stand_resid stand_err pear_resid mu_hat observation count W Y wi yj); by w y wi yj; run; title2 "The standardized Pearson residuals, predicted values,... for MODEL count = &model"; proc print data=stand_resid3; var observation W Y wi yj count mu_hat stand_resid stand_err; run; data sat; set modfit_orig; if criterion = 'Pearson Chi-Square'; drop criterion valueDF; run; data sat2; merge sat sat_adj; RS_Pearson = sum_eig*value/sum_eig_sq; sat_p = 1-CDF('CHISQUARED', RS_Pearson, sum_eig**2 / sum_eig_sq); run; title2 '2nd order RS adjustment resulting p-value (sat_p)'; title3 'Value = Pearson statistic'; proc print data=sat2; run; %MEND STAND_RES_CALC; **********************************************************************; ** **; ** NAME: MACRO CALC_A_W **; ** PURPOSE: Find the acceleration value and bias adjustment **; ** for the BCa intervals **; ** NOTES: Used the jackknife to estimate the empirical influence **; ** values **; ** Could make macro faster by taking into account same **; ** item response vectors observed for multiple **; ** experimental units **; **; **********************************************************************; %MACRO CALC_A_W(n); data or_jack_save; set _null_; run; *TURN OFF PRINTING - otherwise the OUTPUT and LOG windows will fill up; proc printto print="nul:" log="nul:"; run; %do obs_del = 1 %to &n; data data_minus1; set data2x2; *Observed item response vectors; if _n_ ne &obs_del; run; ***************************************; *Form item response table; *Find the multinomial counts; %GENERALIZE_FREQ(data_minus1, freq_set_jack); *This data set contains the observed multinomial counts and proportions; data freq_set_jack; set freq_set_jack; percent = percent*0.01; run; data save_sub_WY_obs_jack; set _null_; run; *Find sub-table counts; %FIND_SUBTABLE_WY(freq_set_jack, save_sub_WY_obs_jack); *Add 0.5 to a sub-table cell if the cell is 0; *In order to account for the lagoon and salt item pair (see Section 4.1), this; * DATA STEP is used with the; * if w=3 and y=1 then w3y1=1; * else w3y1=0; * code; data set2_orig_jack; set save_sub_WY_obs_jack(drop=iter); if count = 0 then count = &constant_zero; if w=3 and y=1 then w3y1=1; else w3y1=0; run; *Fit the model; proc genmod data=set2_orig_jack; class &class; model count = &model / dist=poi link=log obstats noint; ods output obstats=obstats_orig_jack; run; *Find model predicted ORs for each n-1 sample; proc freq data = obstats_orig_jack noprint; tables wi*yj / measures; weight pred; output out = or_jack(drop=L_RROR U_RROR rename=(_RROR_=OR)) or; by W Y; run; data or_jack; set or_jack; obs_del = &obs_del; run; data or_jack_save; set or_jack_save or_jack; run; %end; *TURN ON PRINTING; proc printto; run; *************************************************; * Calculate w; proc sort data = or_star; by W Y; run; data or_w; merge or_star(rename = (or = or_star)) or; by W Y; run; data or_w2; set or_w; if or_star <= or then counter = 1; else counter = 0; run; proc means data = or_w2 sum noprint; class W Y; var counter; output out = out_w(drop = _TYPE_ _FREQ_) sum = sum_w; run; data out_w2; set out_w; if W > 0 and Y > 0; w_bias = probit(sum_w/(&numbsim + 1)); drop sum_w; run; ***********************************************; * Calculate a; proc sort data = or_jack_save; by W Y; run; data or_merge; merge or_jack_save(rename= (or = or_jack)) or; by W Y; run; *Jackknife estimated empirical influence values; data or_merge2; set or_merge; l_jack = (&n - 1)*(or - or_jack); l_jack_cube = l_jack**3; l_jack_sq = l_jack**2; run; proc means data = or_merge2 sum noprint; class W Y; var l_jack_cube l_jack_sq; output out = out_l_jack(drop = _TYPE_ _FREQ_) sum = l_jack_cube l_jack_sq; run; *Calculate alpha_tilde; data out_l_jack2; set out_l_jack; if W > 0 and Y > 0; a = 1/6 * l_jack_cube / l_jack_sq**(1.5); run; ***********************************************; *Calculate alpha.tilde; *Based on alpha/2; data alpha_tilde; merge out_w2 out_l_jack2; lower_alpha_tilde = probnorm(w_bias + (w_bias + probit(&alpha/2))/(1 - a*(w_bias + probit(&alpha/2)))); upper_alpha_tilde = probnorm(w_bias + (w_bias + probit(1-&alpha/2))/(1 - a*(w_bias + probit(1-&alpha/2)))); run; *title2 "Calculations needed for BCa"; *proc print data = alpha_tilde; *run; %MEND CALC_A_W; **********************************************************************; ** **; ** NAME: MACRO DO_IT_ALL **; ** PURPOSE: Wrapper macro **; ** NOTES: **; ** **; **********************************************************************; %MACRO DO_IT_ALL(sim_data, seed_start, n); %TIME(MACRO DO_IT_ALL STARTED AT:); *saves information about convergence in Gange program; data save_info_set; set _null_; run; *saves information about number of data sets lost in resamples; data save_numb_it_item; set _null_; run; *Create save data set for p-values; data save_rej; set _null_; run; *Perform the calculations; %SIM_LOOP(&sim_data, &seed_start, &n); *Merge some of the calculations; data save_rej2; merge save_rej save_info_set save_numb_it_item; by sim_numb; if p_value < &alpha then reject=1; else reject=0; run; *Variable explanation: Error - max(abs(tau*_i - tau*_(i+1))) for the last iteration where tau*_i * is a vector of multinomial probabilities for the ith iteration; * iter_conv - number of iterations took to find the multinomial probabilities; * under the Ho model; * sim_numb and reject could be printed as well if simulations are being performed; title2 'Print the boot. p-values, Gange convergence information (error and iter_conv)'; proc print data=save_rej2; var Criterion p_value error iter_conv; run; ********************************************************************; * Use code below only for example data sets - not for simulations; * This just checks if there is a problem with the resampling (see control_2MRCV.sas notes); *Create MACRO variable for the number of resamples removed; data _null_; set save_rej2; call symput('resamples_removed', n); run; %if &resamples_removed = . %then %let resamples_removed = 0; %if %eval(&totsim - &numbsim) < &resamples_removed %then %do; title2 'Warning: Number of resamples removed is greater than the number of extra resamples taken'; title3 "Less than B resamples are taken if n > totsim - numbsim = &totsim - &numbsim = %eval(&totsim - &numbsim)"; proc print data=save_rej2; var n; run; %end; ********************************************************************; /* *For simulations; title2 'Estimated type I error rates'; proc means data=save_rej2 mean; class criterion; var reject; run; */ *Create MACRO variables with the deviance and the Pearson values; data _null_; set modfit_orig; if criterion = "Deviance"; value = round(value, 0.0001); call symput('deviance', value); run; data _null_; set modfit_orig; value = round(value, 0.0001); if criterion = "Pearson Chi-Square"; call symput('pearson', value); run; title2 "Boot. Dist. Histogram for model count = &model"; proc univariate data=mod_fit_all noprint; class criterion; histogram value_star ; footnote1 "The observed Pearson = &pearson and the observed LRT = &deviance"; run; footnote1; *************************************************************; *Percentile intervals for ORs - not programmed for simulations; *PCTLDEF = 3 corresponds to EDF; proc univariate data = or_star noprint PCTLDEF = 3; class W Y; var OR; output out = perc_int pctlpre=P_ PCTLPRE=Int pctlpts=2.5 97.5; run; title2 'Bootstrap percentile intervals for OR'; proc print data = perc_int; run; *************************************************************; *BCa intervals for ORs - not programmed for simulations; %CALC_A_W(&n); data bca_int_save; set _null_; run; %do W = 1 %to &nrow2; %do Y = 1 %to &ncol2; *Create macro variables containing percentiles need to find; data _null_; set alpha_tilde; if W = &W and Y = &Y; call symput('lower', lower_alpha_tilde*100); call symput('upper', upper_alpha_tilde*100); run; *PCTLDEF = 3 corresponds to EDF; proc univariate data = or_star noprint PCTLDEF = 3; where W = "&W" and Y = "&Y"; class W Y; var OR; output out = bca_int pctlpre=P_ pctlpts=&lower &upper PCTLPRE=Int PCTLNAME=lower upper; run; data bca_int_save; set bca_int_save bca_int; run; %end; %end; title2 'Bootstrap BCa intervals for OR'; proc print data = bca_int_save; run; *************************************************************; *Standardized residuals using bootstrap variances; proc sort data = obstats(keep = iter W Y wi yj resraw) out=resid_star; by W Y wi yj; run; proc means data = resid_star noprint; class W Y wi yj; var resraw; output out = var_resid_star(drop = _freq_ _type_) var = var; run; data var_resid_star; set var_resid_star; if W > 0 and Y > 0 and wi >= 0 and yj >= 0; run; data stand_resid_boot; merge obstats_orig(keep = W Y wi yj resraw) var_resid_star; by W Y wi yj; run; data stand_resid_boot2; set stand_resid_boot; stand_res = resraw/sqrt(var); run; title2 "Standardized residuals using bootstrap variances"; proc print data = stand_resid_boot2; run; %TIME(MACRO DO_IT_ALL ENDED AT:); %MEND DO_IT_ALL;