***************************************************************************;
** PROGRAM: boot_2MRCV.sas                                               **;
** AUTHOR:  Christopher R. Bilder                                        **;
**          Department of Statistics                                     **;
**          University of Nebraska-Lincoln                               **;
**          chris@chrisbilder.com                                        **;
** DATE:    3-23-06                                                      **;
** PURPOSE: Fit a generalized loglinear model via the marginal modeling  **;
**          method and use the bootstrap and Rao-Scott approximations    **;
**          to find a p-value for testing the model of interest vs. the  **;
**          saturated model.  Modifications can be made to the program   **;
**          to use an alternative hypothesis model other than the        **;
**          saturated model.                                             **;
** NOTES:                                                                **;
**  1) Do not distribute copies of this program without the permission of**;
**     the author.                                                       **;
**  2) Copyright 2006 Christopher R. Bilder                              **;
**  3) See control_2MRCV.sas for more information                        **;
***************************************************************************;


    **********************************************************************;
    **                                                                  **;
    ** NAME: MACRO TIME                                                 **;
    ** PURPOSE: Write time to OUTPUT window                             **;
    ** NOTES:                                                           **;
    ** VARIABLES: title = begin or end of program                       **;
    **                                                                  **;
    **********************************************************************;
    %MACRO TIME(title);

      data time;
        time = hour(time());
        minute = minute(time());
        second = second(time());
        month = month(today());
        day = day(today());
        year = year(today());
      run;

      proc print data=time;
        title2 "&title";
      run;

    %MEND TIME;



    **********************************************************************;
    **                                                                  **;
    ** NAME: MACRO NUMOBS                                               **;
    ** PURPOSE: Find number of observations in a data set               **;
    ** NOTES: Adapted from a SAS website sample program                 **;
    ** VARIABLES: dsn = Data set                                        **;
    **            name = Number of observations put in here             **;
    **                                                                  **;
    **********************************************************************;                                                                             
    %MACRO NUMOBS(dsn, name);                                                             

      %global &name;                                                                    

       data _null_;                                                                    
         call symput("&name", left(put(count2,8.)));                                         
         stop;                                                                           
         set &dsn nobs=count2;                                                            
       run;     
 
    %MEND NUMOBS;     



    **********************************************************************;
    **                                                                  **;
    ** NAME: MACRO G_H                                                  **;
    ** PURPOSE: Creates G and H matrices needed for PROC IML            **;
    ** NOTES: See Appendix A of paper and Bilder and Loughin            **;
	**        (Biometrics, 2004)                                        **;
    ** VARIABLES: numb_items = number of items                          **;
    **            matrix = G or H                                       **;
    **                                                                  **;
    **********************************************************************;
    %MACRO G_H(numb_items, matrix);

      *From data generation program - iterate out all possible combinations;
      %let X = do item1 = 0 to 1%str(;); 
      %let endit = end%str(;);
 
      *Generalize; 
      %do j = 2 %to &numb_items;
        %let X =  &X.do item&j=0 to 1%str(;);
        %let endit = &endit end%str(;);  
      %end;
 
      *Create matrix of all items;  
      data itemvec;
        &X;
         output;
       &endit; 
      run;

	  *Transpose the matrix to be in the correct form;
	  proc transpose data=itemvec out=&matrix.itemvec2;
	    var item1-item&numb_items;
	  run;

    %MEND G_H;



    **********************************************************************;
    **                                                                  **;
    ** NAME: MACRO REVERSE                                              **;
    ** PURPOSE:  Bilder and Loughin (Biometrics, 2004) and simulations  **;
    **           for this paper used a reverse definition of            **;
	**           Y and W so reverse here to match;                      **;
    ** NOTES: This MACRO is not used in this program.  It is here only  **;
	**        to inform a user who may want to duplicate the simulations**;
	**        in Bilder and Loughin (Biometrics, 2004) and this paper   **;
    ** VARIABLES: data_set = contains generated data                    **;
	**            n = sample size                                       **;
    **                                                                  **;
    **********************************************************************;
    %MACRO REVERSE(data_set, n);

	  data &data_set;
        set sortset4;
		iter = floor((_n_-0.1)/&n+1);
	    array rownumb {&nrow2} w1-w&nrow2;
	    array colnumb {&ncol2} y1-y&ncol2;
        array names {%eval(&nrow2*&ncol2)} item1-item%eval(&nrow2*&ncol2);

        do j = 1 to &ncol2;
          colnumb{j} = names{j};
        end;

	    do j=1 to &nrow2;
	      rownumb{j} = names{j+&ncol2};
	    end; 

    	dummy=1;
        keep y1-y&ncol2 w1-w&nrow2 iter dummy;
      run;
     
    %MEND REVERSE;



    **********************************************************************;
    **                                                                  **;
    ** NAME: MACRO GENERALIZE_FREQ                                      **;
    ** PURPOSE: Write part of a TABLES statement in PROC FREQ           **;
	**          Finds multinomial counts for data set                   **;
    ** VARIABLES: data_set = contains data                              **;
	**            out_data = name of data set to produce                **;
    **                                                                  **;
    **********************************************************************;
    %MACRO GENERALIZE_FREQ(data, out_data);

      %let W = W1 *;  
       
	  %do i = 2 %to &nrow2;
	    %let W =  &W.W&i.*;
	  %end;

      %let Y = Y1;  
       
	  %do j = 2 %to &ncol2;
	    %let Y =  &Y.*Y&j;
	  %end;

       proc freq data=&data noprint;
         table &W &Y / sparse out=&out_data;
	     by iter;
       run;

    %MEND GENERALIZE_FREQ;

 

   **********************************************************************;
   **                                                                  **;
   ** NAME: MACRO FIND_SUBTABLE_WY                                     **;
   ** PURPOSE: Finds sub-tables                                        **;
   ** VARIABLES: data_set = contains data                              **;
   **            save_data = name of data set to produce               **;
   **                                                                  **;
   **********************************************************************;
   %MACRO FIND_SUBTABLE_WY(data, save_data);

    %do i=1 %to &nrow2;
      %do j=1 %to &ncol2;

         proc freq data=&data noprint;
		   tables W&i*Y&j / sparse out=out_sub;
		   by iter;
		   weight count;
		 run;

		 data out_sub2;
		   set out_sub;
		   W = &i;
		   Y = &j;
		   wi = w&i;
		   yj = y&j;
           drop percent w&i y&j;
		  run;

          data &save_data;
		    set &save_data out_sub2;
		  run;

        %end;
      %end;

  %MEND FIND_SUBTABLE_WY;




  **********************************************************************;
  **                                                                  **;
  ** NAME: MACRO SIM_LOOP                                             **;
  ** PURPOSE: Looping structure for simulations                       **;
  ** NOTES: Just use sim_numb = 1 %to 1 for one example data set      **;
  ** VARIABLES: sim_data = name of data set for analysis              **;
  **            seed_start = seed for bootstrap resamples             **;
  **            n = sample size                                       **;
  **                                                                  **;
  **********************************************************************;
  %MACRO SIM_LOOP(sim_data, seed_start, n);

    *Could change upper bound for sim_numb if actually doing simulations;
    %do sim_numb = 1 %to 1;

	  *Only needed for multiple data sets in a simulation;
      %let seed = %eval(&seed_start+&sim_numb);
      %let dataset = &sim_numb;


  *******************************************************************************************;
  * Do some summary calculations for the observed data;

     data data2x2;
       set &sim_data;
       if iter = &dataset;
       *drop dummy;
     run;


	 *Find the multinomial counts;
     %GENERALIZE_FREQ(data2x2, freq_set);


     *This data set contains the observed multinomial counts and proportions;
     data freq_set;
       set freq_set;
       percent = percent*0.01;
     run;


    data save_sub_WY_obs;  set _null_; run;
	*Find sub-table counts;
    %FIND_SUBTABLE_WY(freq_set, save_sub_WY_obs);


	*Add 0.5 to a sub-table cell if the cell is 0.;  
	*In order to account for the lagoon and salt item pair (see Section 4.1), this;
	*  DATA STEP is used with the;           
	*     if w=3 and y=1 then w3y1=1;
    *     else w3y1=0; 
	*  code; 
    data set2_orig;
      set save_sub_WY_obs(drop=iter);
      if count = 0 then count = &constant_zero;
if w=3 and y=1 then w3y1=1;  
else w3y1=0;
    run;


  *******************************************************************************************;
  *Calculate the model for the observed data;

    *TURN OFF PRINTING;  
    proc printto print="nul:";  run;


	*Fit the model;
    proc genmod data=set2_orig;
      class &class ;
	  model count = &model / dist=poi link=log obstats noint;
      ods output obstats=obstats_orig modelfit=modfit_orig ParameterEstimates=est_orig;
    run; 


    *TURN ON PRINTING;  
    proc printto;  run;


    *Find model predicted ORs for the model;
    proc freq data = obstats_orig noprint;
	  tables wi*yj / measures;
	  weight pred;
	  output out = or(drop=L_RROR U_RROR rename=(_RROR_=OR)) or;
	  by W Y;
	run;


    *Sort for a merge later;
    proc sort data=modfit_orig;
      by criterion;
    run;

  
    *Using the observed data and the predicted data, create a vector of multinomial probabilities;
	*  under the Ho model;
	*Uses the Gange program;
    %DO_IT_GANGE;


  ********************************************************************************************************;
  *Find the resamples;

    *Change result format of Gange program in order to generate multinomial counts under the Ho model;
    data set1_tau2;
      set set1_tau;
      array item {&ncol} item1-item&ncol;
	  array COLNUMB {&ncol} COL1-COL&ncol;
      do j = 1 to &ncol;
         item{j} = colnumb{j};;
      end;
	  p = col%eval(&ncol+1);
	  keep item1-item&ncol p;
    run;


    *Find the resamples from the data_gen_resample.sas program;
    %R_DO_IT_DATA_GEN(set1_tau2, &seed, &n, &totsim, &numbsim, gendata);

      *save the number of resampled data sets lost due to marginal table not being the observed size;
	   data numb_it_item;
	     set numb_it;
		 *rename n=lostset;
		 sim_numb = &sim_numb;
		 keep sim_numb n;
	   run;

	   data save_numb_it_item;
	     set save_numb_it_item numb_it_item;
	   run;



    *Finds multinomial counts for each resample;
    %GENERALIZE_FREQ(gendata, freq_set_gendata);


    *Find sub-tables for each resample;
    data save_sub_WY;  set _null_; run;
    %FIND_SUBTABLE_WY(freq_set_gendata, save_sub_WY);


    proc sort data=save_sub_WY;
      by iter;
    run;


	*Add 0.5 to a sub-table cell if the cell is 0.;  
	*In order to account for the lagoon and salt item pair (see Section 4.1), this;
	*  DATA STEP is used with the;           
	*     if w=3 and y=1 then w3y1=1;  
    *     else w3y1=0; 
	*  code; 
     data resample2;
      set save_sub_WY;
	  if count = 0 then count = &constant_zero;
if w=3 and y=1 then w3y1=1;  
else w3y1=0;
    run;


    *TURN OFF PRINTING;  
    proc printto print="nul:";  run;


    proc genmod data=resample2;
      class &class;
 	  model count = &model / dist=poi link=log obstats noint;
      ods output obstats=obstats modelfit=mod_fit_boot;
      by iter;
    run; 


    *TURN ON PRINTING;  
    proc printto;  run;



    ********************************************************************************************;
    *Combine results and calculate p-value;
 
    *Sort for a merge coming up;
    proc sort data=mod_fit_boot;
      by criterion;
    run;


    *Merge the model fit values for the resampled and observed data;
    data mod_fit_all;
      merge mod_fit_boot(rename=(value=value_star) drop=ValueDF DF)
            modfit_orig(rename=(value=value_orig) drop=ValueDF DF);
	  by Criterion;
	  if Criterion = "Log Likelihood" or Criterion = "Scaled Deviance" 
         or Criterion="Scaled Pearson X2" then delete;
	  if value_star > value_orig then reject = 1;  
	     else reject =0;
    run;

    
    title2 "The p-values for &model versus the saturated";
    footnote1;
    proc means data=mod_fit_all mean noprint;
      class criterion;
	  var reject;
	  output out=out_rej mean=p_value;
    run;
    

    data out_rej2;
      set out_rej;
	  if _TYPE_ ne 0;
	  drop _TYPE_ _FREQ_;
	  sim_numb = &sim_numb;
    run;


    *put into save data set;
    data save_rej;
      set save_rej out_rej2;
    run;


	*Find model predicted ORs for each resample;
    proc freq data = obstats noprint;
	  tables wi*yj / measures;
	  weight pred;
	  output out = or_star(drop=L_RROR U_RROR rename=(_RROR_=OR)) or;
	  by iter W Y;
	run;

   %end;

  %MEND SIM_LOOP;




  **********************************************************************;
  **                                                                  **;
  ** NAME: MACRO STAND_RES_CALC                                       **;
  ** PURPOSE: Use to calculate the standardized Pearson residuals     **;
  **          and RS results                                          **;
  ** NOTES: This is meant to be used one data set at a time           **;
  **        In particular, when using the program for actually        **;
  **          observed data                                           **;
  **        Uses some of the results from MACRO SIM_LOOP              **;
  ** VARIABLES: n = sample size                                       **;
  **                                                                  **;
  **********************************************************************;
  %MACRO STAND_RES_CALC(n);

    title2;

    *TURN OFF PRINTING;  
    proc printto print="nul:";  run;

    *Find X matrix;
    proc glmmod data=set2_orig OUTDESIGN=X_set;
      class &class;
      model count = &model / noint;
    run; 
 
    *TURN ON PRINTING;  
    proc printto;  run;


    *Count the columns in X_set by looking at the est_orig data set; 
    *  ncol(X_set) = n_row(est_orig) - 2;
    %NUMOBS(est_orig, col_in_X); 
    %put &col_in_X;
   

    *Transpose for merge;
    proc transpose data=X_set out=Xtran name=col label=label;
      var col1-col%eval(&col_in_X-2); *-2 since there will be intercept and scale rows in est_orig;
    run;


    *Merge with parameter estimates to see which columns of X used;
    data merg_set;
      merge est_orig Xtran;
      if DF ne 0;
    run;


    *Transpose back;
    proc transpose data=merg_set out=X_mat(drop=_name_);
	  var col1-col%eval(4*&nrow2*&ncol2);
    run;

      
    *Get in order needed for PROC IML;
    data merg_set2;
      merge obstats_orig X_mat;
    run;


    *Get # of parameters estimate - correct # of columns in the X matrix; 
    %NUMOBS(merg_set, row_in_merg_set); 
    %put &row_in_merg_set;

    proc sort data=merg_set2 out=merg_set3(keep= col1-col&row_in_merg_set);
      by descending wi descending yj;
    run;


    *Get in order needed for PROC IML;
    proc sort data=obstats_orig out=obstats_orig2;
	  by descending wi descending yj;
    run;


    *Find G and H matrices;
    %G_H(&nrow2, G);
    %G_H(&ncol2, H);


    *Find the standardized residuals and RS adjustments;
    proc iml;

      *Get X;
      use merg_set3; 
        read all into X;
      *print X;

	  use obstats_orig2; 
        read all var{pred} into mu_hat;
      *print mu_hat; 

      use obstats_orig2; 
        read all var{count} into m;
     
      *Get tau_hat - this is vector of multinomial probabilities;
      use freq_set; 
        read all var{percent} into tau;
      *print tau; 
 
      use Gitemvec2; 
        read all into G;

      use Hitemvec2; 
        read all into H;
      *print G, H;
  
      *Matrices of ones;
      Jr = repeat(1, &nrow2, 2**&nrow2);
	  Jc = repeat(1, &ncol2, 2**&ncol2);

	  *see Appendix A - called B;;
      tau_to_pi = G@H // G@(Jc-H) // (Jr-G)@H // (Jr-G)@(Jc-H);

	  *Usual asymptotic covariance matrix for multinomials;
      cov_tau = 1/&n * (Diag(tau) - tau*t(tau));

	  *Put the covariance matrix in terms of the marginal probabilities;
	  cov_pi = tau_to_pi*cov_tau*t(tau_to_pi);

	  *Covariance for m;
      cov_m = &n**2 * cov_pi;

	  *Covariance for beta_hat;
      A = Diag(sqrt(mu_hat))*X;
	  cov_beta = inv(t(A)*A) * t(A) * Diag(1/sqrt(mu_hat)) * cov_m * t(inv(t(A)*A) * t(A) * Diag(1/sqrt(mu_hat)));

      *Covariance for mu_hat;
	  cov_mu = Diag(mu_hat)*X * cov_beta * t(X)*Diag(mu_hat);

      *Covariance for residual;
      cov_e =  (I(4*&nrow2*&ncol2) - Diag(mu_hat)*X*inv(t(X)*Diag(mu_hat)*X)*t(X)) * cov_m *
              t(I(4*&nrow2*&ncol2) - Diag(mu_hat)*X*inv(t(X)*Diag(mu_hat)*X)*t(X));

      *Modified Pearson;
      e = m - mu_hat;
	  Xsq_M = t(e) * Diag(1/mu_hat) * e;
	  *print Xsq_M;

      *Pearson residuals;
	  stand_err = repeat(0, 4*&nrow2*&ncol2, 1);
      do i = 1 to 4*&nrow2*&ncol2;
        stand_err[i,1] = sqrt(cov_e[i,i]);
      end;
	  stand_resid = e/stand_err;
      pear_resid = e/sqrt(mu_hat);
      *print m mu_hat stand_err stand_resid pear_resid;

      send_out = stand_resid || stand_err|| pear_resid || mu_hat ;
      col = {"stand_resid" "stand_err" "pear_resid " "mu_hat"};
      create stand_resid from send_out[colname=col];
        append from send_out;   
 
 
	  *Find eigenvalues for RS adjustment;
	  mat =  Diag(1/mu_hat) * cov_e;
      eig_mat = eigval(mat);
	  *print eig_mat;

	  sum_eig = trace(mat);
	  sum_eig_sq = sum(eig_mat[,1]##2);
	  send_out2 = sum_eig || sum_eig_sq;
	  *print sum_eig sum_eig_sq;

	  col2 = {"sum_eig" "sum_eig_sq"};
      create sat_adj from send_out2[colname=col2];
        append from send_out2;   

	  *Find ORs and their CIs;
      alpha = &alpha;

      *Find the derivative needed for the delta method;
      save_logOR_der = repeat(0, &nrow2*&ncol2, 4*&nrow2*&ncol2);
      do i = 1 to &nrow2*&ncol2;

        delta = repeat(0, 1, i-1) ||  1/mu_hat[i,1]                   || repeat(0, 1, &nrow2*&ncol2-1)  
                                  || -1/mu_hat[&nrow2*&ncol2+i, 1]    || repeat(0, 1, &nrow2*&ncol2-1) 
                                  || -1/mu_hat[2*&nrow2*&ncol2+i, 1]  || repeat(0, 1, &nrow2*&ncol2-1) 
                                  ||  1/mu_hat[3*&nrow2*&ncol2+i, 1]  || repeat(0, 1, &nrow2*&ncol2-1-(i-1));
        save_logOR_der[i,] = delta; 
      end;
      *print save_logOR_der;

      var_LogOR = save_logOR_der*cov_mu*t(save_logOR_der);
      *print var_LogOR;

      LogOR = repeat(0, &nrow2*&ncol2, 3);
      LogOR_obs = repeat(0, &nrow2*&ncol2, 3);
      var_LogOR_save = repeat(0, &nrow2*&ncol2, 1);
      var_LogOR_obs_save = repeat(0, &nrow2*&ncol2, 1);

      do i = 1 to &nrow2*&ncol2;
        LogOR[i,1] = log(mu_hat[i,1]) - log(mu_hat[&nrow2*&ncol2+i, 1]) -
                     log(mu_hat[2*&nrow2*&ncol2+i, 1]) + log(mu_hat[3*&nrow2*&ncol2+i, 1]); 
        var_LogOR_save[i,1] = var_LogOR[i,i];
        LogOR[i,2] = LogOR[i,1] - probit(1-alpha/2)*sqrt(var_LogOR[i,i]);
        LogOR[i,3] = LogOR[i,1] + probit(1-alpha/2)*sqrt(var_LogOR[i,i]);

        LogOR_obs[i,1] = log(m[i,1]) - log(m[&nrow2*&ncol2+i, 1]) -
                         log(m[2*&nrow2*&ncol2+i, 1]) + log(m[3*&nrow2*&ncol2+i, 1]);
        var_LogOR_obs_save[i,1] = 1/m[i,1] + 1/m[&nrow2*&ncol2+i, 1] + 
                                  1/m[2*&nrow2*&ncol2+i, 1] + 1/m[3*&nrow2*&ncol2+i, 1];
        LogOR_obs[i,2] = LogOR_obs[i,1] - probit(1-alpha/2)*sqrt(var_LogOR_obs_save[i,1]);
        LogOR_obs[i,3] = LogOR_obs[i,1] + probit(1-alpha/2)*sqrt(var_LogOR_obs_save[i,1]);
      end;


      OR = exp(LogOR);
      OR_obs = exp(LogOR_obs);

      save_indices = repeat(0, &nrow2*&ncol2, 2);
      count = 1;
      do i = 1 to &nrow2;
        do j = 1 to &ncol2;
          save_indices[count,] = i || j;
          count = count + 1;
        end;
      end;
        
	  print 'save_indices: col. #1 = W item #, col. #2 = Y item #',
            "OR: col. #1 = model predicted OR, col. #2-#3 = (1-alpha)100% C.I.",
            "OR_obs: col. #1 = observed OR, col. #2-#3 = (1-alpha)100% C.I.";
      print save_indices OR OR_obs;

 
    quit;


	*Do this merge in order to get W Y Wi and Yj in the same data set - need for next sort;
    data stand_resid2;
      merge stand_resid obstats_orig2;
	run;
   

    *Reorder back to way it was before;
    proc sort data=stand_resid2 out=stand_resid3(keep=stand_resid stand_err pear_resid mu_hat 
                                                      observation count W Y wi yj);
      by w y wi yj;
    run; 

    
	title2 "The standardized Pearson residuals, predicted values,... for MODEL count = &model";
    proc print data=stand_resid3;
	  var observation W Y wi yj count mu_hat stand_resid stand_err;
	run;
    
	
    data sat;
      set modfit_orig;
	  if criterion = 'Pearson Chi-Square';
	  drop criterion valueDF;
    run;


    data sat2;
      merge sat sat_adj;
	  RS_Pearson = sum_eig*value/sum_eig_sq;
	  sat_p = 1-CDF('CHISQUARED', RS_Pearson, sum_eig**2 / sum_eig_sq); 
    run;


    title2 '2nd order RS adjustment resulting p-value (sat_p)';
	title3 'Value = Pearson statistic';
    proc print data=sat2;
    run;

%MEND STAND_RES_CALC;


 

**********************************************************************;
**                                                                  **;
** NAME: MACRO CALC_A_W                                             **;
** PURPOSE: Find the acceleration value and bias adjustment         **;
**            for the BCa intervals                                 **;
** NOTES: Used the jackknife to estimate the empirical influence    **;
**          values                                                  **;
**        Could make macro faster by taking into account same       **;
**          item response vectors observed for multiple             **;
**          experimental units                                      **;                                                                **;
**********************************************************************;
%MACRO CALC_A_W(n);

  data or_jack_save;
    set _null_;
  run;

  *TURN OFF PRINTING - otherwise the OUTPUT and LOG windows will fill up;  
  proc printto print="nul:" log="nul:";  
  run;

  %do obs_del = 1 %to &n;

     data data_minus1;
       set data2x2;  *Observed item response vectors;
	   if _n_ ne &obs_del;
	 run;

	 ***************************************;
     *Form item response table;

	 *Find the multinomial counts;
     %GENERALIZE_FREQ(data_minus1, freq_set_jack);

	 *This data set contains the observed multinomial counts and proportions;
     data freq_set_jack;
       set freq_set_jack;
       percent = percent*0.01;
     run;

     data save_sub_WY_obs_jack;  set _null_; run;
	 *Find sub-table counts;
     %FIND_SUBTABLE_WY(freq_set_jack, save_sub_WY_obs_jack);

	 *Add 0.5 to a sub-table cell if the cell is 0;  
	 *In order to account for the lagoon and salt item pair (see Section 4.1), this;
	 *  DATA STEP is used with the;           
	 *     if w=3 and y=1 then w3y1=1;
     *     else w3y1=0; 
	 *  code; 
     data set2_orig_jack;
       set save_sub_WY_obs_jack(drop=iter);
       if count = 0 then count = &constant_zero;
if w=3 and y=1 then w3y1=1;  
else w3y1=0;
     run;

    
     *Fit the model;
     proc genmod data=set2_orig_jack;
       class &class;
	   model count = &model / dist=poi link=log obstats noint;
       ods output obstats=obstats_orig_jack;
     run; 

 
     *Find model predicted ORs for each n-1 sample;
     proc freq data = obstats_orig_jack noprint;
	   tables wi*yj / measures;
	   weight pred;
	   output out = or_jack(drop=L_RROR U_RROR rename=(_RROR_=OR)) or;
	   by W Y;
	 run;

    data or_jack;
      set or_jack;
      obs_del = &obs_del;
    run;

    data or_jack_save;
      set or_jack_save or_jack;
    run;

  %end;

  *TURN ON PRINTING;  
  proc printto;  run;

  *************************************************;
  * Calculate w;
  proc sort data = or_star;
    by W Y;
  run;

  data or_w;
    merge or_star(rename = (or = or_star)) or;
    by W Y;
  run;

  data or_w2;
    set or_w;
	if or_star <= or then counter = 1;
	   else counter = 0;
  run;
  
  proc means data = or_w2 sum noprint;
    class W Y;
    var counter;
	output out = out_w(drop = _TYPE_ _FREQ_) sum = sum_w;  
  run;

  data out_w2;
    set out_w;
	if W > 0 and Y > 0;
    w_bias = probit(sum_w/(&numbsim + 1));
	drop sum_w;
  run;


  ***********************************************;
  * Calculate a;
  proc sort data = or_jack_save;
    by W Y;
  run;

  data or_merge;
    merge or_jack_save(rename= (or = or_jack)) or;
	by W Y;
  run;
  
  *Jackknife estimated empirical influence values; 
  data or_merge2;
    set or_merge;
	l_jack = (&n - 1)*(or - or_jack);
	l_jack_cube = l_jack**3;
	l_jack_sq = l_jack**2;
  run;

  proc means data = or_merge2 sum noprint;
    class W Y;
    var l_jack_cube l_jack_sq;
    output out = out_l_jack(drop = _TYPE_ _FREQ_) sum = l_jack_cube l_jack_sq;  
  run;

  *Calculate alpha_tilde;
  data out_l_jack2;
    set out_l_jack;
	if W > 0 and Y > 0;
    a = 1/6 * l_jack_cube / l_jack_sq**(1.5);
  run;


  ***********************************************;
  *Calculate alpha.tilde;

  *Based on alpha/2;
  data alpha_tilde;
    merge out_w2 out_l_jack2;
    lower_alpha_tilde = probnorm(w_bias + (w_bias + probit(&alpha/2))/(1 - a*(w_bias + probit(&alpha/2))));
    upper_alpha_tilde = probnorm(w_bias + (w_bias + probit(1-&alpha/2))/(1 - a*(w_bias + probit(1-&alpha/2))));
  run;

  *title2 "Calculations needed for BCa";
  *proc print data = alpha_tilde; 
  *run;

%MEND CALC_A_W;




**********************************************************************;
**                                                                  **;
** NAME: MACRO DO_IT_ALL                                            **;
** PURPOSE: Wrapper macro                                           **;
** NOTES:                                                           **;
**                                                                  **;
**********************************************************************;
%MACRO DO_IT_ALL(sim_data, seed_start, n);

  %TIME(MACRO DO_IT_ALL STARTED AT:);

  *saves information about convergence in Gange program;
  data save_info_set; set _null_; run;  

  *saves information about number of data sets lost in resamples;
  data save_numb_it_item; set _null_; run;

  *Create save data set for p-values;
  data save_rej;  set _null_;  run;

  
  *Perform the calculations;
  %SIM_LOOP(&sim_data, &seed_start, &n);


  *Merge some of the calculations;
  data save_rej2;
    merge save_rej save_info_set save_numb_it_item;
	by sim_numb;
    if p_value < &alpha then reject=1;
       else reject=0;
  run;


  *Variable explanation: Error - max(abs(tau*_i - tau*_(i+1))) for the last iteration where tau*_i 
  *                              is a vector of multinomial probabilities for the ith iteration;
  *                      iter_conv - number of iterations took to find the multinomial probabilities;
  *                                  under the Ho model; 
  *  sim_numb and reject could be printed as well if simulations are being performed;
  title2 'Print the boot. p-values, Gange convergence information (error and iter_conv)';
  proc print data=save_rej2;
    var Criterion p_value error iter_conv;
  run;


  ********************************************************************;
  * Use code below only for example data sets - not for simulations;
  *  This just checks if there is a problem with the resampling (see control_2MRCV.sas notes);

    *Create MACRO variable for the number of resamples removed;
    data _null_;
      set save_rej2;
      call symput('resamples_removed', n);
    run;

    %if &resamples_removed = . %then %let resamples_removed = 0;

    %if %eval(&totsim - &numbsim) < &resamples_removed %then %do;

      title2 'Warning: Number of resamples removed is greater than the number of extra resamples taken';
	  title3 "Less than B resamples are taken if n > totsim - numbsim = &totsim - &numbsim = %eval(&totsim - &numbsim)";
      proc print data=save_rej2;
        var n;
      run;

    %end;

  ********************************************************************;
  /*
  *For simulations;
  title2 'Estimated type I error rates';
  proc means data=save_rej2 mean;
    class criterion;
    var reject;
  run;
  */


  *Create MACRO variables with the deviance and the Pearson values;
  data _null_;
    set modfit_orig;
	if criterion = "Deviance";
	value = round(value, 0.0001);
    call symput('deviance', value);
  run;

  data _null_;
    set modfit_orig;
    value = round(value, 0.0001);
	if criterion = "Pearson Chi-Square";
    call symput('pearson', value);
  run;

 
  title2 "Boot. Dist. Histogram for model count = &model";
  proc univariate data=mod_fit_all noprint;
    class criterion;
    histogram value_star ;
	footnote1 "The observed Pearson = &pearson and the observed LRT = &deviance";
  run; 
  footnote1;


  *************************************************************;
  *Percentile intervals for ORs - not programmed for simulations;

  *PCTLDEF = 3 corresponds to EDF;
  proc univariate data = or_star noprint PCTLDEF = 3;
    class W Y;
	var OR;
	output out = perc_int pctlpre=P_ PCTLPRE=Int pctlpts=2.5 97.5; 
  run;

  title2 'Bootstrap percentile intervals for OR';
  proc print data = perc_int;
  run;


  *************************************************************;
  *BCa intervals for ORs - not programmed for simulations;

  %CALC_A_W(&n);

  data bca_int_save;
    set _null_;
  run;

  %do W = 1 %to &nrow2;
    %do Y = 1 %to &ncol2;

      *Create macro variables containing percentiles need to find;
      data _null_;
      set alpha_tilde;
	    if W = &W and Y = &Y;
        call symput('lower', lower_alpha_tilde*100);
        call symput('upper', upper_alpha_tilde*100);
      run;


      *PCTLDEF = 3 corresponds to EDF;
      proc univariate data = or_star noprint PCTLDEF = 3;
        where W = "&W" and Y = "&Y";
	    class W Y;
	    var OR;
	    output out = bca_int pctlpre=P_ pctlpts=&lower &upper PCTLPRE=Int PCTLNAME=lower upper; 
      run;


      data bca_int_save;
	    set bca_int_save bca_int;
	  run;

    %end;
  %end;


  title2 'Bootstrap BCa intervals for OR';
  proc print data = bca_int_save;
  run;

  *************************************************************;
  *Standardized residuals using bootstrap variances;

  proc sort data = obstats(keep = iter W Y wi yj resraw) out=resid_star;
    by W Y wi yj;
  run;  


  proc means data = resid_star noprint;
    class W Y wi yj;
	var resraw;
	output out = var_resid_star(drop = _freq_ _type_) var = var;
  run;


  data var_resid_star;
    set var_resid_star;
	if W > 0 and Y > 0 and wi >= 0 and yj >= 0;
  run;
 

  data stand_resid_boot;
    merge obstats_orig(keep = W Y wi yj resraw) var_resid_star;
	by W Y wi yj;
  run;

  data stand_resid_boot2;
    set stand_resid_boot;
	stand_res = resraw/sqrt(var);
  run;

  
  title2 "Standardized residuals using bootstrap variances";
  proc print data = stand_resid_boot2;
  run;


  %TIME(MACRO DO_IT_ALL ENDED AT:);

%MEND DO_IT_ALL;