***************************************************************************;
** PROGRAM: boot_3MRCV.sas **;
** AUTHOR: Christopher R. Bilder **;
** Department of Statistics **;
** University of Nebraska-Lincoln **;
** chris@chrisbilder.com **;
** DATE: 3-23-06 **;
** PURPOSE: Fit a generalized loglinear model via the marginal modeling **;
** method and use the bootstrap and Rao-Scott approximations **;
** to find a p-value for testing the model of interest vs. the **;
** saturated model. **;
** NOTES: **;
** 1) Do not distribute copies of this program without the permission of **;
** the author. **;
** 2) Copyright 2006 Christopher R. Bilder **;
** 3) See control_3MRCV.sas for more information **;
***************************************************************************;


**********************************************************************;
** **;
** NAME: MACRO TIME **;
** PURPOSE: Write time to output file **;
** VARIABLES: title = begin or end of program **;
** **;
**********************************************************************;
%MACRO TIME(title);

data time;
   time   = hour(time());
   minute = minute(time());
   second = second(time());
   month  = month(today());
   day    = day(today());
   year   = year(today());
run;

proc print data=time;
   title2 "&title";
run;

%MEND TIME;


**********************************************************************;
** **;
** NAME: MACRO MAKE_2WAY **;
** PURPOSE: Find all possible 2-way tables for the observed data **;
** **;
**********************************************************************;
%MACRO MAKE_2WAY(data);

%do i = 1 %to (&nrow2-1);
   %do j = (&i + 1) %to &nrow2;

      proc freq data=&data noprint;
         tables item&i.*item&j / out=out_2way sparse;
      run;

      *Fix data set up to save;
      data out_2way_2;
         set out_2way;
         item_numb1 = &i;
         item_numb2 = &j;
         a = item&i;
         b = item&j;
         if count = 0 then count = 0.5;
         keep item_numb1 item_numb2 a b count;
      run;

      data out_2way_save;
         set out_2way_save out_2way_2;
      run;

   %end;
%end;

%do i = (&nrow2+1) %to (&nrow2+&ncol2-1);
   %do j = (&i + 1) %to (&nrow2+&ncol2);

      proc freq data=&data noprint;
         tables item&i.*item&j / out=out_2way sparse;
      run;

      *Fix data set up to save;
      data out_2way_2;
         set out_2way;
         item_numb1 = &i;
         item_numb2 = &j;
         a = item&i;
         b = item&j;
         if count = 0 then count = 0.5;
         keep item_numb1 item_numb2 a b count;
      run;

      data out_2way_save;
         set out_2way_save out_2way_2;
      run;

   %end;
%end;

%do i = (&nrow2+&ncol2+1) %to (&nrow2+&ncol2+&nstrat2-1);
   %do j = (&i + 1) %to (&nrow2+&ncol2+&nstrat2);

      proc freq data=&data noprint;
         tables item&i.*item&j / out=out_2way sparse;
      run;

      *Fix data set up to save;
      data out_2way_2;
         set out_2way;
         item_numb1 = &i;
         item_numb2 = &j;
         a = item&i;
         b = item&j;
         if count = 0 then count = 0.5;
         keep item_numb1 item_numb2 a b count;
      run;

      data out_2way_save;
         set out_2way_save out_2way_2;
      run;

   %end;
%end;

data out_2way_save;
   set out_2way_save;
   *just helps to reorder for PROC IML code;
   new_count = count;
   drop count;
run;

%MEND MAKE_2WAY;
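
/*------------------------------------------------------------------------
  Note on MAKE_2WAY: the three macro loops above pair items only within the
  same MRCV - items 1 to nrow2 (the W items), items nrow2+1 to nrow2+ncol2
  (the Y items), and the remaining nstrat2 Z items. Each pairwise table is
  stacked into out_2way_save, with zero cells replaced by 0.5 and the count
  renamed to new_count.
------------------------------------------------------------------------*/
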
**********************************************************************;
** **;
** NAME: MACRO FIND_SUBTABLE_WYZ **;
** PURPOSE: Finds sub-tables for Wi, Yj, and Zk combos **;
** **;
**********************************************************************;
%MACRO FIND_SUBTABLE_WYZ(data, save_data);

%do i=1 %to &nrow2;
   %do j=1 %to &ncol2;
      %do k=1 %to &nstrat2;

         proc freq data=&data noprint;
            tables W&i*Y&j*Z&k / sparse out=out_sub;
            by iter;
            weight count;
         run;

         data out_sub2;
            set out_sub;
            W = &i;
            Y = &j;
            Z = &k;
            wi = w&i;
            yj = y&j;
            zk = z&k;
            drop percent w&i y&j z&k;
         run;

         data &save_data;
            set &save_data out_sub2;
         run;

      %end;
   %end;
%end;

%MEND FIND_SUBTABLE_WYZ;


**********************************************************************;
** **;
** NAME: MACRO GENERALIZE_FREQ **;
** PURPOSE: Write part of a TABLES statement for PROC FREQ **;
** **;
**********************************************************************;
%MACRO GENERALIZE_FREQ(data, out_data);

%let W = W1 *;
%do i = 2 %to &nrow2;
   %let W = &W.W&i.*;
%end;

%let Y = Y1 *;
%do j = 2 %to &ncol2;
   %let Y = &Y.Y&j*;
%end;

%let Z = Z1;
%do k = 2 %to &nstrat2;
   %let Z = &Z.*Z&k;
%end;

proc freq data=&data noprint;
   table &W &Y &Z / sparse out=&out_data;
   by iter;
run;

%MEND GENERALIZE_FREQ;


**********************************************************************;
** **;
** NAME: MACRO CORRECT_FORMAT **;
** PURPOSE: Put data in correct format needed for calculations **;
** **;
**********************************************************************;
%MACRO CORRECT_FORMAT(data_set);

*Find multinomial format of data;
%GENERALIZE_FREQ(&data_set, freq_set);

data freq_set;
   set freq_set;
   percent = percent*0.01;
run;

data save_sub_WY_obs;
   set _null_;
run;

%FIND_SUBTABLE_WYZ(freq_set, save_sub_WY_obs);

data set3;
   set save_sub_WY_obs;
   if count = 0 then count = &constant_zero;
run;

*This part would need to be generalized for other data sets with a different number of items!;
*Rename items for MAKE_2WAY (finds all possible observed 2-way sub-tables);
data sasdata_one3;
   set &data_set;
   item1 = w1;  item2 = w2;  item3 = w3;
   item4 = y1;  item5 = y2;  item6 = y3;  item7 = y4;
   item8 = z1;  item9 = z2;  item10 = z3; item11 = z4; item12 = z5;
   keep item1-item12;
run;

data out_2way_save;
   set _null_;
run;

%MAKE_2WAY(sasdata_one3);

%MEND CORRECT_FORMAT;
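
/*------------------------------------------------------------------------
  A note on what CORRECT_FORMAT leaves behind (assuming the item renaming
  above, i.e., &nrow2 = 3 W items, &ncol2 = 4 Y items, and &nstrat2 = 5 Z
  items):
    freq_set      - the joint multinomial table with percent rescaled to a
                    proportion; the request built by GENERALIZE_FREQ is
                    effectively
                    table W1*W2*W3*Y1*Y2*Y3*Y4*Z1*Z2*Z3*Z4*Z5 / sparse ...
    set3          - the stacked 2x2x2 (Wi, Yj, Zk) sub-tables from
                    FIND_SUBTABLE_WYZ, with zero cells set to &constant_zero
    out_2way_save - the pairwise item tables from MAKE_2WAY
------------------------------------------------------------------------*/
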
**********************************************************************;
** **;
** NAME: MACRO SIM_LOOP **;
** PURPOSE: Looping structure for the bootstrap simulations **;
** NOTES: Just use sim_numb = 1 %to 1 for one example data set **;
** Does all of the main bootstrap calculations and fits the **;
** model of interest. **;
** **;
**********************************************************************;
%MACRO SIM_LOOP(sim_data, seed, n);

%TIME(SIM_LOOP MACRO STARTED AT:);

*Put data in correct format for analysis;
%CORRECT_FORMAT(&sim_data);

*Create data sets to save some of the results in;
data save_rej;          set _null_; run;
data save_info_set;     set _null_; run;
data save_numb_it_item; set _null_; run;

%do sim_numb = 1 %to 1;

   data set2_orig;
      set set3;
      if iter = &sim_numb;
   run;

   ***************************************************************;
   *Calculate the model for the observed data;

   *TURN OFF PRINTING;
   proc printto print="nul:"; run;

   *Fit the model;
   proc genmod data=set2_orig;
      class &class;
      model count = &comp_ind_model &model / dist=poi link=log obstats noint;
      ods output obstats=obstats_orig modelfit=modfit_orig ParameterEstimates=est_orig;
   run;

   *TURN ON PRINTING;
   proc printto; run;

   *Sort for a merge later;
   proc sort data=modfit_orig;
      by criterion;
   run;

   ********************************************************************************;
   *Put the observed and predicted sub-tables together to use with the Gange algorithm;
   *This part would need to be generalized for other data sets with a different number of items!;
   data pred_sort2;
      set obstats_orig;
      select (W);
         when ("1") item_numb1=1;
         when ("2") item_numb1=2;
         when ("3") item_numb1=3;
         otherwise  item_numb1=0;
      end;
      select (Y);
         when ("1") item_numb2=4;
         when ("2") item_numb2=5;
         when ("3") item_numb2=6;
         when ("4") item_numb2=7;
         otherwise  item_numb2=0;
      end;
      select (Z);
         when ("1") item_numb3=8;
         when ("2") item_numb3=9;
         when ("3") item_numb3=10;
         when ("4") item_numb3=11;
         when ("5") item_numb3=12;
         otherwise  item_numb3=0;
      end;
      if wi = "0" then a = 0; else a = 1;   *needed since wi is a character variable;
      if yj = "0" then b = 0; else b = 1;
      if zk = "0" then c = 0; else c = 1;
      new_count = pred;
      keep item_numb1-item_numb3 a b c new_count;
   run;

   %TIME(GANGE BEGINS:);

   *Using the observed data and the predicted data, find a vector of multinomial;
   * probabilities under the Ho model;
   * - Macro is located in the separate Gange program;
   %DO_IT_GANGE;

   %TIME(GANGE ENDS:);

   *Change result format of the Gange program - p = tau = multinomial probabilities;
   data set1_tau2;
      set set1_tau;
      array item {&ncol} item1-item&ncol;
      array COLNUMB {&ncol} COL1-COL&ncol;
      do j = 1 to &ncol;
         item{j} = colnumb{j};
      end;
      p = col%eval(&ncol+1);
      keep item1-item&ncol p;
   run;
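
   /*---------------------------------------------------------------------
     At this point set1_tau2 has one row per joint response profile:
     item1-item&ncol are copied from COL1-COL&ncol of the Gange output,
     and p (= tau) is the corresponding multinomial probability estimated
     under the Ho model. This data set is the first argument passed to the
     resampling macro R_DO_IT_DATA_GEN below.
   ---------------------------------------------------------------------*/
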
   ********************************************************************************;
   *Take resamples;
   *Generate the data - Macro is located in the separate resampling program;
   %R_DO_IT_DATA_GEN(set1_tau2, &seed, &n, &totsim, &numbsim, gendata);

   *Save the number of resampled data sets lost due to a marginal table not being the observed size;
   data numb_it_item;
      set numb_it;
      *rename n=lostset;
      sim_numb = &sim_numb;
      keep sim_numb n;
   run;

   data save_numb_it_item;
      set save_numb_it_item numb_it_item;
   run;

   *Create the final version of the resampled data. Note that the gendata produced by the;
   * resampling program is for only 2 MRCVs (the program was originally written for that).;
   * We can still use the same program, but use a different DATA step here to create the;
   * gendata set that holds the resamples;
   data gendata;
      set sortset4;
      iter = floor((_n_-0.1)/&n+1);
      array rownumb   {&nrow2}   w1-w&nrow2;
      array colnumb   {&ncol2}   y1-y&ncol2;
      array stratnumb {&nstrat2} z1-z&nstrat2;
      array names {%eval(&nrow2*&ncol2*&nstrat2)} item1-item%eval(&nrow2*&ncol2*&nstrat2);
      do j = 1 to &nrow2;
         rownumb{j} = names{j};
      end;
      do j = 1 to &ncol2;
         colnumb{j} = names{j+&nrow2};
      end;
      do j = 1 to &nstrat2;
         stratnumb{j} = names{j+&nrow2+&ncol2};
      end;
      dummy = 1;
      keep y1-y&ncol2 w1-w&nrow2 z1-z&nstrat2 iter dummy;
   run;

   %GENERALIZE_FREQ(gendata, freq_set_gendata);

   data freq_set_gendata;
      set freq_set_gendata;
      percent = percent*0.01;
   run;

   data save_sub_WY;
      set _null_;
   run;

   %FIND_SUBTABLE_WYZ(freq_set_gendata, save_sub_WY);

   proc sort data=save_sub_WY;
      by iter;
   run;

   *Add &constant_zero to a sub-table cell if the cell is 0;
   data resample2;
      set save_sub_WY;
      if count = 0 then count = &constant_zero;
   run;

   ********************************************************************************;
   *Fit model to resamples;
   %TIME(FIT MODEL TO RESAMPLES BEGINS:);

   *TURN OFF PRINTING;
   proc printto print="nul:"; run;

   proc genmod data=resample2;
      class &class;
      model count = &comp_ind_model &model / dist=poi link=log obstats noint;
      ods output obstats=obstats modelfit=mod_fit_boot;
      by iter;
   run;

   *TURN ON PRINTING;
   proc printto; run;

   %TIME(FIT MODEL TO RESAMPLES ENDS:);

   ********************************************************************************;
   *Combine results and calculate p-value;

   *Sort for a merge coming up;
   proc sort data=mod_fit_boot;
      by criterion;
   run;

   *Merge the goodness-of-fit values for the resampled and observed data;
   data mod_fit_all;
      merge mod_fit_boot(rename=(value=value_star) drop=ValueDF DF)
            modfit_orig(rename=(value=value_orig) drop=ValueDF DF);
      by Criterion;
      if Criterion = "Log Likelihood" or Criterion = "Scaled Deviance"
         or Criterion = "Scaled Pearson X2" then delete;
      if value_star > value_orig then reject = 1;
      else reject = 0;
   run;

   data _null_;
      set modfit_orig;
      if criterion = 'Deviance' then call symput('Deviance', value);
      if criterion = 'Pearson Chi-Square' then call symput('Pearson', value);
   run;

   title2 "Bootstrap histogram";
   proc univariate data=mod_fit_all noprint;
      histogram value_star;
      footnote1 "The observed Pearson = &pearson and the observed LRT = &Deviance";
      by criterion;
   run;

   title2 "The p-values for &model versus the saturated model";
   footnote1;
   proc means data=mod_fit_all mean noprint;
      class criterion;
      var reject;
      output out=out_rej mean=p_value;
   run;

   data out_rej2;
      set out_rej;
      if _TYPE_ ne 0;
      drop _TYPE_ _FREQ_;
      sim_numb = &sim_numb;
   run;

   *Put into save data set;
   data save_rej;
      set save_rej out_rej2;
   run;
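
   /*---------------------------------------------------------------------
     The bootstrap p-value saved above is the proportion of resamples whose
     goodness-of-fit statistic exceeds the observed one. For example
     (hypothetical numbers only): if 5000 resampled data sets are kept and
     120 of them give value_star > value_orig for the Pearson criterion,
     then p_value = 120/5000 = 0.024, and the model of interest would be
     rejected at alpha = 0.05.
   ---------------------------------------------------------------------*/
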
   *Merge some of the calculations;
   data save_rej2;
      merge save_rej save_info_set save_numb_it_item;
      by sim_numb;
      if p_value < &alpha then reject = 1;
      else reject = 0;
   run;

   *Variable explanation: error - max(abs(tau*_i - tau*_(i+1))) for the last iteration, where;
   *   tau*_i is the vector of multinomial probabilities at the ith iteration;
   *   iter_conv - number of iterations taken to find the multinomial probabilities;
   *   under the Ho model;
   *   sim_numb and reject could be printed as well if simulations are being performed;
   title2 'Print the boot. p-values and the Gange convergence information (error and iter_conv)';
   proc print data=save_rej2;
      var Criterion p_value error iter_conv;
   run;

%end;

%TIME(SIM_LOOP MACRO ENDED AT:);

%MEND SIM_LOOP;


**********************************************************************;
** **;
** NAME: MACRO NUMOBS **;
** PURPOSE: Find number of observations in a data set **;
** NOTES: Adapted from a SAS website sample program, use with RS **;
** calculations **;
** VARIABLES: dsn = Data set **;
** name = Number of observations put in here **;
** **;
**********************************************************************;
%MACRO NUMOBS(dsn, name);

%global &name;

data _null_;
   call symput("&name", left(put(count2,8.)));
   stop;
   set &dsn nobs=count2;
run;

%MEND NUMOBS;


**********************************************************************;
** **;
** NAME: MACRO G_H **;
** PURPOSE: Creates matrices of all 0-1 combinations which are **;
** needed for PROC IML computations for RS **;
** NOTES: Use with RS calculations **;
** VARIABLES: numb_items = number of items **;
** matrix = prefix for the output data set name (e.g., Gitemvec2) **;
** **;
** **;
**********************************************************************;
%MACRO G_H(numb_items, matrix);

%let X = do item1 = 0 to 1%str(;);
%let endit = end%str(;);

*Generalize;
%do j = 2 %to &numb_items;
   %let X = &X.do item&j=0 to 1%str(;);
   %let endit = &endit end%str(;);
%end;

*Create matrix of all items;
data itemvec;
   &X;
   output;
   &endit;
run;

*Transpose the matrix to be in the correct form;
proc transpose data=itemvec out=&matrix.itemvec2;
   var item1-item&numb_items;
run;

%MEND G_H;
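
/*------------------------------------------------------------------------
  Example of what G_H builds, assuming numb_items = 2 and matrix = G (a
  minimal sketch; the RS macro below calls it with &nrow2, &ncol2, and
  &nstrat2 items). The transposed data set Gitemvec2 has one row per item
  and one column per 0-1 combination, so PROC IML reads it as the 2 x 4
  matrix

       G = | 0 0 1 1 |
           | 0 1 0 1 |

  i.e., each column is one of the 2**2 possible (item1, item2) responses.
------------------------------------------------------------------------*/
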
**********************************************************************;
** **;
** NAME: MACRO RS **;
** PURPOSE: Fit the model, do the 2nd order RS adjustment, find **;
** standardized Pearson residuals **;
** NOTES: This MACRO is not set up for simulations **;
** VARIABLES: n = sample size **;
** **;
** **;
**********************************************************************;
%MACRO RS(n);

%TIME(RS PART OF PROGRAM STARTED AT:);

*Just used to save results;
data save_sat2;
   set _null_;
run;

*Find the G, H, and L matrices;
%G_H(&nrow2, G);
%G_H(&ncol2, H);
%G_H(&nstrat2, L);

*TURN OFF PRINTING;
proc printto print="nul:"; run;

*Fit model - note: set3 comes from MACRO CORRECT_FORMAT, the data set is created in the bootstrap;
* portion of the program;
proc genmod data=set3;
   class &class;
   model count = &comp_ind_model &model / dist=poi link=log type3 obstats noint;
   ods output obstats=obstats_orig modelfit=modfit_orig ParameterEstimates=est_orig;
   ods exclude parameterestimates obstats;
run;

*TURN ON PRINTING;
proc printto; run;

***********************************************************************************;
*RS calculations;

*TURN OFF PRINTING;
proc printto print="nul:"; run;

*Find the X matrix;
proc glmmod data=set3 outdesign=X_set;
   class &class;
   model count = w*y*z wi(w*y*z) yj(w*y*z) zk(w*y*z) &model / noint;
run;

*TURN ON PRINTING;
proc printto; run;

*Count the columns in X_set by looking at the est_orig data set;
* ncol(X_set) = nrow(est_orig) - 2;
%NUMOBS(est_orig, col_in_X);

*Transpose for the merge;
proc transpose data=X_set out=Xtran name=col label=label;
   var col1-col%eval(&col_in_X-2);   *-2 since there will be intercept and scale rows in est_orig;
run;

*Merge with the parameter estimates to see which columns of X are used;
data merg_set;
   merge est_orig Xtran;
   if DF ne 0;
run;

*Transpose back;
proc transpose data=merg_set out=X_mat(drop=_name_);
   var col1-col%eval(8*&nrow2*&ncol2*&nstrat2);
run;

*Get in the order needed for PROC IML;
data merg_set2;
   merge obstats_orig X_mat;
run;

*Get the # of parameters estimated - the correct # of columns in the X matrix;
%NUMOBS(merg_set, row_in_merg_set);

proc sort data=merg_set2 out=merg_set3(keep=col1-col&row_in_merg_set);
   by descending wi descending yj descending zk;
run;

proc sort data=obstats_orig out=obstats_orig2;
   by descending wi descending yj descending zk;
run;

*Do the IML RS calculations;
proc iml;

   *Get X;
   use merg_set3;
   read all into X;

   use obstats_orig2;
   read all var{pred} into mu_hat;
   *print mu_hat;

   use obstats_orig2;
   read all var{count} into m;

   *FIND V matrix - Cov(m);
   *Get tau_hat - multinomial probabilities;
   use freq_set;
   read all var{percent} into tau;
   *print tau;

   use Gitemvec2;
   read all into G;
   use Hitemvec2;
   read all into H;
   use Litemvec2;
   read all into L;
   *print G, H, L;

   *Matrices of ones;
   Jr = repeat(1, &nrow2, 2**&nrow2);
   Jc = repeat(1, &ncol2, 2**&ncol2);
   Jq = repeat(1, &nstrat2, 2**&nstrat2);

   tau_to_pi = G@H@L      // G@H@(Jq-L)      // G@(Jc-H)@L      // G@(Jc-H)@(Jq-L)      //
               (Jr-G)@H@L // (Jr-G)@H@(Jq-L) // (Jr-G)@(Jc-H)@L // (Jr-G)@(Jc-H)@(Jq-L);

   *This part finishes the calculations needed for the cov_m matrix (called V in Appendix A).;
   * Since this can take some time (about 10 minutes on a 1.7 GHz processor with 512 MB of memory);
   * I have calculated the matrix already for the Section 4.2 data and saved it to the file;
   * v_mat_m.txt. Since the matrix does not change for each model, it can be used again - as long;
   * as the same data set is being analyzed. For a different data set, remove the comments from;
   * the code below and calculate it.;
   *cov_m = &n * tau_to_pi*(Diag(tau) - tau*t(tau))*t(tau_to_pi);
   *create cov_m_set from cov_m;
   * append from cov_m;
   use cov_m_set;
   read all into cov_m;

   *Covariance matrix for beta_hat;
   A = Diag(sqrt(mu_hat))*X;
   cov_beta = inv(t(A)*A) * t(A) * Diag(1/sqrt(mu_hat)) * cov_m *
              t(inv(t(A)*A) * t(A) * Diag(1/sqrt(mu_hat)));

   *Covariance matrix for mu_hat;
   cov_mu = Diag(mu_hat)*X * cov_beta * t(X)*Diag(mu_hat);

   *Covariance matrix for the residuals;
   cov_e = (I(8*&nrow2*&ncol2*&nstrat2) - Diag(mu_hat)*X*inv(t(X)*Diag(mu_hat)*X)*t(X)) * cov_m *
           t(I(8*&nrow2*&ncol2*&nstrat2) - Diag(mu_hat)*X*inv(t(X)*Diag(mu_hat)*X)*t(X));

   *Modified Pearson statistic;
   e = m - mu_hat;
   Xsq_M = t(e) * Diag(1/mu_hat) * e;
   *print Xsq_M;

   *Pearson residuals;
   stand_err = repeat(0, 8*&nrow2*&ncol2*&nstrat2, 1);
   do i = 1 to 8*&nrow2*&ncol2*&nstrat2;
      *due to rounding error, may get a small variance < 0;
      if cov_e[i,i] <= 0 then cov_e[i,i] = 10**(-16);
      stand_err[i,1] = sqrt(cov_e[i,i]);
   end;
   stand_resid = e/stand_err;
   pear_resid = e/sqrt(mu_hat);
   *print m mu_hat stand_err stand_resid pear_resid;

   send_out = stand_resid || stand_err || pear_resid || mu_hat;
   col = {"stand_resid" "stand_err" "pear_resid" "mu_hat"};
   create stand_resid from send_out[colname=col];
   append from send_out;

   *Find the eigenvalues for the RS adjustment;
   mat = Diag(1/mu_hat) * cov_e;
   eig_mat = eigval(mat);
   *print eig_mat;
   sum_eig = trace(mat);
   sum_eig_sq = sum(eig_mat[,1]##2);
   send_out2 = sum_eig || sum_eig_sq;
   *print sum_eig sum_eig_sq;
   col2 = {"sum_eig" "sum_eig_sq"};
   create sat_adj from send_out2[colname=col2];
   append from send_out2;

quit;

*Do this merge in order to get W Y Z wi yj zk in the same data set - needed for the next sort;
data stand_resid2;
   merge stand_resid obstats_orig2;
run;

*Reorder back to the way it was before;
proc sort data=stand_resid2 out=stand_resid3(keep=stand_resid stand_err pear_resid mu_hat
                                                  observation count W Y Z wi yj zk);
   by w y z wi yj zk;
run;

data sat;
   set modfit_orig;
   if criterion = 'Pearson Chi-Square';
   drop criterion valueDF;
run;
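
/*------------------------------------------------------------------------
  Second-order Rao-Scott adjustment used below: with sum_eig = trace of
  Diag(1/mu_hat)*cov_e (the sum of its eigenvalues) and sum_eig_sq = the
  sum of the squared eigenvalues,

      RS_Pearson = value*sum_eig/sum_eig_sq

  is compared to a chi-square distribution with

      df_adj = sum_eig**2/sum_eig_sq

  degrees of freedom, where value is the ordinary Pearson statistic from
  PROC GENMOD.
------------------------------------------------------------------------*/
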
*Do the RS adjustment to the Pearson statistic;
data sat2;
   merge sat sat_adj;
   length model $ 80;
   model = "&model";
   RS_Pearson = sum_eig*value/sum_eig_sq;
   df_adj = sum_eig**2 / sum_eig_sq;
   sat_p = 1 - CDF('CHISQUARED', RS_Pearson, df_adj);
run;

data save_sat2;
   set save_sat2 sat2;
run;

title2 "Print |standardized Pearson residuals| > 2.576";
proc print data=stand_resid3;
   where stand_resid > 2.576 or stand_resid < -2.576;
   var observation W Y Z wi yj zk count mu_hat stand_resid stand_err;
run;

title2 'The Pearson statistic (value), RS adjusted Pearson (RS_Pearson)';
title3 'DF for chi-square approximation (df_adj), and resulting p-value (sat_p)';
proc print data=save_sat2;
   var value RS_Pearson df_adj sat_p;
run;

%TIME(RS PART OF PROGRAM ENDED AT:);

%MEND RS;
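
/*------------------------------------------------------------------------
  A minimal, hypothetical sketch of how these macros might be invoked; see
  control_3MRCV.sas for the actual calls. The macro variables referenced in
  this file (nrow2, ncol2, nstrat2, ncol, class, comp_ind_model, model,
  constant_zero, totsim, numbsim, alpha), the macros DO_IT_GANGE and
  R_DO_IT_DATA_GEN, and the data set cov_m_set (see the v_mat_m.txt comment
  in MACRO RS) are assumed to be supplied by control_3MRCV.sas or the
  companion Gange and resampling programs. The data set name, seed, and
  sample size below are placeholders only:

      %SIM_LOOP(sasdata_one, 8212, 1300);
      %RS(1300);
------------------------------------------------------------------------*/
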