***************************************************************************;
** PROGRAM: data_gen_resample.sas                                        **;
** AUTHOR:  Christopher R. Bilder                                        **;
**          Department of Statistics                                     **;
**          University of Nebraska-Lincoln                               **;
**          chris@chrisbilder.com                                        **;
** DATE:    5-2-05                                                       **;
** PURPOSE: Using a set of multinomial probabilities from Gange program, **;
**          this program generates the resamples under the Ho model      **;
** NOTES:                                                                **;
** 1) Do not distribute copies of this program without the permission of **;
**    the author.                                                        **;
** 2) Copyright 2005 Christopher R. Bilder                               **;
** 3) See control_2MRCV.sas for more information                         **;
***************************************************************************;

**********************************************************************;
**                                                                  **;
** NAME: MACRO R_ITEMVEC                                            **;
** PURPOSE: Find all possible combinations of item responses        **;
**          sorted.                                                 **;
** INPUT:   Macro variable ncol (number of binary items), which is  **;
**          not set in this macro and must exist before the call.   **;
** OUTPUT:  Data set itemvec with variables item1-item[ncol], one   **;
**          row per 0/1 combination.                                **;
**                                                                  **;
**********************************************************************;
%MACRO R_ITEMVEC;
    /* Keep the scratch macro variables in this macro so that same-named
       variables in a calling macro or in the global table are not
       overwritten. */
    %local X endit j;

    /* Opening DO statement and matching END for the first item. */
    %let X = do item1 = 0 to 1%str(;);
    %let endit = end%str(;);

    /* Append one nested DO and one END for each remaining item. */
    %do j = 2 %to &ncol;
        %let X = &X.do item&j=0 to 1%str(;);
        %let endit = &endit end%str(;);
    %end;

    /* The nested loops write one row per combination. item1 is the
       outermost loop, so it varies slowest and the last item fastest:
       0 0 0, 0 0 1, 0 1 0, and so on. */
    data itemvec;
        &X;
        output;
        &endit;
    run;
%MEND R_ITEMVEC;

**********************************************************************;
**                                                                  **;
** NAME: MACRO R_DATA_GEN                                           **;
** PURPOSE: Generates the multinomial observations                  **;
**          using the p values (multinomial probabilities) given in **;
**          Get_p                                                   **;
** NOTES: Make sure binary sequences are in the correct order       **;
**          0 0 0                                                   **;
**          0 0 1                                                   **;
**          0 1 0                                                   **;
**          ...                                                     **;
**;
** VARIABLES: Data=The data set name                                **;
**            Seed=Seed                                             **;
**            Iter=# of data sets to generate                       **;
**            n=Sample size for each data set                       **;
**                                                                  **;
** Reads macro variable numbp (number of p variables) and data set  **;
** itemvec, both created outside this macro.                        **;
** Creates set2, set4, set5 and sort_set.                           **;
**                                                                  **;
**********************************************************************;
%MACRO R_DATA_GEN(data, seed, iter, n);

    /* For every row read from the input data set, draw n category
       indices for each of iter data sets. CALL RANTBL stores the drawn
       index in result using the probabilities p1, p2, ... */
    data set2(keep=iter numb result);
        set &data;
        seed = &seed;
        do iter = 1 to &iter;
            do numb = 1 to &n;
                call rantbl(seed, of p1-p&numbp, result);
                output;
            end;
        end;
    run;

    /* Number the rows of itemvec so that each item pattern can be
       looked up by the drawn index. */
    data set4;
        set itemvec;
        result = _n_;
    run;

    /* The draws must be ordered by result before the match-merge. */
    proc sort data=set2;
        by result;
    run;

    /* Attach the item pattern to every draw. Patterns that were never
       drawn come through with a missing iter and are left out. */
    data set5;
        merge set2 set4;
        by result;
        if iter ne .;
    run;

    /* Put the draws back in data set order. */
    proc sort data=set5 out=sort_set;
        by iter;
    run;

%MEND R_DATA_GEN;

**********************************************************************;
**                                                                  **;
** NAME: MACRO R_ITEM                                               **;
** PURPOSE: PROC FREQ called within item check                      **;
** VARIABLES: var=Name of the item variable to check                **;
**                                                                  **;
** Reads sort_set and sampnumb. Creates f_[var], f_[var]0,          **;
** f3[var], f3[var]0 and f0[var].                                   **;
**                                                                  **;
**********************************************************************;
%MACRO R_ITEM(var);

    /* Count of positive responses within each data set. */
    proc freq data=sort_set(where=(&var = 1)) noprint;
        by iter;
        tables &var / sparse out=f_&var;
    run;

    /* Line the counts up against every data set number; a data set
       with no row in the PROC FREQ output gets a missing count. */
    data f3&var(drop=&var percent);
        merge sampnumb f_&var;
        by iter;
    run;

    /* Count of negative responses within each data set. */
    proc freq data=sort_set(where=(&var = 0)) noprint;
        by iter;
        tables &var / sparse out=f_&var.0;
    run;

    /* Same alignment for the negative-response counts. */
    data f3&var.0(drop=&var percent);
        merge sampnumb f_&var.0;
        by iter;
    run;

    /* Keep only the data set numbers whose positive count or negative
       count is missing for this item. */
    data f0&var;
        set f3&var f3&var.0;
        if count eq .;
    run;

%MEND R_ITEM;

**********************************************************************;
**                                                                  **;
** NAME: MACRO R_CK_ITEM                                            **;
** PURPOSE: Check to make sure all items have at least one positive **;
**          response                                                **;
**                                                                  **;
**********************************************************************;
** VARIABLES: n=Sample size for each data set                       **;
**            iter=# of data sets that were generated               **;
**                                                                  **;
** Reads sort_set and macro variable ncol, both created outside     **;
** this macro, and calls R_ITEM once per item.                      **;
** Creates sampnumb, freq0, freq0_it, numb_it, sortset2, sortset3.  **;
**********************************************************************;
%MACRO R_CK_ITEM(n, iter);
    /* Keep the loop index in this macro so a same-named macro variable
       in a calling scope is not overwritten. */
    %local item;

    /* One row per data set number; R_ITEM merges its counts against
       this list. Extra data sets are generated in case some need to be
       thrown out. The original note called the count totsim, presumably
       the value the caller passes as iter (confirm against the caller).
       The name is written without an ampersand because a star-style
       comment inside a macro has its macro references resolved. */
    data sampnumb;
        do iter = 1 to %eval(&iter);
            output;
        end;
    run;

    /* Start with an empty accumulator data set. */
    data freq0;
        set _null_;
    run;

    /* Check each item in turn and append the data set numbers that
       R_ITEM flagged for it. */
    %do item = 1 %to &ncol;
        %R_ITEM(item&item);

        data freq0;
            set freq0 f0item&item;
        run;
    %end;

    /* Collapse to one row per flagged data set number. */
    proc freq data=freq0 noprint;
        tables iter / out=freq0_it;
    run;

    /* Count the number of data sets excluded. */
    proc means data=freq0_it noprint;
        var iter;
        output out=numb_it n=n;
    run;

    /* Drop every observation that belongs to a flagged data set.
       A disabled statement from the original is kept for reference:
       sum = sum(of item1-item[ncol]) */
    data sortset2;
        merge sort_set freq0_it;
        by iter;
        if count >= 1 then delete;
    run;

    /* Keep the original data set number in iter2 and renumber iter so
       that each consecutive run of n rows gets the next integer. */
    data sortset3;
        set sortset2;
        iter2 = iter;
        iter = floor((_n_-0.1)/%eval(&n)+1);
    run;

%MEND R_CK_ITEM;

**********************************************************************;
**                                                                  **;
** NAME: MACRO R_DO_IT_DATA_GEN                                     **;
** PURPOSE: Control MACRO                                           **;
** NOTES:                                                           **;
**                                                                  **;
** VARIABLES:                                                       **;
** dataname = Name of the data set with the multinomial             **;
**            probabilities.                                        **;
**;
** seed     = Seed                                                  **;
** iter     = # of data sets to generate                            **;
** n        = Sample size per data set                              **;
** use      = # of data sets to keep, the first use*n rows that     **;
**            remain after the item check                           **;
** gendata  = Name of the data set to be used for the generated     **;
**            data                                                  **;
**                                                                  **;
** Reads macro variables ncol, nrow2 and ncol2, which are not set   **;
** in this file section and must exist before the call.             **;
**                                                                  **;
**********************************************************************;
%MACRO R_DO_IT_DATA_GEN(dataname, seed, n, iter, use, gendata);

    /* Find all possible combinations of binary items. */
    %R_ITEMVEC;

    /* Create empty OR_keep and margkeep data sets. They are not used
       again in this macro; presumably other programs append to them
       (confirm against the calling programs). */
    data OR_keep;
        set _null_;
    run;

    data margkeep;
        set _null_;
    run;

    /* Turn the column p of the probability data set into variables
       named p1, p2, ... in set1_tau3. */
    proc transpose data=&dataname out=set1_tau3 prefix=p;
        var p;
    run;

    /* Generate the data for the simulation. */
    %R_DATA_GEN(set1_tau3, &seed, &iter, &n);

    /* Remove data sets in which some item has no positive or no
       negative response. */
    %R_CK_ITEM(&n, &iter);

    /* Take out the extra simulated data sets: keep only the item
       variables of the first use*n rows. */
    data sortset4;
        set sortset3;
        keep item1-item&ncol;
        if _n_<=&use*&n;
    run;

    /* Renumber the data sets and copy the items into w and y
       variables: the first nrow2 items become w1, w2, ... and the next
       ncol2 items become y1, y2, ...
       The lookup array is sized nrow2+ncol2, the largest subscript used
       below. The original sized it nrow2*ncol2, which is smaller than
       the sum when either dimension is 1 and then fails with an array
       subscript out of range. The output is unchanged otherwise, since
       only the w, y, iter and dummy variables are kept. */
    data &gendata;
        set sortset4;
        iter = floor((_n_-0.1)/&n+1);

        array rownumb {&nrow2} w1-w&nrow2;
        array colnumb {&ncol2} y1-y&ncol2;
        array names {%eval(&nrow2+&ncol2)} item1-item%eval(&nrow2+&ncol2);

        do j = 1 to &nrow2;
            rownumb{j} = names{j};
        end;

        do j = 1 to &ncol2;
            colnumb{j} = names{j+&nrow2};
        end;

        dummy = 1;
        keep y1-y&ncol2 w1-w&nrow2 iter dummy;
    run;

%MEND R_DO_IT_DATA_GEN;

/*
*NEED in boot_2MRCV.sas:
data save_numb_it_item;
    set _null_;
run;
*saves information about number of data sets lost in data generation;
*/