rm(list=ls(all=TRUE)) ############################################################################# # Importance functions for the sensitivity analysis in Web Appendix E # These functions are similar to those included in "GeneralDistribution_Hierarchical_OP.R" ################################################################################# # obtain threshold, sensitivity, Specificity for individual testing # Arguments: # Pos.genr(B) should generate B samples from f+ # Neg.genr(B) should generate B samples from f- # Mes.genr(c.tilde) should generate a measured value for each value of the argument "c.tilde" based on f_epsilon # B: number of Monte Carlo samples # Outputs: # tau.star: the threshold for individual testing (from maximizing the Youden index) # Se: the sensitivity for individual testing # Sp: the specificity for individual testing IndTest=function(Pos.genr,Neg.genr,Mes.gner,B) { sample.pos=Mes.genr(Pos.genr(B)) sample.neg=Mes.genr(Neg.genr(B)) int=c(median(sample.neg),median(sample.pos)) Youden=function(t){return(-mean(sample.pos>t)-mean(sample.negtau.star) Sp=mean(sample.negt)+mean(sample.neg0 && s<=S) { temp=c() K=S.size[1]/S.size[s] for(j in 1:K) { ind=((j-1)*S.size[s]+1):(j*S.size[s]) temp[j]=I(Mes.genr(mean(True.Concen[ind]))>thres[s])*1 if(min(res[,ind])!=0) { eff=eff+1 } } res=rbind(res,rep(temp,rep(S.size[s],K))) Done=max(temp) s=s+1 } Diag=apply(res,2,prod) return(eff/S.size[1]) } #***********************************************************************# # Supporting function # This function simply does one Monte Carlo simulation for pooling sensitivity and pooling specificity # Arguments: # S.size: = c(n1, n2, ..., nS). The configuration of the algorithm # p = disease prevalence # thres = a sequence of thresholds used for pools of sizes n1, n2, # n3, ..., nS, respectively. Note that, one should make # sure that the order of thresholds in THRED is the same # as the one of pool sizes in S.size. # Pos.genr(B) should generate B samples from f+ # Neg.genr(B) should generate B samples from f- # Mes.genr(c.tilde) should generate a measured value for each value of the argument "c.tilde" based on f_epsilon Sim.Hierarchical.sesp=function(S.size,p,thres,Pos.genr,Neg.genr,Mes.genr) { S=length(S.size) True.Status=c(0,rbinom(S.size[1]-1,1,p)) True.Concen=Pos.genr(S.size[1])*True.Status+Neg.genr(S.size[1])*(1-True.Status) s=1 while(s<=S) { ind=1:(S.size[s]) temp=I(Mes.genr(mean(True.Concen[ind]))>thres[s])*1 if(temp==0) { s=S+1 Diag.neg=temp }else{ s=s+1 Diag.neg=temp } } True.Status=c(1,rbinom(S.size[1]-1,1,p)) True.Concen=Pos.genr(S.size[1])*True.Status+Neg.genr(S.size[1])*(1-True.Status) s=1 while(s<=S) { ind=1:(S.size[s]) temp=I(Mes.genr(mean(True.Concen[ind]))>thres[s])*1 if(temp==0) { s=S+1 Diag.pos=temp }else{ s=s+1 Diag.pos=temp } } return(list(Diag.pos=Diag.pos,Diag.neg=Diag.neg)) } #***********************************************************************# # Main function # This function approximate EFF, PSE, PSP, PPV, NPV of H(n1:n2:...:nS) # for a given prevalence using M Monte Carlo replications # # Arguments: # S.size: = c(n1, n2, ..., nS). The configuration of the algorithm # p = disease prevalence # thres = a sequence of thresholds used for pools of sizes n1, n2, # n3, ..., nS, respectively. Note that, one should make # sure that the order of thresholds in THRED is the same # as the one of pool sizes in S.size. # Pos.genr(B) should generate B samples from f+ # Neg.genr(B) should generate B samples from f- # Mes.genr(c.tilde) should generate a measured value for each value of the argument "c.tilde" based on f_epsilon # B: size of Monte Carlo replications Sim.Hierarchical=function(S.size,p,thres,Pos.genr,Neg.genr,Mes.genr,B) { num=0 num2=0 dpos=0 dneg=0 for(j in 1:B) { #print(j) temp=Sim.Hierarchical.num(S.size,p,thres,Pos.genr,Neg.genr,Mes.genr) num=num+temp num2=num2+temp^2 temp=Sim.Hierarchical.sesp(S.size,p,thres,Pos.genr,Neg.genr,Mes.genr) dpos=dpos+temp$Diag.pos dneg=dneg+temp$Diag.neg } eff.mean=num/B eff.sd=sqrt(round((B*num2-num^2)/(B*(B-1)),8)) pse=dpos/B psp=1-dneg/B ppv=pse*p/(pse*p+(1-psp)*(1-p)) npv=psp*(1-p)/(psp*(1-p)+(1-pse)*p) # output everything in a list # EFF: approximated efficiency # SD: approximated standard deviation of number of tests per specimen # PSE: approximated pooling sensitivty # PSP: approximated pooling specificity # PPV: approximated pooling positive predictive value # NPV: approximated pooling negative predcitive value return(list(EFF=eff.mean,SD=eff.sd,PSE=pse,PSP=psp,PPV=ppv,NPV=npv)) } #***********************************************************************# # Final function # This function approximate EFF, PSE, PSP, PPV, NPV of H(n1:n2:...:nS) # using our newly proposed threshold to maximize the pooled version of Youden index # for a given prevalence using B Monte Carlo replications # # Arguments: # S.size: = c(n1, n2, ..., nS). The configuration of the algorithm # p = disease prevalence # Pos.genr(B) should generate B samples from f+ # Neg.genr(B) should generate B samples from f- # Mes.genr(c.tilde) should generate a measured value for each value of the argument "c.tilde" based on f_epsilon # B: size of Monte Carlo replications Sim.Hierarchical.all=function(S.size,p,Pos.genr,Neg.genr,Mes.genr,B) { Ind.res=IndTest(Pos.genr,Neg.genr,Mes.gner,B) tau.star=Ind.res$tau.star print("The threshold for individual testing is") print(tau.star) Se=Ind.res$Se Sp=Ind.res$Sp #print("The sensitivity for individual testing is") print(Se) #print("The specificity for individual testing is") print(Sp) new.thres=c() for(ss in 1:length(S.size)) { new.thres=c(new.thres,Thres(S.size[ss],Pos.genr,Neg.genr,Mes.genr,B)) } OP.newth=Sim.Hierarchical(S.size,p,new.thres,Pos.genr,Neg.genr,Mes.genr,B) print(cbind(OP.newth)) return(cbind(OP.newth)) } ################################################################################## # Conduct the sensitivity analysis in Web Appendix E # The negative biomarker distribution is Gamma with mean 1 and variance .25 para.n=c(1,.25) para.n=c(para.n[1]^2/para.n[2],para.n[1]/para.n[2]) #alpha,beta Neg.genr=function(B){return(rgamma(B,shape=para.n[1],rate=para.n[2]))} # Pos.genr(B) generates B random samples from the user-chosen positive biomarker distribution # The positive biomarker distribution is Gamma with mean 10-delta and variance 20 # For the sensitivity analysis, when delta=0, the positive biomarker distribution is correctly specificed # When delta>0, the positive biomarker distribution is wrongly specified. delta <- 0 para.p=c(10-delta,20) para.p=c(para.p[1]^2/para.p[2],para.p[1]/para.p[2]) #alpha,beta Pos.genr=function(B){return(rgamma(B,shape=para.p[1],rate=para.p[2]))} # Given a vector of true biomarker levels \tilde C's, this function produces measured values of these true levels # based on the user-chosen measurement error distribution. # C|\tilde C\sim N(\tilde C, 0.1^2) para.e=c(0,0.1) Mes.genr=function(sample){return(rnorm( length(sample),sample+para.e[1],para.e[2]))} # Delta.grid creates the wrong specificiation of the positive biomarker distribution. # At delta take the value 0 in delta.grid, there is no wrong specificiation # When delta is at nonzero values in delta.grid, there is wrong specificiation. delta.grid=seq(0,2,length=21) OP.res=c() for(delta in delta.grid) { # Neg.genr(B) generates B random samples from the user-chosen negative biomarker distribution. # Pos.genr(B) generates B random samples from the user-chosen positive biomarker distribution # The positive biomarker distribution is Gamma with mean 10-delta and variance 20 # For the sensitivity analysis, when delta=0, the positive biomarker distribution is correctly specificed # When delta>0, the positive biomarker distribution is wrongly specified. para.p=c(10-delta,20) para.p=c(para.p[1]^2/para.p[2],para.p[1]/para.p[2]) #alpha,beta Pos.genr=function(B){return(rgamma(B,shape=para.p[1],rate=para.p[2]))} S.size=c(5,1) # i.e., H(5:1) p=0.05 # prevalence B=1000000 # number of Monte Carlo data sets # Approximate EFF, PSE, PSP, PPV, NPV of H(5:1) when p=0.05 through 1,000,000 Monte Carlo replications # When the positive biomarker distribution is specified to be Gamma with mean 10-delta, variance 25 res=Sim.Hierarchical.all(S.size,p,Pos.genr,Neg.genr,Mes.genr,B) # save all the approximated value in OP.res OP.res=cbind(OP.res,res) } ########################################################################## # Create the Figure E.1 in Web Appendix E layout(matrix(1:6, 3, 2,byrow=TRUE)) par(mar=c(4.5,4.5,.5,.5)) NameS=c("EFF","SD","PSE","PSP","PPV","NPV") for(mm in 1:6) { plot(delta.grid,OP.res[mm,],ylim=c(0,1),xlab=expression(delta),ylab=NameS[mm],cex.lab=1.2,cex.axis=1.2) lines(delta.grid,rep(OP.res[mm,1],length(delta.grid))) }