#####################################################################
# NAME:  Chris Bilder                                               #
# DATE:  3-13-09                                                    #
# UPDATE: 10-19-09 - made this a read-in file only and added MP and #
#         halving                                                   #
# Purpose: Summary measures for 1SIS, 2SIS, FIS, and Dorfman        #
#                                                                   #
#                                                                   #
#####################################################################


info.gt<-function(p, Sp, Se, plot=TRUE, plot.cut.dorf = TRUE, cond.prob.plot = FALSE, font.name = "sans", num.perm = 100, 
   row.size=NULL, col.size=NULL, seed.num=floor(10000*runif(1))) {

  #####################################################################
  # Initial calculations

    I<-length(p)
    p<-sort(p)   #In case p's are not in order


  #####################################################################
  # 1SIS

    tests<-c(1, 3:I, I+1, I+2)
    prob<-numeric(length(tests))

    #t=1
    prob[1]<-Sp*prod(1-p) + (1-Se)*(1 - prod(1-p))

    #t = 3, 4, ..., I
    for (t in 3:I) {
      if(t==3) {
        prob[t-1]<-(1-Sp)*Sp^(t-2)*prod(1-p)*(1-Sp-Se) + Se *
                   ((1-Sp)*(1-p[I+3-t]) + Se*p[I+3-t]) * (Sp*prod(1-p[1:(I+2-t)]) + (1-Se)*(1-prod(1-p[1:(I+2-t)]))) 
      }
      else {           
        prob[t-1]<-(1-Sp)*Sp^(t-2)*prod(1-p)*(1-Sp-Se) + Se * prod(Sp*(1-p[(I+4-t):I]) + (1-Se)*p[(I+4-t):I]) *
                   ((1-Sp)*(1-p[I+3-t]) + Se*p[I+3-t]) * (Sp*prod(1-p[1:(I+2-t)]) + (1-Se)*(1-prod(1-p[1:(I+2-t)]))) 
      }
    }

    #t = I + 1
    prob[length(tests)-1]<-(1-Sp) * Sp^(I-2) * prod(1-p) + Se*( prod( Sp*(1-p[3:I]) + (1-Se)*p[3:I]) - Sp^(I-2)*prod(1-p))
  
    #t = I + 2
    last.set<-numeric(length(4:(I+1)))
    for (m in 4:(I+1)) {
      if(m==I+1) {
        last.set[m-3]<-(1-Sp-Se)*(1-Sp)^2*Sp^(I+1-m)*prod(1-p) + Se * 
                       ((1-Sp)*(1-p[m-1]) + Se*p[m-1]) * ((1-Sp)*prod(1-p[1:(m-2)]) + Se*(1-prod(1-p[1:(m-2)]))) #First term for t=I+2
      }
      else {
        last.set[m-3]<-(1-Sp-Se)*(1-Sp)^2*Sp^(I+1-m)*prod(1-p) + Se * prod(Sp*(1-p[m:I]) + (1-Se)*p[m:I]) * 
                       ((1-Sp)*(1-p[m-1]) + Se*p[m-1]) * ((1-Sp)*prod(1-p[1:(m-2)]) + Se*(1-prod(1-p[1:(m-2)]))) #Other terms for t=I+2 (allows for G_ = 0 in it) 
      }
    }                     
    prob[length(tests)]<-sum(last.set)
   
    mean.1stage.IS<-sum(tests*prob)
    sd.1stage.IS<-sqrt(sum(tests^2*prob) - mean.1stage.IS^2)
 


  #####################################################################
  # 2SIS - Additional calculations needed for 2SIS and combine with 1SIS calculations at the end

    tests5<-c(1, 3:I, I+1, I+2, I+3)
 
    #Can not do 2SIS when I < 4
    if (I>3) {
      
      prob5<-numeric(length(tests5))

      #t=5, ..., I+1 - only the new 2-stage part
      part2.5<-matrix(data = 0, nrow = I-3, ncol = I-3)
  
      for (t in 5:(I+1)) {
       for (m in 1:(t-4)) {
    
         if ((I+2-m)>I && (I+5-t)>(I-m)) {
             part2.5[t-4,m]<-(1-Sp-Se)*(1-Sp)^3*Sp^(t-4)*prod(1-p) +
                   Se *                                                  ((1-Sp)*(1-p[I-m+1]) + Se*p[I-m+1])   *   
                   ( (1-Sp-Se)*(1-Sp)*Sp^(t-m-3)*prod(1-p[1:(I-m)]) + Se * 
                     ((1-Sp)*(1-p[I+4-t]) + Se*p[I+4-t]) * (Sp*prod(1-p[1:(I+3-t)]) + (1-Se)*(1-prod(1-p[1:(I+3-t)])))) 
         }
      
         if ((I+2-m)<=I && (I+5-t)>(I-m)) {
           part2.5[t-4,m]<-(1-Sp-Se)*(1-Sp)^3*Sp^(t-4)*prod(1-p) +
                   Se * prod(Sp*(1-p[(I+2-m):I]) + (1-Se)*p[(I+2-m):I]) * ((1-Sp)*(1-p[I-m+1]) + Se*p[I-m+1])   *   
                   ( (1-Sp-Se)*(1-Sp)*Sp^(t-m-3)*prod(1-p[1:(I-m)]) + Se * 
                    ((1-Sp)*(1-p[I+4-t]) + Se*p[I+4-t]) * (Sp*prod(1-p[1:(I+3-t)]) + (1-Se)*(1-prod(1-p[1:(I+3-t)])))) 
         }

         if ((I+2-m)>I && (I+5-t)<=(I-m)) {
           part2.5[t-4,m]<-(1-Sp-Se)*(1-Sp)^3*Sp^(t-4)*prod(1-p) +
                   Se *                                                  ((1-Sp)*(1-p[I-m+1]) + Se*p[I-m+1])   *   
                   ( (1-Sp-Se)*(1-Sp)*Sp^(t-m-3)*prod(1-p[1:(I-m)]) + Se * prod(Sp*(1-p[(I+5-t):(I-m)]) + (1-Se)*p[(I+5-t):(I-m)]) *
                     ((1-Sp)*(1-p[I+4-t]) + Se*p[I+4-t]) * (Sp*prod(1-p[1:(I+3-t)]) + (1-Se)*(1-prod(1-p[1:(I+3-t)])))) 
         }
           
         if ((I+2-m)<=I && (I+5-t)<=(I-m)) {
           part2.5[t-4,m]<-(1-Sp-Se)*(1-Sp)^3*Sp^(t-4)*prod(1-p) +
                   Se * prod(Sp*(1-p[(I+2-m):I]) + (1-Se)*p[(I+2-m):I]) * ((1-Sp)*(1-p[I-m+1]) + Se*p[I-m+1])   *   
                   ( (1-Sp-Se)*(1-Sp)*Sp^(t-m-3)*prod(1-p[1:(I-m)]) + Se * prod(Sp*(1-p[(I+5-t):(I-m)]) + (1-Se)*p[(I+5-t):(I-m)]) *
                     ((1-Sp)*(1-p[I+4-t]) + Se*p[I+4-t]) * (Sp*prod(1-p[1:(I+3-t)]) + (1-Se)*(1-prod(1-p[1:(I+3-t)])))) 
         }     
       } 
     }
     prob5[4:(4+(I-3)-1)]<-rowSums(part2.5) #Each row of the matrix is for a particular t
  

     #t=I+2 - only the new 2-stage part
     part2.I.2<-numeric(I-3)
     for (m in 5:(I+1)) {
    
       if (m>I) {
         part2.I.2[m-4]<-(1-Sp-Se)*(1-Sp)^2*Sp^(I-3)*prod(1-p) + 
          Se *                                       ((1-Sp)*(1-p[m-1]) + Se*p[m-1]) *
           ((1-Sp-Se)*Sp^(m-4)*prod(1-p[1:(m-2)]) + Se*(prod( Sp*(1-p[3:(m-2)]) + (1-Se)*p[3:(m-2)])))
       }
    
       if (m<=I) {    
         part2.I.2[m-4]<-(1-Sp-Se)*(1-Sp)^2*Sp^(I-3)*prod(1-p) + 
           Se * prod( Sp*(1-p[m:I]) + (1-Se)*p[m:I]) * ((1-Sp)*(1-p[m-1]) + Se*p[m-1]) *
            ((1-Sp-Se)*Sp^(m-4)*prod(1-p[1:(m-2)]) + Se*(prod( Sp*(1-p[3:(m-2)]) + (1-Se)*p[3:(m-2)])))
       }
         
     } 
     prob5[length(tests5)-1]<-sum(part2.I.2)


     #t=I+3
     save.t3<-matrix(data = 0, nrow = I-3, ncol = I-3) 
     for (m in 1:(I-3)) {
      for (r in 1:(I-m-2)) {
      
        #Have both sets of G_=0
        if((I+2-m)<=I && (r+3)<=(I-m)) {
          save.t3[m,r]<-(1-Sp-Se)*(1-Sp)^4*Sp^(I-r-3)*prod(1-p) +
                        Se *  prod(Sp*(1-p[(I+2-m):I]) + (1-Se)*p[(I+2-m):I]) * ((1-Sp)*(1-p[I-m+1]) + Se*p[I-m+1]) *
                              ((1-Sp-Se)*(1-Sp)^2*Sp^(I-m-r-2)*prod(1-p[1:(I-m)]) + 
                              Se * prod(Sp*(1-p[(r+3):(I-m)]) + (1-Se)*p[(r+3):(I-m)])*((1-Sp)*(1-p[r+2]) + Se*p[r+2]) *
                              ((1-Sp)*prod(1-p[1:(r+1)]) + Se*(1-prod(1-p[1:(r+1)]))))
          }
 
       #Have NO sets of G_=0
       if((I+2-m)>I && (r+3)>(I-m)) {
         save.t3[m,r]<-(1-Sp-Se)*(1-Sp)^4*Sp^(I-r-3)*prod(1-p) +
                       Se *                                                     ((1-Sp)*(1-p[I-m+1]) + Se*p[I-m+1]) *
                       ((1-Sp-Se)*(1-Sp)^2*Sp^(I-m-r-2)*prod(1-p[1:(I-m)]) +
                       Se *                                                           ((1-Sp)*(1-p[r+2]) + Se*p[r+2]) *
                       ((1-Sp)*prod(1-p[1:(r+1)]) + Se*(1-prod(1-p[1:(r+1)]))))   
           } 
         
       #Have 1st set of G_=0 only
       if((I+2-m)<=I && (r+3)>(I-m)) {
         save.t3[m,r]<-(1-Sp-Se)*(1-Sp)^4*Sp^(I-r-3)*prod(1-p) +
                       Se *   prod(Sp*(1-p[(I+2-m):I]) + (1-Se)*p[(I+2-m):I]) * ((1-Sp)*(1-p[I-m+1]) + Se*p[I-m+1]) *
                              ((1-Sp-Se)*(1-Sp)^2*Sp^(I-m-r-2)*prod(1-p[1:(I-m)]) + 
                              Se *                                                      ((1-Sp)*(1-p[r+2]) + Se*p[r+2]) *
                              ((1-Sp)*prod(1-p[1:(r+1)]) + Se*(1-prod(1-p[1:(r+1)]))))
         }
       
       #Have 2nd set of G_=0 only
       if((I+2-m)>I && (r+3)<=(I-m)) {
         save.t3[m,r]<-(1-Sp-Se)*(1-Sp)^4*Sp^(I-r-3)*prod(1-p) +
                       Se *                                                        ((1-Sp)*(1-p[I-m+1]) + Se*p[I-m+1]) *
                              ((1-Sp-Se)*(1-Sp)^2*Sp^(I-m-r-2)*prod(1-p[1:(I-m)]) + 
                              Se*prod(Sp*(1-p[(r+3):(I-m)]) + (1-Se)*p[(r+3):(I-m)])*((1-Sp)*(1-p[r+2]) + Se*p[r+2]) *
                              ((1-Sp)*prod(1-p[1:(r+1)]) + Se*(1-prod(1-p[1:(r+1)]))))
         }
      }
    }
    prob5[length(tests5)]<-sum(save.t3)
  
  
    #To combine prob5 with 1SIS probabilities, we need to subtract off the 1-stage probabilities that are broken down further 
    #  into parts (these correspond to the 1SIS probabilities with G_1..k = 1 where k>2 
    IS1.sub<-prob
    IS1.sub[I+2-1]<-last.set[1] #Keep first probability for t = I+2 only for IS because this contains the G_12=1
    IS2<-c(IS1.sub,0) + prob5   #Added a 0 to IS1.sub to acccount for the t=I+3 term now in IS2
    #IS is the 2-stage PMF

    mean.2stage.IS<-sum(tests5*IS2)
    sd.2stage.IS<-sqrt(sum(tests5^2*IS2) - mean.2stage.IS^2)
  } else
  
  { 
    cat("\n Warning: 2SIS can not be done for I = 3 \n")
    mean.2stage.IS<-NA
    sd.2stage.IS<-NA
    IS2<-rep(x = NA, times = length(tests)+1)
  }  
  


  #####################################################################
  # Dorfman
  
    tests3<-c(1,I+1)
    prob3<-numeric(2)
    prob3[1]<-Sp*prod(1-p) + (1-Se)*(1 - prod(1-p))
    prob3[2]<-(1-Sp)*prod(1-p) + Se*(1 - prod(1-p))
  
    mean.dorf<-sum(tests3*prob3)
    sd.dorf<-sqrt(sum(tests3^2*prob3) - mean.dorf^2)


  #####################################################################
  # FIS - Memory problems occur for I > 12 with a full matrix approach.  
  #       Peng Chen's program is run for I > 12 to get around this problem.
 
  if (I>12) {
     
    Se.original<-Se
    Sp.original<-Sp
    if (Se == 1) {Se<-0.99999999999999} 
    if (Sp == 1) {Sp<-0.99999999999999}
    if (Se == 1 | Sp == 1) { cat("Warning: If Se or Sp = 1, they are replaced with 0.99999999999999 for FIS due to the algorithm used \n")
                             cat("         to find the PMF. \n") }

    full.IS<-err_is_pmf(p,Se,Sp) #From Peng's program
    t.full.IS<-full.IS$T
    pmf.full.IS<-full.IS$prob
    
    mean.full.IS<-sum(t.full.IS*full.IS$prob)
    sd.full.IS<-sqrt(sum(t.full.IS^2*full.IS$prob) - mean.full.IS^2)
    
    if (Se.original == 1) {Se<-Se.original} 
    if (Sp.original == 1) {Sp<-Sp.original}
        
  } 
  
  else {
  
    #This is a way to find the PMF using matrices without paying attention to how large these matrices can be.  In the end, memory
    # problems will occur for a group size not small.  The matrix set up below is discussed somewhat at the end of the appendix (a little different version of A). 
    # One could just remove the else part here and use the above I>12 for I<=12 instead.
    Se.original<-Se
    Sp.original<-Sp
    if (Se == 1) {Se<-0.99999999999999} 
    if (Sp == 1) {Sp<-0.99999999999999}
    
    if (Se == 1 | Sp == 1) { cat("Warning: If Se or Sp = 1, they are replaced with 0.99999999999999 for FIS due to the algorithm used \n")
                             cat("         to find the PMF. \n") }
                              
    j<-function(length) { matrix(data = 1, nrow = length, ncol = 1) }
    q.vec<-kronecker(matrix(data = c(1-p[2], p[2]), nrow = 2, ncol = 1), c(1-p[1], p[1])) #q^(j) in the appendix
    A.mat<-matrix(data = c(  Sp,  (1-Sp)*Sp^2,      (1-Sp)^2*Sp,      (1-Sp)^2*Sp,    (1-Sp)^3,
                           1-Se, Se*Sp*(1-Se),          Se^2*Sp, Se*(1-Se)*(1-Sp), Se^2*(1-Sp),
                           1-Se, Se*(1-Se)*Sp, Se*(1-Se)*(1-Sp),          Se^2*Sp, Se^2*(1-Sp),
                           1-Se,  Se*(1-Se)^2,      Se^2*(1-Se),      Se^2*(1-Se),       Se^3), nrow = 4, ncol = 5, byrow = TRUE) #A matrix in the appendix
    t.vec<-c(1, rep(3, times = 4)) 
  
    for (I.iter in (3:I)) {
      A11<-kronecker(j(2^(I.iter-1)), matrix(data = c(1, rep(Sp, times = 5*2^(I.iter-3)-1)), nrow = 1, ncol = 5*2^(I.iter-3)))
      A12<-(1-Sp)*kronecker(t(j(5*2^(I.iter-3))), matrix(data = c(1-Sp, rep(Se, times = 2^(I.iter-1)-1)), nrow = 2^(I.iter-1), ncol = 1))
      A21<-cbind(c((1-Se)/Sp,j(2^(I.iter-1)-1)), kronecker(t(j(5*2^(I.iter-3)-1)), matrix(data = c(Se*(1-Se)/(1-Sp), rep(1-Se, times = 2^(I.iter-1)-1)), nrow = 2^(I.iter-1), ncol = 1)))
      A.mat<-rbind(cbind(A11*A.mat, A12*A.mat), cbind(A21*A.mat, Se^2*A.mat))
      q.vec<-kronecker(c(1-p[I.iter], p[I.iter]), q.vec)
      t.vec<-c(1, 1 + t.vec[-1], 2 + t.vec)
    }
  
    mean.full.IS<-sum(t(q.vec)%*%A.mat%*%t.vec)
    sd.full.IS<-sqrt(sum(t(q.vec)%*%A.mat%*%t.vec^2) - mean.full.IS^2)
    
    #PMF
    pmf1<-data.frame(T = t.vec, prob = t(t(q.vec)%*%A.mat))  #S vector in the Appendix
    pmf.full.IS<-tapply(X = pmf1$prob, INDEX = pmf1$T, FUN = sum) #condensed over the duplicate values of t
    t.full.IS<-as.numeric(names(pmf.full.IS))
    
    if (Se.original == 1) {Se<-Se.original} 
    if (Sp.original == 1) {Sp<-Sp.original}
    
  }



  
  ######################################################################
  # CDF plots
 
    if (plot == TRUE) { 
 
      #New graphics window
      win.graph(width = 10, height = 7, pointsize = 12)
      par(family = font.name)
      par(lend = "square") #Line end style - default is rounded

      oldpar<-par(mar = c(5.1, 4.1, 4.1, 4.1))  #Added to help with right side y-axis
      
      #maximum value for x-axis
      max.x<-max(t.full.IS, I+4, na.rm = TRUE) 
      
      #1SIS
      Ik<-I
      plot(x = tests, y = cumsum(prob), type = "s", xlab = expression(italic(t[k])), ylab = expression(italic(P)(italic(T[k]) <= 
        italic(t[k]))), lwd = 5, col = "darkgreen", panel.first=grid(col = "gray"), 
        main = substitute(paste("CDFs with ", italic(I[k])==Ik, ", ", italic(S[e])==Se, ", and ", italic(S[p])==Sp), list(Ik=I, Se=Se, Sp=Sp)), 
        xlim = c(0, max.x), ylim = c(0,1))
      abline(h = 0)
      segments(x0 = 0, y0 = 0, x1 = 1, y1 = 0, lwd = 5, col = "darkgreen")
      segments(x0 = 1, y0 = 0, x1 = 1, y1 = prob[1], lwd = 5, col = "darkgreen")
      segments(x0 = I+2, y0 = 1, x1 = max.x+1, y1 = 1, lwd = 5, col = "darkgreen")
   
      #2SIS
      if(I>=4) {
        lines(x = tests5, y = cumsum(IS2), type = "s", col = "purple", lty = 5, lwd = 3)
        segments(x0 = 0, y0 = 0, x1 = 1, y1 = 0, lwd = 3, col = "purple", lty = 5)
        segments(x0 = 1, y0 = 0, x1 = 1, y1 = IS2[1], lwd = 3, col = "purple", lty = 5)
        segments(x0 = I+3, y0 = 1, x1 = max.x+1, y1 = 1, lwd = 3, col = "purple", lty = 5)
      } 
      else text(x=1, y=0.15, labels = "Warning: 2-stage IS can not be done with I = 3", cex = 0.75, pos = 4)
 

     #FIS
        lines(x = t.full.IS, y = cumsum(pmf.full.IS), type = "s", col = "blue", lty = 6, lwd = 2)
        segments(x0 = 0, y0 = 0, x1 = 1, y1 = 0, lwd = 2, col = "blue", lty = 6)
        segments(x0 = 1, y0 = 0, x1 = 1, y1 = pmf.full.IS[1], lwd = 2, col = "blue", lty = 6)
        segments(x0 = max(t.full.IS), y0 = 1, x1 = max.x+1, y1 = 1, lwd = 2, col = "blue", lty = 6)
 
 
      #Since plot can get crowded, can specify whether or not to plot cut-tree and Dorfman
      if (plot.cut.dorf == TRUE) {
  
        #Dorfman
        lines(x = tests3, y = cumsum(prob3), type = "s", col = "red", lwd = 2, lty = 4)
        segments(x0 = 0, y0 = 0, x1 = 1, y1 = 0, lwd = 2, col = "red", lty = 4)
        segments(x0 = 1, y0 = 0, x1 = 1, y1 = prob3[1], lwd = 2, col = "red", lty = 4)
        segments(x0 = I+1, y0 = 1, x1 = max.x+1, y1 = 1, lwd = 2, col = "red", lty = 4)
        legend(x=max.x*3/4,y=0.8, legend=c("1SIS", "2SIS", "FIS", "Dorfman"), lty=c(1, 5, 6,  4), 
            col=c("darkgreen", "purple", "blue", "red"), bty="y", lwd=c(5,3,2,2), cex=1, bg = "white") 
             
      } else
         { 
          legend(locator(1), legend=c("1SIS", "2SIS", "FIS"), lty=c(1, 5, 6), 
            col=c("darkgreen", "purple", "cyan"), bty="n", lwd=c(5,3,2), cex=1)
          }

      p.vec<-as.character(round(p[1],3))
      for (i in 2:I) {
       p.vec<-paste(p.vec, ", ", as.character(round(p[i],3)), sep="")
      }
     
      if (I<=10) {
        text(x=1, y=0.1, labels = substitute(paste(italic(bolditalic(p)[k]), " = [", p.vec, "]"), list(p.vec=p.vec)), cex = 1, pos = 4)
      } else  
        {
        #text(x=1, y=0.1, labels = expression(paste(italic(bolditalic(p)[k]), " = [0.01, ..., 0.01, 0.1, 0.1]")),cex = 1, pos = 4)  
        text(x=1, y=0.1, labels = substitute(paste(italic(bolditalic(p)[k]), " = [", p1, ", ", p2, ", ... ,", p20, "]"), 
           list(p1=round(p[1],4), p2=round(p[2],4), p20=round(p[length(p)],4))), cex = 1, pos = 4)  
       }  
   
     par(oldpar)
         
   }

   if (cond.prob.plot == TRUE) {
   
     #2nd y-axis for the P(T_k = t_k | Z_k = 1)
     prob.at.least1<-prob[1]
     space<-(1-prob.at.least1)/5 #space equally out from the beginning to the end.  
     axis(side = 4, at = seq(from = prob.at.least1, to = 1, by = space), lab = c(0.0,0.2,0.4,0.6,0.8,1.0)) 
     mtext(text = expression(paste(italic(P)(italic(T[k]) == italic(t[k]) , "|" , italic(Z[k]) == 1))), side = 4, at=0.75, line = 2) #How can the "extra" comma be removed from the label???
     abline(h = prob.at.least1)
   }
   
   
 
  #####################################################################
  #Comparisons
  
    #Moments
    mean.sd<-data.frame(Method = c("1SIS", "2SIS", "FIS", "Dorfman"), 
               mean = c(mean.1stage.IS, mean.2stage.IS, mean.full.IS, mean.dorf), 
               sd = c(sd.1stage.IS, sd.2stage.IS, sd.full.IS, sd.dorf))
     
    #PMFs
    set1<-data.frame(tests, one.stage.IS.PMF = prob)
    set1.1<-data.frame(tests=tests5, two.stage.IS.PMF = IS2)
    set1.2<-data.frame(tests=t.full.IS, full.IS.PMF = pmf.full.IS)
    set3<-data.frame(tests=tests3, Dorfman.PMF = prob3)
    set1.set1.1<-merge(set1, set1.1, all = TRUE)
    set1.set1.12<-merge(set1.set1.1, set1.2, all = TRUE)  
    PMF<-merge(set1.set1.12, set3, all = TRUE)
    
    #CDFs
    set1<-data.frame(tests, one.stage.IS.PMF = cumsum(prob))
    set1.1<-data.frame(tests=tests5, two.stage.IS.PMF = cumsum(IS2))
    set1.2<-data.frame(tests=t.full.IS, full.IS.PMF = cumsum(pmf.full.IS))
    set3<-data.frame(tests=tests3, Dorfman.PMF = cumsum(prob3))
    set1.set1.1<-merge(set1, set1.1, all = TRUE)
    set1.set1.12<-merge(set1.set1.1, set1.2, all = TRUE)  
    CDF<-merge(set1.set1.12, set3, all = TRUE)
    
    
    #Fill in the NA's in the CDFs
    fill.in.CDF<-function(CDF.name) {
      temp<-is.na(CDF.name)
      CDF.new<-numeric(length(CDF.name))
      for (k in 1:(length(CDF.name)-1)) {
        if (temp[k] == FALSE) 
          {CDF.new[k]<-CDF.name[k]}
          else {
          CDF.new[k]<-max(CDF.name[1:k], na.rm = TRUE)
          }
        }
      CDF.new[length(CDF.name)]<-1
      CDF.new
    }
    
    #Find the differences in the CDFs
    save.diff<-matrix(data = NA, nrow = 3, ncol = 3)
    for (i in 2:(ncol(CDF)-1)) {
      for (j in (i+1):ncol(CDF)) {
        CDF1<-fill.in.CDF(CDF[,i])
        CDF2<-fill.in.CDF(CDF[,j])       
        diff.CDF<-CDF1-CDF2
        save.diff[i-1,j-2]<-sum(diff.CDF, na.rm = TRUE)
      } 
    }
    row.names(save.diff)<-c("1SIS", "2SIS", "FIS")
    save.diff<-as.data.frame(save.diff)
    names(save.diff)<-c("2SIS", "FIS", "Dorfman")
        
    
    #Conditional comparisons
    cond.PMF<-data.frame(tests = PMF$tests[-1], 
               one.stage.IS = PMF$one.stage.IS.PMF[-1]/(1-PMF$one.stage.IS.PMF[1]), 
               two.stage.IS = PMF$two.stage.IS.PMF[-1]/(1-PMF$two.stage.IS.PMF[1]),
               full.IS = PMF$full.IS.PMF[-1]/(1-PMF$full.IS.PMF[1]))
    cond.mean<-c(sum(cond.PMF$one.stage.IS*cond.PMF$tests, na.rm = TRUE), 
                 sum(cond.PMF$two.stage.IS*cond.PMF$tests, na.rm = TRUE),
                 sum(cond.PMF$full.IS*cond.PMF$tests, na.rm = TRUE))
    cond.var<-c(sum((cond.PMF$tests-cond.mean[1])^2*cond.PMF$one.stage.IS, na.rm = TRUE),
                sum((cond.PMF$tests-cond.mean[2])^2*cond.PMF$two.stage.IS, na.rm = TRUE),
                sum((cond.PMF$tests-cond.mean[3])^2*cond.PMF$full.IS, na.rm = TRUE))
    cond.moments<-data.frame(Method = c("1SIS", "2SIS", "FIS"), mean = cond.mean, sd = sqrt(cond.var))


  #####################################################################
  #Non-informative procedures added on 10-19-09
   
   set.seed(seed.num)
   
   ###############
   #Matrix pooling
   exp.mp<-NULL
   if (!is.null(row.size) & !is.null(col.size)) {
    if (I == row.size*col.size) { 
      keep.MP<-numeric(num.perm)
      for(i in 1:num.perm) {
        keep.MP[i]<-mat.pool(p = sample(p), Se = Se, Sp = Sp, row.size = row.size, col.size = col.size)
      }
      exp.mp<-mean(keep.MP)
      }
      else cat("(row size)*(column size) does not equal group size; no matrix pooling calculations are performed \n") 
   }
   
   if (is.null(row.size) | is.null(col.size)) {
    cat("No row and/or column sizes given for matrix pooling \n") #I had difficulty putting this in the if else statements above
   } 
   
   ###############
   #Halving
   
   if (I <= 8) { 
     cat("Note that 3H requires I > 2, 4H requires I > 4, and 5H requires I > 8 \n")
   } 

   keep.halving<-matrix(data = NA, nrow = num.perm, ncol = 3)
   for(i in 1:num.perm) {        
     keep.halving1<-halving(p = sample(p), Se = Se, Sp = Sp)
     keep.halving[i,1:length(keep.halving1)]<-keep.halving1 #Put results in this way in case there is a NA due to larger number of stages than possible for a halving procedure
   }

   halving.mean<-colMeans(keep.halving)
   halving.mean2<-data.frame(halving.3H = halving.mean[1], halving.4H = halving.mean[2], halving.5H = halving.mean[3])
  

#####################################################################
#Return values

  list(mean.sd = mean.sd, PMF = PMF, CDF = CDF, cond.PMF = cond.PMF, cond.moments = cond.moments, save.diff.CDF = save.diff,
       MP = exp.mp, halving = halving.mean2)

}




############################################################
#This function recursively produces the
#PMF of the number of retests T (excluding the initial test
#for the master pool) for the informative retesting procedure
#with testing errors
#Last modifed date:11-12-2008
#Author:PengChen
###########################################################
#3-13-09: Program was modified by Chris Bilder to match the notation
#         of the paper.

  err_is_pmf<-function(p, se, sp){
    n <- length(p)

    if(se==1) se=0.99999999
    if(sp==1) sp=0.99999999

    #special case with n=2
    if(n==2){
        #T can only be 1 or 3
        T<-c(1, 3)

        #px
        q.vec<-c((1-p[1])*(1-p[2]), 1-(1-p[1])*(1-p[2]))

        #Conditonal probability table
        A.mat<-matrix(c(sp,1-sp,1-se,se), ncol=2,byrow=2)

        #pmf
        prob<-t(q.vec)%*%A.mat

        return(list("T"=T,"prob"=prob))
    }
    #special case with n=3
    #serves as a starting point for the recursive relation
    else if(n==3){
        #T can only be 1 3 4 5
        T<-c(1,3,4,5)

        #px
        q.vec<-c(c((1-p[1])*(1-p[2]), 1-(1-p[1])*(1-p[2]))*(1-p[3]),
              c((1-p[1])*(1-p[2]), 1-(1-p[1])*(1-p[2]))*p[3])

        #Conditional prbability table
        A.mat<-matrix(0,nrow=4,ncol=4)
        A.mat[1,]<-c(sp,  (1-sp)^2*sp,      sp*(1-sp), (1-sp)^3)
        A.mat[2,]<-c(1-se,se*(1-sp)*(1-se), sp*se,     se^2*(1-sp))
        A.mat[3,]<-c(1-se,se^2*sp,          se*(1-se), se^2*(1-sp))
        A.mat[4,]<-c(1-se,se^2*(1-se),      (1-se)*se, se^3)

        #"W" vector is the first row of tb
        w<-A.mat[1,]

        #pmf
        prob<-as.vector(t(q.vec)%*%A.mat)

        return(list("T"=T,"prob"=prob,"w"=w))
    }
    #general recursive step
    else{
        #obtain results from last iteration
        ret<-err_is_pmf(p[1:(n-1)], se, sp)

        #dimension of the lower table
        low_dim<-length(ret$w)

        #current "w" vector
        #the first part
        w1<-c(sp,sp*ret$w[2:(low_dim)])
        #the second part
        w2<-(1-sp)^2*ret$w

        #"merge" the two parts for "w" vector
        w<-c(w1[1],w2[1],w1[2],w1[3:low_dim]+w2[2:(low_dim-1)], w2[low_dim])

        #current pmf

        #current P{T=0}, recursive relationship is not necessary
        p0<-sp*prod(1-p) + (1-se)*(1-prod(1-p))

        #subvector 1
        prob1<- c(p0,sp*(1-p[n])*ret$prob[2:low_dim]+
            (1-se)*p[n]*ret$prob[2:low_dim]+
            (se/(1-sp)-1)*(1-se)*ret$w[2:low_dim]*p[n]*prod(1-p[1:(n-1)]))

        #subvector 2
        prob2<- se*(1-sp)*(1-p[n])*ret$prob+
            ((1-sp)^2-se*(1-sp))*ret$w*prod(1-p)+
            se^2*p[n]*ret$prob


        #combine the two pieces of prob
        prob <- c(prob1[1],prob2[1],prob1[2],prob1[3:low_dim]+prob2[2:(low_dim-1)],prob2[low_dim])

        #support for T
        T    <- c(1,3:(2*n-1))

        return(list("T"=T,"prob"=prob,"w"=w))

    }
  }




##########################################################################
#Functions to perform non-informative matrix pooling and halving


  #Matrix pooling for one matrix
  mat.pool<-function(p, Se, Sp, row.size, col.size) {

    #Put p's into matrix my row - could generalize this later by allowing for a matrix of vector input and only
    #  do this if it was a vector
    p.mat<-matrix(data = p, nrow = row.size, ncol = col.size, byrow = TRUE)

    I<-nrow(p.mat)
    J<-ncol(p.mat)

    probR0<-numeric(I)
    for(i in 1:I) {
     probR0[i]<-Sp*prod(1-p.mat[i,]) + (1-Se)*(1-prod(1-p.mat[i,]))
    }

    probC0<-numeric(J)
    for(j in 1:J) {
     probC0[j]<-Sp*prod(1-p.mat[,j]) + (1-Se)*(1-prod(1-p.mat[,j]))
    }

    probR0andC0<-matrix(data = NA, nrow = I, ncol = J)
    probR0orC0<-matrix(data = NA, nrow = I, ncol = J)
    for(i in 1:I) {
      for(j in 1:J) {
        Rtilde0.Ctildle0<-Sp^2 * prod(1-p.mat[i,])*prod(1-p.mat[,j])/(1-p.mat[i,j])
        Rtilde0.Ctildle1<-Sp*(1-Se) * prod(1-p.mat[i,]) * (1 - prod(1-p.mat[-i,j]))
        Rtilde1.Ctildle0<-Sp*(1-Se) * prod(1-p.mat[,j]) * (1 - prod(1-p.mat[i,-j]))
        Rtilde1.Ctildle1<-(1-Se)^2 * (1 - prod(1-p.mat[i,]) - prod(1-p.mat[,j]) + prod(1-p.mat[i,])*prod(1-p.mat[,j])/(1-p.mat[i,j]))
        probR0andC0[i,j]<-Rtilde0.Ctildle0 + Rtilde0.Ctildle1 + Rtilde1.Ctildle0 + Rtilde1.Ctildle1
        probR0orC0[i,j]<-probR0[i] + probC0[j] - probR0andC0[i,j]
      }
    }

    exp.retests<-sum(1 - probR0orC0)
    exp.T<-nrow(p.mat) + ncol(p.mat) + exp.retests
    exp.T
  }



 #################################
 #Utility functions for halving 
  
   #P(G_11 = 1)
   prob.g1<-function(p, Se, Sp) {
     group.size<-length(p)
     if (group.size == 1) { 
       prob<-0 #Only individual testing
     }
     else {
       prob<-(1-Sp)*prod(1-p) + Se*(1 - prod(1-p)) 
     }
     prob  
   } 
   
   
   #P(G_11 = 1 & G_2j = 1) for j = 1,2
   prob.g1.g2<-function(p, Se, Sp, range) {
     group.size<-length(range) 
     if (group.size == 1) { 
       prob<-0 #This sets probability to 0 like how I_sj would be set to 0 in the tech report when a group test would not be done due to only one individual remaining (easier to do it this way)
     }
     else {
       prob<-(1-Sp)^2*prod(1-p) + Se*(1-Sp)*prod(1-p[range])*(1-prod(1-p[-range])) +  Se^2*(1 - prod(1-p[range])) 
     }
     prob  
   } 
   
   
   #P(G_11 = 1 & G_2k & G_3j = 1) for k = 1,2 and j = 1,2,3,4
   prob.g1.g2.g3<-function(p, Se, Sp, range1, range2) {
     group.size<-length(range2) 
     if (group.size == 1) { 
       prob<-0 #This sets probability to 0 like how I_sj would be set to 0 in the tech report when a group test would not be done due to only one individual remaining (easier to do it this way)
     }
     else {
       prob<-(1-Sp)^3*prod(1-p) + 
         Se*(1-Sp)^2 * ( prod(1-p[range1]) - prod(1-p) ) +  
         Se^2*(1-Sp) * ( prod(1-p[range2]) - prod(1-p[range1]) ) +
         Se^3*(1 - prod(1-p[range2])) 
     }
     prob  
   } 
  
  
   #P(G_11 = 1 & G_2k & G_3m = 1 & G_4j = 1) for k = 1,2, m = 1,2,3,4, and j = 1,...,8
   prob.g1.g2.g3.g4<-function(p, Se, Sp, range1, range2, range3) {
     group.size<-length(range3) 
     if (group.size == 1) { 
       prob<-0 #This sets probability to 0 like how I_sj would be set to 0 in the tech report when a group test would not be done due to only one individual remaining (easier to do it this way)
     }
     else {
       prob<-(1-Sp)^4*prod(1-p) + 
         Se*(1-Sp)^3   * ( prod(1-p[range1]) - prod(1-p) ) +  
         Se^2*(1-Sp)^2 * ( prod(1-p[range2]) - prod(1-p[range1]) ) +
         Se^3*(1-Sp)   * ( prod(1-p[range3]) - prod(1-p[range2]) ) +     
         Se^4*(1 - prod(1-p[range3])) 
     }
     prob  
   } 


  #######################################
  #Main halving function    

  halving<-function(p, Se, Sp) { 

    I<-length(p)
    
    #3-stage halving
    if (I > 2) {
      I21<-floor(I/2) #If group size is odd, the lower value goes into I21; e.g., I = 5 leads to I21 = 2 and I22=3 (one could switch the definition here if needed)
      I22<-I-I21
      exp.3H<-1 + 2*prob.g1(p=p, Se=Se, Sp=Sp) + I21*prob.g1.g2(p=p, Se=Se, Sp=Sp, range=1:I21) + 
              I22*prob.g1.g2(p=p, Se=Se, Sp=Sp, range=(I-I22+1):I)  
 
    } else 
    {
      exp.3H<-NULL
    }
 
   
    #4-stage halving
    if (I > 4) {  
      I31<-floor(I21/2)
      I32<-I21-I31
      I33<-floor(I22/2)
      I34<-I22-I33
      exp.4H<-1 + 2*prob.g1(p=p, Se=Se, Sp=Sp) + 2*prob.g1.g2(p=p, Se=Se, Sp=Sp, range=1:I21) + 
                                                 2*prob.g1.g2(p=p, Se=Se, Sp=Sp, range=(I-I22+1):I) +      
              I31*prob.g1.g2.g3(p=p, Se=Se, Sp=Sp, range1=1:I21, range2=1:I31) + 
              I32*prob.g1.g2.g3(p=p, Se=Se, Sp=Sp, range1=1:I21, range2=(I21-I32+1):I21) + 
              I33*prob.g1.g2.g3(p=p, Se=Se, Sp=Sp, range1=(I-I22+1):I, range2=(I21+1):(I21+I33)) + 
              I34*prob.g1.g2.g3(p=p, Se=Se, Sp=Sp, range1=(I-I22+1):I, range2=(I-I34+1):I)
    } else 
    {
      exp.4H<-NULL
    }
  
  
    #5-stage halving
    if (I > 8) {  
      I41<-floor(I31/2)
      I42<-I31-I41
      I43<-floor(I32/2)
      I44<-I32-I43
      I45<-floor(I33/2)
      I46<-I33-I45
      I47<-floor(I34/2)
      I48<-I34-I47
      exp.5H<-1 + 2*prob.g1(p=p, Se=Se, Sp=Sp) + 2*prob.g1.g2(p=p, Se=Se, Sp=Sp, range=1:I21) + 
                                                 2*prob.g1.g2(p=p, Se=Se, Sp=Sp, range=(I-I22+1):I) + 
              2*prob.g1.g2.g3(p=p, Se=Se, Sp=Sp, range1=1:I21, range2=1:I31) + 
              2*prob.g1.g2.g3(p=p, Se=Se, Sp=Sp, range1=1:I21, range2=(I21-I32+1):I21) + 
              2*prob.g1.g2.g3(p=p, Se=Se, Sp=Sp, range1=(I-I22+1):I, range2=(I21+1):(I21+I33)) + 
              2*prob.g1.g2.g3(p=p, Se=Se, Sp=Sp, range1=(I-I22+1):I, range2=(I-I34+1):I) +
           
            I41*prob.g1.g2.g3.g4(p=p, Se=Se, Sp=Sp, range1=1:I21, range2=1:I31, range3=1:I41) + 
            I42*prob.g1.g2.g3.g4(p=p, Se=Se, Sp=Sp, range1=1:I21, range2=1:I31, range3=(I31-I42+1):I31) + 
            I43*prob.g1.g2.g3.g4(p=p, Se=Se, Sp=Sp, range1=1:I21, range2=(I21-I32+1):I21, range3=(I31+1):(I31+I43)) + 
            I44*prob.g1.g2.g3.g4(p=p, Se=Se, Sp=Sp, range1=1:I21, range2=(I21-I32+1):I21, range3=(I21-I44+1):I21) + 
           
            I45*prob.g1.g2.g3.g4(p=p, Se=Se, Sp=Sp, range1=(I-I22+1):I, range2=(I21+1):(I21+I33), range3=(I21+1):(I21+I45)) + 
            I46*prob.g1.g2.g3.g4(p=p, Se=Se, Sp=Sp, range1=(I-I22+1):I, range2=(I21+1):(I21+I33), range3=(I21+I45+1):(I21+I33)) +
            I47*prob.g1.g2.g3.g4(p=p, Se=Se, Sp=Sp, range1=(I-I22+1):I, range2=(I-I34+1):I, range3=(I21+I33+1):(I21+I33+I47)) + 
            I48*prob.g1.g2.g3.g4(p=p, Se=Se, Sp=Sp, range1=(I-I22+1):I, range2=(I-I34+1):I, range3=(I-I48+1):I)
    } else 
    {
      exp.5H<-NULL
    }    
            
    c(exp.3H, exp.4H, exp.5H)   
  }








#