# These programs implement array testing for test data as described in Section 5 of the paper. # Parts of this code was motivated from the code used to calculate the operating # characteristics (Section 2 of the paper). # Author: Chris Bilder # Create row labels for the 0-1 combinations joint.p.row.labels <- function(ordering) { save.it <- character(length = nrow(ordering)) for (i in 1:nrow(ordering)) { save.it[i] <- paste(ordering[i,], collapse="") } save.it } # Performs testing for a group over K diseases do.test <- function(ytilde, Se, Sp, stage, K) { g.result <- numeric(K) for(k in 1:K) { if (ytilde[k] == 1) { g.result[k] <- rbinom(n = 1, size = 1, prob = Se[k, stage]) } else { g.result[k] <- rbinom(n = 1, size = 1, prob = 1-Sp[k, stage]) } } g.result } # This is a replacement for Informative.array.prob() in the binGroup package # The placement of each individual in the array is now tracked Informative.array.prob2.app <- function (prob.vec, nr, nc, method = "sd") { names(prob.vec) <- 1:length(prob.vec) prob.vec <- sort(prob.vec, decreasing = TRUE) prob.vec.numb <- as.numeric(names(prob.vec)) if (method == "sd") { array.probs <- prob.vec[1:2] array.probs.numb <- as.numeric(names(prob.vec[1:2])) prob.vec <- prob.vec[-(1:2)] array.probs <- cbind(array.probs, sort(prob.vec[1:2], decreasing = FALSE)) array.probs.numb <- cbind(array.probs.numb, as.numeric(names(sort(prob.vec[1:2], decreasing = FALSE)))) prob.vec <- prob.vec[-(1:2)] max.iter <- max(nr, nc) for (i in 1:max.iter) { if (nrow(array.probs) < nr) { array.probs <- rbind(array.probs, prob.vec[1:(ncol(array.probs))]) array.probs.numb <- rbind(array.probs.numb, as.numeric(names(prob.vec[1:(ncol(array.probs))]))) prob.vec <- prob.vec[-(1:(ncol(array.probs)))] } if (ncol(array.probs) < nc) { array.probs <- cbind(array.probs, sort(prob.vec[1:(nrow(array.probs))], decreasing = FALSE)) array.probs.numb <- cbind(array.probs.numb, as.numeric(names(sort(prob.vec[1:(nrow(array.probs))],decreasing = FALSE)))) prob.vec <- prob.vec[-(1:(nrow(array.probs)))] } } } if (method == "gd") { array.probs <- matrix(prob.vec, ncol = nc, nrow = nr, byrow = FALSE) array.probs.numb <- matrix(prob.vec.numb, ncol = nc, nrow = nr, byrow = FALSE) } list(array.probs = array.probs, array.probs.numb = array.probs.numb) } # Testing for one array array.test.gen.app <- function(set1, I, J, K, ordering, Se, Sp, method = "none", specific.arrange = NULL) { K <- ncol(ordering) # Determine row numbers for each disease - This allows one to be general # so that the ordering matrix could be different, like # matrix(data = c(0, 0, 1, 1, 0, 1, 0, 1), nrow = 4, ncol = 2) disease.row.numb <- 1:2^K * ordering # Estimated probability of being negative for all diseases p.00 <- set1[, K+1] # Order the individuals in the array for informative group testing if (method == "gd") { save.arrangement <- Informative.array.prob2.app(prob.vec = 1 - p.00, nr = I, nc = J, method = "gd") testing.config <- save.arrangement$array.probs.numb } if (method == "sd") { save.arrangement <- Informative.array.prob2.app(prob.vec = 1 - p.00, nr = I, nc = J, method = "sd") testing.config <- save.arrangement$array.probs.numb } if (method == "none") { testing.config <- matrix(data = 1:(I*J), nrow = I, ncol = J, byrow = TRUE) } ######################################## # True statuses of all individuals true.status.mat <- set1[,1:K] # Use the observed data as true statuses # In the program to calculate operating characteristics, true.status was generated using rmultinom() and then true.status.mat # was created from it. Now, we need to use the above true.status.mat from the observed data and then re-organize it to the # true.status format from before. true.status <- matrix(data = NA, nrow = 2^K, ncol = I*J, ) dimnames(true.status) <- list(joint.p.row.labels(ordering = ordering), as.character(1:(I*J))) # Helpful set of labels for (subject in 1:(I*J)) { mult.resp.vec <- numeric(2^K) for(i in 1:2^K) { mult.resp.vec[i] <- sum(ordering[i,] == true.status.mat[subject,1:K]) == K } true.status[,subject] <- mult.resp.vec } ######################################### # True row responses for rows and columns R.tilde <- matrix(data = NA, nrow = I, ncol = K) for (i in 1:I) { for (disease in 1:K) { # Check to see if there are any positive individuals in row i R.tilde[i,disease] <- sum(true.status[disease.row.numb[,disease], testing.config[i,]]) > 0 } } # R.tilde C.tilde <- matrix(data = NA, nrow = J, ncol = K) for (j in 1:J) { for (disease in 1:K) { # Check to see if there are any positive individuals in column j C.tilde[j,disease] <- sum(true.status[disease.row.numb[,disease], testing.config[,j]]) > 0 } } # C.tilde ######################################## # Stage 1 testing # Find observed row and column responses R.obs <- t(apply(X = R.tilde, MARGIN = 1, FUN = do.test, Se = Se, Sp = Sp, stage = 2, K = K)) # R.obs C.obs <- t(apply(X = C.tilde, MARGIN = 1, FUN = do.test, Se = Se, Sp = Sp, stage = 2, K = K)) # C.obs ######################################## # Stage 2 testing # Example where whole rows and columns need to be retested for 3x3 # R.obs <- matrix(data = c(1, 0, # 0, 0, # 0, 0), nrow = 3, ncol = 2, byrow = TRUE) # C.obs <- matrix(data = c(0, 1, # 0, 0, # 0, 0), nrow = 3, ncol = 2, byrow = TRUE) # R.obs # C.obs # Retest; 1, 2, 3, 4, 7 (all individuals in row #1 and column #1) retest.individuals.save <- numeric(0) # Save all individuals need to be retested from the K diseases # Determine which individuals need to be retested. This needs to be done BY DISEASE # due to situations like only one row testing positively for disease 1 and only one # column testing positively for disease 2. In this case, this should lead to retesting # everyone in the positive row and everyone in the positive column, NOT just the cell # where the row and column intersect. for (k in 1:K) { retest.individuals <- numeric(0) # Need to initialize in case no retests are needed if (sum(R.obs[,k]) > 0 & sum(C.obs[,k]) > 0) { retest.individuals <- as.vector(testing.config[R.obs[,k] == 1, C.obs[,k] == 1]) } if (sum(R.obs[,k]) > 0 & sum(C.obs[,k]) == 0) { retest.individuals <- as.vector(testing.config[R.obs[,k] == 1,]) } if (sum(R.obs[,k]) == 0 & sum(C.obs[,k]) > 0) { retest.individuals <- as.vector(testing.config[, C.obs[,k] == 1]) } retest.individuals.save <- c(retest.individuals.save, retest.individuals) } # retest.individuals.save # Some individuals will be in here more than once # unique(retest.individuals.save) # Positive/negative outcomes from algorithm - initialized with all negatives alg.result <- matrix(data = 0, nrow = I*J, ncol = K) for (i in unique(retest.individuals.save)) { alg.result[i,] <- do.test(ytilde = true.status.mat[i,], Se = Se, Sp = Sp, stage = 2, K = K) } ######################################## # Number of tests numb.tests <- I + J + length(unique(retest.individuals.save)) # Final results of the array testing # Adding 0 to true.status.mat converts its True/False to 1/0 list(alg.result = alg.result, true.status = true.status.mat + 0, numb.tests = numb.tests) } #Emulate testing over the entire data set emulate <- function(data.set, I, J, ordering, Se, Sp, method) { K <- ncol(ordering) # Number of diseases sample.size <- nrow(data.set) total.in.array <- I*J max.groups <- floor(nrow(data.set)/total.in.array) # Use to store results save.numb.tests <- numeric(max.groups) # save.alg.result <- matrix(data = NA, nrow = sample.size, ncol = K) save.alg.result <- matrix(data = NA, nrow = max.groups*total.in.array, ncol = K) # Iterate over each initial set of I individuals for (i in 1:max.groups) { save.gen <- array.test.gen.app(set1 = data.set[(1 + (total.in.array)*(i-1)):((total.in.array)*i),], I = I, J = J, K = K, ordering = ordering, Se = Se, Sp = Sp, method = method) save.alg.result[(1 + total.in.array*(i-1)):(total.in.array*i),] <- save.gen$alg.result save.numb.tests[i] <- save.gen$numb.tests } # Individual testing for remainder individuals who did not fit into groups 1:max.groups save.remainder <- matrix(data = NA, nrow = sample.size - max.groups*total.in.array, ncol = K) if (max.groups*total.in.array < nrow(data.set)) { for (i in (max.groups*total.in.array + 1):sample.size) { save.remainder[i-max.groups*total.in.array,] <- do.test(ytilde = t(data.set[i,1:K]), Se = Se, Sp = Sp, stage = 1, K = K) } } save.numb.tests.w.remain <- c(save.numb.tests, rep(x = 1, times = sample.size - max.groups*total.in.array)) save.alg.result.w.remain <- rbind(save.alg.result, save.remainder) list(tests = save.numb.tests.w.remain, alg.result = save.alg.result.w.remain) } # Accuracy measures accuracy.app <- function(data.set, results, K) { acc.save <- matrix(data = NA, nrow = K, ncol = 4) dimnames(acc.save) <- list(as.character(1:K), c("PSe", "PSp", "PPPV", "PNPV")) # Functions are in Utility.R for (k in 1:K) { acc.save[k,] <- c(pl_se(g = results[,k], Y = data.set[,k]), pl_sp(g = results[,k], Y = data.set[,k]), ppv(g = results[,k], Y = data.set[,k]), npv(g = results[,k], Y = data.set[,k])) } acc.save } ################################################################################ # Main function IAT.sim <- function(data.set, I, J, ordering, Se, Sp, B, method, iter.print = 100, ...) { K<- ncol(ordering) save.T.acc <- matrix(data = NA, nrow = B, ncol = 9, dimnames = list(sim = 1:B, measure = c("Tests", paste(rep(c("PSe", "PSp", "PPPV", "PNPV"), times = 2), rep(1:K, each = 4), sep = "")))) for (b in 1:B) { save.res <- emulate(data.set = data.set, I = I, J = J, ordering = ordering, Se = Se, Sp = Sp, method = method) save.T.acc[b,1] <- sum(save.res$tests) save.acc <- accuracy.app(data.set = data.set, results = save.res$alg.result, K = ncol(ordering)) for (k in 1:K) { save.T.acc[b, (2 + 4*(k-1)):(5 + 4*(k-1))] <- save.acc[k,] } if(floor(b/iter.print) == ceiling(b/iter.print)) {print(b)} } final <- c(sd(save.T.acc[,1]), colMeans(save.T.acc)) names(final)[1:2] <- c("sd.tests", "Mean.tests") list(summary = final, save = save.T.acc) } ################################################################################ #