###################################################################### # Inference for Inference for pi1 - pi2 # # # ###################################################################### ########################################################################### #Observed values y1 <- 251 n1 <- 285 y2 <- 48 n2 <- 53 alpha <- 0.05 pi.hat1 <- y1/n1 pi.hat2 <- y2/n2 ########################################################################### #Confidence intervals #Wald - we will not use this interval var.wald <- pi.hat1*(1-pi.hat1) / n1 + pi.hat2*(1-pi.hat2) / n2 lower.wald <- pi.hat1 - pi.hat2 - qnorm(p = 1-alpha/2) * sqrt(var.wald) upper.wald <- pi.hat1 - pi.hat2 + qnorm(p = 1-alpha/2) * sqrt(var.wald) data.frame(lower.wald, upper.wald) #Another way to do the Wald library(package = PropCIs) wald2ci(x1 = y1, n1 = n1, x2 = y2, n2 = n2, conf.level = 0.95, adjust = "Wald") #Even another way to do the Wald prop.test(x = c(y1, y2), n = c(n1, n2), conf.level = 0.95, correct = FALSE) #Agresti-Caffo pi.tilde1 <- (y1+1)/(n1+2) pi.tilde2 <- (y2+1)/(n2+2) var.AC <- pi.tilde1*(1-pi.tilde1) / (n1+2) + pi.tilde2*(1-pi.tilde2) / (n2+2) lower.AC <- pi.tilde1 - pi.tilde2 - qnorm(p = 1-alpha/2) * sqrt(var.AC) upper.AC <- pi.tilde1 - pi.tilde2 + qnorm(p = 1-alpha/2) * sqrt(var.AC) data.frame(lower.AC, upper.AC) #Another way for Agresti-Caffo library(package = PropCIs) #If not done already wald2ci(x1 = y1+1, n1 = n1+2, x2 = y2+1, n2 = n2+2, conf.level = 0.95, adjust = "Wald") wald2ci(x1 = y1, n1 = n1, x2 = y2, n2 = n2, conf.level = 0.95, adjust = "AC") #Even another way to do the Agresti-Caffo prop.test(x = c(y1, y2) + 1, n = c(n1, n2) + 2, conf.level = 0.95, correct = FALSE) ########################################################################### #Hypothesis tests pi.hat.p <- (y1 + y2)/(n1 + n2) z.stat <- (pi.hat1 - pi.hat2)/sqrt(pi.hat.p*(1-pi.hat.p)*(1/n1 + 1/n2)) z.stat z.stat^2 z.crit <- qnorm(p = 1-alpha/2, mean = 0, sd = 1) pvalue <- 2*(1 - pnorm(q = abs(z.stat), mean = 0, sd = 1)) data.frame(pi.hat1, pi.hat2, z.stat, z.stat^2, z.crit, pvalue) prop.test(x = c(y1, y2), n = c(n1, n2), conf.level = 0.95, correct = FALSE, alternative = "two.sided") #Right-tail test z.crit <- qnorm(p = 1-alpha, mean = 0, sd = 1) pvalue <- 1 - pnorm(q = z.stat, mean = 0, sd = 1) data.frame(pi.hat1, pi.hat2, z.stat, z.crit, pvalue) prop.test(x = c(y1, y2), n = c(n1, n2), conf.level = 0.95, correct = FALSE, alternative = "greater") ########################################################################## # What if the number of successes were not already summarized? cell11 <- data.frame(first = rep(x = "made", times = 251), second = rep(x = "made", times = 251)) cell12 <- data.frame(first = rep(x = "made", times = 34), second = rep(x = "missed", times = 34)) cell21 <- data.frame(first = rep(x = "missed", times = 48), second = rep(x = "made", times = 48)) cell22 <- data.frame(first = rep(x = "missed", times = 5), second = rep(x = "missed", times = 5)) raw.data <- rbind(cell11, cell12, cell21, cell22) random.select <- sample(x = 1:338, replace = FALSE) raw.data2 <- raw.data[random.select,] row.names(raw.data2) <- NULL head(raw.data2) tail(raw.data2) cont.table <- table(raw.data2) cont.table y1 <- cont.table[1,1] y2 <- cont.table[2,1] n1 <- cont.table[1,1] + cont.table[1,2] n2 <- cont.table[2,1] + cont.table[2,2] data.frame(y1, y2, n1, n2)