# Example: Prenatal infectious disease screening


# Load the screening data and preview its first rows
set1 <- read.csv(file = "c:\\data\\HIVKenya.csv")
head(x = set1)

# Assumed assay accuracy: sensitivity (Se) and specificity (Sp); values below 1 mean non-perfect testing
Sp <- 0.98
Se <- 0.98
# Switch both to 1 (perfect testing) to confirm that one obtains the same answer with all model fitting methods
# Se <- 1
# Sp <- 1


###########################################################################
# Estimate the model using glm() and no testing error

 # Ordinary logistic regression of hiv on age; the observed test results are treated as error free
 mod.fit <- glm(formula = hiv ~ age, data = set1, family = binomial(link = logit))
 # Coefficient table (estimate, SE, z, p-value) rounded for display
 round(coef(summary(mod.fit)), digits = 4)
 logLik(mod.fit)
 # Design matrix of the fitted model, passed to optim() later in the program
 X <- model.matrix(mod.fit)
 # library(package = car)
 # Anova(mod.fit)


###########################################################################
# Estimate the model using optim()

 # Log-likelihood for binary test results observed with testing error
 #  beta: regression coefficients; X: design matrix; Y: 0/1 test results
 #  Se, Sp: sensitivity and specificity of the test
 #  Returns the log-likelihood evaluated at beta (a single number)
 logL <- function(beta, X, Y, Se, Sp) {
  # Probability of being truly positive under the logistic model;
  #  plogis(eta) is the same as exp(eta)/(1 + exp(eta)), and with an intercept and one
  #  explanatory variable eta is simply beta[1] + beta[2]*X[,2]
  eta <- X%*%beta
  p.true <- plogis(eta)
  # Probability of testing positive: true positives plus false positives
  p.pos <- Se*p.true + (1 - Sp)*(1 - p.true)
  # Bernoulli log-likelihood contributions of the observed test results
  ll.terms <- Y*log(p.pos) + (1-Y)*log(1-p.pos)
  sum(ll.terms)
 }

 # Maximize logL() numerically, starting from the glm() estimates that ignore testing error;
 #  control = list(fnscale = -1) makes optim() maximize rather than minimize.
 #  Y is taken from mod.fit$y rather than set1$hiv so that it always has exactly the rows of X,
 #  even if glm() dropped observations with missing values.
 mod.fit.opt <- optim(par = mod.fit$coefficients, fn = logL, hessian = TRUE,
  X = X, Y = mod.fit$y, control = list(fnscale = -1), Se = Se, Sp = Sp, method = "BFGS")
 mod.fit.opt$par # beta.hats
 mod.fit.opt$value # log(L)
 mod.fit.opt$convergence # 0 means converged
 if (mod.fit.opt$convergence != 0) {
  warning("optim() did not converge; the estimates and standard errors may be unreliable")
 }
 # Estimated covariance matrix; negate the inverse Hessian because logL is maximized
 #  (the Hessian of logL is negative definite at the maximum)
 cov.mat <- -solve(mod.fit.opt$hessian)
 cov.mat
 sqrt(diag(cov.mat)) # SEs
 z <- mod.fit.opt$par[2]/sqrt(diag(cov.mat))[2] # Wald statistic
 # Two-sided p-value; the upper tail is requested directly to avoid the loss of precision
 #  that 1 - pnorm() suffers for large |z|
 2*pnorm(q = abs(z), lower.tail = FALSE)

 # Compare to optim results
 summary(mod.fit)$coefficients


################################################################################
# Estimate the model using the my.link() function and glm()

 # Used posting at https://stat.ethz.ch/pipermail/r-help/2006-April/103799.html and the work of
 #  Boan Zhang in Zhang, Bilder, and Tebbs (Statistics in Medicine, 2013) for motivation
 # mu = E(Y) = pi
 # Build a "link-glm" object for binomial() that accounts for testing error.
 #  The mean of the observed test result is mu = Se*pi.tilde + (1 - Sp)*(1 - pi.tilde),
 #  where pi.tilde is the probability of being truly positive and eta = logit(pi.tilde).
 #  Se, Sp: sensitivity and specificity; Se + Sp must differ from 1 because linkfun()
 #   divides by Se + Sp - 1
 #  Returns a list with linkfun, linkinv, and mu.eta of class "link-glm"
 my.link <- function(Se, Sp) {
  # d mu / d pi.tilde, shared by linkfun() and mu.eta()
  acc <- Se + Sp - 1
  linkfun <- function(mu) {
   # Back out the true-status probability from mu and take its logit
   pi.tilde <- (mu + Sp - 1)/acc
   qlogis(pi.tilde)
  }
  linkinv <- function(eta) {
   # plogis() is used instead of exp(eta)/(1 + exp(eta)) because the latter becomes
   #  Inf/Inf = NaN once exp(eta) overflows
   pi.tilde <- plogis(eta)
   Se*pi.tilde + (1 - Sp)*(1 - pi.tilde)
  }
  mu.eta <- function(eta) {
   # d mu / d eta = (Se + Sp - 1)*exp(eta)/(1 + exp(eta))^2; dlogis() evaluates the
   #  logistic density without overflow in either tail
   acc*dlogis(eta)
  }
  save.it <- list(linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta)
  class(save.it) <- "link-glm"
  save.it
 }

 # Refit the logistic model through glm(), now with the testing-error link built by my.link()
 mod.fit2 <- glm(formula = hiv ~ age, data = set1, family = binomial(link = my.link(Se, Sp)))
 round(coef(summary(mod.fit2)), digits = 4)
 vcov(mod.fit2)

 # brglm2 offers its own counterpart to my.link() through the mis() function
 library(brglm2)
 mod.fit3 <- glm(formula = hiv ~ age, data = set1,
   family = binomial(link =  mis(link = "logit", sensitivity = Se, specificity = Sp)))
 round(coef(summary(mod.fit3)), digits = 4)


################################################################################
# Estimate the model using group responses

 # 1st group tests negative so there are no retests
 # 2nd group tests positive and only individual 9 is positive
 head(set1[,c("age","gnum", "groupres", "retest")], n = 10)

 library(package = binGroup2)

 # Se and Sp are passed (rather than repeating 0.98) so that changing them once at the top of
 #  the program, e.g. to 1 for perfect testing, carries through to the group testing fits too

 # Without retests
 mod.fit.gt1 <- gtReg(formula = groupres ~ age, data = set1, type = "sp",
   groupn  =  gnum, sens = Se, spec = Sp, method = "Xie")
 summary(mod.fit.gt1)


 # With retests
 mod.fit.gt2 <- gtReg(formula = groupres ~ age, data = set1, type = "sp",
   groupn  =  gnum, retest = retest, sens = Se, spec = Sp, sens.ind = Se, spec.ind = Sp,
   method = "Xie")
 summary(mod.fit.gt2)


 # Number of individual tests
 nrow(set1)

 # Number of tests from Dorfman's algorithm
 #   Number of group tests + number of retests
 #   Groups are counted by their distinct labels, so gnum does not need to run 1, 2, ..., G
 #   NOTE(review): presumably groupres repeats the group's result on every member's row, so
 #    its sum counts the individuals who are retested - confirm against the data
 length(unique(set1$gnum)) + sum(set1$groupres)



################################################################################
# Comparisons

  # Estimated probability of being positive versus age for three fits:
  #  individual responses (mod.fit2), group responses without retests (mod.fit.gt1),
  #  and group responses with retests (mod.fit.gt2)
  curve(expr = plogis(mod.fit2$coefficients[1] + mod.fit2$coefficients[2]*x),
    xlim = range(set1$age), col = "black", lty = "solid",
    xlab = "Age", ylab = "Estimated probability")
  # xlab and ylab are not needed when adding to an existing plot
  curve(expr = plogis(mod.fit.gt1$coefficients[1] + mod.fit.gt1$coefficients[2]*x),
    col = "red", lty = "dashed", add = TRUE)
  curve(expr = plogis(mod.fit.gt2$coefficients[1] + mod.fit.gt2$coefficients[2]*x),
    col = "blue", lty = "dotdash", add = TRUE)
  # Identify the curves; without a legend the three fits cannot be told apart
  legend(x = "topleft",
    legend = c("Individual testing", "Group testing, no retests", "Group testing, with retests"),
    col = c("black", "red", "blue"), lty = c("solid", "dashed", "dotdash"), bty = "n")



































#