# Example: Complete separation


################################################################################
# Example #1

  # Toy data in which every y = 0 has x1 <= 5 and every y = 1 has x1 >= 6
  set1 <- data.frame(x1 = as.numeric(1:10), y = rep(x = c(0, 1), each = 5))
  set1

  # Ordinary logistic regression fit; trace = TRUE is passed on to the fitting
  #   control so that progress is reported for each iteration
  mod.fit1 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit),
    trace = TRUE)
  summary(mod.fit1)
  coef(mod.fit1)


  # Open a wide graphics window and split it into one row of two panels
  dev.new(width = 10, height = 6, pointsize = 12)
  # pdf(file = "c:\\figures\\Figure2.7color.pdf", width = 7, height = 6, colormodel = "cmyk")   # Create plot for book
  par(mfrow = c(1,2))

  # Observed 0/1 responses for set1, with a dotted gray grid drawn first
  plot(
    x = set1$x1,
    y = set1$y,
    xlab = expression(x[1]),
    ylab = "Estimated probability",
    main = "Plot for set1",
    panel.first = grid(col = "gray", lty = "dotted")
  )

  # Overlay the estimated probabilities from the glm() fit
  curve(
    expr = predict(object = mod.fit1, newdata = data.frame(x1 = x), type = "response"),
    n = 1000, add = TRUE, col = "red", lwd = 2
  )


  # Same model, but allow up to 50 iterations
  mod.fit.maxit <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit),
    trace = TRUE, maxit = 50)
  summary(mod.fit.maxit)
  coef(mod.fit.maxit)

  # Same model again, now also with a much smaller convergence criterion epsilon
  mod.fit.eps <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit),
    trace = TRUE, epsilon = 1e-12, maxit = 50)
  summary(mod.fit.eps)
  coef(mod.fit.eps)




  ###################################################################
  # Estimate model with Firth's method

  library(package = logistf)
  mod.fit.firth <- logistf(formula = y ~ x1, data = set1, pl = TRUE, alpha = 0.05)

  # Narrow the printing in the R Console window for the summary, then restore
  #   whatever settings were in effect beforehand (options() returns the old
  #   values) instead of assuming they were the factory defaults
  save.opts <- options(width = 60, digits = 4)
  summary(mod.fit.firth)
  options(save.opts)

  names(mod.fit.firth)
  mod.fit.firth$loglik   # penalized log-likelihood for beta0 and beta0 + beta1*x1 models

  # Example finding LRT: compute the statistic once and pass the stored value
  #   on to pchisq() so that the p-value is not based on a hand-typed, rounded
  #   number
  lrt.stat <- -2*(mod.fit.firth$loglik[1] - mod.fit.firth$loglik[2])
  lrt.stat
  # Upper-tail chi-square probability with 1 degree of freedom
  pchisq(q = lrt.stat, df = 1, lower.tail = FALSE)

  # Intercept-only Firth fit (no explanatory variables) to act as the reduced model
  mod.fit.firth.null <- logistf(formula = y ~ 1, data = set1, pl = TRUE)

  # Demonstrate how to calculate the LR statistic: compare the two fits, or
  #   test within the full fit (values = 0 indicates testing beta1 = 0)
  anova(mod.fit.firth, mod.fit.firth.null)
  logistftest(object = mod.fit.firth, values = 0)

  # pi.hat for every observation in set1
  mod.fit.firth$predict
  # pi.hat at the single value x1 = 5
  predict(object = mod.fit.firth, newdata = data.frame(x1 = 5), type = "response")

  # Add the logistf() curve to the current plot and label both curves
  curve(
    expr = predict(object = mod.fit.firth, newdata = data.frame(x1 = x), type = "response"),
    n = 1000, add = TRUE, col = "blue", lwd = 1
  )
  legend(
    x = 0.5, y = 0.8,
    legend = c("glm()", "logistf()"),
    col = c("red", "blue"),
    lty = c(1,1),
    lwd = c(2,1),
    bty = "n"
  )


  ####################################################################
  # Additional way to use Firth's method with the brglm2 package

  library(package = "brglm2")

  # Same glm() call as before except for the method argument, which names the
  #   fitting routine "brglmFit" (supplied by brglm2 - loaded above)
  mod.fit.firth2 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit),
     method = "brglmFit")
  # Inspect the class of the returned object and list the methods registered
  #   for the "brglmFit" class
  class(mod.fit.firth2)
  methods(class = "brglmFit")
  summary(mod.fit.firth2) # Deviance statistics do not use the modified likelihood
  confint(mod.fit.firth2) # Wald CI
  # Residual and null deviances stored in the fitted object
  mod.fit.firth2$deviance
  mod.fit.firth2$null.deviance

  # Log-likelihood function - Demonstrate unmodified likelihood function is used in deviance calculations
  # Unmodified (ordinary) Bernoulli log-likelihood for a logistic regression
  #   model with one explanatory variable.
  #   beta: numeric vector; beta[1] is the intercept and beta[2] is the slope
  #   x:    numeric vector of explanatory variable values
  #   Y:    numeric vector of 0/1 responses, same length as x
  #   Returns the scalar sum of Y*log(pi) + (1-Y)*log(1-pi), where
  #   pi = exp(beta[1] + beta[2]*x)/(1 + exp(beta[1] + beta[2]*x)).
  logL <- function(beta, x, Y) {
    eta <- beta[1] + beta[2]*x
    # Obtain log(pi) and log(1-pi) directly on the log scale. Forming
    #   exp(eta)/(1+exp(eta)) first returns NaN once exp(eta) overflows, and
    #   log(1-pi) returns -Inf once pi rounds to 1; both happen easily with the
    #   large linear predictors produced under complete separation. This also
    #   avoids masking R's built-in constant pi with a local variable.
    log.pi <- plogis(q = eta, log.p = TRUE)
    log.1m.pi <- plogis(q = eta, lower.tail = FALSE, log.p = TRUE)
    sum(Y*log.pi + (1-Y)*log.1m.pi)
  }

  # Check value of the log-likelihood function at pi^
  # The hand-written logL() is evaluated at the brglmFit coefficient estimates
  #   so that it can be compared with what logLik() reports for the same fit
  logL(beta = mod.fit.firth2$coefficients, x = set1$x1, Y = set1$y)
  logLik(mod.fit.firth2)  # log-likelihood function at pi^

  # Intercept-only model fit with the same "brglmFit" method
  mod.fit.firth2.null <- glm(formula = y ~ 1, data = set1, family = binomial(link = logit),
     method = "brglmFit")
  mod.fit.firth2.null$deviance
  # Hand calculation for comparison: -2 times the log-likelihood with pi = 0.5
  #   for the 5 observations with y = 1 and the 5 observations with y = 0
  -2*(5*log(0.5) + 5*log(1-0.5))  # Null deviance

  # Uses unmodified likelihood function
  # Three ways of requesting the test for x1: sequential anova() on the full
  #   fit, anova() comparing the null and full fits, and Anova() from car
  anova(mod.fit.firth2, test = "Chisq")
  anova(mod.fit.firth2.null, mod.fit.firth2, test = "Chisq")
  library(package = car)
  Anova(mod.fit.firth2, test = "LR")





################################################################################
# Example #2

  # Second data set: x1 = 6 has y = 0 while x1 = 5 has y = 1, so the y = 0 and
  #   y = 1 groups overlap in x1
  set2 <- data.frame(x1 = c(1,2,3,4,6,5,7,8,9,10), y = c(0,0,0,0,0, 1,1,1,1,1))
  set2

  # Ordinary logistic regression fit to set2, reporting progress at each iteration
  mod.fit2 <- glm(formula = y ~ x1, data = set2, family = binomial(link = logit), trace = TRUE)
  summary(mod.fit2)
  # Show the estimates for the set2 model (this previously displayed the
  #   coefficients of mod.fit1, the set1 model, by mistake)
  mod.fit2$coefficients

  # Observed 0/1 responses for set2, with a dotted gray grid drawn first
  plot(
    x = set2$x1,
    y = set2$y,
    xlab = expression(x[1]),
    ylab = "Estimated probability",
    main = "Plot for set2",
    panel.first = grid(col = "gray", lty = "dotted")
  )

  # Estimated probabilities from the glm() fit
  curve(
    expr = predict(object = mod.fit2, newdata = data.frame(x1 = x), type = "response"),
    n = 1000, add = TRUE, col = "red", lwd = 2
  )

  # Firth fit to set2 and its estimated probabilities on the same plot
  mod.fit.firth.set2 <- logistf(formula = y ~ x1, data = set2, pl = TRUE, alpha = 0.05)
  summary(mod.fit.firth.set2)
  curve(
    expr = predict(object = mod.fit.firth.set2, newdata = data.frame(x1 = x), type = "response"),
    n = 1000, add = TRUE, col = "blue", lwd = 1
  )
  legend(
    x = 0.5, y = 0.8,
    legend = c("glm()", "logistf()"),
    col = c("red", "blue"),
    lty = c(1,1),
    lwd = c(2,1),
    bty = "n"
  )
  # dev.off()  # Create plot for book


################################################################################
# Black-and-white plot

  # pdf(file = "c:\\figures\\Figure2.7BW.pdf", width = 7, height = 6, colormodel = "cmyk")   # Create plot for book
  par(mfrow = c(1,2))

  # Left panel: set1 with the glm() curve (thick line) and logistf() curve
  #   (default line width), both in black
  plot(
    x = set1$x1, y = set1$y,
    xlab = expression(x[1]), ylab = "Estimated probability",
    main = "Plot for set1"
  )
  curve(
    expr = predict(object = mod.fit1, newdata = data.frame(x1 = x), type = "response"),
    n = 1000, add = TRUE, col = "black", lwd = 2
  )
  curve(
    expr = predict(object = mod.fit.firth, newdata = data.frame(x1 = x), type = "response"),
    n = 1000, add = TRUE, col = "black"
  )
  legend(
    x = 0.5, y = 0.8,
    legend = c("glm()", "logistf()"),
    col = rep(x = "black", times = 2),
    lty = c(1,1),
    lwd = c(2,1),
    bty = "n"
  )

  # Right panel: the same display for set2
  plot(
    x = set2$x1, y = set2$y,
    xlab = expression(x[1]), ylab = "Estimated probability",
    main = "Plot for set2"
  )
  curve(
    expr = predict(object = mod.fit2, newdata = data.frame(x1 = x), type = "response"),
    n = 1000, add = TRUE, col = "black", lwd = 2
  )
  curve(
    expr = predict(object = mod.fit.firth.set2, newdata = data.frame(x1 = x), type = "response"),
    n = 1000, add = TRUE, col = "black"
  )
  legend(
    x = 0.5, y = 0.8,
    legend = c("glm()", "logistf()"),
    col = rep(x = "black", times = 2),
    lty = c(1,1),
    lwd = c(2,1),
    bty = "n"
  )
  # dev.off()  # Create plot for book


################################################################################
# Further investigation of how many iterations R will do with data set #1

  # Refit the set1 model under increasing caps on the number of iterations and
  #   display the log-likelihood reached under each cap
  mod.fit3 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit), maxit = 3)
  logLik(mod.fit3)

  mod.fit10 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit), maxit = 10)
  logLik(mod.fit10)

  mod.fit20 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit), maxit = 20)
  logLik(mod.fit20)

  mod.fit26 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit), maxit = 26)
  logLik(mod.fit26)

  # Generous iteration cap combined with a small epsilon
  mod.fit <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit), maxit = 100,
    epsilon = 10^(-12))  # 34 iterations used

  # One row per fit above: log-likelihood and residual deviance (G)
  data.frame(
    logLik = vapply(
      X = list(mod.fit3, mod.fit10, mod.fit20, mod.fit26, mod.fit),
      FUN = function(fit) as.numeric(logLik(fit)),
      FUN.VALUE = numeric(1)
    ),
    G = vapply(
      X = list(mod.fit3, mod.fit10, mod.fit20, mod.fit26, mod.fit),
      FUN = function(fit) fit$deviance,
      FUN.VALUE = numeric(1)
    )
  )

  # Fits capped at 32, 33, and 34 iterations, each with an extremely small
  #   epsilon
  mod.fit32 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit), maxit = 32,
    epsilon = 10^(-100))
  mod.fit33 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit), maxit = 33,
    epsilon = 10^(-100))
  mod.fit34 <- glm(formula = y ~ x1, data = set1, family = binomial(link = logit), maxit = 34,
    epsilon = 10^(-100))

  # Relative change in deviance between consecutive iteration caps, written in
  #   the |dev - dev.old|/(|dev| + 0.1) form of the glm() convergence measure
  abs(mod.fit33$deviance - mod.fit32$deviance)/(0.1 + mod.fit33$deviance)
  abs(mod.fit34$deviance - mod.fit33$deviance)/(0.1 + mod.fit34$deviance)

  # The three deviances themselves
  mod.fit32$deviance
  mod.fit33$deviance
  mod.fit34$deviance




#