# Example: Political Ideology (ideol is nominal)


#####################################################################
# Enter the data
# Read the cell counts; character columns are converted to factors.
alldata <- read.csv(
  file = "C:\\Data\\PolIdeolData.csv",
  stringsAsFactors = TRUE
)
head(alldata)

# Default ordering of the ideology levels
levels(alldata$ideol)
# Reorder levels by their position in the default ordering.
# NOTE(review): presumably the default order is alphabetical
#   (M, SC, SL, VC, VL), which would make the new order
#   VL, SL, M, SC, VC -- confirm against the printed levels.
alldata$ideol <- factor(
  alldata$ideol,
  levels = levels(alldata$ideol)[c(5, 3, 1, 2, 4)]
)
levels(alldata$ideol)

# Show tables of the data: party x ideol counts, one layer per gender
tab.gpi <- xtabs(
  formula = count ~ party + ideol + gender,
  data = alldata
)
tab.gpi


#####################################################################
# Fit Models

#Saturated Model: GPI
# gender*party*ideol expands to all main effects plus every two- and
#   three-way interaction.
mod.sat <- glm(formula = count ~ gender*party*ideol, family = poisson(link = "log"), data = alldata)

#Homogeneous association model in all 3 associations: GP,GI,PI
# (a + b + c)^2 expands to all main effects and all two-way interactions,
#   so the three-way interaction is the only term dropped from mod.sat.
mod.homo <- glm(formula = count ~ (gender + party + ideol)^2, family = poisson(link = "log"), data = alldata)
# Compare the nested fits: is the three-way interaction needed?
anova(mod.homo, mod.sat, test = "Chisq")

# Model assuming only PI association: G,PI
mod.homo.PI <- glm(formula = count ~ gender + party*ideol, family = poisson(link = "log"), data = alldata)
# Compare against mod.homo: are the GP and GI associations (jointly) needed?
anova(mod.homo.PI, mod.homo, test = "Chisq")

# Model assuming mutual independence: G,P,I (main effects only)
mod.indep <- glm(formula = count ~ gender + party + ideol, family = poisson(link = "log"), data = alldata)
# Compare against mod.homo.PI: is the PI association needed?
anova(mod.indep, mod.homo.PI, test = "Chisq")

# Summary and LR tests of homogeneous association model 
# Coefficient table rounded to 3 decimals. The row order printed here is the
#   coefficient order that the contrast matrices further down in this script
#   index by position.
round(summary(mod.homo)$coefficients, digits = 3)
library(package = car)
# One test per model term (Anova() comes from the car package loaded above)
Anova(mod.homo)

# Additional sub-models of homogeneous association
# GI,PI: homogeneous association model without the gender:party term
mod.homo.PIGI <- glm(formula = count ~ gender*ideol + party*ideol, family = poisson(link = "log"), data = alldata)
Anova(mod.homo.PIGI)
# GP,PI: homogeneous association model without the gender:ideol term
mod.homo.PIGP <- glm(formula = count ~ gender*party + party*ideol, family = poisson(link = "log"), data = alldata)
Anova(mod.homo.PIGP)

#####################################################################
# Wald intervals using emmeans
library(package = emmeans)

# specs = A + B + ... creates linear predictor estimates at all combinations of 
#   the variables in the formula (includes interaction effects automatically)
emm1 <- emmeans(mod.homo, specs = ~ gender + party + ideol)
# Linear predictors for each combination with unadjusted 95% CIs.
summary(emm1)
head(summary(emm1))

# GP OR for each level (all are the same!)
# Each contrast puts the pattern (1, -1, -1, 1) on one consecutive group of
#   four grid rows (groups named VL, SL, M, SC, VC in that order) and zeros
#   on the remaining 16 rows.
GP.all <- contrast(
  emm1,
  method = setNames(
    lapply(
      0:4,
      function(k) c(rep(0, 4 * k), 1, -1, -1, 1, rep(0, 16 - 4 * k))
    ),
    c("GP.VL", "GP.SL", "GP.M", "GP.SC", "GP.VC")
  )
)
GP.all


# To get ORs for two factors averaged across all levels of other factors,
#   list only those factors in specs=

# Notice that this lists party first, so the interaction contrast is built
#   from the party difference compared across gender, rather than the
#   gender difference compared across party.
emm.GP1 <- emmeans(mod.homo, specs = ~ party + gender)
# Linear predictors for each combination with unadjusted 95% CIs.
summary(emm.GP1)
# Reverse-pairwise differences, back-transformed to a ratio by
#   type = "response"
aa.GP1 <- contrast(emm.GP1, interaction = list("revpairwise"))
confint(aa.GP1, type = "response")
# The resulting OR is the same either way (prove it using mu notation!)

# Gender-by-ideology ORs, averaged over party
emm.GI <- emmeans(mod.homo, specs = ~ ideol + gender)
summary(emm.GI)
# The ORs of interest are Con:Lib at M vs. Con:Lib at F, so the
#   ordering of the differences is reversed for both variables.
aa.GI <- contrast(
  emm.GI,
  interaction = list("revpairwise", "revpairwise")
)
# adjust = "mvt" requests multiplicity-adjusted intervals for the family
ci.GI <- confint(aa.GI, type = "response", adjust = "mvt")
ci.GI
# Shorten the first two column names for printing, then show the label
#   columns next to columns 3, 4, 6 and 7 rounded to 2 decimals.
colnames(ci.GI) <- c("ideol", "gender", colnames(ci.GI)[3:7])
cbind(ci.GI[1:2], round(ci.GI[c(3, 4, 6, 7)], 2))

# Party-by-ideology ORs, averaged over gender
emm.PI <- emmeans(mod.homo, specs = ~ ideol + party)
summary(emm.PI)
# The ORs of interest are Con:Lib at R vs. Con:Lib at D, so the
#   ordering of the differences is reversed for both variables.
aa.PI <- contrast(
  emm.PI,
  interaction = list("revpairwise", "revpairwise")
)
# adjust = "mvt" requests multiplicity-adjusted intervals for the family
ci.PI <- confint(aa.PI, type = "response", adjust = "mvt")
ci.PI
# Shorten the first two column names for printing, then show the label
#   columns next to columns 3, 4, 6 and 7 rounded to 2 decimals.
colnames(ci.PI) <- c("ideol", "party", colnames(ci.PI)[3:7])
cbind(ci.PI[1:2], round(ci.PI[c(3, 4, 6, 7)], 2))

#####################################################################
# Confidence intervals: LR
# Find ordering of coefficients (e.g., inspect names(coef(mod.homo))); the contrast matrices below index them by position

# Already did math to determine which coefficients contribute to ORs.
# Develop contrast matrices for the different sets of ORs
# We split them into sets according to the interaction to better define a "family" for simultaneous inference
# Single-row contrast: weight 1 on the 8th of the 16 coefficients, 0 elsewhere
contr.mat.GP <- matrix(c(rep(0, 7), 1, rep(0, 8)), nrow = 1)
rownames(contr.mat.GP) <- "GP Rep | M:F"
library(package = mcprofile)
# Profile the contrast, then exponentiate the interval to the OR scale
LRCI.GP <- mcprofile(mod.homo, CM = contr.mat.GP)
exp(confint(LRCI.GP, adjust = "none"))
# Same as unadjusted here because only one OR in family
exp(confint(LRCI.GP))


# Contrasts for the 10 pairwise gender-by-ideology ORs.
# Only coefficients 9-12 get non-zero weight.
# NOTE(review): assumes these are the gender:ideol interaction terms in
#   level order SL, M, SC, VC -- confirm with names(coef(mod.homo)).
contr.mat.GI <- cbind(
  matrix(0, nrow = 10, ncol = 8),
  matrix(
    c( 0,  0,  1, -1,
       0,  1,  0, -1,
       1,  0,  0, -1,
       0,  0,  0, -1,
       0,  1, -1,  0,
       1,  0, -1,  0,
       0,  0, -1,  0,
       1, -1,  0,  0,
       0, -1,  0,  0,
      -1,  0,  0,  0),
    ncol = 4,
    byrow = TRUE
  ),
  matrix(0, nrow = 10, ncol = 4)
)
rownames(contr.mat.GI) <- c(
  "GI VC:SC | F:M", "GI VC:M | F:M", "GI VC:SL | F:M",
  "GI VC:VL | F:M", "GI SC:M | F:M", "GI SC:SL | F:M",
  "GI SC:VL | F:M", "GI M:SL | F:M", "GI M:VL | F:M",
  "GI SL:VL | F:M"
)
# Profile all 10 contrasts as one family; print unadjusted and then
#   default (multiplicity-adjusted) intervals on the OR scale
LRCI.GI <- mcprofile(mod.homo, CM = contr.mat.GI)
exp(confint(LRCI.GI, adjust = "none"))
exp(confint(LRCI.GI))

# Contrasts for the 10 pairwise party-by-ideology ORs (LR intervals).
# Only coefficients 13-16 get non-zero weight.
# NOTE(review): assumes these are the party:ideol interaction terms for
#   party R in level order SL, M, SC, VC, with D and VL as reference
#   levels -- confirm with names(coef(mod.homo)).
# Under that coding the coefficient for ideology level j is the log OR of
#   j:VL for R:D, so the OR "a:b | R:D" is coef(a) - coef(b), with
#   coef(VL) = 0. The signs below follow that rule so every row matches its
#   "R:D" label; the opposite signs (as used for the "F:M" gender rows)
#   would give the reciprocal, D:R, odds ratios.
contr.mat.PI <- rbind(
  c(rep(0, 8), rep(0, 4), 0, 0,-1, 1),
  c(rep(0, 8), rep(0, 4), 0,-1, 0, 1),
  c(rep(0, 8), rep(0, 4),-1, 0, 0, 1),
  c(rep(0, 8), rep(0, 4), 0, 0, 0, 1),
  c(rep(0, 8), rep(0, 4), 0,-1, 1, 0),
  c(rep(0, 8), rep(0, 4),-1, 0, 1, 0),
  c(rep(0, 8), rep(0, 4), 0, 0, 1, 0),
  c(rep(0, 8), rep(0, 4),-1, 1, 0, 0),
  c(rep(0, 8), rep(0, 4), 0, 1, 0, 0),
  c(rep(0, 8), rep(0, 4), 1, 0, 0, 0))
row.names(contr.mat.PI) <- c("PI VC:SC | R:D", "PI VC:M | R:D", "PI VC:SL | R:D", "PI VC:VL | R:D", "PI SC:M | R:D", "PI SC:SL | R:D", "PI SC:VL | R:D", "PI M:SL | R:D", "PI M:VL | R:D", "PI SL:VL | R:D")
# Profile all 10 contrasts as one family; print unadjusted and then
#   default (multiplicity-adjusted) intervals on the OR scale
LRCI.PI <- mcprofile(mod.homo, CM = contr.mat.PI)
exp(confint(LRCI.PI, adjust = "none"))
exp(confint(LRCI.PI))


#####################################################################
# Additional programming not covered in book
#####################################################################
# Wald CIs for saturated model
# Need to add "by=" to contrasts

# Estimates for every gender x party x ideol combination from the
#   saturated model
emm.sat <- emmeans(mod.sat, specs = ~ gender + party + ideol)
# Linear predictors for each combination with unadjusted 95% CIs.
summary(emm.sat)

# GP ORs for each ideology
aa.GPsat <- contrast(
  emm.sat,
  interaction = list("revpairwise"),
  by = "ideol"
)
confint(aa.GPsat, type = "response")

# GI ORs for each party
aa.GIsat <- contrast(
  emm.sat,
  interaction = list("revpairwise"),
  by = "party"
)
confint(aa.GIsat, type = "response")

# PI ORs for each gender
aa.PIsat <- contrast(
  emm.sat,
  interaction = list("revpairwise"),
  by = "gender"
)
confint(aa.PIsat, type = "response")


# Wald Confidence intervals using multcomp

library(package = multcomp)
# Wald inference for the same linear combinations of mod.homo coefficients
#   that were profiled above; each row of a contrast matrix is one combination.
wald.GP <- glht(mod.homo, linfct = contr.mat.GP)
wald.GI <- glht(mod.homo, linfct = contr.mat.GI)
wald.PI <- glht(mod.homo, linfct = contr.mat.PI)

# Defaults use multiplicity adjustment for simultaneous confidence level
summary(wald.GP)
# Exponentiate estimate and limits to get them on the OR scale
exp(confint(wald.GP)$confint)
# Options to get unadjusted (univariate) tests and CIs
summary(wald.GP, test = univariate())
# calpha fixes the critical value at the standard normal 0.975 quantile,
#   i.e. ordinary unadjusted 95% Wald limits
exp(confint(wald.GP, calpha = qnorm(0.975))$confint)

# Defaults use multiplicity adjustment for simultaneous confidence level
summary(wald.GI)
exp(confint(wald.GI)$confint)
# Options to get unadjusted (univariate) tests and CIs
summary(wald.GI, test = univariate())
exp(confint(wald.GI, calpha = qnorm(0.975))$confint)

# Defaults use multiplicity adjustment for simultaneous confidence level
summary(wald.PI)
exp(confint(wald.PI)$confint)
# Options to get unadjusted (univariate) tests and CIs
summary(wald.PI, test = univariate())
exp(confint(wald.PI, calpha = qnorm(0.975))$confint)

#####################################################################
# Wald CIs by manual calculation

# Not doing multiplicity adjustments, so put all contrasts together into one matrix
contr.mat <- rbind(contr.mat.GP, contr.mat.GI, contr.mat.PI)
# Get out coefficients and variances
beta <- matrix(coef(mod.homo), ncol = 1)
v.beta <- vcov(mod.homo)
# Estimate Lin Combos and standard errors as matrix computations
log.contrasts <- contr.mat %*% beta
SElog.contrasts <- matrix(sqrt(diag(contr.mat %*% v.beta %*% t(contr.mat))), ncol = 1)
# Compute confidence intervals in linear predictor scale
alpha = 0.05
lower.log <- log.contrasts + qnorm(alpha/2)*SElog.contrasts
upper.log <- log.contrasts + qnorm(1 - alpha/2)*SElog.contrasts
# Combine Lin Combo coefficients, estimates of contrasts, and confidence intervals in mean scale
wald.ci <- round(data.frame(exp(log.contrasts), exp(lower.log), exp(upper.log)), digits = 2)
# Attach contrast names to and columns.                  
colnames(wald.ci) <- c("Estimate", "Lower CI", "Upper CI")
wald.ci