# Examine the distance^2 model with placekicking data set placekick <- read.csv(file = "C:\\data\\Placekick.csv") head(placekick) tail(placekick) ##################################################################### # Estimate the model mod.fit.distsq <- glm(formula = good ~ distance + I(distance^2), family = binomial(link = logit), data = placekick) summary(mod.fit.distsq) # LRT for distance^2 mod.fit <- glm(formula = good ~ distance, family = binomial(link = logit), data = placekick) anova(mod.fit, mod.fit.distsq, test = "Chisq") library(package = car) Anova(mod.fit.distsq, test = "LR") # This shows what the distance row in the Anova() output tests # We would not want this because distance should be in the model whenever distance^2 is in the model mod.fit2 <- glm(formula = good ~ I(distance^2), family = binomial(link = logit), data = placekick) anova(mod.fit2, mod.fit.distsq, test = "Chisq") ##################################################################### # Construct plot #Find the observed proportion of successes at each distance w <- aggregate(x = good ~ distance, data = placekick, FUN = sum) n <- aggregate(x = good ~ distance, data = placekick, FUN = length) w.n <- data.frame(distance = w$distance, success = w$good, trials = n$good, proportion = round(w$good/n$good,4)) head(w.n) tail(w.n) #Plot of the observed proportions with logistic regression model win.graph(width = 7, height = 6, pointsize = 12) symbols(x = w$distance, y = w$good/n$good, circles = sqrt(n$good), inches = 0.5, xlab="Distance (yards)", ylab="Estimated probability", panel.first = grid(col = "gray", lty = "dotted")) #Put quadtratic term model on plot curve(expr = predict(object = mod.fit.distsq, newdata = data.frame(distance = x), type = "response"), col = "blue", add = TRUE, from = 18, to = 66) #Model with linear term only mod.fit <- glm(formula = good ~ distance, family = binomial(link = logit), data = placekick) curve(expr = predict(object = mod.fit, newdata = data.frame(distance = x), type = "response"), col = "red", add = TRUE, from = 18, to = 66) legend(locator(1), legend = c("linear term only", "linear and quadratic term"), lty = c("solid", "solid"), col = c("red", "blue"), bty = "n") ##################################################################### # Odds ratios beta.hat <- mod.fit.distsq$coefficients[2:3] #Pull out beta^_1, beta^_2, so that we are using the same index as subscripts in the model (helpful to reduce coding errors) #Compare 40 to 50 yard placekicks c <- 10 x <- 40 OR.dist <- exp(c*beta.hat[1] + c*beta.hat[2]*(2*x + c)) #Estimated OR cov.mat <- vcov(mod.fit.distsq)[2:3,2:3] #Pull out covariance matrix for beta^_1, beta^_2 var.log.OR <- cov.mat[1,1] + (2*x + c)^2*cov.mat[2,2] + 2*(2*x + c)*cov.mat[1,2] #Var(beta^_1 + (2*x + c)*beta^_2) ci.log.OR.low <- c*beta.hat[1] + c*beta.hat[2]*(2*x + c) - c*qnorm(p = 0.975)*sqrt(var.log.OR) ci.log.OR.up <- c*beta.hat[1] + c*beta.hat[2]*(2*x + c) + c*qnorm(p = 0.975)*sqrt(var.log.OR) data.frame(OR.dist, OR.low = exp(ci.log.OR.low), OR.up = exp(ci.log.OR.up)) round(data.frame(OR.hat = 1/OR.dist, OR.low = 1/exp(ci.log.OR.up), OR.up = 1/exp(ci.log.OR.low)),2) #Inverted # Profile LR intervals using mcprofile library(package = mcprofile) #Need if had not already been used #OR comparing 40 to 50 yard placekick K <- matrix(data = c(0, 1, 2*x+c), nrow = 1, ncol = 3, byrow = TRUE) #setting up matrix to get c*beta_1 + beta_2*(2*x + c) linear.combo <- mcprofile(object = mod.fit.distsq, CM = K) #warnings appear - non-convergence and pi^'s close to 0 or 1 ci.log.OR <- confint(object = linear.combo, level = 0.95, adjust = "none") ci.log.OR # Invert odds ratio, incorrect column labels can be removed with as.numeric() rev(1/exp(c*ci.log.OR$confint)) save.wald <- wald(object = linear.combo) ci.log.OR.wald <- confint(object = save.wald, level = 0.95, adjust = "none") rev(1/exp(c*ci.log.OR.wald$confint)) #OR comparing 40 to 50 yard placekick - This works too - notice the use of c in K K <- matrix(data = c(0, c, c*(2*x+c)), nrow = 1, ncol = 3, byrow = TRUE) #setting up matrix to get c*beta_1 + beta_2*(2*x + c) linear.combo <- mcprofile(object = mod.fit.distsq, CM = K) #warnings appear - non-convergence and pi^'s close to 0 or 1 ci.log.OR <- confint(object = linear.combo, level = 0.95, adjust = "none") ci.log.OR rev(1/exp(ci.log.OR$confint)) # Wald intervals using the emmeans package library(emmeans) calc.est.sq <- emmeans(object = mod.fit.distsq, specs = ~ distance, at = list(distance = c(20,30,40,50,60)), type = "response") test.info <- contrast(object = calc.est.sq, method = "consec", combine = TRUE, reverse = TRUE) confint(object = test.info, adjust = "none", level = 0.95) #