#####################################################################
# NAME:    Tom Loughin                                              #
# DATE:    1-10-13                                                  #
# PURPOSE: Stepwise variable selection techniques in Placekick data #
#                                                                   #
# NOTES:                                                            #
#####################################################################

# Read the placekick data (comma-separated, first row holds the column
# names) and print its first and last rows as a quick sanity check.
placekick <- read.table(
  file = "C:\\data\\Placekick.csv",
  header = TRUE,
  sep = ","
)
head(placekick)
tail(placekick)

# The selection procedures need the two extremes of the model space:
# the intercept-only model and the model using every other column of
# placekick as a predictor of `good`.
empty.mod <- glm(
  good ~ 1,
  data = placekick,
  family = binomial(link = logit)
)
full.mod <- glm(
  good ~ .,
  data = placekick,
  family = binomial(link = logit)
)

#####################################################################
# Selection using Information Criteria
#
# step() performs variable selection with an information criterion:
#   k = 2 (the default) gives AIC;
#   k = log(nrow(...)) gives BIC, where "..." is the data set name.
# direction = "forward", "backward", or "both" (the default) chooses
# which search algorithm is used.
# BIC-based selection: the penalty per parameter is log(sample size).

# Forward selection: begin at the empty model and grow towards the
# full model (the upper end of the scope).
forw.sel <- step(
  object = empty.mod,
  scope = list(upper = full.mod),
  direction = "forward",
  k = log(nrow(placekick)),
  trace = TRUE
)
anova(forw.sel)
# For illustration purposes, the AIC version (k = 2) would be:
# forw.sel <- step(object = empty.mod, scope = list(upper = full.mod),
#                  direction = "forward", k = 2, trace = TRUE)

# Backward elimination: begin at the full model (object = ) and shrink
# towards the empty model (the lower end of the scope).
back.sel <- step(
  object = full.mod,
  scope = list(lower = empty.mod),
  direction = "backward",
  k = log(nrow(placekick)),
  trace = TRUE
)
anova(back.sel)
# For illustration purposes, the AIC version (k = 2) would be:
# back.sel <- step(object = full.mod, scope = list(lower = empty.mod),
#                  direction = "backward", k = 2, trace = TRUE)

# (Alternating) stepwise selection: begin at the empty model and allow
# both additions and deletions, up to the full model.
step.sel <- step(
  object = empty.mod,
  scope = list(upper = full.mod),
  direction = "both",
  k = log(nrow(placekick)),
  trace = TRUE
)
anova(step.sel)
# For illustration purposes, the AIC version (k = 2) would be:
# step.sel <- step(object = empty.mod, scope = list(upper = full.mod),
#                  k = 2, trace = TRUE)

#####################################################################
# Selection using p-values
#
# The step() function can be "tricked" into something that is
# equivalent to selection using p-values by specifying the parameters
# test = "Chisq" and k = 0.
# p-value-flavoured selection: every run in this section passes
# test = "Chisq" (so the trace reports a chi-square test for each
# candidate term) together with k = 0.
# NOTE(review): with k = 0 the criterion has no penalty term; if a fixed
# significance level alpha is wanted for 1-df terms,
# k = qchisq(1 - alpha, df = 1) is the usual choice -- confirm intent.

# Forward selection: start with the empty model (object = ) and work up
# to the full model (scope = ).
forw.sel.p <- step(
  object = empty.mod,
  scope = list(upper = full.mod),
  direction = "forward",
  test = "Chisq",
  k = 0,
  trace = TRUE
)
anova(forw.sel.p)

# Backward elimination: start with the full model (object = ) and work
# down to the empty model (scope = ).
# k was 10 here, which disagreed with the k = 0 used by the forward and
# stepwise runs in this section; it now matches them.
back.sel.p <- step(
  object = full.mod,
  scope = list(lower = empty.mod),
  direction = "backward",
  test = "Chisq",
  k = 0,
  trace = TRUE
)
anova(back.sel.p)

# (Alternating) stepwise selection: start with the empty model
# (object = ) and work up to the full model (scope = ); direction is
# left at its default.
step.sel.p <- step(
  object = empty.mod,
  scope = list(upper = full.mod),
  test = "Chisq",
  k = 0,
  trace = TRUE
)
anova(step.sel.p)