#####################################################################
# NAME: Tom Loughin #
# DATE: 1-10-13 #
# PURPOSE: All Subsets selection techniques in Placekick data #
# #
# NOTES: #
#####################################################################
placekick <- read.table(file = "C:\\data\\Placekick.csv", header = TRUE, sep = ",")
head(placekick)
tail(placekick)
# Alternative specification to perform the search using a previous glm() fit:
#
### full.mod.1 <- glm(formula = good ~., family = binomial(link = logit), data = placekick)
### search.1.aicc <- glmulti(y = full.mod.1, level = 1, method = "h", crit = "aicc", family = binomial(link = "logit"))
#
# Unfortunately, have to deactivate a system variable in order to get rJava to work.
# See http://stackoverflow.com/questions/7019912/using-the-rjava-package-on-win7-64-bit-with-r
# True for 32 bit as well
if (Sys.getenv("JAVA_HOME") != "")
Sys.setenv(JAVA_HOME = "")
library(glmulti)
# Using AICc as criterion. Could use crit = "bic" or "aic" instead.
# Using "good ~ ." to include all variables from data (other than "good")
search.1.aicc <- glmulti(y = good ~ ., data = placekick, fitfunction = "glm",
level = 1, method = "h", crit = "aicc", family = binomial(link = "logit"))
print(search.1.aicc)
aa <- weightable(search.1.aicc)
cbind(model = aa[1:5,1], round(aa[1:5,2:3], digits = 3))
plot(search.1.aicc, type = "p")
# The following search looks for all pairwise interactions as well.
# WARNING: It would have taken about 13 years to complete on an Intel Core I7 2600 with 8G RAM.
### search.2marg.aicc <- glmulti(y = full.mod.1, level = 2, marginality = TRUE, method = "h", crit = "aicc")
#
# Instead, glmulti() can use a "genetic algorithm" search procedure to find groups of models
# that might be good and find the best of those models.
#
# We repeat the first search using the genetic algorithm to show that it can get to the same
# results as the better exhaustive search in a small problem.
set.seed(267188299)
search.g.aicc <- glmulti(y = good ~ ., data = placekick, fitfunction = "glm",
level = 1, method = "g", crit = "aicc", family = binomial(link = "logit"))
# Now try genetic algorithm on bigger problem with pairwise interactions. See glmulti manual
# for details on tuning parameters for the genetic algorithm.
# NOTE: This took about 7 minutes to run on an Intel Core I7 2600 with 8G RAM.
search.gmarg.aicc <- glmulti(y = good ~ ., data = placekick, fitfunction = "glm", level = 2,
marginality = TRUE, method = "g", crit = "aicc", family = binomial(link = "logit"))
print(search.gmarg.aicc)
head(weightable(search.gmarg.aicc))
plot(search.gmarg.aicc, type = "p")
# All subsets using BMA package function bic.glm()
library(BMA)
search.bma <- bic.glm(f = good ~ ., glm.family = "binomial", data = placekick, occam.window = FALSE)
# Be aware that BMA has no function to do AICc, and that it calculates the IC(k) values slightly
# differently. The ordering of models is the same, and the differences between two models'
# IC(k) values is the same as in glmulti, but the numerical values are different.
summary(search.bma)
# All subsets using BMA package function bic.glm()
library(bestglm)
search.bestglm <- bestglm(Xy = placekick, family = binomial, IC = "BIC")
# Show glm() fit of best model
search.bestglm$BestModel
# List top 5 models. Can get more models listed using TopModels = parameter.
search.bestglm$BestModels
# List best model of each size
search.bestglm$Subsets