# Example: All subsets variable Selection on Placekick Data
# Start with dredge() from MuMIn package
options(width = 80)
placekick <- read.csv(file = "C:\\data\\Placekick.csv")
head(placekick)
tail(placekick)
# Start with full model
# MUST add na.action= na.fail to global model or dredge() will not work.
# Max allowed variables is 30
mod.fit <- glm(formula = good ~ ., family = binomial(link = "logit"), data = placekick,
na.action = na.fail)
#
library(package = MuMIn)
# options(width = 60) # Use for book formatting
allsub.aic <- dredge(global.model = mod.fit, rank = "AIC")
subset(x = allsub.aic, subset = delta < 1)
# allsub.aic[allsub.aic$delta < 1,]
######################################################
# Genetic Algorithm with glmulti
# In older machines, there used to be problems with the rjava package
# on which glmulti depends. This comment is for users who encounter
# problems with this package while loading glmulti
# See http://stackoverflow.com/questions/7019912/using-the-rjava-package-on-win7-64-bit-with-r
# Solution is to reset an environment variable:
#
## if (Sys.getenv("JAVA_HOME") != "")
## Sys.setenv(JAVA_HOME = "")
library(package = glmulti)
# glmulti y argument can take either a formula or a previous glm fit:
full.mod.1 <- glm(formula = good ~., family = binomial(link = logit), data = placekick)
search.1.aic <- glmulti(y = full.mod.1, level = 1, method = "h", crit = "aic", family = binomial(link = "logit"))
print(search.1.aic)
head(weightable(search.1.aic))
plot(search.1.aic, type = "p")
#
# The following search looks for all pairwise interactions as well.
# WARNING: It would have taken about 13 years to complete on an Intel Core I7 2600 with 8G RAM.
# search.2marg.aicc <- glmulti(y = full.mod.1, level = 2, marginality = TRUE, method = "h", crit = "aicc")
#
# Instead, glmulti() can use a "genetic algorithm" search procedure to find groups of models
# that might be good and find the best of those models.
#
# We repeat the first search using the genetic algorithm to show that it can get to the same
# results as the better exhaustive search in a small problem.
# Now try genetic algorithm on bigger problem with pairwise interactions. See glmulti manual
# for details on tuning parameters for the genetic algorithm.
# NOTE: This took about 4-6 minutes to run on an 11th-gen Intel Core I7-1185G7@3.00GHz with 16 GB of RAM
# Results will vary randomly. glmulti() does not respect seeds.
search.gmarg.aic <- glmulti(y = good ~ ., data = placekick, fitfunction = "glm", level = 2,
marginality = TRUE, method = "g", crit = "aic", family = binomial(link = "logit"))
print(search.gmarg.aic)
head(weightable(search.gmarg.aic))
plot(search.gmarg.aic, type = "p")
# All subsets using BMA package function bic.glm()
# Using "good ~ ." to include all variables from data (other than "good")
library(package = BMA)
search.bma <- bic.glm(f = good ~ ., glm.family = "binomial", data = placekick, occam.window = FALSE)
# Be aware that BMA has no function to do AICc, and that it calculates the IC(k) values slightly
# differently. The ordering of models is the same, and the differences between two models'
# IC(k) values is the same as in glmulti, but the numerical values are different.
summary(search.bma)
# All subsets using bestglm package function bestglm()
library(package = bestglm)
search.bestglm <- bestglm(Xy = placekick, family = binomial, IC = "BIC")
# Show glm() fit of best model
search.bestglm$BestModel
# List top 5 models. Can get more models listed using TopModels = parameter.
search.bestglm$BestModels
# List best model of each size
search.bestglm$Subsets
# All subsets using glmulti() pachage function glmulti()
# Using "good ~ ." to include all variables from data (other than "good")
search.1.bic <- glmulti(y = good ~ ., data = placekick, fitfunction = "glm",
level = 1, method = "h", crit = "bic", family = binomial(link = "logit"))
print(search.1.bic)
aa <- weightable(search.1.bic)
cbind(model = aa[1:5,1], round(aa[1:5,2:3], digits = 3))
plot(search.1.bic, type = "p")