###################################################################### # Golf ball distance data # # # ###################################################################### ###################################################################### #Read in data gb <- read.csv(file = "golfballs.csv") gb #Reshape data gb2 <- rbind(data.frame(Person = gb$Person, Type = "TitleistB", Distance = gb$TitleistB), data.frame(Person = gb$Person, Type = "Maxfli", Distance = gb$Maxfli), data.frame(Person = gb$Person, Type = "TitleistP", Distance = gb$TitleistP), data.frame(Person = gb$Person, Type = "TopFlite", Distance = gb$TopFlite)) head(gb2) tail(gb2) #Another way gb.temp <- stack(x = gb, select = c(TitleistB, Maxfli, TitleistP, TopFlite)) gb.temp$Person <- rep(x = 1:20, times = 4) gb.temp #The reshape() function would work too ###################################################################### # Plots #Not adjusted for block boxplot(formula = Distance ~ Type, data = gb2, main = "Box and dot plot", ylab = "Distance", xlab = "Golf ball", pars = list(outpch=NA), col = NA) #Not helpful here due to small sample size for each treatment level stripchart(x = gb2$Distance ~ gb2$Type, lwd = 2, col = "red", method = "jitter", vertical = TRUE, pch = 1, add = TRUE) #Adjusted for block # R has changed the syntax for aggregate(). One used to use formula = Distance ~ Person. # Now, the proper syntax is x = Distance ~ Person. distance.block.adj <- aggregate(x = Distance ~ Person, data = gb2, FUN = mean) distance.block.adj gb3 <- merge(x = gb2, y = distance.block.adj, by.x = "Person", by.y = "Person", suffixes = c("",".person")) head(gb3) tail(gb3) gb3$Distance.adj <- gb3$Distance - gb3$Distance.person head(gb3) boxplot(formula = Distance.adj ~ Type, data = gb3, main = "Box and dot plot", ylab = "Distance adjusted for person", xlab = "Golf ball", pars = list(outpch=NA), col = NA) stripchart(x = gb3$Distance.adj ~ gb3$Type, lwd = 2, col = "red", method = "jitter", vertical = TRUE, pch = 1, add = TRUE) # Another way to adjust the data gb.adj <- gb[,-1] - rowMeans(gb) # Now find the gb2 form of the data ###################################################################### # Analysis #Main calculations mod.fit <- aov(formula = Distance ~ Type + factor(Person), data = gb2) summary(mod.fit) TukeyHSD(x = mod.fit, conf.level = 0.95, which = "Type") #Additional calculations qf(p = 0.95, df1 = 3, df2 = 57) 1 - pf(q = 6.3621, df1 = 3, df2 = 57) aggregate(formula = Distance.adj ~ Type, data = gb3, FUN = mean) #Compare to "diff" column in TukeyHSD #What if the data was analyzed instead as a CRD? mod.fit.CRD <- aov(formula = Distance ~ Type, data = gb2) summary(mod.fit.CRD) #What if the data was analyzed instead as a CRD using the block adjusted data? mod.fit.adjust <- aov(formula = Distance.adj ~ Type, data = gb3) summary(mod.fit.adjust) #agricolae package - use for LSD and Bonferoni #http://cran.r-project.org/web/packages/agricolae/index.html # Could not get to work in R 4.0.2 options(width = 60) # Helpful to avoid printing too wide for notes library(package = agricolae) save.LSD <- LSD.test(y = mod.fit, trt = "Type", alpha = 0.05, group = FALSE, p.adj = "none", main = "Golf ball types") #LSD save.LSD save.Bon <- LSD.test(y = mod.fit, trt = "Type", alpha = 0.05, group = FALSE, p.adj = "bonferroni", main = "Golf ball types") #Bonferroni save.Bon #Multiple comparisons with pairwise.t.test - gives incorrect answers for RCB pairwise.t.test(x = gb2$Distance, g = gb2$Type, p.adjust.method = "none", alternative = "two.sided", paired = TRUE) #HOW DOES IT KNOW HOW TO PAIR? ################################################################################ # Additional investigations not in the notes #Another way to fit model with random effect for block library(package = lme4) mod.fit.lme <- lmer(formula = Distance ~ Type + (1 | Person), data = gb2) summary(mod.fit.lme) #Another way to fit model with random effect for block library(package = nlme) mod.fit <- lme(Distance ~ Type, random = ~1|factor(Person), data = gb2) summary(mod.fit) anova(mod.fit) #Another way to do multiple comparisons in R library(package = multcomp) #HSD HSD <- glht(model = mod.fit.lme, linfct = mcp(Type = "Tukey")) summary(object = HSD) confint(object = HSD, level = 0.95) plot(HSD) levels(gb2$Type) #List out the comparisons - be careful about syntax comparisons <- rbind("Maxfli - TitleistB = 0", "TitleistP - TitleistB = 0", "TopFlite - TitleistB = 0", "TitleistP - Maxfli = 0", "TopFlite - Maxfli = 0", "TopFlite - TitleistP = 0") comparisons compare.means <- glht(model = mod.fit.lme, linfct = mcp(Type = comparisons)) compare.means summary(object = compare.means, test = adjusted(type = "none")) summary(object = compare.means, test = adjusted(type = "bonferroni")) plot(compare.means) confint(object = compare.means, level = 0.95, calpha = qt(p = 0.975, df = mod.fit$df.residual)) # Even another way to do multiple comparisons in R library(package = emmeans) calc.est <- emmeans(mod.fit, specs = "Type") calc.est # Default is Tukey HSD - The factor levels are ordered the opposite as given by TukeyHSD() confint(contrast(calc.est, method = "pairwise"), level = 0.95) # Bonferroni confint(contrast(calc.est, method = "pairwise"), level = 0.95, adjust = "Bonferroni") # LSD confint(contrast(calc.est, method = "pairwise"), level = 0.95, adjust = "none") # P-values contrast(calc.est, method = "pairwise") # Tukey HSD is default contrast(calc.est, method = "pairwise", adjust = "Bonferroni") contrast(calc.est, method = "pairwise", adjust = "none") #