######################################################################
# ConAgra example                                                    #
#                                                                    #
######################################################################

#####################################################################
# Read in data

# The file is read from the current working directory. The rest of the
#  script uses two of its columns: sp500 and conagra.
conagra <- read.csv(file = "ConAgra_SP500.csv")
# options(digits = 4)
head(conagra)
tail(conagra)

#####################################################################
# Plots and summary values

# Notice the syntax used here due to the data being in a different format
#  from what we have had previously: each investment is its own column, so
#  the columns are bound side by side instead of using a formula.
par(mfrow = c(1, 1))
boxplot(
  x = cbind(conagra$sp500, conagra$conagra),
  main = "Box and dot plot",
  ylab = "Return",
  xlab = "Investment",
  pars = list(outpch = NA),  # no outlier symbols; stripchart() draws every point
  names = c("S&P 500", "ConAgra"),
  col = NA
)
stripchart(
  x = conagra$sp500, lwd = 2, col = "red", method = "jitter",
  vertical = TRUE, pch = 1, add = TRUE, at = 1
)
stripchart(
  x = conagra$conagra, lwd = 2, col = "red", method = "jitter",
  vertical = TRUE, pch = 1, add = TRUE, at = 2
)

# Alternatively, you could restructure the data and use the usual code.
# Only the two data frames are stacked here; a graphics argument such as
#  col = NA must not be given to rbind(), because rbind() would treat it as
#  one more row of data.
set1 <- rbind(
  data.frame(return = conagra$sp500, name = "SP500"),
  data.frame(return = conagra$conagra, name = "ConAgra")
)

# Default puts ConAgra on left side of plot
boxplot(
  formula = return ~ name,
  data = set1,
  main = "Box and dot plot",
  ylab = "Return",
  xlab = "Investment",
  pars = list(outpch = NA),
  col = NA
)
stripchart(
  x = set1$return ~ set1$name, lwd = 2, col = "red", method = "jitter",
  vertical = TRUE, pch = 1, add = TRUE
)

# Specify the breaks to make sure they are the same for both plots
return.breaks <- seq(from = -0.15, to = 0.1, by = 0.05)
par(mfrow = c(2, 1))
hist(
  x = conagra$sp500, main = "S&P 500", xlab = "Return",
  breaks = return.breaks, ylim = c(0, 30), col = NA
)
hist(
  x = conagra$conagra, main = "ConAgra", xlab = "Return",
  breaks = return.breaks, ylim = c(0, 30), col = NA
)

# Numerical summaries
mean(conagra$sp500)
mean(conagra$conagra)
# Sample variances; sample 1 is the S&P 500 and sample 2 is ConAgra
s1.sq <- var(conagra$sp500)
s2.sq <- var(conagra$conagra)
data.frame(s1.sq, s2.sq)

#####################################################################
# Confidence interval

# Two-sided interval for sigma1^2 / sigma2^2, where sample 1 is the S&P 500
#  and sample 2 is ConAgra.
n1 <- length(conagra$sp500)
n2 <- length(conagra$conagra)
data.frame(n1, n2)

alpha <- 0.05
qf(p = alpha / 2, df1 = n2 - 1, df2 = n1 - 1)
qf(p = 1 - alpha / 2, df1 = n2 - 1, df2 = n1 - 1)

# This form multiplies by F quantiles, so the degrees of freedom are in the
#  order (n2 - 1, n1 - 1).
lower <- s1.sq / s2.sq * qf(p = alpha / 2, df1 = n2 - 1, df2 = n1 - 1)
upper <- s1.sq / s2.sq * qf(p = 1 - alpha / 2, df1 = n2 - 1, df2 = n1 - 1)
data.frame(lower, upper)

# Alternative formula: divide by the quantiles of the F distribution with
#  degrees of freedom in the order (n1 - 1, n2 - 1)
lower <- s1.sq / s2.sq * 1 / qf(p = 1 - alpha / 2, df1 = n1 - 1, df2 = n2 - 1)
upper <- s1.sq / s2.sq * 1 / qf(p = alpha / 2, df1 = n1 - 1, df2 = n2 - 1)
data.frame(lower, upper)

# Easier way
var.test(x = conagra$sp500, y = conagra$conagra, conf.level = 0.95)

#####################################################################
# Hypothesis test

# The test is one-sided with alternative sigma1^2 / sigma2^2 < 1.

# C.I. - one-sided upper bound for sigma1^2 / sigma2^2
s1.sq / s2.sq * qf(p = 1 - alpha, df1 = n2 - 1, df2 = n1 - 1)

# Test statistic
F.stat <- s1.sq / s2.sq
F.stat

# Critical value - Be careful with degrees of freedom
# F.stat has s1.sq in the numerator, so df1 = n1 - 1 and df2 = n2 - 1.
F.crit <- qf(p = alpha, df1 = n1 - 1, df2 = n2 - 1)
F.crit

# P-value - lower-tail area, with the same degrees of freedom order as the
#  critical value
pf(q = F.stat, df1 = n1 - 1, df2 = n2 - 1)

# Plot
par(mfrow = c(1, 1))
curve(
  expr = df(x = x, df1 = n1 - 1, df2 = n2 - 1),
  from = 0, to = 3, col = "black", lwd = 2,
  # Title is built from the sample sizes so it always matches the curve
  main = paste0("F distribution with nu1 = ", n1 - 1, " and nu2 = ", n2 - 1),
  ylab = "f(x)", xlab = "x", n = 1000
)
abline(h = 0)
# Red vertical line from the axis up to the density at the critical value
segments(
  x0 = F.crit, y0 = 0,
  x1 = F.crit, y1 = df(x = F.crit, df1 = n1 - 1, df2 = n2 - 1),
  lwd = 5, col = "red"
)
# Short blue tick on the x-axis marking the observed test statistic
segments(x0 = F.stat, y0 = -0.01, x1 = F.stat, y1 = 0.01, lwd = 5, col = "blue")
mtext(text = "F", side = 1, at = F.stat, line = 0.5)

# Easier way
var.test(
  x = conagra$sp500, y = conagra$conagra, ratio = 1,
  alternative = "less", conf.level = 0.95
)

#####################################################################
# Time series analysis

acf(x = conagra$conagra)
acf(x = conagra$sp500)