# Simulate observations, check normal distribution approximation for central limit theorem ################################################################################ # Normal distribution illustration for how to simulate observations mu <- 2.8571 sigma.sq <- 0.4082 n <- 100000 set.seed(2343) y <- rnorm(n = n, mean = mu, sd = sqrt(sigma.sq)) head(y) hist(x = y, freq = FALSE, xlab = "y", col = NA, breaks = 25) curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq)), add = TRUE, col = "red") # Compare to mu and sigma mean(y) var(y) # Check P(Y < ___) quant <- -1.96*sqrt(sigma.sq) + mu quant pnorm(q = quant, mean = mu, sd = sqrt(sigma.sq)) sum(y < quant)/n mean(y < quant) ################################################################################ # GPA example #1 # Plot of probability distribution curve(expr = 15/2048 * x^4 * (4 - x), from = 0, to = 4, col = "red", main = "Probability distribution for GPAs", ylab = "f(y)", xlab = "y") abline(h = 0) # Simulate taking samples of size 10 from the population and find sample means # for each sample n <- 20 set.seed(8129) set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 2), nrow = 1000, ncol = n) head(round(set1,2)) ybar <- rowMeans(set1) head(round(ybar,2)) # Histogram showing the central limit theorem works! mu <- 2.8571 sigma.sq <- 0.4082 hist(x = ybar, main = "Histogram of sample means", xlab = expression(bar(y)), freq = FALSE, col = NA) curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq/n)), add = TRUE, col = "red") # Check P(Ybar < ___) quant <- -1.96*sqrt(sigma.sq/n) + mu quant pnorm(q = quant, mean = mu, sd = sqrt(sigma.sq/n)) sum(ybar < quant)/length(ybar) mean(ybar < quant) # Change some items on histogram to make it look better for n = 5 hist(x = ybar, ylim = c(0, 1.5), main = "Histogram of sample means", xlab = expression(bar(y)), freq = FALSE, breaks = 15, col = NA) curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq/n)), add = TRUE, col = "red") ################################################################################ # GPA example #2 n <- 20 set.seed(8129) set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 1.1), nrow = 1000, ncol = n) ybar <- rowMeans(set1) head(round(ybar,2)) #Histogram with normal distribution overlay mu <- 3.278689 sigma.sq <- 0.3330923 hist(x = ybar, main = "Histogram of sample means", xlab = expression(bar(y)), freq = FALSE, col = NA) curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq/n)), add = TRUE, col = "red") # Check P(Ybar < ___) quant <- -1.96*sqrt(sigma.sq/n) + mu quant pnorm(q = quant, mean = mu, sd = sqrt(sigma.sq/n)) sum(ybar < quant)/length(ybar) mean(ybar < quant) # Change some of the defaults to make the plot better hist(x = ybar, main = "Histogram of sample means", xlab = expression(bar(y)), freq = FALSE, breaks = 15, col = NA) curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq/n)), add = TRUE, col = "red") #