# Simulate observations, check normal distribution approximation for central limit theorem
################################################################################
# Normal distribution illustration for how to simulate observations
mu <- 2.8571
sigma.sq <- 0.4082
n <- 100000
set.seed(2343)
y <- rnorm(n = n, mean = mu, sd = sqrt(sigma.sq))
head(y)
hist(x = y, freq = FALSE, xlab = "y", col = NA, breaks = 25)
curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq)), add = TRUE, col = "red")
# Compare to mu and sigma
mean(y)
var(y)
# Check P(Y < ___)
quant <- -1.96*sqrt(sigma.sq) + mu
quant
pnorm(q = quant, mean = mu, sd = sqrt(sigma.sq))
sum(y < quant)/n
mean(y < quant)
################################################################################
# GPA example #1
# Plot of probability distribution
curve(expr = 15/2048 * x^4 * (4 - x), from = 0, to = 4, col = "red",
main = "Probability distribution for GPAs", ylab = "f(y)", xlab = "y")
abline(h = 0)
# Simulate taking samples of size 10 from the population and find sample means
# for each sample
n <- 20
set.seed(8129)
set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 2), nrow = 1000, ncol = n)
head(round(set1,2))
ybar <- rowMeans(set1)
head(round(ybar,2))
# Histogram showing the central limit theorem works!
mu <- 2.8571
sigma.sq <- 0.4082
hist(x = ybar, main = "Histogram of sample means", xlab =
expression(bar(y)), freq = FALSE, col = NA)
curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq/n)),
add = TRUE, col = "red")
# Check P(Ybar < ___)
quant <- -1.96*sqrt(sigma.sq/n) + mu
quant
pnorm(q = quant, mean = mu, sd = sqrt(sigma.sq/n))
sum(ybar < quant)/length(ybar)
mean(ybar < quant)
# Change some items on histogram to make it look better for n = 5
hist(x = ybar, ylim = c(0, 1.5), main = "Histogram of sample means",
xlab = expression(bar(y)), freq = FALSE, breaks = 15, col = NA)
curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq/n)), add = TRUE, col =
"red")
################################################################################
# GPA example #2
n <- 20
set.seed(8129)
set1 <- 4*matrix(data = rbeta(n = 1000*n, shape1 = 5, shape2 = 1.1), nrow = 1000,
ncol = n)
ybar <- rowMeans(set1)
head(round(ybar,2))
#Histogram with normal distribution overlay
mu <- 3.278689
sigma.sq <- 0.3330923
hist(x = ybar, main = "Histogram of sample means", xlab = expression(bar(y)),
freq = FALSE, col = NA)
curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq/n)), add = TRUE, col =
"red")
# Check P(Ybar < ___)
quant <- -1.96*sqrt(sigma.sq/n) + mu
quant
pnorm(q = quant, mean = mu, sd = sqrt(sigma.sq/n))
sum(ybar < quant)/length(ybar)
mean(ybar < quant)
# Change some of the defaults to make the plot better
hist(x = ybar, main = "Histogram of sample means", xlab = expression(bar(y)), freq = FALSE, breaks
= 15, col = NA)
curve(expr = dnorm(x = x, mean = mu, sd = sqrt(sigma.sq/n)),
add = TRUE, col = "red")
#