# library(knitr)
library(tidyverse)## Warning: package 'tidyverse' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'dplyr' was built under R version 4.0.4
## Warning: package 'forcats' was built under R version 4.0.4
library(kableExtra)## Warning: package 'kableExtra' was built under R version 4.0.5
What subjects did you randomly select?
#I randomly selected AAS, AIS, AST, ENT, and WGSS. Here's the data I created
# Sampled class enrollments: one row per sampled class, labelled by subject.
# Rows are kept in the original entry order (subject blocks, then classes).
# NOTE(review): the table holds eight subjects (AAS, AIS, AST, ENT, WGSS, IST,
# KOR, TIS) although the answer text above names only five -- confirm which
# subjects were actually sampled.
class_data <- tibble(
  subject = rep(
    c("AAS", "AIS", "AST", "ENT", "WGSS", "IST", "KOR", "TIS"),
    times = c(8, 10, 5, 5, 10, 4, 5, 2)
  ),
  enrollment = c(
    44, 43, 27, 22, 13, 25, 12, 25,          # AAS
    35, 35, 34, 24, 34, 35, 35, 35, 35, 35,  # AIS
    128, 127, 126, 77, 6,                    # AST
    40, 8, 45, 5, 3,                         # ENT
    52, 52, 48, 13, 5, 3, 4, 14, 15, 7,      # WGSS
    13, 12, 13, 19,                          # IST
    25, 33, 19, 8, 15,                       # KOR
    31, 7                                    # TIS
  )
)
# Per-subject summary of the sampled classes:
#   avg      mean enrollment
#   var      sample variance of enrollment
#   n_class  number of sampled classes
class_summary <- summarise(
  group_by(class_data, subject),
  avg = mean(enrollment),
  var = var(enrollment),
  n_class = n()
)
# Number of courses in each subject, one row per subject.
course_data <- tibble(
  subject = c("AAS", "AIS", "AST", "ENT", "WGSS", "IST", "KOR", "TIS"),
  courses = c(6, 4, 3, 4, 4, 4, 4, 2)
)
# Join the course counts onto the class summary so each subject row carries
# its number of courses. The key is named explicitly so the join cannot
# silently pick up any other column the two tables might come to share
# (and no "Joining, by" message is emitted).
class_summary <- class_summary %>%
  full_join(course_data, by = "subject")
# Then create the per-subject estimates:
#   course.est.t  courses * avg
#   course.est.v  var * (courses - n_class) / courses, to 4 significant digits
# NOTE(review): several subjects have n_class > courses, which makes the factor
# (courses - n_class) / courses -- and therefore course.est.v -- negative, as
# the printed table below shows. Confirm the course counts against the sample.
class_summary <- class_summary %>%
  mutate(
    course.est.t = courses * avg,
    course.est.v = signif(var * (courses - n_class) / courses, digits = 4)
  )
# What are the sample mean and variance, and a 95% confidence interval for each sampled cluster?
class_summary## # A tibble: 8 x 7
## subject avg var n_class courses course.est.t course.est.v
## <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 AAS 26.4 142. 8 6 158. -47.4
## 2 AIS 33.7 11.8 10 4 135. -17.7
## 3 AST 92.8 2824. 5 3 278. -1882
## 4 ENT 20.2 421. 5 4 80.8 -105.
## 5 IST 14.2 10.2 4 4 57 0
## 6 KOR 20 91 5 4 80 -22.8
## 7 TIS 19 288 2 2 38 0
## 8 WGSS 21.3 429. 10 4 85.2 -644
Estimate mean, the average class size per subject, using the ratio estimator.
# Ratio estimate of the mean: the per-subject averages weighted by the number
# of courses in each subject, labelled so the printed value is self-describing.
est.mean <- setNames(
  weighted.mean(class_summary$avg, w = class_summary$courses),
  "estimated students per class"
)
est.mean## estimated students per class
## 29.43387
Estimate Var[^1], the variance of $\hat{\mu}$.
# Between-cluster variance component of the estimated mean.
# m is read from the summary table (one row per sampled subject) instead of
# being hard-coded, so it cannot drift out of step with the data; it equals 8
# for the eight subjects summarised in class_summary.
m <- nrow(class_summary) # number of sampled clusters (subjects)
# Population size used in the factors below.
# NOTE(review): the original comment read "population subject of classes";
# presumably this is the number of subjects in the population -- confirm.
N <- 132
between.stats <- summarize(
  class_summary,
  est.M.bar = mean(courses), # average number of courses per sampled subject
  # squared deviations of the subject means from est.mean, weighted by
  # courses^2 and divided by m - 1
  ssqb = sum(courses^2 * (avg - est.mean)^2) / (m - 1)
)
est.M.bar <- between.stats$est.M.bar
ssqb <- between.stats$ssqb
est.M <- N * est.M.bar
Vb <- (1 - m / N) * ssqb / (m * est.M.bar^2)
# Within-cluster variance component of the estimated mean.
# Each subject contributes courses^2 * (1 - n_class / courses) * var / n_class.
# The raw sample variance `var` is used because the finite-population factor
# (1 - n_class / courses) is already written out in this expression;
# course.est.v contains that same factor, so using it here applied the factor
# twice (and hid negative factors by squaring them).
if (any(class_summary$n_class > class_summary$courses)) {
  # Surface the data inconsistency instead of letting it pass silently.
  warning(
    "n_class exceeds courses for at least one subject; ",
    "the finite-population factor (1 - n_class / courses) is negative there.",
    call. = FALSE
  )
}
within.stats <- class_summary %>%
  summarize(
    Msqsum = sum(courses^2 * (1 - n_class / courses) * var / n_class)
  )
Vw <- within.stats$Msqsum / (m * N * est.M.bar^2)
# Overall estimate
# Combine the variance components to get the variance of the estimated mean,
# then form the margins of error (2 standard errors, the usual approximation
# for 95% confidence) for the mean and the total.
V <- Vb + Vw
ME.mean <- signif(2 * sqrt(V), digits = 2)
# Scale the mean up to the total, and the margin of error by the same factor.
# est.total multiplies est.mean by N * est.M.bar, so the margin of error must
# be multiplied by N * est.M.bar as well (not by N alone). It is computed from
# the unrounded half-width so that rounding is applied only once.
est.total <- N * est.M.bar * est.mean
ME.total <- signif(N * est.M.bar * 2 * sqrt(V), digits = 2)
# Find a 95% confidence interval for $\mu$
# it would be
# mu = est.mean +- ME.mean
est.mean - ME.mean## estimated students per class
## 15.43387
est.mean + ME.mean## estimated students per class
## 43.43387
# (15.43, 43.43)
# [^1]: $\hat{\mu}$