Code proving that most smart people do not get into elite
universities. IQ/ACT/SAT scores are assumed to correlate at .8. Elite
students are assumed to be selected on the basis of test scores + a
bunch of factors that don’t correlate with IQ
IPEDS_data$ACT25 <- IPEDS_data$`ACT Composite 25th percentile score`
IPEDS_data$ACT75 <- IPEDS_data$`ACT Composite 75th percentile score`
IPEDS_data$SATR25 <- IPEDS_data$`SAT Critical Reading 25th percentile score`
IPEDS_data$SATR75 <- IPEDS_data$`SAT Critical Reading 75th percentile score`
IPEDS_data$SATM25 <- IPEDS_data$`SAT Math 25th percentile score`
IPEDS_data$SATM75 <- IPEDS_data$`SAT Math 75th percentile score`
IPEDS_data$SATW25 <- IPEDS_data$`SAT Writing 25th percentile score`
IPEDS_data$SATW75 <- IPEDS_data$`SAT Writing 75th percentile score`
IPEDS_data$Graduation_Rate_4 <- IPEDS_data$`Graduation rate - Bachelor degree within 4 years, total`
IPEDS_data$Religious_affiliation <- IPEDS_data$`Religious affiliation`
IPEDS_data$Women <- IPEDS_data$`Percent of undergraduate enrollment that are women`
IPEDS_data$ACTscore <- getpc(IPEDS_data %>% select(ACT25, ACT75), dofa=F, fillmissing=F, normalizeit=T)
IPEDS_data$SATscore <- getpc(IPEDS_data %>% select(SATR25, SATR75, SATM25, SATM75, SATW25, SATW75), dofa=F, fillmissing=F, normalizeit=T)
IPEDS_data$Academic_Score <- getpc(IPEDS_data %>% select(ACTscore, SATscore), dofa=F, fillmissing=F, normalizeit=T)
IPEDS_data$app_rate <- IPEDS_data$`Admissions total`/IPEDS_data$`Applicants total`
IPEDS_data$score <- getpc(IPEDS_data %>% select(Academic_Score, Graduation_Rate_4, app_rate))
IPEDS_data$rank <- ranker(IPEDS_data$score)
unis <- IPEDS_data %>% select(rank, Name)
GG_scatter(IPEDS_data, 'Academic_Score', 'app_rate', case_names='Name')

eliteunis <- IPEDS_data %>% filter(rank < 26)
eliteunis$actmean <- (eliteunis$ACT25 + eliteunis$ACT75)/2
eliteunis$actsd <- ((-eliteunis$ACT25 + eliteunis$actmean)/qnorm(.75) + (eliteunis$ACT75 - eliteunis$actmean)/qnorm(.75))/2
eliteunis$submitted_act <- eliteunis$`Percent of freshmen submitting ACT scores`
eliteunis$submitted_sat <- eliteunis$`Percent of freshmen submitting SAT scores`
eliteunis$freshmen <- eliteunis$`Estimated freshman enrollment, full time`
eliteunis$undergrad <- eliteunis$`Undergraduate enrollment`
eliteunis$gradplus <- eliteunis$`Graduate enrollment`
eliteunis$foreign_freshmen <- eliteunis$`Number of first-time undergraduates - foreign countries`
eliteunis$american_freshmen <- eliteunis$freshmen - eliteunis$foreign_freshmen
eliteunis$foreign_ratio <- eliteunis$foreign_freshmen/eliteunis$freshmen
eliteunis$american_freshmen[is.na(eliteunis$american_freshmen)] <- eliteunis$freshmen[is.na(eliteunis$american_freshmen)]*(1-mean(eliteunis$foreign_ratio, na.rm=T))
all_elites <- c()
for(i in 1:nrow(eliteunis)) {
numofgrads <- eliteunis$american_freshmen[i]*eliteunis$`Graduation rate - Bachelor degree within 6 years, total`[i]/100
innervec <- rnorm(n=numofgrads, mean=eliteunis$actmean[i], sd=eliteunis$actsd[i])
all_elites <- append(all_elites, innervec)
}
GG_denhist(all_elites)

describe2(all_elites)
elite_students <- data.frame(ACT = all_elites)
elite_students$ACT[elite_students$ACT>35.4999] <- 36
perfect <- elite_students %>% filter(ACT==36)
GG_denhist(elite_students$ACT)

###########################
all_americans <- rnorm(2231100, mean=19.8, sd=5.7)
american_students <- data.frame(ACT = all_americans)
american_students$IQ <- normalise(american_students$ACT*0.84^2 + rnorm(2231100, mean=19.8, sd=5.7)*sqrt(1-0.84^2))*15+100
GG_scatter(american_students[1:10000,], 'ACT','IQ')

american_students$ACT[american_students$ACT>35.4999] <- 36
american_students$selection_factor <- american_students$ACT + rnorm(2231100, mean=19.8, sd=5.7)*0.54
describe2(american_students$selection_factor)
simulated_elites <- american_students %>% filter(selection_factor > 45.13)
simulated_proles <- american_students %>% filter(selection_factor < 45.13)
american_students$elite <- 0
american_students$elite[american_students$selection_factor>45.13] <- 1
describe2(simulated_elites$ACT)
describe2(elite_students$ACT)
describe2(simulated_elites$IQ)
describe2(simulated_proles$IQ)
GG_denhist(simulated_elites$IQ)

GG_denhist(simulated_proles$IQ)

GG_denhist(american_students, var='IQ', group='elite')

###############
plus125_elites <- simulated_elites %>% filter(IQ > 125)
plus125_proles <- simulated_proles %>% filter(IQ > 125)
nrow(plus125_elites)/(nrow(plus125_elites)+nrow(plus125_proles))
[1] 0.1405712
##################
plus125_elites2 <- simulated_elites %>% filter(IQ > 145)
plus125_proles2 <- simulated_proles %>% filter(IQ > 145)
american_students$elite <- as.factor(american_students$elite)
p <- ggplot(american_students, aes(x = IQ, fill = elite)) +
geom_histogram(
position = "identity", # Overlays the groups
alpha = 0.5, # Semi-transparent for overlap visibility
bins = 1000 # Adjust bins as needed
) +
scale_fill_brewer(palette = "Set1", name = "Attended elite university") +
labs(
title = "Distribution of IQ Scores by elite university attendance",
subtitle = "source: simulated data",
x = "IQ Score",
y = "Count"
) +
coord_cartesian(xlim = c(115, 160)) + # Zoom in on x-axis from 115 to 160
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 11),
axis.text.y = element_text(size = 11),
axis.title.x = element_text(size = 15),
axis.title.y = element_text(size = 15),
plot.background = element_rect(fill = "gray30"),
panel.background = element_rect(fill = "gray30"),
panel.grid.major = element_line(color = "gray30"),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_line(color = "gray60"),
plot.title = element_text(size = 12, face = "bold", color = "gray80"),
strip.text = element_text(size = 12, face = "bold", color = "gray80"),
axis.text = element_text(color = "gray80"),
axis.title = element_text(color = "gray80"),
legend.background = element_rect(fill = "gray20"),
legend.text = element_text(color = "gray80"),
legend.title = element_text(color = "gray80"),
plot.margin = margin(t = 5, r = 5, b = 5, l = 5, "pt")
)
p

unis <- eliteunis %>% select(rank, Name,actmean,actsd, undergrad, gradplus, freshmen, american_freshmen, foreign_freshmen)
mean(eliteunis$submitted_act)/100*nrow(perfect)
[1] 1257.152
top75 <- IPEDS_data %>% select(rank, Name,ACT25, ACT75, score) %>% filter(rank<76)
sum(top75$`Estimated freshman enrollment, full time`)
Warning: Unknown or uninitialised column: `Estimated freshman enrollment, full time`.
[1] 0
sum(IPEDS_data$`Estimated freshman enrollment, full time`, na.rm=T)
[1] 1461252
102743/1461252
[1] 0.07031162
lower <- IPEDS_data %>% select(rank, Name,ACT25, ACT75, score, Graduation_Rate_4, app_rate, Academic_Score) %>% filter(ACT25 > 28)
sum(eliteunis$american_freshmen)
[1] 27349.83
pnorm(25/15)
[1] 0.9522096
---
title: "Most smart students don't attend elite universities"
output: html_notebook
---

Code proving that most smart people do not get into elite universities.
IQ/ACT/SAT scores are assumed to correlate at .8.
Elite students are assumed to be selected on the basis of test scores + a bunch of factors that don't correlate with IQ


```{r}
IPEDS_data$ACT25 <- IPEDS_data$`ACT Composite 25th percentile score`
IPEDS_data$ACT75 <- IPEDS_data$`ACT Composite 75th percentile score`
IPEDS_data$SATR25 <- IPEDS_data$`SAT Critical Reading 25th percentile score`
IPEDS_data$SATR75 <- IPEDS_data$`SAT Critical Reading 75th percentile score`
IPEDS_data$SATM25 <- IPEDS_data$`SAT Math 25th percentile score`
IPEDS_data$SATM75 <- IPEDS_data$`SAT Math 75th percentile score`
IPEDS_data$SATW25 <- IPEDS_data$`SAT Writing 25th percentile score`
IPEDS_data$SATW75 <- IPEDS_data$`SAT Writing 75th percentile score`
IPEDS_data$Graduation_Rate_4 <- IPEDS_data$`Graduation rate - Bachelor degree within 4 years, total`
IPEDS_data$Religious_affiliation <- IPEDS_data$`Religious affiliation`
IPEDS_data$Women <- IPEDS_data$`Percent of undergraduate enrollment that are women`

IPEDS_data$ACTscore <- getpc(IPEDS_data %>% select(ACT25, ACT75), dofa=F, fillmissing=F, normalizeit=T)
IPEDS_data$SATscore <- getpc(IPEDS_data %>% select(SATR25, SATR75, SATM25, SATM75, SATW25, SATW75), dofa=F, fillmissing=F, normalizeit=T)
IPEDS_data$Academic_Score <- getpc(IPEDS_data %>% select(ACTscore, SATscore), dofa=F, fillmissing=F, normalizeit=T)
IPEDS_data$app_rate <- IPEDS_data$`Admissions total`/IPEDS_data$`Applicants total`


IPEDS_data$score <- getpc(IPEDS_data %>% select(Academic_Score, Graduation_Rate_4, app_rate))
IPEDS_data$rank <- ranker(IPEDS_data$score)
unis <- IPEDS_data %>% select(rank, Name)

GG_scatter(IPEDS_data, 'Academic_Score', 'app_rate', case_names='Name')


eliteunis <- IPEDS_data %>% filter(rank < 26)

eliteunis$actmean <- (eliteunis$ACT25 + eliteunis$ACT75)/2
eliteunis$actsd <- ((-eliteunis$ACT25 + eliteunis$actmean)/qnorm(.75) + (eliteunis$ACT75 - eliteunis$actmean)/qnorm(.75))/2
eliteunis$submitted_act <- eliteunis$`Percent of freshmen submitting ACT scores`
eliteunis$submitted_sat <- eliteunis$`Percent of freshmen submitting SAT scores`
eliteunis$freshmen <- eliteunis$`Estimated freshman enrollment, full time`
eliteunis$undergrad <- eliteunis$`Undergraduate enrollment`
eliteunis$gradplus <- eliteunis$`Graduate enrollment`
eliteunis$foreign_freshmen <- eliteunis$`Number of first-time undergraduates - foreign countries`
eliteunis$american_freshmen <- eliteunis$freshmen - eliteunis$foreign_freshmen

eliteunis$foreign_ratio <- eliteunis$foreign_freshmen/eliteunis$freshmen
eliteunis$american_freshmen[is.na(eliteunis$american_freshmen)] <- eliteunis$freshmen[is.na(eliteunis$american_freshmen)]*(1-mean(eliteunis$foreign_ratio, na.rm=T))

all_elites <- c()

for(i in 1:nrow(eliteunis)) {
  numofgrads <- eliteunis$american_freshmen[i]*eliteunis$`Graduation rate - Bachelor degree within 6 years, total`[i]/100
  
  
  innervec <- rnorm(n=numofgrads, mean=eliteunis$actmean[i], sd=eliteunis$actsd[i])
  
  all_elites <- append(all_elites, innervec)
}

GG_denhist(all_elites)
describe2(all_elites)
elite_students <- data.frame(ACT = all_elites)

elite_students$ACT[elite_students$ACT>35.4999] <- 36

perfect <- elite_students %>% filter(ACT==36)

GG_denhist(elite_students$ACT)
###########################
all_americans <- rnorm(2231100, mean=19.8, sd=5.7)


american_students <- data.frame(ACT = all_americans)
american_students$IQ <- normalise(american_students$ACT*0.84^2 + rnorm(2231100, mean=19.8, sd=5.7)*sqrt(1-0.84^2))*15+100

GG_scatter(american_students[1:10000,], 'ACT','IQ')

american_students$ACT[american_students$ACT>35.4999] <- 36


american_students$selection_factor <- american_students$ACT + rnorm(2231100, mean=19.8, sd=5.7)*0.54
describe2(american_students$selection_factor)

simulated_elites <- american_students %>% filter(selection_factor > 45.13)
simulated_proles <- american_students %>% filter(selection_factor < 45.13)

american_students$elite <- 0
american_students$elite[american_students$selection_factor>45.13] <- 1

describe2(simulated_elites$ACT)
describe2(elite_students$ACT)

describe2(simulated_elites$IQ)
describe2(simulated_proles$IQ)

GG_denhist(simulated_elites$IQ)
GG_denhist(simulated_proles$IQ)

GG_denhist(american_students, var='IQ', group='elite')

###############
plus125_elites <- simulated_elites %>% filter(IQ > 125)
plus125_proles <- simulated_proles %>% filter(IQ > 125)

nrow(plus125_elites)/(nrow(plus125_elites)+nrow(plus125_proles))

##################
plus125_elites2 <- simulated_elites %>% filter(IQ > 145)
plus125_proles2 <- simulated_proles %>% filter(IQ > 145)

american_students$elite <- as.factor(american_students$elite)

p <- ggplot(american_students, aes(x = IQ, fill = elite)) +
  geom_histogram(
    position = "identity",   # Overlays the groups
    alpha    = 0.5,          # Semi-transparent for overlap visibility
    bins     = 1000            # Adjust bins as needed
  ) +
  scale_fill_brewer(palette = "Set1", name = "Attended elite university") +
  labs(
    title = "Distribution of IQ Scores by elite university attendance",
    subtitle = "source: simulated data",
    x = "IQ Score",
    y = "Count"
  ) +
  coord_cartesian(xlim = c(115, 160)) +  # Zoom in on x-axis from 115 to 160
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    plot.background = element_rect(fill = "gray30"),
    panel.background = element_rect(fill = "gray30"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(color = "gray60"),
    plot.title = element_text(size = 12, face = "bold", color = "gray80"),
    strip.text = element_text(size = 12, face = "bold", color = "gray80"),
    axis.text = element_text(color = "gray80"),
    axis.title = element_text(color = "gray80"),
    legend.background = element_rect(fill = "gray20"),
    legend.text = element_text(color = "gray80"),
    legend.title = element_text(color = "gray80"),
    plot.margin = margin(t = 5, r = 5, b = 5, l = 5, "pt")
  )
p


unis <- eliteunis %>% select(rank, Name,actmean,actsd, undergrad, gradplus, freshmen, american_freshmen, foreign_freshmen)
mean(eliteunis$submitted_act)/100*nrow(perfect)

top75 <- IPEDS_data %>% select(rank, Name,ACT25, ACT75, score) %>% filter(rank<76)
sum(top75$`Estimated freshman enrollment, full time`)
sum(IPEDS_data$`Estimated freshman enrollment, full time`, na.rm=T)
102743/1461252
lower <-  IPEDS_data %>% select(rank, Name,ACT25, ACT75, score, Graduation_Rate_4, app_rate, Academic_Score) %>% filter(ACT25 > 28)

sum(eliteunis$american_freshmen)


pnorm(25/15)

```
