Function

ThresholdMean <- function(FracA, FracB, rnd = 3){
  Gap = qnorm(FracA) - qnorm(FracB)
  cat(paste0("Group A's mean is ", round(Gap, rnd), " SDs away from Group B's.", collapse = "\n"))}

MethodOfThresholds <- function(G1, G2, 
                               epsilon = .00001,
                               deltaS = 0, 
                               rhoS = 1, 
                               sum = F,
                               rnd = 3,
                               DG = c("GDelta", "Cd")){ 
  DG = ifelse(is.na(DG), "GDelta", DG)
  colMax <- function(data) sapply(as.data.frame(data), max, na.rm = TRUE)
  if (sum == T) {G1C = cumsum(G1); G2C = cumsum(G2)} else {G1C = G1; G2C = G2}
  if (sum == T) {G1M = mapply('/', G1C, colMax(G1C)); G2M = mapply('/', G2C, colMax(G2C))} else {G1M = G1C; G2M = G2C}
  GDF = rbind(as.numeric(unlist(G1M)), as.numeric(unlist(G2M)))
  GDF[which(GDF == 1)] = GDF[which(GDF == 1)] - epsilon
  GDF[which(GDF == 0)] = GDF[which(GDF == 0)] + epsilon
  P_G1 = GDF[1,]
  P_G2 = GDF[2,]
  f0 = function(t){ 
    1/sqrt(2*pi)*exp(-t^2/2)}
  thresholds = rep(0, length(P_G1))
  for(i in 1:length(P_G1)){
    F0_temp = function(t){
      -P_G1[i] + integrate(f0, t, Inf)$value}
    thresholds[i] = uniroot(F0_temp, lower = -10, upper = 10)$root}
  F_fit = function(t, delta, rho){
    f0_full = function(x, mu = delta, sigma = rho){
      1/sqrt(2*pi*sigma^2)*exp(-(x-mu)^2/(2*sigma^2))}
    outp = rep(0, length(t))
    for(i in 1:length(outp)){
      outp[i] = integrate(f0_full,t[i],Inf)$value}
    return(outp)}
  fitLS1 = nls(P_G2 ~ F_fit(thresholds, delta, rho), start = list(delta = deltaS, rho = rhoS))
  deltaF = unname(coef(fitLS1)[1]) 
  rhoF = unname(coef(fitLS1)[2])
  out = c(deltaF, rhoF)
  thresholds = rep(0, length(P_G1))
  for(i in 1:length(P_G1)){
    F0_temp = function(t){
      -P_G2[i] + integrate(f0, t, Inf)$value}
    thresholds[i] = uniroot(F0_temp, lower = -10, upper = 10)$root}
  fitLS2 = nls(P_G1 ~ F_fit(thresholds, delta, rho), start = list(delta = -deltaS, rho = 1/rhoS))
  deltaOF = unname(coef(fitLS2)[1]) 
  rhoOF = unname(coef(fitLS2)[2]) 
  rhoOF = 1/rhoOF
  deltaOF = -deltaOF*rhoOF
  out = c((deltaF + deltaOF)/2, (rhoF + rhoOF)/2)
  suppressWarnings(if (DG == "Cd") {out[3] = sqrt((1 + out[2]^2)/2); out[1] = out[1]/out[3]}) 
  out = round(out[1:2], rnd)
  names(out) = suppressWarnings(if (DG == "GDelta") {c("Glass' Delta", "SD Ratio")} else {c("Cohen's d", "SD Ratio")})
  return(out)}

DiversitySpacePlot <- function(x, Delta, rho, 
                               xlabel = "Fraction of Group A Achieving Threshold", 
                               ylabel = "Fraction of Group B Achieving Threshold"){
  linspac = seq(0,1, length.out = 2)
  plot(linspac, linspac, type = "l", lty = 3, main = "Diversity Space", xlab = xlabel, ylab = ylabel)
  points(x[1,], x[2,], col = "red", pch = 16)
  F_fit = function(t,delta, rho){
    f0_full = function(x, mu = delta, sigma = rho){
      1/sqrt(2*pi*sigma^2)*exp(-(x-mu)^2/(2*sigma^2))}
    outp = rep(0, length(t))
    for(i in 1:length(outp)){
      outp[i] = integrate(f0_full,t[i],Inf)$value}
    return(outp)}
  tvals = seq(-5,5,length.out = 500)
  xvals = rep(0,length(tvals))
  yvals = rep(0,length(tvals))
  for(i in 1:length(tvals)){ # Create curve
    xvals[i] = F_fit(tvals[i], 0, 1)
    yvals[i] = F_fit(tvals[i], Delta, rho)}
  lines(xvals, yvals, col = "blue", lwd = 2)}

Rationale

Janet Goodwin, the CEO of the ACT, has released a statement about the decline of average ACT scores for the high school class of 2022 (https://web.archive.org/web/20221012184636/https://leadershipblog.act.org/2022/10/GradClassRelease2022.html). Apparently this year’s class have the lowest scores for any cohort in the past thirty years. The importance of this change is uncertain - we weren’t provided with any useful psychometric information and there wasn’t anything like a year-by-year test score comparison with the same samples. Darting right past the topic of her address, she linked to the 2022 ACT Profile Report (https://web.archive.org/web/20221012170948/https://www.act.org/content/act/en/research/services-and-resources/data-and-visualization/grad-class-database-2022.html), which includes a lot of data on ACT scores broken up out by sex, race, degree intention, STEM vs non-STEM, etc. Using the data contained in the National Profile Report (https://web.archive.org/web/20221012184619/https://www.act.org/content/dam/act/unsecured/documents/2022/2022-National-ACT-Profile-Report.pdf), we can assess the extent of prominent racial/ethnic gaps as of this year.

Analyses

Table 3.3 contains benchmark pass rates for the ethnicities Black/African American, American Indian/Alaska Native, White, Hispanic/Latino, Asian, Native Hawaiian/Other Pacific Islander, Two or More Races, and Prefer Not to Respond. The latter two will not be used because they are not informative. Sample sizes ranged from 2,961 for Native Hawaiian/Other Pacific Islanders to 708,952 for Whites. Taking the White mean to be 100 and the shared SD to be 15, all other means will be computed relative to that one.

Blacks      <- c(.27, .09, .18, .10, .05, .03) #English, Mathematics, Reading, Science, All Four, STEM
Amerindians <- c(.27, .11, .20, .12, .06, .04)
Whites      <- c(.65, .40, .51, .42, .29, .21) 
Hispanics   <- c(.38, .18, .28, .19, .11, .08)
Asians      <- c(.77, .64, .65, .60, .51, .46)
Hawaiians   <- c(.32, .16, .23, .16, .10, .07)

cat("Black-White \n")
## Black-White
ThresholdMean(Whites, Blacks); cat("\n Amerindian-White \n")
## Group A's mean is 0.998 SDs away from Group B's.
## Group A's mean is 1.087 SDs away from Group B's.
## Group A's mean is 0.94 SDs away from Group B's.
## Group A's mean is 1.08 SDs away from Group B's.
## Group A's mean is 1.091 SDs away from Group B's.
## Group A's mean is 1.074 SDs away from Group B's.
## 
##  Amerindian-White
ThresholdMean(Whites, Amerindians); cat("\n Hispanic-White \n")
## Group A's mean is 0.998 SDs away from Group B's.
## Group A's mean is 0.973 SDs away from Group B's.
## Group A's mean is 0.867 SDs away from Group B's.
## Group A's mean is 0.973 SDs away from Group B's.
## Group A's mean is 1.001 SDs away from Group B's.
## Group A's mean is 0.944 SDs away from Group B's.
## 
##  Hispanic-White
ThresholdMean(Whites, Hispanics); cat("\n Asian-White \n")
## Group A's mean is 0.691 SDs away from Group B's.
## Group A's mean is 0.662 SDs away from Group B's.
## Group A's mean is 0.608 SDs away from Group B's.
## Group A's mean is 0.676 SDs away from Group B's.
## Group A's mean is 0.673 SDs away from Group B's.
## Group A's mean is 0.599 SDs away from Group B's.
## 
##  Asian-White
ThresholdMean(Whites, Asians); cat("\n Hawaiian-White \n")
## Group A's mean is -0.354 SDs away from Group B's.
## Group A's mean is -0.612 SDs away from Group B's.
## Group A's mean is -0.36 SDs away from Group B's.
## Group A's mean is -0.455 SDs away from Group B's.
## Group A's mean is -0.578 SDs away from Group B's.
## Group A's mean is -0.706 SDs away from Group B's.
## 
##  Hawaiian-White
ThresholdMean(Whites, Hawaiians)
## Group A's mean is 0.853 SDs away from Group B's.
## Group A's mean is 0.741 SDs away from Group B's.
## Group A's mean is 0.764 SDs away from Group B's.
## Group A's mean is 0.793 SDs away from Group B's.
## Group A's mean is 0.728 SDs away from Group B's.
## Group A's mean is 0.669 SDs away from Group B's.

And if we treat each of these as an equally-important score, our unit-weighted composite differences are

cat("Black-White \n")
## Black-White
BW <- mean(c(.998, 1.087, .94, 1.08, 1.091, 1.074)); BW; BW * 15; cat("\n Amerindian-White \n")
## [1] 1.045
## [1] 15.675
## 
##  Amerindian-White
AW <- mean(c(.998, .973, .867, .973, 1.001, .944)); AW; AW * 15; cat("\n Hispanic-White \n")
## [1] 0.9593333
## [1] 14.39
## 
##  Hispanic-White
HW <- mean(c(.691, .662, .608, .676, .673, .599)); HW; HW * 15; cat("\n Asian-White \n")
## [1] 0.6515
## [1] 9.7725
## 
##  Asian-White
EW <- mean(c(-.354, -.612, -.36, -.455, -.578, -.706)); EW; EW * 15; cat("\n Hawaiian-White \n")
## [1] -0.5108333
## [1] -7.6625
## 
##  Hawaiian-White
IW <- mean(c(.853, .741, .764, .793, .728, .669)); IW; IW * 15
## [1] 0.758
## [1] 11.37

It may be seen as inadmissible to add “All Four” or “STEM” to the rest since they are composite of the first four variables and mathematics and science, respectively, because that would be double-dipping, so redoing this without those sumscores added in and the results made redundant as a result delivers the following mean differences:

cat("Black-White \n")
## Black-White
BW <- mean(c(.998, 1.087, .94, 1.08)); BW; BW * 15; cat("\n Amerindian-White \n")
## [1] 1.02625
## [1] 15.39375
## 
##  Amerindian-White
AW <- mean(c(.998, .973, .867, .973)); AW; AW * 15; cat("\n Hispanic-White \n")
## [1] 0.95275
## [1] 14.29125
## 
##  Hispanic-White
HW <- mean(c(.691, .662, .608, .676)); HW; HW * 15; cat("\n Asian-White \n")
## [1] 0.65925
## [1] 9.88875
## 
##  Asian-White
EW <- mean(c(-.354, -.612, -.36, -.455)); EW; EW * 15; cat("\n Hawaiian-White \n")
## [1] -0.44525
## [1] -6.67875
## 
##  Hawaiian-White
IW <- mean(c(.853, .741, .764, .793)); IW; IW * 15
## [1] 0.78775
## [1] 11.81625

And the results are barely changed. Note the difference from the raw “All Four” results, the differences from which follow from the imperfect correlations among the various tests. They are more extreme than the averages, which assume all the individual tests are perfectly correlated. For more on this, see this blog post (https://web.archive.org/web/20210220193517/https://assessingpsyche.wordpress.com/2016/02/17/the-composite-score-extremity-effect/) and Schneider (2016). If we want to assess how large those gaps are in the typical IQ metric, we can do so:

cat("Black-White \n")
## Black-White
1.091 * 15; cat("\n Amerindian-White \n")
## [1] 16.365
## 
##  Amerindian-White
1.001 * 15; cat("\n Hispanic-White \n")
## [1] 15.015
## 
##  Hispanic-White
.673 * 15; cat("\n Asian-White \n")
## [1] 10.095
## 
##  Asian-White
-.578 * 15; cat("\n Hawaiian-White \n")
## [1] -8.67
## 
##  Hawaiian-White
.728 * 15
## [1] 10.92

And now, I have figured out how to better assess the gaps using the method of multiple thresholds instead of a single one.

cat("Black-White \n")
## Black-White
Whites <- c(.40, .42, .51, .65) #Mathematics, Science, Reading, English
Blacks <- c(.09, .10, .18, .27)

pars = MethodOfThresholds(Blacks, Whites)
par(mar = c(4, 4, 1.5, 0.8))
DiversitySpacePlot(rbind(Blacks, Whites), 
                   pars[1], pars[2],
                   xlabel = "Fraction of Blacks Achieving Threshold",
                   ylabel = "Fraction of Whites Achieving Threshold")
grid()

pars; pars[1]*15; pars[2]^2
## Glass' Delta     SD Ratio 
##        1.020        1.143
## Glass' Delta 
##         15.3
## SD Ratio 
## 1.306449
Amerindians <- c(.11, .12, .20, .27)
Hispanics   <- c(.18, .19, .28, .38) 
Asians      <- c(.64, .60, .65, .77)
Hawaiians   <- c(.16, .16, .23, .32)

cat("\n Amerindian-White \n")
## 
##  Amerindian-White
pars = MethodOfThresholds(Amerindians, Whites)
par(mar = c(4, 4, 1.5, 0.8))
DiversitySpacePlot(rbind(Amerindians, Whites), 
                   pars[1], pars[2],
                   xlabel = "Fraction of Amerindians Achieving Threshold",
                   ylabel = "Fraction of Whites Achieving Threshold")
grid()

pars; pars[1]*15; pars[2]^2
## Glass' Delta     SD Ratio 
##        0.949        0.975
## Glass' Delta 
##       14.235
## SD Ratio 
## 0.950625
cat("\n Hispanic-White \n")
## 
##  Hispanic-White
pars = MethodOfThresholds(Hispanics, Whites)
par(mar = c(4, 4, 1.5, 0.8))
DiversitySpacePlot(rbind(Hispanics, Whites), 
                   pars[1], pars[2],
                   xlabel = "Fraction of Hispanics Achieving Threshold",
                   ylabel = "Fraction of Whites Achieving Threshold")
grid()

pars; pars[1]*15; pars[2]^2
## Glass' Delta     SD Ratio 
##        0.658        0.973
## Glass' Delta 
##         9.87
## SD Ratio 
## 0.946729
cat("\n Asian-White \n")
## 
##  Asian-White
pars = MethodOfThresholds(Asians, Whites)
par(mar = c(4, 4, 1.5, 0.8))
DiversitySpacePlot(rbind(Asians, Whites), 
                   pars[1], pars[2],
                   xlabel = "Fraction of Asians Achieving Threshold",
                   ylabel = "Fraction of Whites Achieving Threshold")
grid()

pars; pars[1]*15; pars[2]^2
## Glass' Delta     SD Ratio 
##       -0.441        0.716
## Glass' Delta 
##       -6.615
## SD Ratio 
## 0.512656
cat("\n Hawaiian-White \n")
## 
##  Hawaiian-White
pars = MethodOfThresholds(Hawaiians, Whites)
par(mar = c(4, 4, 1.5, 0.8))
DiversitySpacePlot(rbind(Hawaiians, Whites), 
                   pars[1], pars[2],
                   xlabel = "Fraction of Hawaiians Achieving Threshold",
                   ylabel = "Fraction of Whites Achieving Threshold")
grid()

pars; pars[1]*15; pars[2]^2
## Glass' Delta     SD Ratio 
##        0.788        0.862
## Glass' Delta 
##        11.82
## SD Ratio 
## 0.743044

The Black-White gap with this method is 1.020, the Amerindian-White gap is .949, the Hispanic-White gap is .658, the Asian-White gap is .441 but needs serious adjustment for their greater variance. A naive correction would follow from dividing by the standard deviation ratios, but this could also be done more seriously by using Cohen’s d. Here are the results of both methods, in order. To calculate Cohen’s d

15.30/1.143
## [1] 13.38583
14.235/.975
## [1] 14.6
9.87/.973
## [1] 10.14388
6.615/.716
## [1] 9.238827
11.82/.862
## [1] 13.7123
ParB <- MethodOfThresholds(Blacks, Whites, DG = "Cd"); ParB; ParB[1] * 15
## Cohen's d  SD Ratio 
##     0.950     1.143
## Cohen's d 
##     14.25
ParA <- MethodOfThresholds(Amerindians, Whites, DG = "Cd"); ParA; ParA[1] * 15
## Cohen's d  SD Ratio 
##     0.961     0.975
## Cohen's d 
##    14.415
ParH <- MethodOfThresholds(Hispanics, Whites, DG = "Cd"); ParH; ParH[1] * 15
## Cohen's d  SD Ratio 
##     0.667     0.973
## Cohen's d 
##    10.005
ParS <- MethodOfThresholds(Asians, Whites, DG = "Cd"); ParS; ParS[1] * 15
## Cohen's d  SD Ratio 
##    -0.507     0.716
## Cohen's d 
##    -7.605
ParI <- MethodOfThresholds(Hawaiians, Whites, DG = "Cd"); ParI; ParI[1] * 15
## Cohen's d  SD Ratio 
##     0.845     0.862
## Cohen's d 
##    12.675

Overall, using this more accurate method, the results are highly similar. There is likely substantial utility to be gained from using it, and especially so when it is properly corrected for variance differences, which I will come back to later.

Discussion

ACT results are remarkably similar to the results from the SAT for the Black, Hispanic, and Asian groups (https://rpubs.com/JLLJ/MMTMICH22). The Black-White gap in the SAT was almost identical to the gap in the ACT, at around 15 points or one standard deviation. The Hispanic-White gap in the SAT was slightly smaller, at .568 Delta instead of .652, or 8.52 points instead of 9.77. The Asian-White gap in the SAT was slightly larger, at -.546 Delta instead of -.511, or 8.19 points instead of 7.66. The Amerindian gap in the SAT was .628 Delta, and here it was .959, or 9.42 points instead of 14.39. The Hawaiian-White gap in the SAT was .211 Delta instead of .758, or 3.17 points instead of 11.37. The reason for these three similarities is likely that sampling was similar; the reason for these two discrepancies is quite likely to do with dissimilarities that are easily explained. The simple explanation is that Native American Indians and Hawaiians live primarily in ACT states, so those who took the SAT were selected, not just for having moved to other states, but also for needing to take the test because they wanted to get into relatively more elite colleges and universities. Hawaii is a very clear proof of this, because only 31% of high school graduates in Hawaii took the sat (https://web.archive.org/web/20221012231121/https://reports.collegeboard.org/media/pdf/2022-hawaii-sat-suite-of-assessments-annual-report.pdf), and basically every group in that sample had what would clearly be elite scores for the nation as a whole.

Overall, this was an interesting replication of the demographic differences in the SAT, and the discrepancies were also worth thinking about, since they reveal the strength of the effect sampling intricacies can have on variables like test scores. The consistency of these results with the results from the SAT suggests a few things. Firstly, the gaps are not due to mere sampling on either test. Secondly, the Asian-White gap is not large due to the SAT becoming more preppable unless the same changes have hit the ACT as well. If they have, it is a wonder that ACT scores have been on the decline. Third, because Spearman’s hypothesis is likely true and gaps are similar in size on both assessments, one could reasonably extrapolate that their predictive validities are also comparable.

References

Schneider, J. W. (2016). Why Are WJ IV Cluster Scores More Extreme Than the Average of Their Parts? A Gentle Explanation of the Composite Score Extremity Effect (Woodcock Johnson IV Assessment Service Bulletin No. 7; p. 22). Houghton Mifflin Harcourt. https://www.hmhco.com/~/media/sites/home/hmh-assessments/clinical/woodcock-johnson/pdf/wjiv/wjiv_asb_7.pdf. Archive link: https://web.archive.org/web/20210425071254/https://www.hmhco.com/~/media/sites/home/hmh-assessments/clinical/woodcock-johnson/pdf/wjiv/wjiv_asb_7.pdf.

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.28   R6_2.5.1        jsonlite_1.7.2  magrittr_2.0.1 
##  [5] evaluate_0.14   highr_0.9       rlang_0.4.12    stringi_1.7.5  
##  [9] jquerylib_0.1.4 bslib_0.3.1     rmarkdown_2.11  tools_4.1.2    
## [13] stringr_1.4.0   xfun_0.27       yaml_2.2.1      fastmap_1.1.0  
## [17] compiler_4.1.2  htmltools_0.5.2 knitr_1.36      sass_0.4.0