# Import and tidy the graduation data in one pipeline:
#   1. read the CSV (column-type messages suppressed),
#   2. give `Cohort Year` and `% Grads` syntactic names,
#   3. drop the rows whose borough is "District 79",
#   4. store Borough and Cohort_year as factors.
# NOTE: the path below is an absolute path on one machine; adjust it before
# running the script anywhere else.
DATA <- read_csv(
  "C:/Users/dijan/Documents/DATA 712/graduation_data.csv",
  show_col_types = FALSE
) %>%
  rename(
    Cohort_year = `Cohort Year`,
    Grad_percentage = `% Grads`
  ) %>%
  filter(Borough != "District 79") %>%
  mutate(across(c(Borough, Cohort_year), as.factor))
# Convert graduation percentage to a binary variable:
#   1  if the graduation percentage is 70 or above,
#   0  if the graduation percentage is below 70,
#   NA if the graduation percentage itself is missing.
# The explicit NA branch matters: `NA >= 70` evaluates to NA, so with only a
# catch-all `TRUE ~ 0` a missing percentage would fall through and be recorded
# as "below 70". Keeping it NA lets the models exclude such rows (under R's
# default na.action) instead of counting them as failures.
DATA <- DATA %>%
  mutate(Grad_binary = case_when(
    is.na(Grad_percentage) ~ NA_real_,
    Grad_percentage >= 70 ~ 1,
    TRUE ~ 0))
# Model predicting graduation binary outcome by borough
# Logistic regression (binomial family) with Borough as the only predictor.
# Bronx has no coefficient of its own in the output below, i.e. it is the
# reference level; every Borough coefficient is a log-odds contrast against it.
m1 <- glm(Grad_binary ~ Borough, family = binomial, data = DATA)
summary(m1)
## 
## Call:
## glm(formula = Grad_binary ~ Borough, family = binomial, data = DATA)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -1.9095     0.3788  -5.041 4.64e-07 ***
## BoroughBrooklyn        1.8450     0.4562   4.044 5.24e-05 ***
## BoroughManhattan       2.4376     0.4611   5.286 1.25e-07 ***
## BoroughQueens          2.5786     0.4642   5.554 2.79e-08 ***
## BoroughStaten Island   4.3432     0.6009   7.228 4.90e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 424.58  on 309  degrees of freedom
## Residual deviance: 329.49  on 305  degrees of freedom
## AIC: 339.49
## 
## Number of Fisher Scoring iterations: 5
# Model predicting graduation binary outcome by borough and cohort year
# Additive logistic regression: main effects of Borough and Cohort_year only.
# Reference levels are Bronx and 2001 (the two levels without a coefficient in
# the output below), so the intercept is the log-odds for Bronx in 2001.
m2 <- glm(Grad_binary ~ Borough + Cohort_year, family = binomial, data = DATA)
summary(m2)
## 
## Call:
## glm(formula = Grad_binary ~ Borough + Cohort_year, family = binomial, 
##     data = DATA)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -8.361e+00  1.525e+00  -5.483 4.18e-08 ***
## BoroughBrooklyn       4.200e+00  1.098e+00   3.827 0.000130 ***
## BoroughManhattan      5.338e+00  1.129e+00   4.727 2.28e-06 ***
## BoroughQueens         5.603e+00  1.137e+00   4.930 8.23e-07 ***
## BoroughStaten Island  8.542e+00  1.294e+00   6.600 4.10e-11 ***
## Cohort_year2002       2.052e-15  1.354e+00   0.000 1.000000    
## Cohort_year2003       8.404e-01  1.310e+00   0.641 0.521232    
## Cohort_year2004       1.546e+00  1.286e+00   1.202 0.229392    
## Cohort_year2005       1.851e+00  1.218e+00   1.520 0.128574    
## Cohort_year2006       3.197e+00  1.185e+00   2.698 0.006985 ** 
## Cohort_year2007       3.761e+00  1.199e+00   3.138 0.001704 ** 
## Cohort_year2008       3.475e+00  1.190e+00   2.919 0.003513 ** 
## Cohort_year2009       3.761e+00  1.199e+00   3.138 0.001704 ** 
## Cohort_year2010       4.393e+00  1.228e+00   3.576 0.000348 ***
## Cohort_year2011       5.218e+00  1.299e+00   4.018 5.87e-05 ***
## Cohort_year2012       7.336e+00  1.618e+00   4.535 5.76e-06 ***
## Cohort_year2013       8.829e+00  1.729e+00   5.105 3.31e-07 ***
## Cohort_year2014       8.447e+00  1.754e+00   4.817 1.46e-06 ***
## Cohort_year2015       8.447e+00  2.000e+00   4.225 2.39e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 424.58  on 309  degrees of freedom
## Residual deviance: 188.35  on 291  degrees of freedom
## AIC: 226.35
## 
## Number of Fisher Scoring iterations: 7
# Model predicting graduation binary outcome by borough and cohort year, including an interaction between borough and cohort year
# `Borough * Cohort_year` expands to both main effects plus their interaction,
# giving 75 coefficients (one per Borough x Cohort_year cell) for 310 rows.
# NOTE(review): in the summary below most standard errors are around 1e+04,
# every p-value is ~1 and the fit needed 19 Fisher scoring iterations. That
# pattern is what (quasi-)complete separation looks like, i.e. some cells
# appear to contain only 0s or only 1s. If so, the individual Wald tests are
# uninformative and the coefficients should not be interpreted one by one;
# confirm by cross-tabulating Grad_binary by Borough and Cohort_year.
m3 <- glm(Grad_binary ~ Borough * Cohort_year, family = binomial, data = DATA)
summary(m3)
## 
## Call:
## glm(formula = Grad_binary ~ Borough * Cohort_year, family = binomial, 
##     data = DATA)
## 
## Coefficients:
##                                        Estimate Std. Error z value Pr(>|z|)
## (Intercept)                          -2.057e+01  1.024e+04  -0.002    0.998
## BoroughBrooklyn                       4.395e-06  1.448e+04   0.000    1.000
## BoroughManhattan                      6.513e-06  1.448e+04   0.000    1.000
## BoroughQueens                         4.940e-06  1.448e+04   0.000    1.000
## BoroughStaten Island                  2.126e+01  1.024e+04   0.002    0.998
## Cohort_year2002                       5.226e-06  1.448e+04   0.000    1.000
## Cohort_year2003                       5.262e-06  1.448e+04   0.000    1.000
## Cohort_year2004                       5.257e-06  1.448e+04   0.000    1.000
## Cohort_year2005                       5.291e-06  1.354e+04   0.000    1.000
## Cohort_year2006                       5.252e-06  1.295e+04   0.000    1.000
## Cohort_year2007                       5.356e-06  1.295e+04   0.000    1.000
## Cohort_year2008                       5.323e-06  1.295e+04   0.000    1.000
## Cohort_year2009                       5.559e-06  1.295e+04   0.000    1.000
## Cohort_year2010                       5.244e-06  1.295e+04   0.000    1.000
## Cohort_year2011                       5.177e-06  1.295e+04   0.000    1.000
## Cohort_year2012                       2.016e+01  1.024e+04   0.002    0.998
## Cohort_year2013                       2.097e+01  1.024e+04   0.002    0.998
## Cohort_year2014                       2.057e+01  1.024e+04   0.002    0.998
## Cohort_year2015                       2.057e+01  1.024e+04   0.002    0.998
## BoroughBrooklyn:Cohort_year2002      -4.398e-06  2.047e+04   0.000    1.000
## BoroughManhattan:Cohort_year2002     -6.516e-06  2.047e+04   0.000    1.000
## BoroughQueens:Cohort_year2002        -4.944e-06  2.047e+04   0.000    1.000
## BoroughStaten Island:Cohort_year2002 -5.226e-06  1.448e+04   0.000    1.000
## BoroughBrooklyn:Cohort_year2003      -4.435e-06  2.047e+04   0.000    1.000
## BoroughManhattan:Cohort_year2003     -6.553e-06  2.047e+04   0.000    1.000
## BoroughQueens:Cohort_year2003         1.987e+01  1.773e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2003 -5.262e-06  1.448e+04   0.000    1.000
## BoroughBrooklyn:Cohort_year2004      -4.429e-06  2.047e+04   0.000    1.000
## BoroughManhattan:Cohort_year2004      1.987e+01  1.773e+04   0.001    0.999
## BoroughQueens:Cohort_year2004         1.987e+01  1.773e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2004 -5.257e-06  1.448e+04   0.000    1.000
## BoroughBrooklyn:Cohort_year2005      -4.464e-06  1.915e+04   0.000    1.000
## BoroughManhattan:Cohort_year2005      2.057e+01  1.698e+04   0.001    0.999
## BoroughQueens:Cohort_year2005         1.947e+01  1.698e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2005  4.055e-01  1.354e+04   0.000    1.000
## BoroughBrooklyn:Cohort_year2006       1.918e+01  1.651e+04   0.001    0.999
## BoroughManhattan:Cohort_year2006      2.097e+01  1.651e+04   0.001    0.999
## BoroughQueens:Cohort_year2006         2.097e+01  1.651e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2006  1.987e+01  1.518e+04   0.001    0.999
## BoroughBrooklyn:Cohort_year2007       2.016e+01  1.651e+04   0.001    0.999
## BoroughManhattan:Cohort_year2007      2.097e+01  1.651e+04   0.001    0.999
## BoroughQueens:Cohort_year2007         2.195e+01  1.651e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2007  1.987e+01  1.518e+04   0.001    0.999
## BoroughBrooklyn:Cohort_year2008       2.016e+01  1.651e+04   0.001    0.999
## BoroughManhattan:Cohort_year2008      2.097e+01  1.651e+04   0.001    0.999
## BoroughQueens:Cohort_year2008         2.097e+01  1.651e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2008  1.987e+01  1.518e+04   0.001    0.999
## BoroughBrooklyn:Cohort_year2009       2.097e+01  1.651e+04   0.001    0.999
## BoroughManhattan:Cohort_year2009      2.097e+01  1.651e+04   0.001    0.999
## BoroughQueens:Cohort_year2009         2.097e+01  1.651e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2009  1.987e+01  1.518e+04   0.001    0.999
## BoroughBrooklyn:Cohort_year2010       2.097e+01  1.651e+04   0.001    0.999
## BoroughManhattan:Cohort_year2010      2.195e+01  1.651e+04   0.001    0.999
## BoroughQueens:Cohort_year2010         2.195e+01  1.651e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2010  1.987e+01  1.518e+04   0.001    0.999
## BoroughBrooklyn:Cohort_year2011       2.195e+01  1.651e+04   0.001    0.999
## BoroughManhattan:Cohort_year2011      2.195e+01  1.651e+04   0.001    0.999
## BoroughQueens:Cohort_year2011         4.113e+01  1.831e+04   0.002    0.998
## BoroughStaten Island:Cohort_year2011  1.987e+01  1.518e+04   0.001    0.999
## BoroughBrooklyn:Cohort_year2012       1.792e+00  1.448e+04   0.000    1.000
## BoroughManhattan:Cohort_year2012      2.097e+01  1.651e+04   0.001    0.999
## BoroughQueens:Cohort_year2012         2.097e+01  1.651e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2012 -2.877e-01  1.295e+04   0.000    1.000
## BoroughBrooklyn:Cohort_year2013       2.016e+01  1.651e+04   0.001    0.999
## BoroughManhattan:Cohort_year2013      2.016e+01  1.651e+04   0.001    0.999
## BoroughQueens:Cohort_year2013         2.016e+01  1.651e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2013 -1.099e+00  1.295e+04   0.000    1.000
## BoroughBrooklyn:Cohort_year2014       2.057e+01  1.698e+04   0.001    0.999
## BoroughManhattan:Cohort_year2014      2.057e+01  1.698e+04   0.001    0.999
## BoroughQueens:Cohort_year2014         2.057e+01  1.698e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2014 -6.932e-01  1.354e+04   0.000    1.000
## BoroughBrooklyn:Cohort_year2015       2.057e+01  1.915e+04   0.001    0.999
## BoroughManhattan:Cohort_year2015      2.057e+01  1.915e+04   0.001    0.999
## BoroughQueens:Cohort_year2015         2.057e+01  1.915e+04   0.001    0.999
## BoroughStaten Island:Cohort_year2015 -6.932e-01  1.619e+04   0.000    1.000
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 424.58  on 309  degrees of freedom
## Residual deviance: 172.11  on 235  degrees of freedom
## AIC: 322.11
## 
## Number of Fisher Scoring iterations: 19
# Likelihood ratio test
# The three models are nested and compared sequentially: row 2 of the table
# tests m2 against m1 (adding Cohort_year), and row 3 tests m3 against m2
# (adding the Borough x Cohort_year interaction).
anova(m1, m2, m3, test = "Chisq")
## Analysis of Deviance Table
## 
## Model 1: Grad_binary ~ Borough
## Model 2: Grad_binary ~ Borough + Cohort_year
## Model 3: Grad_binary ~ Borough * Cohort_year
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1       305     329.49                         
## 2       291     188.35 14  141.133   <2e-16 ***
## 3       235     172.11 56   16.241        1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# AIC and BIC for Models 1-3, computed in one call each.
# Given several fits, AIC()/BIC() return one row per model in the order the
# models are supplied, so element k belongs to Model k.
aic_by_model <- AIC(m1, m2, m3)$AIC
bic_by_model <- BIC(m1, m2, m3)$BIC

# Per-model scalars under their individual names
AIC_1 <- aic_by_model[1]
BIC_1 <- bic_by_model[1]
AIC_2 <- aic_by_model[2]
BIC_2 <- bic_by_model[2]
AIC_3 <- aic_by_model[3]
BIC_3 <- bic_by_model[3]

# Display the results, one line per model
for (model_id in seq_along(aic_by_model)) {
  cat(paste0("AIC and BIC for Model ", model_id, ": AIC ="),
      aic_by_model[model_id], ", BIC =", bic_by_model[model_id], "\n")
}
## AIC and BIC for Model 1: AIC = 339.4874 , BIC = 358.1703
## AIC and BIC for Model 2: AIC = 226.3544 , BIC = 297.3493
## AIC and BIC for Model 3: AIC = 322.1136 , BIC = 602.3565

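Based on the Akaike Information Criterion (AIC) and Bayesian Information Criterion (BIC) values, Model 2, which includes both borough and cohort year, is the best model. It has the lowest AIC value (226.35) and the lowest BIC value (297.35) among the three models, indicating the best balance between model fit and complexity. The likelihood ratio test points the same way: adding cohort year to Model 1 significantly improves the fit (deviance = 141.13 on 14 degrees of freedom, p < 2e-16), whereas adding the borough-by-cohort-year interaction in Model 3 does not (deviance = 16.24 on 56 degrees of freedom, p ≈ 1).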

# Model 2
# Re-display the summary of the selected model (m2 is not refitted here, so
# the output repeats the earlier summary) as the reference for the
# simulation-based estimates that follow.
summary(m2)
## 
## Call:
## glm(formula = Grad_binary ~ Borough + Cohort_year, family = binomial, 
##     data = DATA)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -8.361e+00  1.525e+00  -5.483 4.18e-08 ***
## BoroughBrooklyn       4.200e+00  1.098e+00   3.827 0.000130 ***
## BoroughManhattan      5.338e+00  1.129e+00   4.727 2.28e-06 ***
## BoroughQueens         5.603e+00  1.137e+00   4.930 8.23e-07 ***
## BoroughStaten Island  8.542e+00  1.294e+00   6.600 4.10e-11 ***
## Cohort_year2002       2.052e-15  1.354e+00   0.000 1.000000    
## Cohort_year2003       8.404e-01  1.310e+00   0.641 0.521232    
## Cohort_year2004       1.546e+00  1.286e+00   1.202 0.229392    
## Cohort_year2005       1.851e+00  1.218e+00   1.520 0.128574    
## Cohort_year2006       3.197e+00  1.185e+00   2.698 0.006985 ** 
## Cohort_year2007       3.761e+00  1.199e+00   3.138 0.001704 ** 
## Cohort_year2008       3.475e+00  1.190e+00   2.919 0.003513 ** 
## Cohort_year2009       3.761e+00  1.199e+00   3.138 0.001704 ** 
## Cohort_year2010       4.393e+00  1.228e+00   3.576 0.000348 ***
## Cohort_year2011       5.218e+00  1.299e+00   4.018 5.87e-05 ***
## Cohort_year2012       7.336e+00  1.618e+00   4.535 5.76e-06 ***
## Cohort_year2013       8.829e+00  1.729e+00   5.105 3.31e-07 ***
## Cohort_year2014       8.447e+00  1.754e+00   4.817 1.46e-06 ***
## Cohort_year2015       8.447e+00  2.000e+00   4.225 2.39e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 424.58  on 309  degrees of freedom
## Residual deviance: 188.35  on 291  degrees of freedom
## AIC: 226.35
## 
## Number of Fisher Scoring iterations: 7

Using the Clarify Package

# Simulate coefficients
# Fix the RNG seed so the simulated draws, and every estimate and interval
# derived from them, are reproducible.
set.seed(123)
# Draw simulated coefficient vectors for m2. Per the printout below this gives
# 1000 draws of the 19 coefficients from a multivariate normal distribution.
sim_coefs <- sim(m2)

print(sim_coefs)
## A `clarify_sim` object
##  - 19 coefficients, 1000 simulated values
##  - sampled distribution: multivariate normal
##  - original fitting function call:
## 
## glm(formula = Grad_binary ~ Borough + Cohort_year, family = binomial, 
##     data = DATA)
# Average adjusted predictions for Borough.
# For each borough level, sim_ame() reports E[Y(level)]: the predicted
# probability of Grad_binary = 1 averaged over the data with Borough set to
# that level, with a 95% simulation interval.
# No `contrast` is requested. A contrast such as the risk difference ("rd") is
# only formed when the focal variable has exactly two levels; Borough has
# five, so the argument had no effect on the estimates (the output contains
# only E[Y(.)] rows and no RD row).
sim_borough <- sim_ame(sim_coefs, var = "Borough", verbose = FALSE)

summary(sim_borough)
##                     Estimate  2.5 % 97.5 %
## E[Y(Bronx)]           0.1290 0.0793 0.2105
## E[Y(Brooklyn)]        0.4839 0.3914 0.5815
## E[Y(Manhattan)]       0.6290 0.5363 0.7088
## E[Y(Queens)]          0.6613 0.5677 0.7502
## E[Y(Staten Island)]   0.9194 0.8324 0.9574
# Plot the average adjusted predictions for Borough
plot(sim_borough)

# Average adjusted predictions for Cohort_year.
# For each cohort year, sim_ame() reports E[Y(year)]: the predicted
# probability of Grad_binary = 1 averaged over the data with Cohort_year set
# to that year, with a 95% simulation interval.
# No `contrast` is requested. A contrast such as the risk difference ("rd") is
# only formed when the focal variable has exactly two levels; Cohort_year has
# fifteen, so the argument had no effect on the estimates (the output contains
# only E[Y(.)] rows and no RD row).
sim_year <- sim_ame(sim_coefs, var = "Cohort_year", verbose = FALSE)

summary(sim_year)
##            Estimate  2.5 % 97.5 %
## E[Y(2001)]   0.1333 0.0361 0.3242
## E[Y(2002)]   0.1333 0.0349 0.3245
## E[Y(2003)]   0.2000 0.0830 0.3989
## E[Y(2004)]   0.2667 0.1260 0.4732
## E[Y(2005)]   0.3000 0.1714 0.4699
## E[Y(2006)]   0.4800 0.3297 0.6279
## E[Y(2007)]   0.5600 0.4096 0.6915
## E[Y(2008)]   0.5200 0.3722 0.6546
## E[Y(2009)]   0.5600 0.4052 0.6883
## E[Y(2010)]   0.6400 0.4830 0.7492
## E[Y(2011)]   0.7200 0.5662 0.8307
## E[Y(2012)]   0.8400 0.7165 0.9311
## E[Y(2013)]   0.9200 0.8162 0.9776
## E[Y(2014)]   0.9000 0.7893 0.9744
## E[Y(2015)]   0.9000 0.7309 0.9889
# Plot the average adjusted predictions for Cohort_year
plot(sim_year)

# Predicted probability for every Borough x Cohort_year combination.
# The grid values are the factor levels stored in DATA.
borough_values <- levels(DATA[["Borough"]])
cohort_year_values <- levels(DATA[["Cohort_year"]])

# One entry per predictor; sim_setx() crosses the supplied values
prediction_grid <- list(
  Borough = borough_values,
  Cohort_year = cohort_year_values
)

sim_pred <- sim_setx(sim_coefs, x = prediction_grid, verbose = FALSE)

summary(sim_pred)
##                                                 Estimate    2.5 %   97.5 %
## Borough = "Bronx", Cohort_year = "2001"         2.34e-04 1.24e-05 4.54e-03
## Borough = "Brooklyn", Cohort_year = "2001"      1.54e-02 1.51e-03 1.48e-01
## Borough = "Manhattan", Cohort_year = "2001"     4.64e-02 5.06e-03 3.17e-01
## Borough = "Queens", Cohort_year = "2001"        5.96e-02 7.05e-03 3.75e-01
## Borough = "Staten Island", Cohort_year = "2001" 5.45e-01 1.52e-01 8.89e-01
## Borough = "Bronx", Cohort_year = "2002"         2.34e-04 1.19e-05 4.70e-03
## Borough = "Brooklyn", Cohort_year = "2002"      1.54e-02 1.45e-03 1.15e-01
## Borough = "Manhattan", Cohort_year = "2002"     4.64e-02 4.19e-03 3.02e-01
## Borough = "Queens", Cohort_year = "2002"        5.96e-02 6.05e-03 3.76e-01
## Borough = "Staten Island", Cohort_year = "2002" 5.45e-01 1.46e-01 8.85e-01
## Borough = "Bronx", Cohort_year = "2003"         5.41e-04 3.51e-05 7.80e-03
## Borough = "Brooklyn", Cohort_year = "2003"      3.49e-02 5.08e-03 2.13e-01
## Borough = "Manhattan", Cohort_year = "2003"     1.01e-01 1.62e-02 4.49e-01
## Borough = "Queens", Cohort_year = "2003"        1.28e-01 2.30e-02 4.89e-01
## Borough = "Staten Island", Cohort_year = "2003" 7.35e-01 3.38e-01 9.49e-01
## Borough = "Bronx", Cohort_year = "2004"         1.10e-03 8.39e-05 1.26e-02
## Borough = "Brooklyn", Cohort_year = "2004"      6.82e-02 1.22e-02 3.10e-01
## Borough = "Manhattan", Cohort_year = "2004"     1.86e-01 3.93e-02 5.54e-01
## Borough = "Queens", Cohort_year = "2004"        2.29e-01 5.78e-02 6.25e-01
## Borough = "Staten Island", Cohort_year = "2004" 8.49e-01 4.57e-01 9.77e-01
## Borough = "Bronx", Cohort_year = "2005"         1.49e-03 1.39e-04 1.90e-02
## Borough = "Brooklyn", Cohort_year = "2005"      9.03e-02 2.20e-02 2.82e-01
## Borough = "Manhattan", Cohort_year = "2005"     2.37e-01 6.90e-02 5.63e-01
## Borough = "Queens", Cohort_year = "2005"        2.88e-01 8.72e-02 6.23e-01
## Borough = "Staten Island", Cohort_year = "2005" 8.84e-01 5.45e-01 9.77e-01
## Borough = "Bronx", Cohort_year = "2006"         5.68e-03 5.64e-04 5.71e-02
## Borough = "Brooklyn", Cohort_year = "2006"      2.76e-01 9.84e-02 5.70e-01
## Borough = "Manhattan", Cohort_year = "2006"     5.43e-01 2.62e-01 7.88e-01
## Borough = "Queens", Cohort_year = "2006"        6.08e-01 3.32e-01 8.35e-01
## Borough = "Staten Island", Cohort_year = "2006" 9.67e-01 8.29e-01 9.94e-01
## Borough = "Bronx", Cohort_year = "2007"         9.94e-03 1.16e-03 9.49e-02
## Borough = "Brooklyn", Cohort_year = "2007"      4.01e-01 1.75e-01 7.08e-01
## Borough = "Manhattan", Cohort_year = "2007"     6.76e-01 3.84e-01 8.85e-01
## Borough = "Queens", Cohort_year = "2007"        7.31e-01 4.45e-01 9.07e-01
## Borough = "Staten Island", Cohort_year = "2007" 9.81e-01 8.92e-01 9.97e-01
## Borough = "Bronx", Cohort_year = "2008"         7.49e-03 8.38e-04 6.94e-02
## Borough = "Brooklyn", Cohort_year = "2008"      3.35e-01 1.27e-01 6.29e-01
## Borough = "Manhattan", Cohort_year = "2008"     6.11e-01 3.25e-01 8.28e-01
## Borough = "Queens", Cohort_year = "2008"        6.72e-01 3.82e-01 8.74e-01
## Borough = "Staten Island", Cohort_year = "2008" 9.75e-01 8.73e-01 9.96e-01
## Borough = "Bronx", Cohort_year = "2009"         9.94e-03 1.04e-03 9.96e-02
## Borough = "Brooklyn", Cohort_year = "2009"      4.01e-01 1.69e-01 6.89e-01
## Borough = "Manhattan", Cohort_year = "2009"     6.76e-01 3.99e-01 8.69e-01
## Borough = "Queens", Cohort_year = "2009"        7.31e-01 4.23e-01 9.08e-01
## Borough = "Staten Island", Cohort_year = "2009" 9.81e-01 9.03e-01 9.97e-01
## Borough = "Bronx", Cohort_year = "2010"         1.85e-02 2.19e-03 1.82e-01
## Borough = "Brooklyn", Cohort_year = "2010"      5.58e-01 2.60e-01 8.16e-01
## Borough = "Manhattan", Cohort_year = "2010"     7.97e-01 5.36e-01 9.30e-01
## Borough = "Queens", Cohort_year = "2010"        8.37e-01 5.83e-01 9.52e-01
## Borough = "Staten Island", Cohort_year = "2010" 9.90e-01 9.38e-01 9.98e-01
## Borough = "Bronx", Cohort_year = "2011"         4.14e-02 4.50e-03 3.74e-01
## Borough = "Brooklyn", Cohort_year = "2011"      7.42e-01 3.97e-01 9.29e-01
## Borough = "Manhattan", Cohort_year = "2011"     9.00e-01 6.46e-01 9.76e-01
## Borough = "Queens", Cohort_year = "2011"        9.21e-01 7.05e-01 9.83e-01
## Borough = "Staten Island", Cohort_year = "2011" 9.95e-01 9.65e-01 9.99e-01
## Borough = "Bronx", Cohort_year = "2012"         2.64e-01 5.98e-02 6.73e-01
## Borough = "Brooklyn", Cohort_year = "2012"      9.60e-01 6.73e-01 9.96e-01
## Borough = "Manhattan", Cohort_year = "2012"     9.87e-01 8.61e-01 9.99e-01
## Borough = "Queens", Cohort_year = "2012"        9.90e-01 8.89e-01 9.99e-01
## Borough = "Staten Island", Cohort_year = "2012" 9.99e-01 9.91e-01 1.00e+00
## Borough = "Bronx", Cohort_year = "2013"         6.15e-01 2.22e-01 8.93e-01
## Borough = "Brooklyn", Cohort_year = "2013"      9.91e-01 8.69e-01 9.99e-01
## Borough = "Manhattan", Cohort_year = "2013"     9.97e-01 9.56e-01 1.00e+00
## Borough = "Queens", Cohort_year = "2013"        9.98e-01 9.63e-01 1.00e+00
## Borough = "Staten Island", Cohort_year = "2013" 1.00e+00 9.97e-01 1.00e+00
## Borough = "Bronx", Cohort_year = "2014"         5.22e-01 1.53e-01 8.78e-01
## Borough = "Brooklyn", Cohort_year = "2014"      9.86e-01 8.54e-01 9.99e-01
## Borough = "Manhattan", Cohort_year = "2014"     9.96e-01 9.44e-01 1.00e+00
## Borough = "Queens", Cohort_year = "2014"        9.97e-01 9.56e-01 1.00e+00
## Borough = "Staten Island", Cohort_year = "2014" 1.00e+00 9.97e-01 1.00e+00
## Borough = "Bronx", Cohort_year = "2015"         5.22e-01 6.52e-02 9.45e-01
## Borough = "Brooklyn", Cohort_year = "2015"      9.86e-01 7.14e-01 9.99e-01
## Borough = "Manhattan", Cohort_year = "2015"     9.96e-01 8.95e-01 1.00e+00
## Borough = "Queens", Cohort_year = "2015"        9.97e-01 9.12e-01 1.00e+00
## Borough = "Staten Island", Cohort_year = "2015" 1.00e+00 9.93e-01 1.00e+00
# Plot the predictions
# sim_pred holds one estimate per Borough x Cohort_year combination
# (5 boroughs x 15 cohort years = 75 predictions, as listed in the summary).
plot(sim_pred)

# Predictions for the first (2001) and last (2015) cohort year within each
# borough. Each object holds the two predicted probabilities side by side;
# no contrast between the two years is computed here.

# Predicted probabilities for one borough at Cohort_year 2001 and 2015,
# based on the simulated coefficients in sim_coefs.
predict_2001_and_2015 <- function(borough) {
  sim_setx(sim_coefs,
           x = list(Borough = borough,
                    Cohort_year = c("2001", "2015")),
           verbose = FALSE)
}

sim_Bronx_diff <- predict_2001_and_2015("Bronx")
sim_Brooklyn_diff <- predict_2001_and_2015("Brooklyn")
sim_Queens_diff <- predict_2001_and_2015("Queens")
sim_Manhattan_diff <- predict_2001_and_2015("Manhattan")
sim_StatenIsland_diff <- predict_2001_and_2015("Staten Island")

# Summaries and plots of the 2001 and 2015 predictions, borough by borough.
# Each summary lists the predicted probability for the two cohort years with
# its 95% simulation interval; the objects contain no difference estimate, so
# the change between the years has to be read off the two rows.

# Bronx
summary(sim_Bronx_diff)
##                      Estimate    2.5 %   97.5 %
## Cohort_year = "2001" 2.34e-04 1.24e-05 4.54e-03
## Cohort_year = "2015" 5.22e-01 6.52e-02 9.45e-01
plot(sim_Bronx_diff)

# Brooklyn
summary(sim_Brooklyn_diff)
##                      Estimate   2.5 %  97.5 %
## Cohort_year = "2001"  0.01535 0.00151 0.14801
## Cohort_year = "2015"  0.98643 0.71360 0.99941
plot(sim_Brooklyn_diff)

# Queens
summary(sim_Queens_diff)
##                      Estimate   2.5 %  97.5 %
## Cohort_year = "2001"  0.05961 0.00705 0.37545
## Cohort_year = "2015"  0.99663 0.91223 0.99988
plot(sim_Queens_diff)

# Manhattan
summary(sim_Manhattan_diff)
##                      Estimate   2.5 %  97.5 %
## Cohort_year = "2001"  0.04639 0.00506 0.31724
## Cohort_year = "2015"  0.99561 0.89544 0.99985
plot(sim_Manhattan_diff)

# Staten Island
summary(sim_StatenIsland_diff)
##                      Estimate 2.5 % 97.5 %
## Cohort_year = "2001"    0.545 0.152  0.889
## Cohort_year = "2015"    1.000 0.993  1.000
plot(sim_StatenIsland_diff)

Discussion

The results of Model 2 suggest that both borough and cohort year significantly influenced whether a cohort of students achieved a graduation rate of 70% or higher. Compared to the Bronx, cohorts from Brooklyn, Manhattan, Queens, and Staten Island were more likely to achieve a graduation rate of 70% or higher. Staten Island showed the largest increase in odds relative to the Bronx, with a log-odds coefficient of 8.542. The average predicted probabilities for each borough (the E[Y(.)] estimates from sim_ame) further highlight these differences, with Staten Island having the highest probability at 0.9194, followed by Queens (0.6613), Manhattan (0.6290), and Brooklyn (0.4839), while the Bronx had the lowest probability at 0.1290. The cohort year, which refers to the year when a cohort began ninth grade in a given school, also revealed a positive trend. Later cohorts, particularly from 2006 to 2015, had significantly higher odds of achieving the 70% graduation rate than the 2001 reference cohort. The average predicted probability of achieving this rate was 0.4800 for the 2006 cohort and, apart from a small dip in 2008, rose to a peak of 0.9200 for the 2013 cohort and stood at 0.9000 for the 2014 and 2015 cohorts. In contrast, earlier cohorts had lower probabilities, with the 2001 and 2002 cohorts at 0.1333 and the 2005 cohort at 0.3000. Additionally, the predictions for specific borough and cohort year combinations reveal that Staten Island showed a high likelihood of achieving the 70% graduation rate across many years, with predictions reaching 0.999 for the 2012 cohort and remaining consistently high for the following years (2013–2015). In contrast, the Bronx showed much lower probabilities across all years.

Further analysis comparing the 2001 and 2015 cohorts within each borough reveals major differences in the predicted probabilities of achieving a 70% graduation rate. For the Bronx, the probability increased from 0.000234 in 2001 to 0.522 in 2015. In Brooklyn, the probability increased from 0.01535 in 2001 to 0.98643 in 2015. In Queens, the probability also increased from 0.05961 in 2001 to 0.99663 in 2015. Manhattan showed a similar pattern, with the probability increasing from 0.04639 in 2001 to 0.99561 in 2015. Staten Island started from by far the highest baseline, with the probability rising from 0.545 in 2001 to 1.000 in 2015; because of that high starting point, its absolute increase (about 0.46) was the smallest of the five boroughs, while Brooklyn showed the largest (about 0.97). These differences highlight the improvements in graduation rates between 2001 and 2015, with all boroughs showing an upward trend, particularly in later years. These findings imply that graduation rates improved over time, with later cohorts being more likely to meet the 70% benchmark compared to earlier cohorts. The results also suggest that borough mattered throughout, with Staten Island cohorts the most likely to meet the benchmark in every year. This could have been influenced by various factors such as educational policies, school resources, and community engagement. To gain a better understanding of the factors leading to changes in graduation rates, further investigation is required.