# One-time setup for knitting this report to PDF.
# The guards keep the analysis from reinstalling anything on every run.
if (!requireNamespace("tinytex", quietly = TRUE)) {
  install.packages("tinytex")
}
# Install the TinyTeX LaTeX distribution only when it is not already in use.
if (!tinytex::is_tinytex()) {
  tinytex::install_tinytex()
}

library(tidyverse)
library(janitor)
library(knitr)
library(scales)

# Load the Fall 2024 MAT0993 enrollment file, standardise the column names,
# and derive the analysis variables used throughout the report.
mat0993 <- "data/cleaned_2024FallMAT0993.csv" %>%
  read_csv(show_col_types = FALSE) %>%
  clean_names() %>%
  mutate(
    # ANC, BNC and CNC count as successful completion; any other grade does not.
    successful = final_grade %in% c("ANC", "BNC", "CNC"),
    # A recorded score of 0 becomes NA in the new *_clean columns.
    across(
      c(act_pull, sat_pull),
      function(score) na_if(score, 0),
      .names = "{.col}_clean"
    )
  )

# Number of records in each ESP group and each group's share of all records.
esp_counts <- mat0993 %>%
  group_by(esp) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(percent = n / sum(n))

# Count of each final grade within each ESP group, plus the grade's share of
# that group's records (shares sum to 1 within an ESP group).
grade_by_esp <- mat0993 %>%
  group_by(esp, final_grade) %>%
  # "drop_last" leaves the result grouped by esp for the within-group share.
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n)) %>%
  ungroup()

# Per-ESP-group totals, successful/unsuccessful counts, and success rate.
success_by_esp <- mat0993 %>%
  group_by(esp) %>%
  summarise(
    total_students = n(),
    successful_students = sum(successful),
    .groups = "drop"
  ) %>%
  # Derived columns are computed row-wise from the two counts above.
  mutate(
    unsuccessful_students = total_students - successful_students,
    success_rate = successful_students / total_students
  )

# Per-ESP-group availability, mean, and median of the cleaned ACT and SAT
# scores. Missing scores are excluded from the means and medians.
test_score_summary <- mat0993 %>%
  group_by(esp) %>%
  summarise(
    total_students = n(),
    # Available = group size minus the number of missing scores.
    act_available = total_students - sum(is.na(act_pull_clean)),
    mean_act = mean(act_pull_clean, na.rm = TRUE),
    median_act = median(act_pull_clean, na.rm = TRUE),
    sat_available = total_students - sum(is.na(sat_pull_clean)),
    mean_sat = mean(sat_pull_clean, na.rm = TRUE),
    median_sat = median(sat_pull_clean, na.rm = TRUE),
    .groups = "drop"
  )

# Cross-tabulate ESP status (rows) against each outcome (columns), then run a
# chi-squared test of independence on each table.
grade_table <- table(mat0993[["esp"]], mat0993[["final_grade"]])
success_table <- table(mat0993[["esp"]], mat0993[["successful"]])

grade_chisq <- chisq.test(grade_table)
success_chisq <- chisq.test(success_table)

Executive Summary

This report presents a preliminary analysis of Fall 2024 MAT0993 enrollment data to explore differences in academic outcomes between students participating in the Emerging Scholar Program (ESP) and their non-ESP peers.

The dataset includes 490 enrollment records. Of these, 162 records are associated with ESP students, representing 33.1% of the dataset.

The primary purpose of this preliminary analysis is to identify whether ESP participation is associated with different MAT0993 course outcomes.

Research Questions

This analysis addresses the following preliminary questions:

Dataset Overview

# Render ESP participation counts, with the share formatted as a percentage
# label to one decimal place.
esp_counts %>%
  mutate(percent = scales::percent(percent, accuracy = 0.1)) %>%
  kable(
    col.names = c("ESP Status", "Count", "Percent"),
    caption = "ESP Participation in MAT0993"
  )
ESP Participation in MAT0993
ESP Status Count Percent
no 328 66.9%
yes 162 33.1%

The MAT0993 dataset includes both ESP and non-ESP students, which allows for a preliminary comparison of student outcomes by ESP participation status.

Final Grade Distribution

# Render the grade distribution, ordered by ESP status and then final grade,
# with within-group shares formatted as percentage labels.
grade_by_esp %>%
  arrange(esp, final_grade) %>%
  mutate(percent = scales::percent(percent, accuracy = 0.1)) %>%
  kable(
    col.names = c("ESP Status", "Final Grade", "Count", "Percent"),
    caption = "Final Grade Distribution by ESP Status"
  )
Final Grade Distribution by ESP Status
ESP Status Final Grade Count Percent
no ANC 28 8.5%
no BNC 65 19.8%
no CNC 63 19.2%
no UNC 154 47.0%
no W 18 5.5%
yes ANC 71 43.8%
yes BNC 30 18.5%
yes CNC 16 9.9%
yes UNC 42 25.9%
yes W 3 1.9%
# Side-by-side bars of each final grade's within-group share, one bar per
# ESP status.
grade_by_esp %>%
  ggplot(aes(x = final_grade, y = percent, fill = esp)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = label_percent()) +
  labs(
    title = "MAT0993 Final Grade Distribution by ESP Participation",
    x = "Final Grade",
    y = "Percent of Students",
    fill = "ESP Status"
  )

Interpretation

The final grade distribution provides an initial view of how ESP and non-ESP students performed in MAT0993. Particular attention should be paid to the proportion of students earning ANC and UNC outcomes, as these categories show the clearest contrast between the groups.

Course Success Rates

For this preliminary analysis, successful course completion is defined as earning an ANC, BNC, or CNC final grade. UNC and W are treated as unsuccessful outcomes.

# Render the success summary, with the rate formatted as a percentage label.
success_table_labels <- NULL
success_by_esp %>%
  mutate(success_rate = scales::percent(success_rate, accuracy = 0.1)) %>%
  kable(
    col.names = c(
      "ESP Status", "Total Students", "Successful Students",
      "Unsuccessful Students", "Success Rate"
    ),
    caption = "Course Success Rate by ESP Status"
  )
Course Success Rate by ESP Status
ESP Status Total Students Successful Students Unsuccessful Students Success Rate
no 328 156 172 47.6%
yes 162 117 45 72.2%
# One bar per ESP status showing that group's course success rate.
success_by_esp %>%
  ggplot(aes(x = esp, y = success_rate, fill = esp)) +
  geom_col() +
  scale_y_continuous(labels = label_percent()) +
  labs(
    title = "MAT0993 Success Rate by ESP Participation",
    x = "ESP Status",
    y = "Success Rate",
    fill = "ESP Status"
  )

Interpretation

The success rate comparison provides a clearer summary of student outcomes than the full grade distribution alone. This measure allows ESP and non-ESP students to be compared based on whether they completed the course successfully.

Academic Preparation Indicators

ACT and SAT scores were examined as available proxy indicators of incoming academic preparation. In this dataset, scores of 0 were treated as missing values rather than valid test scores.

# Render the test-score summary with means and medians rounded to one
# decimal place; the count columns are left untouched.
test_score_summary %>%
  mutate(
    across(
      c(mean_act, median_act, mean_sat, median_sat),
      function(value) round(value, 1)
    )
  ) %>%
  kable(
    col.names = c(
      "ESP Status", "Total Students",
      "ACT Available", "Mean ACT", "Median ACT",
      "SAT Available", "Mean SAT", "Median SAT"
    ),
    caption = "Academic Preparation Indicators by ESP Status"
  )
Academic Preparation Indicators by ESP Status
ESP Status Total Students ACT Available Mean ACT Median ACT SAT Available Mean SAT Median SAT
no 328 56 19.3 19.0 147 979.3 960
yes 162 22 17.7 17.5 81 964.6 950
# Overlaid, semi-transparent ACT histograms, one per ESP status.
# Records without an ACT score are dropped by ggplot2 with a warning.
mat0993 %>%
  ggplot(aes(x = act_pull_clean, fill = esp)) +
  geom_histogram(position = "identity", bins = 15, alpha = 0.6) +
  labs(
    title = "ACT Score Distribution by ESP Participation",
    x = "ACT Score",
    y = "Number of Students",
    fill = "ESP Status"
  )
## Warning: Removed 412 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Overlaid, semi-transparent SAT histograms, one per ESP status.
# Records without an SAT score are dropped by ggplot2 with a warning.
mat0993 %>%
  ggplot(aes(x = sat_pull_clean, fill = esp)) +
  geom_histogram(position = "identity", bins = 15, alpha = 0.6) +
  labs(
    title = "SAT Score Distribution by ESP Participation",
    x = "SAT Score",
    y = "Number of Students",
    fill = "ESP Status"
  )
## Warning: Removed 262 rows containing non-finite outside the scale range
## (`stat_bin()`).

Interpretation

Comparing ACT and SAT scores helps determine whether differences in course outcomes may be related to differences in incoming academic preparation. These indicators should be interpreted cautiously because test score data are not available for all students: only 78 of the 490 records include an ACT score, and 228 include an SAT score.

Statistical Tests

Final Grade by ESP Status

# Print the ESP status (rows) by final grade (columns) contingency table.
grade_table
##      
##       ANC BNC CNC UNC   W
##   no   28  65  63 154  18
##   yes  71  30  16  42   3
# Print the chi-squared test result for the ESP-by-final-grade table.
grade_chisq
## 
##  Pearson's Chi-squared test
## 
## data:  grade_table
## X-squared = 88.125, df = 4, p-value < 2.2e-16

The chi-square test assesses whether the final grade distribution differs by ESP participation status. The result is statistically significant (X-squared = 88.13, df = 4, p < 0.001), indicating that final grade outcomes are associated with ESP status in this dataset.

Course Success by ESP Status

# Print the ESP status (rows) by course success (columns) contingency table.
success_table
##      
##       FALSE TRUE
##   no    172  156
##   yes    45  117
# Print the chi-squared test result for the ESP-by-success table.
success_chisq
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  success_table
## X-squared = 25.739, df = 1, p-value = 3.908e-07

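The chi-square test for course success evaluates whether successful completion rates differ by ESP participation status. The difference is statistically significant (X-squared = 25.74, df = 1, p < 0.001): 72.2% of ESP students completed the course successfully, compared with 47.6% of non-ESP students.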

Multivariate Analysis

This section uses logistic regression to examine whether ESP participation is associated with successful completion of MAT0993. Because the outcome variable is binary, successful or not successful, logistic regression is appropriate.

For this analysis, successful completion is defined as earning an ANC, BNC, or CNC final grade. UNC and W are treated as unsuccessful outcomes.

Model 1: ESP Participation Only

# Model 1: logistic regression of course success on ESP status alone,
# with no other predictors.
model1 <- glm(
  successful ~ esp,
  data = mat0993,
  family = binomial
)

# Print the coefficient table and fit statistics.
summary(model1)
## 
## Call:
## glm(formula = successful ~ esp, family = binomial, data = mat0993)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.09764    0.11056  -0.883    0.377    
## espyes       1.05315    0.20735   5.079 3.79e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 672.87  on 489  degrees of freedom
## Residual deviance: 645.36  on 488  degrees of freedom
## AIC: 649.36
## 
## Number of Fisher Scoring iterations: 4
# Exponentiate the model coefficients to express them as odds ratios,
# then print them.
model1_odds <- exp(coef(model1))
model1_odds
## (Intercept)      espyes 
##   0.9069767   2.8666667

Interpretation

Model 1 examines the relationship between ESP participation and successful completion of MAT0993 without accounting for any other student characteristics.

The coefficient for ESP participation is positive and statistically significant. This indicates that ESP students were significantly more likely to successfully complete MAT0993 than non-ESP students.

The odds ratio for ESP participation is 2.87. This means that the odds of successfully completing MAT0993 were approximately 2.9 times as high for students participating in ESP as for students who did not participate in ESP.

This finding suggests a strong positive association between ESP participation and course success. However, this model does not establish causation because it does not account for other factors that may also influence student outcomes, such as academic preparation, demographics, or academic program.

Model 2: ESP Participation and Academic Preparation

# Model 2: adds the cleaned ACT score, cleaned SAT score, and test-optional
# status to the ESP predictor.
# NOTE(review): records with a missing value in any model variable are
# excluded from the fit, so check the "observations deleted due to
# missingness" line in the summary before interpreting the coefficients.
model2 <- glm(
  successful ~ esp + act_pull_clean + sat_pull_clean + test_optional,
  data = mat0993,
  family = binomial
)

# Print the coefficient table and fit statistics.
summary(model2)
## 
## Call:
## glm(formula = successful ~ esp + act_pull_clean + sat_pull_clean + 
##     test_optional, family = binomial, data = mat0993)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    -4.0744293  2.8971035  -1.406   0.1596  
## espyes          1.2666213  0.7224271   1.753   0.0796 .
## act_pull_clean  0.1820815  0.3088506   0.590   0.5555  
## sat_pull_clean  0.0007467  0.0075160   0.099   0.9209  
## test_optionalY -0.1376968  0.7483015  -0.184   0.8540  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.231  on 60  degrees of freedom
## Residual deviance: 75.475  on 56  degrees of freedom
##   (429 observations deleted due to missingness)
## AIC: 85.475
## 
## Number of Fisher Scoring iterations: 4
# Exponentiate the model coefficients to express them as odds ratios,
# then print them.
model2_odds <- exp(coef(model2))
model2_odds
##    (Intercept)         espyes act_pull_clean sat_pull_clean test_optionalY 
##     0.01700192     3.54884167     1.19971193     1.00074693     0.87136289

Interpretation

Model 2 adds academic preparation indicators to the analysis, including ACT score, SAT score, and test-optional status.

This model helps assess whether ESP participation remains associated with successful completion after accounting for available indicators of incoming academic preparation.

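The odds ratio for ESP participation in Model 2 is 3.55, but the coefficient is not statistically significant at the 0.05 level (p = 0.080). The estimate remains above 1, which is consistent in direction with Model 1, but this model does not provide reliable evidence that ESP participation is still positively associated with course success after accounting for academic preparation.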

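Because the model uses only records with complete data for every predictor, 429 of the 490 records were excluded for missing values, leaving 61 observations. This model should therefore be interpreted cautiously: the sample is small and may not be representative of all MAT0993 students.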

Model 3: ESP Participation, Academic Preparation, and Demographics

# Model 3: adds gender and ethnicity to the Model 2 predictors.
# NOTE(review): records with a missing value in any model variable are
# excluded from the fit, and categorical levels with very few records can
# produce extreme, unstable estimates; inspect the standard errors in the
# summary before interpreting individual coefficients.
model3 <- glm(
  successful ~ esp + gender + ethnicity + act_pull_clean + sat_pull_clean + test_optional,
  data = mat0993,
  family = binomial
)

# Print the coefficient table and fit statistics.
summary(model3)
## 
## Call:
## glm(formula = successful ~ esp + gender + ethnicity + act_pull_clean + 
##     sat_pull_clean + test_optional, family = binomial, data = mat0993)
## 
## Coefficients:
##                                         Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                           -3.976e+00  3.459e+00  -1.149    0.250  
## espyes                                 9.041e-01  8.845e-01   1.022    0.307  
## genderM                               -1.401e+00  7.705e-01  -1.819    0.069 .
## genderN                               -3.994e-01  9.137e-01  -0.437    0.662  
## ethnicityAsian                        -7.359e-01  1.113e+00  -0.661    0.508  
## ethnicityBlack or African American    -3.592e-01  1.002e+00  -0.358    0.720  
## ethnicityHispanic or Latino            1.695e+00  1.391e+00   1.218    0.223  
## ethnicityMiddle Eastern/North African -8.763e-01  1.483e+00  -0.591    0.555  
## ethnicityUnknown                       1.575e+01  1.455e+03   0.011    0.991  
## ethnicityWhite                        -1.277e+00  9.963e-01  -1.281    0.200  
## act_pull_clean                         2.292e-01  3.404e-01   0.673    0.501  
## sat_pull_clean                         5.803e-04  8.090e-03   0.072    0.943  
## test_optionalY                         1.864e-01  8.703e-01   0.214    0.830  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.231  on 60  degrees of freedom
## Residual deviance: 65.448  on 48  degrees of freedom
##   (429 observations deleted due to missingness)
## AIC: 91.448
## 
## Number of Fisher Scoring iterations: 14
# Exponentiate the model coefficients to express them as odds ratios,
# then print them.
model3_odds <- exp(coef(model3))
model3_odds
##                           (Intercept)                                espyes 
##                          1.876344e-02                          2.469792e+00 
##                               genderM                               genderN 
##                          2.462837e-01                          6.707346e-01 
##                        ethnicityAsian    ethnicityBlack or African American 
##                          4.790598e-01                          6.982346e-01 
##           ethnicityHispanic or Latino ethnicityMiddle Eastern/North African 
##                          5.444165e+00                          4.163293e-01 
##                      ethnicityUnknown                        ethnicityWhite 
##                          6.915127e+06                          2.789858e-01 
##                        act_pull_clean                        sat_pull_clean 
##                          1.257635e+00                          1.000580e+00 
##                        test_optionalY 
##                          1.204892e+00

Interpretation

Model 3 incorporates demographic and academic background variables into the analysis. This model examines whether ESP participation remains associated with successful course completion after accounting for gender, ethnicity, ACT score, SAT score, and test-optional status.

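The odds ratio for ESP participation in Model 3 is 2.47, but the coefficient is not statistically significant (p = 0.307). The estimate is above 1, so its direction is consistent with the earlier models, but this model, which is fit to only 61 complete records with 12 predictor terms, does not show that ESP participation is associated with MAT0993 success after accounting for these student background characteristics. The very large estimate and standard error for the "Unknown" ethnicity category (estimate 15.75, standard error 1,455) also indicate that the model is unstable for that category, most likely because it contains very few students.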

This model should be interpreted as exploratory and, at most, as evidence of association, not causation. Additional longitudinal data and further controls would be needed to make stronger claims about the impact of ESP participation.

Summary of Model Findings

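Across the logistic regression models, the key question is whether the ESP coefficient remains positive and statistically significant as additional variables are added. The coefficient remains positive in all three models (odds ratios of 2.87, 3.55, and 2.47), but it is statistically significant only in Model 1 (p < 0.001); it is not significant at the 0.05 level in Model 2 (p = 0.080) or Model 3 (p = 0.307). Because Models 2 and 3 are fit to only 61 of the 490 records, the loss of significance may reflect the much smaller sample rather than the added controls, so the adjusted models neither confirm nor rule out the preliminary finding that ESP participation is associated with higher odds of successful MAT0993 completion.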

These results can support a more nuanced preliminary conclusion: ESP participation appears to be positively associated with course success, and this relationship should be investigated further using additional semesters, student-level longitudinal records, and degree completion outcomes.

Preliminary Findings

Based on this preliminary analysis:

Recommendations for Further Analysis

Limitations

This analysis is preliminary and based on a single course in a single semester. The dataset does not currently include degree completion, STEM employment, or long-term persistence outcomes. As a result, this report should be interpreted as an early exploration of course-level outcomes rather than a full evaluation of ESP impact.