# One-time setup: install the tinytex package and the TinyTeX LaTeX
# distribution (presumably needed to render this report to PDF -- confirm).
# Guarded so that re-running the script does not reinstall on every run.
if (!requireNamespace("tinytex", quietly = TRUE)) {
  install.packages("tinytex")
  tinytex::install_tinytex()
}
library(tidyverse)
library(janitor)
library(knitr)
library(scales)
# Read the cleaned MAT0993 enrollment extract, standardise the column names,
# and derive the variables used throughout the analysis.
mat0993 <- "data/cleaned_2024FallMAT0993.csv" %>%
  read_csv(show_col_types = FALSE) %>%
  clean_names() %>%
  mutate(
    # TRUE for ANC, BNC, or CNC; every other value (including NA) is FALSE
    # because %in% never returns NA.
    successful = final_grade %in% c("ANC", "BNC", "CNC"),
    # A recorded score of 0 is recoded to NA.
    act_pull_clean = na_if(act_pull, 0),
    sat_pull_clean = na_if(sat_pull, 0)
  )
# Record count for each ESP status and its share of all records.
esp_counts <- mat0993 %>%
  count(esp) %>%
  mutate(percent = prop.table(n))

# Record count for each ESP status / final grade pair; `percent` is the
# share within that ESP status, so each status sums to 1.
grade_by_esp <- mat0993 %>%
  count(esp, final_grade) %>%
  group_by(esp) %>%
  mutate(percent = prop.table(n)) %>%
  ungroup()
# Successful / unsuccessful counts and the success rate for each ESP status.
success_by_esp <- mat0993 %>%
  group_by(esp) %>%
  summarise(
    total_students = n(),
    successful_students = sum(successful),
    # `successful` is a complete logical vector, so the negated count equals
    # total_students - successful_students.
    unsuccessful_students = sum(!successful),
    success_rate = successful_students / total_students,
    .groups = "drop"
  )
# ACT and SAT availability and central tendency for each ESP status.
# Missing scores (NA) are excluded from the means and medians.
test_score_summary <- mat0993 %>%
  group_by(esp) %>%
  summarise(
    total_students = n(),

    # ACT: number of non-missing scores, then mean and median of those scores.
    act_available = sum(!is.na(act_pull_clean)),
    mean_act = mean(act_pull_clean, na.rm = TRUE),
    median_act = median(act_pull_clean, na.rm = TRUE),

    # SAT: same three statistics.
    sat_available = sum(!is.na(sat_pull_clean)),
    mean_sat = mean(sat_pull_clean, na.rm = TRUE),
    median_sat = median(sat_pull_clean, na.rm = TRUE),

    .groups = "drop"
  )
# ESP status by final grade: contingency table and Pearson chi-square test.
grade_table <- table(mat0993[["esp"]], mat0993[["final_grade"]])
grade_chisq <- chisq.test(grade_table)

# ESP status by success flag. For a 2 x 2 table chisq.test() applies Yates'
# continuity correction by default.
success_table <- table(mat0993[["esp"]], mat0993[["successful"]])
success_chisq <- chisq.test(success_table)
This report presents a preliminary analysis of Fall 2024 MAT0993 enrollment data to explore differences in academic outcomes between students participating in the Emerging Scholar Program (ESP) and their non-ESP peers.
The dataset includes 490 enrollment records. Of these, 162 records are associated with ESP students, representing 33.1% of the dataset.
The primary purpose of this preliminary analysis is to identify whether ESP participation is associated with different MAT0993 course outcomes.
This analysis addresses the following preliminary questions:
# Render the ESP participation counts, with the share formatted to one
# decimal place. scales::percent is namespaced to keep the formatter visibly
# distinct from the `percent` column it is applied to.
esp_counts %>%
  mutate(percent = scales::percent(percent, accuracy = 0.1)) %>%
  kable(
    col.names = c("ESP Status", "Count", "Percent"),
    caption = "ESP Participation in MAT0993"
  )
| ESP Status | Count | Percent |
|---|---|---|
| no | 328 | 66.9% |
| yes | 162 | 33.1% |
The MAT0993 dataset includes both ESP and non-ESP students, which allows for a preliminary comparison of student outcomes by ESP participation status.
# Render the grade distribution sorted by ESP status and then final grade,
# with the within-status share formatted to one decimal place.
grade_by_esp %>%
  arrange(esp, final_grade) %>%
  mutate(percent = scales::percent(percent, accuracy = 0.1)) %>%
  kable(
    col.names = c("ESP Status", "Final Grade", "Count", "Percent"),
    caption = "Final Grade Distribution by ESP Status"
  )
| ESP Status | Final Grade | Count | Percent |
|---|---|---|---|
| no | ANC | 28 | 8.5% |
| no | BNC | 65 | 19.8% |
| no | CNC | 63 | 19.2% |
| no | UNC | 154 | 47.0% |
| no | W | 18 | 5.5% |
| yes | ANC | 71 | 43.8% |
| yes | BNC | 30 | 18.5% |
| yes | CNC | 16 | 9.9% |
| yes | UNC | 42 | 25.9% |
| yes | W | 3 | 1.9% |
# Side-by-side bars: share of each final grade within each ESP status.
grade_by_esp %>%
  ggplot(aes(x = final_grade, y = percent, fill = esp)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = label_percent()) +
  labs(
    title = "MAT0993 Final Grade Distribution by ESP Participation",
    x = "Final Grade",
    y = "Percent of Students",
    fill = "ESP Status"
  )
The final grade distribution provides an initial view of how ESP and non-ESP students performed in MAT0993. Particular attention should be paid to the proportion of students earning ANC and UNC outcomes, as these categories show the clearest contrast between the groups.
For this preliminary analysis, successful course completion is defined as earning an ANC, BNC, or CNC final grade. UNC and W are treated as unsuccessful outcomes.
# Render the success summary with the rate formatted to one decimal place.
success_by_esp %>%
  mutate(success_rate = scales::percent(success_rate, accuracy = 0.1)) %>%
  kable(
    col.names = c(
      "ESP Status",
      "Total Students",
      "Successful Students",
      "Unsuccessful Students",
      "Success Rate"
    ),
    caption = "Course Success Rate by ESP Status"
  )
| ESP Status | Total Students | Successful Students | Unsuccessful Students | Success Rate |
|---|---|---|---|---|
| no | 328 | 156 | 172 | 47.6% |
| yes | 162 | 117 | 45 | 72.2% |
# One bar per ESP status showing the course success rate.
success_by_esp %>%
  ggplot(aes(x = esp, y = success_rate, fill = esp)) +
  geom_col() +
  scale_y_continuous(labels = label_percent()) +
  labs(
    title = "MAT0993 Success Rate by ESP Participation",
    x = "ESP Status",
    y = "Success Rate",
    fill = "ESP Status"
  )
The success rate comparison provides a clearer summary of student outcomes than the full grade distribution alone. This measure allows ESP and non-ESP students to be compared based on whether they completed the course successfully.
ACT and SAT scores were examined as available proxy indicators of incoming academic preparation. In this dataset, scores of 0 were treated as missing values rather than valid test scores.
# Render the ACT/SAT summary with means and medians rounded to one decimal.
test_score_summary %>%
  mutate(
    across(
      c(mean_act, median_act, mean_sat, median_sat),
      ~ round(.x, 1)
    )
  ) %>%
  kable(
    col.names = c(
      "ESP Status",
      "Total Students",
      "ACT Available",
      "Mean ACT",
      "Median ACT",
      "SAT Available",
      "Mean SAT",
      "Median SAT"
    ),
    caption = "Academic Preparation Indicators by ESP Status"
  )
| ESP Status | Total Students | ACT Available | Mean ACT | Median ACT | SAT Available | Mean SAT | Median SAT |
|---|---|---|---|---|---|---|---|
| no | 328 | 56 | 19.3 | 19.0 | 147 | 979.3 | 960 |
| yes | 162 | 22 | 17.7 | 17.5 | 81 | 964.6 | 950 |
# Overlaid, semi-transparent ACT histograms, one per ESP status. Rows with a
# missing ACT score are dropped by ggplot2 with a warning.
mat0993 %>%
  ggplot(aes(x = act_pull_clean, fill = esp)) +
  geom_histogram(position = "identity", bins = 15, alpha = 0.6) +
  labs(
    title = "ACT Score Distribution by ESP Participation",
    x = "ACT Score",
    y = "Number of Students",
    fill = "ESP Status"
  )
## Warning: Removed 412 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Overlaid, semi-transparent SAT histograms, one per ESP status. Rows with a
# missing SAT score are dropped by ggplot2 with a warning.
mat0993 %>%
  ggplot(aes(x = sat_pull_clean, fill = esp)) +
  geom_histogram(position = "identity", bins = 15, alpha = 0.6) +
  labs(
    title = "SAT Score Distribution by ESP Participation",
    x = "SAT Score",
    y = "Number of Students",
    fill = "ESP Status"
  )
## Warning: Removed 262 rows containing non-finite outside the scale range
## (`stat_bin()`).
Comparing ACT and SAT scores helps determine whether differences in course outcomes may be related to differences in incoming academic preparation. These indicators should be interpreted cautiously because test score data are not available for all students.
# Print the ESP status (rows) by final grade (columns) contingency table.
grade_table
##
## ANC BNC CNC UNC W
## no 28 65 63 154 18
## yes 71 30 16 42 3
# Print the chi-square test of ESP status against final grade.
grade_chisq
##
## Pearson's Chi-squared test
##
## data: grade_table
## X-squared = 88.125, df = 4, p-value < 2.2e-16
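The chi-square test assesses whether the final grade distribution differs by ESP participation status. A p-value below 0.05 suggests that final grade outcomes are statistically associated with ESP status. Here the test is highly significant (X-squared = 88.13, df = 4, p < 0.001), so the final grade distribution differs between ESP and non-ESP students.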
# Print the ESP status (rows) by success flag (columns) contingency table.
success_table
##
## FALSE TRUE
## no 172 156
## yes 45 117
# Print the chi-square test of ESP status against course success.
success_chisq
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: success_table
## X-squared = 25.739, df = 1, p-value = 3.908e-07
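The chi-square test for course success evaluates whether successful completion rates differ by ESP participation status. The difference is statistically significant (X-squared = 25.74, df = 1, p < 0.001): 72.2% of ESP students completed the course successfully, compared with 47.6% of non-ESP students.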
This section uses logistic regression to examine whether ESP participation is associated with successful completion of MAT0993. Because the outcome variable is binary, successful or not successful, logistic regression is appropriate.
For this analysis, successful completion is defined as earning an ANC, BNC, or CNC final grade. UNC and W are treated as unsuccessful outcomes.
# Model 1: unadjusted logistic regression of course success on ESP status.
model1 <- glm(
  formula = successful ~ esp,
  family = binomial,
  data = mat0993
)
summary(model1)
##
## Call:
## glm(formula = successful ~ esp, family = binomial, data = mat0993)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.09764 0.11056 -0.883 0.377
## espyes 1.05315 0.20735 5.079 3.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 672.87 on 489 degrees of freedom
## Residual deviance: 645.36 on 488 degrees of freedom
## AIC: 649.36
##
## Number of Fisher Scoring iterations: 4
# Exponentiate the log-odds coefficients to obtain odds ratios, then print.
model1_odds <- model1 %>%
  coef() %>%
  exp()
model1_odds
## (Intercept) espyes
## 0.9069767 2.8666667
Model 1 examines the relationship between ESP participation and successful completion of MAT0993 without accounting for any other student characteristics.
The coefficient for ESP participation is positive and statistically significant. This indicates that ESP students were significantly more likely to successfully complete MAT0993 than non-ESP students.
The odds ratio for ESP participation is 2.87. This means that students participating in ESP had approximately 2.9 times the odds of successfully completing MAT0993 compared with students who did not participate in ESP.
This finding suggests a strong positive association between ESP participation and course success. However, this model does not establish causation because it does not account for other factors that may also influence student outcomes, such as academic preparation, demographics, or academic program.
# Model 2: course success on ESP status, adjusting for ACT score, SAT score,
# and test-optional status. glm() drops every row with a missing value in any
# model variable, so only records with both an ACT and an SAT score are used.
model2 <- glm(
  formula = successful ~ esp + act_pull_clean + sat_pull_clean + test_optional,
  family = binomial,
  data = mat0993
)
summary(model2)
##
## Call:
## glm(formula = successful ~ esp + act_pull_clean + sat_pull_clean +
## test_optional, family = binomial, data = mat0993)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.0744293 2.8971035 -1.406 0.1596
## espyes 1.2666213 0.7224271 1.753 0.0796 .
## act_pull_clean 0.1820815 0.3088506 0.590 0.5555
## sat_pull_clean 0.0007467 0.0075160 0.099 0.9209
## test_optionalY -0.1376968 0.7483015 -0.184 0.8540
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.231 on 60 degrees of freedom
## Residual deviance: 75.475 on 56 degrees of freedom
## (429 observations deleted due to missingness)
## AIC: 85.475
##
## Number of Fisher Scoring iterations: 4
# Exponentiate the log-odds coefficients to obtain odds ratios, then print.
model2_odds <- model2 %>%
  coef() %>%
  exp()
model2_odds
## (Intercept) espyes act_pull_clean sat_pull_clean test_optionalY
## 0.01700192 3.54884167 1.19971193 1.00074693 0.87136289
Model 2 adds academic preparation indicators to the analysis, including ACT score, SAT score, and test-optional status.
This model helps assess whether ESP participation remains associated with successful completion after accounting for available indicators of incoming academic preparation.
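The odds ratio for ESP participation in Model 2 is 3.55. Although this value is above 1, the ESP coefficient is not statistically significant at the 0.05 level (p = 0.080), so this model does not by itself confirm that ESP participation remains positively associated with course success after accounting for academic preparation.
Because the model requires both an ACT and an SAT score, 429 of the 490 records were dropped for missing data, leaving only 61 students. This model should therefore be interpreted cautiously: the estimates are imprecise and the retained students may not be representative of the full class.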
# Model 3: adds gender and ethnicity to the Model 2 predictors. As in
# Model 2, rows with a missing value in any model variable are dropped.
model3 <- glm(
  formula = successful ~ esp + gender + ethnicity + act_pull_clean +
    sat_pull_clean + test_optional,
  family = binomial,
  data = mat0993
)
summary(model3)
##
## Call:
## glm(formula = successful ~ esp + gender + ethnicity + act_pull_clean +
## sat_pull_clean + test_optional, family = binomial, data = mat0993)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.976e+00 3.459e+00 -1.149 0.250
## espyes 9.041e-01 8.845e-01 1.022 0.307
## genderM -1.401e+00 7.705e-01 -1.819 0.069 .
## genderN -3.994e-01 9.137e-01 -0.437 0.662
## ethnicityAsian -7.359e-01 1.113e+00 -0.661 0.508
## ethnicityBlack or African American -3.592e-01 1.002e+00 -0.358 0.720
## ethnicityHispanic or Latino 1.695e+00 1.391e+00 1.218 0.223
## ethnicityMiddle Eastern/North African -8.763e-01 1.483e+00 -0.591 0.555
## ethnicityUnknown 1.575e+01 1.455e+03 0.011 0.991
## ethnicityWhite -1.277e+00 9.963e-01 -1.281 0.200
## act_pull_clean 2.292e-01 3.404e-01 0.673 0.501
## sat_pull_clean 5.803e-04 8.090e-03 0.072 0.943
## test_optionalY 1.864e-01 8.703e-01 0.214 0.830
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.231 on 60 degrees of freedom
## Residual deviance: 65.448 on 48 degrees of freedom
## (429 observations deleted due to missingness)
## AIC: 91.448
##
## Number of Fisher Scoring iterations: 14
# Exponentiate the log-odds coefficients to obtain odds ratios, then print.
model3_odds <- model3 %>%
  coef() %>%
  exp()
model3_odds
## (Intercept) espyes
## 1.876344e-02 2.469792e+00
## genderM genderN
## 2.462837e-01 6.707346e-01
## ethnicityAsian ethnicityBlack or African American
## 4.790598e-01 6.982346e-01
## ethnicityHispanic or Latino ethnicityMiddle Eastern/North African
## 5.444165e+00 4.163293e-01
## ethnicityUnknown ethnicityWhite
## 6.915127e+06 2.789858e-01
## act_pull_clean sat_pull_clean
## 1.257635e+00 1.000580e+00
## test_optionalY
## 1.204892e+00
Model 3 incorporates demographic and academic background variables into the analysis. This model examines whether ESP participation remains associated with successful course completion after accounting for gender, ethnicity, ACT score, SAT score, and test-optional status.
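The odds ratio for ESP participation in Model 3 is 2.47. Although this value is above 1, the ESP coefficient is not statistically significant (p = 0.307), so this model does not confirm that ESP participation is positively associated with MAT0993 success after accounting for these student background characteristics. Like Model 2, it is fitted to only the 61 records with complete data, and the very large estimate and standard error for the "Unknown" ethnicity category indicate that some coefficients cannot be estimated reliably from so few observations.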
This model should be interpreted as evidence of association, not causation. Additional longitudinal data and further controls would be needed to make stronger claims about the impact of ESP participation.
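Across the logistic regression models, the key question is whether the ESP coefficient remains positive and statistically significant as additional variables are added. If it does, this strengthens the preliminary finding that ESP participation is associated with higher odds of successful MAT0993 completion. In these results the ESP coefficient is positive in all three models, but it is statistically significant only in Model 1, which uses all 490 records; Models 2 and 3 are limited to the 61 records with both ACT and SAT scores and do not reach significance.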
These results can support a more nuanced preliminary conclusion: ESP participation appears to be positively associated with course success, and this relationship should be investigated further using additional semesters, student-level longitudinal records, and degree completion outcomes.
Based on this preliminary analysis:
This analysis is preliminary and based on a single course in a single semester. The dataset does not currently include degree completion, STEM employment, or long-term persistence outcomes. As a result, this report should be interpreted as an early exploration of course-level outcomes rather than a full evaluation of ESP impact.