con <- dbConnect(MySQL(),
dbname = "StudentOutcomes",
host = "localhost",
user = "root",
password = "3004")
query <- "
SELECT
sp.*,
CASE
WHEN weekly_self_study_hours >= 25 THEN 'High'
WHEN weekly_self_study_hours >= 15 THEN 'Medium'
ELSE 'Low'
END as study_intensity,
(math_score + physics_score + chemistry_score) / 3 as stem_average,
(history_score + english_score + geography_score) / 3 as humanities_average
FROM StudentPerformance sp
WHERE absence_days <= 10"
students_raw <- dbGetQuery(con, query)
dbDisconnect(con)
## [1] TRUE
# Tidyverse data cleaning
students <- students_raw %>%
tidyr::drop_na() %>%
dplyr::mutate(
across(ends_with("_score"), ~ scale(.) %>% as.vector),
study_intensity = factor(study_intensity, levels = c("Low", "Medium", "High")),
engagement_level = case_when(
extracurricular_activities & !part_time_job ~ "Full Academic",
extracurricular_activities & part_time_job ~ "Balanced",
!extracurricular_activities & part_time_job ~ "Work Focused",
TRUE ~ "Minimal Engagement"
)
)
# Advanced ggplot2 visualization
ggplot(students, aes(x = weekly_self_study_hours, y = stem_average)) +
geom_point(aes(color = engagement_level), alpha = 0.6) +
geom_smooth(method = "loess", se = TRUE) +
facet_wrap(~gender) +
labs(
title = "Impact of Study Hours on STEM Performance",
subtitle = "Analyzed by Gender and Engagement Level",
x = "Weekly Self-Study Hours",
y = "Standardized STEM Score Average",
color = "Engagement Type"
) +
theme(
legend.position = "bottom",
plot.title = element_text(face = "bold"),
axis.title = element_text(face = "italic")
)
hier_model <- lmer(stem_average ~
weekly_self_study_hours +
(1 + weekly_self_study_hours | career_aspiration) +
(1 | engagement_level),
data = students)
# Create accessible summary for varying data literacy levels
model_summary <- summary(hier_model)
# Visualization of random effects
ranef(hier_model)$career_aspiration %>%
as.data.frame() %>%
rownames_to_column("Career") %>%
ggplot(aes(x = reorder(Career, weekly_self_study_hours), y = weekly_self_study_hours)) +
geom_col(fill = "skyblue") +
coord_flip() +
labs(
title = "Study Hours Effect by Career Aspiration",
subtitle = "From Hierarchical Model Analysis",
x = "Career Path",
y = "Effect Size"
)
# Creating an equity index
students %>%
group_by(gender, engagement_level) %>%
summarise(
avg_stem = mean(stem_average),
avg_humanities = mean(humanities_average),
opportunity_gap = abs(avg_stem - avg_humanities),
.groups = 'drop'
) %>%
ggplot(aes(x = engagement_level, y = opportunity_gap, fill = gender)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "Educational Opportunity Gap Analysis",
subtitle = "By Engagement Level and Gender",
x = "Engagement Level",
y = "Performance Gap (Standardized)"
) +
scale_fill_brewer(palette = "Set2")
This analysis employs: - Hierarchical linear modeling for nested educational data - Robust visualization techniques for diverse stakeholders - Educational equity metrics - Evidence-based recommendation framework
# Save visualizations for stakeholder presentation
ggsave("study_impact.png", width = 10, height = 6)