Data Collection and Preparation

con <- dbConnect(MySQL(),
                dbname = "StudentOutcomes",
                host = "localhost",
                user = "root",
                password = "3004")


query <- "
SELECT 
    sp.*,
    CASE 
        WHEN weekly_self_study_hours >= 25 THEN 'High'
        WHEN weekly_self_study_hours >= 15 THEN 'Medium'
        ELSE 'Low'
    END as study_intensity,
    (math_score + physics_score + chemistry_score) / 3 as stem_average,
    (history_score + english_score + geography_score) / 3 as humanities_average
FROM StudentPerformance sp
WHERE absence_days <= 10"

students_raw <- dbGetQuery(con, query)
dbDisconnect(con)
## [1] TRUE
# Tidyverse data cleaning
students <- students_raw %>%
  tidyr::drop_na() %>%
  dplyr::mutate(
    across(ends_with("_score"), ~ scale(.) %>% as.vector),
    study_intensity = factor(study_intensity, levels = c("Low", "Medium", "High")),
    engagement_level = case_when(
      extracurricular_activities & !part_time_job ~ "Full Academic",
      extracurricular_activities & part_time_job ~ "Balanced",
      !extracurricular_activities & part_time_job ~ "Work Focused",
      TRUE ~ "Minimal Engagement"
    )
  )

Learning Analytics Insights

1. Study Pattern Impact Analysis

# Advanced ggplot2 visualization
ggplot(students, aes(x = weekly_self_study_hours, y = stem_average)) +
  geom_point(aes(color = engagement_level), alpha = 0.6) +
  geom_smooth(method = "loess", se = TRUE) +
  facet_wrap(~gender) +
  labs(
    title = "Impact of Study Hours on STEM Performance",
    subtitle = "Analyzed by Gender and Engagement Level",
    x = "Weekly Self-Study Hours",
    y = "Standardized STEM Score Average",
    color = "Engagement Type"
  ) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "italic")
  )

2. Hierarchical Learning Pattern Analysis

hier_model <- lmer(stem_average ~ 
                    weekly_self_study_hours + 
                    (1 + weekly_self_study_hours | career_aspiration) +
                    (1 | engagement_level),
                  data = students)

# Create accessible summary for varying data literacy levels
model_summary <- summary(hier_model)

# Visualization of random effects
ranef(hier_model)$career_aspiration %>%
  as.data.frame() %>%
  rownames_to_column("Career") %>%
  ggplot(aes(x = reorder(Career, weekly_self_study_hours), y = weekly_self_study_hours)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(
    title = "Study Hours Effect by Career Aspiration",
    subtitle = "From Hierarchical Model Analysis",
    x = "Career Path",
    y = "Effect Size"
  )

3. Educational Equity Analysis

# Creating an equity index
students %>%
  group_by(gender, engagement_level) %>%
  summarise(
    avg_stem = mean(stem_average),
    avg_humanities = mean(humanities_average),
    opportunity_gap = abs(avg_stem - avg_humanities),
    .groups = 'drop'
  ) %>%
  ggplot(aes(x = engagement_level, y = opportunity_gap, fill = gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Educational Opportunity Gap Analysis",
    subtitle = "By Engagement Level and Gender",
    x = "Engagement Level",
    y = "Performance Gap (Standardized)"
  ) +
  scale_fill_brewer(palette = "Set2")

Recommendations for Educational Impact

  1. Differentiated Support: Structure support systems based on engagement patterns
  2. Equity Interventions: Address identified opportunity gaps
  3. Resource Optimization: Align study resources with career aspirations
  4. Engagement Strategies: Develop programs that balance academic and extracurricular activities

Methodology Notes

This analysis employs: - Hierarchical linear modeling for nested educational data - Robust visualization techniques for diverse stakeholders - Educational equity metrics - Evidence-based recommendation framework

# Save visualizations for stakeholder presentation
ggsave("study_impact.png", width = 10, height = 6)