Purpose:
These packages support fast, readable data wrangling and clean printing.
Keeping stable paths ensures Power BI refresh works consistently.
# Base folder holding the input and output CSVs. The default is the fixed
# project folder, so existing runs resolve exactly the same files as before.
# Set the R_POWERBI_BASE_DIR environment variable to point the script at
# another folder without editing the code.
BASE_DIR <- Sys.getenv(
  "R_POWERBI_BASE_DIR",
  unset = "C:/Users/yudit/Downloads/UWG/2nd-Round/Activities/R_PowerBI"
)
# Raw coded CSV expected inside BASE_DIR.
INPUT_PATH <- file.path(BASE_DIR, "DATA.csv")
# Destination of the exported CSV inside BASE_DIR.
OUTPUT_PATH <- file.path(BASE_DIR, "students_db_R.csv")
# Guardrail: abort right away when the input CSV is absent, and tell the
# analyst which folder the file is expected in.
if (!file.exists(INPUT_PATH)) {
  stop(
    "Input file not found: ", INPUT_PATH,
    "\nMake sure DATA.csv is in: ", BASE_DIR
  )
}
Read the raw coded dataset exactly once; all transformations happen after.
Raw columns use generic identifiers (“1”,“2”,“3”…). Renaming makes the dataset self-describing for analysis + BI modeling.
# Lookup from raw column header (name) to descriptive snake_case column name
# (value). Written as one "raw header, new name" pair per row and then turned
# into a named character vector.
rename_map <- local({
  pairs <- matrix(
    c(
      "STUDENT ID", "student_id",
      "COURSE ID", "course_id",
      "GRADE", "output_grade_code",
      "1", "student_age_group_code",
      "2", "sex_code",
      "3", "high_school_type_code",
      "4", "scholarship_type_code",
      "5", "additional_work_code",
      "6", "art_sports_activity_code",
      "7", "has_partner_code",
      "8", "salary_group_code",
      "9", "transportation_code",
      "10", "accommodation_code",
      "11", "mother_education_code",
      "12", "father_education_code",
      "13", "siblings_group_code",
      "14", "parental_status_code",
      "15", "mother_occupation_code",
      "16", "father_occupation_code",
      "17", "weekly_study_hours_code",
      "18", "reading_non_scientific_code",
      "19", "reading_scientific_code",
      "20", "attends_seminars_code",
      "21", "project_impact_code",
      "22", "class_attendance_code",
      "23", "midterm1_preparation_code",
      "24", "midterm2_preparation_code",
      "25", "taking_notes_code",
      "26", "listening_in_class_code",
      "27", "discussion_improves_success_code",
      "28", "flip_classroom_code",
      "29", "last_semester_gpa_group_code",
      "30", "expected_grad_gpa_group_code"
    ),
    ncol = 2,
    byrow = TRUE
  )
  # Column 1 holds the raw headers, column 2 the new names.
  stats::setNames(pairs[, 2], pairs[, 1])
})
# Rename only the columns that appear in rename_map; every other column keeps
# its current name.
old_names <- names(df)
names(df) <- ifelse(
  is.na(match(old_names, names(rename_map))),
  old_names,
  rename_map[old_names]
)
Codes are useful for storage, but labels are essential for communication, reporting, and stakeholder-facing insights.
# Code-to-label dictionaries, one entry per coded column. Each entry is a named
# character vector: the name is the stored code (as text), the value is the
# human-readable label. The names of these vectors are also the set of codes
# the validation loop below accepts for that column.
maps <- list(
# Demographics and personal background
student_age_group_code = c("1"="18-21", "2"="22-25", "3"="above 26"),
sex_code = c("1"="female", "2"="male"),
high_school_type_code = c("1"="private", "2"="state", "3"="other"),
scholarship_type_code = c("1"="0%", "2"="25%", "3"="50%", "4"="75%", "5"="Full"),
additional_work_code = c("1"="Yes", "2"="No"),
art_sports_activity_code = c("1"="Yes", "2"="No"),
has_partner_code = c("1"="Yes", "2"="No"),
salary_group_code = c("1"="USD 135-200", "2"="USD 201-270", "3"="USD 271-340", "4"="USD 341-410", "5"="above 410"),
transportation_code = c("1"="Bus", "2"="Private car/taxi", "3"="bicycle", "4"="Other"),
accommodation_code = c("1"="rental", "2"="dormitory", "3"="with family", "4"="Other"),
# Family background (note: the father occupation list has no "housewife" level,
# so its codes are shifted by one relative to the mother occupation list)
mother_education_code = c("1"="primary school", "2"="secondary school", "3"="high school", "4"="university", "5"="MSc.", "6"="Ph.D."),
father_education_code = c("1"="primary school", "2"="secondary school", "3"="high school", "4"="university", "5"="MSc.", "6"="Ph.D."),
siblings_group_code = c("1"="1", "2"="2", "3"="3", "4"="4", "5"="5 or above"),
parental_status_code = c("1"="married", "2"="divorced", "3"="died - one of them or both"),
mother_occupation_code = c("1"="retired", "2"="housewife", "3"="government officer", "4"="private sector employee", "5"="self-employment", "6"="other"),
father_occupation_code = c("1"="retired", "2"="government officer", "3"="private sector employee", "4"="self-employment", "5"="other"),
# Study habits and classroom behavior
weekly_study_hours_code = c("1"="0 hours", "2"="<5 hours", "3"="6-10 hours", "4"="11-20 hours", "5"="more than 20 hours"),
reading_non_scientific_code = c("1"="Never", "2"="Sometimes", "3"="Often"),
reading_scientific_code = c("1"="Never", "2"="Sometimes", "3"="Often"),
attends_seminars_code = c("1"="Yes", "2"="No"),
project_impact_code = c("1"="positive", "2"="negative", "3"="neutral"),
class_attendance_code = c("1"="always", "2"="sometimes", "3"="never"),
midterm1_preparation_code = c("1"="alone", "2"="with friends", "3"="not applicable"),
midterm2_preparation_code = c("1"="closest date to the exam", "2"="regularly during the semester", "3"="never"),
taking_notes_code = c("1"="never", "2"="sometimes", "3"="always"),
listening_in_class_code = c("1"="never", "2"="sometimes", "3"="always"),
discussion_improves_success_code = c("1"="never", "2"="sometimes", "3"="always"),
flip_classroom_code = c("1"="not useful", "2"="useful", "3"="not applicable"),
# GPA bands and the outcome grade (the grade is the only map whose codes start at 0)
last_semester_gpa_group_code = c("1"="<2.00", "2"="2.00-2.49", "3"="2.50-2.99", "4"="3.00-3.49", "5"="above 3.49"),
expected_grad_gpa_group_code = c("1"="<2.00", "2"="2.00-2.49", "3"="2.50-2.99", "4"="3.00-3.49", "5"="above 3.49"),
output_grade_code = c("0"="Fail", "1"="DD", "2"="DC", "3"="CC", "4"="CB", "5"="BB", "6"="BA", "7"="AA")
)
If the source data includes a new/unknown code, we STOP early so the analyst can update the mapping. This prevents blank labels and bad reporting.
# Check every column that has a label map: the column must exist, and each
# non-missing code found in it must be a key of that map. The first violation
# stops the script with a message naming the column (and the offending codes).
for (col in names(maps)) {
  if (!(col %in% names(df))) {
    stop("Expected column '", col, "' was not found after renaming.")
  }
  allowed <- names(maps[[col]])
  # Codes are compared as text because the map keys are strings.
  present <- as.character(unique(stats::na.omit(df[[col]])))
  bad <- unique(present[!(present %in% allowed)])
  if (length(bad) > 0) {
    stop(col, " has unexpected codes: {", paste(bad, collapse = ", "), "}")
  }
}
cat("✅ All codes validated successfully.\n")
## ✅ All codes validated successfully.
Keeping both versions helps: BI modeling (codes can be stable keys) and readability (labels improve interpretability).
write_excel_csv writes UTF-8 with BOM so Excel/Power BI
read it cleanly.
# Write the data frame to OUTPUT_PATH, then print a one-line confirmation with
# the destination and the table size (row count uses a thousands separator).
readr::write_excel_csv(df, OUTPUT_PATH)
cat(
  sprintf(
    "✅ Export complete: %s (rows=%s, cols=%s)\n",
    OUTPUT_PATH,
    format(nrow(df), big.mark = ","),
    ncol(df)
  )
)
## ✅ Export complete: C:/Users/yudit/Downloads/UWG/2nd-Round/Activities/R_PowerBI/students_db_R.csv (rows=145, cols=64)
Goal: Provide quick, stakeholder-ready summaries of overall performance, distributions of key behaviors/demographics, and performance differences by group.
# Helper functions

# Mean of `x` after coercing it to numeric. Values that cannot be converted
# become NA (the coercion warning is silenced) and are ignored, as are NAs
# already present. Returns NaN when nothing numeric is left.
safe_mean <- function(x) {
  as_number <- suppressWarnings(as.numeric(x))
  mean(as_number, na.rm = TRUE)
}
# Percent display helpers (presentation-friendly)

# Format values that are already on a 0-100 scale as text with two decimals
# and a trailing percent sign, e.g. 50 -> "50.00%".
fmt_pct100 <- function(x) {
  sprintf("%.2f%%", x)
}

# Same formatting for proportions on a 0-1 scale, e.g. 0.5 -> "50.00%".
fmt_pct01 <- function(x) {
  fmt_pct100(100 * x)
}
# Frequency table helper
#
# Counts how often each distinct value of one column occurs.
#
# data   : data frame (or tibble) holding the column.
# col    : name of the column to tabulate, as a single string.
# dropna : when TRUE, missing values are removed before counting; when FALSE
#          (the default) they are kept and counted as their own NA row.
#          Previously an unconditional NA filter made this argument a no-op.
#
# Returns NULL when `col` is not a column of `data`. Otherwise a tibble with
# `value`, `count` (sorted from most to least frequent) and `percent`, the
# share of the counted rows formatted by fmt_pct100().
freq_table <- function(data, col, dropna = FALSE) {
  if (!col %in% names(data)) return(NULL)
  s <- data[[col]]
  if (dropna) s <- s[!is.na(s)]
  tibble::tibble(value = s) %>%
    dplyr::count(value, name = "count", sort = TRUE) %>%
    dplyr::mutate(percent = fmt_pct100((count / sum(count)) * 100))
}
Quick sanity check that data loaded correctly and columns look right.
## Shape (rows, cols): 145, 64
## # A tibble: 5 × 64
## student_id student_age_group_code sex_code high_school_type_code
## <chr> <dbl> <dbl> <dbl>
## 1 STUDENT1 2 2 3
## 2 STUDENT2 2 2 3
## 3 STUDENT3 2 2 2
## 4 STUDENT4 1 1 1
## 5 STUDENT5 2 2 1
## # ℹ 60 more variables: scholarship_type_code <dbl>, additional_work_code <dbl>,
## # art_sports_activity_code <dbl>, has_partner_code <dbl>,
## # salary_group_code <dbl>, transportation_code <dbl>,
## # accommodation_code <dbl>, mother_education_code <dbl>,
## # father_education_code <dbl>, siblings_group_code <dbl>,
## # parental_status_code <dbl>, mother_occupation_code <dbl>,
## # father_occupation_code <dbl>, weekly_study_hours_code <dbl>, …
Identify data quality issues early (types, nulls, duplicates).
# Column data types
# One row per column of df with its class vector collapsed into one
# comma-separated string, ordered by type and then by column name.
dtypes <- dplyr::arrange(
  tibble::tibble(
    column = names(df),
    dtype = vapply(
      df,
      function(col_values) paste(class(col_values), collapse = ","),
      character(1)
    )
  ),
  dtype,
  column
)
cat("Dtypes (sorted):\n")
## Dtypes (sorted):
| column | dtype |
|---|---|
| accommodation | character |
| additional_work | character |
| art_sports_activity | character |
| attends_seminars | character |
| class_attendance | character |
| discussion_improves_success | character |
| expected_grad_gpa_group | character |
| father_education | character |
| father_occupation | character |
| flip_classroom | character |
| has_partner | character |
| high_school_type | character |
| last_semester_gpa_group | character |
| listening_in_class | character |
| midterm1_preparation | character |
| midterm2_preparation | character |
| mother_education | character |
| mother_occupation | character |
| output_grade | character |
| parental_status | character |
| project_impact | character |
| reading_non_scientific | character |
| reading_scientific | character |
| salary_group | character |
| scholarship_type | character |
| sex | character |
| siblings_group | character |
| student_age_group | character |
| student_id | character |
| taking_notes | character |
| transportation | character |
| weekly_study_hours | character |
| accommodation_code | numeric |
| additional_work_code | numeric |
| art_sports_activity_code | numeric |
| attends_seminars_code | numeric |
| class_attendance_code | numeric |
| course_id | numeric |
| discussion_improves_success_code | numeric |
| expected_grad_gpa_group_code | numeric |
| father_education_code | numeric |
| father_occupation_code | numeric |
| flip_classroom_code | numeric |
| has_partner_code | numeric |
| high_school_type_code | numeric |
| last_semester_gpa_group_code | numeric |
| listening_in_class_code | numeric |
| midterm1_preparation_code | numeric |
| midterm2_preparation_code | numeric |
| mother_education_code | numeric |
| mother_occupation_code | numeric |
| output_grade_code | numeric |
| parental_status_code | numeric |
| project_impact_code | numeric |
| reading_non_scientific_code | numeric |
| reading_scientific_code | numeric |
| salary_group_code | numeric |
| scholarship_type_code | numeric |
| sex_code | numeric |
| siblings_group_code | numeric |
| student_age_group_code | numeric |
| taking_notes_code | numeric |
| transportation_code | numeric |
| weekly_study_hours_code | numeric |
# Missing values table (count + %)
# One row per column of df: number of NA cells and that number as a
# percentage of all rows (2 decimals), columns with the most NAs first.
missing_tbl <- dplyr::arrange(
  tibble::tibble(
    column = names(df),
    missing_n = colSums(is.na(df)),
    missing_pct = round((missing_n / nrow(df)) * 100, 2)
  ),
  dplyr::desc(missing_n)
)
cat("\nMissing values (top 25):\n")
##
## Missing values (top 25):
# Say so explicitly when no column has a missing value.
if (!any(missing_tbl[["missing_n"]] != 0)) {
  cat("✅ No missing values detected (all columns have 0 missing).\n\n")
}
## ✅ No missing values detected (all columns have 0 missing).
| column | missing_n | missing_pct |
|---|---|---|
| student_id | 0 | 0 |
| student_age_group_code | 0 | 0 |
| sex_code | 0 | 0 |
| high_school_type_code | 0 | 0 |
| scholarship_type_code | 0 | 0 |
| additional_work_code | 0 | 0 |
| art_sports_activity_code | 0 | 0 |
| has_partner_code | 0 | 0 |
| salary_group_code | 0 | 0 |
| transportation_code | 0 | 0 |
| accommodation_code | 0 | 0 |
| mother_education_code | 0 | 0 |
| father_education_code | 0 | 0 |
| siblings_group_code | 0 | 0 |
| parental_status_code | 0 | 0 |
| mother_occupation_code | 0 | 0 |
| father_occupation_code | 0 | 0 |
| weekly_study_hours_code | 0 | 0 |
| reading_non_scientific_code | 0 | 0 |
| reading_scientific_code | 0 | 0 |
| attends_seminars_code | 0 | 0 |
| project_impact_code | 0 | 0 |
| class_attendance_code | 0 | 0 |
| midterm1_preparation_code | 0 | 0 |
| midterm2_preparation_code | 0 | 0 |
# Duplicates
# Count repeated student_id values when that column exists; without it, count
# fully duplicated rows instead. The printed label says which was counted.
if (!("student_id" %in% names(df))) {
  dup_n <- sum(duplicated(df))
  cat("\nDuplicate rows:", dup_n, "\n")
} else {
  dup_n <- sum(duplicated(df[["student_id"]]))
  cat("\nDuplicate student_id:", dup_n, "\n")
}
##
## Duplicate student_id: 0
Translate grade categories into numeric GPA points for aggregation, then define pass/fail and at-risk for student success monitoring.
# Grade label -> numeric grade points, from 4.0 for "AA" down to 0.0 for
# "Fail" in half-point steps.
GRADE_POINTS_MAP <- stats::setNames(
  c(4.0, 3.5, 3.0, 2.5, 2.0, 1.5, 1.0, 0.0),
  c("AA", "BA", "BB", "CB", "CC", "DC", "DD", "Fail")
)
# Name of the labelled grade column, or NULL when the data has none.
grade_col <- if ("output_grade" %in% names(df)) "output_grade" else NULL
# Reuse an existing grade_points column if present; otherwise the numeric
# grade is stored under "Grade Points".
gp_col <- if ("grade_points" %in% names(df)) "grade_points" else "Grade Points"
if (!is.null(grade_col) && !(gp_col %in% names(df))) {
  # Look up points by grade label; labels missing from the map become NA.
  df[[gp_col]] <- unname(GRADE_POINTS_MAP[as.character(df[[grade_col]])])
}
FAIL_SET <- c("Fail")
AT_RISK_SET <- c("DC", "DD", "Fail")
if (!is.null(grade_col)) {
  # A missing grade yields NA flags rather than FALSE. Otherwise a student
  # without a grade would get is_fail = FALSE and therefore is_pass = TRUE,
  # inflating the pass rate. The summaries take means with na.rm = TRUE, so
  # NA flags are simply left out of the rates.
  df$is_fail <- ifelse(is.na(df[[grade_col]]), NA, df[[grade_col]] %in% FAIL_SET)
  df$is_at_risk <- ifelse(is.na(df[[grade_col]]), NA, df[[grade_col]] %in% AT_RISK_SET)
  df$is_pass <- !df$is_fail
} else {
  cat("WARNING: 'output_grade' column not found. Outcome flags not created.\n")
}
# Report which of the derived columns actually exist now.
created <- c("is_pass","is_fail","is_at_risk", gp_col)
cat("Created columns:", paste(created[created %in% names(df)], collapse=", "), "\n")
## Created columns: is_pass, is_fail, is_at_risk, Grade Points
High-level performance summary (health check).
# Headline KPIs. Each one falls back (row count for N, NaN for the rest) when
# the column it is computed from is absent.
N <- if (!("student_id" %in% names(df))) nrow(df) else dplyr::n_distinct(df[["student_id"]])
avg_gpa <- if (!(gp_col %in% names(df))) NaN else safe_mean(df[[gp_col]])
pass_rate <- if (!("is_pass" %in% names(df))) NaN else mean(df[["is_pass"]], na.rm = TRUE)
fail_rate <- if (!("is_fail" %in% names(df))) NaN else mean(df[["is_fail"]], na.rm = TRUE)
at_risk_rate <- if (!("is_at_risk" %in% names(df))) NaN else mean(df[["is_at_risk"]], na.rm = TRUE)
cat("Total Students:", N, "\n")
## Total Students: 145
## Avg GPA (grade-mapped): 2.09
## Pass Rate: 94.48%
## Fail Rate: 5.52%
## At-Risk (DC/DD/Fail): 46.21%
Interpretation: If at-risk is high, focus analysis on groups with elevated at-risk rates.
Shows how outcomes are distributed—useful for spotting clustering at low grades.
## # A tibble: 8 × 3
## value count percent
## <chr> <int> <chr>
## 1 DD 35 24.14%
## 2 DC 24 16.55%
## 3 CC 21 14.48%
## 4 AA 17 11.72%
## 5 BB 17 11.72%
## 6 BA 13 8.97%
## 7 CB 10 6.90%
## 8 Fail 8 5.52%
Highlights dominant study/attendance patterns that may relate to outcomes.
# Behaviour columns to tabulate; any that are absent from df are skipped.
behavior_cols <- c("weekly_study_hours", "midterm2_preparation", "class_attendance",
                   "attends_seminars", "additional_work")
# The loop variable is deliberately not called `c`: a top-level `for` leaves
# its variable behind in the global environment, where it would mask base::c
# for any later code that passes `c` as a value.
for (behavior_col in behavior_cols) {
  if (behavior_col %in% names(df)) {
    cat("\n", behavior_col, ":\n", sep = "")
    print(freq_table(df, behavior_col))
  }
}
##
## weekly_study_hours:
## # A tibble: 5 × 3
## value count percent
## <chr> <int> <chr>
## 1 <5 hours 74 51.03%
## 2 6-10 hours 30 20.69%
## 3 0 hours 29 20.00%
## 4 11-20 hours 8 5.52%
## 5 more than 20 hours 4 2.76%
##
## midterm2_preparation:
## # A tibble: 3 × 3
## value count percent
## <chr> <int> <chr>
## 1 closest date to the exam 123 84.83%
## 2 regularly during the semester 20 13.79%
## 3 never 2 1.38%
##
## class_attendance:
## # A tibble: 2 × 3
## value count percent
## <chr> <int> <chr>
## 1 always 110 75.86%
## 2 sometimes 35 24.14%
##
## attends_seminars:
## # A tibble: 2 × 3
## value count percent
## <chr> <int> <chr>
## 1 Yes 114 78.62%
## 2 No 31 21.38%
##
## additional_work:
## # A tibble: 2 × 3
## value count percent
## <chr> <int> <chr>
## 1 No 96 66.21%
## 2 Yes 49 33.79%
Interpretation: Look for behaviors with both high prevalence and high at-risk rates.
Compares average GPA + at-risk/fail rates across groups to identify gaps.
# Summarise outcomes for each level of one grouping column.
#
# data      : data frame holding `group_col`, the grade-points column named by
#             the script-level variable `gp_col`, and the flag columns
#             `is_fail` and `is_at_risk`.
# group_col : name of the grouping column, as a single string.
# min_n     : when greater than 0, groups with fewer students are dropped.
#
# Returns NULL if any required column is missing. Otherwise a tibble with one
# row per group: the group value (column named after `group_col`), `students`
# (distinct student_id when that column exists, else the row count), `avg_gpa`
# rounded to 2 decimals, and `fail_rate` / `at_risk_rate` as percent strings.
# Rows are ordered by at-risk rate (highest first), then by average GPA.
perf_by_group <- function(data, group_col, min_n = 0) {
  available <- names(data)
  if (!(group_col %in% available)) return(NULL)
  if (!(gp_col %in% available)) return(NULL)
  if (!all(c("is_fail", "is_at_risk") %in% available)) return(NULL)

  grouped <- dplyr::group_by(data, .data[[group_col]], .drop = FALSE)
  summary_tbl <- dplyr::summarise(
    grouped,
    students = if ("student_id" %in% names(data)) dplyr::n_distinct(student_id) else dplyr::n(),
    avg_gpa = safe_mean(.data[[gp_col]]),
    fail_rate = mean(is_fail, na.rm = TRUE),
    at_risk_rate = mean(is_at_risk, na.rm = TRUE),
    .groups = "drop"
  )
  summary_tbl <- dplyr::mutate(summary_tbl, avg_gpa = round(avg_gpa, 2))

  if (min_n > 0) {
    summary_tbl <- dplyr::filter(summary_tbl, students >= min_n)
  }

  # Sort while the rates are still numeric; they become text just below.
  summary_tbl <- dplyr::arrange(summary_tbl, dplyr::desc(at_risk_rate), avg_gpa)
  result <- dplyr::mutate(
    summary_tbl,
    fail_rate = fmt_pct01(fail_rate),
    at_risk_rate = fmt_pct01(at_risk_rate)
  )
  # Make sure the grouping column carries the caller's column name.
  names(result)[1] <- group_col
  result
}
# Grouping columns for the performance comparison; absent ones are skipped.
key_groups <- c("additional_work", "class_attendance", "attends_seminars",
                "weekly_study_hours", "midterm2_preparation")
# The loop variable is deliberately not called `c`: a top-level `for` leaves
# its variable behind in the global environment, where it would mask base::c
# for any later code that passes `c` as a value.
for (group_name in key_groups) {
  if (group_name %in% names(df)) {
    cat("\nPerformance by ", group_name, ":\n", sep = "")
    print(perf_by_group(df, group_name))
  }
}
##
## Performance by additional_work:
## # A tibble: 2 × 5
## additional_work students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 Yes 49 1.81 10.20% 53.06%
## 2 No 96 2.23 3.12% 42.71%
##
## Performance by class_attendance:
## # A tibble: 2 × 5
## class_attendance students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 sometimes 35 1.81 5.71% 62.86%
## 2 always 110 2.17 5.45% 40.91%
##
## Performance by attends_seminars:
## # A tibble: 2 × 5
## attends_seminars students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 No 31 1.66 12.90% 64.52%
## 2 Yes 114 2.2 3.51% 41.23%
##
## Performance by weekly_study_hours:
## # A tibble: 5 × 5
## weekly_study_hours students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 more than 20 hours 4 1.38 25.00% 75.00%
## 2 0 hours 29 1.93 3.45% 55.17%
## 3 <5 hours 74 2.2 5.41% 44.59%
## 4 6-10 hours 30 2.1 6.67% 43.33%
## 5 11-20 hours 8 1.94 0.00% 25.00%
##
## Performance by midterm2_preparation:
## # A tibble: 3 × 5
## midterm2_preparation students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 closest date to the exam 123 2.06 5.69% 47.97%
## 2 regularly during the semester 20 2.15 5.00% 40.00%
## 3 never 2 3 0.00% 0.00%
Interpretation: Prioritize groups with larger student counts and higher at-risk rates.
Useful for equity lens / segmentation while avoiding tiny-sample noise.
# Demographic columns to tabulate; any that are absent from df are skipped.
demo_cols <- c("sex", "student_age_group", "scholarship_type", "high_school_type", "salary_group")
# The loop variable is deliberately not called `c`: a top-level `for` leaves
# its variable behind in the global environment, where it would mask base::c
# for any later code that passes `c` as a value.
for (demo_col in demo_cols) {
  if (demo_col %in% names(df)) {
    cat("\n", demo_col, ":\n", sep = "")
    print(freq_table(df, demo_col))
  }
}
##
## sex:
## # A tibble: 2 × 3
## value count percent
## <chr> <int> <chr>
## 1 male 87 60.00%
## 2 female 58 40.00%
##
## student_age_group:
## # A tibble: 3 × 3
## value count percent
## <chr> <int> <chr>
## 1 22-25 70 48.28%
## 2 18-21 65 44.83%
## 3 above 26 10 6.90%
##
## scholarship_type:
## # A tibble: 5 × 3
## value count percent
## <chr> <int> <chr>
## 1 50% 76 52.41%
## 2 75% 42 28.97%
## 3 Full 23 15.86%
## 4 25% 3 2.07%
## 5 0% 1 0.69%
##
## high_school_type:
## # A tibble: 3 × 3
## value count percent
## <chr> <int> <chr>
## 1 state 103 71.03%
## 2 private 25 17.24%
## 3 other 17 11.72%
##
## salary_group:
## # A tibble: 5 × 3
## value count percent
## <chr> <int> <chr>
## 1 USD 135-200 93 64.14%
## 2 USD 201-270 27 18.62%
## 3 USD 271-340 16 11.03%
## 4 above 410 5 3.45%
## 5 USD 341-410 4 2.76%
# Performance by demographic group, keeping only groups with at least 5
# students. The loop variable is deliberately not called `c`: a top-level
# `for` leaves its variable behind in the global environment, where it would
# mask base::c for any later code that passes `c` as a value.
for (demo_col in demo_cols) {
  if (demo_col %in% names(df)) {
    cat("\n", demo_col, " (n>=5):\n", sep = "")
    print(perf_by_group(df, demo_col, min_n = 5))
  }
}
##
## sex (n>=5):
## # A tibble: 2 × 5
## sex students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 female 58 1.59 13.79% 62.07%
## 2 male 87 2.41 0.00% 35.63%
##
## student_age_group (n>=5):
## # A tibble: 3 × 5
## student_age_group students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 22-25 70 2.04 2.86% 48.57%
## 2 18-21 65 2.17 9.23% 44.62%
## 3 above 26 10 1.85 0.00% 40.00%
##
## scholarship_type (n>=5):
## # A tibble: 3 × 5
## scholarship_type students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 Full 23 1.83 0.00% 56.52%
## 2 50% 76 1.92 3.95% 51.32%
## 3 75% 42 2.49 11.90% 33.33%
##
## high_school_type (n>=5):
## # A tibble: 3 × 5
## high_school_type students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 private 25 1.76 8.00% 60.00%
## 2 other 17 2.18 0.00% 47.06%
## 3 state 103 2.15 5.83% 42.72%
##
## salary_group (n>=5):
## # A tibble: 4 × 5
## salary_group students avg_gpa fail_rate at_risk_rate
## <chr> <int> <dbl> <chr> <chr>
## 1 USD 271-340 16 1.72 6.25% 62.50%
## 2 above 410 5 1.8 20.00% 60.00%
## 3 USD 201-270 27 1.96 3.70% 59.26%
## 4 USD 135-200 93 2.23 4.30% 38.71%
Helps identify courses with unusually high at-risk rates (potential action areas).
# Per-course summary: students, average grade points, at-risk and fail rates,
# ordered by at-risk rate and then enrolment (both descending).
# The summary needs course_id plus the grade-points and outcome-flag columns.
# Those derived columns are not created when the grade column is missing, so
# their absence is reported and skipped like a missing course_id instead of
# failing inside summarise().
if (!("course_id" %in% names(df))) {
  cat("course_id column not found — skipping course summary.\n")
} else if (!all(c(gp_col, "is_at_risk", "is_fail") %in% names(df))) {
  cat("Grade points / outcome flags not found — skipping course summary.\n")
} else {
  course_summary <- df %>%
    dplyr::group_by(course_id) %>%
    dplyr::summarise(
      students = if ("student_id" %in% names(df)) dplyr::n_distinct(student_id) else dplyr::n(),
      avg_gpa = safe_mean(.data[[gp_col]]),
      at_risk_rate = mean(is_at_risk, na.rm = TRUE),
      fail_rate = mean(is_fail, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    dplyr::mutate(avg_gpa = round(avg_gpa, 2)) %>%
    # Sort on the numeric rates before they are turned into percent strings.
    dplyr::arrange(dplyr::desc(at_risk_rate), dplyr::desc(students)) %>%
    dplyr::mutate(
      at_risk_rate = fmt_pct01(at_risk_rate),
      fail_rate = fmt_pct01(fail_rate)
    )
  print(course_summary)
}
## # A tibble: 9 × 5
## course_id students avg_gpa at_risk_rate fail_rate
## <dbl> <int> <dbl> <chr> <chr>
## 1 8 14 1.14 100.00% 7.14%
## 2 1 66 1.62 62.12% 4.55%
## 3 9 21 1.5 52.38% 19.05%
## 4 2 2 2 50.00% 0.00%
## 5 7 15 3.67 0.00% 0.00%
## 6 3 8 3.56 0.00% 0.00%
## 7 6 8 3.44 0.00% 0.00%
## 8 5 7 3.14 0.00% 0.00%
## 9 4 4 2.75 0.00% 0.00%
Interpretation: Focus first on courses with high at-risk rates and larger enrollments.
This analysis prepared the student dataset for Power BI and identified key patterns: