This report presents a visual analysis of institutional data for California State University, Bakersfield (CSUB). It covers enrollment, admissions, student life, expenses, financial aid, faculty, and degrees, with one chart per topic.
library(tidyverse)
library(readr)
library(stringr)
library(janitor)
library(scales)
# Read the CSUB Common Data Set master CSV (column-type message silenced)
# and pass the result through janitor::clean_names() so later code can rely
# on the cleaned column names (`section`, `table_id`, `col_*`).
csub <- clean_names(
  read_csv(
    "csub_common_data_set_2015_2016_clean_master.csv",
    show_col_types = FALSE
  )
)
# Parse a vector into numbers.
#
# `x` is coerced to character and handed to readr::parse_number(); the
# strings "", "NA", "NaN" and "NULL" are treated as missing values.
# Returns the numeric vector produced by readr::parse_number().
parse_num_safe <- function(x) {
  missing_tokens <- c("", "NA", "NaN", "NULL")
  text <- as.character(x)
  readr::parse_number(text, na = missing_tokens)
}
# Normalise the raw table in a single mutate():
#   * every `col_*` column becomes whitespace-squished text, with empty
#     strings converted to NA;
#   * `section` becomes whitespace-squished text;
#   * `table_id` is stored as character.
csub_clean <- mutate(
  csub,
  across(
    starts_with("col_"),
    ~ na_if(str_squish(as.character(.x)), "")
  ),
  section = str_squish(as.character(section)),
  table_id = as.character(table_id)
)
# ---- Enrollment by gender and attendance status ----

# All rows belonging to section B of the data set.
enrollment <- csub_clean %>%
  filter(section == "B. ENROLLMENT AND PERSISTENCE")

# Table P03_T01: col_1 is used as the category label and col_2..col_5 are
# parsed as full-/part-time counts for men and women.
# NOTE(review): the column-to-meaning mapping is taken from the names
# assigned here; confirm it against the CSV layout.
gender_enrollment <- enrollment %>%
  filter(table_id == "P03_T01") %>%
  transmute(
    category = col_1,
    full_time_men = parse_num_safe(col_2),
    full_time_women = parse_num_safe(col_3),
    part_time_men = parse_num_safe(col_4),
    part_time_women = parse_num_safe(col_5)
  ) %>%
  # Keep only rows that carry at least one parsed count.
  filter(
    !is.na(full_time_men) | !is.na(full_time_women) |
      !is.na(part_time_men) | !is.na(part_time_women)
  ) %>%
  # Drop aggregate rows so totals are not charted next to their components.
  # The match ignores case so labels such as "GRAND TOTAL ..." are excluded
  # as well as "Total ...". Rows without a label are dropped explicitly
  # rather than relying on str_detect() returning NA for them.
  filter(
    !is.na(category),
    !str_detect(category, regex("total", ignore_case = TRUE))
  )

# One row per category/group pair; missing counts are discarded.
gender_long <- gender_enrollment %>%
  pivot_longer(
    -category,
    names_to = "group",
    values_to = "students",
    values_drop_na = TRUE
  )

# Horizontal dodged bars, categories ordered by their mean count.
ggplot(
  gender_long,
  aes(x = reorder(category, students), y = students, fill = group)
) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  labs(title = "Enrollment by Gender and Status", x = "", y = "Students") +
  theme_minimal()
# ---- Admissions ----
# Hard-coded applicant and enrollee counts, split by gender.
admissions_plot <- tibble(
  category = c(
    "Men Applied", "Women Applied",
    "Men Enrolled", "Women Enrolled"
  ),
  value = c(2319, 3477, 580, 882)
)

# Horizontal bar chart with categories ordered by value.
admissions_plot %>%
  ggplot(aes(reorder(category, value), value)) +
  geom_col(fill = "orange") +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  ggtitle("Admissions Funnel") +
  xlab("") +
  ylab("Students") +
  theme_minimal()
# ---- Student life ----
# All rows belonging to section F of the data set.
student_life <- filter(csub_clean, section == "F. STUDENT LIFE")

# Rows of table P15_T02 whose second column contains an "X"; each kept row
# gets a constant count of 1 so it can be drawn as a unit-height bar.
activities <- student_life %>%
  filter(
    table_id == "P15_T02",
    str_detect(col_2, "X")
  ) %>%
  mutate(count = 1)

# Horizontal bar chart, one bar per col_1 label.
activities %>%
  ggplot(aes(reorder(col_1, count), count)) +
  geom_col(fill = "green") +
  coord_flip() +
  ggtitle("Student Activities") +
  xlab("") +
  ylab("Available") +
  theme_minimal()
# ---- Cost of attendance ----
# Hard-coded expense amounts, one row per cost category.
cost_plot <- tibble(
  category = c(
    "In-State Tuition",
    "Out-State Tuition",
    "Fees",
    "Room & Board",
    "Books",
    "Transport"
  ),
  amount = c(5472, 16632, 1369, 13968, 1898, 1438)
)

# Horizontal bar chart ordered by amount, axis formatted as dollars.
cost_plot %>%
  ggplot(aes(reorder(category, amount), amount)) +
  geom_col(fill = "gold") +
  scale_y_continuous(labels = scales::dollar_format()) +
  coord_flip() +
  ggtitle("Cost of Attendance") +
  xlab("") +
  ylab("USD") +
  theme_minimal()
# ---- Financial aid ----
# Hard-coded student counts for each financial-aid category.
aid_plot <- tibble(
  category = c(
    "Applied Aid", "Received Aid", "Need Based",
    "Grants", "Loans"
  ),
  value = c(6123, 5388, 5518, 5016, 2369)
)

# Horizontal bar chart ordered by value, thousands separated by commas.
ggplot(
  data = aid_plot,
  mapping = aes(reorder(category, value), value)
) +
  theme_minimal() +
  geom_col(fill = "darkgreen") +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Financial Aid Distribution") +
  xlab("") +
  ylab("Students") +
  coord_flip()
# ---- Faculty ----
# Hard-coded faculty head counts, one row per category.
faculty_plot <- tibble(
  category = c(
    "Faculty",
    "Doctorate",
    "Women",
    "Men"
  ),
  value = c(473, 139, 250, 223)
)

# Horizontal bar chart ordered by value.
faculty_plot %>%
  ggplot(aes(reorder(category, value), value)) +
  geom_col(fill = "blue") +
  ggtitle("Faculty Profile") +
  xlab("") +
  ylab("Count") +
  coord_flip() +
  theme_minimal()
# ---- Degrees conferred ----
# Hard-coded percentage values for five disciplines.
degrees_plot <- tibble(
  discipline = c(
    "Liberal Arts", "Psychology", "Social Science",
    "Education", "Business"
  ),
  pct = c(29.2, 9.4, 7.9, 7.8, 6.3)
)

# Horizontal bar chart ordered by percentage.
ggplot(
  data = degrees_plot,
  mapping = aes(reorder(discipline, pct), pct)
) +
  theme_minimal() +
  coord_flip() +
  geom_col(fill = "purple") +
  ggtitle("Degrees by Discipline") +
  xlab("") +
  ylab("%")
This analysis shows that CSUB has a large undergraduate population, a strong reliance on financial aid, and a concentration of degrees in a few major areas. Visualizations make these patterns easier to understand than raw tables do.