# Overview ----

# This report presents a visual analysis of institutional data for California
# State University, Bakersfield (CSUB). It covers enrollment, admissions,
# student life, expenses, financial aid, faculty, and degrees, presenting each
# topic as a chart.

library(tidyverse)
library(readr)
library(stringr)
library(janitor)
library(scales)
# Load the CSUB Common Data Set extract and normalise its column names with
# janitor::clean_names().
#
# Every column is read as character on purpose: the cleaning step that follows
# coerces `section`, `table_id`, and all `col_*` columns to text and numbers
# are parsed explicitly later on. Letting readr guess column types first could
# silently rewrite cells (for example a column guessed as logical or as a
# time) before they are converted back to text.
csub <- read_csv(
  "csub_common_data_set_2015_2016_clean_master.csv",
  col_types = cols(.default = col_character()),
  show_col_types = FALSE
) %>%
  clean_names()

# Parse a vector into numbers with readr's number parser.
#
# The input is coerced to character first. Empty strings and the tokens
# "NA", "NaN", and "NULL" are treated as missing values.
parse_num_safe <- function(x) {
  missing_tokens <- c("", "NA", "NaN", "NULL")
  text_values <- as.character(x)
  readr::parse_number(text_values, na = missing_tokens)
}

# Text clean-up pass over the raw table:
#   * each `col_*` column is coerced to character, has its whitespace
#     squished, and gets NA wherever the result is an empty string;
#   * `section` is coerced to character and squished;
#   * `table_id` is coerced to character.
csub_clean <- mutate(
  csub,
  across(
    starts_with("col_"),
    function(cell) na_if(str_squish(as.character(cell)), "")
  ),
  section = str_squish(as.character(section)),
  table_id = as.character(table_id)
)

# Enrollment Analysis ----

# Rows of the cleaned table whose section is "B. ENROLLMENT AND PERSISTENCE".
enrollment <- filter(csub_clean, section == "B. ENROLLMENT AND PERSISTENCE")

# Enrollment counts from table P03_T01, one row per category.
#
# col_1 supplies the category label; col_2..col_5 are parsed as the
# full-time/part-time counts for men and women. Rows where all four counts
# are missing are dropped, as are rows whose category contains "Total".
gender_enrollment <- enrollment %>%
  filter(table_id == "P03_T01") %>%
  select(
    category = col_1,
    full_time_men = col_2,
    full_time_women = col_3,
    part_time_men = col_4,
    part_time_women = col_5
  ) %>%
  mutate(across(-category, parse_num_safe)) %>%
  filter(
    !(is.na(full_time_men) & is.na(full_time_women) &
      is.na(part_time_men) & is.na(part_time_women)),
    !str_detect(category, "Total")
  )

# Long form of the enrollment counts: one row per category/group pair, with
# the count in `students`. Pairs with a missing count are dropped.
gender_long <- pivot_longer(
  gender_enrollment,
  cols = -category,
  names_to = "group",
  values_to = "students",
  values_drop_na = TRUE
)

# Horizontal dodged bar chart of students per category, one bar per group.
# Categories are ordered with reorder(category, students).
ggplot(
  data = gender_long,
  mapping = aes(x = reorder(category, students), y = students, fill = group)
) +
  geom_col(position = position_dodge()) +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  theme_minimal() +
  labs(x = "", y = "Students", title = "Enrollment by Gender and Status")

# Admissions Analysis ----

# Hard-coded applicant and enrollee counts for men and women.
admissions_plot <- tribble(
  ~category,        ~value,
  "Men Applied",    2319,
  "Women Applied",  3477,
  "Men Enrolled",   580,
  "Women Enrolled", 882
)

# Horizontal bar chart of the counts above, ordered by value.
ggplot(
  data = admissions_plot,
  mapping = aes(x = reorder(category, value), y = value)
) +
  geom_col(fill = "orange") +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  theme_minimal() +
  labs(x = "", y = "Students", title = "Admissions Funnel")

# Student Life ----

# Rows of the cleaned table whose section is "F. STUDENT LIFE".
student_life <- filter(csub_clean, section == "F. STUDENT LIFE")

# Rows of table P15_T02 whose col_2 contains an "X", each given a constant
# `count` of 1 so it can be drawn as a bar.
activities <- student_life %>%
  filter(table_id == "P15_T02", str_detect(col_2, "X")) %>%
  mutate(count = 1)

# Horizontal bar chart with one bar per col_1 value in `activities`; the bar
# length is the `count` column.
ggplot(
  data = activities,
  mapping = aes(x = reorder(col_1, count), y = count)
) +
  geom_col(fill = "green") +
  coord_flip() +
  theme_minimal() +
  labs(x = "", y = "Available", title = "Student Activities")

# Cost of Attendance ----

# Hard-coded cost components and their amounts.
cost_plot <- tribble(
  ~category,           ~amount,
  "In-State Tuition",  5472,
  "Out-State Tuition", 16632,
  "Fees",              1369,
  "Room & Board",      13968,
  "Books",             1898,
  "Transport",         1438
)

# Horizontal bar chart of the amounts, ordered by amount, with the value axis
# formatted as dollars.
ggplot(
  data = cost_plot,
  mapping = aes(x = reorder(category, amount), y = amount)
) +
  geom_col(fill = "gold") +
  scale_y_continuous(labels = scales::dollar_format()) +
  coord_flip() +
  theme_minimal() +
  labs(x = "", y = "USD", title = "Cost of Attendance")

# Financial Aid ----

# Hard-coded financial aid counts per category.
aid_plot <- tribble(
  ~category,      ~value,
  "Applied Aid",  6123,
  "Received Aid", 5388,
  "Need Based",   5518,
  "Grants",       5016,
  "Loans",        2369
)

# Horizontal bar chart of the counts above, ordered by value.
ggplot(
  data = aid_plot,
  mapping = aes(x = reorder(category, value), y = value)
) +
  geom_col(fill = "darkgreen") +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  theme_minimal() +
  labs(x = "", y = "Students", title = "Financial Aid Distribution")

# Faculty and Class Size ----

# Hard-coded faculty counts per category.
faculty_plot <- tribble(
  ~category,   ~value,
  "Faculty",   473,
  "Doctorate", 139,
  "Women",     250,
  "Men",       223
)

# Horizontal bar chart of the counts above, ordered by value.
ggplot(
  data = faculty_plot,
  mapping = aes(x = reorder(category, value), y = value)
) +
  geom_col(fill = "blue") +
  coord_flip() +
  theme_minimal() +
  labs(x = "", y = "Count", title = "Faculty Profile")

# Degrees ----

# Hard-coded percentage of degrees for five disciplines.
degrees_plot <- tribble(
  ~discipline,      ~pct,
  "Liberal Arts",   29.2,
  "Psychology",     9.4,
  "Social Science", 7.9,
  "Education",      7.8,
  "Business",       6.3
)

# Horizontal bar chart of the percentages, ordered by percentage.
ggplot(
  data = degrees_plot,
  mapping = aes(x = reorder(discipline, pct), y = pct)
) +
  geom_col(fill = "purple") +
  coord_flip() +
  theme_minimal() +
  labs(x = "", y = "%", title = "Degrees by Discipline")

# Conclusion ----

# This analysis shows that CSUB has a large undergraduate population, a strong
# reliance on financial aid, and a concentration of degrees in a few major
# areas. Visualizations make these patterns easier to see than raw tables do.