1 Overview

About the data:

The overall data contains the number of students enrolled in the fall, by race/ethnicity, gender, attendance (full- or part-time) status and level of student for selected major fields of study.

The major fields of study included are Education, Engineering, Biological Sciences/Life Sciences, Mathematics, Physical Sciences, Business Management and Administrative Services, Law, Dentistry, and Medicine.

The data contains enrollment data for 4-year institutions only. Institutions with traditional academic year calendar systems (semester, quarter, trimester or 4-1-4) report their enrollment as of October 15 or the official fall reporting date of the institution. Institutions with calendar systems that differ by program or that allow continuous enrollment report students who are enrolled at any time between August 1 and October 31.

The data has multiple records per institution. Each record is uniquely identified by two variables: the institution's IPEDS ID (UNITID) and EFCIPLEV, which combines the major field of study, attendance status, and level of student. Each record contains the total enrollment, the enrollment for men and for women, and the enrollment for men and for women within each race/ethnicity category.

2 Data Cleaning

2.1 Total Fall Enrollment by Gender

Corresponding figure is here.

# adding a new data frame called gender with year, grand totals, total women and total men columns
calculate_totals <- function(df, year) {
  total_men <- sum(df$EFTOTLM, na.rm = TRUE)
  total_women <- sum(df$EFTOTLW, na.rm = TRUE)
  grand_total <- sum(df$EFTOTLT, na.rm = TRUE)
  
  return(data.frame(year = year, total_men = total_men, total_women = total_women, grand_total = grand_total))
}

# years for the df's
years <- c(2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022)

gender <- data.frame()

# looping through each year and df, calculating totals, and appending rows to df
for (i in seq_along(years)) {
  df <- get(paste0("df", years[i]))
  gender <- bind_rows(gender, calculate_totals(df, years[i]))
}

2.2 Total Fall Enrollment by Major Field of Study

Corresponding figure is here.

# now doing this to calculate the cip code totals for each df

cipcode_totals <- function(df, year) {
  df %>%
    group_by(CIPCODE) %>%
    summarize(grand_total = sum(EFTOTLT, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(Year = year)
}

cipcode <- data.frame()

for (year in years) {
  df <- get(paste0("df", year))
  cipcode <- bind_rows(cipcode, cipcode_totals(df, year))
}

# relabeling
# First inspect the distinct CIP codes that need labels. The factor is only
# printed here, not assigned, so cipcode$CIPCODE itself is left unchanged.
as.factor(cipcode$CIPCODE)

##  [1] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [10] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [19] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [28] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [37] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [46] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [55] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [64] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [73] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## [82] 13      14      22.0101 26      27      40      51.0401 51.1201 52     
## Levels: 13 14 22.0101 26 27 40 51.0401 51.1201 52

# Replace each CIP code with the name of its field of study by looking the
# code up (as text) in a named vector; codes without an entry become NA.
cipcode <- cipcode %>%
  mutate(CIPCODE = unname(c(
    "13" = "Education",
    "14" = "Engineering",
    "22.0101" = "Law",
    "26" = "Biological Sciences/Life Sciences",
    "27" = "Mathematics",
    "40" = "Physical Sciences",
    "51.0401" = "Dentistry",
    "51.1201" = "Medicine",
    "52" = "Business Management and Administrative Services"
  )[as.character(CIPCODE)]))

# Yearly totals for three groups: Biological Sciences/Life Sciences and
# Medicine keep their own label, every other field is pooled as "Other".
biomed <- cipcode %>%
  mutate(Major = if_else(
    CIPCODE %in% c("Biological Sciences/Life Sciences", "Medicine"),
    CIPCODE,
    "Other"
  )) %>%
  group_by(Year, Major) %>%
  summarise(grand_total = sum(grand_total)) %>%
  ungroup()

2.3 Total Fall Enrollment by Level of Study (Undergraduate, Graduate, First Professionals)

Corresponding figure is here.

# Enrollment totals by level of study for one survey year.
# Keeps only LSTUDY codes 1, 2, 12 and 16 (labelled further down as all
# students, undergraduate, graduate and first professional), sums EFTOTLT
# within each code while skipping missing values, and tags the rows with
# `year` in a Year column.
lstudy_totals <- function(df, year) {
  wanted <- filter(df, LSTUDY %in% c(1, 2, 12, 16))
  per_level <- summarise(
    group_by(wanted, LSTUDY),
    grand_total = sum(EFTOTLT, na.rm = TRUE)
  )
  mutate(ungroup(per_level), Year = year)
}

lstudy <- data.frame()

for (year in years) {
  df <- get(paste0("df", year))
  lstudy <- bind_rows(lstudy, lstudy_totals(df, year))
}



# relabeling: swap each LSTUDY code for a readable level name by looking the
# code up (as text) in a named vector; codes without an entry become NA

lstudy <- lstudy %>%
  mutate(LSTUDY = unname(c(
    "1" = "All students total",
    "2" = "All students, Undergraduate total",
    "12" = "All students, Graduate",
    "16" = "All students, First professional"
  )[as.character(LSTUDY)]))

2.4 Total Fall Enrollment by Level of Study for Biological/Life Sciences Students

Corresponding figure is here.

# Biological/life sciences subset: totals for one survey year at three
# EFCIPLEV codes, 301, 302 and 312 (labelled further down as all students,
# undergraduate and graduate). EFTOTLT is summed within each code, skipping
# missing values, and the rows are tagged with `year` in a Year column.
biostat_totals <- function(df, year) {
  bio_rows <- df %>%
    filter(EFCIPLEV %in% c(301, 302, 312))

  level_sums <- bio_rows %>%
    group_by(EFCIPLEV) %>%
    summarise(grand_total = sum(EFTOTLT, na.rm = TRUE))

  level_sums %>%
    ungroup() %>%
    mutate(Year = year)
}

biostat <- data.frame()

for (year in years) {
  df <- get(paste0("df", year))
  biostat <- bind_rows(biostat, biostat_totals(df, year))
}

# relabeling: swap each EFCIPLEV code for a readable level name by looking
# the code up (as text) in a named vector; codes without an entry become NA

biostat <- biostat %>%
  mutate(EFCIPLEV = unname(c(
    "301" = "All students total",
    "302" = "Undergraduate total",
    "312" = "Graduate total"
  )[as.character(EFCIPLEV)]))


# Undergraduate-only view: totals for one survey year at EFCIPLEV codes 302
# to 305 (labelled further down as undergraduate total, total
# degree-seeking, first-time and other). EFTOTLT is summed within each
# code, skipping missing values, and the rows are tagged with `year`.
biounder_totals <- function(df, year) {
  undergrad_rows <- filter(df, EFCIPLEV %in% c(302, 303, 304, 305))
  grouped_rows <- group_by(undergrad_rows, EFCIPLEV)
  code_sums <- summarise(grouped_rows, grand_total = sum(EFTOTLT, na.rm = TRUE))
  code_sums <- ungroup(code_sums)
  mutate(code_sums, Year = year)
}

biounder <- data.frame()

for (year in years) {
  df <- get(paste0("df", year))
  biounder <- bind_rows(biounder, biounder_totals(df, year))
}

# relabel: swap each EFCIPLEV code for a readable name by looking the code
# up (as text) in a named vector; codes without an entry become NA

biounder <- biounder %>%
  mutate(EFCIPLEV = unname(c(
    "302" = "Undergraduate total",
    "303" = "Total Degree-Seeking",
    "304" = "First-time",
    "305" = "Other"
  )[as.character(EFCIPLEV)]))

2.5 Total Fall Enrollment by Attendance Status for Medicine Students

Corresponding figure is here.

# Medicine only: totals for one survey year at EFCIPLEV codes 916, 936 and
# 956 (labelled further down as all students, full time and part time).
# EFTOTLT is summed within each code, skipping missing values, and the rows
# are tagged with `year` in a Year column.
med_totals <- function(df, year) {
  med_rows <- filter(df, EFCIPLEV %in% c(916, 936, 956))

  mutate(
    ungroup(
      summarise(
        group_by(med_rows, EFCIPLEV),
        grand_total = sum(EFTOTLT, na.rm = TRUE)
      )
    ),
    Year = year
  )
}

medicine <- data.frame()

for (year in years) {
  df <- get(paste0("df", year))
  medicine <- bind_rows(medicine, med_totals(df, year))
}

# relabel: swap each EFCIPLEV code for a readable attendance label by
# looking the code up (as text) in a named vector; codes without an entry
# become NA

medicine <- medicine %>%
  mutate(EFCIPLEV = unname(c(
    "916" = "All students",
    "936" = "Full time",
    "956" = "Part time"
  )[as.character(EFCIPLEV)]))

3 Figures