The plots below depict fall enrollment overall and by gender from 2004 to 2022, with linear trend lines fitted to the data from 2013, 2015, and 2017 onward (each trend runs from its start year through the most recent year). The analysis uses data from the Integrated Postsecondary Education Data System (IPEDS).
To generate the plots below, I took the following steps:
Retrieved and processed IPEDS data containing information on fall enrollment by postsecondary institutions over 2004-2022.
Conducted linear regression analyses to identify and visualize linear trends starting in specific years (2013, 2015, and 2017) and running through the end of the dataset.
Created line plots that visualize fall enrollment over time (overall, for women, and for men), with separate lines representing the observed data and the linear trends starting in the specified years.
# Total fall enrollment ----
# Fit a separate linear trend of total_sum on year over each window that
# starts in 2013, 2015, or 2017 and runs through the latest year in df_gender.
# NOTE(review): filter() is presumably dplyr::filter -- confirm dplyr is
# attached before this point.
coef_2013 <- coef(lm(total_sum ~ year, data = filter(df_gender, year >= 2013)))
coef_2015 <- coef(lm(total_sum ~ year, data = filter(df_gender, year >= 2015)))
coef_2017 <- coef(lm(total_sum ~ year, data = filter(df_gender, year >= 2017)))

# One row per trend line, holding the fitted value at the window's first and
# last year. Keeping the endpoints in their own data frame means each segment
# is drawn exactly once; mapping constants inside aes() on a layer that
# inherits df_gender would draw one identical segment per row of df_gender.
total_end_year <- max(df_gender$year)
total_trends <- data.frame(
  start = c("2013", "2015", "2017"),
  x = c(2013, 2015, 2017),
  xend = total_end_year,
  # `[[` drops the coefficient names so they are not picked up as row names.
  y = c(
    coef_2013[[1]] + coef_2013[[2]] * 2013,
    coef_2015[[1]] + coef_2015[[2]] * 2015,
    coef_2017[[1]] + coef_2017[[2]] * 2017
  ),
  yend = c(
    coef_2013[[1]] + coef_2013[[2]] * total_end_year,
    coef_2015[[1]] + coef_2015[[2]] * total_end_year,
    coef_2017[[1]] + coef_2017[[2]] * total_end_year
  )
)

# Observed series as a solid line, with the three trends as dashed segments
# coloured by the year their fitting window starts.
totalfig <- ggplot(df_gender, aes(x = year, y = total_sum)) +
  geom_line() +
  geom_segment(
    data = total_trends,
    aes(x = x, xend = xend, y = y, yend = yend, color = start),
    linetype = "dashed",
    inherit.aes = FALSE
  ) +
  labs(
    x = "Year",
    y = "Fall Enrollment",
    title = "Total Fall Enrollment for All Institutions (2004-2022)"
  ) +
  scale_color_manual(
    name = "Linear Trends",
    values = c("2013" = "red", "2015" = "navyblue", "2017" = "green"),
    labels = c("2013", "2015", "2017")
  ) +
  # The logical index is recycled, so this labels every third distinct year,
  # starting with the second one.
  scale_x_continuous(breaks = unique(df_gender$year)[c(FALSE, TRUE, FALSE)]) +
  scale_y_continuous(labels = scales::comma_format()) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )
totalfig
# Women's fall enrollment ----
# Fit a separate linear trend of total_women on year over each window that
# starts in 2013, 2015, or 2017 and runs through the latest year in df_gender.
# NOTE(review): filter() is presumably dplyr::filter -- confirm dplyr is
# attached before this point.
wcoef_2013 <- coef(
  lm(total_women ~ year, data = filter(df_gender, year >= 2013))
)
wcoef_2015 <- coef(
  lm(total_women ~ year, data = filter(df_gender, year >= 2015))
)
wcoef_2017 <- coef(
  lm(total_women ~ year, data = filter(df_gender, year >= 2017))
)

# One row per trend line, holding the fitted value at the window's first and
# last year. Keeping the endpoints in their own data frame means each segment
# is drawn exactly once; mapping constants inside aes() on a layer that
# inherits df_gender would draw one identical segment per row of df_gender.
women_end_year <- max(df_gender$year)
women_trends <- data.frame(
  start = c("2013", "2015", "2017"),
  x = c(2013, 2015, 2017),
  xend = women_end_year,
  # `[[` drops the coefficient names so they are not picked up as row names.
  y = c(
    wcoef_2013[[1]] + wcoef_2013[[2]] * 2013,
    wcoef_2015[[1]] + wcoef_2015[[2]] * 2015,
    wcoef_2017[[1]] + wcoef_2017[[2]] * 2017
  ),
  yend = c(
    wcoef_2013[[1]] + wcoef_2013[[2]] * women_end_year,
    wcoef_2015[[1]] + wcoef_2015[[2]] * women_end_year,
    wcoef_2017[[1]] + wcoef_2017[[2]] * women_end_year
  )
)

# Observed series as a solid line, with the three trends as dashed segments
# coloured by the year their fitting window starts.
womenfig <- ggplot(df_gender, aes(x = year, y = total_women)) +
  geom_line() +
  geom_segment(
    data = women_trends,
    aes(x = x, xend = xend, y = y, yend = yend, color = start),
    linetype = "dashed",
    inherit.aes = FALSE
  ) +
  labs(
    x = "Year",
    y = "Fall Enrollment",
    title = "Women's Fall Enrollment for All Institutions (2004-2022)"
  ) +
  scale_color_manual(
    name = "Linear Trends",
    values = c("2013" = "red", "2015" = "navyblue", "2017" = "green"),
    labels = c("2013", "2015", "2017")
  ) +
  # The logical index is recycled, so this labels every third distinct year,
  # starting with the second one.
  scale_x_continuous(breaks = unique(df_gender$year)[c(FALSE, TRUE, FALSE)]) +
  scale_y_continuous(labels = scales::comma_format()) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )
womenfig
# Men's fall enrollment ----
# Fit a separate linear trend of total_men on year over each window that
# starts in 2013, 2015, or 2017 and runs through the latest year in df_gender.
# NOTE(review): filter() is presumably dplyr::filter -- confirm dplyr is
# attached before this point.
mcoef_2013 <- coef(lm(total_men ~ year, data = filter(df_gender, year >= 2013)))
mcoef_2015 <- coef(lm(total_men ~ year, data = filter(df_gender, year >= 2015)))
mcoef_2017 <- coef(lm(total_men ~ year, data = filter(df_gender, year >= 2017)))

# One row per trend line, holding the fitted value at the window's first and
# last year. Keeping the endpoints in their own data frame means each segment
# is drawn exactly once; mapping constants inside aes() on a layer that
# inherits df_gender would draw one identical segment per row of df_gender.
men_end_year <- max(df_gender$year)
men_trends <- data.frame(
  start = c("2013", "2015", "2017"),
  x = c(2013, 2015, 2017),
  xend = men_end_year,
  # `[[` drops the coefficient names so they are not picked up as row names.
  y = c(
    mcoef_2013[[1]] + mcoef_2013[[2]] * 2013,
    mcoef_2015[[1]] + mcoef_2015[[2]] * 2015,
    mcoef_2017[[1]] + mcoef_2017[[2]] * 2017
  ),
  yend = c(
    mcoef_2013[[1]] + mcoef_2013[[2]] * men_end_year,
    mcoef_2015[[1]] + mcoef_2015[[2]] * men_end_year,
    mcoef_2017[[1]] + mcoef_2017[[2]] * men_end_year
  )
)

# Observed series as a solid line, with the three trends as dashed segments
# coloured by the year their fitting window starts.
menfig <- ggplot(df_gender, aes(x = year, y = total_men)) +
  geom_line() +
  geom_segment(
    data = men_trends,
    aes(x = x, xend = xend, y = y, yend = yend, color = start),
    linetype = "dashed",
    inherit.aes = FALSE
  ) +
  labs(
    x = "Year",
    y = "Fall Enrollment",
    title = "Men's Fall Enrollment for All Institutions (2004-2022)"
  ) +
  scale_color_manual(
    name = "Linear Trends",
    values = c("2013" = "red", "2015" = "navyblue", "2017" = "green"),
    labels = c("2013", "2015", "2017")
  ) +
  # The logical index is recycled, so this labels every third distinct year,
  # starting with the second one.
  scale_x_continuous(breaks = unique(df_gender$year)[c(FALSE, TRUE, FALSE)]) +
  scale_y_continuous(labels = scales::comma_format()) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )
menfig