combined_header <- paste(names(df), df[1, ], sep = "_")
# Remove the first row from the data
df <- df[-1, ]
# Set the combined value as the new header
names(df) <- combined_header
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
df[is.null(df)] <- NA
df[df == ""] <- NA
df$College_ <- na.locf(df$College_, na.rm = FALSE)
subset_df <- subset(df, Credential_ == "ADVANCED DIPLOMA")
clean_column_names <- function(names) {
cleaned_names <- str_replace_all(names, "\\.{3}", "_") # Replace "..." with "_"
cleaned_names <- str_replace_all(cleaned_names, "[^a-zA-Z]_", "") # Remove anything that is not a letter before "_"
cleaned_names <- str_replace_all(cleaned_names, "NOVEMBER_|MARCH_|JUNE_", "") # Remove NOVEMBER_, MARCH_, or JUNE_
cleaned_names <- str_replace_all(cleaned_names, "_", ".") # Replace "_" with "."
return(cleaned_names)
}
library(stringr)
# Apply clean_column_names function to column names
colnames(subset_df) <- clean_column_names(colnames(subset_df))
subset_df<-subset_df[,c(1,3:23)]
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ purrr 1.0.1
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
long_data_fall <- subset_df %>%
pivot_longer(
cols = c(2:8),
names_to = "Year",
values_to = "Fall_Count"
)
long_data_fall<-long_data_fall[,c(1,16,17)]
long_data_fall <- long_data_fall %>%
mutate(Year = str_replace(Year, "FALL.", ""))
long_data_winter <- subset_df %>%
pivot_longer(
cols = c(9:15),
names_to = "Year",
values_to = "Winter_Count"
)
long_data_winter<-long_data_winter[,c(1,16,17)]
long_data_winter <- long_data_winter %>%
mutate(Year = str_replace(Year, "WINTER.", ""))
long_data_spring <- subset_df %>%
pivot_longer(
cols = c(16:22),
names_to = "Year",
values_to = "Spring_Count"
)
long_data_spring<-long_data_spring[,c(1,16,17)]
long_data_spring <- long_data_spring %>%
mutate(Year = str_replace(Year, "SPRING.", ""))
merged_data <- left_join(long_data_spring, long_data_fall, by = c("College.", "Year"))
# Join merged_data with long_data_winter based on College and Year
merged_data <- left_join(merged_data, long_data_winter, by = c("College.", "Year"))
library(ggplot2)
# Assuming your merged dataframe is named merged_data
merged_data <-merged_data[1:168,]
# Filter the data for the specified colleges
filtered_data <- merged_data %>%
filter(College. %in% c("SHERIDAN", "SENECA", "GEORGE BROWN", "HUMBER", "CENTENNIAL", "DURHAM"))
filtered_data <- filtered_data %>%
mutate_at(vars(3:5), ~as.numeric(gsub("[^0-9.]", "", .)))
# Plot Fall_Count across the Year, with each college represented by a different color
ggplot(filtered_data, aes(x = Year, y = Fall_Count, color = College., group=College.)) +
geom_point(size = 3) + # Use points to represent each data point
geom_line() + # Connect the points with lines
labs(title = "Fall_Count across Years",
x = "Year",
y = "Fall_Count") +
scale_color_discrete(name = "College") + # Customize the legend title
theme_minimal() # Use a minimalistic theme
# Winter
ggplot(filtered_data, aes(x = Year, y = Winter_Count, color = College., group=College.)) +
geom_point(size = 3) + # Use points to represent each data point
geom_line() + # Connect the points with lines
labs(title = "Winter_Count across Years",
x = "Year",
y = "Winter_Count") +
scale_color_discrete(name = "College") + # Customize the legend title
theme_minimal()
# Spring
ggplot(filtered_data, aes(x = Year, y = Spring_Count, color = College., group=College.)) +
geom_point(size = 3) + # Use points to represent each data point
geom_line() + # Connect the points with lines
labs(title = "Spring_Count across Years",
x = "Year",
y = "Spring_Count") +
scale_color_discrete(name = "College") + # Customize the legend title
theme_minimal()
For fall enrolment, Humber and Seneca shows very similar pattern across the years, and both are still in recovery.
The enrollment at George Brown College experienced a significant decline due to the pandemic, it dropped below Humber and Senece in 2021 and 2022. However, since 2023, there has been a noticeable recovery in enrollment numbers at George Brown.
Centennial has recovered to pre-pandemic enrolment count.
Seneca has a lot more spring intakes than other colleges.