# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(maps)
# Load the cleaned data.
# The CSV location defaults to the download path below; set the
# EDUCATION_DATA_CSV environment variable to read the file from another
# location without editing this script.
data_path <- Sys.getenv(
  "EDUCATION_DATA_CSV",
  unset = "C://Users//HP//Downloads//Merged_EducationData.csv"
)
data <- read.csv(data_path)
# 1. Institution Types by State
# Count institutions for every (state, control type) pair. `.groups = "drop"`
# returns an ungrouped table directly, so summarise() does not emit its
# "grouped output" message and no separate ungroup() step is required.
institution_types <- data %>%
  group_by(state_abbr_x, inst_control) %>%
  summarise(count = n(), .groups = "drop")

# Dodged bars: one bar per control type within each state. geom_col() draws
# the precomputed counts as bar heights; the state labels on the x axis are
# rotated 90 degrees so they do not overlap.
ggplot(
  institution_types,
  aes(x = state_abbr_x, y = count, fill = inst_control)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Institution Types by State",
    x = "State",
    y = "Number of Institutions",
    fill = "Institution Type"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# 2. Institution Levels
# Tally the number of rows per institution level; the result is ungrouped.
institution_levels <- data %>%
  group_by(institution_level) %>%
  summarise(count = n(), .groups = "drop")

# Pie chart: a single full-width stacked bar wrapped onto polar coordinates,
# with the counts mapped to the angle.
ggplot(
  institution_levels,
  aes(x = "", y = count, fill = institution_level)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  labs(
    title = "Distribution of Institution Levels",
    fill = "Institution Level"
  )

# 3. Geographical Distribution
# NOTE: despite its name, `world_map` holds the outlines of the "state" map
# database returned by map_data("state"), not a world map. The name is kept
# in case later code refers to it.
world_map <- map_data("state")
ggplot() +
  # geom_map() takes polygon coordinates from `map` and only understands the
  # `map_id` aesthetic; passing x/y to it triggers an "unknown aesthetics"
  # warning. The plot extent is set explicitly with expand_limits() instead.
  geom_map(
    data = world_map, map = world_map,
    aes(map_id = region),
    fill = "white", color = "black"
  ) +
  expand_limits(x = world_map$long, y = world_map$lat) +
  # One semi-transparent point per row of `data`, coloured by control type.
  geom_point(
    data = data,
    aes(x = longitude, y = latitude, color = inst_control),
    size = 1.5, alpha = 0.7
  ) +
  labs(
    title = "Geographical Distribution of Institutions",
    x = "Longitude",
    y = "Latitude",
    color = "Institution Type"
  )

# 4. Disability Percentage Distribution
# Histogram of disability percentages using bins 5 units wide. Values that
# are NA or otherwise non-finite after as.numeric() cannot be binned; they
# are dropped deliberately (`na.rm = TRUE`), so no "removed rows" warning is
# raised when the plot is drawn.
ggplot(data, aes(x = as.numeric(disability_percentage))) +
  geom_histogram(
    binwidth = 5, fill = "skyblue", color = "black",
    na.rm = TRUE
  ) +
  labs(
    title = "Distribution of Disability Percentage",
    x = "Disability Percentage",
    y = "Count"
  )

# 5. Enrollment Trends
# Convert `year` to a factor so it is plotted as a discrete axis.
data$year <- as.factor(data$year)
# Coerce both FTE columns to numeric so they can be summed.
data <- data %>%
  mutate(
    est_fte = as.numeric(est_fte),
    rep_fte = as.numeric(rep_fte)
  )
# Per-year totals of both FTE columns (missing values ignored), reshaped to
# long form: one row per (year, series) with the series name in `type` and
# the total in `value`, so each series can be drawn as its own line.
enrollment_trends <- data %>%
  group_by(year) %>%
  summarise(
    est_fte_sum = sum(est_fte, na.rm = TRUE),
    rep_fte_sum = sum(rep_fte, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(est_fte_sum, rep_fte_sum),
    names_to = "type",
    values_to = "value"
  ) %>%
  # Keep each series' rows contiguous, in year order within the series.
  arrange(type)

# One line plus point markers per series. `group = type` is required because
# `year` is a factor; without it no lines would connect across years.
# Line thickness is set with `linewidth` (the `size` aesthetic is deprecated
# for lines since ggplot2 3.4.0).
ggplot(
  enrollment_trends,
  aes(x = year, y = value, color = type, group = type)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Enrollment Trends Over Years",
    x = "Year",
    y = "Full-Time Equivalent (FTE)",
    color = "Type"
  ) +
  theme_minimal()
