# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(maps)
# Read the merged education dataset from disk.
# NOTE(review): this is an absolute, machine-specific path; the script will
# only run where this exact file location exists.
data_path <- "C://Users//HP//Downloads//Merged_EducationData.csv"
data <- read.csv(data_path)
# 1. Institution Types by State
# Count the rows for every (state, control type) pair. `.groups = "drop"`
# returns an ungrouped result directly, so no trailing `ungroup()` is needed
# and dplyr no longer prints its "grouped output" message.
# NOTE(review): `n()` counts rows. The data also carries a `year` column (see
# section 5), so if there is one row per institution per year this overstates
# the number of distinct institutions -- confirm against the data.
institution_types <- data %>%
  group_by(state_abbr_x, inst_control) %>%
  summarise(count = n(), .groups = "drop")

# Dodged bar chart: one bar per control type within each state.
# `geom_col()` is the shorthand for `geom_bar(stat = "identity")`.
ggplot(
  institution_types,
  aes(x = state_abbr_x, y = count, fill = inst_control)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Institution Types by State",
    x = "State",
    y = "Number of Institutions",
    fill = "Institution Type"
  ) +
  # Rotate the state labels so they do not overlap; `vjust = 0.5` keeps each
  # rotated label centred on its tick mark.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# 2. Institution Levels
# Tally the rows per institution level. Summarising over a single grouping
# variable already yields an ungrouped table.
# NOTE(review): `n()` counts rows, not distinct institutions -- confirm the
# data has one row per institution if that is what the chart should show.
institution_levels <- summarise(
  group_by(data, institution_level),
  count = n()
)

# Pie chart: a single stacked column wrapped onto polar coordinates.
pie_mapping <- aes(x = "", y = count, fill = institution_level)
pie_labels <- labs(
  title = "Distribution of Institution Levels",
  fill = "Institution Level"
)

ggplot(institution_levels, pie_mapping) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  pie_labels

# 3. Geographical Distribution
# State outlines from the `maps` package. The name `world_map` is kept for
# compatibility, but the data holds US state polygons, not a world map.
# NOTE(review): the "state" database appears to cover only the contiguous
# states, so institutions elsewhere will plot outside the outline -- confirm.
world_map <- map_data("state")

# Draw the outlines with `geom_polygon()`. `geom_map()` ignores `x`/`y`
# aesthetics (it warned "Ignoring unknown aesthetics: x and y"), which meant
# the map itself never contributed to the axis limits. `group = group` keeps
# each polygon separate so states are not joined by stray lines.
ggplot() +
  geom_polygon(
    data = world_map,
    aes(x = long, y = lat, group = group),
    fill = "white", color = "black"
  ) +
  # One point per row of `data`, coloured by control type.
  geom_point(
    data = data,
    aes(x = longitude, y = latitude, color = inst_control),
    size = 1.5, alpha = 0.7
  ) +
  labs(
    title = "Geographical Distribution of Institutions",
    x = "Longitude",
    y = "Latitude",
    color = "Institution Type"
  )

# 4. Disability Percentage Distribution
# Coerce the column to numeric and drop missing / non-finite values up front.
# Previously ggplot2 discarded these rows itself and warned about it
# ("Removed ... rows containing non-finite values"); filtering explicitly
# makes the exclusion visible in the code.
disability_values <- data %>%
  transmute(disability_percentage = as.numeric(disability_percentage)) %>%
  filter(is.finite(disability_percentage))

# `boundary = 0` aligns the 5-wide bins to 0-5, 5-10, ... By default bins are
# centred on multiples of the bin width, so the first bin would span
# -2.5 to 2.5, which is misleading for a percentage.
ggplot(disability_values, aes(x = disability_percentage)) +
  geom_histogram(
    binwidth = 5, boundary = 0,
    fill = "skyblue", color = "black"
  ) +
  labs(
    title = "Distribution of Disability Percentage",
    x = "Disability Percentage",
    y = "Count"
  )

# 5. Enrollment Trends
# Prepare the columns used below: `year` becomes a factor (discrete x axis)
# and the two FTE columns are coerced to numeric so they can be summed.
data <- data %>%
  mutate(
    year = as.factor(year),
    est_fte = as.numeric(est_fte),
    rep_fte = as.numeric(rep_fte)
  )

# Sum both FTE columns per year (ignoring missing values), then reshape to
# long form: one row per (year, series) so each series maps to one line.
# `pivot_longer()` replaces the superseded `gather()`.
enrollment_trends <- data %>%
  group_by(year) %>%
  summarise(
    est_fte_sum = sum(est_fte, na.rm = TRUE),
    rep_fte_sum = sum(rep_fte, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(est_fte_sum, rep_fte_sum),
    names_to = "type",
    values_to = "value"
  )

# `group = type` is required because `year` is a factor: without it ggplot2
# would not connect points across the discrete x axis.
ggplot(
  enrollment_trends,
  aes(x = year, y = value, color = type, group = type)
) +
  # `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  labs(
    title = "Enrollment Trends Over Years",
    x = "Year",
    y = "Full-Time Equivalent (FTE)",
    color = "Type"
  ) +
  theme_minimal()