# Read the district workbook; the relative path is resolved against the
# current working directory.
District <- read_excel("District.xls")

# Keep only the district name and the two special education columns that
# the rest of the analysis uses.
district_data <- select(District, DISTNAME, DPETSPEP, DPFPASPEP)
# Distribution of the special education percentage (DPETSPEP)
summary(district_data[["DPETSPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    9.90   12.10   12.27   14.20   51.70
# Distribution of special education funding (DPFPASPEP); note the NA's column
summary(district_data[["DPFPASPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.800   8.900   9.711  12.500  49.000       5
# Per-column tally of missing values
district_data %>%
  is.na() %>%
  colSums()
##  DISTNAME  DPETSPEP DPFPASPEP 
##         0         0         5
# Drop every row that has a missing value in any of the selected columns
district_clean <- district_data %>%
  na.omit()
# Rows left once incomplete observations are removed
district_clean %>%
  nrow()
## [1] 1202
# Scatter plot of the cleaned data: one blue point per row of district_clean,
# special education percentage on the x-axis and funding on the y-axis.
district_clean %>%
  ggplot(mapping = aes(DPETSPEP, DPFPASPEP)) +
  geom_point(colour = "blue") +
  labs(
    y = "Funding for Special Education (DPFPASPEP)",
    x = "Percent Special Education (DPETSPEP)",
    title = "Comparison of Special Education % and Funding"
  ) +
  theme_minimal()

# Correlation between the two special education measures (cor()'s default
# method is Pearson). Evaluated inside district_clean so the columns can be
# named directly.
correlation <- with(
  district_clean,
  cor(DPETSPEP, DPFPASPEP, use = "complete.obs")
)
correlation
## [1] 0.3700234

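Interpretation: The correlation of about 0.37 indicates a weak-to-moderate positive linear relationship between the percentage of students in special education (DPETSPEP) and the funding for special education (DPFPASPEP). Districts with a higher percentage of special education students tend to have higher special education funding, but that is not always the case, and the scatter plot shows many exceptions. The correlation suggests an association, but not a strong one, and on its own it does not show that one variable causes the other.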