# Read the district workbook, then keep only the district name and the two
# special-education columns used in the rest of the analysis.
District <- read_excel("District.xls")
district_data <- select(District, DISTNAME, DPETSPEP, DPFPASPEP)
# Distribution summary (min, quartiles, mean, max, NA count) of each measure.
summary(district_data[["DPETSPEP"]]) # Percent Special Education
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.90 12.10 12.27 14.20 51.70
summary(district_data[["DPFPASPEP"]]) # Funding for Special Education
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 5.800 8.900 9.711 12.500 49.000 5
# Tally the missing values in every selected column.
district_data %>%
  is.na() %>%
  colSums()
## DISTNAME DPETSPEP DPFPASPEP
## 0 0 5
# Drop every row that has a missing value in any of the selected columns.
district_clean <- district_data %>%
  na.omit()
# Row count left once the incomplete rows are gone.
nrow(district_clean)
## [1] 1202
# Scatter plot of the two special-education measures, one point per row of
# district_clean.
ggplot(data = district_clean, mapping = aes(x = DPETSPEP, y = DPFPASPEP)) +
  geom_point(colour = "blue") +
  labs(
    y = "Funding for Special Education (DPFPASPEP)",
    x = "Percent Special Education (DPETSPEP)",
    title = "Comparison of Special Education % and Funding"
  ) +
  theme_minimal()
# Pearson correlation (the default method of cor()) between the two measures.
# use = "complete.obs" limits the calculation to rows where both are present.
correlation <- with(
  district_clean,
  cor(DPETSPEP, DPFPASPEP, use = "complete.obs")
)
correlation
## [1] 0.3700234
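Interpretation: The correlation of 0.37 indicates a weak-to-moderate positive relationship between the percentage of students in special education (DPETSPEP) and special education funding (DPFPASPEP). Districts with a higher percentage of special education students tend to have higher special education funding, but the scatter plot shows that this is not always the case. The correlation suggests a relationship, but it is not a strong one, and on its own it does not show that one variable causes the other.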