# Load CSV or Excel file
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
# Read the district spreadsheet into `district`.
# NOTE(review): the path is absolute and specific to one machine; anyone else
# running this script has to point it at their own copy of district.xls.
district <- read_excel("C:/Users/miche/OneDrive/Desktop/My Class Stuff/Wednesday Class/District Data/district.xls")
# View() opens a data viewer and is only useful in an interactive session, so
# skip it when the script is run non-interactively (Rscript, knitting).
if (interactive()) {
  View(district)
}
# Keep only the district name and the two special-education columns.
spec_ed <- select(district, DISTNAME, DPETSPEP, DPFPASPEP)

# Distribution summary of each of the two numeric columns.
summary(spec_ed[["DPETSPEP"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.90 12.10 12.27 14.20 51.70
summary(spec_ed[["DPFPASPEP"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 5.800 8.900 9.711 12.500 49.000 5

# Number of missing values in each column.
colSums(is.na(spec_ed))
## DISTNAME DPETSPEP DPFPASPEP
## 0 0 5

# Remove every row that has a missing value in any column, then count what
# is left.
spec_ed_clean <- drop_na(spec_ed)
nrow(spec_ed_clean)
## [1] 1202
# Scatter plot of DPETSPEP against DPFPASPEP, one point per row of the
# cleaned data. Both variables are percentages (see the summaries above and
# the write-up below), so the labels say "Percent of Funding" rather than
# "Money Spent".
ggplot(spec_ed_clean, aes(x = DPETSPEP, y = DPFPASPEP)) +
  geom_point(color = "red", alpha = 0.6) +
  labs(
    title = "Percent Special Education vs. Percent of Funding for Special Education",
    x = "Percent Special Education (DPETSPEP)",
    y = "Percent of Funding Allocated to Special Education (DPFPASPEP)"
  )

# Pearson correlation between the two percentages. The write-up quotes this
# value, so compute it here rather than leaving the figure unsupported.
cor(spec_ed_clean$DPETSPEP, spec_ed_clean$DPFPASPEP)
# The correlation of approximately 0.37 between the percentage of special
# education students (DPETSPEP) and the percentage of funding allocated to
# special education (DPFPASPEP) shows that there is a moderate association,
# but probably not a direct one. Other variables, such as district size,
# available resources, state and federal funding, and other factors, may also
# help explain the outcome.