# Load the district workbook and keep only the district name plus the two
# special-education columns examined below.
district <- read_excel("district.xls")
df_new <- select(district, DISTNAME, DPETSPEP, DPFPASPEP)

# Distribution summary of each measure (recorded console output follows each
# call as `##` lines).
summary(df_new[["DPETSPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    9.90   12.10   12.27   14.20   51.70
summary(df_new[["DPFPASPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.800   8.900   9.711  12.500  49.000       5

# Count the missing values in each measure.
cat("Missing DPETSPEP:", sum(is.na(df_new[["DPETSPEP"]])), "\n")
## Missing DPETSPEP: 0
cat("Missing DPFPASPEP:", sum(is.na(df_new[["DPFPASPEP"]])), "\n")
## Missing DPFPASPEP: 5
# Keep only the rows where both special-education measures are present.
# The columns are named explicitly so that a missing DISTNAME alone does not
# discard a row that is still usable for the correlation and the plot.
df_clean <- df_new %>%
  drop_na(DPETSPEP, DPFPASPEP)

# Correlation between the two measures on the complete rows (cor() defaults
# to the Pearson method).
cor(df_clean$DPFPASPEP, df_clean$DPETSPEP)
## [1] 0.3700234

# Report how many rows remain after removing the incomplete ones.
cat("Remaining observations:", nrow(df_clean), "\n")
## Remaining observations: 1202
# Scatter plot of DPFPASPEP (y axis) against DPETSPEP (x axis), one point per
# row of df_clean.
ggplot(data = df_clean, mapping = aes(x = DPETSPEP, y = DPFPASPEP)) +
  geom_point() +
  labs(
    title = "Special Education Spending vs Percent of Students",
    x = "Percent Special Education (DPETSPEP)",
    y = "Spending on Special Education (DPFPASPEP)"
  )

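The percent of students in special education (DPETSPEP) and special education spending (DPFPASPEP) are positively correlated (r ≈ 0.37), with a few districts that stand apart as “special circumstances.”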