homework3

# Load the district workbook, then keep only DISTNAME and the two
# special-education columns used in the rest of the analysis.
district <- read_excel("district.xls")
df_new <- select(district, DISTNAME, DPETSPEP, DPFPASPEP)

# Five-number summary (plus mean) of DPETSPEP.
summary(df_new[["DPETSPEP"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    9.90   12.10   12.27   14.20   51.70

# Five-number summary (plus mean and NA count) of DPFPASPEP.
summary(df_new[["DPFPASPEP"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.800   8.900   9.711  12.500  49.000       5

# Report how many DPETSPEP values are NA.
cat("Missing DPETSPEP:", sum(is.na(df_new[["DPETSPEP"]])), "\n")

## Missing DPETSPEP: 0

# Report how many DPFPASPEP values are NA.
cat("Missing DPFPASPEP:", sum(is.na(df_new[["DPFPASPEP"]])), "\n")

## Missing DPFPASPEP: 5

# Drop every row that has an NA in any of the selected columns.
df_clean <- drop_na(df_new)

# Pearson correlation (cor()'s default method) between the two measures,
# computed on the complete rows only.
with(df_clean, cor(DPFPASPEP, DPETSPEP))

## [1] 0.3700234

# Report how many rows survived the NA filter.
cat("Remaining observations:", dim(df_clean)[1L], "\n")

## Remaining observations: 1202

# Scatter plot of the two special-education measures, one point per row of
# df_clean; the axis labels name the underlying columns.
ggplot(data = df_clean, mapping = aes(x = DPETSPEP, y = DPFPASPEP)) +
  geom_point() +
  labs(
    title = "Special Education Spending vs Percent of Students",
    x = "Percent Special Education (DPETSPEP)",
    y = "Spending on Special Education (DPFPASPEP)"
  )

The two variables are positively but only moderately correlated (r ≈ 0.37), with a few outlying districts that appear to reflect “special circumstances.”

homework3

KEVIN ELLISON

2026-04-10