1. Load Data

# Load CSV or Excel file
library(dplyr)
library(ggplot2)
library(readxl)
library(tidyr)
# Location of the district workbook, kept in one variable so it is easy to
# change when the file moves or the script is run on another machine.
district_path <- "C:/Users/miche/OneDrive/Desktop/My Class Stuff/Wednesday Class/District Data/district.xls"
district <- read_excel(district_path)

# Open the spreadsheet viewer only in an interactive session; View() is an
# inspection aid and should not run when the script is sourced or knitted.
if (interactive()) {
  View(district)
}

# Subset relevant variables: the district name plus the two special
# education measures analysed in the sections below.
spec_ed <- district %>%
  select(DISTNAME, DPETSPEP, DPFPASPEP)

2. Summary Statistics

# Min / quartiles / mean / max for the DPETSPEP column
summary(spec_ed[["DPETSPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    9.90   12.10   12.27   14.20   51.70
# Same summary for DPFPASPEP; the NA's entry counts its missing values
summary(spec_ed[["DPFPASPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.800   8.900   9.711  12.500  49.000       5

3. Missing Values

# Flag every missing cell, then total the flags per column to get the
# number of NA values in each variable.
missing_flags <- is.na(spec_ed)
colSums(missing_flags)
##  DISTNAME  DPETSPEP DPFPASPEP 
##         0         0         5

4. Remove Missing Values

# Keep only the rows that have no missing value in any selected column,
# then report how many districts remain.
spec_ed_clean <- drop_na(spec_ed)
nrow(spec_ed_clean)
## [1] 1202

5. Scatter Plot

# Scatter plot of the two special education measures, one point per district.
# DPFPASPEP is a percentage (its values run from 0 to 49), so the title and
# y-axis describe it as a share of funding rather than a dollar amount.
ggplot(spec_ed_clean, aes(x = DPETSPEP, y = DPFPASPEP)) +
  # Semi-transparent points keep overlapping districts visible
  geom_point(color = "red", alpha = 0.6) +
  labs(
    title = "Percent Special Education Students vs. Percent of Funding Spent on Special Education",
    x = "Percent Special Education Students (DPETSPEP)",
    y = "Percent of Funding Spent on Special Education (DPFPASPEP)"
  )

6. Correlation

# Pearson correlation (cor()'s default method) between the two measures,
# computed on the rows that survived the missing-value filter.
with(spec_ed_clean, cor(DPETSPEP, DPFPASPEP))
## [1] 0.3700234

7. Interpretation of the data

The correlation of approximately 0.37 between the percentage of special education students (DPETSPEP) and the percentage of funding allocated to special education (DPFPASPEP) shows that there is a moderate positive association. Districts with higher proportions of special education students generally tend to devote a larger share of their funding to these programs, but the percentage of special education students alone does not appear to drive spending decisions. Additional variables—including district size, available resources, state and federal funding, and other factors—may help explain the remaining variation in spending.