library(ggplot2)

library(readxl)

# Load the district-level dataset from the Excel workbook.
# NOTE(review): this path is machine-specific; point `district_path` at your
# own copy of district.xls before running the script elsewhere.
district_path <- "C:/Users/Campo/Downloads/district.xls"
if (!file.exists(district_path)) {
  stop("District workbook not found: ", district_path, call. = FALSE)
}
district <- read_excel(district_path)

# View() opens a spreadsheet-style viewer, so it only makes sense in an
# interactive session; skip it in non-interactive runs (e.g. Rscript).
if (interactive()) {
  View(district)
}

# Print the first rows as a quick sanity check of the import.
head(district)
## # A tibble: 6 × 137
##   DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
##   <chr>    <chr>    <chr>    <chr>  <chr>       <dbl>    <dbl>    <dbl>    <dbl>
## 1 CAYUGA … 001902   001 AND… 07     A               3      574      4.4     11.5
## 2 ELKHART… 001903   001 AND… 07     A               4     1150      4       11.8
## 3 FRANKST… 001904   001 AND… 07     A               3      808      8.5     11.3
## 4 NECHES … 001906   001 AND… 07     A               2      342      8.2     13.5
## 5 PALESTI… 001907   001 AND… 07     B               6     3360     25.1     42.9
## 6 WESTWOO… 001908   001 AND… 07     B               4     1332     19.7     26.2
## # ℹ 128 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## #   DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## #   DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## #   DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## #   DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## #   DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## #   DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Collect the district name and the two special-education columns used in
# the rest of the analysis into a plain data frame.
special_education <- with(
  district,
  data.frame(
    DISTNAME = DISTNAME,
    DPETSPEP = DPETSPEP,
    DPFPASPEP = DPFPASPEP
  )
)

# Summary statistics for DPETSPEP.
summary(special_education[["DPETSPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    9.90   12.10   12.27   14.20   51.70
# Summary statistics for DPFPASPEP; summary() also reports its NA count.
summary(special_education[["DPFPASPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.800   8.900   9.711  12.500  49.000       5
# Number of missing values in each column.
vapply(special_education, function(column) sum(is.na(column)), integer(1))
##  DISTNAME  DPETSPEP DPFPASPEP 
##         0         0         5
# DPFPASPEP had 5 missing values

# Drop every row that has a missing value in any of the selected columns.
special_education_clean <- stats::na.omit(special_education)

# Number of rows that remain after removing incomplete rows.
dim(special_education_clean)[1L]
## [1] 1202
# Scatter plot of DPFPASPEP against DPETSPEP for the rows without missing
# values, one point per row.
ggplot(
  data = special_education_clean,
  mapping = aes(x = DPETSPEP, y = DPFPASPEP)
) +
  geom_point() +
  ggtitle("Comparison of Special Education Percent and Spending") +
  xlab("Percent Special Education (DPETSPEP)") +
  ylab("Spending on Special Education (DPFPASPEP)")

# Correlation between the two columns (cor() defaults to Pearson), computed
# on the rows without missing values.
correlation_result <- with(
  special_education_clean,
  cor(DPFPASPEP, DPETSPEP)
)
correlation_result
## [1] 0.3700234
# With a correlation coefficient of about 0.37 (r is not a percentage), I'd
# say this is a moderate positive correlation.