library(ggplot2)
library(readxl)
# Load the district-level dataset from the Excel workbook.
# NOTE(review): this is an absolute, machine-specific path; the script will
# fail wherever the file is not at this exact location.
district <- read_excel("C:/Users/Campo/Downloads/district.xls")
# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session; skip it when the script runs non-interactively
# (e.g. via Rscript or while knitting), where it can fail or block.
if (interactive()) {
  View(district)
}
# Print the first rows as a quick sanity check of the import.
head(district)
## # A tibble: 6 × 137
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CAYUGA … 001902 001 AND… 07 A 3 574 4.4 11.5
## 2 ELKHART… 001903 001 AND… 07 A 4 1150 4 11.8
## 3 FRANKST… 001904 001 AND… 07 A 3 808 8.5 11.3
## 4 NECHES … 001906 001 AND… 07 A 2 342 8.2 13.5
## 5 PALESTI… 001907 001 AND… 07 B 6 3360 25.1 42.9
## 6 WESTWOO… 001908 001 AND… 07 B 4 1332 19.7 26.2
## # ℹ 128 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## # DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## # DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## # DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## # DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## # DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## # DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Build a small working data frame holding only the district name and the two
# special-education columns (DPETSPEP and DPFPASPEP) used in the rest of the
# analysis.
special_education <- data.frame(
  DISTNAME = district[["DISTNAME"]],
  DPETSPEP = district[["DPETSPEP"]],
  DPFPASPEP = district[["DPFPASPEP"]]
)
# Distribution summary for each of the two measures.
summary(special_education[["DPETSPEP"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.90 12.10 12.27 14.20 51.70
summary(special_education[["DPFPASPEP"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 5.800 8.900 9.711 12.500 49.000 5
# Count missing values per column.
colSums(is.na(special_education))
## DISTNAME DPETSPEP DPFPASPEP
## 0 0 5
# Only DPFPASPEP has missing values (5 rows); drop every row that contains
# an NA so later steps work on complete cases only.
special_education_clean <- na.omit(special_education)
# Number of rows remaining after the NA rows are removed.
nrow(special_education_clean)
## [1] 1202
# Scatter plot of DPFPASPEP (y) against DPETSPEP (x), using only the rows
# that have no missing values.
ggplot(
  data = special_education_clean,
  mapping = aes(x = DPETSPEP, y = DPFPASPEP)
) +
  geom_point() +
  labs(
    title = "Comparison of Special Education Percent and Spending",
    x = "Percent Special Education (DPETSPEP)",
    y = "Spending on Special Education (DPFPASPEP)"
  )

# Correlation between the two measures on the NA-free rows, using cor()'s
# default method (Pearson).
correlation_result <- with(
  special_education_clean,
  cor(DPFPASPEP, DPETSPEP)
)
correlation_result
## [1] 0.3700234
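# With r = 0.37 (a correlation coefficient, not a percentage), I'd say this is
# a moderate positive correlation; r^2 is about 0.14, so DPETSPEP accounts for
# roughly 14% of the variance in DPFPASPEP.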