library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)

# Load the district-level spreadsheet from the working directory.
district <- read_excel("district.xls")

# Preview the first six rows to check that the import worked.
head(district, n = 6)
## # A tibble: 6 × 137
##   DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
##   <chr>    <chr>    <chr>    <chr>  <chr>       <dbl>    <dbl>    <dbl>    <dbl>
## 1 CAYUGA … 001902   001 AND… 07     A               3      574      4.4     11.5
## 2 ELKHART… 001903   001 AND… 07     A               4     1150      4       11.8
## 3 FRANKST… 001904   001 AND… 07     A               3      808      8.5     11.3
## 4 NECHES … 001906   001 AND… 07     A               2      342      8.2     13.5
## 5 PALESTI… 001907   001 AND… 07     B               6     3360     25.1     42.9
## 6 WESTWOO… 001908   001 AND… 07     B               4     1332     19.7     26.2
## # ℹ 128 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## #   DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## #   DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## #   DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## #   DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## #   DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## #   DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Keep only the district name and the two special-education columns
# that the rest of the analysis works with.
spedstats <- select(district, DISTNAME, DPETSPEP, DPFPASPEP)

# Preview the reduced table.
head(spedstats)
## # A tibble: 6 × 3
##   DISTNAME      DPETSPEP DPFPASPEP
##   <chr>            <dbl>     <dbl>
## 1 CAYUGA ISD        14.6      28.9
## 2 ELKHART ISD       12.1       8.8
## 3 FRANKSTON ISD     13.1       8.4
## 4 NECHES ISD        10.5      10.1
## 5 PALESTINE ISD     13.5       6.1
## 6 WESTWOOD ISD      14.5       9.4
# Five-number summary per column; the NA's row reveals missing values.
spedstats %>% summary()
##    DISTNAME            DPETSPEP       DPFPASPEP     
##  Length:1207        Min.   : 0.00   Min.   : 0.000  
##  Class :character   1st Qu.: 9.90   1st Qu.: 5.800  
##  Mode  :character   Median :12.10   Median : 8.900  
##                     Mean   :12.27   Mean   : 9.711  
##                     3rd Qu.:14.20   3rd Qu.:12.500  
##                     Max.   :51.70   Max.   :49.000  
##                                     NA's   :5

DPFPASPEP (money spent on special education) is the variable with missing values; `summary()` reports 5 NAs for it.

# filter() keeps only rows where the condition is TRUE, so this drops the
# rows with a missing DPFPASPEP as well as any row where it is not above zero.
spedstats_cleaned <- filter(spedstats, DPFPASPEP > 0)

# Re-check the distributions after removing those rows.
summary(spedstats_cleaned)
##    DISTNAME            DPETSPEP      DPFPASPEP     
##  Length:1201        Min.   : 0.0   Min.   : 0.200  
##  Class :character   1st Qu.: 9.9   1st Qu.: 5.800  
##  Mode  :character   Median :12.2   Median : 8.900  
##                     Mean   :12.3   Mean   : 9.719  
##                     3rd Qu.:14.2   3rd Qu.:12.500  
##                     Max.   :51.7   Max.   :49.000

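There are 1201 observations left: of the original 1207, the filter `DPFPASPEP > 0` removed the 5 rows with missing values and the 1 row whose value was 0.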

# Build the plotting subset: district name plus the two special-education
# columns, keeping only rows with a positive (and therefore non-missing)
# DPFPASPEP value.
compare <- district %>%
  select(DISTNAME, DPETSPEP, DPFPASPEP) %>%
  filter(DPFPASPEP > 0)

# Scatter plot of DPETSPEP (x) against DPFPASPEP (y).
# Plot `compare`, not the raw `district` table: the raw table still holds
# the NA rows, which ggplot2 would silently drop with a warning.
ggplot(compare, aes(x = DPETSPEP, y = DPFPASPEP)) +
  geom_point()

Yes, the percentage of special education students is somewhat correlated with the amount of funding given for special education.

# Pearson correlation between the two special-education columns,
# computed on the filtered data (no missing values remain in DPFPASPEP).
with(spedstats_cleaned, cor(DPETSPEP, DPFPASPEP))
## [1] 0.371033

The correlation coefficient is r = 0.371, meaning that there is a moderate positive relationship between the percentage of special education students and the amount of funding received for special education. The relationship is not very strong, suggesting that other variables also influence funding decisions.