library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(readxl)
# Load the district-level spreadsheet from the working directory and
# preview its first rows.
district <- read_excel(path = "district.xls")
district |> head()
## # A tibble: 6 × 137
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CAYUGA … 001902 001 AND… 07 A 3 574 4.4 11.5
## 2 ELKHART… 001903 001 AND… 07 A 4 1150 4 11.8
## 3 FRANKST… 001904 001 AND… 07 A 3 808 8.5 11.3
## 4 NECHES … 001906 001 AND… 07 A 2 342 8.2 13.5
## 5 PALESTI… 001907 001 AND… 07 B 6 3360 25.1 42.9
## 6 WESTWOO… 001908 001 AND… 07 B 4 1332 19.7 26.2
## # ℹ 128 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## # DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## # DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## # DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## # DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## # DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## # DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Keep only the district name and the two special-education columns.
special_ed_data <- district |>
  select(DISTNAME, DPETSPEP, DPFPASPEP)

# Summarise the columns themselves. Passing the quoted name, e.g.
# summary("DPETSPEP"), summarises a one-element character string and only
# reports "Length 1 / Class character", which says nothing about the data.
# For a numeric column, summary() prints the quartiles and mean, plus an
# `NA's` count when values are missing.
# (The previously recorded output came from the quoted-name calls and has
# been removed; re-run to print the numeric summaries.)
summary(special_ed_data$DPETSPEP)
summary(special_ed_data$DPFPASPEP)
# Both variables have missing values (see the NA's entry in each summary).
# Preview the table with every row that contains a missing value removed
# (nothing is assigned here).
drop_na(special_ed_data)
## # A tibble: 1,202 × 3
## DISTNAME DPETSPEP DPFPASPEP
## <chr> <dbl> <dbl>
## 1 CAYUGA ISD 14.6 28.9
## 2 ELKHART ISD 12.1 8.8
## 3 FRANKSTON ISD 13.1 8.4
## 4 NECHES ISD 10.5 10.1
## 5 PALESTINE ISD 13.5 6.1
## 6 WESTWOOD ISD 14.5 9.4
## 7 SLOCUM ISD 14.7 9.9
## 8 ANDREWS ISD 10.4 10.9
## 9 PINEYWOODS COMMUNITY ACADEMY 11.6 9.2
## 10 HUDSON ISD 11.9 10.3
## # ℹ 1,192 more rows
# Store the complete-case version of the table for the analysis below,
# then print it.
clean_special_ed_data <- drop_na(special_ed_data)
clean_special_ed_data |> print()
## # A tibble: 1,202 × 3
## DISTNAME DPETSPEP DPFPASPEP
## <chr> <dbl> <dbl>
## 1 CAYUGA ISD 14.6 28.9
## 2 ELKHART ISD 12.1 8.8
## 3 FRANKSTON ISD 13.1 8.4
## 4 NECHES ISD 10.5 10.1
## 5 PALESTINE ISD 13.5 6.1
## 6 WESTWOOD ISD 14.5 9.4
## 7 SLOCUM ISD 14.7 9.9
## 8 ANDREWS ISD 10.4 10.9
## 9 PINEYWOODS COMMUNITY ACADEMY 11.6 9.2
## 10 HUDSON ISD 11.9 10.3
## # ℹ 1,192 more rows
# Summarise the cleaned columns. The column must be passed as data
# (data_frame$column); a quoted name such as summary("DPETSPEP") is just a
# one-element character string, so summary() reports
# "Length 1 / Class character" no matter what the data frame contains.
# That result was unrelated to missing values.
# (The previously recorded output came from the quoted-name calls and has
# been removed; re-run to print the numeric summaries.)
summary(clean_special_ed_data$DPETSPEP)
summary(clean_special_ed_data$DPFPASPEP)
# After drop_na() there are 1202 obs. of 3 variables instead of the original
# 1207 obs. of 3 variables, so the summaries above should no longer list NA's.
# Scatter plot of the two special-education columns, one point per row.
ggplot(
  data = clean_special_ed_data,
  mapping = aes(x = DPETSPEP, y = DPFPASPEP)
) +
  geom_point()
# Correlation between the same two columns (cor() defaults to Pearson).
with(clean_special_ed_data, cor(DPETSPEP, DPFPASPEP))
## [1] 0.3700234
# A coefficient of about 0.37 indicates a weak, positive linear correlation between the percent of special education students (DPETSPEP) and the money spent on special education (DPFPASPEP).