library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Read the district spreadsheet into a tibble and preview its first rows.
district <- read_excel(path = "district.xls")
head(district)
## # A tibble: 6 × 137
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CAYUGA … 001902 001 AND… 07 A 3 574 4.4 11.5
## 2 ELKHART… 001903 001 AND… 07 A 4 1150 4 11.8
## 3 FRANKST… 001904 001 AND… 07 A 3 808 8.5 11.3
## 4 NECHES … 001906 001 AND… 07 A 2 342 8.2 13.5
## 5 PALESTI… 001907 001 AND… 07 B 6 3360 25.1 42.9
## 6 WESTWOO… 001908 001 AND… 07 B 4 1332 19.7 26.2
## # ℹ 128 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## # DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## # DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## # DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## # DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## # DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## # DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Collect the district name and the two special-education columns in a plain
# data frame. The arguments are unnamed, so the columns receive the
# auto-generated names district.DISTNAME, district.DPETSPEP and
# district.DPFPASPEP.
SpedEd <- data.frame(
  district$DISTNAME,
  district$DPETSPEP,
  district$DPFPASPEP
)
head(SpedEd)
## district.DISTNAME district.DPETSPEP district.DPFPASPEP
## 1 CAYUGA ISD 14.6 28.9
## 2 ELKHART ISD 12.1 8.8
## 3 FRANKSTON ISD 13.1 8.4
## 4 NECHES ISD 10.5 10.1
## 5 PALESTINE ISD 13.5 6.1
## 6 WESTWOOD ISD 14.5 9.4
# List the current column names of the data frame.
names(SpedEd)
## [1] "district.DISTNAME" "district.DPETSPEP" "district.DPFPASPEP"
# Replace the auto-generated `district.*` column names with readable ones,
# then confirm the new names.
SpedEd <- SpedEd %>%
  rename(
    DistName = district.DISTNAME,
    PercentSpedEd = district.DPETSPEP,
    SpentSpedEd = district.DPFPASPEP
  )
colnames(SpedEd)
## [1] "DistName" "PercentSpedEd" "SpentSpedEd"
# Preview the first six rows under the new column names.
head(SpedEd, n = 6L)
## DistName PercentSpedEd SpentSpedEd
## 1 CAYUGA ISD 14.6 28.9
## 2 ELKHART ISD 12.1 8.8
## 3 FRANKSTON ISD 13.1 8.4
## 4 NECHES ISD 10.5 10.1
## 5 PALESTINE ISD 13.5 6.1
## 6 WESTWOOD ISD 14.5 9.4
# Five-number summary plus mean for the PercentSpedEd column.
summary(SpedEd[["PercentSpedEd"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.90 12.10 12.27 14.20 51.70
# Five-number summary plus mean for the SpentSpedEd column; the output also
# reports how many values are NA.
summary(SpedEd[["SpentSpedEd"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 5.800 8.900 9.711 12.500 49.000 5
SpentSpedEd (DPFPASPEP) has 5 missing values, which are labeled NA.
# Drop the rows whose SpentSpedEd value is missing.
# Rows with a value of 0 are not NA, so `!is.na()` already keeps them; no
# separate `== 0` condition is needed. (Comparing an NA with `==` yields NA,
# which filter() treats as "drop", so such a clause could never rescue a row.)
SpedEd_clean <- SpedEd %>%
  filter(!is.na(SpentSpedEd))
head(SpedEd_clean)
## DistName PercentSpedEd SpentSpedEd
## 1 CAYUGA ISD 14.6 28.9
## 2 ELKHART ISD 12.1 8.8
## 3 FRANKSTON ISD 13.1 8.4
## 4 NECHES ISD 10.5 10.1
## 5 PALESTINE ISD 13.5 6.1
## 6 WESTWOOD ISD 14.5 9.4
There are now 1202 observations left. I filtered out NA values but kept 0.0 values.
# Keep only the two numeric columns and draw a scatter plot of SpentSpedEd
# (y axis) against PercentSpedEd (x axis).
compare_two <- SpedEd_clean %>%
  select(PercentSpedEd, SpentSpedEd)
ggplot(compare_two, aes(x = PercentSpedEd, y = SpentSpedEd)) +
  geom_point()
At first glance, the percentage of students in SpedEd and the amount spent on SpedEd seem to be slightly correlated.
# Correlation coefficient between the two columns of the cleaned data
# (cor() uses its default method).
with(SpedEd_clean, cor(PercentSpedEd, SpentSpedEd))
## [1] 0.3700234
Based on both the scatter plot and the correlation coefficient (r ≈ 0.37), I would say there is a moderate positive correlation between the percentage of students in SpedEd and the amount spent on SpedEd.
Knit the R Markdown file and publish it to RPubs.
Submit the RPubs link on Canvas.