library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Read district.xls (from the working directory) into the tibble `district`.
district <- readxl::read_xls(path = "district.xls")
library(dplyr)
# Scatterplot matrix of six columns of `district`, to eyeball pairwise
# relationships before computing correlations.
pairs(
  ~ DPETECOP + DPSAKIDR + DPFRASTAP + DPFEAINSP + DPFPASPEP + DA0AT21R,
  data = district
)
# Pearson correlation between STUDENTS: % ECONOMICALLY DISADVANTAGED (DPETECOP)
# and REVENUE: % STATE (DPFRASTAP).
# cor() returns NA as soon as either column has a missing value (it did for
# DPFRASTAP and for DA0AT21R), so incomplete pairs are dropped with
# use = "complete.obs".
with(district, cor(DPETECOP, DPFRASTAP, use = "complete.obs"))
## [1] 0.1674523
# r is about 0.17: a weak positive correlation between
# STUDENTS: % ECONOMICALLY DISADVANTAGED and REVENUE: % STATE
# DPETECOP vs. DPFPASPEP, using only rows where both values are present.
with(district, cor(DPETECOP, DPFPASPEP, use = "complete.obs"))
## [1] -0.003175111
# r is about -0.003: essentially no linear relationship between
# STUDENTS: % ECONOMICALLY DISADVANTAGED and EXPENDITURE: % SPECIAL EDUCATION

# DPETECOP vs. DA0AT21R, using only rows where both values are present.
with(district, cor(DPETECOP, DA0AT21R, use = "complete.obs"))
## [1] -0.1949528
# r is about -0.19: a weak negative correlation between
# STUDENTS: % ECONOMICALLY DISADVANTAGED and ATTENDANCE RATE (2020-21)

# DPETECOP vs. DPETBLAP (no `use` argument, as in the original call).
with(district, cor(DPETECOP, DPETBLAP))
## [1] 0.270626
# r is about 0.27: a weak positive correlation between
# STUDENTS: % ECONOMICALLY DISADVANTAGED and STUDENTS: % AFRICAN AMERICAN

# DPETECOP vs. DPETWHIP (no `use` argument, as in the original call).
with(district, cor(DPETECOP, DPETWHIP))
## [1] -0.5900974
# r is about -0.59: a moderate negative correlation between
# STUDENTS: % ECONOMICALLY DISADVANTAGED and STUDENTS: % WHITE
# Append HP_LOG_TRANSFORM, the natural log of DPETECOP, to `district`,
# then print the first rows to check the result.
# NOTE(review): log() yields -Inf for any row where DPETECOP is 0 -- confirm
# whether such rows exist before using this column downstream.
district <- district %>%
  mutate(HP_LOG_TRANSFORM = log(DPETECOP))
head(district)
## # A tibble: 6 × 138
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CAYUGA … 001902 001 AND… 07 A 3 574 4.4 11.5
## 2 ELKHART… 001903 001 AND… 07 A 4 1150 4 11.8
## 3 FRANKST… 001904 001 AND… 07 A 3 808 8.5 11.3
## 4 NECHES … 001906 001 AND… 07 A 2 342 8.2 13.5
## 5 PALESTI… 001907 001 AND… 07 B 6 3360 25.1 42.9
## 6 WESTWOO… 001908 001 AND… 07 B 4 1332 19.7 26.2
## # ℹ 129 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## # DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## # DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## # DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## # DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## # DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## # DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Spearman rank correlation test between REVENUE: % STATE (DPFRASTAP) and
# EXPENDITURE: % SPECIAL EDUCATION (DPFPASPEP).
# exact = FALSE asks for the asymptotic p-value directly. The data contain
# tied values, so an exact p-value cannot be computed; leaving `exact` at its
# default only produces a "Cannot compute exact p-value with ties" warning
# before falling back to the same asymptotic result.
cor.test(
  district$DPFRASTAP,
  district$DPFPASPEP,
  method = "spearman",
  exact = FALSE
)
##
## Spearman's rank correlation rho
##
## data: district$DPFRASTAP and district$DPFPASPEP
## S = 358628977, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2390349
# The p-value is far below 0.01, so the negative rank correlation is highly
# significant. Because the values contain ties, this p-value is an asymptotic
# approximation rather than an exact one.
# Kendall rank correlation test on the same pair of columns
# (DPFRASTAP and DPFPASPEP).
cor.test(
  district$DPFRASTAP,
  district$DPFPASPEP,
  method = "kendall"
)
##
## Kendall's rank correlation tau
##
## data: district$DPFRASTAP and district$DPFPASPEP
## z = -8.4676, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## -0.163681
# This compares the percentage of revenue that comes from the state
# (DPFRASTAP) with the percentage of expenditure on special education
# (DPFPASPEP). The p-value is still below 0.01, so the result is still
# highly significant.
# As the share of revenue from the state increases, the share of expenditure
# on special education decreases, but only slightly: the correlation is
# negative and weak (rho = -0.24, tau = -0.16).