library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the district workbook from the current working directory.
district <- read_excel(path = "district.xls")

# Narrow the data to the district name and the two percentage columns
# that the rest of the script analyses.
district_dataframe <- select(district, DISTNAME, DPETSPEP, DPFPASPEP)

# Preview the first rows to confirm the selection.
district_dataframe %>% head()
## # A tibble: 6 × 3
##   DISTNAME      DPETSPEP DPFPASPEP
##   <chr>            <dbl>     <dbl>
## 1 CAYUGA ISD        14.6      28.9
## 2 ELKHART ISD       12.1       8.8
## 3 FRANKSTON ISD     13.1       8.4
## 4 NECHES ISD        10.5      10.1
## 5 PALESTINE ISD     13.5       6.1
## 6 WESTWOOD ISD      14.5       9.4
# Five-number summary (plus mean) for DPETSPEP.
summary(district_dataframe[["DPETSPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    9.90   12.10   12.27   14.20   51.70
# Same summary for DPFPASPEP; summary() also reports the NA count
# when the column contains missing values.
summary(district_dataframe[["DPFPASPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.800   8.900   9.711  12.500  49.000       5
# Count missing values per column for the two numeric columns.
district_dataframe %>%
  select(DPETSPEP, DPFPASPEP) %>%
  is.na() %>%
  colSums()
##  DPETSPEP DPFPASPEP 
##         0         5
# Keep only rows where both percentage columns are present, so the
# correlation below is computed on complete pairs. Multiple conditions
# passed to filter() are combined with a logical AND.
district_dataframe_NoNA <- filter(
  district_dataframe,
  !is.na(DPETSPEP),
  !is.na(DPFPASPEP)
)

# Number of rows that survive the missing-value filter.
dim(district_dataframe_NoNA)[1L]
## [1] 1202
# Correlation between the two columns (cor() defaults to Pearson).
with(district_dataframe_NoNA, cor(DPFPASPEP, DPETSPEP))
## [1] 0.3700234