1. Create an R Markdown document with the “district” data.
# Load the district workbook; read_xls() is provided by readxl.
library(readxl)
district_df <- read_xls("district.xls")

# Keep only the district name and the two columns examined below.
sub_dist_df <- district_df[, c("DISTNAME", "DPETSPEP", "DPFPASPEP")]

# Five-number summary (plus mean) of each numeric column.
summary(sub_dist_df[["DPETSPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    9.90   12.10   12.27   14.20   51.70
summary(sub_dist_df[["DPFPASPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.800   8.900   9.711  12.500  49.000       5

# Count missing values per column; only DPFPASPEP has any (5).
colSums(is.na(sub_dist_df))
##  DISTNAME  DPETSPEP DPFPASPEP 
##         0         0         5

# Drop every row that contains at least one NA.
clean_df <- na.omit(sub_dist_df)
library(ggplot2)

# Scatter plot of DPETSPEP (y) against DPFPASPEP (x).
# Plot the NA-free clean_df rather than sub_dist_df: the 5 rows with a missing
# DPFPASPEP cannot be drawn anyway, and passing them in only makes ggplot2 drop
# them with a "Removed 5 rows containing missing values" warning. Using
# clean_df also keeps the plot on the same rows as the correlation below.
ggplot(clean_df, aes(x = DPFPASPEP, y = DPETSPEP)) +
  geom_point()

# Correlation between the two columns (cor() defaults to Pearson), computed on
# clean_df so that no NA values are involved.
cor(clean_df$DPETSPEP, clean_df$DPFPASPEP)

The values are positively correlated.