# Exploratory look at how two numeric columns of the district workbook,
# DPETSPEP and DPFPASPEP, relate to each other: summaries, missing-value
# check, scatter plot, and correlation.
library(readxl)
library(ggplot2)

# Load the workbook and keep only the district name and the two columns
# analysed below.
district_df <- read_xls("district.xls")
sub_dist_df <- district_df[c("DISTNAME", "DPETSPEP", "DPFPASPEP")]

# Five-number summaries of each column (recorded output kept as `##` lines).
summary(sub_dist_df$DPETSPEP)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.90 12.10 12.27 14.20 51.70
summary(sub_dist_df$DPFPASPEP)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 5.800 8.900 9.711 12.500 49.000 5

# Count missing values per column; only DPFPASPEP has any.
colSums(is.na(sub_dist_df))
## DISTNAME DPETSPEP DPFPASPEP
## 0 0 5

# Drop every row containing an NA so the plot and the correlation are
# computed on the same set of complete observations.
clean_df <- na.omit(sub_dist_df)

# Scatter plot of the complete rows. Plotting `sub_dist_df` here instead makes
# geom_point() warn that it removed the 5 rows with a missing DPFPASPEP.
ggplot(clean_df, aes(x = DPFPASPEP, y = DPETSPEP)) +
  geom_point()

# Correlation between the two columns (cor() uses Pearson by default).
# `clean_df` has no NAs, so no `use =` argument is required.
cor(clean_df$DPETSPEP, clean_df$DPFPASPEP)
# The values are positively correlated.