## Load the data and get a summary

# Read the county-level average PM2.5 data set from a commit-pinned GitHub URL.
# Column classes are given explicitly: reading `fips` as character keeps the
# leading zeros of the codes (e.g. "01003"), and reading `region` as a factor
# is what lets summary() report per-level counts below.
pollution <- read.csv(
  "https://raw.githubusercontent.com/lindangulopez/Exploratory-Data-Analysis-by-Johns-Hopkins-University-on-COURSERA/c132b214bd7657010857bf19bb90365bc1eed249/datasets/231/data/avgpm25.csv",
  colClasses = c("numeric", "character", "factor", "numeric", "numeric")
)

# Preview the first six rows (shown once; the call was previously duplicated).
head(pollution)
##        pm25  fips region longitude latitude
## 1  9.771185 01003   east -87.74826 30.59278
## 2  9.993817 01027   east -85.84286 33.26581
## 3 10.688618 01033   east -87.72596 34.73148
## 4 11.337424 01049   east -85.79892 34.45913
## 5 12.119764 01055   east -86.03212 34.01860
## 6 10.827805 01069   east -85.35039 31.18973

# Per-column summary: quantiles and mean for the numeric columns, length/class
# for the character column, and level counts for the factor column.
summary(pollution)
##       pm25            fips            region      longitude      
##  Min.   : 3.383   Length:576         east:442   Min.   :-158.04  
##  1st Qu.: 8.549   Class :character   west:134   1st Qu.: -97.38  
##  Median :10.047   Mode  :character              Median : -87.37  
##  Mean   : 9.836                                 Mean   : -91.65  
##  3rd Qu.:11.356                                 3rd Qu.: -80.72  
##  Max.   :18.441                                 Max.   : -68.26  
##     latitude    
##  Min.   :19.68  
##  1st Qu.:35.30  
##  Median :39.09  
##  Mean   :38.56  
##  3rd Qu.:41.75  
##  Max.   :64.82

## One-Dimensional Plots

# Boxplot of the pm25 column on its own.
boxplot(pollution$pm25, col = "blue")

# Histogram of pm25 with default bins; rug() adds one tick per observation
# along the axis so the individual values stay visible under the bars.
hist(pollution$pm25, col = "green")
rug(pollution$pm25)

# Same histogram with a much finer binning request (breaks = 100).
hist(pollution$pm25, col = "green", breaks = 100)
rug(pollution$pm25)

# Boxplot again, this time with a horizontal reference line at 12.
# NOTE(review): 12 presumably marks a PM2.5 reference/standard level - confirm.
boxplot(pollution$pm25, col = "blue")
abline(h = 12)

## Overlay Features

# Histogram of pm25 with two vertical reference lines drawn on top.
hist(pollution$pm25, col = "green")
# Line of width 2 at the fixed value 12.
# NOTE(review): 12 presumably marks a PM2.5 reference/standard level - confirm.
abline(v = 12, lwd = 2)
# Thicker magenta line at the sample median of pm25.
abline(v = median(pollution$pm25), col = "magenta", lwd = 4)

## Bar Graph, for a presentation

# table() counts the rows at each level of `region`; barplot() draws one bar
# per level with those counts as heights.
barplot(table(pollution$region), col = "wheat", main = "Number of Counties in Each Region")

## Summaries of the Data by Region and Latitude

# Side-by-side boxplots of pm25, one box per level of `region`, using the
# formula interface (response ~ grouping factor).
boxplot(pm25 ~ region, data = pollution, col = "red")

# Draw the east and west pm25 histograms stacked in a 2-row, 1-column layout
# with tightened margins so the two distributions can be compared directly.
# par() returns the previous values of the settings it changes; they are saved
# and restored afterwards so that later plots are not drawn into this grid.
old_par <- par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
hist(subset(pollution, region == "east")$pm25, col = "green")
hist(subset(pollution, region == "west")$pm25, col = "green")
par(old_par)

# Scatterplot of pm25 against latitude for every row, with a dashed
# horizontal reference line of width 2 at 12.
plot(pm25 ~ latitude, data = pollution)
abline(h = 12, lty = 2, lwd = 2)

# Same scatterplot, with each point coloured by its `region` level.
plot(pm25 ~ latitude, data = pollution, col = region)
abline(h = 12, lty = 2, lwd = 2)

# Draw the west and east latitude-vs-pm25 scatterplots next to each other in a
# 1-row, 2-column layout. The previous par() settings are saved and restored
# afterwards so the two-panel layout does not leak into any later plot.
old_par <- par(mfrow = c(1, 2), mar = c(5, 4, 2, 1))
with(subset(pollution, region == "west"), plot(latitude, pm25, main = "West"))
with(subset(pollution, region == "east"), plot(latitude, pm25, main = "East"))
par(old_par)