# Load the county-level PM2.5 averages.
# The column classes are given in file order: pm25, fips, region, longitude,
# latitude. `fips` is read as character so its leading zeros are kept, and
# `region` is read as a factor.
# The helpers live inside local() so only `pollution` is created globally.
pollution <- local({
  csv_url <- "https://raw.githubusercontent.com/lindangulopez/Exploratory-Data-Analysis-by-Johns-Hopkins-University-on-COURSERA/c132b214bd7657010857bf19bb90365bc1eed249/datasets/231/data/avgpm25.csv"
  column_types <- c("numeric", "character", "factor", "numeric", "numeric")
  read.csv(csv_url, colClasses = column_types)
})
# First six rows: a quick check that the columns were parsed as expected.
# (Lines starting with `##` are console output pasted in for reference.)
head(pollution)
##        pm25  fips region longitude latitude
## 1  9.771185 01003   east -87.74826 30.59278
## 2  9.993817 01027   east -85.84286 33.26581
## 3 10.688618 01033   east -87.72596 34.73148
## 4 11.337424 01049   east -85.79892 34.45913
## 5 12.119764 01055   east -86.03212 34.01860
## 6 10.827805 01069   east -85.35039 31.18973

# Per-column summary: quantiles for the numeric columns, level counts for
# the `region` factor, and length/class for the character `fips` column.
summary(pollution)
##       pm25            fips            region      longitude
##  Min.   : 3.383   Length:576         east:442   Min.   :-158.04
##  1st Qu.: 8.549   Class :character   west:134   1st Qu.: -97.38
##  Median :10.047   Mode  :character              Median : -87.37
##  Mean   : 9.836                                 Mean   : -91.65
##  3rd Qu.:11.356                                 3rd Qu.: -80.72
##  Max.   :18.441                                 Max.   : -68.26
##     latitude
##  Min.   :19.68
##  1st Qu.:35.30
##  Median :39.09
##  Mean   :38.56
##  3rd Qu.:41.75
##  Max.   :64.82
# One-dimensional views of the pm25 column.

# Boxplot of all pm25 values.
boxplot(pollution$pm25, col = "blue")
# Histogram with default binning; rug() adds one tick per observation
# along the axis so the raw values stay visible.
hist(pollution$pm25, col = "green")
rug(pollution$pm25)
# Same histogram, but requesting about 100 bins for a finer view.
hist(pollution$pm25, col = "green", breaks = 100)
rug(pollution$pm25)
# Boxplot again, this time with a horizontal reference line at pm25 = 12.
# NOTE(review): 12 is presumably a regulatory PM2.5 threshold - confirm.
boxplot(pollution$pm25, col = "blue")
abline(h = 12)
## Overlay Features
# Histogram with two vertical markers: the reference value 12 (default
# colour, lwd = 2) and the sample median (magenta, lwd = 4).
hist(pollution$pm25, col = "green")
abline(v = 12, lwd = 2)
abline(v = median(pollution$pm25), col = "magenta", lwd = 4)
## Bar Graph, for a presentation
# Bar heights are the number of rows (counties) per level of `region`.
barplot(table(pollution$region), col = "wheat", main = "Number of Counties in Each Region")
# Side-by-side boxplots of pm25, one box per region level.
boxplot(pm25 ~ region, data = pollution, col = "red")
# Stacked histograms of pm25, east on top and west below.
# par() returns the previous settings when it sets new ones; they are kept
# so the 2x1 layout does not leak into the figures that follow.
old_par <- par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
hist(subset(pollution, region == "east")$pm25, col = "green")
hist(subset(pollution, region == "west")$pm25, col = "green")
par(old_par)

# Scatterplot of pm25 against latitude with a dashed reference line at 12.
with(pollution, plot(latitude, pm25))
abline(h = 12, lwd = 2, lty = 2)

# Same scatterplot, with points coloured by the `region` factor.
with(pollution, plot(latitude, pm25, col = region))
abline(h = 12, lwd = 2, lty = 2)

# Side-by-side scatterplots, one panel per region. The layout is restored
# afterwards so the graphics device is left as it was found.
old_par <- par(mfrow = c(1, 2), mar = c(5, 4, 2, 1))
with(subset(pollution, region == "west"), plot(latitude, pm25, main = "West"))
with(subset(pollution, region == "east"), plot(latitude, pm25, main = "East"))
par(old_par)