# Column classes for avgpm25.csv, in file order: pm25, fips, region,
# longitude, latitude. fips is read as character so codes such as "01003"
# keep their leading zero; region is read as a factor.
class_data <- c("numeric", "character", "factor", rep("numeric", 2))
pollution <- read.csv(file = "avgpm25.csv", colClasses = class_data)
# Print the first rows as a quick check of the import.
head(x = pollution)
## pm25 fips region longitude latitude
## 1 9.771185 01003 east -87.74826 30.59278
## 2 9.993817 01027 east -85.84286 33.26581
## 3 10.688618 01033 east -87.72596 34.73148
## 4 11.337424 01049 east -85.79892 34.45913
## 5 12.119764 01055 east -86.03212 34.01860
## 6 10.827805 01069 east -85.35039 31.18973
# Compact overview of the data frame: dimensions and the type of each column.
str(object = pollution)
## 'data.frame': 576 obs. of 5 variables:
## $ pm25 : num 9.77 9.99 10.69 11.34 12.12 ...
## $ fips : chr "01003" "01027" "01033" "01049" ...
## $ region : Factor w/ 2 levels "east","west": 1 1 1 1 1 1 1 1 1 1 ...
## $ longitude: num -87.7 -85.8 -87.7 -85.8 -86 ...
## $ latitude : num 30.6 33.3 34.7 34.5 34 ...
# Tukey five-number summary of pm25: min, lower hinge, median, upper hinge,
# max.
with(pollution, fivenum(pm25))
## [1] 3.382626 8.547590 10.046697 11.356829 18.440731
# summary() also reports the mean. Its quartiles are quantile-based, which is
# why they differ slightly from the hinges printed above.
summary(object = pollution[["pm25"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.383 8.549 10.047 9.836 11.356 18.441
# Single boxplot of the pm25 values.
boxplot(x = pollution[["pm25"]], col = "blue")

library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Print the rows whose pm25 value is above 15 (dplyr::filter, not
# stats::filter, because dplyr is attached above).
pollution %>% filter(pm25 > 15)
## pm25 fips region longitude latitude
## 1 16.19452 06019 west -119.9035 36.63837
## 2 15.80378 06029 west -118.6833 35.29602
## 3 18.44073 06031 west -119.8113 36.15514
## 4 16.66180 06037 west -118.2342 34.08851
## 5 15.01573 06047 west -120.6741 37.24578
## 6 17.42905 06065 west -116.8036 33.78331
## 7 16.25190 06099 west -120.9588 37.61380
## 8 16.18358 06107 west -119.1661 36.23465
library(maps)
## Warning: package 'maps' was built under R version 4.5.1
# Draw California's county outlines, then overlay the locations of the rows
# with pm25 above 15 (all of them carry "06..." fips codes in the output
# above).
map(database = "county", regions = "california")
pollution %>%
  filter(pm25 > 15) %>%
  with(points(longitude, latitude))

# Histogram of pm25 with default bins; the rug marks every individual value
# along the x axis.
hist(x = pollution$pm25, col = "green")
rug(x = pollution$pm25)

# Same histogram with a much finer binning (about 100 breaks requested).
hist(x = pollution$pm25, breaks = 100, col = "green")
rug(x = pollution$pm25)

# Boxplot with a horizontal reference line at pm25 = 12.
# NOTE(review): 12 looks like a reference threshold for pm25 - confirm its
# source.
boxplot(x = pollution$pm25, col = "blue")
abline(h = 12)

# Histogram with two vertical markers: the same reference value (12) and the
# sample median drawn thicker in magenta.
hist(x = pollution$pm25, col = "green")
abline(v = 12, lwd = 2)
abline(v = median(pollution$pm25), lwd = 4, col = "magenta")

library(dplyr)
# Bar chart of the number of rows in each region.
barplot(table(pollution$region), col = "wheat")

# Side-by-side boxplots of pm25, one box per region.
boxplot(pm25 ~ region, col = "red", data = pollution)

# Stack the east and west pm25 histograms in a 2 x 1 layout with tighter
# margins. par() returns the previous values of the settings it changes, so
# keep them and restore them afterwards; otherwise the 2 x 1 layout and
# margins leak into every later base-graphics plot on this device.
old_par <- par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
hist(subset(pollution, region == "east")$pm25, col = "green")
hist(subset(pollution, region == "west")$pm25, col = "green")
par(old_par)

# pm25 against latitude with a dashed horizontal reference line at 12.
# Axis labels are spelled out to match the defaults of the bare-name call.
plot(pollution$latitude, pollution$pm25, xlab = "latitude", ylab = "pm25")
abline(h = 12, lty = 2, lwd = 2)

# Same scatter plot with points colored by region (the factor's integer
# codes index the current palette).
plot(
  pollution$latitude, pollution$pm25,
  col = pollution$region, xlab = "latitude", ylab = "pm25"
)
abline(h = 12, lty = 2, lwd = 2)

# Show the region levels; one scatter plot is drawn for each below.
levels(pollution$region)
## [1] "east" "west"
# Draw the west and east scatter plots side by side (1 x 2 layout). The
# previous par() settings are saved and restored so the layout and margins
# do not carry over to later base-graphics plots.
old_par <- par(mfrow = c(1, 2), mar = c(5, 4, 2, 1))
with(subset(pollution, region == "west"), plot(latitude, pm25, main = "West"))
with(subset(pollution, region == "east"), plot(latitude, pm25, main = "East"))
par(old_par)

library(lattice)
# Lattice version of the scatter plot: pm25 against latitude, conditioned on
# region so each region gets its own panel.
xyplot(x = pm25 ~ latitude | region, data = pollution)

library(ggplot2)
# ggplot2 version of the scatter plot: pm25 against latitude, one column of
# panels per region. Written with ggplot() + geom_point() + facet_grid()
# because qplot() is deprecated since ggplot2 3.4.0; the result is the same
# plot without the deprecation warning.
ggplot(pollution, aes(x = latitude, y = pm25)) +
  geom_point() +
  facet_grid(. ~ region)
