# Loading the .csv file
# Column types are given in file order: pm25, fips, region, longitude,
# latitude. fips is read as character so codes keep their leading zeros.
# NOTE(review): `class` shares its name with base::class(); calls to class()
# still resolve to the function, but a more specific name would read better.
class <- c(
  "numeric",    # pm25
  "character",  # fips
  "factor",     # region
  "numeric",    # longitude
  "numeric"     # latitude
)
getwd()
## [1] "/cloud/project"
# NOTE(review): the absolute path ties the script to this environment.
pollution <- read.csv(
  file = "/cloud/project/avgpm25.csv",
  colClasses = class
)

# Inspecting the data: first rows, column structure, then summaries of pm25
head(x = pollution)
##        pm25  fips region longitude latitude
## 1  9.771185 01003   east -87.74826 30.59278
## 2  9.993817 01027   east -85.84286 33.26581
## 3 10.688618 01033   east -87.72596 34.73148
## 4 11.337424 01049   east -85.79892 34.45913
## 5 12.119764 01055   east -86.03212 34.01860
## 6 10.827805 01069   east -85.35039 31.18973
str(object = pollution)
## 'data.frame':    576 obs. of  5 variables:
##  $ pm25     : num  9.77 9.99 10.69 11.34 12.12 ...
##  $ fips     : chr  "01003" "01027" "01033" "01049" ...
##  $ region   : Factor w/ 2 levels "east","west": 1 1 1 1 1 1 1 1 1 1 ...
##  $ longitude: num  -87.7 -85.8 -87.7 -85.8 -86 ...
##  $ latitude : num  30.6 33.3 34.7 34.5 34 ...
# fivenum() reports Tukey's hinges while summary() reports quantiles, so the
# quartile figures can differ slightly (compare the 1st-quartile values below).
fivenum(pollution[["pm25"]])
## [1]  3.382626  8.547590 10.046697 11.356829 18.440731
summary(object = pollution[["pm25"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.383   8.549  10.047   9.836  11.356  18.441
# Box plot of every PM2.5 value in a single blue box
boxplot(pollution[["pm25"]], col = "blue")

# Keep only the rows whose pm25 is greater than 15
# (dplyr supplies both filter() and the %>% pipe used here)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
pollution %>%
  filter(pm25 > 15)
##       pm25  fips region longitude latitude
## 1 16.19452 06019   west -119.9035 36.63837
## 2 15.80378 06029   west -118.6833 35.29602
## 3 18.44073 06031   west -119.8113 36.15514
## 4 16.66180 06037   west -118.2342 34.08851
## 5 15.01573 06047   west -120.6741 37.24578
## 6 17.42905 06065   west -116.8036 33.78331
## 7 16.25190 06099   west -120.9588 37.61380
## 8 16.18358 06107   west -119.1661 36.23465
# Draw California's county outlines with the maps package, then overlay one
# point per row with pm25 > 15 at its (longitude, latitude)
library(maps)
map(database = "county", regions = "california")
with(
  pollution %>% filter(pm25 > 15),
  points(x = longitude, y = latitude)
)

# Basic histogram of PM2.5
# (hist() builds its title and x-axis label from the expression it is given,
# so `pollution$pm25` is passed as written to keep those labels)
hist(pollution$pm25, col = "green")

# Same histogram with a rug: one tick under the axis per observation
hist(pollution$pm25, col = "green")
rug(pollution[["pm25"]])

# breaks = 100 asks for many more, narrower bars (hist() treats a single
# number as a suggested bin count, not an exact one)
hist(pollution$pm25, breaks = 100, col = "green")
rug(pollution[["pm25"]])

# Box plot with a horizontal reference line drawn at 12
boxplot(pollution[["pm25"]], col = "blue")
abline(h = 12)

# Histogram with two vertical markers: a fixed line at 12 and a thicker
# magenta line at the median of pm25
hist(pollution$pm25, col = "green")
abline(v = 12, lwd = 2)
abline(v = median(pollution[["pm25"]]), lwd = 4, col = "magenta")

# Bar plot of how many rows fall in each region
barplot(table(pollution$region), col = "wheat")

# One box per region from a single call, using the formula interface
boxplot(pm25 ~ region, col = "red", data = pollution)

# Two histograms stacked in one figure: east on top, west below
# par() returns the previous values of the settings it changes. They are kept
# and restored afterwards so the 2 x 1 layout and reduced margins do not leak
# into the plots drawn later in the script.
old_par <- par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
hist(subset(pollution, region == "east")$pm25, col = "green")
hist(subset(pollution, region == "west")$pm25, col = "green")
par(old_par)

# Scatter plot of pm25 against latitude, with a dashed horizontal line at 12
# (axis labels are set explicitly to the bare column names)
plot(
  pollution$latitude,
  pollution$pm25,
  xlab = "latitude",
  ylab = "pm25"
)
abline(h = 12, lty = 2, lwd = 2)
levels(pollution[["region"]])
## [1] "east" "west"
# Two scatter plots side by side: West on the left, East on the right
# The previous graphics settings returned by par() are saved and restored
# afterwards, so the 1 x 2 layout and margins do not carry over to any plot
# drawn after this section.
old_par <- par(mfrow = c(1, 2), mar = c(5, 4, 2, 1))

with(subset(pollution, region == "west"), plot(latitude, pm25, main = "West"))
with(subset(pollution, region == "east"), plot(latitude, pm25, main = "East"))
par(old_par)