Description

This project is the Coursera Exploratory Data Analysis - Course Project 2. The overall goal of this project is to explore the National Emissions Inventory database and see what it says about fine particulate matter pollution in the United States over the 10-year period 1999–2008.

Load and read data

# Download and unpack the NEI archive into a local "Courseradata" folder
# (relative to the working directory), then load both RDS tables.
# Every path is derived from dataDir, so the folder that gets created is the
# same folder that is downloaded into, unzipped into and read from.
dataDir <- "Courseradata"
zipPath <- file.path(dataDir, "NEI_data.zip")
extractDir <- file.path(dataDir, "NEI_data")
fileUrl <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2FNEI_data.zip"

if (!dir.exists(dataDir)) {
    dir.create(dataDir, recursive = TRUE)
}
# Skip the download when the archive is already on disk.
if (!file.exists(zipPath)) {
    # mode = "wb" writes the zip as binary so it is not corrupted on
    # platforms whose default download mode is text.
    download.file(fileUrl, destfile = zipPath, mode = "wb")
}
if (!dir.exists(extractDir)) {
    unzip(zipfile = zipPath, exdir = extractDir)
}

# Find an extracted file by its exact name anywhere below extractDir, so the
# load works whether or not the archive wraps its contents in a sub-folder.
findDataFile <- function(fileName) {
    extracted <- list.files(extractDir, recursive = TRUE, full.names = TRUE)
    hits <- extracted[basename(extracted) == fileName]
    if (length(hits) == 0) {
        stop("Could not find ", fileName, " under ", extractDir, call. = FALSE)
    }
    hits[1]
}

NEI <- readRDS(findDataFile("summarySCC_PM25.rds"))
SCC <- readRDS(findDataFile("Source_Classification_Code.rds"))
str(NEI)
## 'data.frame':    6497651 obs. of  6 variables:
##  $ fips     : chr  "09001" "09001" "09001" "09001" ...
##  $ SCC      : chr  "10100401" "10100404" "10100501" "10200401" ...
##  $ Pollutant: chr  "PM25-PRI" "PM25-PRI" "PM25-PRI" "PM25-PRI" ...
##  $ Emissions: num  15.714 234.178 0.128 2.036 0.388 ...
##  $ type     : chr  "POINT" "POINT" "POINT" "POINT" ...
##  $ year     : int  1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ...
str(SCC)
## 'data.frame':    11717 obs. of  15 variables:
##  $ SCC                : Factor w/ 11717 levels "10100101","10100102",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Data.Category      : Factor w/ 6 levels "Biogenic","Event",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ Short.Name         : Factor w/ 11238 levels "","2,4-D Salts and Esters Prod /Process Vents, 2,4-D Recovery: Filtration",..: 3283 3284 3293 3291 3290 3294 3295 3296 3292 3289 ...
##  $ EI.Sector          : Factor w/ 59 levels "Agriculture - Crops & Livestock Dust",..: 18 18 18 18 18 18 18 18 18 18 ...
##  $ Option.Group       : Factor w/ 25 levels "","C/I Kerosene",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Option.Set         : Factor w/ 18 levels "","A","B","B1A",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ SCC.Level.One      : Factor w/ 17 levels "Brick Kilns",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ SCC.Level.Two      : Factor w/ 146 levels "","Agricultural Chemicals Production",..: 32 32 32 32 32 32 32 32 32 32 ...
##  $ SCC.Level.Three    : Factor w/ 1061 levels "","100% Biosolids (e.g., sewage sludge, manure, mixtures of these matls)",..: 88 88 156 156 156 156 156 156 156 156 ...
##  $ SCC.Level.Four     : Factor w/ 6084 levels "","(NH4)2 SO4 Acid Bath System and Evaporator",..: 4455 5583 4466 4458 1341 5246 5584 5983 4461 776 ...
##  $ Map.To             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Last.Inventory.Year: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Created_Date       : Factor w/ 57 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Revised_Date       : Factor w/ 44 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Usage.Notes        : Factor w/ 21 levels ""," ","includes bleaching towers, washer hoods, filtrate tanks, vacuum pump exhausts",..: 1 1 1 1 1 1 1 1 1 1 ...

Have total emissions from PM2.5 decreased in the United States from 1999 to 2008?

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Add up the Emissions column over all records of each inventory year.
pm25TotalEmissions <- summarise(
    group_by(NEI, year),
    Emissions = sum(Emissions)
)

# Base-graphics bar chart: one bar per year, labelled with the year.
with(pm25TotalEmissions, barplot(
    height = Emissions,
    names.arg = year,
    col = c("bisque", "pink", "aquamarine", "azure"),
    main = expression("Total PM"[2.5]*" emissions for the years"),
    xlab = "Years",
    ylab = expression("Total PM"[2.5]*" emissions")
))

Have total emissions from PM2.5 decreased in Baltimore City, Maryland (fips == “24510”) from 1999 to 2008?

library(dplyr)
# Keep only the Baltimore City records (fips "24510") and total their
# emissions for each year.
baltimoreEmissions <- summarise(
    group_by(filter(NEI, fips == "24510"), year),
    Emissions = sum(Emissions)
)

# Base-graphics bar chart of the yearly Baltimore City totals.
with(baltimoreEmissions, barplot(
    height = Emissions,
    names.arg = year,
    col = c("bisque", "pink", "aquamarine", "azure"),
    main = expression("Total PM"[2.5]*" emissions in Baltimore City, Maryland"),
    xlab = "Years",
    ylab = expression("Total PM"[2.5]*" emissions")
))

Of the four types of sources indicated by the type variable (point, nonpoint, onroad, nonroad), which have seen decreases in emissions from 1999–2008 for Baltimore City, and which have seen increases?

library(dplyr)
# Total Baltimore City (fips "24510") emissions for every year / source-type
# pair. summarize() only removes the last grouping variable, so the result
# would otherwise stay grouped by year; ungroup() returns a plain tibble.
baltimoreEmissions2 <- NEI %>%
    filter(fips == "24510") %>%
    group_by(year, type) %>%
    summarize(Emissions = sum(Emissions)) %>%
    ungroup()

library(ggplot2)
# One panel per source type, one bar per year within each panel.
ggplot(baltimoreEmissions2, aes(x = factor(year), y = Emissions, fill = type)) +
    geom_bar(stat = "identity", col = "black") +
    facet_grid(. ~ type) +
    xlab("Years") +
    ylab(expression("Total PM"[2.5]*" emissions")) +
    ggtitle (expression("PM"[2.5]*" emissions in Baltimore City, Maryland by Source Type")) +
    # The facet strips already name each source type, so the fill legend
    # only repeats them and takes width away from the panels.
    theme(legend.position = "none")

Across the United States, how have emissions from coal combustion-related sources changed from 1999–2008?

# Collect the source classification codes whose short name mentions coal as a
# whole word. The \\b anchors stop names such as "Charcoal ..." from matching,
# which a bare "coal" pattern would let through.
# NOTE(review): selection is by name only; if non-combustion coal sources
# (e.g. mining or handling) must be excluded, filter on a combustion-specific
# SCC column instead - confirm against the data.
isCoal <- grepl("\\bcoal\\b", SCC$Short.Name, ignore.case = TRUE, perl = TRUE)
coalCodes <- as.character(SCC$SCC[isCoal])

# Subset NEI by code instead of merging all of NEI with SCC first: only the
# Emissions and year columns are needed downstream, so joining every SCC
# column onto every emissions record is wasted work.
subsetCoal <- NEI[NEI$SCC %in% coalCodes, ]

library(dplyr)
# Yearly total over the coal-related records.
coalEmissions <- subsetCoal %>%
    group_by(year) %>%
    summarize(Emissions = sum(Emissions))

# The four fill colours are applied one per bar, so this expects four years.
ggplot(coalEmissions, aes(factor(year), Emissions)) +
    geom_bar(stat = "identity", fill = c("magenta", "yellow", "orange", "linen"), col = "black") +
    xlab("Years") +
    ylab(expression("Total PM"[2.5]*" emissions")) +
    ggtitle ("Emissions from coal combustion-related sources")

How have emissions from motor vehicle sources changed from 1999–2008 in Baltimore City?

# Rows recorded for Baltimore City (fips "24510") with source type "ON-ROAD".
subsetMotor <- NEI[with(NEI, fips == "24510" & type == "ON-ROAD"), ]
library(dplyr)
# Yearly total of those on-road emissions.
baltimoreEmissions3 <- summarise(
    group_by(subsetMotor, year),
    Emissions = sum(Emissions)
)

# One bar per year; the four fill colours are applied one per bar.
ggplot(baltimoreEmissions3, aes(x = factor(year), y = Emissions)) +
    geom_col(
        fill = c("bisque", "pink", "aquamarine", "azure"),
        colour = "black"
    ) +
    labs(
        x = "Years",
        y = expression("Total PM"[2.5]*" emissions"),
        title = "Total emissions from motor vehicle in Baltimore City, Maryland"
    )

Compare emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California.

# Display names for the two counties being compared, keyed by fips code.
countyNames <- c("24510" = "Baltimore City, MD", "06037" = "Los Angeles, CA")

# On-road records for either county. %in% never returns NA, so a missing
# fips cannot slip in as an all-NA row the way it can with `==`.
subsetMotor2 <- NEI[NEI$fips %in% names(countyNames) & NEI$type == "ON-ROAD", ]
library(dplyr)
# Yearly total per county. summarize() leaves the result grouped by year, so
# ungroup() before the fips column is rewritten below.
baltimoreLAEmissions <- subsetMotor2 %>%
    group_by(year, fips) %>%
    summarize(Emissions = sum(Emissions)) %>%
    ungroup()
# Swap each fips code for its display name through the lookup table.
baltimoreLAEmissions$fips <- unname(countyNames[baltimoreLAEmissions$fips])

# One panel per county, one bar per year within each panel.
ggplot(baltimoreLAEmissions, aes(factor(year), Emissions, fill = fips)) +
    geom_bar(stat = "identity", color = "black") +
    facet_grid(. ~ fips) +
    xlab("Years") +
    ylab(expression('Total PM'[2.5]*" emissions")) +
    ggtitle("Total Emissions from motor vehicle in Baltimore City, MD vs Los Angeles, CA") +
    # The facet strips already name each county, so the fill legend is
    # redundant.
    theme(legend.position = "none")