Abstract

The purpose of this analysis is to examine the EPA National Emissions Inventory (NEI) historical data, which tracks the emissions of fine particulate matter (PM2.5) into the atmosphere.

For each year and for each type of PM source, the NEI records how many tons of PM2.5 were emitted from that source over the course of the entire year. The data used for this assignment cover 1999, 2002, 2005, and 2008. Please refer to the reference data link (the same archive URL that is downloaded in the code below).

Step Wise Implementation

This section summarizes how each plot was set up, following the same general approach throughout; the detailed R code is given below to reproduce each plot shown.

# Load data.table for the data handling and ggplot2 for the plotting
library(data.table)
library(ggplot2)

# Fetch the NEI archive and unpack it into the working directory.
# The download is skipped when the zip is already on disk, so re-running the
# script does not transfer the archive again. mode = "wb" makes the binary
# transfer explicit so the zip cannot be corrupted by a text-mode download.
url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2FNEI_data.zip"
if (!file.exists("JHU_DS4_PA02.zip")) {
  download.file(url, "JHU_DS4_PA02.zip", mode = "wb")
}
unzip("JHU_DS4_PA02.zip")

# Read the two RDS files extracted from the archive and convert each one
# to a data.table: SCC is the source classification lookup, NEI holds the
# emission records.
SCC <- as.data.table(readRDS("Source_Classification_Code.rds"))
NEI <- as.data.table(readRDS("summarySCC_PM25.rds"))

# Inspect the structure of the SCC table (column names, types and sample
# values) to see which columns can be used for filtering later on.
str(SCC)
## Classes 'data.table' and 'data.frame':   11717 obs. of  15 variables:
##  $ SCC                : Factor w/ 11717 levels "10100101","10100102",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Data.Category      : Factor w/ 6 levels "Biogenic","Event",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ Short.Name         : Factor w/ 11238 levels "","2,4-D Salts and Esters Prod /Process Vents, 2,4-D Recovery: Filtration",..: 3283 3284 3293 3291 3290 3294 3295 3296 3292 3289 ...
##  $ EI.Sector          : Factor w/ 59 levels "Agriculture - Crops & Livestock Dust",..: 18 18 18 18 18 18 18 18 18 18 ...
##  $ Option.Group       : Factor w/ 25 levels "","C/I Kerosene",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Option.Set         : Factor w/ 18 levels "","A","B","B1A",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ SCC.Level.One      : Factor w/ 17 levels "Brick Kilns",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ SCC.Level.Two      : Factor w/ 146 levels "","Agricultural Chemicals Production",..: 32 32 32 32 32 32 32 32 32 32 ...
##  $ SCC.Level.Three    : Factor w/ 1061 levels "","100% Biosolids (e.g., sewage sludge, manure, mixtures of these matls)",..: 88 88 156 156 156 156 156 156 156 156 ...
##  $ SCC.Level.Four     : Factor w/ 6084 levels "","(NH4)2 SO4 Acid Bath System and Evaporator",..: 4455 5583 4466 4458 1341 5246 5584 5983 4461 776 ...
##  $ Map.To             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Last.Inventory.Year: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Created_Date       : Factor w/ 57 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Revised_Date       : Factor w/ 44 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Usage.Notes        : Factor w/ 21 levels ""," ","includes bleaching towers, washer hoods, filtrate tanks, vacuum pump exhausts",..: 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Inspect the structure of the NEI table in the same way.
str(NEI)
## Classes 'data.table' and 'data.frame':   6497651 obs. of  6 variables:
##  $ fips     : chr  "09001" "09001" "09001" "09001" ...
##  $ SCC      : chr  "10100401" "10100404" "10100501" "10200401" ...
##  $ Pollutant: chr  "PM25-PRI" "PM25-PRI" "PM25-PRI" "PM25-PRI" ...
##  $ Emissions: num  15.714 234.178 0.128 2.036 0.388 ...
##  $ type     : chr  "POINT" "POINT" "POINT" "POINT" ...
##  $ year     : int  1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Coerce the Emissions column to numeric, updating NEI in place (by reference).
# The structure printed above already reports Emissions as numeric, so the
# stored values are unchanged by this step.
NEI[, Emissions := as.numeric(Emissions)]
# Total emissions per year across all records.
# keyby (instead of by) returns the groups sorted by year, so the bars in the
# plot are always in chronological order rather than in the order in which
# each year first appears in NEI.
totalNEI <- NEI[, .(Emissions = sum(Emissions, na.rm = TRUE)), keyby = year]

# Open a 960 x 960 pixel PNG device for plot 1
png("PA2_plot1.png", width = 960, height = 960)

# Plot 1: total emissions per year.
# names.arg is spelled out in full; the abbreviated `names` only worked
# through partial argument matching.
barplot(totalNEI$Emissions,
        names.arg = totalNEI$year,
        xlab = "years", ylab = "Emissions",
        main = "Emissions over the Years",
        col = "Green")

# Close the device so the PNG file is written to disk
dev.off()
## png 
##   2
# Total emissions per year restricted to Baltimore City, MD (fips "24510").
# keyby (instead of by) returns the groups sorted by year, so the bars in the
# plot are always in chronological order rather than in the order in which
# each year first appears in the filtered rows.
totalNEI2 <- NEI[fips == "24510",
                 .(Emissions = sum(Emissions, na.rm = TRUE)),
                 keyby = year]

# Open a 960 x 960 pixel PNG device for plot 2
png("PA2_plot2.png", width = 960, height = 960)

# Plot 2: total emissions per year for Baltimore City.
# The bar labels are taken from totalNEI2 itself (the table that supplies the
# bar heights) so labels and heights can never get out of step; previously
# they were read from the nationwide totalNEI table.
# The plot is rendered once: the earlier second, identical rendering only
# overwrote the same file with the same picture.
barplot(totalNEI2$Emissions,
        names.arg = totalNEI2$year,
        xlab = "years", ylab = "Emissions",
        main = "Emissions over the Years",
        col = "Green")

# Close the device so the PNG file is written to disk
dev.off()
## png 
##   2
# Keep only the NEI records for Baltimore City (fips "24510")
BALNEI <- NEI[fips == "24510"]


# Open a 960 x 960 pixel PNG device for plot 3
png("PA2_plot3.png", width = 960, height = 960)

# Plot 3: Baltimore City emissions per year, one facet per source type.
# The legend is hidden because the facet strips already name each type.
plot3 <- ggplot(BALNEI, aes(factor(year), Emissions, fill = type)) +
  geom_bar(stat = "identity") +
  theme_bw() + guides(fill = "none") +
  facet_grid(. ~ type, scales = "free", space = "free") +
  labs(x = "year",
       y = expression("Total PM"[2.5]*" Emission (Tons)"),
       title = expression("PM"[2.5]*" Emissions, Baltimore City 1999-2008 by Source Type"))

# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing does not happen when the script is run through source() or
# from inside a function, which would leave an empty PNG.
print(plot3)

# Close the device so the PNG file is written to disk
dev.off()
## png 
##   2
# Flag the SCC rows whose level-one description mentions combustion and whose
# level-four description mentions coal (both matches ignore case)
combRel <- grepl("comb", SCC[["SCC.Level.One"]], ignore.case = TRUE)
coalRel <- grepl("coal", SCC[["SCC.Level.Four"]], ignore.case = TRUE)

# Source classification codes that satisfy both conditions
combSCC <- SCC$SCC[combRel & coalRel]

# Keep the NEI records whose SCC code is one of the selected codes
combNEI <- NEI[NEI$SCC %in% combSCC]

# Open a 960 x 960 pixel PNG device for plot 4
png("PA2_plot4.png", width = 960, height = 960)

# Plot 4: coal combustion emissions per year across the US.
# Emissions are divided by 10^5 to match the unit stated in the y-axis label.
plot4 <- ggplot(combNEI, aes(x = factor(year), y = Emissions / 10^5)) +
  geom_bar(stat = "identity", fill = "#BB0000", width = 0.69) +
  labs(x = "year",
       y = expression("Total PM"[2.5]*" Emission (10^5 Tons)"),
       title = expression("PM "[2.5]*" Coal Combustion Source Emissions Across US from 1999-2008"))

# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing does not happen when the script is run through source() or
# from inside a function, which would leave an empty PNG.
print(plot4)

# Close the device so the PNG file is written to disk
dev.off()
## png 
##   2
# Flag the SCC rows whose level-two description mentions "vehicle"
# (case-insensitive) and collect their classification codes
vehRel <- grepl("vehicle", SCC[["SCC.Level.Two"]], ignore.case = TRUE)
vehSCC <- SCC$SCC[vehRel]

# Keep the NEI records whose SCC code is one of the vehicle codes
vehNEI <- NEI[NEI$SCC %in% vehSCC]

# Narrow the vehicle records down to Baltimore City (fips "24510")
BALvehNEI <- vehNEI[fips == "24510"]

# Open a 960 x 960 pixel PNG device for plot 5
png("PA2_plot5.png", width = 960, height = 960)

# Plot 5: motor vehicle emissions per year in Baltimore City
plot5 <- ggplot(BALvehNEI, aes(x = factor(year), y = Emissions)) +
  geom_bar(stat = "identity", fill = "#BB0000", width = 0.69) +
  labs(x = "year",
       y = expression("Total PM"[2.5]*" Emission"),
       title = expression("PM "[2.5]*" Motor Vehicle Source Emissions in Baltimore from 1999-2008"))

# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing does not happen when the script is run through source() or
# from inside a function, which would leave an empty PNG.
print(plot5)

# Close the device so the PNG file is written to disk
dev.off()
## png 
##   2
# Flag the SCC rows whose level-two description mentions "vehicle"
# (case-insensitive) and collect their classification codes
vehRel <- grepl("vehicle", SCC[["SCC.Level.Two"]], ignore.case = TRUE)
vehSCC <- SCC$SCC[vehRel]

# Keep the NEI records whose SCC code is one of the vehicle codes
vehNEI <- NEI[NEI$SCC %in% vehSCC]

# Baltimore City records (fips "24510"), tagged with the city name
BALvehNEI <- vehNEI[fips == "24510"]
BALvehNEI[, city := "Baltimore City"]

# Los Angeles records (fips "06037"), tagged with the city name
LAvehNEI <- vehNEI[fips == "06037"]
LAvehNEI[, city := "Los Angeles"]

# Stack the two city tables, Baltimore rows first
togNEI <- rbind(BALvehNEI, LAvehNEI)

# Open a 960 x 960 pixel PNG device for plot 6
png("PA2_plot6.png", width = 960, height = 960)

# Plot 6: motor vehicle emissions per year, Baltimore City vs Los Angeles,
# one facet per city. Bars are coloured by year on a continuous gradient.
# - The fill aesthetic is set only on the bar layer; the former top-level
#   fill = city was always overridden by the layer's fill = year.
# - Emissions are plotted unscaled, so the y-axis unit is tons (the label
#   previously said kilo-tons although no division by 1000 took place).
plot6 <- ggplot(togNEI, aes(x = factor(year), y = Emissions)) +
  geom_bar(aes(fill = year), stat = "identity") +
  facet_grid(. ~ city, scales = "free", space = "free") +
  labs(x = "year",
       y = expression("Total PM "[2.5]*" Emission (Tons)"),
       title = expression("PM "[2.5]*" Motor Vehicle Source Emissions in Baltimore & LA, 1999-2008")) +
  scale_fill_gradient(low = "#4292E0", high = "#FFFD9D")

# Print explicitly: a ggplot object is only drawn when it is printed, and
# auto-printing does not happen when the script is run through source() or
# from inside a function, which would leave an empty PNG.
print(plot6)

# Close the device so the PNG file is written to disk
dev.off()
## png 
##   2
# Run a garbage collection and report the current memory usage
gc()
##            used  (Mb) gc trigger  (Mb)  max used  (Mb)
## Ncells  1017750  54.4    7680171 410.2   7458659 398.4
## Vcells 74920433 571.6  124048757 946.5 108871034 830.7