knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
NEI <- readRDS("summarySCC_PM25.rds")
SCC <- readRDS("Source_Classification_Code.rds")
str(NEI)
## 'data.frame': 6497651 obs. of 6 variables:
## $ fips : chr "09001" "09001" "09001" "09001" ...
## $ SCC : chr "10100401" "10100404" "10100501" "10200401" ...
## $ Pollutant: chr "PM25-PRI" "PM25-PRI" "PM25-PRI" "PM25-PRI" ...
## $ Emissions: num 15.714 234.178 0.128 2.036 0.388 ...
## $ type : chr "POINT" "POINT" "POINT" "POINT" ...
## $ year : int 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ...
head(SCC)
## SCC Data.Category
## 1 10100101 Point
## 2 10100102 Point
## 3 10100201 Point
## 4 10100202 Point
## 5 10100203 Point
## 6 10100204 Point
## Short.Name
## 1 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 2 Ext Comb /Electric Gen /Anthracite Coal /Traveling Grate (Overfeed) Stoker
## 3 Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Wet Bottom
## 4 Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Dry Bottom
## 5 Ext Comb /Electric Gen /Bituminous Coal /Cyclone Furnace
## 6 Ext Comb /Electric Gen /Bituminous Coal /Spreader Stoker
## EI.Sector Option.Group Option.Set
## 1 Fuel Comb - Electric Generation - Coal
## 2 Fuel Comb - Electric Generation - Coal
## 3 Fuel Comb - Electric Generation - Coal
## 4 Fuel Comb - Electric Generation - Coal
## 5 Fuel Comb - Electric Generation - Coal
## 6 Fuel Comb - Electric Generation - Coal
## SCC.Level.One SCC.Level.Two
## 1 External Combustion Boilers Electric Generation
## 2 External Combustion Boilers Electric Generation
## 3 External Combustion Boilers Electric Generation
## 4 External Combustion Boilers Electric Generation
## 5 External Combustion Boilers Electric Generation
## 6 External Combustion Boilers Electric Generation
## SCC.Level.Three
## 1 Anthracite Coal
## 2 Anthracite Coal
## 3 Bituminous/Subbituminous Coal
## 4 Bituminous/Subbituminous Coal
## 5 Bituminous/Subbituminous Coal
## 6 Bituminous/Subbituminous Coal
## SCC.Level.Four Map.To Last.Inventory.Year
## 1 Pulverized Coal NA NA
## 2 Traveling Grate (Overfeed) Stoker NA NA
## 3 Pulverized Coal: Wet Bottom (Bituminous Coal) NA NA
## 4 Pulverized Coal: Dry Bottom (Bituminous Coal) NA NA
## 5 Cyclone Furnace (Bituminous Coal) NA NA
## 6 Spreader Stoker (Bituminous Coal) NA NA
## Created_Date Revised_Date Usage.Notes
## 1
## 2
## 3
## 4
## 5
## 6
str(SCC)
## 'data.frame': 11717 obs. of 15 variables:
## $ SCC : Factor w/ 11717 levels "10100101","10100102",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Data.Category : Factor w/ 6 levels "Biogenic","Event",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ Short.Name : Factor w/ 11238 levels "","2,4-D Salts and Esters Prod /Process Vents, 2,4-D Recovery: Filtration",..: 3283 3284 3293 3291 3290 3294 3295 3296 3292 3289 ...
## $ EI.Sector : Factor w/ 59 levels "Agriculture - Crops & Livestock Dust",..: 18 18 18 18 18 18 18 18 18 18 ...
## $ Option.Group : Factor w/ 25 levels "","C/I Kerosene",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Option.Set : Factor w/ 18 levels "","A","B","B1A",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SCC.Level.One : Factor w/ 17 levels "Brick Kilns",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ SCC.Level.Two : Factor w/ 146 levels "","Agricultural Chemicals Production",..: 32 32 32 32 32 32 32 32 32 32 ...
## $ SCC.Level.Three : Factor w/ 1061 levels "","100% Biosolids (e.g., sewage sludge, manure, mixtures of these matls)",..: 88 88 156 156 156 156 156 156 156 156 ...
## $ SCC.Level.Four : Factor w/ 6084 levels "","(NH4)2 SO4 Acid Bath System and Evaporator",..: 4455 5583 4466 4458 1341 5246 5584 5983 4461 776 ...
## $ Map.To : num NA NA NA NA NA NA NA NA NA NA ...
## $ Last.Inventory.Year: int NA NA NA NA NA NA NA NA NA NA ...
## $ Created_Date : Factor w/ 57 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Revised_Date : Factor w/ 44 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Usage.Notes : Factor w/ 21 levels ""," ","includes bleaching towers, washer hoods, filtrate tanks, vacuum pump exhausts",..: 1 1 1 1 1 1 1 1 1 1 ...
annual <- NEI %>% group_by(year) %>%
filter(year == 1999|2002|2005|2008) %>%
summarize(Annual.Total = sum(Emissions));
pts <- pretty(annual$Annual.Total/1000000);
yrs <- c(1999,2002,2005,2008)
plot(annual$year, annual$Annual.Total/1000000, type = "l", lwd = 2, axes = FALSE,
xlab = "Year",
ylab = expression("Total Tons of PM"[2.5]*" Emissions"),
main = expression("Total Tons of PM"[2.5]*" Emissions in the United States"));
axis(1, at = yrs, labels = paste(yrs));
axis(2, at = pts, labels = paste(pts, "M", sep = ""));
box()

baltimore <- NEI %>%
filter(fips == "24510") %>%
group_by(year) %>%
summarize(Annual.Total = sum(Emissions));
baltimore.pts <- pretty(baltimore$Annual.Total/1000);
plot(baltimore$year, baltimore$Annual.Total/1000, type = "l", lwd = 2, axes = FALSE,
xlab = "Year",
ylab = expression("Total Tons of PM"[2.5]*" Emissions"),
main = expression("Total Tons of PM"[2.5]*" Emissions in Baltimore"));
axis(1, at = c(1999,2002,2005,2008))
axis(2, at = baltimore.pts, labels = paste(baltimore.pts, "K", sep = ""));
box();

nei.baltimore <- NEI %>% filter(fips == "24510") %>% group_by(type, year) %>% summarize(Annual.Total = sum(Emissions));
nei.baltimore$type <- factor(nei.baltimore$type, levels = c("ON-ROAD", "NON-ROAD", "POINT", "NONPOINT")) # Re-order factor levels so they plot in the order we wish
ggplot(nei.baltimore, aes(x = factor(year), y = Annual.Total, fill = type)) +
geom_bar(stat = "identity") +
facet_grid(. ~ type) +
xlab("Year") +
ylab(expression("Total Tons of PM"[2.5]*" Emissions")) +
ggtitle(expression("Total Tons of PM"[2.5]*" Emissions in Baltimore by Source Type")) +
theme(axis.text.x=element_text(angle = 90, vjust = 0.5, hjust = 1)) +
scale_y_continuous(labels = comma) +
guides(fill = FALSE)

scc.coal <- SCC[grep("Fuel Comb.*Coal", SCC$EI.Sector), ];
scc.coal.list <- unique(scc.coal$SCC);
nei.coal <- subset(NEI, SCC %in% scc.coal.list);
nei.coal <- nei.coal %>% group_by(type, year) %>% summarize(Annual.Total = sum(Emissions))
nei.coal.total <- nei.coal %>% group_by(year) %>% summarize(Annual.Total = sum(Annual.Total)) %>% mutate(type = "TOTAL");
nei.coal <- nei.coal %>% select(Annual.Total, type, year);
nei.coal <- bind_rows(nei.coal, nei.coal.total);
nei.coal$type <- factor(nei.coal$type, levels = c("TOTAL", "ON-ROAD", "NON-ROAD", "POINT", "NONPOINT")); # Re-order factor levels to they plot in the order we wish
ggplot(nei.coal, aes(x = factor(year), y = Annual.Total, fill = type)) +
geom_bar(stat = "identity") +
facet_grid(. ~ type) +
xlab("Year") +
ylab(expression("Total Tons of PM"[2.5]*" Emissions")) +
ggtitle(expression(atop("Total Tons of PM"[2.5]*" Emissions in the United States", paste("from Coal Combustion-Related Sources")))) +
theme(plot.margin = unit(c(1,1,1,1), "cm")) +
scale_y_continuous(labels = comma) +
scale_fill_brewer(palette = "Dark2") +
guides(fill = FALSE)

scc.vehicles <- SCC[grep("Mobile.*Vehicles", SCC$EI.Sector), ]; # Pattern match mobile vehicles in SCC description
scc.vehicles.list <- unique(scc.vehicles$SCC); # Create motor vehicle lookup list by SCC
nei.vehicles <- subset(NEI, SCC %in% scc.vehicles.list); # Filter for motor vehicle sources
nei.vehicles <- nei.vehicles %>% filter(fips == "24510") # Filter for Baltimore
nei.vehicles <- merge(x = nei.vehicles, y = scc.vehicles[, c("SCC", "SCC.Level.Two", "SCC.Level.Three")], by = "SCC") # Join in descriptive data on SCC codes
nei.vehicles <- nei.vehicles %>% group_by(year, SCC.Level.Two, SCC.Level.Three) %>% summarize(Annual.Total = sum(Emissions))
nei.vehicles.total <- nei.vehicles %>% group_by(year) %>% summarize(Annual.Total = sum(Annual.Total)) %>% mutate(SCC.Level.Two = "Total")
nei.vehicles <- bind_rows(nei.vehicles, nei.vehicles.total);
## Warning in bind_rows_(x, .id): binding factor and character vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
nei.vehicles$SCC.Level.Two <- factor(nei.vehicles$SCC.Level.Two, levels = c("Total", "Highway Vehicles - Diesel", "Highway Vehicles - Gasoline"));
ggplot(nei.vehicles, aes(x = factor(year), y = Annual.Total, fill = SCC.Level.Two)) +
geom_bar(stat = "identity") +
facet_grid(. ~ SCC.Level.Two) +
xlab("Year") +
ylab(expression("Total Tons of PM"[2.5]*" Emissions")) +
ggtitle(expression(atop("Total Tons of PM"[2.5]*" Emissions in Baltimore City", paste("from Motor Vehicle Sources")))) +
theme(plot.title = element_text(hjust = 0.5)) + # Center the plot title
theme(plot.margin = unit(c(1,1,1,1), "cm")) + # Adjust plot margins
scale_fill_brewer(palette = "Set1") +
guides(fill = FALSE)

scc.vehicles <- SCC[grep("Mobile.*Vehicles", SCC$EI.Sector), ]; # Pattern match mobile vehicles in SCC description
scc.vehicles.list <- unique(scc.vehicles$SCC); # Create motor vehicle lookup list by SCC
nei.vehicles <- subset(NEI, SCC %in% scc.vehicles.list); # Filter for motor vehicle sources
nei.vehicles <- nei.vehicles %>% filter(fips == "24510"| fips == "06037"); # Filter for Baltimore City or Los Angeles County
nei.vehicles$fips[nei.vehicles$fips == "24510"] <- "Baltimore";
nei.vehicles$fips[nei.vehicles$fips == "06037"] <- "Los Angeles";
nei.vehicles <- merge(x = nei.vehicles, y = scc.vehicles[, c("SCC", "SCC.Level.Two")], by = "SCC"); # Join in descriptive data on SCC codes
nei.vehicles <- nei.vehicles %>% group_by(fips, year, SCC.Level.Two) %>% summarize(Annual.Total = sum(Emissions));
nei.vehicles.total <- nei.vehicles %>% group_by(fips, year) %>% summarize(Annual.Total = sum(Annual.Total)) %>% mutate(SCC.Level.Two = "Total");
nei.vehicles <- bind_rows(nei.vehicles, nei.vehicles.total);
## Warning in bind_rows_(x, .id): binding factor and character vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
nei.vehicles$SCC.Level.Two <- factor(nei.vehicles$SCC.Level.Two, levels = c("Total", "Highway Vehicles - Diesel", "Highway Vehicles - Gasoline"));
ggplot(nei.vehicles, aes(x = factor(year), y = Annual.Total, fill = SCC.Level.Two)) +
geom_bar(stat = "identity") +
facet_grid(fips ~ SCC.Level.Two) +
xlab("Year") +
ylab(expression("Total Tons of PM"[2.5]*" Emissions")) +
ggtitle(expression(atop("Total Tons of PM"[2.5]*" Emissions from Motor Vehicle Sources", paste("in Baltimore City, MD and Los Angeles County, CA")))) +
theme(plot.title = element_text(hjust = 0.5)) + # Center the plot title
theme(plot.margin = unit(c(1,1,1,1), "cm")) + # Adjust plot margins
scale_fill_brewer(palette = "Set1") +
guides(fill = FALSE)

scc.vehicles <- SCC[grep("Mobile.*Vehicles", SCC$EI.Sector), ]; # Pattern match mobile vehicles in SCC description
scc.vehicles.list <- unique(scc.vehicles$SCC); # Create motor vehicle lookup list by SCC
nei.vehicles <- subset(NEI, SCC %in% scc.vehicles.list); # Filter for motor vehicle sources
nei.vehicles <- nei.vehicles %>% filter(fips == "24510"| fips == "06037"); # Filter for Baltimore City or Los Angeles County
nei.vehicles$fips[nei.vehicles$fips == "24510"] <- "Baltimore";
nei.vehicles$fips[nei.vehicles$fips == "06037"] <- "Los Angeles";
nei.vehicles <- merge(x = nei.vehicles, y = scc.vehicles[, c("SCC", "SCC.Level.Two")], by = "SCC"); # Join in descriptive data on SCC codes
nei.vehicles <- nei.vehicles %>% group_by(fips, year, SCC.Level.Two) %>% summarize(Annual.Total = sum(Emissions));
nei.vehicles.total <- nei.vehicles %>% group_by(fips, year) %>% summarize(Annual.Total = sum(Annual.Total)) %>% mutate(SCC.Level.Two = "Total");
nei.vehicles <- bind_rows(nei.vehicles, nei.vehicles.total);
## Warning in bind_rows_(x, .id): binding factor and character vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
nei.vehicles$SCC.Level.Two <- factor(nei.vehicles$SCC.Level.Two, levels = c("Total", "Highway Vehicles - Diesel", "Highway Vehicles - Gasoline"));
ggplot(nei.vehicles, aes(x = factor(year), y = Annual.Total, fill = SCC.Level.Two)) +
geom_bar(stat = "identity") +
facet_grid(fips ~ SCC.Level.Two, scales = "free") + # Setup facets and allow scales to adjust to data in each location
xlab("Year") +
ylab(expression("Total Tons of PM"[2.5]*" Emissions")) +
ggtitle(expression(atop("Total Tons of PM"[2.5]*" Emissions from Motor Vehicle Sources", paste("in Baltimore City, MD and Los Angeles County, CA")))) +
theme(plot.title = element_text(hjust = 0.5)) + # Center the plot title
theme(plot.margin = unit(c(1,1,1,1), "cm")) + # Adjust plot margins
scale_fill_brewer(palette = "Set1") +
guides(fill = FALSE)

nei.vehicles.DT <- data.table(nei.vehicles)
yoyFunc <- function(x) {x/shift(x)}
yoy.cols <- c("Annual.Total")
nei.vehicles.DT <- nei.vehicles.DT[, paste0("Percent.Change.", yoy.cols) := lapply(.SD, yoyFunc), by = "fips,SCC.Level.Two", .SDcols = yoy.cols]
nei.vehicles.DT <- mutate(nei.vehicles.DT, Percent.Change.Annual.Total = Percent.Change.Annual.Total - 1)
ggplot(nei.vehicles.DT, aes(x = factor(year), y = Percent.Change.Annual.Total, fill = SCC.Level.Two)) +
geom_bar(stat = "identity") +
facet_grid(fips ~ SCC.Level.Two) +
xlab("Year") +
ylab(expression("% Change From Prior Measurement")) +
ggtitle(expression(atop("Percentage Change in Total Tons of PM"[2.5]*" Emissions from Motor Vehicle", paste("Sources in Baltimore City, MD and Los Angeles County, CA")))) +
theme(plot.title = element_text(hjust = 0.5)) + # Center the plot title
theme(plot.margin = unit(c(1,1,1,1), "cm")) + # Adjust plot margins
scale_fill_brewer(palette = "Set1") +
guides(fill = FALSE)
## Warning: Removed 6 rows containing missing values (position_stack).

CAGR.df <- nei.vehicles.DT %>%
group_by(fips, SCC.Level.Two) %>%
summarize(N.Years = max(year) - min(year),
Beginning.Qty = Annual.Total[which(year==min(year))],
Ending.Qty = Annual.Total[which(year==max(year))],
CAGR = ((Ending.Qty-Beginning.Qty)/N.Years)/Beginning.Qty);
CAGR.df;
## # A tibble: 6 x 6
## # Groups: fips [2]
## fips SCC.Level.Two N.Years Beginning.Qty Ending.Qty CAGR
## <chr> <fct> <int> <dbl> <dbl> <dbl>
## 1 Baltimore Total 9 347. 88.3 -0.0828
## 2 Baltimore Highway Vehicles - D~ 9 216. 40.6 -0.0902
## 3 Baltimore Highway Vehicles - G~ 9 131. 47.7 -0.0707
## 4 Los Ange~ Total 9 3931. 4101. 0.00481
## 5 Los Ange~ Highway Vehicles - D~ 9 1639. 2109. 0.0319
## 6 Los Ange~ Highway Vehicles - G~ 9 2292. 1992. -0.0145
summary(nei.vehicles.DT$Percent.Change.Annual.Total[nei.vehicles.DT$fips=="Baltimore"]);
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -0.65100 -0.54984 -0.32320 -0.31266 -0.09681 0.03171 3
summary(nei.vehicles.DT$Percent.Change.Annual.Total[nei.vehicles.DT$fips=="Los Angeles"]);
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -0.17953 -0.10868 0.04257 0.03217 0.08723 0.46027 3