Emissions Data

In this analysis we look at PM2.5 emissions in the United States between 1999 and 2008.

Session Information

R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.2

locale: [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8

Loading required libraries

library(cowplot)
library(data.table)
library(dplyr)
library(ggplot2)
library(grid)
library(gridExtra)

Loading and formatting data

## Read the two .rds files and hold them as tibbles.
## as_tibble() replaces the deprecated tbl_df() wrapper.
NEI <- as_tibble(readRDS("summarySCC_PM25.rds"))
SCC <- as_tibble(readRDS("Source_Classification_Code.rds"))

## Convert the year column to a factor
NEI$year <- as.factor(NEI$year)

Question 1

Have total emissions from PM2.5 decreased in the United States from 1999 to 2008? Using the base plotting system, make a plot showing the total PM2.5 emission from all sources for each of the years 1999, 2002, 2005, and 2008.

We group and sum the PM2.5 emissions by year.

## Sum the Emissions column within each year
total_by_year <- summarize(group_by(NEI, year), Total = sum(Emissions))
Year Total
1999 7332967
2002 5635780
2005 5454703
2008 3464206

Then we set a colour palette and plot a barplot, dividing the emissions by 1 million to neaten the axis labels. The plot shows that from 1999 to 2008 PM2.5 emissions in the US have fallen.

## Palette function interpolating from firebrick3 to lightsalmon
color1 <- colorRampPalette(c("firebrick3", "lightsalmon"))

## Bar chart of total emissions per year (emissions divided by 1 million).
## atop() already stacks the two title lines, and plotmath does not interpret
## control characters such as "\n" inside strings, so the second line is
## given as plain text.
par(mar = c(5, 4, 4, 1) + 0.1)
with(total_by_year,
     barplot(Total / 1000000,
             names.arg = year,
             main = expression(atop("Total PM"[2.5] * " emissions by year:",
                                    "United States")),
             xlab = "Year",
             ylab = "Emissions (million tons)",
             col = color1(4)
     )
)

Question 2

Have total emissions from PM2.5 decreased in Baltimore City, Maryland (fips == “24510”) from 1999 to 2008? Use the base plotting system to make a plot answering this question.

We filter data for Baltimore using its FIPS code then group and sum by year.

## Keep the rows with fips "24510", then sum Emissions within each year
Balt_year <- summarize(
  group_by(filter(NEI, fips == "24510"), year),
  Total = sum(Emissions)
)
Year Total
1999 3274.180
2002 2453.916
2005 3091.354
2008 1862.282

Then we create a colour palette and plot a barplot. The bar chart shows an overall decrease in PM2.5 emissions; however, the decline has not been steady, with a jump up from 2002 to 2005.

## Palette function interpolating from darkorchid4 to darkslateblue
color2 <- colorRampPalette(c("darkorchid4", "darkslateblue"))

## Bar chart of Baltimore City emissions per year.
## atop() already stacks the two title lines, and plotmath does not interpret
## control characters such as "\n" inside strings, so the second line is
## given as plain text.
par(mar = c(5, 4, 3, 1) + 0.1)
with(Balt_year,
     barplot(Total,
             names.arg = year,
             main = expression(atop("Total PM"[2.5] * " emissions by year:",
                                    "Baltimore City")),
             xlab = "Year",
             ylab = "Emissions (tons)",
             col = color2(4)
     )
)

Question 3

Of the four types of sources indicated by the type (point, nonpoint, onroad, nonroad) variable, which of these four sources have seen decreases in emissions from 1999–2008 for Baltimore City? Which have seen increases in emissions from 1999–2008? Use the ggplot2 plotting system to make a plot to answer this question.

We filter the data for Baltimore using its FIPS code, group by year and by type of emission, and then sum the PM2.5 emissions within each group. We then create an index for each type of emission with a base year of 1999.

## Sum Baltimore City (fips "24510") emissions for each year/type pair.
## ungroup() drops the grouping that summarize() would otherwise leave on
## the stored result.
Balt_year_type <- NEI %>%
  filter(fips == "24510") %>%
  group_by(year, type) %>%
  summarize(Total = sum(Emissions)) %>%
  ungroup()

## Index each type's totals against that type's own 1999 total (1999 = 100).
## year is a factor, so it is compared with the level label "1999".
Balt_ind <- Balt_year_type %>%
  group_by(type) %>%
  mutate(index = 100 * Total / Total[year == "1999"]) %>%
  ungroup()
Year Type Total Index
1999 NON-ROAD 522.94000 100.00000
1999 NONPOINT 2107.62500 100.00000
1999 ON-ROAD 346.82000 100.00000
1999 POINT 296.79500 100.00000
2002 NON-ROAD 240.84692 46.05632
2002 NONPOINT 1509.50000 71.62090
2002 ON-ROAD 134.30882 38.72580
2002 POINT 569.26000 191.80242
2005 NON-ROAD 248.93369 47.60272
2005 NONPOINT 1509.50000 71.62090
2005 ON-ROAD 130.43038 37.60751
2005 POINT 1202.49000 405.15844
2008 NON-ROAD 55.82356 10.67495
2008 NONPOINT 1373.20731 65.15425
2008 ON-ROAD 88.27546 25.45282
2008 POINT 344.97518 116.23349

We then create two line graphs, the first showing PM2.5 emissions by source type and the second showing an index of those changes by source type. We also extract the legend from one plot so that it can be displayed alongside both plots.

## Base plot objects: one line per source type, coloured by type
gg3 <- ggplot(
  Balt_ind,
  aes(x = year, y = Total, group = type, color = type)
)
gg3ind <- ggplot(
  Balt_ind,
  aes(x = year, y = index, group = type, color = type)
)

## Take the legend from a plain line plot of the totals
leg3 <- cowplot::get_legend(gg3 + geom_line() + labs(color = "Type"))

## Plots to be displayed; both are drawn without their own legend
p1 <- gg3 +
  geom_line(lwd = 1) +
  labs(
    x = "Year",
    y = "Emissions (tons)",
    title = expression("PM"[2.5]*" emissions by source")
  ) +
  theme(legend.position = 'none')

p2 <- gg3ind +
  geom_line(lwd = 1, show.legend = FALSE) +
  labs(
    x = "Year",
    y = "Index",
    title = expression("Index of PM"[2.5]*" emissions")
  )

The two plots are now plotted.

## Layout grid for the two plots and the legend: plot 1 fills columns 2-5,
## plot 2 fills columns 6-9, the legend sits in the middle row of columns
## 10-11, and NA marks cells that stay empty.
lay <- rbind(c(NA, 1, 1, 1, 1, 2, 2, 2, 2, NA, NA),
             c(NA, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3),
             c(NA, 1, 1, 1, 1, 2, 2, 2, 2, NA, NA))

## Draw the two plots and the legend; the layout matrix alone determines the
## number of rows and columns, so no separate nrow is passed
grid.arrange(p1, p2, leg3, layout_matrix = lay)

Question 4

Question 5

How have emissions from motor vehicle sources changed from 1999–2008 in Baltimore City?

We first filter the NEI data frame by FIPS code to get Baltimore only data.

## Keep only the Baltimore City rows (fips "24510")
Balt <- NEI %>%
  filter(fips == "24510")

The SCC data frame is then searched to get the SCC codes relating to vehicle emissions. This is then merged with the filtered NEI data frame. Finally the emissions from this merged data frame are grouped and summed by year.

## Flag SCC rows whose SCC.Level.Two text contains "Vehicle" (any case)
veh <- grepl("Vehicle", SCC$SCC.Level.Two, ignore.case = TRUE)
veh_SCC <- SCC[veh, ]

## Match the Baltimore records to the vehicle rows on the SCC code
veh_merged <- merge(x = Balt, y = veh_SCC, by = "SCC")

## Sum Emissions within each year
veh_merged_year <- summarize(
  group_by(veh_merged, year),
  Total = sum(Emissions)
)
Year Total
1999 403.7700
2002 192.0078
2005 185.4144
2008 138.2402

We create a colour palette and plot a barplot. It shows a sharp decrease in PM2.5 vehicle emissions from 1999 to 2002 and then a much slower decrease from 2002 to 2008.

## Palette function interpolating from darkorchid4 to darkslateblue
color5 <- colorRampPalette(c("darkorchid4", "darkslateblue"))

## Bar chart of the yearly vehicle emission totals
par(mar = c(5, 4, 3, 1) + 0.1)
barplot(
  veh_merged_year$Total,
  names.arg = veh_merged_year$year,
  main = expression(atop("PM"[2.5]*" emissions from motor vehicles:",
                         "Baltimore City")),
  xlab = "Year",
  ylab = "Emissions (tons)",
  col = color5(4)
)

Question 6

Compare emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (fips == “06037”). Which city has seen greater changes over time in motor vehicle emissions?

We first filter the NEI data to get data for Baltimore City (FIPS code 24510) and LA County (FIPS code 06037).

## Keep rows whose fips is either "24510" (Baltimore) or "06037" (LA)
Balt_LA <- NEI %>%
  filter(fips %in% c("24510", "06037"))

We use the same data frame created in question 5 for emissions relating to motor vehicles. This is merged by SCC code with the Baltimore City and LA County data. We then group and sum the PM2.5 emissions by year and by location (Baltimore or LA).

## Match the Baltimore/LA records to the vehicle SCC rows on the SCC code
Balt_LA_merged <- merge(Balt_LA, veh_SCC, by = "SCC")

## Sum Emissions for each year/fips pair.
## ungroup() drops the grouping that summarize() would otherwise leave on
## the stored result.
Balt_LA_grouped <- Balt_LA_merged %>%
  group_by(year, fips) %>%
  summarize(Total = sum(Emissions)) %>%
  ungroup()

We create a new column which has the appropriate descriptive name for the location based on the FIPS code.

## Add a City column: fips "24510" is labelled "Baltimore City",
## every other row "Los Angeles County"
Balt_LA_grouped <- Balt_LA_grouped %>%
  mutate(City = ifelse(fips == "24510",
                       "Baltimore City",
                       "Los Angeles County"))

An index column is then added, using 1999 as the base year.

## Index each location's totals against its own 1999 total (1999 = 100).
## year is a factor, so it is compared with the level label "1999";
## ungroup() drops the fips grouping from the stored result.
Balt_LA_ind <- Balt_LA_grouped %>%
  group_by(fips) %>%
  mutate(index = 100 * Total / Total[year == "1999"]) %>%
  ungroup()
Year FIPS Total City Index
1999 06037 6109.6900 Los Angeles County 100.00000
1999 24510 403.7700 Baltimore City 100.00000
2002 06037 7188.6802 Los Angeles County 117.66031
2002 24510 192.0078 Baltimore City 47.55374
2005 06037 7304.1149 Los Angeles County 119.54968
2005 24510 185.4144 Baltimore City 45.92080
2008 06037 6421.0170 Los Angeles County 105.09563
2008 24510 138.2402 Baltimore City 34.23735

We then create two line graphs, the first showing vehicle PM2.5 emissions by location (Baltimore and LA) and the second showing an index of those changes by location. We also extract the legend from one plot so that it can be displayed alongside both plots.

## Base plot objects: one line per location, coloured by City
gg6 <- ggplot(
  Balt_LA_grouped,
  aes(x = year, y = Total, group = City, color = City)
)

gg6ind <- ggplot(
  Balt_LA_ind,
  aes(x = year, y = index, group = City, color = City)
)

## Take the legend from a plain line plot of the totals
leg6 <- cowplot::get_legend(gg6 + geom_line())

## Plots to be displayed; both are drawn without their own legend
p3 <- gg6 +
  geom_line(lwd = 1) +
  labs(
    x = "Year",
    y = "Emissions (tons)",
    title = expression("PM"[2.5]*" motor vehicle emissions:"),
    subtitle = "Los Angeles County and Baltimore City"
  ) +
  theme(legend.position = 'none')

p4 <- gg6ind +
  geom_line(lwd = 1) +
  labs(
    x = "Year",
    y = "Index",
    title = expression("Index of PM"[2.5]*" emissions")
  ) +
  theme(legend.position = 'none')

The two plots are now plotted.

## Draw the two plots and the legend using the layout matrix `lay`; the
## matrix alone determines the rows and columns, so no nrow is passed
grid.arrange(p3, p4, leg6, layout_matrix = lay)