National Emissions Inventory (NEI)
# set up working directory
# The data directory defaults to the location used when this report was
# written, but it can be overridden with the NEI_DATA_DIR environment variable
# so the script runs on other machines without being edited.
data_dir <- Sys.getenv(
  'NEI_DATA_DIR',
  unset = '/home/daria/Courses/R/Coursera/EDA/Week3/exdata-data-NEI_data'
)
# fail early with a clear message instead of a generic setwd() error
if (!dir.exists(data_dir)) {
  stop("NEI data directory not found: ", data_dir, call. = FALSE)
}
setwd(data_dir)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# read the two input files from the working directory
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")
# keep only the classification columns that the analysis below refers to
SCC1 <- SCC %>% select(SCC, Short.Name, SCC.Level.Two)
First look at the data
# merging data
# attach the selected classification columns to the emission records,
# matching rows on the shared SCC column
data <- merge(x = NEI, y = SCC1, by = "SCC")
# preview the first six rows of the merged table
head(data, n = 6)
## SCC fips Pollutant Emissions type year
## 1 10100101 34017 PM25-PRI 898.421 POINT 1999
## 2 10100101 01123 PM25-PRI 0.080 POINT 2002
## 3 10100101 08041 PM25-PRI 2.483 POINT 1999
## 4 10100101 42109 PM25-PRI 58.610 POINT 2002
## 5 10100101 42107 PM25-PRI 131.800 POINT 2005
## 6 10100101 01103 PM25-PRI 0.110 POINT 1999
## Short.Name
## 1 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 2 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 3 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 4 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 5 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 6 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## SCC.Level.Two
## 1 Electric Generation
## 2 Electric Generation
## 3 Electric Generation
## 4 Electric Generation
## 5 Electric Generation
## 6 Electric Generation
# list the column names of the merged table
colnames(data)
## [1] "SCC" "fips" "Pollutant" "Emissions"
## [5] "type" "year" "Short.Name" "SCC.Level.Two"
Have total emissions from PM2.5 decreased in the United States from 1999 to 2008?
# group data by year
# (`by_year` keeps the full merged data grouped by year)
by_year <- group_by(data, year)
# calculate sum of emissions for each year
by_year_total <- summarize(by_year, total_emission = sum(Emissions))
by_year_total
## Source: local data frame [4 x 2]
##
## year total_emission
## 1 1999 7332967
## 2 2002 5635780
## 3 2005 5454703
## 4 2008 3464206
# create plot
# year is converted to a factor so each year gets its own bar; the stray
# trailing comma that used to sit inside aes() has been removed
plot1 <- ggplot(
  data = by_year_total,
  aes(x = factor(year), y = total_emission)
) +
  geom_bar(stat = 'identity', fill = 'red') +
  ggtitle('PM2.5 Emission in USA in 1999 - 2008') +
  xlab('Year') +
  ylab('Total Emission')
# the plot is stored in a variable, so it has to be printed explicitly
print(plot1)

# save plot
# name the plot and spell out `filename` instead of relying on last_plot()
# and on partial matching of `file`
ggsave(filename = 'plot1.png', plot = plot1)
## Saving 7 x 5 in image
Have total emissions from PM2.5 decreased in Baltimore City, Maryland (fips == “24510”) from 1999 to 2008?
# select the Baltimore City records (fips '24510') straight from the merged
# data and group them by year; this no longer relies on the `by_year` object
# created in an earlier section, which later sections reassign
balt_by_year <- group_by(filter(data, fips == '24510'), year)
# get the sum of emission for Baltimore for each year
balt_by_year_total <- summarize(balt_by_year, total_emission = sum(Emissions))
balt_by_year_total
## Source: local data frame [4 x 2]
##
## year total_emission
## 1 1999 3274.180
## 2 2002 2453.916
## 3 2005 3091.354
## 4 2008 1862.282
# create plot
# year is converted to a factor so each year gets its own bar; the stray
# trailing comma that used to sit inside aes() has been removed
plot2 <- ggplot(
  data = balt_by_year_total,
  aes(x = factor(year), y = total_emission)
) +
  geom_bar(stat = 'identity', fill = 'blue') +
  ggtitle('PM2.5 Emission in the Baltimore City in 1999 - 2008') +
  xlab('Year') +
  ylab('Total Emission')
# the plot is stored in a variable, so it has to be printed explicitly
print(plot2)

# save plot
# name the plot and spell out `filename` instead of relying on last_plot()
# and on partial matching of `file`
ggsave(filename = 'plot2.png', plot = plot2)
## Saving 7 x 5 in image
Of the four types of sources indicated by the type (point, nonpoint, onroad, nonroad) variable, which of these four sources have seen decreases in emissions from 1999–2008 for Baltimore City? Which have seen increases in emissions from 1999–2008?
# filter merged data by fip of Baltimore
data_balt <- filter(data, fips == '24510')
# group it by year and type
by_year_type <- group_by(data_balt, year, type)
# sum emissions for each year and source type
by_year_type_sum <- summarise(by_year_type, emission = sum(Emissions))
# ungroup dataframe before plotting
by_year_type_sum <- ungroup(by_year_type_sum)
by_year_type_sum
## Source: local data frame [16 x 3]
##
## year type emission
## 1 1999 NON-ROAD 522.94000
## 2 1999 NONPOINT 2107.62500
## 3 1999 ON-ROAD 346.82000
## 4 1999 POINT 296.79500
## 5 2002 NON-ROAD 240.84692
## 6 2002 NONPOINT 1509.50000
## 7 2002 ON-ROAD 134.30882
## 8 2002 POINT 569.26000
## 9 2005 NON-ROAD 248.93369
## 10 2005 NONPOINT 1509.50000
## 11 2005 ON-ROAD 130.43038
## 12 2005 POINT 1202.49000
## 13 2008 NON-ROAD 55.82356
## 14 2008 NONPOINT 1373.20731
## 15 2008 ON-ROAD 88.27546
## 16 2008 POINT 344.97518
# create plot
# one line per source type; `emission` is already numeric (it is the result
# of sum()), so the former as.numeric() wrapper was dropped
plot3 <- ggplot(data = by_year_type_sum, aes(x = year, y = emission)) +
  geom_line(aes(color = type), size = 4) +
  ggtitle('PM2.5 Emission in the Baltimore in 1999 - 2008') +
  xlab('Year') +
  ylab('Total Emission')
# the plot is stored in a variable, so it has to be printed explicitly
print(plot3)

# save plot
# name the plot and spell out `filename` instead of relying on last_plot()
# and on partial matching of `file`
ggsave(filename = 'plot3.png', plot = plot3)
## Saving 7 x 5 in image
Across the United States, how have emissions from coal combustion-related sources changed from 1999–2008?
# get the observations with combustion and coal only
# a row is kept when its Short.Name matches both patterns
comb <- data[grepl('[cC]omb', data$Short.Name), ]
comb_coal <- comb[grepl('[cC]oal', comb$Short.Name), ]
# group data by year and find sum of emissions by each year
by_year <- group_by(comb_coal, year)
by_year_total <- summarize(by_year, total_emission = sum(Emissions))
by_year_total
## Source: local data frame [4 x 2]
##
## year total_emission
## 1 1999 575206.5
## 2 2002 547380.1
## 3 2005 553549.4
## 4 2008 343979.3
# create plot
# The title uses a plain ASCII hyphen: the en dash that was here before could
# not be converted by the graphics device and triggered 'mbcsToSbcs'
# conversion warnings, with dots substituted into the rendered title.
plot4 <- ggplot(
  data = by_year_total,
  aes(x = factor(year), y = total_emission)
) +
  geom_bar(fill = '#FF8B00', stat = 'identity') +
  ggtitle('Emissions from coal combustion-related sources in 1999 - 2008') +
  xlab('Year') +
  ylab('Total Emission')
# the plot is stored in a variable, so it has to be printed explicitly
print(plot4)

# save plot
# name the plot and spell out `filename` instead of relying on last_plot()
# and on partial matching of `file`
ggsave(filename = 'plot4.png', plot = plot4)
## Saving 7 x 5 in image
How have emissions from motor vehicle sources changed from 1999–2008 in Baltimore City?
# filter data for Baltimore
# fips is stored as text, so it is compared with the string '24510' (as in the
# other sections) rather than with a number that R would have to coerce
balt <- filter(data, fips == '24510')
# get data for emissions from motor vehicle sources
vehicle <- balt[grepl('[vV]ehicle', balt$SCC.Level.Two), ]
# group by year and calculate total emission for each year
by_year <- group_by(vehicle, year)
by_year_total <- summarize(by_year, total_emission = sum(Emissions))
# create plot
# The title uses a plain ASCII hyphen: the en dash that was here before could
# not be converted by the graphics device and triggered 'mbcsToSbcs'
# conversion warnings, with dots substituted into the rendered title.
plot5 <- ggplot(
  data = by_year_total,
  aes(x = factor(year), y = total_emission)
) +
  geom_bar(fill = '#66CD00', stat = 'identity') +
  ggtitle('Emissions from vehicle sources in 1999 - 2008') +
  xlab('Year') +
  ylab('Total Emission')
# the plot is stored in a variable, so it has to be printed explicitly
print(plot5)

# save plot
# name the plot and spell out `filename` instead of relying on last_plot()
# and on partial matching of `file`
ggsave(filename = 'plot5.png', plot = plot5)
## Saving 7 x 5 in image
Compare emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (fips == “06037”). Which city has seen greater changes over time in motor vehicle emissions?
# filter data for Baltimore and Los Angeles
# lookup table from fips code to the city name shown in the plot legend
city_names <- c('06037' = "Los Angeles", '24510' = "Baltimore")
bl_la <- filter(data, fips %in% names(city_names))
table(bl_la$fips)
##
## 06037 24510
## 9320 2096
# get data for emissions from motor vehicle sources
vehicle <- bl_la[grepl('[vV]ehicle', bl_la$SCC.Level.Two), ]
# group by year and fips and calculate total emission for each year
by_year_fips <- group_by(vehicle, year, fips)
by_year_fips_total <- summarize(by_year_fips, total_emission = sum(Emissions))
# ungroup dataframe before plotting
by_year_fips_total <- ungroup(by_year_fips_total)
by_year_fips_total
## Source: local data frame [8 x 3]
##
## year fips total_emission
## 1 1999 06037 6109.6900
## 2 1999 24510 403.7700
## 3 2002 06037 7188.6802
## 4 2002 24510 192.0078
## 5 2005 06037 7304.1149
## 6 2005 24510 185.4144
## 7 2008 06037 6421.0170
## 8 2008 24510 138.2402
# convert fips into city names
# the column is renamed by name rather than by position, so a change in the
# column order cannot silently rename the wrong column
by_year_fips_total <- rename(by_year_fips_total, City = fips)
by_year_fips_total$City <- unname(city_names[by_year_fips_total$City])
# create plot
# The title uses a plain ASCII hyphen: the en dash that was here before could
# not be converted by the graphics device and triggered 'mbcsToSbcs'
# conversion warnings, with dots substituted into the rendered title.
plot6 <- ggplot(data = by_year_fips_total, aes(x = year, y = total_emission)) +
  geom_line(aes(color = City), size = 4) +
  ggtitle('Emissions from vehicle sources in 1999 - 2008') +
  xlab('Year') +
  ylab('Total Emission')
# the plot is stored in a variable, so it has to be printed explicitly
print(plot6)

# save plot
# name the plot and spell out `filename` instead of relying on last_plot()
# and on partial matching of `file`
ggsave(filename = 'plot6.png', plot = plot6)
## Saving 7 x 5 in image