National Emissions Inventory (NEI)

# set up working directory
# NOTE(review): hard-coded absolute path; the relative readRDS() paths below
# resolve against it, so the script only runs where this directory exists
setwd('/home/daria/Courses/R/Coursera/EDA/Week3/exdata-data-NEI_data')
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# load the emission records and the source classification table
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")

# keep only the classification columns used in the analysis
SCC1 <- SCC %>% select(SCC, Short.Name, SCC.Level.Two)

First look at data

# attach the classification details to every emission record via the SCC code
data <- merge(x = NEI, y = SCC1, by = "SCC")

# preview the first six rows of the merged table
head(data, n = 6)
##        SCC  fips Pollutant Emissions  type year
## 1 10100101 34017  PM25-PRI   898.421 POINT 1999
## 2 10100101 01123  PM25-PRI     0.080 POINT 2002
## 3 10100101 08041  PM25-PRI     2.483 POINT 1999
## 4 10100101 42109  PM25-PRI    58.610 POINT 2002
## 5 10100101 42107  PM25-PRI   131.800 POINT 2005
## 6 10100101 01103  PM25-PRI     0.110 POINT 1999
##                                                 Short.Name
## 1 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 2 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 3 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 4 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 5 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 6 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
##         SCC.Level.Two
## 1 Electric Generation
## 2 Electric Generation
## 3 Electric Generation
## 4 Electric Generation
## 5 Electric Generation
## 6 Electric Generation
# list the columns of the merged table
colnames(data)
## [1] "SCC"           "fips"          "Pollutant"     "Emissions"    
## [5] "type"          "year"          "Short.Name"    "SCC.Level.Two"

Have total emissions from PM2.5 decreased in the United States from 1999 to 2008?

# bucket the merged records by inventory year
by_year <- data %>% group_by(year)

# add up the emissions recorded within each year
by_year_total <- by_year %>%
  summarize(total_emission = sum(Emissions))
by_year_total
## Source: local data frame [4 x 2]
## 
##   year total_emission
## 1 1999        7332967
## 2 2002        5635780
## 3 2005        5454703
## 4 2008        3464206
# bar chart of the total emission per year for the whole country;
# year is turned into a factor so each inventory year gets its own bar
ggplot(data = by_year_total, aes(x = factor(year), y = total_emission)) +
  geom_bar(stat = "identity", fill = "red") +
  ggtitle("PM2.5 Emission in USA in 1999 - 2008") +
  xlab("Year") +
  ylab("Total Emission")

# save the last plot; the argument is spelled out as `filename` so the call
# does not depend on partial argument matching
ggsave(filename = "plot1.png")
## Saving 7 x 5 in image

Have total emissions from PM2.5 decreased in Baltimore City, Maryland (fips == “24510”) from 1999 to 2008?

# restrict the year-grouped records to Baltimore City (fips 24510)
balt_by_year <- by_year %>% filter(fips == "24510")

# yearly emission totals for Baltimore City
balt_by_year_total <- balt_by_year %>%
  summarize(total_emission = sum(Emissions))
balt_by_year_total
## Source: local data frame [4 x 2]
## 
##   year total_emission
## 1 1999       3274.180
## 2 2002       2453.916
## 3 2005       3091.354
## 4 2008       1862.282
# bar chart of the total emission per year for Baltimore City;
# year is turned into a factor so each inventory year gets its own bar
ggplot(data = balt_by_year_total, aes(x = factor(year), y = total_emission)) +
  geom_bar(stat = "identity", fill = "blue") +
  ggtitle("PM2.5 Emission in the Baltimore City in 1999 - 2008") +
  xlab("Year") +
  ylab("Total Emission")

# save the last plot; the argument is spelled out as `filename` so the call
# does not depend on partial argument matching
ggsave(filename = "plot2.png")
## Saving 7 x 5 in image

Of the four types of sources indicated by the type (point, nonpoint, onroad, nonroad) variable, which of these four sources have seen decreases in emissions from 1999–2008 for Baltimore City? Which have seen increases in emissions from 1999–2008?

# records for Baltimore City only (fips 24510)
data_balt <- data %>% filter(fips == "24510")

# one group per combination of year and source type
by_year_type <- data_balt %>% group_by(year, type)

# emission total for each year/type pair, with the grouping dropped
# so the result is a plain table for plotting
by_year_type_sum <- by_year_type %>%
  summarise(emission = sum(Emissions)) %>%
  ungroup()
by_year_type_sum
## Source: local data frame [16 x 3]
## 
##    year     type   emission
## 1  1999 NON-ROAD  522.94000
## 2  1999 NONPOINT 2107.62500
## 3  1999  ON-ROAD  346.82000
## 4  1999    POINT  296.79500
## 5  2002 NON-ROAD  240.84692
## 6  2002 NONPOINT 1509.50000
## 7  2002  ON-ROAD  134.30882
## 8  2002    POINT  569.26000
## 9  2005 NON-ROAD  248.93369
## 10 2005 NONPOINT 1509.50000
## 11 2005  ON-ROAD  130.43038
## 12 2005    POINT 1202.49000
## 13 2008 NON-ROAD   55.82356
## 14 2008 NONPOINT 1373.20731
## 15 2008  ON-ROAD   88.27546
## 16 2008    POINT  344.97518
# one line per source type showing its yearly emission total in Baltimore;
# emission is already a numeric sum, so it is mapped to y directly
ggplot(data = by_year_type_sum, aes(x = year, y = emission)) +
  geom_line(aes(color = type), size = 4) +
  ggtitle("PM2.5 Emission in the Baltimore in 1999 - 2008") +
  xlab("Year") +
  ylab("Total Emission")

# save the last plot; the argument is spelled out as `filename` so the call
# does not depend on partial argument matching
ggsave(filename = "plot3.png")
## Saving 7 x 5 in image

How have emissions from motor vehicle sources changed from 1999–2008 in Baltimore City?

# filter data for Baltimore; fips values are text with leading zeros
# (e.g. "06037"), so the code is compared as a string, not as a number
balt <- filter(data, fips == "24510")

# keep emissions from motor vehicle sources: rows whose level-two
# classification mentions "vehicle" (either capitalisation)
vehicle <- balt[grepl("[vV]ehicle", balt$SCC.Level.Two), ]

# group by year and calculate total emission for each year
# NOTE(review): this reassigns by_year and by_year_total, which earlier held
# the nationwide grouping and totals
by_year <- group_by(vehicle, year)
by_year_total <- summarize(by_year, total_emission = sum(Emissions))

# bar chart of the yearly motor vehicle emission totals for Baltimore City.
# The title uses an ASCII hyphen: the graphics device could not convert an
# en dash ('mbcsToSbcs' conversion failure) and substituted dots for it.
ggplot(data = by_year_total, aes(x = factor(year), y = total_emission)) +
  geom_bar(fill = I("#66CD00"), stat = "identity") +
  ggtitle("Emissions from vehicle sources in 1999 - 2008") +
  xlab("Year") +
  ylab("Total Emission")

# save the last plot; the argument is spelled out as `filename` so the call
# does not depend on partial argument matching
ggsave(filename = "plot5.png")
## Saving 7 x 5 in image

Compare emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (fips == “06037”). Which city has seen greater changes over time in motor vehicle emissions?

# keep only the Baltimore City (24510) and Los Angeles County (06037) records
bl_la <- data %>% filter(fips %in% c("24510", "06037"))

# number of records available for each of the two fips codes
table(bl_la$fips)
## 
## 06037 24510 
##  9320  2096
# restrict to rows whose level-two classification mentions "vehicle"
vehicle <- bl_la[grepl("[vV]ehicle", bl_la$SCC.Level.Two), ]

# one group per combination of year and fips code
by_year_fips <- vehicle %>% group_by(year, fips)

# emission total for each year/fips pair, with the grouping dropped
# so the result is a plain table for plotting
by_year_fips_total <- by_year_fips %>%
  summarize(total_emission = sum(Emissions)) %>%
  ungroup()
by_year_fips_total
## Source: local data frame [8 x 3]
## 
##   year  fips total_emission
## 1 1999 06037      6109.6900
## 2 1999 24510       403.7700
## 3 2002 06037      7188.6802
## 4 2002 24510       192.0078
## 5 2005 06037      7304.1149
## 6 2005 24510       185.4144
## 7 2008 06037      6421.0170
## 8 2008 24510       138.2402
# convert fips into city names
# the column is located by its name rather than by position, so the rename
# does not depend on the column order of the summary table
names(by_year_fips_total)[names(by_year_fips_total) == "fips"] <- "City"

# replace each fips code with a readable label for the plot legend
by_year_fips_total$City[by_year_fips_total$City == "06037"] <- "Los Angeles"
by_year_fips_total$City[by_year_fips_total$City == "24510"] <- "Baltimore"

# one line per city showing its yearly motor vehicle emission total.
# The title uses an ASCII hyphen: the graphics device could not convert an
# en dash ('mbcsToSbcs' conversion failure) and substituted dots for it.
ggplot(data = by_year_fips_total, aes(x = year, y = total_emission)) +
  geom_line(aes(color = City), size = 4, stat = "identity") +
  ggtitle("Emissions from vehicle sources in 1999 - 2008") +
  xlab("Year") +
  ylab("Total Emission")

# save the last plot; the argument is spelled out as `filename` so the call
# does not depend on partial argument matching
ggsave(filename = "plot6.png")
## Saving 7 x 5 in image