PA2

This R Markdown document contains solutions to Peer Assessment 2 of Coursera's Exploratory Data Analysis course.

Load data

We start by loading the data and looking at it

# Work from the folder that holds the assignment's .rds files.
setwd("C:/Users/Miguel/Desktop/data_science/plots/Passess2")
# Load the PM2.5 emissions records, then preview the first rows.
NEI <- readRDS("summarySCC_PM25.rds")
head(NEI)
##     fips      SCC Pollutant Emissions  type year
## 4  09001 10100401  PM25-PRI    15.714 POINT 1999
## 8  09001 10100404  PM25-PRI   234.178 POINT 1999
## 12 09001 10100501  PM25-PRI     0.128 POINT 1999
## 16 09001 10200401  PM25-PRI     2.036 POINT 1999
## 20 09001 10200504  PM25-PRI     0.388 POINT 1999
## 24 09001 10200602  PM25-PRI     1.490 POINT 1999

What's in this database?

# Load the source classification code table, then preview the first rows.
SCC <- readRDS("Source_Classification_Code.rds")
head(SCC)
##        SCC Data.Category
## 1 10100101         Point
## 2 10100102         Point
## 3 10100201         Point
## 4 10100202         Point
## 5 10100203         Point
## 6 10100204         Point
##                                                                   Short.Name
## 1                   Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 2 Ext Comb /Electric Gen /Anthracite Coal /Traveling Grate (Overfeed) Stoker
## 3       Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Wet Bottom
## 4       Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Dry Bottom
## 5                   Ext Comb /Electric Gen /Bituminous Coal /Cyclone Furnace
## 6                   Ext Comb /Electric Gen /Bituminous Coal /Spreader Stoker
##                                EI.Sector Option.Group Option.Set
## 1 Fuel Comb - Electric Generation - Coal                        
## 2 Fuel Comb - Electric Generation - Coal                        
## 3 Fuel Comb - Electric Generation - Coal                        
## 4 Fuel Comb - Electric Generation - Coal                        
## 5 Fuel Comb - Electric Generation - Coal                        
## 6 Fuel Comb - Electric Generation - Coal                        
##                 SCC.Level.One       SCC.Level.Two
## 1 External Combustion Boilers Electric Generation
## 2 External Combustion Boilers Electric Generation
## 3 External Combustion Boilers Electric Generation
## 4 External Combustion Boilers Electric Generation
## 5 External Combustion Boilers Electric Generation
## 6 External Combustion Boilers Electric Generation
##                 SCC.Level.Three
## 1               Anthracite Coal
## 2               Anthracite Coal
## 3 Bituminous/Subbituminous Coal
## 4 Bituminous/Subbituminous Coal
## 5 Bituminous/Subbituminous Coal
## 6 Bituminous/Subbituminous Coal
##                                  SCC.Level.Four Map.To Last.Inventory.Year
## 1                               Pulverized Coal     NA                  NA
## 2             Traveling Grate (Overfeed) Stoker     NA                  NA
## 3 Pulverized Coal: Wet Bottom (Bituminous Coal)     NA                  NA
## 4 Pulverized Coal: Dry Bottom (Bituminous Coal)     NA                  NA
## 5             Cyclone Furnace (Bituminous Coal)     NA                  NA
## 6             Spreader Stoker (Bituminous Coal)     NA                  NA
##   Created_Date Revised_Date Usage.Notes
## 1                                      
## 2                                      
## 3                                      
## 4                                      
## 5                                      
## 6

This database contains a mapping from the SCC digit strings in the Emissions table to the actual name of the PM2.5 source. The sources are categorized in a few different ways, from more general to more specific, and one may choose to explore whatever categories one thinks are most useful. For example, source “10100101” is known as “Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal”.

Question 1: Have total emissions from PM2.5 decreased in the United States from 1999 to 2008?

Using the base plotting system, make a plot showing the total PM2.5 emission from all sources for each of the years 1999, 2002, 2005, and 2008.


To answer this question, we first have to compute the total emission per year:

# Question 1: total PM2.5 emissions from all sources, one bar per year.
# The chart is written to question1.png and then drawn again for display.
png(filename = "question1.png", width = 480, height = 480)
# aggregate() returns one row per year: column 1 is the year, column 2 the
# summed emissions.
yearly_totals <- aggregate(NEI$Emissions, list(year = NEI$year), sum)
total_emissions_per_year <- as.table(yearly_totals[, 2])
# Label the bars with the years aggregate() reported rather than
# unique(NEI$year): unique() follows the row order of NEI, which need not
# match the order of the aggregated totals.
rownames(total_emissions_per_year) <- as.character(yearly_totals[, 1])
barplot(
  total_emissions_per_year / 1E6,
  main = "Emissions per year (in 1E6 tons)",
  col = "wheat"
)
dev.off()
## pdf 
##   2
barplot(
  total_emissions_per_year / 1E6,
  main = "Emissions per year (in 1E6 tons)",
  col = "wheat"
)

plot of chunk unnamed-chunk-3

Question 2: Have total emissions from PM2.5 decreased in the Baltimore City, Maryland (fips == “24510”) from 1999 to 2008?

Use the base plotting system to make a plot answering this question.

To answer this question, we first have to compute the total emissions in Baltimore city per year:

# Question 2: total PM2.5 emissions per year for Baltimore City
# (fips == "24510"). The chart is written to question2.png and then drawn
# again for display.
png(filename = "question2.png", width = 480, height = 480)
is_baltimore <- NEI$fips == "24510"
# aggregate() returns one row per year present in the Baltimore rows:
# column 1 is the year, column 2 the summed emissions.
baltimore_totals <- aggregate(
  NEI$Emissions[is_baltimore],
  list(year = NEI$year[is_baltimore]),
  sum
)
total_emissions_per_year_Baltimore <- as.table(baltimore_totals[, 2])
# Label the bars with the years found in the Baltimore subset. The years of
# the full NEI table may differ in order, or in number, from the subset's.
rownames(total_emissions_per_year_Baltimore) <-
  as.character(baltimore_totals[, 1])
barplot(
  total_emissions_per_year_Baltimore,
  main = "Emissions per year in Baltimore",
  col = "wheat"
)
dev.off()
## pdf 
##   2
barplot(
  total_emissions_per_year_Baltimore,
  main = "Emissions per year in Baltimore",
  col = "wheat"
)

plot of chunk unnamed-chunk-4

Question 3: Of the four types of sources indicated by the type (point, nonpoint, onroad, nonroad) variable, which of these four sources have seen decreases in emissions from 1999-2008 for Baltimore City?

Which have seen increases in emissions from 1999-2008? Use the ggplot2 plotting system to make a plot that answers this question.

library(ggplot2)
# Question 3: Baltimore City emissions by source type, with a linear trend
# per panel. Emissions are shown as log10(Emissions + 1).
NEI_Baltimore <- NEI[NEI$fips == "24510", ]
# Build the plot once with explicit layers instead of qplot(): qplot() is
# deprecated, and it forwards `method` to every layer rather than only to
# the smoother.
baltimore_by_type <- ggplot(
  NEI_Baltimore,
  aes(year, log10(Emissions + 1))
) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_grid(. ~ type) +
  ggtitle("Measure of emissions for Baltimore City")
png(filename = "question3.png", width = 480, height = 480)
# Explicit print() so the file is also written when the code is source()d,
# where top-level auto-printing does not happen.
print(baltimore_by_type)
dev.off()
## pdf 
##   2
print(baltimore_by_type)

plot of chunk unnamed-chunk-5

Question 4: Across the United States, how have emissions from coal combustion-related sources changed from 1999-2008?

library(ggplot2)
# Question 4: total emissions per year from coal-related sources across the
# whole data set. The chart is written to question4.png and then drawn again
# for display.
png(filename = "question4.png", width = 480, height = 480)
# Select the classification codes whose EI.Sector mentions "coal"/"Coal",
# then keep the NEI rows that carry one of those codes.
coal_related_entries <- grep("(c|C)oal", SCC$EI.Sector)
coal_related_SCCs <- SCC$SCC[coal_related_entries]
NEI_coal <- subset(NEI, SCC %in% coal_related_SCCs)

# aggregate() returns one row per year: column 1 is the year, column 2 the
# summed emissions.
coal_totals <- aggregate(NEI_coal$Emissions, list(year = NEI_coal$year), sum)
total_emissions_per_year <- as.table(coal_totals[, 2])
# Label the bars with the years aggregate() reported rather than
# unique(NEI_coal$year), whose order follows the rows of NEI_coal and need
# not match the order of the aggregated totals.
rownames(total_emissions_per_year) <- as.character(coal_totals[, 1])
barplot(
  total_emissions_per_year / 1E6,
  main = "Coal related emissions per year (in 1E6 tons)",
  col = "wheat"
)
dev.off()
## pdf 
##   2
barplot(
  total_emissions_per_year / 1E6,
  main = "Coal related emissions per year (in 1E6 tons)",
  col = "wheat"
)

plot of chunk unnamed-chunk-6

Question 5: How have emissions from motor vehicle sources changed from 1999-2008 in Baltimore City?

library(ggplot2)
# Question 5: total emissions per year from motor vehicle sources in
# Baltimore City (fips == "24510"). The chart is written to question5.png
# and then drawn again for display.
png(filename = "question5.png", width = 480, height = 480)
# Select the classification codes whose EI.Sector contains "Vehicle", keep
# the NEI rows that carry one of those codes, then restrict to Baltimore.
motor_related_entries <- grep("Vehicle", SCC$EI.Sector)
motor_related_SCCs <- SCC$SCC[motor_related_entries]
NEI_motor <- subset(NEI, SCC %in% motor_related_SCCs)
NEI_motor_Baltimore <- NEI_motor[NEI_motor$fips == "24510", ]

# aggregate() returns one row per year: column 1 is the year, column 2 the
# summed emissions.
motor_totals <- aggregate(
  NEI_motor_Baltimore$Emissions,
  list(year = NEI_motor_Baltimore$year),
  sum
)
total_emissions_per_year <- as.table(motor_totals[, 2])
# Label the bars with the years aggregate() reported rather than
# unique(NEI_motor_Baltimore$year), whose order follows the data rows and
# need not match the order of the aggregated totals.
rownames(total_emissions_per_year) <- as.character(motor_totals[, 1])
barplot(
  total_emissions_per_year,
  main = "Emissions from motor vehicle sources in Baltimore city",
  col = "wheat"
)
dev.off()
## pdf 
##   2
barplot(
  total_emissions_per_year,
  main = "Emissions from motor vehicle sources in Baltimore city",
  col = "wheat"
)

plot of chunk unnamed-chunk-7

Question 6: Compare emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (fips == “06037”). Which city has seen greater changes over time in motor vehicle emissions?

library(ggplot2)
library(plyr)
# Question 6: motor vehicle emissions in Baltimore City (fips "24510") and
# Los Angeles County (fips "06037"), one panel per location with a linear
# trend. Emissions are shown as log10(Emissions + 1).
# Select the classification codes whose EI.Sector contains "Vehicle" and
# keep the NEI rows that carry one of those codes.
motor_related_entries <- grep("Vehicle", SCC$EI.Sector)
motor_related_SCCs <- SCC$SCC[motor_related_entries]
NEI_motor <- subset(NEI, SCC %in% motor_related_SCCs)
# %in% never yields NA, so a missing fips cannot introduce all-NA rows the
# way an `==` / `|` mask would.
NEI_motor_Baltimore_and_LA <-
  NEI_motor[NEI_motor$fips %in% c("24510", "06037"), ]
# Replace the fips codes with readable panel labels.
NEI_motor_Baltimore_and_LA$fips <- revalue(
  NEI_motor_Baltimore_and_LA$fips,
  c("06037" = "LA", "24510" = "Baltimore")
)

# Build the plot once with explicit layers instead of qplot(): qplot() is
# deprecated, and it forwards `method` to every layer rather than only to
# the smoother.
motor_by_location <- ggplot(
  NEI_motor_Baltimore_and_LA,
  aes(year, log10(Emissions + 1))
) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_grid(. ~ fips) +
  ggtitle("Measure of emissions arising from motor vehicle sources")
png(filename = "question6.png", width = 480, height = 480)
# Explicit print() so the file is also written when the code is source()d,
# where top-level auto-printing does not happen.
print(motor_by_location)
dev.off()
## pdf 
##   2
print(motor_by_location)

plot of chunk unnamed-chunk-8