The overall goal of this assignment is to explore the National Emissions Inventory database and see what it says about fine particulate matter pollution in the United States over the 10-year period 1999-2008.
National Emissions Inventory Data
The following 6 questions are addressed:
(1) Have total emissions from PM2.5 decreased in the United States from 1999 to 2008? Using the base plotting system, make a plot showing the total PM2.5 emission from all sources for each of the years 1999, 2002, 2005, and 2008.
(2) Have total emissions from PM2.5 decreased in Baltimore City, Maryland (fips == "24510") from 1999 to 2008? Use the base plotting system to make a plot answering this question.
(3) Of the four types of sources indicated by the type (point, nonpoint, onroad, nonroad) variable, which of these four sources have seen decreases in emissions from 1999-2008 for Baltimore City? Which have seen increases in emissions from 1999-2008? Use the ggplot2 plotting system to make a plot to answer this question.
(4) Across the United States, how have emissions from coal combustion-related sources changed from 1999-2008?
(5) How have emissions from motor vehicle sources changed from 1999-2008 in Baltimore City?
(6) Compare emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (fips == "06037"). Which city has seen greater changes over time in motor vehicle emissions?
# Work from the homework folder that holds the two .rds data files.
# NOTE(review): this absolute Windows path is machine-specific.
setwd(dir = "F:\\Academics\\2015 Summer\\Data Science Course\\4-Exploratory Data Analysis\\Homework")

# NEI supplies the Emissions / year / fips / type / SCC columns used below;
# SCC is the lookup table whose EI.Sector column is searched further down.
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")

# Quick structural look at the emissions table ...
head(x = NEI)
str(object = NEI)

# ... and at the classification table.
head(x = SCC)
str(object = SCC)
library(ggplot2)
library(dplyr)
# Question 1: total PM2.5 emissions over all rows of NEI, one value per year.
# `year` is converted to a factor so the chart draws one discrete bar per
# inventory year rather than using a continuous x axis.
emission <- NEI %>%
  as_tibble() %>% # replaces tbl_df(), deprecated since dplyr 1.0.0
  select(Emissions, year) %>%
  mutate(year = as.factor(year)) %>%
  group_by(year) %>%
  summarize(total_emissions = sum(Emissions)) %>%
  print()

# Bar chart of the yearly totals; the y values are divided by 1e6 so the axis
# reads in millions, matching the axis label.
ggplot(emission, aes(x = year, y = total_emissions / 1e6)) +
  geom_bar(stat = "identity", fill = "steelblue", width = 0.7) +
  ggtitle("Total Emissions of PM2.5 in the United States") +
  labs(x = "Year", y = "Total Emissions (million tons)") +
  scale_y_continuous(breaks = 1:8) +
  # All theme tweaks gathered in a single call instead of five stacked ones.
  theme(
    plot.title = element_text(
      size = 16, face = "bold", color = "red", hjust = 0.5, vjust = 3
    ),
    axis.title.y = element_text(size = 13, face = "bold"),
    axis.title.x = element_text(size = 13, face = "bold"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )
#with(emission, plot(year, total_emissions, type="n", xaxt="n", yaxt="n",
# main="Total Emissions of PM2.5 in the United States",
# ylab="Total Emissions (million tons)", xlab="Year",
# xlim=c(1999,2008), ylim=c(3,8)*10^6) )
#with(emission, points(year, total_emissions, pch=19, col="blue") )
#with(emission, lines(year, total_emissions) )
#axis(1, c(1999,2002,2005,2008), las=0 )
#axis(2, at=3:8*10^6, labels=3:8, las=1 )
# Question 2: total PM2.5 emissions per year, restricted to the rows whose
# fips code is "24510" (Baltimore City). `year` becomes a factor so each
# inventory year gets its own bar.
emission_Bal <- NEI %>%
  as_tibble() %>% # replaces tbl_df(), deprecated since dplyr 1.0.0
  filter(fips == "24510") %>%
  select(Emissions, year) %>%
  mutate(year = as.factor(year)) %>%
  group_by(year) %>%
  summarize(total_emissions = sum(Emissions)) %>%
  print()

# Bar chart of the yearly Baltimore totals, y axis ticked every 500 up to 3000.
ggplot(emission_Bal, aes(x = year, y = total_emissions)) +
  geom_bar(stat = "identity", fill = "steelblue", width = 0.7) +
  ggtitle("Total Emissions of PM2.5 in the Baltimore City") +
  labs(x = "Year", y = "Total Emissions (tons)") +
  scale_y_continuous(breaks = seq(0, 3000, by = 500)) +
  # All theme tweaks gathered in a single call instead of five stacked ones.
  theme(
    plot.title = element_text(
      size = 16, face = "bold", color = "red", hjust = 0.5, vjust = 3
    ),
    axis.title.y = element_text(size = 13, face = "bold"),
    axis.title.x = element_text(size = 13, face = "bold"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )
#with(emission_Bal, plot(year, total_emissions, type="n", xaxt="n", yaxt="n",
# main="Total Emissions of PM2.5 in the Baltimore City",
# ylab="Total Emissions (thousand tons)", xlab="Year",
# xlim=c(1999,2008), ylim=c(1500,3500) ) )
#with(emission_Bal, points(year, total_emissions, pch=19, col="blue") )
#with(emission_Bal, lines(year, total_emissions) )
#axis(1, c(1999,2002,2005,2008), las=0 )
#axis(2, seq(1500,3500,500), labels=seq(1.5,3.5,0.5), las=1 )
# Question 3: Baltimore City (fips "24510") PM2.5 totals per year and per
# source `type`.
emission_Bal_sources <- NEI %>%
  as_tibble() %>% # replaces tbl_df(), deprecated since dplyr 1.0.0
  filter(fips == "24510") %>%
  select(Emissions, year, type) %>%
  group_by(year, type) %>%
  summarize(total_emissions = sum(Emissions)) %>%
  # summarize() only peels off the last grouping level; drop the leftover
  # `year` grouping so the stored result is a plain tibble.
  ungroup() %>%
  print()

# One line + marker series per source type. Shapes 22:25 are the fillable
# symbols, which is why `fill` is mapped alongside `color` and `shape`.
g <- ggplot(emission_Bal_sources, aes(year, total_emissions)) +
  geom_point(size = 3, aes(color = type, shape = type, fill = type)) +
  scale_shape_manual(values = 22:25) +
  geom_line(aes(color = type)) +
  # Tick only the years that actually occur in the data.
  scale_x_continuous(breaks = unique(emission_Bal_sources$year)) +
  xlab("Year") +
  ylab("Total Emissions (tons)") +
  ggtitle("Emissions of PM2.5 in the Baltimore City by Four Types of Sources") +
  theme(
    plot.title = element_text(
      size = 16, face = "bold", color = "red", hjust = 0.5, vjust = 3
    )
  )
print(g)
# Build the SCC -> vehicle-class lookup used for the motor-vehicle questions.
# Keep only classification rows whose EI.Sector mentions "Mobile", label each
# with one of four vehicle classes, and drop rows matching none of them.
# The result has two columns: SCC (as character, so it can be matched against
# NEI$SCC) and Vehicle.
# NOTE(review): no code for question 4 (coal combustion) is visible before
# this point in the script.
Mobile <- SCC %>%
  # Filter directly on the match instead of materialising a "Yes"/"NO" column.
  filter(grepl("Mobile", EI.Sector, ignore.case = TRUE)) %>%
  mutate(
    # First matching pattern wins; rows matching none get NA.
    Vehicle = case_when(
      grepl("Diesel Heavy Duty Vehicles", EI.Sector, ignore.case = TRUE) ~
        "1.Diesel_Heavy Duty",
      grepl("Diesel Light Duty Vehicles", EI.Sector, ignore.case = TRUE) ~
        "2.Diesel_Light Duty",
      grepl("Gasoline Heavy Duty Vehicles", EI.Sector, ignore.case = TRUE) ~
        "3.Gasoline_Heavy Duty",
      grepl("Gasoline Light Duty Vehicles", EI.Sector, ignore.case = TRUE) ~
        "4.Gasoline_Light Duty"
    ),
    SCC = as.character(SCC)
  ) %>%
  filter(!is.na(Vehicle)) %>%
  select(SCC, Vehicle)
# Keep only the NEI rows whose SCC code appears in the Mobile lookup, then
# attach the matching Vehicle label to each of those rows via a left join
# on SCC.
is_motor_scc <- NEI$SCC %in% Mobile$SCC
Motor_Related <- left_join(NEI[is_motor_scc, ], Mobile, by = "SCC")
# Question 5: total PM2.5 emissions per year and per vehicle class for
# Baltimore City (fips "24510"), computed from the motor-vehicle rows.
emission_motor <- Motor_Related %>%
  as_tibble() %>% # replaces tbl_df(), deprecated since dplyr 1.0.0
  filter(fips == "24510") %>%
  select(Emissions, year, Vehicle) %>%
  group_by(year, Vehicle) %>%
  summarize(total_emissions = sum(Emissions)) %>%
  # summarize() only peels off the last grouping level; drop the leftover
  # `year` grouping so the stored result is a plain tibble.
  ungroup() %>%
  print()

# One line + marker series per vehicle class. Shapes 22:25 are the fillable
# symbols, which is why `fill` is mapped alongside `color` and `shape`.
g <- ggplot(emission_motor, aes(year, total_emissions)) +
  geom_point(size = 3, aes(color = Vehicle, shape = Vehicle, fill = Vehicle)) +
  scale_shape_manual(values = 22:25) +
  geom_line(aes(color = Vehicle)) +
  # Tick only the years that actually occur in the data.
  scale_x_continuous(breaks = unique(emission_motor$year)) +
  xlab("Year") +
  ylab("Total Emissions (tons)") +
  ggtitle("Emissions of PM2.5 from Four Types of Vehicles in the Baltimore City") +
  theme(
    plot.title = element_text(
      size = 16, face = "bold", color = "red", hjust = 0.5, vjust = 3
    )
  )
print(g)
# Question 6: motor-vehicle PM2.5 totals per year, vehicle class and city,
# for the two fips codes "24510" (labelled "Baltimore") and "06037"
# (labelled "Los Angeles").
emission_motor_city <- Motor_Related %>%
  as_tibble() %>% # replaces tbl_df(), deprecated since dplyr 1.0.0
  select(Emissions, year, Vehicle, fips) %>%
  filter(fips %in% c("24510", "06037")) %>%
  # Only the two codes above survive the filter, so anything that is not
  # Baltimore is Los Angeles.
  mutate(City = ifelse(fips == "24510", "Baltimore", "Los Angeles")) %>%
  group_by(year, Vehicle, City) %>%
  summarize(total_emissions = sum(Emissions)) %>%
  # summarize() only peels off the last grouping level; drop the remaining
  # grouping so the stored result is a plain tibble.
  ungroup() %>%
  print()

# Same line + marker layout as the single-city plot, split into one panel per
# city. Both panels share one y axis, so absolute levels stay comparable.
g <- ggplot(emission_motor_city, aes(year, total_emissions)) +
  geom_point(size = 3, aes(color = Vehicle, shape = Vehicle, fill = Vehicle)) +
  scale_shape_manual(values = 22:25) +
  geom_line(aes(color = Vehicle)) +
  # Tick only the years that actually occur in the data.
  scale_x_continuous(breaks = unique(emission_motor_city$year)) +
  facet_grid(. ~ City) +
  xlab("Year") +
  ylab("Total Emissions (tons)") +
  ggtitle("Emissions of PM2.5 from Four Types of Vehicles in Baltimore and Los Angeles") +
  theme(
    plot.title = element_text(
      size = 16, face = "bold", color = "red", hjust = 0.5, vjust = 3
    ),
    strip.text.x = element_text(size = 12, color = "blue")
  )
print(g)