We start by loading the data and looking at it
# Switch to the folder that holds the data files; the relative file names
# used for reading and writing below resolve against it.
setwd("C:/Users/Miguel/Desktop/data_science/plots/Passess2")
# Load the PM2.5 emissions records, then preview the first rows.
NEI <- readRDS("summarySCC_PM25.rds")
head(NEI)
## fips SCC Pollutant Emissions type year
## 4 09001 10100401 PM25-PRI 15.714 POINT 1999
## 8 09001 10100404 PM25-PRI 234.178 POINT 1999
## 12 09001 10100501 PM25-PRI 0.128 POINT 1999
## 16 09001 10200401 PM25-PRI 2.036 POINT 1999
## 20 09001 10200504 PM25-PRI 0.388 POINT 1999
## 24 09001 10200602 PM25-PRI 1.490 POINT 1999
What's in this database?
fips: A five-digit number (represented as a string) indicating the U.S. county
SCC: The name of the source as indicated by a digit string (see source code classification table)
Pollutant: A string indicating the pollutant
Emissions: Amount of PM2.5 emitted, in tons
type: The type of source (point, non-point, on-road, or non-road)
year: The year of emissions recorded
# Load the source classification lookup table, then preview the first rows.
SCC <- readRDS("Source_Classification_Code.rds")
head(SCC)
## SCC Data.Category
## 1 10100101 Point
## 2 10100102 Point
## 3 10100201 Point
## 4 10100202 Point
## 5 10100203 Point
## 6 10100204 Point
## Short.Name
## 1 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 2 Ext Comb /Electric Gen /Anthracite Coal /Traveling Grate (Overfeed) Stoker
## 3 Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Wet Bottom
## 4 Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Dry Bottom
## 5 Ext Comb /Electric Gen /Bituminous Coal /Cyclone Furnace
## 6 Ext Comb /Electric Gen /Bituminous Coal /Spreader Stoker
## EI.Sector Option.Group Option.Set
## 1 Fuel Comb - Electric Generation - Coal
## 2 Fuel Comb - Electric Generation - Coal
## 3 Fuel Comb - Electric Generation - Coal
## 4 Fuel Comb - Electric Generation - Coal
## 5 Fuel Comb - Electric Generation - Coal
## 6 Fuel Comb - Electric Generation - Coal
## SCC.Level.One SCC.Level.Two
## 1 External Combustion Boilers Electric Generation
## 2 External Combustion Boilers Electric Generation
## 3 External Combustion Boilers Electric Generation
## 4 External Combustion Boilers Electric Generation
## 5 External Combustion Boilers Electric Generation
## 6 External Combustion Boilers Electric Generation
## SCC.Level.Three
## 1 Anthracite Coal
## 2 Anthracite Coal
## 3 Bituminous/Subbituminous Coal
## 4 Bituminous/Subbituminous Coal
## 5 Bituminous/Subbituminous Coal
## 6 Bituminous/Subbituminous Coal
## SCC.Level.Four Map.To Last.Inventory.Year
## 1 Pulverized Coal NA NA
## 2 Traveling Grate (Overfeed) Stoker NA NA
## 3 Pulverized Coal: Wet Bottom (Bituminous Coal) NA NA
## 4 Pulverized Coal: Dry Bottom (Bituminous Coal) NA NA
## 5 Cyclone Furnace (Bituminous Coal) NA NA
## 6 Spreader Stoker (Bituminous Coal) NA NA
## Created_Date Revised_Date Usage.Notes
## 1
## 2
## 3
## 4
## 5
## 6
This database contains a mapping from the SCC digit strings in the Emissions table to the actual name of the PM2.5 source. The sources are categorized in a few different ways, from more general to more specific, and one may choose to explore whatever categories one thinks are most useful. For example, source “10100101” is known as “Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal”.
Using the base plotting system, make a plot showing the total PM2.5 emission from all sources for each of the years 1999, 2002, 2005, and 2008.
To answer this question, we first have to compute the total emission per year:
# Total PM2.5 emissions per year across all sources.
# tapply() names every total with the year it belongs to, so the bar labels
# cannot get out of step with the sums. (Labelling with unique(NEI$year)
# follows row order, whereas grouped sums come back sorted by year.)
total_emissions_per_year <- as.table(tapply(NEI$Emissions, NEI$year, sum))

# Open the PNG device only once the data are ready, so an error above does
# not leave a graphics device open.
png(filename = "question1.png", width = 480, height = 480)
barplot(
  total_emissions_per_year / 1E6,
  main = "Emissions per year (in 1E6 tons)",
  col = "wheat"
)
dev.off()
## pdf
## 2
# Draw the same chart again on the current device for on-screen viewing.
barplot(
  total_emissions_per_year / 1E6,
  main = "Emissions per year (in 1E6 tons)",
  col = "wheat"
)
Have total PM2.5 emissions in Baltimore City (fips == "24510") decreased from 1999 to 2008? Use the base plotting system to make a plot answering this question.
To answer this question, we first have to compute the total emissions in Baltimore city per year:
# Total PM2.5 emissions per year for Baltimore City (fips "24510").
# %in% never yields NA, so records with a missing fips are simply excluded
# instead of injecting NA values into the sums.
baltimore_rows <- NEI$fips %in% "24510"

# tapply() labels each total with its own year, taken from the Baltimore
# records themselves rather than from the years of the whole data set.
total_emissions_per_year_Baltimore <- as.table(
  tapply(NEI$Emissions[baltimore_rows], NEI$year[baltimore_rows], sum)
)

# Open the PNG device only once the data are ready, so an error above does
# not leave a graphics device open.
png(filename = "question2.png", width = 480, height = 480)
barplot(
  total_emissions_per_year_Baltimore,
  main = "Emissions per year in Baltimore",
  col = "wheat"
)
dev.off()
## pdf
## 2
# Draw the same chart again on the current device for on-screen viewing.
barplot(
  total_emissions_per_year_Baltimore,
  main = "Emissions per year in Baltimore",
  col = "wheat"
)
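Of the four source types given by the type variable (point, non-point, on-road, non-road), which have seen increases in emissions in Baltimore City from 1999-2008? Use the ggplot2 plotting system to make a plot to answer this question.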
library(ggplot2)

# Baltimore City records only (fips "24510"). %in% never yields NA, so no
# all-NA rows are created if a record has a missing fips.
NEI_Baltimore <- NEI[NEI$fips %in% "24510", ]

# Per-record emissions on a log10(x + 1) scale against year, one panel per
# source type, with a linear trend line in each panel.
# Built with ggplot() instead of the deprecated qplot(), so that `method`
# reaches only the smoothing layer.
emissions_by_type_plot <- ggplot(
  NEI_Baltimore,
  aes(year, log10(Emissions + 1))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ type) +
  ggtitle("Measure of emissions for Baltimore City")

png(filename = "question3.png", width = 480, height = 480)
# Explicit print(): ggplot objects are only auto-printed at top level, so
# without it the PNG would stay empty when the script is run via source().
print(emissions_by_type_plot)
dev.off()
## pdf
## 2
# Draw the same chart again on the current device for on-screen viewing.
print(emissions_by_type_plot)
library(ggplot2)

# Classification rows whose EI sector mentions coal, and their SCC codes.
coal_related_entries <- grep("(c|C)oal", SCC$EI.Sector)
coal_related_SCCs <- SCC$SCC[coal_related_entries]

# Emission records belonging to those codes. Plain indexing keeps it obvious
# that NEI$SCC is the column, not the SCC lookup table.
NEI_coal <- NEI[NEI$SCC %in% coal_related_SCCs, ]

# Total coal-related emissions per year. tapply() names every total with the
# year it belongs to, so the bar labels cannot get out of step with the sums.
total_emissions_per_year <- as.table(
  tapply(NEI_coal$Emissions, NEI_coal$year, sum)
)

# Open the PNG device only once the data are ready, so an error above does
# not leave a graphics device open.
png(filename = "question4.png", width = 480, height = 480)
barplot(
  total_emissions_per_year / 1E6,
  main = "Coal related emissions per year (in 1E6 tons)",
  col = "wheat"
)
dev.off()
## pdf
## 2
# Draw the same chart again on the current device for on-screen viewing.
barplot(
  total_emissions_per_year / 1E6,
  main = "Coal related emissions per year (in 1E6 tons)",
  col = "wheat"
)
library(ggplot2)

# Classification rows whose EI sector mentions "Vehicle", and their SCC codes.
motor_related_entries <- grep("Vehicle", SCC$EI.Sector)
motor_related_SCCs <- SCC$SCC[motor_related_entries]

# Emission records for those codes, then restricted to Baltimore City
# (fips "24510"). %in% never yields NA, so no all-NA rows are created.
NEI_motor <- NEI[NEI$SCC %in% motor_related_SCCs, ]
NEI_motor_Baltimore <- NEI_motor[NEI_motor$fips %in% "24510", ]

# Total motor-vehicle emissions per year. tapply() names every total with the
# year it belongs to, so the bar labels cannot get out of step with the sums.
total_emissions_per_year <- as.table(
  tapply(NEI_motor_Baltimore$Emissions, NEI_motor_Baltimore$year, sum)
)

# Open the PNG device only once the data are ready, so an error above does
# not leave a graphics device open.
png(filename = "question5.png", width = 480, height = 480)
barplot(
  total_emissions_per_year,
  main = "Emissions from motor vehicle sources in Baltimore city",
  col = "wheat"
)
dev.off()
## pdf
## 2
# Draw the same chart again on the current device for on-screen viewing.
barplot(
  total_emissions_per_year,
  main = "Emissions from motor vehicle sources in Baltimore city",
  col = "wheat"
)
library(ggplot2)
library(plyr)

# Classification rows whose EI sector mentions "Vehicle", and their SCC codes.
motor_related_entries <- grep("Vehicle", SCC$EI.Sector)
motor_related_SCCs <- SCC$SCC[motor_related_entries]

# Emission records for those codes, restricted to the two fips codes being
# compared. %in% never yields NA, so no all-NA rows are created.
NEI_motor <- NEI[NEI$SCC %in% motor_related_SCCs, ]
NEI_motor_Baltimore_and_LA <- NEI_motor[
  NEI_motor$fips %in% c("24510", "06037"),
]

# Replace the fips codes with readable panel labels.
NEI_motor_Baltimore_and_LA$fips <- revalue(
  NEI_motor_Baltimore_and_LA$fips,
  c("06037" = "LA", "24510" = "Baltimore")
)

# Per-record emissions on a log10(x + 1) scale against year, one panel per
# city, with a linear trend line in each panel.
# Built with ggplot() instead of the deprecated qplot(), so that `method`
# reaches only the smoothing layer.
motor_emissions_plot <- ggplot(
  NEI_motor_Baltimore_and_LA,
  aes(year, log10(Emissions + 1))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(. ~ fips) +
  ggtitle("Measure of emissions arising from motor vehicle sources")

png(filename = "question6.png", width = 480, height = 480)
# Explicit print(): ggplot objects are only auto-printed at top level, so
# without it the PNG would stay empty when the script is run via source().
print(motor_emissions_plot)
dev.off()
## pdf
## 2
# Draw the same chart again on the current device for on-screen viewing.
print(motor_emissions_plot)