library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
Setting up the working directory.
# Print the current working directory (the recorded output follows).
getwd()
## [1] "/Users/marinazub/MyProjects/EDA/week4"
# Switch to the project folder; the relative paths used later in this file
# ("FNEI_data/...", "plot3.png", ...) are resolved against it.
# NOTE(review): this path only exists on one machine - consider running the
# script from the project folder instead of calling setwd().
setwd("~/MyProjects/EDA/week4")
Reading the data
# Load the two serialized data frames from the FNEI_data folder
# (paths are relative to the working directory).
# NEI: the PM2.5 emission records.
NEI <- readRDS(file = "FNEI_data/summarySCC_PM25.rds")
# SCC: the source classification code lookup table.
SCC <- readRDS(file = "FNEI_data/Source_Classification_Code.rds")
Exploring the data
# Show the compact structure of NEI: dimensions, column types, first values.
str(NEI)
## 'data.frame': 6497651 obs. of 6 variables:
## $ fips : chr "09001" "09001" "09001" "09001" ...
## $ SCC : chr "10100401" "10100404" "10100501" "10200401" ...
## $ Pollutant: chr "PM25-PRI" "PM25-PRI" "PM25-PRI" "PM25-PRI" ...
## $ Emissions: num 15.714 234.178 0.128 2.036 0.388 ...
## $ type : chr "POINT" "POINT" "POINT" "POINT" ...
## $ year : int 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ...
# Preview the first rows of NEI.
head(NEI)
## fips SCC Pollutant Emissions type year
## 4 09001 10100401 PM25-PRI 15.714 POINT 1999
## 8 09001 10100404 PM25-PRI 234.178 POINT 1999
## 12 09001 10100501 PM25-PRI 0.128 POINT 1999
## 16 09001 10200401 PM25-PRI 2.036 POINT 1999
## 20 09001 10200504 PM25-PRI 0.388 POINT 1999
## 24 09001 10200602 PM25-PRI 1.490 POINT 1999
# List the column names of NEI.
names(NEI)
## [1] "fips" "SCC" "Pollutant" "Emissions" "type" "year"
# Show the compact structure of SCC: dimensions, column types, first values.
str(SCC)
## 'data.frame': 11717 obs. of 15 variables:
## $ SCC : Factor w/ 11717 levels "10100101","10100102",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Data.Category : Factor w/ 6 levels "Biogenic","Event",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ Short.Name : Factor w/ 11238 levels "","2,4-D Salts and Esters Prod /Process Vents, 2,4-D Recovery: Filtration",..: 3283 3284 3293 3291 3290 3294 3295 3296 3292 3289 ...
## $ EI.Sector : Factor w/ 59 levels "Agriculture - Crops & Livestock Dust",..: 18 18 18 18 18 18 18 18 18 18 ...
## $ Option.Group : Factor w/ 25 levels "","C/I Kerosene",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Option.Set : Factor w/ 18 levels "","A","B","B1A",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SCC.Level.One : Factor w/ 17 levels "Brick Kilns",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ SCC.Level.Two : Factor w/ 146 levels "","Agricultural Chemicals Production",..: 32 32 32 32 32 32 32 32 32 32 ...
## $ SCC.Level.Three : Factor w/ 1061 levels "","100% Biosolids (e.g., sewage sludge, manure, mixtures of these matls)",..: 88 88 156 156 156 156 156 156 156 156 ...
## $ SCC.Level.Four : Factor w/ 6084 levels "","(NH4)2 SO4 Acid Bath System and Evaporator",..: 4455 5583 4466 4458 1341 5246 5584 5983 4461 776 ...
## $ Map.To : num NA NA NA NA NA NA NA NA NA NA ...
## $ Last.Inventory.Year: int NA NA NA NA NA NA NA NA NA NA ...
## $ Created_Date : Factor w/ 57 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Revised_Date : Factor w/ 44 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Usage.Notes : Factor w/ 21 levels ""," ","includes bleaching towers, washer hoods, filtrate tanks, vacuum pump exhausts",..: 1 1 1 1 1 1 1 1 1 1 ...
# Preview the first rows of SCC.
head(SCC)
## SCC Data.Category
## 1 10100101 Point
## 2 10100102 Point
## 3 10100201 Point
## 4 10100202 Point
## 5 10100203 Point
## 6 10100204 Point
## Short.Name
## 1 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 2 Ext Comb /Electric Gen /Anthracite Coal /Traveling Grate (Overfeed) Stoker
## 3 Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Wet Bottom
## 4 Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Dry Bottom
## 5 Ext Comb /Electric Gen /Bituminous Coal /Cyclone Furnace
## 6 Ext Comb /Electric Gen /Bituminous Coal /Spreader Stoker
## EI.Sector Option.Group Option.Set
## 1 Fuel Comb - Electric Generation - Coal
## 2 Fuel Comb - Electric Generation - Coal
## 3 Fuel Comb - Electric Generation - Coal
## 4 Fuel Comb - Electric Generation - Coal
## 5 Fuel Comb - Electric Generation - Coal
## 6 Fuel Comb - Electric Generation - Coal
## SCC.Level.One SCC.Level.Two
## 1 External Combustion Boilers Electric Generation
## 2 External Combustion Boilers Electric Generation
## 3 External Combustion Boilers Electric Generation
## 4 External Combustion Boilers Electric Generation
## 5 External Combustion Boilers Electric Generation
## 6 External Combustion Boilers Electric Generation
## SCC.Level.Three
## 1 Anthracite Coal
## 2 Anthracite Coal
## 3 Bituminous/Subbituminous Coal
## 4 Bituminous/Subbituminous Coal
## 5 Bituminous/Subbituminous Coal
## 6 Bituminous/Subbituminous Coal
## SCC.Level.Four Map.To Last.Inventory.Year
## 1 Pulverized Coal NA NA
## 2 Traveling Grate (Overfeed) Stoker NA NA
## 3 Pulverized Coal: Wet Bottom (Bituminous Coal) NA NA
## 4 Pulverized Coal: Dry Bottom (Bituminous Coal) NA NA
## 5 Cyclone Furnace (Bituminous Coal) NA NA
## 6 Spreader Stoker (Bituminous Coal) NA NA
## Created_Date Revised_Date Usage.Notes
## 1
## 2
## 3
## 4
## 5
## 6
# List the column names of SCC.
names(SCC)
## [1] "SCC" "Data.Category" "Short.Name"
## [4] "EI.Sector" "Option.Group" "Option.Set"
## [7] "SCC.Level.One" "SCC.Level.Two" "SCC.Level.Three"
## [10] "SCC.Level.Four" "Map.To" "Last.Inventory.Year"
## [13] "Created_Date" "Revised_Date" "Usage.Notes"
Just to be sure, let’s check that all recorded pollutants are PM2.5 and that the years are, as stated, 1999, 2002, 2005 and 2008.
# Distinct pollutant codes present in NEI.
unique(NEI$Pollutant)
## [1] "PM25-PRI"
# Distinct inventory years present in NEI.
unique(NEI$year)
## [1] 1999 2002 2005 2008
# Total emissions for fips "24510", one row per year and source type.
# Rows are filtered first; summarise() keeps only the grouping columns and
# the new `amount` column, so no explicit column selection is needed.
NEI2 <- NEI %>%
  filter(fips == "24510") %>%
  group_by(year, type) %>%
  summarise(amount = sum(Emissions))
print(NEI2)
## # A tibble: 16 x 3
## # Groups: year [?]
## year type amount
## <int> <chr> <dbl>
## 1 1999 NON-ROAD 522.94000
## 2 1999 NONPOINT 2107.62500
## 3 1999 ON-ROAD 346.82000
## 4 1999 POINT 296.79500
## 5 2002 NON-ROAD 240.84692
## 6 2002 NONPOINT 1509.50000
## 7 2002 ON-ROAD 134.30882
## 8 2002 POINT 569.26000
## 9 2005 NON-ROAD 248.93369
## 10 2005 NONPOINT 1509.50000
## 11 2005 ON-ROAD 130.43038
## 12 2005 POINT 1202.49000
## 13 2008 NON-ROAD 55.82356
## 14 2008 NONPOINT 1373.20731
## 15 2008 ON-ROAD 88.27546
## 16 2008 POINT 344.97518
# Plot 3: yearly emissions in NEI2 (fips "24510", Baltimore City), one colour
# per source type, with a linear trend line for each type.
# Colour is mapped once in the top-level aes() so the trend lines drawn by
# geom_smooth() share the colour of their points; mapping it only inside
# geom_point() left every trend line in the same default colour.
ggplot(NEI2, aes(x = year, y = amount, colour = type)) +
  geom_point(size = 4) +
  geom_smooth(method = "lm", se = FALSE, lwd = 0.5) +
  # Label the x axis only at the years that occur in the data.
  scale_x_continuous(breaks = unique(NEI2$year)) +
  labs(
    x = "Year",
    y = "Amount of emissions",
    title = "Changes of emissions in Baltimore City by source type"
  )
# Copy the current plot to plot3.png (height and width of 480) and close
# that device.
dev.copy(png, file = "plot3.png", height = 480, width = 480)
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
According to the graph, emissions from every source type except point sources had decreased by 2008.
# Plot 4: total emissions per year from coal-related sources, over all of NEI.
# Coal-related source codes are the SCC rows whose EI.Sector contains "Coal".
SCC_Coal <- SCC[grepl("Coal", SCC$EI.Sector, fixed = TRUE), ]
# Keep the NEI records whose source code is one of those coal-related codes.
NEI_Coal <- NEI[NEI$SCC %in% SCC_Coal$SCC, ]
NEI_Coal$year <- as.factor(NEI_Coal$year)
# Sum the emissions per year; the result is named by year.
val <- tapply(NEI_Coal$Emissions, NEI_Coal$year, sum)
# Use numeric years taken from the names of the totals as x values instead of
# passing the factor levels as character strings and relying on coercion.
plot(
  as.integer(names(val)), val,
  type = "l",
  xlab = "Year",
  ylab = "Amount of emissions",
  main = "Coal-related emissions in the US"
)
# Copy the current plot to plot4.png (default size) and close that device.
dev.copy(png, file = "plot4.png")
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
The graph shows that coal-related emissions in the US decreased between 1999 and 2008.
5. How have emissions from motor vehicle sources changed from 1999–2008 in Baltimore City?
“NEI ONROAD sources include emissions from onroad vehicles that use gasoline, diesel, and other fuels. These sources include light duty and heavy duty vehicle emissions from operation on roads, highway ramps, and during idling.” In the NEI data this source type is coded as "ON-ROAD".
(Taken from http://www.epa.gov/ttn/chief/eiinformation.html)
# Motor vehicle emissions in Baltimore City (fips "24510"), summed per year.
# Both conditions must hold, and the type value in NEI is spelled "ON-ROAD"
# (see the NEI2 table printed earlier). Combining the conditions with `|` and
# testing for "ONROAD" matched no type at all, so the result was the total of
# every Baltimore source rather than motor vehicles only.
NEI3 <- NEI %>%
  filter(fips == "24510", type == "ON-ROAD") %>%
  group_by(year) %>%
  summarise(amount = sum(Emissions))
print(NEI3)
## # A tibble: 4 x 2
## year amount
## <int> <dbl>
## 1 1999 346.82000
## 2 2002 134.30882
## 3 2005 130.43038
## 4 2008 88.27546
Plotting
# Plot 5: yearly totals in NEI3 as a line, with a linear trend line on top.
# The title names Baltimore City because NEI3 is built from the fips "24510"
# filter; the x axis is labelled only at the years that occur in the data.
ggplot(NEI3, aes(x = year, y = amount)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_continuous(breaks = NEI3$year) +
  labs(
    title = "Changes of motor vehicle emissions in Baltimore City",
    x = "Year",
    y = "Amount of emissions"
  )
# Copy the current plot to plot5.png (height and width of 480) and close
# that device.
dev.copy(png, file = "plot5.png", height = 480, width = 480)
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
The plot shows that emissions from motor vehicles went down over the 1999–2008 period.
6. Compare emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (fips == "06037"). Which city has seen greater changes over time in motor vehicle emissions?
# Motor vehicle emissions per year for Baltimore City (fips "24510") and
# Los Angeles County (fips "06037").
# The county test and the source-type test must both hold, and the type value
# in NEI is spelled "ON-ROAD"; chaining everything with `|` and testing for
# "ONROAD" summed every source type of both counties instead.
NEI4 <- NEI %>%
  filter(fips %in% c("24510", "06037"), type == "ON-ROAD") %>%
  group_by(fips, year) %>%
  summarise(amount = sum(Emissions))
print(NEI4)
# NOTE(review): the table recorded below was produced by the earlier filter
# and shows all-source totals; re-run this chunk to refresh it.
## # A tibble: 8 x 3
## # Groups: fips [?]
## fips year amount
## <chr> <int> <dbl>
## 1 06037 1999 47103.192
## 2 06037 2002 26968.795
## 3 06037 2005 22939.780
## 4 06037 2008 32135.482
## 5 24510 1999 3274.180
## 6 24510 2002 2453.916
## 7 24510 2005 3091.354
## 8 24510 2008 1862.282
Plotting
# Plot 6: the yearly emission totals held in NEI4, one column of points per
# location, coloured by inventory year.
# The plot object has its own name so that it does not shadow base::c().
# Year is converted to a factor so every inventory year gets a distinct colour
# instead of a shade on a continuous gradient, and the x axis is labelled for
# what it shows (the location, not the year).
plot6 <- ggplot(NEI4, aes(x = fips, y = amount, colour = factor(year))) +
  geom_point(size = 4) +
  scale_x_discrete(labels = c("06037" = "Los Angeles", "24510" = "Baltimore")) +
  labs(
    x = "Location",
    y = "Amount of emissions",
    colour = "Year",
    title = "Compare Baltimore and Los Angeles"
  )
print(plot6)
# Copy the current plot to plot6.png (height and width of 480) and close
# that device.
dev.copy(png, file = "plot6.png", height = 480, width = 480)
## quartz_off_screen
## 3
dev.off()
## quartz_off_screen
## 2
The graph shows that Los Angeles has seen greater changes in emissions from motor vehicles over time.