library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
Setting up the working directory.
# Show the current working directory, then switch to the project folder so the
# relative "FNEI_data/..." paths used when reading the data resolve.
# NOTE(review): the setwd() path is specific to one machine; anyone else
# running this must adjust it or start R from the project directory instead.
getwd()
## [1] "/Users/marinazub/MyProjects/EDA/week4"
setwd("~/MyProjects/EDA/week4")
Reading the data
# Load the two serialized objects from the FNEI_data folder (paths are relative
# to the working directory).
# NEI: emission records with columns fips, SCC, Pollutant, Emissions, type, year.
NEI <- readRDS("FNEI_data/summarySCC_PM25.rds")
# SCC: source classification code table (one SCC column plus descriptive
# columns) -- presumably joins to NEI$SCC; confirm before merging.
SCC <- readRDS("FNEI_data/Source_Classification_Code.rds")
Exploring the data
# Inspect NEI: column types and a preview of the values.
str(NEI)
## 'data.frame': 6497651 obs. of 6 variables:
## $ fips : chr "09001" "09001" "09001" "09001" ...
## $ SCC : chr "10100401" "10100404" "10100501" "10200401" ...
## $ Pollutant: chr "PM25-PRI" "PM25-PRI" "PM25-PRI" "PM25-PRI" ...
## $ Emissions: num 15.714 234.178 0.128 2.036 0.388 ...
## $ type : chr "POINT" "POINT" "POINT" "POINT" ...
## $ year : int 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ...
# First six rows of NEI.
head(NEI)
## fips SCC Pollutant Emissions type year
## 4 09001 10100401 PM25-PRI 15.714 POINT 1999
## 8 09001 10100404 PM25-PRI 234.178 POINT 1999
## 12 09001 10100501 PM25-PRI 0.128 POINT 1999
## 16 09001 10200401 PM25-PRI 2.036 POINT 1999
## 20 09001 10200504 PM25-PRI 0.388 POINT 1999
## 24 09001 10200602 PM25-PRI 1.490 POINT 1999
# Column names of NEI.
names(NEI)
## [1] "fips" "SCC" "Pollutant" "Emissions" "type" "year"
# Inspect SCC: column types (mostly factors) and a preview of the levels.
str(SCC)
## 'data.frame': 11717 obs. of 15 variables:
## $ SCC : Factor w/ 11717 levels "10100101","10100102",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Data.Category : Factor w/ 6 levels "Biogenic","Event",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ Short.Name : Factor w/ 11238 levels "","2,4-D Salts and Esters Prod /Process Vents, 2,4-D Recovery: Filtration",..: 3283 3284 3293 3291 3290 3294 3295 3296 3292 3289 ...
## $ EI.Sector : Factor w/ 59 levels "Agriculture - Crops & Livestock Dust",..: 18 18 18 18 18 18 18 18 18 18 ...
## $ Option.Group : Factor w/ 25 levels "","C/I Kerosene",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Option.Set : Factor w/ 18 levels "","A","B","B1A",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ SCC.Level.One : Factor w/ 17 levels "Brick Kilns",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ SCC.Level.Two : Factor w/ 146 levels "","Agricultural Chemicals Production",..: 32 32 32 32 32 32 32 32 32 32 ...
## $ SCC.Level.Three : Factor w/ 1061 levels "","100% Biosolids (e.g., sewage sludge, manure, mixtures of these matls)",..: 88 88 156 156 156 156 156 156 156 156 ...
## $ SCC.Level.Four : Factor w/ 6084 levels "","(NH4)2 SO4 Acid Bath System and Evaporator",..: 4455 5583 4466 4458 1341 5246 5584 5983 4461 776 ...
## $ Map.To : num NA NA NA NA NA NA NA NA NA NA ...
## $ Last.Inventory.Year: int NA NA NA NA NA NA NA NA NA NA ...
## $ Created_Date : Factor w/ 57 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Revised_Date : Factor w/ 44 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Usage.Notes : Factor w/ 21 levels ""," ","includes bleaching towers, washer hoods, filtrate tanks, vacuum pump exhausts",..: 1 1 1 1 1 1 1 1 1 1 ...
# First six rows of SCC (the printout wraps because the table is wide).
head(SCC)
## SCC Data.Category
## 1 10100101 Point
## 2 10100102 Point
## 3 10100201 Point
## 4 10100202 Point
## 5 10100203 Point
## 6 10100204 Point
## Short.Name
## 1 Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 2 Ext Comb /Electric Gen /Anthracite Coal /Traveling Grate (Overfeed) Stoker
## 3 Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Wet Bottom
## 4 Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Dry Bottom
## 5 Ext Comb /Electric Gen /Bituminous Coal /Cyclone Furnace
## 6 Ext Comb /Electric Gen /Bituminous Coal /Spreader Stoker
## EI.Sector Option.Group Option.Set
## 1 Fuel Comb - Electric Generation - Coal
## 2 Fuel Comb - Electric Generation - Coal
## 3 Fuel Comb - Electric Generation - Coal
## 4 Fuel Comb - Electric Generation - Coal
## 5 Fuel Comb - Electric Generation - Coal
## 6 Fuel Comb - Electric Generation - Coal
## SCC.Level.One SCC.Level.Two
## 1 External Combustion Boilers Electric Generation
## 2 External Combustion Boilers Electric Generation
## 3 External Combustion Boilers Electric Generation
## 4 External Combustion Boilers Electric Generation
## 5 External Combustion Boilers Electric Generation
## 6 External Combustion Boilers Electric Generation
## SCC.Level.Three
## 1 Anthracite Coal
## 2 Anthracite Coal
## 3 Bituminous/Subbituminous Coal
## 4 Bituminous/Subbituminous Coal
## 5 Bituminous/Subbituminous Coal
## 6 Bituminous/Subbituminous Coal
## SCC.Level.Four Map.To Last.Inventory.Year
## 1 Pulverized Coal NA NA
## 2 Traveling Grate (Overfeed) Stoker NA NA
## 3 Pulverized Coal: Wet Bottom (Bituminous Coal) NA NA
## 4 Pulverized Coal: Dry Bottom (Bituminous Coal) NA NA
## 5 Cyclone Furnace (Bituminous Coal) NA NA
## 6 Spreader Stoker (Bituminous Coal) NA NA
## Created_Date Revised_Date Usage.Notes
## 1
## 2
## 3
## 4
## 5
## 6
# Column names of SCC.
names(SCC)
## [1] "SCC" "Data.Category" "Short.Name"
## [4] "EI.Sector" "Option.Group" "Option.Set"
## [7] "SCC.Level.One" "SCC.Level.Two" "SCC.Level.Three"
## [10] "SCC.Level.Four" "Map.To" "Last.Inventory.Year"
## [13] "Created_Date" "Revised_Date" "Usage.Notes"
Just to be sure, let’s check that the only pollutant recorded is PM2.5 and that the years are, as stated, 1999, 2002, 2005 and 2008.
# Sanity check: list the distinct pollutant codes and inventory years in NEI.
unique(NEI[["Pollutant"]])
## [1] "PM25-PRI"
unique(NEI[["year"]])
## [1] 1999 2002 2005 2008
# Total emissions per inventory year: one row per year, with the summed
# Emissions stored in `amount`.
NEI0 <- NEI %>%
  group_by(year) %>%
  summarise(amount = sum(Emissions))

# Show the yearly totals.
print(NEI0)
## # A tibble: 4 x 2
## year amount
## <int> <dbl>
## 1 1999 7332967
## 2 2002 5635780
## 3 2005 5454703
## 4 2008 3464206
# Line chart of total emissions per year, drawn on the active graphics device.
# No `pch` is passed: point symbols are not drawn when type = "l".
plot(
  NEI0$year, NEI0$amount,
  type = "l",
  main = "Amount of emissions in US per year",
  xlab = "Year", ylab = "Amount",
  xlim = c(1999, 2008)
)
# Copy the current plot to a 480 x 480 PNG file. `filename` is spelled out in
# full instead of relying on partial matching of `file` against png()'s
# argument list.
dev.copy(png, filename = "plot1.png", height = 480, width = 480)
## quartz_off_screen
## 3
# Close the PNG device so that plot1.png is flushed to disk.
dev.off()
## quartz_off_screen
## 2
The total amount of PM2.5 emissions in the US decreased from 1999 to 2008.