library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)

Setting up the working directory.

# Print the current working directory, then switch to the project folder.
# NOTE(review): this path is specific to one machine; the relative paths given
# to readRDS() further down are resolved against it.
getwd()
## [1] "/Users/marinazub/MyProjects/EDA/week4"
setwd("~/MyProjects/EDA/week4")

Reading the data

# Load the two RDS files from the FNEI_data folder:
#   NEI - the emission records (one row per fips / SCC / year measurement)
#   SCC - the source classification code lookup table
NEI <- readRDS("FNEI_data/summarySCC_PM25.rds")
SCC <- readRDS("FNEI_data/Source_Classification_Code.rds")

Exploring the data

# Show the column types and the first few values of the emissions table.
str(NEI)
## 'data.frame':    6497651 obs. of  6 variables:
##  $ fips     : chr  "09001" "09001" "09001" "09001" ...
##  $ SCC      : chr  "10100401" "10100404" "10100501" "10200401" ...
##  $ Pollutant: chr  "PM25-PRI" "PM25-PRI" "PM25-PRI" "PM25-PRI" ...
##  $ Emissions: num  15.714 234.178 0.128 2.036 0.388 ...
##  $ type     : chr  "POINT" "POINT" "POINT" "POINT" ...
##  $ year     : int  1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ...
# Preview the first rows of the emissions table.
head(NEI)
##     fips      SCC Pollutant Emissions  type year
## 4  09001 10100401  PM25-PRI    15.714 POINT 1999
## 8  09001 10100404  PM25-PRI   234.178 POINT 1999
## 12 09001 10100501  PM25-PRI     0.128 POINT 1999
## 16 09001 10200401  PM25-PRI     2.036 POINT 1999
## 20 09001 10200504  PM25-PRI     0.388 POINT 1999
## 24 09001 10200602  PM25-PRI     1.490 POINT 1999
# List the column names of the emissions table.
names(NEI)
## [1] "fips"      "SCC"       "Pollutant" "Emissions" "type"      "year"
# Show the column types and the first few values of the classification table.
str(SCC)
## 'data.frame':    11717 obs. of  15 variables:
##  $ SCC                : Factor w/ 11717 levels "10100101","10100102",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Data.Category      : Factor w/ 6 levels "Biogenic","Event",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ Short.Name         : Factor w/ 11238 levels "","2,4-D Salts and Esters Prod /Process Vents, 2,4-D Recovery: Filtration",..: 3283 3284 3293 3291 3290 3294 3295 3296 3292 3289 ...
##  $ EI.Sector          : Factor w/ 59 levels "Agriculture - Crops & Livestock Dust",..: 18 18 18 18 18 18 18 18 18 18 ...
##  $ Option.Group       : Factor w/ 25 levels "","C/I Kerosene",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Option.Set         : Factor w/ 18 levels "","A","B","B1A",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ SCC.Level.One      : Factor w/ 17 levels "Brick Kilns",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ SCC.Level.Two      : Factor w/ 146 levels "","Agricultural Chemicals Production",..: 32 32 32 32 32 32 32 32 32 32 ...
##  $ SCC.Level.Three    : Factor w/ 1061 levels "","100% Biosolids (e.g., sewage sludge, manure, mixtures of these matls)",..: 88 88 156 156 156 156 156 156 156 156 ...
##  $ SCC.Level.Four     : Factor w/ 6084 levels "","(NH4)2 SO4 Acid Bath System and Evaporator",..: 4455 5583 4466 4458 1341 5246 5584 5983 4461 776 ...
##  $ Map.To             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Last.Inventory.Year: int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Created_Date       : Factor w/ 57 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Revised_Date       : Factor w/ 44 levels "","1/27/2000 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Usage.Notes        : Factor w/ 21 levels ""," ","includes bleaching towers, washer hoods, filtrate tanks, vacuum pump exhausts",..: 1 1 1 1 1 1 1 1 1 1 ...
# Preview the first rows of the classification table.
head(SCC)
##        SCC Data.Category
## 1 10100101         Point
## 2 10100102         Point
## 3 10100201         Point
## 4 10100202         Point
## 5 10100203         Point
## 6 10100204         Point
##                                                                   Short.Name
## 1                   Ext Comb /Electric Gen /Anthracite Coal /Pulverized Coal
## 2 Ext Comb /Electric Gen /Anthracite Coal /Traveling Grate (Overfeed) Stoker
## 3       Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Wet Bottom
## 4       Ext Comb /Electric Gen /Bituminous Coal /Pulverized Coal: Dry Bottom
## 5                   Ext Comb /Electric Gen /Bituminous Coal /Cyclone Furnace
## 6                   Ext Comb /Electric Gen /Bituminous Coal /Spreader Stoker
##                                EI.Sector Option.Group Option.Set
## 1 Fuel Comb - Electric Generation - Coal                        
## 2 Fuel Comb - Electric Generation - Coal                        
## 3 Fuel Comb - Electric Generation - Coal                        
## 4 Fuel Comb - Electric Generation - Coal                        
## 5 Fuel Comb - Electric Generation - Coal                        
## 6 Fuel Comb - Electric Generation - Coal                        
##                 SCC.Level.One       SCC.Level.Two
## 1 External Combustion Boilers Electric Generation
## 2 External Combustion Boilers Electric Generation
## 3 External Combustion Boilers Electric Generation
## 4 External Combustion Boilers Electric Generation
## 5 External Combustion Boilers Electric Generation
## 6 External Combustion Boilers Electric Generation
##                 SCC.Level.Three
## 1               Anthracite Coal
## 2               Anthracite Coal
## 3 Bituminous/Subbituminous Coal
## 4 Bituminous/Subbituminous Coal
## 5 Bituminous/Subbituminous Coal
## 6 Bituminous/Subbituminous Coal
##                                  SCC.Level.Four Map.To Last.Inventory.Year
## 1                               Pulverized Coal     NA                  NA
## 2             Traveling Grate (Overfeed) Stoker     NA                  NA
## 3 Pulverized Coal: Wet Bottom (Bituminous Coal)     NA                  NA
## 4 Pulverized Coal: Dry Bottom (Bituminous Coal)     NA                  NA
## 5             Cyclone Furnace (Bituminous Coal)     NA                  NA
## 6             Spreader Stoker (Bituminous Coal)     NA                  NA
##   Created_Date Revised_Date Usage.Notes
## 1                                      
## 2                                      
## 3                                      
## 4                                      
## 5                                      
## 6
# List the column names of the classification table.
names(SCC)
##  [1] "SCC"                 "Data.Category"       "Short.Name"         
##  [4] "EI.Sector"           "Option.Group"        "Option.Set"         
##  [7] "SCC.Level.One"       "SCC.Level.Two"       "SCC.Level.Three"    
## [10] "SCC.Level.Four"      "Map.To"              "Last.Inventory.Year"
## [13] "Created_Date"        "Revised_Date"        "Usage.Notes"

Just to be sure, let’s check that all recorded pollutants are PM25 and that the years are, as stated, 1999, 2002, 2005 and 2008.

# Distinct pollutant codes present in the emissions table.
unique(NEI$Pollutant)
## [1] "PM25-PRI"
# Distinct inventory years present in the emissions table.
unique(NEI$year)
## [1] 1999 2002 2005 2008
  3. Of the four types of sources indicated by the `type` (point, nonpoint, onroad, nonroad) variable, which of these four sources have seen decreases in emissions from 1999–2008 for Baltimore City? Which have seen increases in emissions from 1999–2008? Use the ggplot2 plotting system to make a plot to answer this question.
# Total emissions in Baltimore City (fips "24510") for every combination of
# inventory year and source type. Rows are restricted to the city first; the
# summary keeps only the grouping columns plus the summed amount.
NEI2 <- NEI %>%
  filter(fips == "24510") %>%
  group_by(year, type) %>%
  summarise(amount = sum(Emissions))
print(NEI2)
## # A tibble: 16 x 3
## # Groups:   year [?]
##     year     type     amount
##    <int>    <chr>      <dbl>
##  1  1999 NON-ROAD  522.94000
##  2  1999 NONPOINT 2107.62500
##  3  1999  ON-ROAD  346.82000
##  4  1999    POINT  296.79500
##  5  2002 NON-ROAD  240.84692
##  6  2002 NONPOINT 1509.50000
##  7  2002  ON-ROAD  134.30882
##  8  2002    POINT  569.26000
##  9  2005 NON-ROAD  248.93369
## 10  2005 NONPOINT 1509.50000
## 11  2005  ON-ROAD  130.43038
## 12  2005    POINT 1202.49000
## 13  2008 NON-ROAD   55.82356
## 14  2008 NONPOINT 1373.20731
## 15  2008  ON-ROAD   88.27546
## 16  2008    POINT  344.97518
# Plot the yearly Baltimore City totals, one colour per source type, with a
# straight-line (lm) trend for each type.
# The colour is mapped in the top-level aes() so that points and trend lines
# share it; mapping it on the points only leaves every trend line in the same
# default colour, which makes the four trends indistinguishable.
# The data are filtered to fips "24510" (Baltimore City), so the title names
# the city rather than the state.
ggplot(NEI2, aes(x = year, y = amount, color = type)) +
  geom_point(size = 4) +
  geom_smooth(method = "lm", se = FALSE, lwd = 0.5) +
  labs(
    x = "Year",
    y = "Amount of emissions",
    title = "Changes of emissions in Baltimore City"
  )

# Copy the plot from the screen device to a 480x480 PNG file, then close the
# PNG device so the file is written.
dev.copy(png, file = "plot3.png", height = 480, width = 480)
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2

According to the graph, emissions from all source types except point sources decreased by 2008; point-source emissions rose sharply through 2005 and ended 2008 slightly above their 1999 level.

  4. Across the United States, how have emissions from coal combustion-related sources changed from 1999–2008?
# Keep the classification codes whose EI.Sector mentions "Coal", then keep
# only the emission records that carry one of those codes.
SCC_Coal <- SCC[grepl("Coal", SCC$EI.Sector), ]
NEI_Coal <- NEI[NEI$SCC %in% SCC_Coal$SCC, ]

# Sum the emissions of the coal-related records per inventory year.
NEI_Coal$year <- as.factor(NEI_Coal$year)
val <- tapply(X = NEI_Coal$Emissions, NEI_Coal$year, sum)

# Plot the yearly totals as a line. The years are converted to numbers
# explicitly (instead of passing the character factor levels and relying on
# implicit coercion) and the axis ticks are placed on the inventory years.
coal_years <- as.integer(names(val))
plot(
  coal_years, val,
  type = "l",
  xaxt = "n",
  xlab = "Year",
  ylab = "Amount of emissions",
  main = "Coal combustion-related emissions in the US"
)
axis(1, at = coal_years)

# Copy the plot from the screen device to a PNG file, then close the PNG
# device so the file is written.
dev.copy(png, file = "plot4.png")
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2

The graph shows that coal combustion-related emissions in the US decreased from 1999 to 2008.

5. How have emissions from motor vehicle sources changed from 1999–2008 in Baltimore City?

“NEI ONROAD sources include emissions from onroad vehicles that use gasoline, diesel, and other fuels. These sources include light duty and heavy duty vehicle emissions from operation on roads, highway ramps, and during idling.”
(Taken from http://www.epa.gov/ttn/chief/eiinformation.html)

# Yearly total of motor-vehicle emissions in Baltimore City (fips "24510").
# A record must satisfy BOTH conditions (city AND source type), and the type
# label is spelled "ON-ROAD" in the data. Combining the conditions with `|`
# and the label "ONROAD" matches no on-road rows and instead sums every
# source type in the city.
NEI3 <- NEI %>%
  filter(fips == "24510", type == "ON-ROAD") %>%
  group_by(year) %>%
  summarise(amount = sum(Emissions))
print(NEI3)
## # A tibble: 4 x 2
##    year    amount
##   <int>     <dbl>
## 1  1999 346.82000
## 2  2002 134.30882
## 3  2005 130.43038
## 4  2008  88.27546

Plotting

# Plot the yearly motor-vehicle totals as a line with a straight-line (lm)
# trend on top. NEI3 is filtered to fips "24510" (Baltimore City), so the
# title names the city rather than the state.
ggplot(NEI3, aes(x = year, y = amount)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Changes of motor vehicle emissions in Baltimore City",
    x = "Year",
    y = "Amount of emissions"
  )

# Copy the plot from the screen device to a 480x480 PNG file, then close the
# PNG device so the file is written.
dev.copy(png, file = "plot5.png", height = 480, width = 480)
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2

The plot shows that emissions from motor vehicles went down over the 1999–2008 period.

6. Compare emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (`fips == "06037"`). Which city has seen greater changes over time in motor vehicle emissions?

# Yearly total of motor-vehicle emissions for Baltimore City (fips "24510")
# and Los Angeles County (fips "06037").
# A record must belong to one of the two areas AND be of type "ON-ROAD" (the
# spelling used in the data). Chaining everything with `|` and the label
# "ONROAD" keeps every source type of both areas.
# NOTE(review): the table printed below this chunk was produced with the
# all-source filter and has to be regenerated after this correction.
NEI4 <- NEI %>%
  filter(fips %in% c("24510", "06037"), type == "ON-ROAD") %>%
  group_by(fips, year) %>%
  summarise(amount = sum(Emissions))
print(NEI4)
## # A tibble: 8 x 3
## # Groups:   fips [?]
##    fips  year    amount
##   <chr> <int>     <dbl>
## 1 06037  1999 47103.192
## 2 06037  2002 26968.795
## 3 06037  2005 22939.780
## 4 06037  2008 32135.482
## 5 24510  1999  3274.180
## 6 24510  2002  2453.916
## 7 24510  2005  3091.354
## 8 24510  2008  1862.282

Plotting

# Compare the two areas over time: year on the x axis (so the "Year" label is
# accurate), one coloured line with points per area. The inventory years are
# used as axis breaks, and the fips codes are shown as city names in the
# legend. The plot object is not stored in a variable called `c`, which would
# shadow base::c().
city_plot <- ggplot(NEI4, aes(x = year, y = amount, color = fips)) +
  geom_line() +
  geom_point(size = 4) +
  scale_x_continuous(breaks = unique(NEI4$year)) +
  scale_color_discrete(
    name = "City",
    labels = c("06037" = "Los Angeles", "24510" = "Baltimore")
  ) +
  labs(
    x = "Year",
    y = "Amount of emissions",
    title = "Compare Baltimore and Los Angeles"
  )
print(city_plot)

# Copy the plot from the screen device to a 480x480 PNG file, then close the
# PNG device so the file is written.
dev.copy(png, file = "plot6.png", height = 480, width = 480)
## quartz_off_screen 
##                 3
dev.off()
## quartz_off_screen 
##                 2

The graph shows that Los Angeles has seen greater changes in emissions from motor vehicles over time.