# Exploratory Data Analysis - PM2.5 Emissions (National Emissions Inventory)

# Have total emissions from PM2.5 decreased in the United States from 1999 to 2008? Using the base plotting system, make a plot showing the total PM2.5 emission from all sources for each of the years 1999, 2002, 2005, and 2008. Upload a PNG file containing your plot addressing this question.

# Download and unpack the data set, but only when the two .rds files read
# later in this script are not already in the working directory, so that
# re-running the script does not fetch the archive again.
fileurl <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2FNEI_data.zip"
datafiles <- c("summarySCC_PM25.rds", "Source_Classification_Code.rds")

if (!all(file.exists(datafiles))) {
  # mode = "wb" writes the zip archive in binary mode; the default download
  # method is used so no external curl binary has to be on the PATH.
  download.file(fileurl, "Summary.zip", mode = "wb")
  unzip(zipfile = "Summary.zip")
  # The archive is no longer needed once its contents are extracted.
  unlink("Summary.zip")
}

# Load the required libraries
library(dplyr)
library(ggplot2)
# Load the PM2.5 emissions observations into NEI. Later sections use its
# Emissions, year, fips, type and SCC columns.
NEI <- readRDS(file = "summarySCC_PM25.rds")

# Load the source classification code table into SCC. Later sections use its
# SCC and Short.Name columns.
SCC <- readRDS(file = "Source_Classification_Code.rds")
# Total PM2.5 emissions from all sources, one sum per year present in NEI
totalNEI <- tapply(NEI$Emissions, NEI$year, sum)

# Write the bar chart to plot1.png. Without an open png device the chart is
# only drawn on the default graphics device and no PNG file is produced.
png("plot1.png", width = 480, height = 480)
barplot(
  totalNEI,
  col = "darkgreen",
  xlab = "Year",
  ylab = "Total PM2.5 Emissions in Tons",
  main = "Total PM 2.5 Emissions (tons) in USA"
)
# Close the device so the file is flushed to disk and later plots do not
# draw into it.
dev.off()

# Filter observations relating to Baltimore City, MD (fips "24510")
Baltimore <- subset(NEI, fips == "24510")

# Total PM2.5 emissions in Baltimore, one sum per year present in the subset
totalBaltimore <- tapply(Baltimore$Emissions, Baltimore$year, sum)

# Write the bar chart to plot2.png. Without an open png device the chart is
# only drawn on the default graphics device and no PNG file is produced.
png("plot2.png", width = 480, height = 480)
barplot(
  totalBaltimore,
  col = "darkgreen",
  xlab = "Year",
  ylab = "Total PM2.5 Emissions (Tons)",
  main = "Yearly Emissions (tons) in Baltimore City, Maryland"
)
# Close the device so the file is flushed to disk and later plots do not
# draw into it.
dev.off()

# Filter observations relating to Baltimore City, MD (fips "24510")
Baltimore <- subset(NEI, fips == "24510")

# Total emissions in Baltimore for each combination of year and source type.
# ungroup() drops the grouping that summarise() leaves on the result so the
# table behaves as a plain data frame afterwards.
typeBaltimore <- Baltimore %>%
  group_by(year, type) %>%
  summarise(emissions = sum(Emissions)) %>%
  ungroup()

# One line per source type. ggplot() is used instead of qplot(), which is
# deprecated in recent ggplot2 releases.
plot3 <- ggplot(typeBaltimore, aes(x = year, y = emissions, color = type)) +
  geom_line() +
  ggtitle("PM2.5 Emission by Type and Year in Baltimore City") +
  xlab("Year") +
  ylab("Total PM2.5 Emissions in tons") +
  theme(legend.position = c(0.85, 0.85))

# Write the chart to plot3.png. print() is explicit because ggplot objects
# are not auto-printed when the script is run through source().
png("plot3.png", width = 480, height = 480)
print(plot3)
dev.off()

# Keep the source classification rows whose Short.Name mentions "coal"
# (case-insensitive)
SCC.coal <- SCC %>%
  filter(grepl("coal", Short.Name, ignore.case = TRUE))

# Join the emissions observations to the coal-related codes on SCC and total
# the emissions per year. The join result is piped straight into the summary
# so the script no longer creates a data frame called `merge`, the same name
# as the base function being called.
merge.sum <- merge(x = NEI, y = SCC.coal, by = "SCC") %>%
  group_by(year) %>%
  summarise(Emissions = sum(Emissions))

# Line chart with a highlighted point for each year
plot4 <- ggplot(data = merge.sum, aes(x = year, y = Emissions)) +
  geom_line() +
  geom_point(size = 5, shape = 21, fill = "red") +
  ggtitle("PM2.5 Emission by Coal Combustion in USA")

# Write the chart to plot4.png. print() is explicit because ggplot objects
# are not auto-printed when the script is run through source(), and
# dev.off() closes the device so that later plots are not drawn into
# plot4.png.
png("plot4.png", width = 800, height = 400)
print(plot4)
dev.off()
# Baltimore City, Maryland (fips "24510"), restricted to type "ON-ROAD".
# fips is compared against a string, matching the other Baltimore filters in
# this script, instead of relying on implicit numeric-to-character coercion.
MD.onroad <- subset(NEI, fips == "24510" & type == "ON-ROAD")

# Total on-road emissions per year
MDYearly <- MD.onroad %>%
  group_by(year) %>%
  summarize(emissions = sum(Emissions))

# ggplot() is used instead of qplot(), which is deprecated in recent ggplot2
# releases.
plot5 <- ggplot(MDYearly, aes(x = year, y = emissions)) +
  geom_line() +
  ggtitle("PM2.5 Emissions by Motor Vehicles in Baltimore City") +
  xlab("Year") +
  ylab("PM2.5 Emissions in Tons")

# Write the chart to plot5.png. print() is explicit because ggplot objects
# are not auto-printed when the script is run through source().
png("plot5.png", width = 480, height = 480)
print(plot5)
dev.off()

# On-road (type "ON-ROAD") observations for the two locations being compared:
# Baltimore City, Maryland (fips "24510") and
# Los Angeles County, California (fips "06037")
MD.onroad <- subset(NEI, fips == "24510" & type == "ON-ROAD")
CA.onroad <- subset(NEI, fips == "06037" & type == "ON-ROAD")

# Yearly totals for Baltimore, tagged with a city column for the legend
MDYearly <- MD.onroad %>%
  group_by(year) %>%
  summarize(emissions = sum(Emissions)) %>%
  mutate(city = "Baltimore")

# Yearly totals for Los Angeles, tagged with a city column for the legend
CAYearly <- CA.onroad %>%
  group_by(year) %>%
  summarize(emissions = sum(Emissions)) %>%
  mutate(city = "Los Angeles")

# Stack the yearly totals of both locations into one data frame
comparebyYEAR <- as.data.frame(rbind(MDYearly, CAYearly))

# One line per city. ggplot() is used instead of qplot(), which is deprecated
# in recent ggplot2 releases.
plot6 <- ggplot(comparebyYEAR, aes(x = year, y = emissions, color = city)) +
  geom_line() +
  ggtitle("PM2.5 Emissions by Motor Vehicles in Baltimore City, MD, Vs Los Angeles County, CA") +
  xlab("Year") +
  ylab("PM2.5 Emissions in Tons")

# Write the chart to plot6.png, using a wide canvas to leave room for the
# long title. print() is explicit because ggplot objects are not
# auto-printed when the script is run through source().
png("plot6.png", width = 800, height = 400)
print(plot6)
dev.off()