Synopsis

This analysis aims to detect trends in fine particulate matter (PM2.5) emissions across counties in the United States, especially Baltimore City and Los Angeles County, and to answer several questions about how these emissions changed between 1999 and 2008.

Part I

Have total emissions from PM2.5 decreased in the United States from 1999 to 2008?

reading data

# Load the emissions records (NEI) and the source classification table (SCC)
# from the working directory.
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")

type conversion

# Store year and source type as factors so records can be grouped by them.
NEI[c("year", "type")] <- lapply(NEI[c("year", "type")], as.factor)

split based on year

# One data frame of records per year, in the order of levels(NEI$year).
split_NEI <- split(NEI, NEI$year)

# Total emissions for each year. vapply() replaces the append-in-a-loop
# pattern: it does not re-copy the result on every iteration and, unlike
# 1:length(), it simply yields an empty vector when there is nothing to sum.
# unname() keeps the result a plain numeric vector, as before.
total_emissions_per_year <- unname(
  vapply(split_NEI, function(year_df) sum(year_df$Emissions), numeric(1))
)

Plotting code

# Build the plotting frame directly from numeric vectors. Combining the totals
# with the year labels through cbind() would coerce the totals to character,
# and converting them back to numeric can lose precision.
x <- data.frame(
  total_emissions_per_year = total_emissions_per_year,
  year = as.numeric(levels(NEI$year))
)

# Scatter plot of the yearly totals, one colour per year.
with(x, plot(year, total_emissions_per_year, col = 1:4, pch = 17, type = "p",
             ylab = " Total PM2.5 emission from all sources", xlab = "year",
             main = "Total PM2.5 variation from 1999 to 2008 across US"))

# Overlay the least-squares trend line of total emissions against year.
model <- lm(total_emissions_per_year ~ year, data = x)
abline(model, lwd = 1)

As is evident from the plot and the regression line, total emissions appear to have decreased across the United States.

Part II

Have total emissions from PM2.5 decreased in the Baltimore City, Maryland (fips == “24510”) from 1999 to 2008?

reading data

# Load the emissions records (NEI) and the source classification table (SCC)
# from the working directory.
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")

type conversion

# Store year and source type as factors so records can be grouped by them.
NEI[c("year", "type")] <- lapply(NEI[c("year", "type")], as.factor)

subsetting based on county Baltimore City

# Keep only the Baltimore City records (fips "24510"). which() drops rows whose
# comparison is NA, matching what subset() does.
subset_NEI <- NEI[which(NEI$fips == "24510"), , drop = FALSE]

splitting on year

# One data frame of Baltimore records per year, in the order of the year levels.
split_subset_NEI <- split(subset_NEI, subset_NEI$year)

# Total Baltimore emissions for each year. vapply() replaces the
# append-in-a-loop pattern: it does not re-copy the result on every iteration
# and, unlike 1:length(), it yields an empty vector when there is nothing to
# sum. unname() keeps the result a plain numeric vector, as before.
total_emissions_per_year_in_Baltimore <- unname(
  vapply(split_subset_NEI, function(year_df) sum(year_df$Emissions), numeric(1))
)

Plotting code

# Build the plotting frame directly from numeric vectors. Combining the totals
# with the year labels through cbind() would coerce the totals to character,
# and converting them back to numeric can lose precision.
x <- data.frame(
  total_emissions_per_year_in_Baltimore = total_emissions_per_year_in_Baltimore,
  year = as.numeric(levels(NEI$year))
)

# Scatter plot of the yearly Baltimore totals, one colour per year.
with(x, plot(year, total_emissions_per_year_in_Baltimore, col = 1:4, pch = 17,
             type = "p",
             ylab = " Total PM2.5 emission from all sources in Baltimore",
             xlab = "year",
             main = "Total PM2.5 variation from 1999 to 2008 in Baltimore City"))

# Overlay the least-squares trend line of total emissions against year.
model <- lm(total_emissions_per_year_in_Baltimore ~ year, data = x)
abline(model, lwd = 1)

As is evident from the plot, total emissions in Baltimore City have in fact decreased from 1999 to 2008. There is a rise in emissions from 2002 to 2005, followed by a sharp fall from 2005 to 2008.

Part III

Of the four types of sources indicated by the type (point, nonpoint, onroad, nonroad) variable, which of these four sources have seen decreases in emissions from 1999-2008 for Baltimore City? Which have seen increases in emissions from 1999-2008?

reading data

# Load the emissions records (NEI) and the source classification table (SCC)
# from the working directory.
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")

type conversion

# Store year and source type as factors so records can be grouped by them.
NEI[c("year", "type")] <- lapply(NEI[c("year", "type")], as.factor)

subsetting based on county Baltimore City

# Keep only the Baltimore City records (fips "24510"). which() drops rows whose
# comparison is NA, matching what subset() does.
subset_NEI <- NEI[which(NEI$fips == "24510"), , drop = FALSE]

splitting based on year

# One data frame of Baltimore records per year level.
split_subset_NEI <- split(x = subset_NEI, f = subset_NEI$year)

empty data frame to be populated with total emissions per source per year

# Total Baltimore emissions for every (source type, year) combination:
# rows are source types, columns are years. tapply() replaces the nested
# loops that grew a data frame with rbind(); default = 0 gives combinations
# without any records a total of 0, which is what sum() over no rows yields.
type_matrix <- as.data.frame(
  tapply(
    subset_NEI$Emissions,
    list(subset_NEI$type, subset_NEI$year),
    sum,
    default = 0
  )
)
names(type_matrix) <- levels(subset_NEI$year)
rownames(type_matrix) <- levels(subset_NEI$type)

restructuring the data into proper form

# Reshape type_matrix (types x years) into long form with one row per
# (type, year) pair, built in one step instead of filling a data frame cell by
# cell inside nested loops.
row_ind <- levels(NEI$type)
col_ind <- levels(NEI$year)
n_types <- nrow(type_matrix)
n_years <- ncol(type_matrix)

type_df <- data.frame(
  # t() puts years in rows, so flattening column by column walks every year of
  # the first type, then every year of the second type, and so on.
  Emissions = as.vector(t(as.matrix(type_matrix))),
  type = as.factor(rep(row_ind[seq_len(n_types)], each = n_years)),
  year = as.factor(rep(col_ind[seq_len(n_years)], times = n_types))
)

plotting the results

library(ggplot2)

# Emissions against year, one panel per source type, with a single linear
# trend line per panel (group = 1 fits across the discrete year values).
g <- ggplot(data = type_df, mapping = aes(x = year, y = Emissions))
f <- g +
  geom_point() +
  facet_wrap(~type, nrow = 1, ncol = 4) +
  geom_smooth(method = "lm", aes(group = 1), fill = NA)
f + labs(
  x = "year",
  y = "Total Emissions in Baltimore City",
  title = "Variation of different types of emissions per year in Baltimore City"
)

As is evident from the plot, emissions from the first three sources appear to have decreased from 1999 to 2008. For the ‘POINT’ source, however, there is a big jump in 2002 and 2005 that falls back in 2008, but overall emissions appear to have risen, as indicated by the positively sloped regression line for the ‘POINT’ source.

Part IV

Across the United States, how have emissions from coal combustion-related sources changed from 1999-2008?

reading data

# Load the emissions records (NEI) and the source classification table (SCC)
# from the working directory.
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")

type conversion

# Store year and source type as factors so records can be grouped by them.
NEI[c("year", "type")] <- lapply(NEI[c("year", "type")], as.factor)

split on year

# One data frame of records per year level.
split_NEI <- split(x = NEI, f = NEI$year)

Plotting code

# NOTE(review): emissions_from_coal_combustion was used below without ever
# being defined, so this section could not run. The computation here is
# reconstructed by analogy with the motor-vehicle sections of this report
# (pattern match on the four SCC level columns, then a per-year sum); the
# exact pattern originally intended is an assumption -- confirm.
coal_pattern <- "coal"
scc_level_cols <- c(
  "SCC.Level.One", "SCC.Level.Two", "SCC.Level.Three", "SCC.Level.Four"
)
# TRUE for every SCC row whose description mentions coal at any level.
is_coal <- Reduce(`|`, lapply(SCC[scc_level_cols], function(level) {
  grepl(coal_pattern, level, ignore.case = TRUE)
}))
coal_scc <- SCC$SCC[is_coal]

# Per-year total of emissions whose source code is coal related.
emissions_from_coal_combustion <- unname(
  vapply(split_NEI, function(year_df) {
    sum(year_df$Emissions[year_df$SCC %in% coal_scc])
  }, numeric(1))
)

#Plotting code
# Build the plotting frame directly from numeric vectors; going through
# cbind() would coerce the totals to character and can lose precision.
x <- data.frame(
  emissions_from_coal_combustion = emissions_from_coal_combustion,
  year = as.numeric(levels(NEI$year))
)
with(x, plot(year, emissions_from_coal_combustion, col = 1:4, pch = 17,
             type = "p",
             ylab = " Emission from coal combustion-related sources across US",
             xlab = "year",
             main = "Variation in emission from coal combustion-related sources across US"))

# Overlay the least-squares trend line of emissions against year.
model <- lm(emissions_from_coal_combustion ~ year, data = x)
abline(model, lwd = 1)

As is evident from the plot and the regression line, emissions show a decreasing trend, except for a slight increase in the period 2002-2005.

Part V

How have emissions from motor vehicle sources changed from 1999-2008 in Baltimore City?

reading data

# Load the emissions records (NEI) and the source classification table (SCC)
# from the working directory.
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")

type conversion

# Store year and source type as factors so records can be grouped by them.
NEI[c("year", "type")] <- lapply(NEI[c("year", "type")], as.factor)

subset based on county Baltimore city

# Keep only the Baltimore City records (fips "24510"). which() drops rows whose
# comparison is NA, matching what subset() does.
subset_NEI <- NEI[which(NEI$fips == "24510"), , drop = FALSE]

split based on year

# One data frame of Baltimore records per year level.
split_subset_NEI <- split(x = subset_NEI, f = subset_NEI$year)

This extends the same logic used for coal combustion in Part IV, this time using vehicle_pattern in the regular expression.

# Source codes whose description mentions "vehicle" at any of the four SCC
# levels (case-insensitive).
vehicle_pattern <- ".*vehicle.*"
scc_level_cols <- c(
  "SCC.Level.One", "SCC.Level.Two", "SCC.Level.Three", "SCC.Level.Four"
)
is_vehicle <- Reduce(`|`, lapply(SCC[scc_level_cols], function(level) {
  grepl(vehicle_pattern, level, ignore.case = TRUE, perl = TRUE)
}))
index <- which(is_vehicle)
SCC_from_index <- SCC$SCC[index]

# Per-year total of Baltimore emissions from those source codes. The
# vectorised %in% filter replaces a row-by-row loop over 1:nrow(), which was
# slow and raised an error for any year that had no records at all.
emissions_from_vehicles_in_baltimore <- unname(
  vapply(split_subset_NEI, function(year_df) {
    sum(year_df$Emissions[year_df$SCC %in% SCC_from_index])
  }, numeric(1))
)

Plotting code

# Build the plotting frame directly from numeric vectors. Combining the totals
# with the year labels through cbind() would coerce the totals to character,
# and converting them back to numeric can lose precision.
x <- data.frame(
  emissions_from_vehicles_in_baltimore = emissions_from_vehicles_in_baltimore,
  year = as.numeric(levels(NEI$year))
)

# Scatter plot of the yearly motor-vehicle totals, one colour per year.
with(x, plot(year, emissions_from_vehicles_in_baltimore, col = 1:4, pch = 17,
             type = "p",
             ylab = " Emissions from motor vehicles in Baltimore",
             xlab = "year",
             main = "Emissions variation from motor vehicles in Baltimore"))

# Overlay the least-squares trend line of emissions against year.
model <- lm(emissions_from_vehicles_in_baltimore ~ year, data = x)
abline(model, lwd = 1)

As is evident from the plot and the regression line, emissions show a decreasing trend.

Part VI

Comparison of emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (fips == “06037”). Which city has seen greater changes over time in motor vehicle emissions?

reading data

# Load the emissions records (NEI) and the source classification table (SCC)
# from the working directory.
NEI <- readRDS(file = "summarySCC_PM25.rds")
SCC <- readRDS(file = "Source_Classification_Code.rds")

type conversion

# Store year and source type as factors so records can be grouped by them.
NEI[c("year", "type")] <- lapply(NEI[c("year", "type")], as.factor)

Baltimore subset

# Keep only the Baltimore City records (fips "24510"). which() drops rows whose
# comparison is NA, matching what subset() does.
subset_NEI_1 <- NEI[which(NEI$fips == "24510"), , drop = FALSE]

LA subset

# Keep only the Los Angeles County records (fips "06037"). which() drops rows
# whose comparison is NA, matching what subset() does.
subset_NEI_2 <- NEI[which(NEI$fips == "06037"), , drop = FALSE]

splitting based on year

Baltimore

# Per-year record lists for Baltimore (_1) and Los Angeles (_2).
split_subset_NEI_1 <- split(subset_NEI_1, subset_NEI_1$year)
split_subset_NEI_2 <- split(subset_NEI_2, subset_NEI_2$year)

# Starts empty; the Los Angeles totals are filled in further down.
emissions_from_vehicles_in_LA <- c()

# Source codes whose description mentions "vehicle" at any of the four SCC
# levels (case-insensitive).
vehicle_pattern <- ".*vehicle.*"
scc_level_cols <- c(
  "SCC.Level.One", "SCC.Level.Two", "SCC.Level.Three", "SCC.Level.Four"
)
is_vehicle <- Reduce(`|`, lapply(SCC[scc_level_cols], function(level) {
  grepl(vehicle_pattern, level, ignore.case = TRUE, perl = TRUE)
}))
index <- which(is_vehicle)
SCC_from_index <- SCC$SCC[index]

# Per-year total of Baltimore emissions from those source codes. The
# vectorised %in% filter replaces a row-by-row loop over 1:nrow(), which was
# slow and raised an error for any year that had no records at all.
emissions_from_vehicles_in_baltimore <- unname(
  vapply(split_subset_NEI_1, function(year_df) {
    sum(year_df$Emissions[year_df$SCC %in% SCC_from_index])
  }, numeric(1))
)

LA

# Per-year total of Los Angeles emissions from the motor-vehicle source codes
# in SCC_from_index. The vectorised %in% filter replaces a row-by-row loop
# over 1:nrow(), which was slow and raised an error for any year that had no
# records at all.
emissions_from_vehicles_in_LA <- unname(
  vapply(split_subset_NEI_2, function(year_df) {
    sum(year_df$Emissions[year_df$SCC %in% SCC_from_index])
  }, numeric(1))
)

Plotting code

library(ggplot2)

# Long-format frame: the Baltimore rows first, then the Los Angeles rows, each
# paired with the year levels. Built in one step rather than by rbind() in
# loops hard-coded to four years, so it follows however many years are present.
year <- levels(NEI$year)
year <- as.numeric(as.character(year))
x <- data.frame(
  Emissions = c(emissions_from_vehicles_in_baltimore,
                emissions_from_vehicles_in_LA),
  year = rep(year, times = 2),
  county = rep(c("Baltimore", "LA"), each = length(year))
)

# Emissions against year, one panel per county, each with a linear trend line.
g <- ggplot(x, aes(year, Emissions))
f <- g +
  geom_point() +
  facet_wrap(~county, nrow = 1, ncol = 4) +
  geom_smooth(method = "lm", aes(group = 1), fill = NA)
f + labs(
  x = "year",
  y = "Emissions from motor vehicle sources across Baltimore and LA",
  title = " Emission comparison between Baltimore and LA"
)

As is evident from the plot and the regression lines, emissions in Baltimore have decreased over time, while emissions in Los Angeles have increased overall, rising from 1999 to 2005 before coming down in the 2005-2008 period. Also, emissions from Los Angeles County are much greater than those from Baltimore, mostly due to the size of the county.