This analysis aims to detect trends in fine particulate matter (PM2.5) emissions across counties in the United States, especially Baltimore City and Los Angeles County, and then to answer several questions about these emission trends over the period 1999-2008.
Have total emissions from PM2.5 decreased in the United States from 1999 to 2008?
# Question 1: total PM2.5 emissions per year across the United States,
# drawn as a scatter plot with a fitted linear trend line.

# Load the emissions records (NEI) and the source classification table (SCC).
NEI <- readRDS("summarySCC_PM25.rds")
SCC <- readRDS("Source_Classification_Code.rds")
NEI$year <- as.factor(NEI$year)
NEI$type <- as.factor(NEI$type)

# One data frame per year level, in the order of levels(NEI$year).
split_NEI <- split(NEI, NEI$year)

# Sum the Emissions column within each year. vapply() guarantees a numeric
# vector of the right length without growing it element by element.
total_emissions_per_year <- vapply(
  split_NEI,
  function(year_data) sum(year_data$Emissions),
  numeric(1),
  USE.NAMES = FALSE
)

# Build the plotting table with numeric columns from the start. Binding the
# totals to the character year labels with cbind() would turn the totals into
# strings (at most 15 significant digits) that then have to be parsed back.
x <- data.frame(
  total_emissions_per_year = total_emissions_per_year,
  year = as.numeric(levels(NEI$year))
)

with(
  x,
  plot(
    year,
    total_emissions_per_year,
    col = c(1:4),
    pch = 17,
    type = "p",
    ylab = " Total PM2.5 emission from all sources",
    xlab = "year",
    main = "Total PM2.5 variation from 1999 to 2008 acorss US"
  )
)

# Overlay the least-squares line of yearly total against year.
model <- lm(total_emissions_per_year ~ year, data = x)
abline(model, lwd = 1)
As is evident from the plot and the regression line, total emissions appear to have decreased across the United States.
Have total emissions from PM2.5 decreased in the Baltimore City, Maryland (fips == “24510”) from 1999 to 2008?
# Question 2: total PM2.5 emissions per year for the records with
# fips == "24510", drawn as a scatter plot with a fitted linear trend line.

# Load the emissions records (NEI) and the source classification table (SCC).
NEI <- readRDS("summarySCC_PM25.rds")
SCC <- readRDS("Source_Classification_Code.rds")
NEI$year <- as.factor(NEI$year)
NEI$type <- as.factor(NEI$type)

# Keep only the rows for fips "24510", then split them by year. The year
# factor keeps all of its levels after subsetting, so the split has one
# entry per level of NEI$year.
subset_NEI <- subset(NEI, fips == "24510")
split_subset_NEI <- split(subset_NEI, subset_NEI$year)

# Sum the Emissions column within each year. vapply() guarantees a numeric
# vector of the right length without growing it element by element.
total_emissions_per_year_in_Baltimore <- vapply(
  split_subset_NEI,
  function(year_data) sum(year_data$Emissions),
  numeric(1),
  USE.NAMES = FALSE
)

# Build the plotting table with numeric columns from the start. Binding the
# totals to the character year labels with cbind() would turn the totals into
# strings (at most 15 significant digits) that then have to be parsed back.
x <- data.frame(
  total_emissions_per_year_in_Baltimore = total_emissions_per_year_in_Baltimore,
  year = as.numeric(levels(NEI$year))
)

with(
  x,
  plot(
    year,
    total_emissions_per_year_in_Baltimore,
    col = c(1:4),
    pch = 17,
    type = "p",
    ylab = " Total PM2.5 emission from all sources in Baltimore",
    xlab = "year",
    main = "Total PM2.5 variation from 1999 to 2008 in Baltomore City"
  )
)

# Overlay the least-squares line of yearly total against year.
model <- lm(total_emissions_per_year_in_Baltimore ~ year, data = x)
abline(model, lwd = 1)
As is evident from the plot, total emissions in Baltimore City have in fact decreased from 1999 to 2008. We see a rise in emissions from 2002 to 2005, followed by a sharp fall from 2005 to 2008.
Of the four types of sources indicated by the type (point, nonpoint, onroad, nonroad) variable, which of these four sources have seen decreases in emissions from 1999-2008 for Baltimore City? Which have seen increases in emissions from 1999-2008?
# Question 3: total emissions per source type and year for the records with
# fips == "24510", drawn as one ggplot2 panel per type with a linear trend.
library(ggplot2)

# Load the emissions records (NEI) and the source classification table (SCC).
NEI <- readRDS("summarySCC_PM25.rds")
SCC <- readRDS("Source_Classification_Code.rds")
NEI$year <- as.factor(NEI$year)
NEI$type <- as.factor(NEI$type)

subset_NEI <- subset(NEI, fips == "24510")
split_subset_NEI <- split(subset_NEI, subset_NEI$year)

# Cross-tabulate summed emissions: one row per type level, one column per
# year level. tapply() leaves NA where a type/year combination has no rows;
# those cells are set to 0, which is what summing an empty group yields.
emission_totals <- tapply(
  subset_NEI$Emissions,
  list(subset_NEI$type, subset_NEI$year),
  sum
)
emission_totals[is.na(emission_totals)] <- 0

# Wide form: rows named by type, columns named by year.
type_matrix <- as.data.frame(emission_totals)
names(type_matrix) <- levels(subset_NEI$year)
rownames(type_matrix) <- levels(subset_NEI$type)

# Long form for ggplot2: one row per (type, year) pair, ordered type by type
# with the years running fastest. Transposing before flattening produces
# exactly that order from the column-major matrix storage.
type_levels <- levels(subset_NEI$type)
year_levels <- levels(subset_NEI$year)
type_df <- data.frame(
  Emissions = as.vector(t(emission_totals)),
  type = rep(type_levels, each = length(year_levels)),
  year = rep(year_levels, times = length(type_levels)),
  stringsAsFactors = FALSE
)
type_df$type <- as.factor(type_df$type)
type_df$year <- as.factor(type_df$year)

g <- ggplot(type_df, aes(year, Emissions))
# group = 1 makes geom_smooth() fit a single line per panel even though the
# x variable is a factor.
f <- g +
  geom_point() +
  facet_wrap(~type, nrow = 1, ncol = 4) +
  geom_smooth(method = "lm", aes(group = 1), fill = NA)
f +
  labs(x = "year") +
  labs(y = "Total Emissions in Baltimore City") +
  labs(title = "Variation of different types of emissions per year in Baltimore City")
As is evident from the plot, emissions appear to have decreased for the first three source types from 1999 to 2008. For the ‘POINT’ source, however, we see a large jump in 2002 and 2005 that falls back in 2008; overall, emissions from this source seem to have risen, as indicated by the positively sloped regression line for ‘POINT’.
Across the United States, how have emissions from coal combustion-related sources changed from 1999-2008?
# Question 4: yearly emissions across the United States from sources whose
# classification mentions coal, drawn as a scatter plot with a linear trend.

# Load the emissions records (NEI) and the source classification table (SCC).
NEI <- readRDS("summarySCC_PM25.rds")
SCC <- readRDS("Source_Classification_Code.rds")
NEI$year <- as.factor(NEI$year)
NEI$type <- as.factor(NEI$type)
split_NEI <- split(NEI, NEI$year)

# NOTE(review): this section plotted emissions_from_coal_combustion without
# ever defining it, so it stopped with "object not found". The selection
# below mirrors the keyword search this file uses for motor vehicles (a
# case-insensitive match on the four SCC level columns) - confirm that
# "coal" is the intended pattern.
coal_pattern <- "coal"
scc_level_columns <- c(
  "SCC.Level.One",
  "SCC.Level.Two",
  "SCC.Level.Three",
  "SCC.Level.Four"
)
coal_index <- unique(unlist(lapply(
  scc_level_columns,
  function(level) grep(coal_pattern, SCC[[level]], ignore.case = TRUE)
)))
coal_SCC <- SCC$SCC[coal_index]

# For each year, add up the emissions of the records whose SCC code is one of
# the matched codes.
emissions_from_coal_combustion <- vapply(
  split_NEI,
  function(year_data) {
    sum(year_data$Emissions[year_data$SCC %in% coal_SCC])
  },
  numeric(1),
  USE.NAMES = FALSE
)

#Plotting code
# Numeric columns are built directly instead of binding the totals to the
# character year labels and parsing them back.
x <- data.frame(
  emissions_from_coal_combustion = emissions_from_coal_combustion,
  year = as.numeric(levels(NEI$year))
)

with(
  x,
  plot(
    year,
    emissions_from_coal_combustion,
    col = c(1:4),
    pch = 17,
    type = "p",
    ylab = " Emission from coal combustion-related sources across US",
    xlab = "year",
    main = "Variation in emission from coal combustion-related sources across US"
  )
)

# Overlay the least-squares line of yearly total against year.
model <- lm(emissions_from_coal_combustion ~ year, data = x)
abline(model, lwd = 1)
As is evident from the plot and the regression line, emissions show a decreasing trend, except for a slight increase over the period 2002-2005.
How have emissions from motor vehicle sources changed from 1999-2008 in Baltimore City?
# Question 5 setup: load both tables and prepare the per-year record sets
# for the rows with fips == "24510".
NEI <- readRDS("summarySCC_PM25.rds")
SCC <- readRDS("Source_Classification_Code.rds")

# Treat year and type as categorical variables.
NEI[c("year", "type")] <- lapply(NEI[c("year", "type")], as.factor)

# which() drops rows whose fips is NA, exactly as subset() does.
subset_NEI <- NEI[which(NEI$fips == "24510"), ]

# One data frame per year level.
split_subset_NEI <- split(subset_NEI, f = subset_NEI[["year"]])
This applies the same logic as described in part 3, this time using vehicle_pattern as the regular expression.
# Yearly emissions from sources whose classification mentions "vehicle",
# computed from the per-year record sets in split_subset_NEI and drawn as a
# scatter plot with a linear trend line.

# Row positions in SCC where any of the four level columns matches the
# pattern (case-insensitive), and the SCC codes found at those positions.
vehicle_pattern <- ".*vehicle.*"
scc_level_columns <- c(
  "SCC.Level.One",
  "SCC.Level.Two",
  "SCC.Level.Three",
  "SCC.Level.Four"
)
index <- unique(unlist(lapply(
  scc_level_columns,
  function(level) {
    grep(vehicle_pattern, SCC[[level]], ignore.case = TRUE, perl = TRUE)
  }
)))
SCC_from_index <- SCC$SCC[index]

# For each year, add up the emissions of the records whose SCC code is one of
# the matched codes. The membership test is done on the whole column at once;
# a row-by-row loop over 1:nrow() is slow and fails when a year has no rows.
emissions_from_vehicles_in_baltimore <- vapply(
  split_subset_NEI,
  function(year_data) {
    sum(year_data$Emissions[year_data$SCC %in% SCC_from_index])
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Numeric columns are built directly instead of binding the totals to the
# character year labels and parsing them back.
x <- data.frame(
  emissions_from_vehicles_in_baltimore = emissions_from_vehicles_in_baltimore,
  year = as.numeric(levels(NEI$year))
)

with(
  x,
  plot(
    year,
    emissions_from_vehicles_in_baltimore,
    col = c(1:4),
    pch = 17,
    type = "p",
    ylab = " Emissions from motor vehicles in Baltimore",
    xlab = "year",
    main = "Emissions variation from motor vehicles in Baltimore"
  )
)

# Overlay the least-squares line of yearly total against year.
model <- lm(emissions_from_vehicles_in_baltimore ~ year, data = x)
abline(model, lwd = 1)
As is evident from the plot and the regression line, emissions show a decreasing trend.
Comparison of emissions from motor vehicle sources in Baltimore City with emissions from motor vehicle sources in Los Angeles County, California (fips == “06037”). Which city has seen greater changes over time in motor vehicle emissions?
# Question 6: yearly emissions from sources whose classification mentions
# "vehicle", compared between fips "24510" and fips "06037" in side-by-side
# ggplot2 panels with a linear trend line in each.
library(ggplot2)

# Sum, for each per-year data frame in `yearly_records`, the Emissions of the
# rows whose SCC code is in `vehicle_codes`. Returns an unnamed numeric vector
# with one total per element of `yearly_records`.
sum_vehicle_emissions_by_year <- function(yearly_records, vehicle_codes) {
  vapply(
    yearly_records,
    function(year_data) {
      sum(year_data$Emissions[year_data$SCC %in% vehicle_codes])
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

# Load the emissions records (NEI) and the source classification table (SCC).
NEI <- readRDS("summarySCC_PM25.rds")
SCC <- readRDS("Source_Classification_Code.rds")
NEI$year <- as.factor(NEI$year)
NEI$type <- as.factor(NEI$type)

# Per-year record sets for each of the two fips codes.
subset_NEI_1 <- subset(NEI, fips == "24510")
subset_NEI_2 <- subset(NEI, fips == "06037")
split_subset_NEI_1 <- split(subset_NEI_1, subset_NEI_1$year)
split_subset_NEI_2 <- split(subset_NEI_2, subset_NEI_2$year)

# Row positions in SCC where any of the four level columns matches the
# pattern (case-insensitive), and the SCC codes found at those positions.
vehicle_pattern <- ".*vehicle.*"
scc_level_columns <- c(
  "SCC.Level.One",
  "SCC.Level.Two",
  "SCC.Level.Three",
  "SCC.Level.Four"
)
index <- unique(unlist(lapply(
  scc_level_columns,
  function(level) {
    grep(vehicle_pattern, SCC[[level]], ignore.case = TRUE, perl = TRUE)
  }
)))
SCC_from_index <- SCC$SCC[index]

# The same summation is applied to both places through one helper, so the two
# series cannot drift apart.
emissions_from_vehicles_in_baltimore <-
  sum_vehicle_emissions_by_year(split_subset_NEI_1, SCC_from_index)
emissions_from_vehicles_in_LA <-
  sum_vehicle_emissions_by_year(split_subset_NEI_2, SCC_from_index)

# Long table: the first block of rows is fips "24510", the second is fips
# "06037", each with one row per year level. The number of years is taken
# from the data rather than fixed at four.
year <- as.numeric(levels(NEI$year))
x <- data.frame(
  Emissions = c(
    emissions_from_vehicles_in_baltimore,
    emissions_from_vehicles_in_LA
  ),
  year = rep(year, times = 2),
  county = rep(c("BAltimore", "LA"), each = length(year)),
  stringsAsFactors = FALSE
)

g <- ggplot(x, aes(year, Emissions))
f <- g +
  geom_point() +
  facet_wrap(~county, nrow = 1, ncol = 4) +
  geom_smooth(method = "lm", aes(group = 1), fill = NA)
f +
  labs(x = "year") +
  labs(y = "Emissions from motor vehicle sources across Baltimore and LA") +
  labs(title = " Emission comparison between Baltimore and LA")
As is evident from the plot and the regression line, emissions in Baltimore have decreased over time, while emissions in Los Angeles have increased, especially from 1999 to 2005, before coming down in the 2005-2008 period. Also, emissions from Los Angeles County are much greater than those from Baltimore, mostly due to the size of the county.