# Install the packages this analysis needs, but only the ones that are not
# already available, so re-running the script does not re-download everything.
# chooseCRANmirror() picks the first mirror in the list non-interactively so
# that install.packages() never prompts for a repository.
chooseCRANmirror(graphics = FALSE, ind = 1)
required_packages <- c("tidyverse", "dplyr", "ggplot2", "RCurl")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  # All packages come from the mirror selected above; RCurl was previously
  # fetched from a separate plain-http repository.
  install.packages(missing_packages)
}
library(RCurl)
## Loading required package: bitops
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## complete(): tidyr, RCurl
## filter(): dplyr, stats
## lag(): dplyr, stats
library(dplyr)
library(ggplot2)
## Data Source and Workflow
# The data comes from the World Bank. The data science workflow applied in this project is OSEMN: obtain, scrub, explore, model and interpret.
## Obtain the data
# The data was downloaded from the World Bank and uploaded to GitHub; it is then loaded into R from there.
# Raw CSV hosted on GitHub; read it straight from the URL into a data frame.
emissions_csv_url <- "https://raw.githubusercontent.com/JennierJ/Final_Project/master/Total%20greenhouse%20gas%20emissions.csv"
data <- read.csv(file = url(emissions_csv_url))
#head(data)
#View(data)
## Scrub and explore the data
# Report the raw dimensions of the downloaded table.
nrow(data)
## [1] 222
ncol(data)
## [1] 16
# Drop the metadata columns and the 2013-2016 columns (the "NA columns" of the
# original note), then give the remaining columns short names.
# The series-name column is matched by suffix: depending on platform and
# encoding, read.csv() may prefix it with byte-order-mark residue (it appeared
# as "ï..Series.Name" when this analysis was first run), so an exact name
# would silently fail to match elsewhere.
metadata_cols <- c("Series.Code", "Country.Code")
unused_year_cols <- c(
  "X2013..YR2013.", "X2014..YR2014.", "X2015..YR2015.", "X2016..YR2016."
)
drop_col <- grepl("Series\\.Name$", colnames(data)) |
  colnames(data) %in% c(metadata_cols, unused_year_cols)
data <- data[, !drop_col, drop = FALSE]
new_names <- c(
  "Country", "1990", "2000", "2007", "2008", "2009", "2010", "2011", "2012"
)
# Fail loudly instead of mislabelling columns if the file layout has changed.
stopifnot(ncol(data) == length(new_names))
colnames(data) <- new_names
# Data Transformation: Tidy the data
# Reshape from wide (one column per year) to long: one row per
# country/year pair, with the measurement stored in `Emission`.
data <- gather(data, key = "year", value = "Emission", -Country)
## Warning: attributes are not identical across measure variables; they will
## be dropped
# Keep only rows that carry a real measurement: ".." and the empty string are
# the placeholders found in this file for missing values.
data <- subset(data, Emission != ".." & Emission != "")
#class(data$Emission)
#colnames(data)
# Emission is still text at this point, so confirm that and then convert it.
is.numeric(data[["Emission"]])
## [1] FALSE
class(data[["Emission"]])
## [1] "character"
data[["Emission"]] <- as.numeric(data[["Emission"]])
#dim(data)
#head(data)
# The country label is not needed for the per-year summary, so drop it.
data[["Country"]] <- NULL
# Mean and standard deviation of the emission values within each year.
table <- data %>%
  group_by(year) %>%
  summarise(mean = mean(Emission), sd = sd(Emission))
table
## # A tibble: 8 × 3
## year mean sd
## <chr> <dbl> <dbl>
## 1 1990 195905.2 631358.2
## 2 2000 213687.9 698223.3
## 3 2007 265041.8 918784.4
## 4 2008 258071.3 950847.7
## 5 2009 256738.3 980898.9
## 6 2010 273583.4 1044674.3
## 7 2011 284671.7 1089739.7
## 8 2012 288126.4 1116415.1
# From the table, we can see that there is an overall increasing trend in the mean greenhouse gas emission over the years, apart from a small dip in 2008-2009.
## Modeling and Interpret
# Plot the yearly mean emission as points joined by a line.
# `group = 1` puts all years in a single group so that geom_line() connects
# them even though `year` is a character (discrete) variable.
# The colour is set as a fixed parameter on each geom rather than inside
# aes(): mapping the constant "red" in aes() treats it as a data value, which
# draws the default palette colour and adds a meaningless legend.
ggplot(table, aes(x = year, y = mean, group = 1)) +
  geom_point(colour = "red") +
  geom_line(colour = "red") +
  labs(
    x = "Year",
    y = "Total Greenhouse Gas Emission",
    title = "Total Greenhouse Gas Emission vs. Year"
  )

# Conclusion: Greenhouse gases from human activities are the most significant driver of observed climate change since the mid-20th century. According to
# the graph, the emission of greenhouse gases has been increasing over the years.
# Reference: The website address is http://databank.worldbank.org/data/reports.aspx?source=2&Topic=6#.