chooseCRANmirror(graphics=FALSE, ind=1)
install.packages("tidyverse")
## Installing package into 'C:/Users/jenny_000/Documents/R/win-library/3.3'
## (as 'lib' is unspecified)
## package 'tidyverse' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jenny_000\AppData\Local\Temp\RtmpeCxy5C\downloaded_packages
install.packages("dplyr")
## Installing package into 'C:/Users/jenny_000/Documents/R/win-library/3.3'
## (as 'lib' is unspecified)
## package 'dplyr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jenny_000\AppData\Local\Temp\RtmpeCxy5C\downloaded_packages
install.packages("ggplot2")
## Installing package into 'C:/Users/jenny_000/Documents/R/win-library/3.3'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jenny_000\AppData\Local\Temp\RtmpeCxy5C\downloaded_packages
install.packages("RCurl",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/jenny_000/Documents/R/win-library/3.3'
## (as 'lib' is unspecified)
## package 'RCurl' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jenny_000\AppData\Local\Temp\RtmpeCxy5C\downloaded_packages
library(RCurl)
## Loading required package: bitops
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## complete(): tidyr, RCurl
## filter():   dplyr, stats
## lag():      dplyr, stats
library(dplyr)
library(ggplot2)


## Data Source and Workflow
# The data come from the World Bank. This project follows the OSEMN data science workflow: obtain, scrub, explore, model, and interpret.

## Obtain the data 
# The data were downloaded from the World Bank and uploaded to GitHub; they are then loaded into R from there.

# Read the emissions CSV straight from the GitHub raw URL into a data frame.
# The first row of the file supplies the column names.
data <- read.csv(
  file = url("https://raw.githubusercontent.com/JennierJ/Final_Project/master/Total%20greenhouse%20gas%20emissions.csv"),
  header = TRUE
)

#head(data)
#View(data)

## Scrub and explore the data

nrow(data)
## [1] 222
ncol(data)
## [1] 16

# Drop the columns that are not analysed: the series/country code columns and
# the 2013-2016 year columns (described by the original author as the NA
# columns), plus the series-name column.
unused_cols <- c(
  "Series.Code", "Country.Code",
  "X2013..YR2013.", "X2014..YR2014.", "X2015..YR2015.", "X2016..YR2016."
)
# The series-name header is read with a byte-order-mark prefix whose spelling
# depends on the platform and encoding (e.g. "ï..Series.Name"), so it is
# matched by its stable suffix instead of a hard-coded non-ASCII identifier.
is_series_name <- grepl("Series\\.Name$", names(data))
is_unused <- names(data) %in% unused_cols | is_series_name
data <- data[, !is_unused, drop = FALSE]

# Rename the remaining columns: the country name followed by one column per year.
new_names <- c("Country", "1990", "2000", "2007", "2008", "2009", "2010", "2011", "2012")
# The rename is positional, so fail loudly if the column count is not what is
# expected; otherwise the labels would land on the wrong columns.
stopifnot(ncol(data) == length(new_names))
colnames(data) <- new_names

# Data Transformation: Tidy the data

# Reshape from wide (one column per year) to long: one row per
# country/year pair, with the reading stored in `Emission`.
data <- gather(data, key = "year", value = "Emission", -Country)
## Warning: attributes are not identical across measure variables; they will
## be dropped

# Keep only rows that hold an actual reading: discard the ".." placeholder,
# empty strings, and anything whose comparison evaluates to NA.
has_reading <- data$Emission != ".." & data$Emission != ""
data <- data[has_reading & !is.na(has_reading), , drop = FALSE]

# Inspect the column type before converting it.
is.numeric(data$Emission)
## [1] FALSE
class(data$Emission)
## [1] "character"

# The readings are still text at this point; convert them to numbers.
data[["Emission"]] <- as.numeric(data[["Emission"]])

#dim(data)
#head(data)

# The country label is not needed for the per-year summary, so remove it.
data[["Country"]] <- NULL

# Mean and standard deviation of the emission readings for each year.
table <- data %>%
  group_by(year) %>%
  summarise(mean = mean(Emission), sd = sd(Emission))
table
## # A tibble: 8 × 3
##    year     mean        sd
##   <chr>    <dbl>     <dbl>
## 1  1990 195905.2  631358.2
## 2  2000 213687.9  698223.3
## 3  2007 265041.8  918784.4
## 4  2008 258071.3  950847.7
## 5  2009 256738.3  980898.9
## 6  2010 273583.4 1044674.3
## 7  2011 284671.7 1089739.7
## 8  2012 288126.4 1116415.1
# From the table, we can clearly see that there is a trend of increasing total greenhouse gas emissions over the years.

## Modeling and Interpret

# Plot the yearly mean emission as a red line with points.
# - The colour is set as a constant on each geom. Inside aes() the string
#   "red" would be treated as a one-level variable, which adds a spurious
#   legend and uses the default palette colour instead of red.
# - `year` is stored as text; converting it to a number places the years on a
#   true time axis, so the 1990 -> 2000 -> 2007 gaps are drawn to scale.
ggplot(table, aes(x = as.integer(as.character(year)), y = mean)) +
  geom_point(colour = "red") +
  geom_line(colour = "red") +
  labs(x = "Year", y = "Total Greenhouse Gas Emission", title = "Total Greenhouse Gas Emission vs. Year")

# Conclusion: Greenhouse gases from human activities are the most significant driver of observed climate change since the mid-20th century. According to
# the graph, the total emission of greenhouse gases has been increasing over the years.


# Reference: The website address is http://databank.worldbank.org/data/reports.aspx?source=2&Topic=6#.