---
title: "Climate data analysis"
author: "Navya Mekera Halaswamy"
date: "August 18, 2018"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    social: menu
    source_code: embed
---
```{r setup, include=FALSE}
# Folder that holds the CSV inputs on the original author's machine.
data_dir <- 'C:/Users/navya/Documents/HU/Data Viz/Lab2/Lab2code'
# knitr restores the working directory after every chunk, so a setwd() call
# here would not carry over to the chunks that read the CSV files, and it
# would abort the render on any machine where the folder is missing.
# Setting root.dir applies to all following chunks; when the folder does not
# exist, knitr keeps its default (the directory of this document).
if (dir.exists(data_dir)) {
  knitr::opts_knit$set(root.dir = data_dir)
}
library(ggplot2)
library(grid)
library(gridExtra)
library(RColorBrewer)
library(ggthemes)
library(dplyr)
library(sp)
library(rworldmap)
library(data.table)
library(Amelia)
library(caret)
library(ggfortify)
library(forecast)
library(imputeTS)
# Wider console output and large, high-resolution figures; messages and
# warnings are hidden from the rendered dashboard.
options(width = 100)
knitr::opts_chunk$set(out.width = '1000px', dpi = 200, message = FALSE, warning = FALSE)
```
Proof of climate change {data-orientation=columns}
==========================================================================
Sidebar {.sidebar}
-----------------------------------------------------------------------
### Earth's surface temperature rise
There is a lot of debate about whether climate change is real. Despite the plethora of evidence corroborated by scientists, a large part of the population still denies that climate change is real.
However, there is undeniable proof that climate change is not just a theory. It is happening. The data supporting this was obtained from Berkeley Earth (https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data).
We can see from the "Global Land temperature in degrees Celsius" visualization that, over 150+ years, the overall increase is close to 2 degrees Celsius. In the second tab, we see that the "Land Average temperature by year" is also steadily increasing. From the "Temperature difference in cities worldwide" tab, which compares 1850 with 2012, we see that temperatures in various locations across the world have risen by up to 4 degrees.
Column {.tabset .tabset-fade data-height=500}
-----------------------------------------------------------------------
### Global Land temperature in degrees Celsius
```{r}
# Yearly mean global land temperature since 1850, one point per year.
# Point colour encodes the yearly mean temperature and point size the yearly
# mean measurement uncertainty; a smoothed trend line is drawn on top.
glob <- read.csv('GlobalTemperatures.csv')
glob$date <- as.Date(glob$dt)
glob$year <- as.numeric(format(glob$date, '%Y'))
glob %>%
  filter(year >= 1850) %>%
  select(LandAverageTemperature, LandAverageTemperatureUncertainty, year) %>%
  group_by(year) %>%
  summarise(
    avgTemp = mean(LandAverageTemperature),
    avgError = mean(LandAverageTemperatureUncertainty)
  ) %>%
  ggplot(aes(x = year, y = avgTemp)) +
  geom_point(aes(size = avgError, color = avgTemp), alpha = .75) +
  # A complete theme replaces every theme element, so it has to be added
  # before the legend tweak; in the opposite order the tweak is discarded.
  theme_fivethirtyeight() +
  theme(legend.position = 'top') +
  ylim(5, 12) +
  ggtitle('Global Land temperature in degrees Celsius') +
  geom_smooth(color = 'black', size = .4) +
  labs(size = 'Average errors') +
  scale_color_gradientn(name = 'Degrees Celsius', colors = rev(brewer.pal(10, 'Spectral'))) +
  # The size legend is hidden; only the colour bar is shown.
  scale_size(guide = 'none')
```
### Land Average temperature by year
```{r}
# Monthly global land temperatures, coloured by meteorological season.
GT <- fread("GlobalTemperatures.csv")

# Parse the date column and derive calendar year and month from it.
GT[, dt := as.Date(dt)]
GT[, `:=`(year = year(dt), month = month(dt))]

# Season lookup: every month starts as "Spring" and the three other
# (non-overlapping) month groups overwrite it.
GT[, season := "Spring"]
GT[month %in% c(12, 1, 2), season := "Winter"]
GT[month %in% c(9, 10, 11), season := "Fall"]
GT[month %in% c(6, 7, 8), season := "Summer"]

# Scatter of every monthly reading; rows without a temperature are dropped
# silently.
ggplot(GT, aes(x = dt, y = LandAverageTemperature, colour = season)) +
  geom_point(na.rm = TRUE) +
  labs(x = "Year", title = "Land Average temperature by year")
```
### Temperature difference in cities worldwide
```{r}
# Convert a coordinate string such as "57.05N" or "10.33W" into a signed
# numeric value.
#
# x: character coordinate made of a numeric magnitude followed by a single
#    hemisphere letter (N, S, E or W).
# Returns the magnitude as a number, negated for the southern and western
# hemispheres.
#
# The previous implementation fed the integer part of the value in as both
# degrees and minutes, which discarded the fractional part ("57.05N" became
# 57 degrees 57 minutes). The magnitude is now used as-is.
# NOTE(review): this treats the magnitude as decimal degrees - confirm against
# the data set's documentation.
convert <- function(x) {
  hemisphere <- toupper(substr(x, nchar(x), nchar(x)))
  magnitude <- as.numeric(substr(x, 1, nchar(x) - 1))
  ifelse(hemisphere %in% c("S", "W"), -magnitude, magnitude)
}
# World outline used as the base layer of the map.
worldMap <- fortify(map_data("world"), region = "region")
m <- ggplot() +
  geom_map(
    data = worldMap,
    map = worldMap,
    aes(x = long, y = lat, map_id = region, group = group),
    fill = "white",
    color = "black",
    size = 0.1
  )

# City-level readings: drop incomplete rows, derive date parts, keep 1850
# onwards, and make sure the coordinate columns are plain character strings.
allCities <- read.csv("GlobalLandTemperaturesByCity.csv") %>%
  na.omit() %>%
  mutate(
    date = as.Date(dt),
    year = as.numeric(format(date, "%Y"))
  ) %>%
  filter(year >= 1850) %>%
  mutate(
    month = as.numeric(format(date, "%m")),
    Longitude = as.character(Longitude),
    Latitude = as.character(Latitude)
  ) %>%
  as.data.frame()

# Baseline year: per-city mean temperature and numeric coordinates for 1850.
# convert() is the helper defined at the top of this chunk.
start <- allCities %>% filter(year == 1850)
start$LONG <- sapply(start$Longitude, convert)
start$LAT <- sapply(start$Latitude, convert)
start <- start %>%
  group_by(Country, City) %>%
  select(AverageTemperature, City, LAT, LONG, Country) %>%
  summarise(
    avgTemp_start = mean(AverageTemperature),
    long = mean(LONG),
    lat = mean(LAT)
  ) %>%
  as.data.frame()

# Comparison year: per-city mean temperature for 2012.
end <- as.data.frame(allCities %>% filter(year == 2012))
end <- end %>%
  group_by(Country, City) %>%
  select(AverageTemperature, City) %>%
  summarise(avgTemp_end = mean(AverageTemperature)) %>%
  as.data.frame()

# Only cities present in both years are kept.
res2 <- as.data.frame(merge(start, end, by = c("Country", "City")))

# One translucent point per city; both size and colour follow the
# 2012-minus-1850 difference, with only the colour bar shown as a legend.
m +
  geom_point(
    data = res2,
    aes(
      x = long,
      y = lat,
      size = avgTemp_end - avgTemp_start,
      color = avgTemp_end - avgTemp_start
    ),
    alpha = .2
  ) +
  theme_fivethirtyeight() +
  ggtitle("Temperature difference between 1850 and 2012") +
  theme(
    axis.text = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  labs(size = "") +
  scale_color_gradientn(
    name = "Degrees Celsius",
    colors = rev(brewer.pal(10, "Spectral"))
  ) +
  scale_size(guide = "none")
```
CO2 trend and high-low temperatures {data-orientation=columns}
==========================================================================
Sidebar {.sidebar}
-----------------------------------------------------------------------
### CO2 rise in the atmosphere and subsequent temperature rise
As we can see from the CO2 trend visualization, the concentration of the greenhouse gas CO2 has risen from about 300 parts per million in the 1960s to over 400 in the present day. We see a similar upward trend in both the high and the low temperatures of the Earth's surface.
Data source: https://www.kaggle.com/ucsandiego/carbon-dioxide
Column {data-width=400}
------------------------------------------------------------------------------
### CO2 trend
```{r}
# Atmospheric CO2 series: raw, seasonally adjusted, and the fitted version of
# each, drawn as four lines over the decimal date.
df <- read.csv("archive.csv", sep = ",")
names(df) <- c(
  "year",
  "month",
  "decimal_date",
  "carbon_dioxide_ppm",
  "carbon_dioxide_ppm_season_adj",
  "carbon_dioxide_ppm_fit",
  "carbon_dioxide_ppm_season_adj_fit"
)

# Legend key -> line colour.
series_colours <- c(
  carbon_dioxide = "#E2D200",
  carbon_dioxide_adjusted = "#46ACC8",
  carbon_dioxide_fit = "#E58601",
  carbon_dioxide_adjusted_fit = "#B40F20"
)

# The x aesthetic is shared; each layer contributes its own series and a
# constant colour label that becomes the legend key.
ggplot(df, aes(x = decimal_date)) +
  geom_line(aes(y = carbon_dioxide_ppm, color = "carbon_dioxide"), alpha = .75) +
  geom_line(aes(y = carbon_dioxide_ppm_season_adj, color = "carbon_dioxide_adjusted"), alpha = .75) +
  geom_line(aes(y = carbon_dioxide_ppm_fit, color = "carbon_dioxide_fit"), alpha = .75) +
  geom_line(aes(y = carbon_dioxide_ppm_season_adj_fit, color = "carbon_dioxide_adjusted_fit"), alpha = .75) +
  scale_colour_manual(name = "data", values = series_colours) +
  theme(legend.position = "top") +
  labs(x = "", y = "concentrations [ppm]")
```
Column {data-width=400}
------------------------------------------------------------------------------
### High-low average temperature trend
```{r}
# Yearly means of the monthly maximum and minimum land temperatures, shown as
# two stacked panels with a smoothed trend.
GT <- fread("GlobalTemperatures.csv")

# Parse the date column and derive calendar year and month from it.
GT[, dt := as.Date(dt)]
GT[, `:=`(year = year(dt), month = month(dt))]

# Season lookup: every month starts as "Spring" and the three other
# (non-overlapping) month groups overwrite it.
GT[, season := "Spring"]
GT[month %in% c(12, 1, 2), season := "Winter"]
GT[month %in% c(9, 10, 11), season := "Fall"]
GT[month %in% c(6, 7, 8), season := "Summer"]

# Yearly mean of the monthly maxima.
High.GT <- aggregate(GT$LandMaxTemperature, by = list(GT$year), FUN = mean)

# Yearly mean of the monthly minima.
Low.GT <- aggregate(GT$LandMinTemperature, by = list(GT$year), FUN = mean)

# Join the two yearly series on the grouping column; after the merge the
# columns are (year, low, high) in that order.
HighLow.GT <- data.table(merge(Low.GT, High.GT, by = "Group.1"))
setnames(HighLow.GT, c("year", "Low", "High"))

# Long format: one row per (year, Low/High), dropping years with no value.
MeltedHighLow.GT <- melt(
  HighLow.GT,
  id.vars = "year",
  na.rm = TRUE,
  variable.name = "Temperature"
)

# One panel per series, each with its own y scale; vertical reference lines
# mark 1900 and 1975.
ggplot(MeltedHighLow.GT, aes(x = year, y = value)) +
  geom_line(na.rm = TRUE) +
  facet_grid(Temperature ~ ., scales = "free_y") +
  geom_smooth() +
  geom_vline(xintercept = c(1900, 1975), alpha = 0.5) +
  labs(x = "Year", title = "High and Low average Temperature trend")
```
Climate Change consequences {data-orientation=columns}
==========================================================================
Sidebar {.sidebar}
-----------------------------------------------------------------------
### Major Cities affected
The visualization shown here enumerates the worst affected cities in the world in terms of temperature. In addition to the temperature rise consequences, flooding is another major disastrous consequence. This is not shown in the visualization, however, some of the smaller islands around the world are already going under the sea, causing major population migration to the central parts of the land and in turn causing overcrowding.
Column {data-width=800}
-----------------------------------------------------------------------
### Most affected cities
```{r}
# Convert a coordinate string such as "57.05N" or "10.33W" into a signed
# numeric value.
#
# x: character coordinate made of a numeric magnitude followed by a single
#    hemisphere letter (N, S, E or W).
# Returns the magnitude as a number, negated for the southern and western
# hemispheres.
#
# The previous implementation fed the integer part of the value in as both
# degrees and minutes, which discarded the fractional part ("57.05N" became
# 57 degrees 57 minutes). The magnitude is now used as-is.
# NOTE(review): this treats the magnitude as decimal degrees - confirm against
# the data set's documentation.
convert <- function(x) {
  hemisphere <- toupper(substr(x, nchar(x), nchar(x)))
  magnitude <- as.numeric(substr(x, 1, nchar(x) - 1))
  ifelse(hemisphere %in% c("S", "W"), -magnitude, magnitude)
}
# Horizontal bar chart of the 2012-minus-1850 mean temperature difference for
# each major city, ordered by the size of the difference.

# Major-city readings: drop incomplete rows, derive date parts, keep 1850
# onwards, and make sure the coordinate columns are plain character strings.
majorCities <- read.csv('GlobalLandTemperaturesByMajorCity.csv')
majorCities <- na.omit(majorCities)
majorCities$date <- as.Date(majorCities$dt)
majorCities$year <- as.numeric(format(majorCities$date, '%Y'))
majorCities <- as.data.frame(majorCities %>% filter(year >= 1850))
majorCities$month <- as.numeric(format(majorCities$date, '%m'))
majorCities$Longitude <- as.character(majorCities$Longitude)
majorCities$Latitude <- as.character(majorCities$Latitude)

# Baseline year: per-city mean temperature and numeric coordinates for 1850.
start <- majorCities %>% filter(year == 1850)
start$LONG <- sapply(start$Longitude, convert)
start$LAT <- sapply(start$Latitude, convert)
start <- as.data.frame(
  start %>%
    group_by(Country, City) %>%
    select(AverageTemperature, City, LAT, LONG, Country) %>%
    summarise(
      avgTemp_start = mean(AverageTemperature),
      long = mean(LONG),
      lat = mean(LAT)
    )
)

# Comparison year: per-city mean temperature for 2012.
end <- as.data.frame(majorCities %>% filter(year == 2012))
end <- as.data.frame(
  end %>%
    group_by(Country, City) %>%
    select(AverageTemperature, City) %>%
    summarise(avgTemp_end = mean(AverageTemperature))
)

# Only cities present in both years are kept.
res2 <- as.data.frame(merge(start, end, by = c('Country', 'City')))

ggplot(
  data = res2,
  aes(x = reorder(City, avgTemp_end - avgTemp_start), y = avgTemp_end - avgTemp_start)
) +
  # geom_col() draws bars whose height is the y value itself. The previous
  # geom_histogram(stat = 'identity') drew the same bars but passed binning
  # parameters to a stat that ignores them, which triggers warnings.
  geom_col(aes(fill = avgTemp_end - avgTemp_start), width = .75) +
  coord_flip() +
  theme_fivethirtyeight() +
  ggtitle('World Major Cities') +
  scale_fill_gradientn(
    name = 'temperature\'s difference in degrees Celsius between 1850 and 2012',
    colours = rev(brewer.pal(10, 'Spectral'))
  ) +
  theme(legend.position = 'top', axis.text = element_text(size = 7))
```
Ecological Footprint {data-orientation=columns}
==========================================================================
Sidebar {.sidebar}
-----------------------------------------------------------------------
### Who is responsible?
"The biocapacity is representative of a country's available resources while the ecological footprint measures a country's consumption of resources". From the visual, we can see that the countries with a low ecological footprint and a low Human Development Index (HDI) are the most vulnerable to the consequences of climate change, whereas countries with a high HDI have a high ecological footprint and contribute the most to climate change.
Data source: https://www.kaggle.com/footprintnetwork/ecological-footprint
Column
-----------------------------------------------------------------------
### Ecological Footprint vs vulnerability
```{r}
# Scatter of log biocapacity against log ecological footprint for every
# country, coloured by HDI, with labels for two highlighted groups: countries
# whose total footprint exceeds 7.5 and a fixed list of countries considered
# most vulnerable to climate change.
countries <- fread('countries.csv')
# Fix Column Names for Analysis
setnames(countries, old = colnames(countries), new = make.names(colnames(countries), unique = TRUE))
countries <- subset(countries, countries$Country != "Aruba")
countries <- subset(countries, countries$Country != "Montserrat")

biggest_ecological_footprint <- countries[Total.Ecological.Footprint > 7.5]
vulnerable_to_climate_change <- countries[Country %in% c(
  "Honduras",
  "Myanmar",
  "Haiti",
  "Nicaragua",
  "Philippines",
  "Bangladesh",
  "Pakistan",
  "Vietnam",
  "Guatemala",
  "Thailand")]

# Rows that belong to at least one of the two highlighted groups.
subset_countries <- subset(
  countries,
  countries$Country %in% biggest_ecological_footprint$Country |
    countries$Country %in% vulnerable_to_climate_change$Country
)

# Group label for each highlighted country. The column is created at full
# length first: indexed assignment into a column that does not exist yet
# builds a vector only as long as the largest matched row index, which can be
# shorter than the table. As before, the "vulnerable" label is applied last
# and therefore wins for a country that is in both groups.
subset_countries[, climate_change := NA_character_]
subset_countries[
  Country %in% biggest_ecological_footprint$Country,
  climate_change := "Biggest Ecological Footprint"
]
subset_countries[
  Country %in% vulnerable_to_climate_change$Country,
  climate_change := "Most Vulnerable to Climate Change"
]

# Footprint vs. Biocapacity vs. HDI
library(RColorBrewer)
p <- ggplot() +
  scale_x_continuous(limits = c(-3.1, 5), name = ("log of Biocapacity")) +
  scale_y_continuous(limits = c(-1, 3), name = ("log of Ecological Footprint")) +
  theme_grey() +
  theme(axis.text = element_text(size = 12), legend.position = "top") +
  # All countries as points, coloured by HDI.
  geom_point(
    data = countries,
    mapping = aes(x = log(Total.Biocapacity), y = log(Total.Ecological.Footprint), colour = HDI),
    alpha = 2/3, size = 2
  ) +
  # Highlighted countries as filled labels, one fill colour per group.
  geom_label(
    data = subset_countries,
    aes(x = log(Total.Biocapacity), y = log(Total.Ecological.Footprint),
        label = Country, fill = climate_change),
    size = 4, colour = 'white', show.legend = TRUE,
    label.padding = unit(0.17, "lines"), label.size = .2, fontface = "bold"
  ) +
  scale_fill_discrete(l = 30) +
  guides(
    colour = guide_colourbar(title = "Human Development Index (HDI)",
                             title.position = "top",
                             barwidth = 11.2),
    fill = guide_legend(title = NULL,
                        direction = "vertical")
  )
p
```