Load the dslabs library, then list the datasets it provides and the scripts that build them.
# Attach dslabs so its bundled datasets are available.
library(dslabs)
# Show the index of datasets that ship with the package.
data(package = "dslabs")
# List the files in the package's installed "script" directory.
list.files(path = system.file("script", package = "dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
Dataset chosen: Greenhouse Gases. It shows the concentrations of greenhouse gases over a span of 2,000 years.
data("greenhouse_gases")
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggthemes)
library(ggrepel)
library(RColorBrewer)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
##
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
##
## stars
# Working copy of the dataset under a shorter name. Assign it directly rather
# than through the invisible return value of write_csv(), so that creating
# `green` does not depend on a file-write side effect.
green <- greenhouse_gases
# Export the data to disk; missing values are written as empty fields.
# The file name carries a .csv extension so other tools recognise the format.
write_csv(green, "greenhouse_gases.csv", na = "")
# Inspect the structure and the first rows of the working copy.
str(green)
## 'data.frame': 300 obs. of 3 variables:
## $ year : num 20 40 60 80 100 120 140 160 180 200 ...
## $ gas : chr "CO2" "CO2" "CO2" "CO2" ...
## $ concentration: num 278 278 277 277 278 ...
head(green)
## year gas concentration
## 1 20 CO2 277.7
## 2 40 CO2 277.8
## 3 60 CO2 277.3
## 4 80 CO2 277.3
## 5 100 CO2 277.5
## 6 120 CO2 277.6
Create a new column that shows the change in concentration from one observation to the next for each gas. The new column is created with data.table, an alternative to the standard data frame.
# load data table library
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
# Copy the data into a data.table so the new column can be added with `:=`.
DT <- data.table(green)
# Change in concentration relative to the previous row of the SAME gas.
# Grouping by gas stops shift() from reaching across a gas boundary, so the
# first row of every gas gets NA instead of a difference against the last
# value of a different gas. The table's own `concentration` column is used
# rather than an external vector, so the result always lines up with DT's rows.
# NOTE(review): assumes rows are ordered by year within each gas -- confirm.
DT[, diff := concentration - shift(concentration), by = gas]
# Convert back to a plain data frame for the dplyr / ggplot2 steps below.
DT <- as.data.frame(DT)
# Drop the first observation (year 20) of each gas: it has no predecessor,
# so its difference is NA and cannot be plotted.
DT1 <- DT %>% filter(year > 20)
head(DT1)
## year gas concentration diff
## 1 40 CO2 277.8 0.1
## 2 60 CO2 277.3 -0.5
## 3 80 CO2 277.3 0.0
## 4 100 CO2 277.5 0.2
## 5 120 CO2 277.6 0.1
## 6 140 CO2 278.3 0.7
The first observation (year 20) of each gas has no earlier value of the same gas to subtract, so its difference would throw the graphs off; those rows were removed with filter() before plotting.
To visualize the variations in concentration, I chose an area graph and used the viridis library for the colors. I couldn’t decide whether to show all the gases together in one area graph or to split them with a facet wrap, so I did both. The combined ggplot area graph shows how all of the changes look together over time, while the facet wrap allows a closer look at each gas individually.
library(viridis)
## Loading required package: viridisLite
# Base plot: change in concentration over time, one filled area per gas.
# Thin white outlines separate the areas; fills come from the viridis palette.
green_p <- ggplot(data = DT1, mapping = aes(x = year, y = diff, fill = gas)) +
  geom_area(colour = "white", size = 0.1, alpha = 1) +
  labs(
    title = "Variations in Greenhouse Gas Concentrations",
    x = "Year",
    y = "Concentration Difference",
    fill = "Gas"
  ) +
  scale_fill_viridis(discrete = TRUE)

# Version 1: every gas in a single panel.
green_p + theme_classic()

# Version 2: one panel per gas.
green_p + facet_wrap(~gas) + theme_dark()
I used highcharter to plot the concentrations over a particular range of years; I chose 1600–2000. I filtered out the years I did not want and then plotted the rest with highcharter. I also added a tooltip that shows the concentrations at each year.
# Narrow the dataset to the selected range: observations from year 1600 onward.
# The lower bound is written as the intended year itself, so the filter does
# not depend on the spacing between observations to land on 1600.
green_1 <- green %>% filter(year >= 1600)
head(green_1)
## year gas concentration
## 1 1600 CO2 275.4
## 2 1620 CO2 272.3
## 3 1640 CO2 276.6
## 4 1660 CO2 276.9
## 5 1680 CO2 275.9
## 6 1700 CO2 276.6
# Three-colour pastel palette used for the chart series.
look <- brewer.pal(n = 3, name = "Pastel1")

# Interactive stacked area chart of concentration by year, one series per gas.
highchart() %>%
  hc_add_series(
    data = green_1,
    type = "area",
    mapping = hcaes(x = year, y = concentration, group = gas)
  ) %>%
  hc_colors(look) %>%
  hc_chart(
    style = list(fontFamily = "Georgia", fontWeight = "bold")
  ) %>%
  hc_plotOptions(
    series = list(
      stacking = "normal",
      # Point markers are switched off, including in the hover state.
      marker = list(
        enabled = FALSE,
        states = list(hover = list(enabled = FALSE))
      ),
      lineWidth = 0.5,
      lineColor = "black"
    )
  ) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "Concentration")) %>%
  hc_legend(
    layout = "vertical",
    align = "right",
    verticalAlign = "middle"
  ) %>%
  # A single shared tooltip reports every series at the hovered year.
  hc_tooltip(shared = TRUE) %>%
  hc_title(
    text = "400 Years of Greenhouse Gases",
    margin = 20,
    align = "center",
    # NOTE(review): useHTML sits inside the style list here; it looks like a
    # title-level option rather than a style property -- confirm intent.
    style = list(color = "#22A884", useHTML = TRUE)
  )
For the DS Labs assignment, I chose the greenhouse gases dataset. The first thing I wanted to look at was how the concentrations have varied over the years. Using the data.table library, I converted the dataset to a data table and added a column holding each row's concentration minus the previous row's. It was then converted back to a data frame. A simple row-by-row difference doesn’t take the different gases into account, so the first year (year 20) of each gas was removed using filter because those values would throw off the graphs. The variations were plotted as an area graph in two ways: a regular area graph and a facet wrap. Just for practice, I also tried highcharter to plot a selection of the concentrations. I added the tooltip so you can follow along the graph and see what the concentrations were through the years.