Use dplyr and ggplot2 to process data and draw these two charts (shown below) from the Nations dataset. You do NOT need to incorporate interactivity, but you can, if you want to challenge yourself.
Examples are shown below:
Example Charts
• For both charts, you will first need to create a new variable in the data, using mutate from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion. • Draw both charts with ggplot2. • For the first chart, you will need to filter the data with dplyr for the four desired countries. When making the chart with ggplot2 you will need to add both geom_point and geom_line layers, and use the Set1 ColorBrewer palette using: scale_color_brewer(palette = "Set1"). • For the second chart, using dplyr you will need to group_by region and year, and then summarize on your mutated value for gdp_percap using
summarise(sum = sum(gdp_percap, na.rm = TRUE)). (There will be null values, or NAs, in this data, so you will need to use na.rm = TRUE). • Each region's area will be generated by the command geom_area()
• When drawing the chart with ggplot2, you will need to use the Set2 ColorBrewer palette using scale_fill_brewer(palette = "Set2") • Think about the difference between fill and color when making the chart, and where the above fill command needs to go in order for the regions to fill with the different colors when making the chart, and put a very thin white line around each area.
library(rvest)
## Loading required package: xml2
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag() masks stats::lag()
## x purrr::pluck() masks rvest::pluck()
library(ggsci)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(DT)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(RColorBrewer)
library(ggplot2)
# Switch the session to the course folder so that the relative path
# "nations.csv" resolves.
# NOTE(review): this path is hard-coded to one machine; the script will not
# run elsewhere until it is adjusted.
setwd("C:/Users/Valued Customer/Desktop/Lovebug/Montgomery College/DATA 110/Week 4")
# Read the raw nations data into a tibble.
nations <- readr::read_csv(file = "nations.csv")
## Parsed with column specification:
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
Load the nations data and add a column showing GDP in trillions of dollars.
# Re-read the file and add gdp_tn: GDP per capita times population,
# expressed in trillions.
nations <- mutate(
  read_csv("nations.csv"),
  gdp_tn = (gdp_percap * population) / 1e12
)
## Parsed with column specification:
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
Filter the data to the top five countries in the 2020 World Happiness Report.
The five happiest countries were reported as, in rank order: Finland, Denmark, Norway, Iceland, and the Netherlands.
The filtered data is then arranged by year so that highcharter draws each time series in chronological order. Without this arrangement, any line drawn through the data follows the row order of the data rather than the order of the years.
# Rows for the five selected countries (matched on ISO3 code), sorted by year
# so that time-series lines are drawn in chronological order.
happy5 <- nations %>%
  filter(iso3c %in% c("FIN", "DNK", "NOR", "ISL", "NLD")) %>%
  arrange(year)
# Same selection and ordering under a second name, used by the final chart.
happy5order <- happy5
# Attempt to change the legend order (the original note pointed at line 228
# of the source document).
# One logical mask per country, TRUE on that country's rows.
A_Finland <- (happy5order$country == "Finland")
B_Denmark <- (happy5order$iso3c == "DNK")
C_Norway <- (happy5order$iso3c == "NOR")
D_Iceland <- (happy5$iso3c == "ISL")
E_Netherlands <- (happy5$iso3c == "NLD")
# Country names as a factor whose levels follow the happiness ranking.
# factor() needs the country column: applied to the whole data frame it
# coerces each column to a single string, none of which matches a level, so
# the result was an all-NA factor with one element per column.
happylegend <- factor(
  happy5$country,
  levels = c("Finland", "Denmark", "Norway", "Iceland", "Netherlands")
)
# Basic line chart with default settings: GDP (trillions) against year,
# one series per country.
hc_add_series(
  highchart(),
  data = happy5,
  type = "line",
  mapping = hcaes(x = year, y = gdp_tn, group = country)
)
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `as_data_frame()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `rename_()` is deprecated as of dplyr 0.7.0.
## Please use `rename()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Basic column chart with default settings: same mapping as the line chart,
# drawn as columns.
hc_add_series(
  highchart(),
  data = happy5,
  type = "column",
  mapping = hcaes(x = year, y = gdp_tn, group = country)
)
# Basic scatter chart: same mapping as above, with the series colour option
# set to "lightblue".
hc_add_series(
  highchart(),
  data = happy5,
  type = "scatter",
  color = "lightblue",
  mapping = hcaes(x = year, y = gdp_tn, group = country)
)
# Line chart whose legend runs from 1 to 5 on the happiness scale.
# The earlier hc_legend(labels(...)) call could not do this: base::labels()
# on a character scalar just returns "1", so no ordering reached the chart.
# Legend entries follow the order in which series are added, so each country
# is added as its own series, in rank order, instead of via `group`.
happiness_rank <- c("Finland", "Denmark", "Norway", "Iceland", "Netherlands")
ranked_chart <- highchart()
for (country_name in happiness_rank) {
  ranked_chart <- hc_add_series(
    ranked_chart,
    data = dplyr::filter(happy5, country == country_name),
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn),
    name = country_name
  )
}
# Print the finished chart.
ranked_chart
# Colour palette: five colours from the ColorBrewer "BrBG" palette.
# Palette reference: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
cols <- brewer.pal(n = 5, name = "BrBG")
# Line chart drawn with that palette.
hc_colors(
  hc_add_series(
    highchart(),
    data = happy5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ),
  colors = cols
)
# Iceland is not visible enough
# Colour palette: five colours from the ColorBrewer "Greens" palette.
# Palette reference: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
cols2 <- brewer.pal(n = 5, name = "Greens")
# Line chart drawn with that palette.
hc_colors(
  hc_add_series(
    highchart(),
    data = happy5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ),
  colors = cols2
)
# too monotonous
# Colour palette: five colours from the ColorBrewer "Set2" palette.
# Palette reference: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
cols3 <- brewer.pal(n = 5, name = "Set2")
# Line chart drawn with that palette.
hc_colors(
  hc_add_series(
    highchart(),
    data = happy5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ),
  colors = cols3
)
# Set2-coloured line chart, now with axis titles.
highchart() %>%
  hc_add_series(
    data = happy5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ) %>%
  hc_colors(colors = cols3) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "GDP ($ trillion)"))
# Final chart for the five countries: Set2 colours, chart title, axis titles,
# circle markers on every series, and the legend placed top right.
highchart() %>%
  hc_add_series(
    data = happy5order,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ) %>%
  hc_colors(colors = cols3) %>%
  hc_title(text = "GDP Growth in World's 5 Happiest Countries") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
  hc_plotOptions(
    series = list(
      marker = list(symbol = "circle")
    )
  ) %>%
  hc_legend(verticalAlign = "top", align = "right")
# Rows for the five countries with ISO3 codes CHN, IND, IDN, KEN and PHL,
# sorted by year so that lines are drawn chronologically.
growing5 <- nations %>%
  filter(iso3c %in% c("CHN", "IND", "IDN", "KEN", "PHL")) %>%
  arrange(year)
# Same presentation as the happiest-countries chart, applied to growing5.
highchart() %>%
  hc_add_series(
    data = growing5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ) %>%
  hc_colors(colors = cols3) %>%
  hc_title(text = "GDP Growth in World's 5 Fastest Growing Economies") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
  hc_plotOptions(
    series = list(
      marker = list(symbol = "circle")
    )
  ) %>%
  hc_legend(verticalAlign = "top", align = "right")
# Static ggplot2 line chart of GDP (trillions) by year for the five countries.
# happy5 holds several countries per year, so colour is mapped to country:
# this splits the data into one line per country. Without a grouping
# aesthetic, geom_line() joined every row into a single zigzag path.
ggplot(happy5, aes(x = year, y = gdp_tn, color = country)) +
  geom_line()
Default settings
# Prepare data: total GDP (trillions) per year and region, ignoring missing
# values, sorted by year and then region.
regions <- arrange(
  summarize(
    group_by(nations, year, region),
    gdp_tn = sum(gdp_tn, na.rm = TRUE)
  ),
  year, region
)
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# Area chart with default settings: regional GDP (trillions) by year,
# one series per region.
hc_add_series(
  highchart(),
  data = regions,
  type = "area",
  mapping = hcaes(x = year, y = gdp_tn, group = region)
)
# Prepare data: sum of gdp_percap per year and region, ignoring missing
# values, sorted by year and then region. This overwrites `regions`.
# NOTE(review): this adds up per-capita figures across countries; confirm
# this is the intended measure rather than gdp_tn.
regions <- arrange(
  summarize(
    group_by(nations, year, region),
    gdp_percap = sum(gdp_percap, na.rm = TRUE)
  ),
  year, region
)
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# Area chart with default settings: summed gdp_percap by year,
# one series per region.
hc_add_series(
  highchart(),
  data = regions,
  type = "area",
  mapping = hcaes(x = year, y = gdp_percap, group = region)
)
# set color palette: seven colours from ColorBrewer "Set2"
cols <- brewer.pal(7, "Set2")
# The y axis below is titled "GDP ($ trillion)", so the plotted value must be
# GDP in trillions. `regions` holds summed gdp_percap at this point, so the
# trillion-dollar totals are aggregated here under their own name.
regions_gdp_tn <- nations %>%
  group_by(year, region) %>%
  summarize(gdp_tn = sum(gdp_tn, na.rm = TRUE), .groups = "drop") %>%
  arrange(year, region)
# stacked area chart
highchart() %>%
  hc_add_series(
    data = regions_gdp_tn,
    type = "area",
    mapping = hcaes(x = year, y = gdp_tn, group = region)
  ) %>%
  hc_colors(cols) %>%
  hc_chart(style = list(fontFamily = "Georgia",
                        fontWeight = "bold")) %>%
  # Stack the regions, hide point markers (including on hover), and outline
  # each area with a thin white line.
  hc_plotOptions(series = list(
    stacking = "normal",
    marker = list(enabled = FALSE,
                  states = list(hover = list(enabled = FALSE))),
    lineWidth = 0.5,
    lineColor = "white"
  )) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
  hc_legend(align = "right", verticalAlign = "top",
            layout = "vertical") %>%
  hc_tooltip(enabled = FALSE)
# Minimum, quartiles, mean and maximum of GDP per capita for the five
# countries. summary() takes no na.rm argument (it was silently ignored) and
# reports missing values itself, so the misleading argument is dropped.
summary(happy5$gdp_percap)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17023 23746 31727 33559 40438 66817