Week 4 Assignment

Assignment Description

Use dplyr and ggplot2 to process data and draw these two charts (shown below) from the Nations dataset. You do NOT need to incorporate interactivity, but you can, if you want to challenge yourself.

Examples are shown below:

**Example Charts**

Example Charts

Details for Nations Dataset Charts Assignment

• For both charts, you will first need to create a new variable in the data, using mutate from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion.
• Draw both charts with ggplot2.
• For the first chart, you will need to filter the data with dplyr for the four desired countries. When making the chart with ggplot2 you will need to add both geom_point and geom_line layers, and use the Set1 ColorBrewer palette using: scale_color_brewer(palette = "Set1").
• For the second chart, using dplyr you will need to group_by region and year, and then summarize on your mutated value for gdp_percap using summarise(sum = sum(gdp_percap, na.rm = TRUE)). (There will be null values, or NAs, in this data, so you will need to use na.rm = TRUE).
• Each region's area will be generated by the command geom_area().
• When drawing the chart with ggplot2, you will need to use the Set2 ColorBrewer palette using scale_fill_brewer(palette = "Set2").
• Think about the difference between fill and color when making the chart, and where the above fill command needs to go in order for the regions to fill with the different colors when making the chart, and put a very thin white line around each area.

Load Libraries

library(rvest)
## Loading required package: xml2
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter()         masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag()            masks stats::lag()
## x purrr::pluck()          masks rvest::pluck()
library(ggsci)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(RColorBrewer)
library(ggplot2)

Load Dataset

# Point the session at the folder that holds nations.csv, then read the file.
# NOTE(review): this absolute path is specific to one machine; the relative
# read_csv("nations.csv") calls in this document rely on this setwd() call.
setwd("C:/Users/Valued Customer/Desktop/Lovebug/Montgomery College/DATA 110/Week 4")
nations <- read_csv("nations.csv")
## Parsed with column specification:
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )

Load and Process Nations dataset

Load the nations data and add a column showing GDP in trillions of dollars.

# Re-read the table and attach gdp_tn: total GDP in trillions of dollars,
# i.e. per-capita GDP times population, divided by 1e12.
nations <- mutate(
  read_csv("nations.csv"),
  gdp_tn = gdp_percap * population / 1e12
)
## Parsed with column specification:
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )

2020 Five Happiest Countries

Filter and Arrange the Data

Filter the data to the top five countries in the 2020 World Happiness Report.

The five happiest countries were reported as:

  1. Finland
  2. Denmark
  3. Norway
  4. Iceland
  5. The Netherlands

The filtered data is then arranged by year so that highcharter can use it to create the order in the time series drawing. If there is no arrangement by year, any line drawn through the data will follow the path of the data order, not the chronological order.

# ISO3 codes of the five happiest countries, in ranking order:
# Finland, Denmark, Norway, Iceland, Netherlands.
happy_codes <- c("FIN", "DNK", "NOR", "ISL", "NLD")

# Keep only those five countries and sort chronologically so that lines drawn
# through the rows follow time order rather than file order.
happy5 <- nations %>%
  filter(iso3c %in% happy_codes) %>%
  arrange(year)

# Same rows in the same order as happy5; kept under its own name because the
# titled chart later in the document reads happy5order.
happy5order <- happy5

# Attempt to change legend order at line 228
# One row-wise logical mask per country, all built from the same table and
# the same column so they are directly comparable.
A_Finland <- (happy5order$iso3c == "FIN")
B_Denmark <- (happy5order$iso3c == "DNK")
C_Norway <- (happy5order$iso3c == "NOR")
D_Iceland <- (happy5order$iso3c == "ISL")
E_Netherlands <- (happy5order$iso3c == "NLD")

Chart the Data

# Country names as a factor whose levels follow the happiness ranking
# (1st to 5th). factor() has to be given the country column: handed the whole
# data frame it matches each column, not each row, against the levels and
# produces nothing but NAs.
happylegend <- factor(
  happy5$country,
  levels = c("Finland", "Denmark", "Norway", "Iceland", "Netherlands")
)
# Basic line chart with default settings: one series per country.

hc_add_series(
  highchart(),
  data = happy5,
  type = "line",
  mapping = hcaes(x = year, y = gdp_tn, group = country)
)
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `as_data_frame()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `rename_()` is deprecated as of dplyr 0.7.0.
## Please use `rename()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Basic column chart with default settings: one set of columns per country.

hc_add_series(
  highchart(),
  data = happy5,
  type = "column",
  mapping = hcaes(x = year, y = gdp_tn, group = country)
)
# Basic scatter chart; "lightblue" is passed along as the series colour.

hc_add_series(
  highchart(),
  data = happy5,
  type = "scatter",
  mapping = hcaes(x = year, y = gdp_tn, group = country),
  color = "lightblue"
)
# Show the legend from 1 to 5 on the happiness scale.
# Each country is added as its own series, in ranking order, so the series
# (and with them the legend entries) appear in that order. The earlier
# hc_legend(labels(...)) call could not achieve this: labels() is base R's
# generic for extracting an object's labels, not a Highcharts legend option.
happy_rank <- c("Finland", "Denmark", "Norway", "Iceland", "Netherlands")

ranked_chart <- highchart()
for (country_name in happy_rank) {
  ranked_chart <- hc_add_series(
    ranked_chart,
    data = happy5[happy5$country == country_name, ],
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn),
    name = country_name
  )
}
ranked_chart

Use a ColorBrewer palette

# Colour palette: five colours from the ColorBrewer "BrBG" scheme.
# Cookbook reference: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/

cols <- brewer.pal(n = 5, name = "BrBG")
hc_colors(
  hc_add_series(
    highchart(),
    data = happy5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ),
  cols
)
# Iceland is not visible enough
# Colour palette: five colours from the ColorBrewer "Greens" scheme.
# Cookbook reference: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/

cols2 <- brewer.pal(n = 5, name = "Greens")
hc_colors(
  hc_add_series(
    highchart(),
    data = happy5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ),
  cols2
)
# too monotonous
# Colour palette: five colours from the ColorBrewer "Set2" scheme.
# Cookbook reference: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/

cols3 <- brewer.pal(n = 5, name = "Set2")
hc_colors(
  hc_add_series(
    highchart(),
    data = happy5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ),
  cols3
)

Add Axis Labels

# Line chart per country in the cols3 palette, now with explicit axis titles.
highchart() %>%
  hc_add_series(
    data = happy5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ) %>%
  hc_colors(colors = cols3) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "GDP ($ trillion)"))

Change the Legend Position & Create a Drawing Title

# Final happiest-countries chart: chart title, axis titles, circular markers
# on every series, and the legend moved to the top-right corner.
highchart() %>%
  hc_add_series(
    data = happy5order,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ) %>%
  hc_colors(colors = cols3) %>%
  hc_title(text = "GDP Growth in World's 5 Happiest Countries") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
  hc_plotOptions(
    series = list(
      marker = list(symbol = "circle")
    )
  ) %>%
  hc_legend(verticalAlign = "top", align = "right")

Five Fastest Growing Economies

# Rows for China, India, Indonesia, Kenya and the Philippines (matched by ISO3
# code), sorted by year so that chart lines run chronologically.
growing5 <- nations %>%
  filter(iso3c %in% c("CHN", "IND", "IDN", "KEN", "PHL")) %>%
  arrange(year)
# Line chart of GDP over time for the growing5 countries, styled like the
# happiest-countries chart: cols3 palette, circular markers, legend top-right.
highchart() %>%
  hc_add_series(
    data = growing5,
    type = "line",
    mapping = hcaes(x = year, y = gdp_tn, group = country)
  ) %>%
  hc_colors(colors = cols3) %>%
  hc_title(text = "GDP Growth in World's 5 Fastest Growing Economies") %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
  hc_plotOptions(
    series = list(
      marker = list(symbol = "circle")
    )
  ) %>%
  hc_legend(verticalAlign = "top", align = "right")

ggplot2

5 Happiest Countries

# One coloured line with point markers per country. Mapping colour to country
# also groups the rows; without it geom_line() joins all five countries into a
# single zig-zag path. The Set1 palette and the geom_point layer follow the
# assignment description.
ggplot(happy5, aes(x = year, y = gdp_tn, color = country)) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  xlab("Year") +
  ylab("GDP ($ trillion)")

Make an Area Chart

Default settings

# Prepare data: total gdp_tn per year and region (NAs ignored in the sum),
# ordered by year and then region.
regions <- group_by(nations, year, region)
regions <- summarize(regions, gdp_tn = sum(gdp_tn, na.rm = TRUE))
regions <- arrange(regions, year, region)
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# Area chart with default settings: one area per region, gdp_tn against year.
hc_add_series(
  highchart(),
  data = regions,
  type = "area",
  mapping = hcaes(x = year, y = gdp_tn, group = region)
)
# Prepare data: rebuild `regions`, this time summing gdp_percap per year and
# region (NAs ignored), ordered by year and then region. This replaces the
# earlier `regions` table.
regions <- group_by(nations, year, region)
regions <- summarize(regions, gdp_percap = sum(gdp_percap, na.rm = TRUE))
regions <- arrange(regions, year, region)
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# Area chart with default settings: one area per region, summed gdp_percap
# against year.
hc_add_series(
  highchart(),
  data = regions,
  type = "area",
  mapping = hcaes(x = year, y = gdp_percap, group = region)
)

Area Chart Stacked with Set2 colors

# set color palette: seven colours from the ColorBrewer "Set2" scheme
cols <- brewer.pal(7, "Set2")

# Regional GDP totals in $ trillion. They are computed here rather than taken
# from `regions`, which at this point holds summed gdp_percap: plotting that
# under a "GDP ($ trillion)" axis title would mislabel the values.
regions_gdp <- nations %>%
  group_by(year, region) %>%
  summarize(gdp_tn = sum(gdp_tn, na.rm = TRUE)) %>%
  arrange(year, region)

# stacked area chart
highchart() %>%
  hc_add_series(
    data = regions_gdp,
    type = "area",
    mapping = hcaes(x = year, y = gdp_tn, group = region)
  ) %>%
  hc_colors(cols) %>%
  hc_chart(style = list(fontFamily = "Georgia",
                        fontWeight = "bold")) %>%
  # Stack the areas, hide point markers and their hover state, and outline
  # each area with a very thin white line.
  hc_plotOptions(series = list(stacking = "normal",
                               marker = list(enabled = FALSE,
                                             states = list(hover = list(enabled = FALSE))),
                               lineWidth = 0.5,
                               lineColor = "white")) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
  hc_legend(align = "right", verticalAlign = "top",
            layout = "vertical") %>%
  hc_tooltip(enabled = FALSE)
# Six-number summary of per-capita GDP for the five happiest countries.
# summary() has no na.rm argument (it was silently ignored); missing values
# are excluded from the quantiles and reported in a separate NA's count.
summary(happy5$gdp_percap)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17023   23746   31727   33559   40438   66817