Week 4 Assignment

Assignment Description

Use dplyr and ggplot2 to process data and draw these two charts (shown below) from the Nations dataset. You do NOT need to incorporate interactivity, but you can, if you want to challenge yourself.

Examples are shown below:

**Example Charts**

Example Charts

Details for Nations Dataset Charts Assignment

• For both charts, you will first need to create a new variable in the data, using mutate from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion.
• Draw both charts with ggplot2.
• For the first chart, you will need to filter the data with dplyr for the four desired countries. When making the chart with ggplot2 you will need to add both geom_point and geom_line layers, and use the Set1 ColorBrewer palette using: scale_color_brewer(palette = "Set1").
• For the second chart, using dplyr you will need to group_by region and year, and then summarize on your mutated value for gdp_percap using summarise(sum = sum(gdp_percap, na.rm = TRUE)). (There will be null values, or NAs, in this data, so you will need to use na.rm = TRUE.)
• Each region's area will be generated by the command geom_area().
• When drawing the chart with ggplot2, you will need to use the Set2 ColorBrewer palette using scale_fill_brewer(palette = "Set2").
• Think about the difference between fill and color when making the chart, and where the above fill command needs to go in order for the regions to fill with the different colors when making the chart, and put a very thin white line around each area.

Load Libraries

library(rvest)
## Loading required package: xml2
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter()         masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag()            masks stats::lag()
## x purrr::pluck()          masks rvest::pluck()
library(ggsci)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(RColorBrewer)
library(ggplot2)

Load Dataset

# NOTE(review): hard-coded, machine-specific working directory. The relative
# "nations.csv" paths used by read_csv() in this file resolve against it.
setwd("C:/Users/Valued Customer/Desktop/Lovebug/Montgomery College/DATA 110/Week 4")
# Read the Nations CSV from the working directory into a tibble.
nations <- read_csv("nations.csv")
## Parsed with column specification:
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )

Load and Process Nations dataset

Load the nations data and add a column showing GDP in trillions of dollars.

# Read the Nations data and add `gdp_tn`: total GDP in trillions of dollars
# (GDP per capita times population, divided by one trillion).
nations <- mutate(
  read_csv("nations.csv"),
  gdp_tn = (gdp_percap * population) / 1e12
)
## Parsed with column specification:
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )

2020 Five Happiest Countries

Filter and Arrange the Data

Filter the data to the top five countries in the 2020 World Happiness Report.

The five happiest countries were reported as:

  1. Finland
  2. Denmark
  3. Norway
  4. Iceland
  5. The Netherlands

The filtered data is then arranged by year so that a time series is drawn in chronological order. This matters for highcharter, where a line drawn through the data follows the order of the rows rather than the chronological order; ggplot2's geom_line() connects points in order of the x variable regardless of row order.

# Keep only the five happiest countries of 2020 (Finland, Denmark, Norway,
# Iceland, Netherlands), matched on their ISO3 codes, sorted chronologically.
happy5 <- nations %>%
  filter(iso3c %in% c("FIN", "DNK", "NOR", "ISL", "NLD")) %>%
  arrange(year)
# Distribution of GDP (trillions) across the selected rows. summary() has no
# `na.rm` argument (it reports NA counts itself), so none is passed.
summary(happy5$gdp_tn)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005557 0.089288 0.165220 0.214785 0.271217 0.813793
# Attempt to change legend order at line 228  
#A_Finland <- (happy5order$country == "Finland") 
#B_Denmark <- (happy5order$iso3c == "DNK") 
#C_Norway  <- (happy5order$iso3c == "NOR")  
#D_Iceland <- (happy5$iso3c == "ISL") 
#E_Netherlands<- (happy5$iso3c == "NLD") 

Chart the Data

# Country names as a factor whose levels follow the happiness ranking, so the
# level order can drive legend order. factor() has to be given the country
# column: applied to the whole data frame, none of its values match the levels
# and the result is all NA.
happylegend <- factor(
  happy5$country,
  levels = c("Finland", "Denmark", "Norway", "Iceland", "Netherlands")
)
# prepare data: total GDP (trillions) per year and region.
# summarize() leaves the result grouped by year, so ungroup() at the end to
# keep later verbs from silently operating per year.
regions <- nations %>%
  group_by(year, region) %>%
  summarize(gdp_tn = sum(gdp_tn, na.rm = TRUE)) %>%
  arrange(year, region) %>%
  ungroup()
## `summarise()` regrouping output by 'year' (override with `.groups` argument)

Plot that I knew was wrong

# First attempt: a single line through all countries' points (no grouping),
# with centred title and light theme.
ggplot(data = happy5, mapping = aes(x = year, y = gdp_tn)) +
  geom_line() +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "GDP in Trillions by Year"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

Exploring Themes

# Line chart of GDP over time with the light theme and a centred title.
# Column names must be bare inside aes(): quoted, "year" and "gdp_tn" are
# treated as constant strings and every row collapses onto one point.
p1 <- ggplot(happy5, aes(x = year, y = gdp_tn)) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_line()
p1

Still on wrong plot…

# Base plot (axes, title, theme) without any geometry; still ungrouped, so the
# line added below runs through all countries' points together.
p2 <- ggplot(data = happy5, mapping = aes(x = year, y = gdp_tn)) +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "GDP in Trillions by Year"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p2 + geom_line()

Plot with dots, lines, and color by country

# Base plot with colour mapped to country, which also splits the data into one
# series per country for the layers added below.
p3 <- ggplot(
  data = happy5,
  mapping = aes(x = year, y = gdp_tn, color = country)
) +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "GDP in Trillions by Year"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

# Points first, then the connecting line for each country.
p3 + geom_point() + geom_line()

### Editing color & size

# Points and lines per country with a hand-picked colour palette.
# Point size is a fixed value, so it is set outside aes(); inside aes() it is
# treated as data and adds a meaningless "size" legend. The shape scale was
# dropped because no shape aesthetic is mapped in this plot.
p4 <- ggplot(happy5, aes(x = year, y = gdp_tn, color = country)) +
  geom_point(size = 0.5) +
  scale_color_manual(
    values = c(
      "lightsteelblue3", "sienna", "honeydew3", "darkseagreen4", "thistle4"
    )
  ) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p4 + geom_line()

Color Play

# Points and lines per country using shades of dark sea green.
# Point size is a fixed value, so it is set outside aes(); inside aes() it is
# treated as data and adds a meaningless "size" legend. The shape scale was
# dropped because no shape aesthetic is mapped in this plot.
p5 <- ggplot(happy5, aes(x = year, y = gdp_tn, color = country)) +
  geom_point(size = 0.5) +
  scale_color_manual(
    values = c(
      "darkseagreen1", "darkseagreen2", "darkseagreen3", "darkseagreen",
      "darkseagreen4"
    )
  ) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p5 + geom_line()

Size, Shape and Color play

# Each country gets its own colour and point shape; point size is fixed at 2.5.
p6 <- ggplot(
  data = happy5,
  mapping = aes(x = year, y = gdp_tn, color = country)
) +
  geom_point(mapping = aes(shape = country), size = 2.5) +
  scale_shape_manual(values = c(15, 16, 17, 18, 19)) +
  scale_color_manual(
    values = c(
      "azure3", "lavenderblush3", "gray78", "lightsteelblue4",
      "lightsteelblue3"
    )
  ) +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "GDP in Trillions by Year"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p6 + geom_line()

Theme play and alpha play

# Very faint (alpha 0.1) per-country lines on the minimal theme.
ggplot(
  data = happy5,
  mapping = aes(x = year, y = gdp_tn, color = country)
) +
  geom_line(alpha = 0.1) +
  theme_minimal()

Stacked Area Chart with custom fill colors

# Stacked area chart of GDP by country with a custom fill palette.
# A `color` constant passed to ggplot() itself is ignored, so it was removed,
# as was the shape scale (no shape aesthetic is mapped here).
p6a <- ggplot(happy5, aes(x = year, y = gdp_tn, fill = country)) +
  geom_area() +
  scale_fill_manual(
    values = c(
      "azure3", "lavenderblush3", "lightsteelblue4", "lightsteelblue2",
      "lightsteelblue3"
    )
  ) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

# The white lines must be stacked like the areas; an unstacked geom_line()
# would trace each country's raw GDP rather than the area boundaries.
p6a + geom_line(color = "white", position = "stack")

Trying to understand how to color line

# Stacked area chart with a dark outline around each area.
# A fixed outline colour belongs on the geom, outside aes(): inside aes() the
# string "gray9" is treated as data (default palette colour plus an extra
# legend). The `color`/`lwd` constants passed to ggplot() were ignored and are
# removed, as is the shape scale (no shape aesthetic is mapped here).
p6b <- ggplot(happy5, aes(x = year, y = gdp_tn, fill = country)) +
  geom_area(color = "gray9") +
  scale_fill_manual(
    values = c(
      "azure3", "lavenderblush3", "lightsteelblue4", "lightsteelblue2",
      "lightsteelblue3"
    )
  ) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p6b

Correctly colored line

# Stacked area chart with a white outline around each area, set as a constant
# on geom_area(). The shape scale was dropped: no shape aesthetic is mapped in
# this plot, so it had no effect.
p6b <- ggplot(happy5, aes(x = year, y = gdp_tn, fill = country)) +
  geom_area(color = "white") +
  scale_fill_manual(
    values = c(
      "azure3", "lavenderblush3", "lightsteelblue4", "lightsteelblue2",
      "lightsteelblue3"
    )
  ) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p6b

Five Fastest Growing Economies

# Rows for China, India, Indonesia, Kenya and the Philippines (matched on ISO3
# code), in chronological order.
growing5 <- nations %>%
  filter(iso3c %in% c("CHN", "IND", "IDN", "KEN", "PHL")) %>%
  arrange(year)
# GDP over time for the five growing economies: colour and point shape both
# vary by country, points drawn at size 2.5.
p7 <- ggplot(
  data = growing5,
  mapping = aes(x = year, y = gdp_tn, color = country)
) +
  geom_point(mapping = aes(shape = country), size = 2.5) +
  scale_shape_manual(values = c(15, 16, 17, 18, 19)) +
  scale_color_manual(
    values = c(
      "slategray4", "slategray3", "slategray2", "lightsteelblue4",
      "lightsteelblue1"
    )
  ) +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "GDP in Trillions by Year"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

# Slightly transparent connecting lines on top of the points.
p7 + geom_line(alpha = 0.75)

alpha play

# Same chart as before but with half-transparent points and fully opaque lines.
p8 <- ggplot(
  data = growing5,
  mapping = aes(x = year, y = gdp_tn, color = country)
) +
  geom_point(mapping = aes(shape = country), size = 2.5, alpha = 0.5) +
  scale_shape_manual(values = c(15, 16, 17, 18, 19)) +
  scale_color_manual(
    values = c(
      "slategray4", "slategray3", "slategray2", "lightsteelblue4",
      "lightsteelblue1"
    )
  ) +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "GDP in Trillions by Year"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p8 + geom_line(alpha = 1)

library(viridis)
## Warning: package 'viridis' was built under R version 4.0.2
## Loading required package: viridisLite
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.0.2
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow

Trying to figure out area chart lines

# Stacked area chart for the growing economies with a dark outline.
# A fixed outline colour belongs on the geom, outside aes(): inside aes() the
# string "gray9" is treated as data (default palette colour plus an extra
# legend). The `color`/`lwd` constants passed to ggplot() were ignored and are
# removed. theme_ipsum() was dropped because the complete theme_light() added
# after it replaced it entirely.
p8a <- ggplot(growing5, aes(x = year, y = gdp_tn, fill = country)) +
  geom_area(color = "gray9") +
  scale_fill_manual(
    values = c(
      "azure3", "lavenderblush3", "lightsteelblue4", "lightsteelblue2",
      "lightsteelblue3"
    )
  ) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p8a

Figured out area chart lines

# Stacked area chart for the growing economies with a white outline around
# each area. theme_ipsum() was dropped because the complete theme_light()
# added after it replaced it entirely.
p8b <- ggplot(growing5, aes(x = year, y = gdp_tn, fill = country)) +
  geom_area(color = "white") +
  scale_fill_manual(
    values = c(
      "azure3", "lavenderblush3", "lightsteelblue4", "lightsteelblue2",
      "lightsteelblue3"
    )
  ) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p8b

Playing with themes & alpha levels

# Semi-transparent stacked areas with thin white outlines and the discrete
# viridis fill palette.
# The `color`/`lwd` constants passed to ggplot() were ignored (the outline is
# set on geom_area()) and are removed; theme_ipsum() was dropped because the
# complete theme_light() added after it replaced it entirely.
p8c <- ggplot(growing5, aes(x = year, y = gdp_tn, fill = country)) +
  geom_area(alpha = 0.35, size = 0.5, colour = "white") +
  scale_fill_viridis(discrete = TRUE) +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p8c

Area Chart with scale_fill_brewer(palette = “Set2”)

# Semi-transparent stacked areas with thin white outlines, filled with the
# ColorBrewer "Set2" palette.
# The `color`/`lwd` constants passed to ggplot() were ignored (the outline is
# set on geom_area()) and are removed; theme_ipsum() was dropped because the
# complete theme_light() added after it replaced it entirely.
p8d <- ggplot(growing5, aes(x = year, y = gdp_tn, fill = country)) +
  geom_area(alpha = 0.35, size = 0.5, colour = "white") +
  scale_fill_brewer(palette = "Set2") +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

p8d

Yay!