Unit 4 Homework Assignment

(Worth up to 10 points for each chart) Use dplyr and ggplot2 to process data and draw these two charts (shown below) from the Nations dataset. You do NOT need to incorporate interactivity, but you can, if you want to challenge yourself.

# Set up the environment and import the appropriate libraries
library(highcharter)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(RColorBrewer)
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggplot2)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Switch to the course data folder, then load the Nations CSV from it.
# NOTE(review): setwd() ties this script to one machine's directory layout;
# consider a project-relative path instead.
setwd("~/Documents/DATA 110/data")
nations <- read_csv(file = "nations.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )

# Inspect the imported data: column names, types, and leading values
str(nations)

## spec_tbl_df [5,275 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ iso2c             : chr [1:5275] "AD" "AD" "AD" "AD" ...
##  $ iso3c             : chr [1:5275] "AND" "AND" "AND" "AND" ...
##  $ country           : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
##  $ year              : num [1:5275] 1996 1994 2003 1990 2009 ...
##  $ gdp_percap        : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
##  $ population        : num [1:5275] 64291 62707 74783 54511 85474 ...
##  $ birth_rate        : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
##  $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
##  $ region            : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
##  $ income            : chr [1:5275] "High income" "High income" "High income" "High income" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   iso2c = col_character(),
##   ..   iso3c = col_character(),
##   ..   country = col_character(),
##   ..   year = col_double(),
##   ..   gdp_percap = col_double(),
##   ..   population = col_double(),
##   ..   birth_rate = col_double(),
##   ..   neonat_mortal_rate = col_double(),
##   ..   region = col_character(),
##   ..   income = col_character()
##   .. )

Extract the data that we need

# Add each country's total GDP in trillions of dollars:
# per-capita GDP times population, divided by one trillion (1e12).
nations2 <- mutate(
  nations,
  gdp_tn = gdp_percap * population / 1e12
)

# Prepare data for graph 1: keep only the rows whose ISO3 code is one of
# the four listed countries, ordered by year.
big4 <- nations2 %>%
  filter(iso3c %in% c("CHN", "DEU", "JPN", "USA")) %>%
  arrange(year)

Following the sample, draw the graph and adjust some details to meet the requirements

# Graph 1: GDP (trillions) by year for the big4 countries, one coloured
# series per country, drawn as points with a connecting line on top.
p1 <- ggplot(big4, aes(x = year, y = gdp_tn, color = country)) +
  geom_point(aes(shape = country), size = 2.5) +
  geom_line(size = 0.75, alpha = 0.75) +
  scale_color_brewer(palette = "Set1") +
  # shape is mapped to country, but every country gets the same marker
  # (shape code 16), so only colour distinguishes the series
  scale_shape_manual(values = rep(16, 4)) +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "China's Rise to Become the Largest Economy"
  ) +
  theme_light() +
  # centre the title
  theme(plot.title = element_text(hjust = 0.5))
p1

Prepare the dataset for graph 2

# Prepare the dataset for graph 2: total GDP (trillions) per year and region.
# Missing gdp_tn values are skipped in the sum (na.rm = TRUE).
regions <- nations2 %>%
  group_by(year, region) %>%
  # .groups = "drop" returns an ungrouped tibble, so no leftover grouping by
  # year leaks into later steps and the summarise() grouping message is avoided
  summarize(gdp_tn = sum(gdp_tn, na.rm = TRUE), .groups = "drop") %>%
  arrange(year, region)

## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

Output the graph according to the dataset

# Graph 2: area chart of GDP (trillions) by year, filled by region.
# The outline colour and width are set in geom_area(); the extra
# color/lwd arguments previously passed to ggplot() had no effect on the
# plot and have been removed.
p2 <- ggplot(regions, aes(x = year, y = gdp_tn, fill = region)) +
  geom_area(alpha = 1, size = 0.5, colour = "white") +
  scale_fill_brewer(palette = "Set2") +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP in Trillions by Year") +
  theme_light() +
  # centre the title
  theme(plot.title = element_text(hjust = 0.5))

p2

Self-enhancement (fail)

# Interactive attempt at graph 1: build the same static ggplot of the big4
# data, then convert it with ggplotly(), passing the tooltip entries listed
# below.
p9 <- ggplot(big4, aes(x = year, y = gdp_tn, color = country)) +
  geom_point(aes(shape = country), size = 2.5) +
  geom_line(size = 0.75, alpha = 0.75) +
  scale_color_brewer(palette = "Set1") +
  scale_shape_manual(values = rep(16, 4)) +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "China's Rise to Become the Largest Economy"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))
# p9 is deliberately overwritten: from here on it holds the plotly widget
p9 <- ggplotly(p9, tooltip = c("gdp_tn", "year", "color"))
p9

Homework 4