HW_week7_nations

# Make the datasets folder the working directory so that the relative path
# given to read_csv() later in this report resolves.
# NOTE(review): this absolute path is machine-specific; the report will only
# run unchanged on a computer with the same folder layout.
setwd("C:/Users/small/Desktop/MCollege/2021/DATA110/Datasets")

I loaded the libraries and the nations.csv data file.

library(readr)
library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(plotly)

## Warning: package 'plotly' was built under R version 4.0.4

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Import the nations data set as a tibble; the relative file name is resolved
# against the current working directory.
nations <- read_csv("nations.csv")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )

# Print the tibble to check that the import worked.
nations

## # A tibble: 5,275 x 10
##    iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_r~
##    <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>            <dbl>
##  1 AD    AND   Andorra  1996         NA      64291       10.9              2.8
##  2 AD    AND   Andorra  1994         NA      62707       10.9              3.2
##  3 AD    AND   Andorra  2003         NA      74783       10.3              2  
##  4 AD    AND   Andorra  1990         NA      54511       11.9              4.3
##  5 AD    AND   Andorra  2009         NA      85474        9.9              1.7
##  6 AD    AND   Andorra  2011         NA      82326       NA                1.6
##  7 AD    AND   Andorra  2004         NA      78337       10.9              2  
##  8 AD    AND   Andorra  2010         NA      84419        9.8              1.7
##  9 AD    AND   Andorra  2001         NA      67770       11.8              2.1
## 10 AD    AND   Andorra  2002         NA      71046       11.2              2.1
## # ... with 5,265 more rows, and 2 more variables: region <chr>, income <chr>

I created a new variable ‘gdp_country’ using mutate from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion. Then I filtered the data with dplyr for the four desired countries.

# Build the data for chart 1.
# gdp_country is each country's total GDP in trillions of dollars:
# per-capita GDP times population, divided by 1e12.
# The rows are then restricted to the four economies being compared.
# `%in%` tests membership in the set of names; it keeps the same rows as a
# chain of `==` comparisons joined with `|`, but is shorter and never yields NA.
nations1 <- nations %>%
  mutate(gdp_country = (gdp_percap * population) / 1e12) %>%
  filter(country %in% c("China", "Germany", "Japan", "United States"))

# Print the filtered tibble to inspect the result.
nations1

## # A tibble: 100 x 11
##    iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_r~
##    <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>            <dbl>
##  1 CN    CHN   China    1992      1260. 1164970000       18.3             29.4
##  2 CN    CHN   China    2005      5053. 1303720000       12.4             14  
##  3 CN    CHN   China    2000      2915. 1262645000       14.0             21.2
##  4 CN    CHN   China    1991      1091. 1150780000       19.7             29.7
##  5 CN    CHN   China    2013     12219. 1357380000       12.1              6.3
##  6 CN    CHN   China    1999      2650. 1252735000       14.6             22.2
##  7 CN    CHN   China    2014     13255. 1364270000       12.4              5.9
##  8 CN    CHN   China    2003      3934. 1288400000       12.4             17.1
##  9 CN    CHN   China    2004      4423. 1296075000       12.3             15.5
## 10 CN    CHN   China    1993      1453. 1178440000       18.1             28.8
## # ... with 90 more rows, and 3 more variables: region <chr>, income <chr>,
## #   gdp_country <dbl>

For chart #1, built with ggplot2, I added both geom_point and geom_line layers and used the Set1 ColorBrewer palette.

# Chart 1: total GDP over time, one coloured line per country with a point
# at every observation. The static ggplot is built first and then handed to
# ggplotly(), which turns it into an interactive plotly widget.
plot1 <- (
  ggplot(nations1, aes(x = year, y = gdp_country, colour = country)) +
    geom_line() +
    geom_point() +
    labs(
      title = "China's Rise to Become the Largest Economy",
      x = "Year",
      y = "GDP ($ trillion)"
    ) +
    theme_minimal(base_size = 12) +
    scale_color_brewer(palette = "Set1")
) %>%
  ggplotly()

# Display the interactive chart.
plot1

For chart #2, I used dplyr to group the data by region and year, and then summarized the newly mutated GDP value. (The data contain missing values, or NAs, so I needed to use na.rm = TRUE.)

# Build the data for chart 2: total GDP, in trillions of dollars, for every
# combination of region and year.
# na.rm = TRUE makes sum() skip countries whose GDP is missing instead of
# returning NA for the whole group.
# summarise() removes only the last grouping level, so the result is still
# grouped by region.
nations2 <- nations %>%
  mutate(gdp_country = gdp_percap * population / 1e12) %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp_country, na.rm = TRUE))

## `summarise()` has grouped output by 'region'. You can override using the `.groups` argument.

# Print the per-region, per-year GDP totals.
nations2

## # A tibble: 175 x 3
## # Groups:   region [7]
##    region               year   GDP
##    <chr>               <dbl> <dbl>
##  1 East Asia & Pacific  1990  5.52
##  2 East Asia & Pacific  1991  6.03
##  3 East Asia & Pacific  1992  6.50
##  4 East Asia & Pacific  1993  7.04
##  5 East Asia & Pacific  1994  7.64
##  6 East Asia & Pacific  1995  8.29
##  7 East Asia & Pacific  1996  8.96
##  8 East Asia & Pacific  1997  9.55
##  9 East Asia & Pacific  1998  9.60
## 10 East Asia & Pacific  1999 10.1 
## # ... with 165 more rows

For plot #2 with ggplot2, I used geom_area to draw each region’s area. To fill the areas, I applied a ColorBrewer palette using scale_fill_brewer(palette = "Set2"). To draw a white line between the areas, I set the color argument of geom_area to 'white'.

# Chart 2: area chart of total GDP over time, one filled band per region
# (geom_area() stacks the bands by default).
plot2 <- ggplot(data = nations2, aes(x = year, y = GDP, fill = region)) +
  # A white outline separates neighbouring bands.
  geom_area(color = "white") +
  scale_fill_brewer(palette = "Set2") +
  # All labels are set in one labs() call; the title typo ("Wolrd") is fixed.
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ trillion)",
    fill = "Regions"
  ) +
  theme_minimal(base_size = 12)

# Display the chart.
plot2

HW_week7_nations

Olga Tolchinsky

March 22, 2021