Load libraries and read in the Nations dataset

source: World Bank Indicators portal: https://data.worldbank.org/indicator/

Variables in nations.csv:
- iso2c, iso3c: Two- and three-letter codes for each country, assigned by the International Organization for Standardization.
- country: Country name.
- year: Year of the observation.
- population: Estimated total population at mid-year, including all residents apart from refugees.
- gdp_percap: Gross Domestic Product per capita in current international dollars, corrected for purchasing power in different territories.
- life_expect: Life expectancy at birth, in years.
- birth_rate: Live births during the year per 1,000 people, based on the mid-year population estimate.
- neonat_mortal_rate: Neonatal mortality rate: babies dying before reaching 28 days of age, per 1,000 live births in a given year.
- region, income: World Bank regions and income groups, as defined in the World Bank's country classification.

# Attach the packages used in this analysis. tidyverse already attaches
# ggplot2 and readr; the explicit calls below are kept as written.
library(tidyverse)
library(ggplot2)
library(psych)
library(RColorBrewer)
library(readr)
library(plotly)
# library() stops with an error when the package is missing, whereas
# require() only warns and returns FALSE, so a missing mgcv would go unnoticed
# until the GAM smoother is drawn.
library(mgcv)       # for GAM

# Read the nations data set into a tibble.
# NOTE(review): this is an absolute, machine-specific path and will not
# resolve on another computer; consider a project-relative path.
nations <- read_csv('/Users/raulmiranda/Desktop/DATA 110 Fall 2020/Databases/nations.csv')

Check the top lines

# Preview the first six rows to check column names and types.
head(nations, n = 6L)
## # A tibble: 6 x 11
##   iso2c iso3c country  year gdp_percap life_expect population birth_rate
##   <chr> <chr> <chr>   <dbl>      <dbl>       <dbl>      <dbl>      <dbl>
## 1 AD    AND   Andorra  2007         NA          NA      82683       10.1
## 2 AD    AND   Andorra  2011         NA          NA      83751       NA  
## 3 AD    AND   Andorra  2013         NA          NA      80788       NA  
## 4 AD    AND   Andorra  2008         NA          NA      83861       10.4
## 5 AD    AND   Andorra  1992         NA          NA      58888       12.1
## 6 AD    AND   Andorra  2006         NA          NA      80991       10.6
## # … with 3 more variables: neonat_mortal_rate <dbl>, region <chr>, income <chr>

Filter for only four countries: USA, China, Germany and S. Korea

# Keep the rows for the four countries of interest and add each country's
# total GDP in trillions of dollars (per-capita GDP times population).
# "^China" is anchored so that only names beginning with "China" match.
# NOTE(review): "Korea" matches every country name containing that word;
# confirm that only South Korea is selected.
# The former trailing arrange() had no sort columns, so it never reordered
# the rows; it has been dropped and the row order is unchanged.
uschgrkr <- nations %>%
  filter(
    grepl("United States", country) |
      grepl("^China", country) |
      grepl("Germany", country) |
      grepl("Korea", country)
  ) %>%
  mutate(gdp = gdp_percap * population / 10^12)
uschgrkr
## # A tibble: 108 x 12
##    iso2c iso3c country  year gdp_percap life_expect population birth_rate
##    <chr> <chr> <chr>   <dbl>      <dbl>       <dbl>      <dbl>      <dbl>
##  1 CN    CHN   China    1996      2071.        70.4 1217550000       17.0
##  2 CN    CHN   China    2005      5093.        74.0 1303720000       12.4
##  3 CN    CHN   China    2007      6864.        74.6 1317885000       12.1
##  4 CN    CHN   China    1995      1869.        70.2 1204855000       17.1
##  5 CN    CHN   China    2008      7635.        74.8 1324655000       12.1
##  6 CN    CHN   China    1997      2277.        70.7 1230075000       16.6
##  7 CN    CHN   China    1990       987.        69.3 1135185000       21.1
##  8 CN    CHN   China    1991      1099.        69.4 1150780000       19.7
##  9 CN    CHN   China    1992      1268.        69.6 1164970000       18.3
## 10 CN    CHN   China    2001      3227.        72.4 1271850000       13.4
## # … with 98 more rows, and 4 more variables: neonat_mortal_rate <dbl>,
## #   region <chr>, income <chr>, gdp <dbl>

Group by Regions and Year

# Total GDP, in trillions of dollars, for every region-year combination.
# Each country's GDP is rebuilt as per-capita GDP times population; rows where
# that product is missing are skipped by na.rm = TRUE.
# The summarised tibble remains grouped by region.
regional <- nations %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp_percap * population / 10^12, na.rm = TRUE))
regional
## # A tibble: 189 x 3
## # Groups:   region [7]
##    region               year   GDP
##    <chr>               <dbl> <dbl>
##  1 East Asia & Pacific  1990  5.59
##  2 East Asia & Pacific  1991  6.10
##  3 East Asia & Pacific  1992  6.57
##  4 East Asia & Pacific  1993  7.11
##  5 East Asia & Pacific  1994  7.71
##  6 East Asia & Pacific  1995  8.39
##  7 East Asia & Pacific  1996  9.09
##  8 East Asia & Pacific  1997  9.66
##  9 East Asia & Pacific  1998  9.75
## 10 East Asia & Pacific  1999 10.3 
## # … with 179 more rows

Plot GDP by Country and Year (comparing LOESS, polynomial lm, and GAM smoothing)

# Three versions of the same scatter plot of GDP against year, coloured by
# country. They differ only in the smoother drawn through each country's
# points; se = FALSE suppresses the confidence band in all three.

# p1: LOESS smoother.
p1 <- ggplot(data = uschgrkr) +
  geom_point(mapping = aes(x = year, y = gdp, colour = country)) +
  geom_smooth(
    mapping = aes(x = year, y = gdp, colour = country),
    method = "loess",
    formula = y ~ x,
    se = FALSE
  ) +
  scale_colour_brewer(palette = "Set1") +
  labs(
    title = "China is the Fastest Growing Economy in this Century",
    x = "Year",
    y = "GDP ($trillion)"
  )

# p2: linear model with a fourth-degree polynomial in year.
p2 <- ggplot(data = uschgrkr) +
  geom_point(mapping = aes(x = year, y = gdp, colour = country)) +
  geom_smooth(
    mapping = aes(x = year, y = gdp, colour = country),
    method = "lm",
    formula = y ~ poly(x, 4),
    se = FALSE
  ) +
  scale_colour_brewer(palette = "Set1") +
  labs(
    title = "China is the Fastest Growing Economy in this Century",
    x = "Year",
    y = "GDP ($trillion)"
  )

# p3: generalized additive model with a smooth term in year.
p3 <- ggplot(data = uschgrkr) +
  geom_point(mapping = aes(x = year, y = gdp, colour = country)) +
  geom_smooth(
    mapping = aes(x = year, y = gdp, colour = country),
    method = "gam",
    formula = y ~ s(x),
    se = FALSE
  ) +
  scale_colour_brewer(palette = "Set1") +
  labs(
    title = "China is the Fastest Growing Economy in this Century",
    x = "Year",
    y = "GDP ($trillion)"
  )

# Render the three static plots in turn.
p1

p2

p3

# Interactive version of the GAM plot.
ggplotly(p3)

Plot GDP by Region

# Stacked area chart of total GDP per World Bank region over time.
# Regions are mapped to `fill`, so the Brewer palette has to be attached to
# the fill scale. The previous colour scale had no effect because the outline
# colour is the constant "white" rather than a mapped aesthetic.
p4 <- ggplot(regional, aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white") +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "GDP by World Bank Region", x = "Year", y = "GDP ($trillion)")
p4

# Interactive version of the same chart.
ggplotly(p4)