library(ggplot2)
library(RColorBrewer)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Open the dplyr documentation for filter(); once dplyr is attached its
# filter() masks stats::filter().
help("filter", package = "dplyr")

# Load the nations data set.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this exact file location exists.
nations <- read.csv(
  file = "/Users/michellenguyen/Downloads/DATASETS 2/nations.csv"
)

# Preview the first six rows.
head(nations, n = 6)
##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    AD   AND Andorra 1996         NA      64291       10.9                2.8
## 2    AD   AND Andorra 1994         NA      62707       10.9                3.2
## 3    AD   AND Andorra 2003         NA      74783       10.3                2.0
## 4    AD   AND Andorra 1990         NA      54511       11.9                4.3
## 5    AD   AND Andorra 2009         NA      85474        9.9                1.7
## 6    AD   AND Andorra 2011         NA      82326         NA                1.6
##                  region      income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income
# Strongly favour fixed over scientific notation when numbers are printed.
options(scipen = 1000)

# Total GDP per country-year in trillions: per-capita GDP times population,
# scaled down by 1e12. Rows with a missing input yield NA.
nations <- mutate(
  nations,
  gdp_eachcountry = gdp_percap * population / 1e12
)
head(nations, n = 6)
##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    AD   AND Andorra 1996         NA      64291       10.9                2.8
## 2    AD   AND Andorra 1994         NA      62707       10.9                3.2
## 3    AD   AND Andorra 2003         NA      74783       10.3                2.0
## 4    AD   AND Andorra 1990         NA      54511       11.9                4.3
## 5    AD   AND Andorra 2009         NA      85474        9.9                1.7
## 6    AD   AND Andorra 2011         NA      82326         NA                1.6
##                  region      income gdp_eachcountry
## 1 Europe & Central Asia High income              NA
## 2 Europe & Central Asia High income              NA
## 3 Europe & Central Asia High income              NA
## 4 Europe & Central Asia High income              NA
## 5 Europe & Central Asia High income              NA
## 6 Europe & Central Asia High income              NA
# Keep only the rows for the four economies compared in the line chart.
nations_filtered <- nations %>%
  filter(country %in% c("China", "Germany", "Japan", "United States"))
head(nations_filtered, n = 6)
##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    CN   CHN   China 1992   1260.162 1164970000      18.27               29.4
## 2    CN   CHN   China 2005   5053.379 1303720000      12.40               14.0
## 3    CN   CHN   China 2000   2915.415 1262645000      14.03               21.2
## 4    CN   CHN   China 1991   1091.449 1150780000      19.68               29.7
## 5    CN   CHN   China 2013  12218.521 1357380000      12.08                6.3
## 6    CN   CHN   China 1999   2649.745 1252735000      14.64               22.2
##                region              income gdp_eachcountry
## 1 East Asia & Pacific Upper middle income        1.468052
## 2 East Asia & Pacific Upper middle income        6.588191
## 3 East Asia & Pacific Upper middle income        3.681134
## 4 East Asia & Pacific Upper middle income        1.256017
## 5 East Asia & Pacific Upper middle income       16.585176
## 6 East Asia & Pacific Upper middle income        3.319429
# Line chart of total GDP over time for the selected countries: one colour per
# country, with a point marking each observation.
nations_chart <- ggplot(
  data = nations_filtered,
  aes(x = year, y = gdp_eachcountry, color = country)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "China's Rise to Become the Largest Economy",
    x = "year",
    # The original label was missing its closing parenthesis.
    y = "GDP ($ trillion)"
  ) +
  theme_minimal(base_size = 14) +
  scale_color_brewer(palette = "Set1")
nations_chart

# Total GDP per region and year, summing the per-country GDP column and
# skipping missing values. `.groups = "drop"` returns an ungrouped tibble, so
# no grouping leaks into later steps and summarise() emits no grouping message.
nations_groupby <- nations %>%
  group_by(region, year) %>%
  summarize(GDP = sum(gdp_eachcountry, na.rm = TRUE), .groups = "drop")
head(nations_groupby)
## # A tibble: 6 × 3
##   region               year   GDP
##   <chr>               <int> <dbl>
## 1 East Asia & Pacific  1990  5.52
## 2 East Asia & Pacific  1991  6.03
## 3 East Asia & Pacific  1992  6.50
## 4 East Asia & Pacific  1993  7.04
## 5 East Asia & Pacific  1994  7.64
## 6 East Asia & Pacific  1995  8.29
# Stacked area chart of total GDP over time, one filled band per region.
nations_chart2 <- ggplot(
  data = nations_groupby,
  aes(x = year, y = GDP, fill = region)
) +
  geom_area() +
  labs(
    title = "GDP by World Bank Region",
    x = "year",
    # The original label was missing its closing parenthesis.
    y = "GDP ($ trillion)"
  ) +
  scale_fill_brewer(palette = "Set2")
nations_chart2