Load Data

# Point the R session at the folder holding the course data sets, so the
# relative file name passed to read.csv() below resolves against it.
# NOTE(review): this is a hard-coded, machine-specific path, so the script only
# runs on a machine with this exact folder layout. Several path components end
# in a space ("MC Data Science ", "DATA 110 ", "DataSets ") -- confirm these
# match the real directory names.
setwd("~/Desktop/MC Data Science /DATA 110 /DataSets ")
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(RColorBrewer)
# Read the nations data set from the working directory and preview its
# first six rows.
nat <- read.csv(file = "nations.csv")
head(nat, n = 6)
##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    AD   AND Andorra 1996         NA      64291       10.9                2.8
## 2    AD   AND Andorra 1994         NA      62707       10.9                3.2
## 3    AD   AND Andorra 2003         NA      74783       10.3                2.0
## 4    AD   AND Andorra 1990         NA      54511       11.9                4.3
## 5    AD   AND Andorra 2009         NA      85474        9.9                1.7
## 6    AD   AND Andorra 2011         NA      82326         NA                1.6
##                  region      income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income

Create new variable

Use dplyr's mutate() to add a new variable: the GDP of each country in trillions of dollars, computed by multiplying gdp_percap by population and dividing by one trillion.

# Derive total GDP in trillions of dollars: per-capita GDP times population,
# scaled down by 1e12. Rows with a missing gdp_percap yield NA.
nat1 <- mutate(nat, gdp_trill = gdp_percap * population / 1e12)
head(nat1)
##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    AD   AND Andorra 1996         NA      64291       10.9                2.8
## 2    AD   AND Andorra 1994         NA      62707       10.9                3.2
## 3    AD   AND Andorra 2003         NA      74783       10.3                2.0
## 4    AD   AND Andorra 1990         NA      54511       11.9                4.3
## 5    AD   AND Andorra 2009         NA      85474        9.9                1.7
## 6    AD   AND Andorra 2011         NA      82326         NA                1.6
##                  region      income gdp_trill
## 1 Europe & Central Asia High income        NA
## 2 Europe & Central Asia High income        NA
## 3 Europe & Central Asia High income        NA
## 4 Europe & Central Asia High income        NA
## 5 Europe & Central Asia High income        NA
## 6 Europe & Central Asia High income        NA

Chart 1

Filter the data for the four desired countries (China, Germany, Japan, United States). The chart needs both geom_point and geom_line layers. Use the Set1 ColorBrewer palette via scale_color_brewer(palette = "Set1").

# Filter: keep only the four economies compared in Chart 1.
nat2 <- filter(
  nat1,
  country %in% c("China", "Germany", "Japan", "United States")
)
head(nat2)
##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    CN   CHN   China 1992   1260.162 1164970000      18.27               29.4
## 2    CN   CHN   China 2005   5053.379 1303720000      12.40               14.0
## 3    CN   CHN   China 2000   2915.415 1262645000      14.03               21.2
## 4    CN   CHN   China 1991   1091.449 1150780000      19.68               29.7
## 5    CN   CHN   China 2013  12218.521 1357380000      12.08                6.3
## 6    CN   CHN   China 1999   2649.745 1252735000      14.64               22.2
##                region              income gdp_trill
## 1 East Asia & Pacific Upper middle income  1.468052
## 2 East Asia & Pacific Upper middle income  6.588191
## 3 East Asia & Pacific Upper middle income  3.681134
## 4 East Asia & Pacific Upper middle income  1.256017
## 5 East Asia & Pacific Upper middle income 16.585176
## 6 East Asia & Pacific Upper middle income  3.319429
# Chart: GDP over time for the four selected countries, one colour per
# country, drawn as points joined by lines.
c1 <- ggplot(
  data = nat2,
  mapping = aes(x = year, y = gdp_trill, color = country)
)
c1 +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = "Year",
    y = "GDP ($Trillion)",
    title = "China's Rise to Become the Largest Economy"
  ) +
  # The country names are self-explanatory, so the legend title is hidden.
  theme(legend.title = element_blank())

Chart 2

Group by region and year, then summarise the mutated GDP value (gdp_trill) using summarise(GDP = sum(gdp_trill, na.rm = TRUE)). Each region's area is drawn with geom_area(). Use the Set2 ColorBrewer palette via scale_fill_brewer(palette = "Set2"). Put a very thin white line around each area.

# Work on data
# Total GDP (in trillions) per region and year. Missing country values are
# skipped by na.rm = TRUE rather than turning the whole sum into NA.
# .groups = "drop" returns an ungrouped tibble, so later steps do not inherit a
# leftover grouping by region, and it silences the "regrouping output" message
# that summarise() otherwise prints.
nat3 <- nat1 %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp_trill, na.rm = TRUE), .groups = "drop")
head(nat3)
## # A tibble: 6 x 3
##   region               year   GDP
##   <chr>               <int> <dbl>
## 1 East Asia & Pacific  1990  5.52
## 2 East Asia & Pacific  1991  6.03
## 3 East Asia & Pacific  1992  6.50
## 4 East Asia & Pacific  1993  7.04
## 5 East Asia & Pacific  1994  7.64
## 6 East Asia & Pacific  1995  8.29
# Chart
# Area chart of total GDP per World Bank region over time, one fill colour
# per region (geom_area stacks the regions by default).
c2 <- ggplot(nat3, aes(x = year, y = GDP, fill = region))
# A very thin white outline separates the regions. `size` is the line-width
# argument in the ggplot2 3.3.x series this document was built with.
# The y-axis label now has its closing parenthesis and matches Chart 1.
c2 + geom_area(colour = "white", size = 0.2) +
  scale_fill_brewer(palette = "Set2") +
  xlab("Year") + ylab("GDP ($Trillion)") +
  ggtitle("GDP by World Bank Region")