Load the packages

library(fansi)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.2     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Read the “nations” dataset from the working directory and create a new variable, gdp_tn (GDP in trillions)

# Read the nations data and add total GDP in trillions:
# GDP per capita times population, divided by 1e12.
nations <- mutate(
  read_csv("nations.csv"),
  gdp_tn = gdp_percap * population / 1e12
)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )
# Inspect the structure of the data: column names, types and first values.
str(nations)
## spec_tbl_df [5,275 x 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ iso2c             : chr [1:5275] "AD" "AD" "AD" "AD" ...
##  $ iso3c             : chr [1:5275] "AND" "AND" "AND" "AND" ...
##  $ country           : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
##  $ year              : num [1:5275] 1996 1994 2003 1990 2009 ...
##  $ gdp_percap        : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
##  $ population        : num [1:5275] 64291 62707 74783 54511 85474 ...
##  $ birth_rate        : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
##  $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
##  $ region            : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
##  $ income            : chr [1:5275] "High income" "High income" "High income" "High income" ...
##  $ gdp_tn            : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   iso2c = col_character(),
##   ..   iso3c = col_character(),
##   ..   country = col_character(),
##   ..   year = col_double(),
##   ..   gdp_percap = col_double(),
##   ..   population = col_double(),
##   ..   birth_rate = col_double(),
##   ..   neonat_mortal_rate = col_double(),
##   ..   region = col_character(),
##   ..   income = col_character()
##   .. )

Check and confirm that the new variable gdp_tn exists

# Open the data in the viewer to check the columns by eye.
view(nations)

Yes, gdp_tn exists.

Filter for China, Germany, Japan and USA.

# Subset to the four economies of interest (matched on ISO3 code),
# then order the rows chronologically.
big4 <- nations %>%
  filter(iso3c %in% c("CHN", "DEU", "JPN", "USA")) %>%
  arrange(year)
# Inspect the filtered data interactively.
view(big4)

The filter has worked. Only the four countries appear in the country column.

Plot the data

# Base chart: axes, labels and theme only. Geoms are added in later steps.
big4_chart <- ggplot(data = big4, mapping = aes(x = year, y = gdp_tn)) +
  theme_minimal(base_size = 14) +
  labs(
    title = "China's Rise as the Biggest Economy",
    x = "Year",
    y = "GDP (trillions)"
  )
# Print the empty template to check the axes and labels.
big4_chart

# Scatter layer: one point per country and year, coloured by country
# using the ColorBrewer "Set1" palette.
big4_chart +
  scale_color_brewer(palette = "Set1") +
  geom_point(mapping = aes(colour = country), size = 3)

If I try to add geom_line(), I get a weird figure:

# Points plus one connecting line per country.
# Mapping colour to country on geom_line() also defines the grouping; without
# it ggplot2 joins every observation into a single zig-zag path.
# Every layer must end with `+`: a scale written after a break in the chain is
# evaluated as a separate statement and is never applied to the plot.
big4_chart +
  geom_point(aes(color = country), size = 3) +
  geom_line(aes(color = country)) +
  scale_color_brewer(palette = "Set1")

GDP by World Bank Regions

Group the data by region

If I do it with summarise I get this error message:

Error in summarise(GDP = sum(gdp, na.rm = TRUE)) : object ‘gdp’ not found

If I do it without summarising I get this

# Add `group`: the region as an ordered factor with explicitly listed levels.
# The result is printed only; it is not assigned to a variable.
mutate(
  nations,
  group = ordered(
    region,
    levels = c(
      "East Asia & Pacific",
      "Europe & Central Asia",
      "Latin America & Caribbean",
      "Middle East & North Africa",
      "North America",
      "South Asia",
      "Sub-Saharan Africa"
    )
  )
)
## # A tibble: 5,275 x 12
##    iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
##    <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
##  1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
##  2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
##  3 AD    AND   Andorra  2003         NA      74783       10.3                2  
##  4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
##  5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
##  6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
##  7 AD    AND   Andorra  2004         NA      78337       10.9                2  
##  8 AD    AND   Andorra  2010         NA      84419        9.8                1.7
##  9 AD    AND   Andorra  2001         NA      67770       11.8                2.1
## 10 AD    AND   Andorra  2002         NA      71046       11.2                2.1
## # ... with 5,265 more rows, and 4 more variables: region <chr>, income <chr>,
## #   gdp_tn <dbl>, group <ord>

If I try to use geom_area(), I get a weird figure:

# Stacked area chart of total GDP per World Bank region.
# geom_area() stacks whatever rows it receives, so the country-level data is
# first collapsed to one total per region and year. na.rm = TRUE keeps
# countries with missing GDP from turning a whole regional total into NA.
nations %>%
  group_by(region, year) %>%
  summarise(gdp_tn = sum(gdp_tn, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = year, y = gdp_tn, fill = region)) +
  # A constant outline colour belongs outside aes(); inside aes() it is
  # treated as a data mapping and adds a spurious legend entry.
  geom_area(color = "white") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Regions",
    x = "Year",
    y = "GDP (trillions)"
  )