Assignment 6 - Part 2

library(dplyr)
Warning: package 'dplyr' was built under R version 4.5.2

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.5.2

Dataset

# Load the nations dataset from the working directory and preview the first
# six rows to check that the columns were read as expected.
nations <- read.csv(file = "nations.csv")
head(nations, n = 6L)
  iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
1    AD   AND Andorra 1996         NA      64291       10.9                2.8
2    AD   AND Andorra 1994         NA      62707       10.9                3.2
3    AD   AND Andorra 2003         NA      74783       10.3                2.0
4    AD   AND Andorra 1990         NA      54511       11.9                4.3
5    AD   AND Andorra 2009         NA      85474        9.9                1.7
6    AD   AND Andorra 2011         NA      82326         NA                1.6
                 region      income
1 Europe & Central Asia High income
2 Europe & Central Asia High income
3 Europe & Central Asia High income
4 Europe & Central Asia High income
5 Europe & Central Asia High income
6 Europe & Central Asia High income

GDP variable

# Add total GDP in trillions: per-capita GDP times population, scaled by 1e12.
# Rows with a missing `gdp_percap` keep `NA` for `gdp`.
nations1 <- mutate(
  nations,
  gdp = gdp_percap * population / 1e12
)
head(nations1, n = 6L)
  iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
1    AD   AND Andorra 1996         NA      64291       10.9                2.8
2    AD   AND Andorra 1994         NA      62707       10.9                3.2
3    AD   AND Andorra 2003         NA      74783       10.3                2.0
4    AD   AND Andorra 1990         NA      54511       11.9                4.3
5    AD   AND Andorra 2009         NA      85474        9.9                1.7
6    AD   AND Andorra 2011         NA      82326         NA                1.6
                 region      income gdp
1 Europe & Central Asia High income  NA
2 Europe & Central Asia High income  NA
3 Europe & Central Asia High income  NA
4 Europe & Central Asia High income  NA
5 Europe & Central Asia High income  NA
6 Europe & Central Asia High income  NA

Chart 1

Filtering the 4 countries

# Keep only the rows for the four economies compared in Chart 1.
chart1 <- filter(
  nations1,
  country %in% c("Germany", "Japan", "China", "United States")
)
head(chart1, n = 6L)
  iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
1    CN   CHN   China 1992   1260.162 1164970000      18.27               29.4
2    CN   CHN   China 2005   5053.379 1303720000      12.40               14.0
3    CN   CHN   China 2000   2915.415 1262645000      14.03               21.2
4    CN   CHN   China 1991   1091.449 1150780000      19.68               29.7
5    CN   CHN   China 2013  12218.521 1357380000      12.08                6.3
6    CN   CHN   China 1999   2649.745 1252735000      14.64               22.2
               region              income       gdp
1 East Asia & Pacific Upper middle income  1.468052
2 East Asia & Pacific Upper middle income  6.588191
3 East Asia & Pacific Upper middle income  3.681134
4 East Asia & Pacific Upper middle income  1.256017
5 East Asia & Pacific Upper middle income 16.585176
6 East Asia & Pacific Upper middle income  3.319429

Plotting

# Line-and-point chart of GDP over time, one colored series per country.
ggplot(data = chart1, mapping = aes(x = year, y = gdp, color = country)) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "China's Rise to Become the Largest Economy",
    x = "year",
    y = "GDP ($ trillions)"
  )

Chart 2

Preparing the Data

# Total GDP (in trillions) for each region in each year.
# `na.rm = TRUE` skips countries whose GDP is missing for that year.
# `.groups = "drop"` returns an ungrouped tibble, so later steps are not
# silently affected by leftover grouping and summarise() emits no
# regrouping message.
chart2 <- nations1 |>
  group_by(region, year) |>
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
`summarise()` has regrouped the output.
ℹ Summaries were computed grouped by region and year.
ℹ Output is grouped by region.
ℹ Use `summarise(.groups = "drop_last")` to silence this message.
ℹ Use `summarise(.by = c(region, year))` for per-operation grouping
  (`?dplyr::dplyr_by`) instead.
# Preview the first six region-year totals.
head(chart2, n = 6L)
# A tibble: 6 × 3
# Groups:   region [1]
  region               year   GDP
  <chr>               <int> <dbl>
1 East Asia & Pacific  1990  5.52
2 East Asia & Pacific  1991  6.03
3 East Asia & Pacific  1992  6.50
4 East Asia & Pacific  1993  7.04
5 East Asia & Pacific  1994  7.64
6 East Asia & Pacific  1995  8.29

Plotting

# Stacked area chart of total GDP over time, one filled band per region.
ggplot(chart2, aes(x = year, y = GDP, fill = region)) +
  # Thin white outlines separate the stacked bands; `linewidth` replaces the
  # `size` argument that ggplot2 3.4.0 deprecated for lines.
  geom_area(color = "white", linewidth = 0.2) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Region",
    x = "year",
    # Must be lower-case `y`: labs() ignores `Y`, so the custom axis label
    # was never applied. Units match the trillions scaling of `gdp`.
    y = "GDP ($ trillions)"
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Ignoring unknown labels:
• Y : "GDP (3 trillion)"