Nations Graphs

Author

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(ggplot2)
# Read the source data through an explicit path instead of calling setwd(),
# so the session's working directory is left untouched.
# NOTE(review): the directory is still machine-specific — confirm the data
# location (or switch to a project-relative path) before sharing.
data_dir <- path.expand("~/Documents/Data 110")
nations <- read_csv(file.path(data_dir, "nations.csv"))

Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Print the freshly imported tibble to inspect its columns and first rows.
nations

# A tibble: 5,275 × 10
   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
 1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
 2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
 3 AD    AND   Andorra  2003         NA      74783       10.3                2  
 4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
 5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
 6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
 7 AD    AND   Andorra  2004         NA      78337       10.9                2  
 8 AD    AND   Andorra  2010         NA      84419        9.8                1.7
 9 AD    AND   Andorra  2001         NA      67770       11.8                2.1
10 AD    AND   Andorra  2002         NA      71046       11.2                2.1
# ℹ 5,265 more rows
# ℹ 2 more variables: region <chr>, income <chr>

# Discard rows lacking a per-capita GDP figure, then derive each row's total
# GDP (per-capita GDP times population) scaled down by 1e12.
nations <- nations |>
  filter(!is.na(gdp_percap)) |>
  mutate(gdp_in_trillions = (gdp_percap * population) / 1e12)
nations

# A tibble: 4,509 × 11
   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
 1 AE    ARE   United…  1991     73037.    1913190       24.6                7.9
 2 AE    ARE   United…  1993     71960.    2127863       22.4                7.3
 3 AE    ARE   United…  2001     83534.    3217865       15.8                5.5
 4 AE    ARE   United…  1992     73154.    2019014       23.5                7.6
 5 AE    ARE   United…  1994     74684.    2238281       21.3                6.9
 6 AE    ARE   United…  2007     75427.    6010100       12.8                4.7
 7 AE    ARE   United…  2004     87844.    3975945       14.2                5.1
 8 AE    ARE   United…  1996     79480.    2467726       19.3                6.4
 9 AE    ARE   United…  2006     82754.    5171255       13.3                4.9
10 AE    ARE   United…  2000     84975.    3050128       16.4                5.6
# ℹ 4,499 more rows
# ℹ 3 more variables: region <chr>, income <chr>, gdp_in_trillions <dbl>

# Keep only the rows belonging to the four countries listed below.
focus_countries <- c("China", "Germany", "Japan", "United States")
first_plot <- filter(nations, country %in% focus_countries)
first_plot

# A tibble: 100 × 11
   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
 1 CN    CHN   China    1992      1260. 1164970000       18.3               29.4
 2 CN    CHN   China    2005      5053. 1303720000       12.4               14  
 3 CN    CHN   China    2000      2915. 1262645000       14.0               21.2
 4 CN    CHN   China    1991      1091. 1150780000       19.7               29.7
 5 CN    CHN   China    2013     12219. 1357380000       12.1                6.3
 6 CN    CHN   China    1999      2650. 1252735000       14.6               22.2
 7 CN    CHN   China    2014     13255. 1364270000       12.4                5.9
 8 CN    CHN   China    2003      3934. 1288400000       12.4               17.1
 9 CN    CHN   China    2004      4423. 1296075000       12.3               15.5
10 CN    CHN   China    1993      1453. 1178440000       18.1               28.8
# ℹ 90 more rows
# ℹ 3 more variables: region <chr>, income <chr>, gdp_in_trillions <dbl>

# Line chart with point markers of GDP over time, one colour per country.
plot_num_1 <- ggplot(
  first_plot,
  aes(x = year, y = gdp_in_trillions, color = country, fill = country)
) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "China's Rise to Become The Largest Economy",
    x = "Year",
    y = "GDP ($trillions)"
  ) +
  theme_minimal(base_size = 14)
plot_num_1

# Sum gdp_in_trillions for every region/year combination.
# `.groups = "drop"` returns an ungrouped tibble, so no leftover `region`
# grouping leaks into later steps and summarise() no longer emits its
# "grouped output" message.
second_plot <- nations |>
  group_by(region, year) |>
  summarise(
    GDP = sum(gdp_in_trillions, na.rm = TRUE),
    .groups = "drop"
  )

`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.

# Print the per-region, per-year GDP totals to check the aggregation.
second_plot

# A tibble: 175 × 3
# Groups:   region [7]
   region               year   GDP
   <chr>               <dbl> <dbl>
 1 East Asia & Pacific  1990  5.52
 2 East Asia & Pacific  1991  6.03
 3 East Asia & Pacific  1992  6.50
 4 East Asia & Pacific  1993  7.04
 5 East Asia & Pacific  1994  7.64
 6 East Asia & Pacific  1995  8.29
 7 East Asia & Pacific  1996  8.96
 8 East Asia & Pacific  1997  9.55
 9 East Asia & Pacific  1998  9.60
10 East Asia & Pacific  1999 10.1 
# ℹ 165 more rows

# Area chart of GDP over time, one filled band per region; the constant
# white outline separates adjacent bands.
plot_num_2 <- ggplot(
  second_plot,
  aes(x = year, y = GDP, color = region, fill = region)
) +
  geom_area(color = "white", linewidth = 0.4) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP By World Bank Region",
    x = "Year",
    y = "GDP ($trillions)"
  )
plot_num_2