Nations Dataset Charts Assignment

Author

Wilfried Bilong

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Load the nations dataset; the path is relative to the working directory.
nations <- read_csv(file = "Desktop/Data 110/nations.csv")
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(RColorBrewer)
# Open the raw table in the data viewer for a quick visual check.
view(nations)
# Keep only the rows where both GDP per capita and population are present,
# since both are needed to compute total GDP.
nations_nona <- filter(nations, !is.na(gdp_percap), !is.na(population))
# Derive total GDP (GDP per capita x population, divided by 1e12 so the values
# read in trillions), then keep only the columns used by the country chart.
nations_picked <- nations_nona |>
  mutate(GDP = gdp_percap * population / 1e12) |>
  select(gdp_percap, population, country, year, GDP)
nations_picked
# A tibble: 4,509 × 5
   gdp_percap population country               year   GDP
        <dbl>      <dbl> <chr>                <dbl> <dbl>
 1     73037.    1913190 United Arab Emirates  1991 0.140
 2     71960.    2127863 United Arab Emirates  1993 0.153
 3     83534.    3217865 United Arab Emirates  2001 0.269
 4     73154.    2019014 United Arab Emirates  1992 0.148
 5     74684.    2238281 United Arab Emirates  1994 0.167
 6     75427.    6010100 United Arab Emirates  2007 0.453
 7     87844.    3975945 United Arab Emirates  2004 0.349
 8     79480.    2467726 United Arab Emirates  1996 0.196
 9     82754.    5171255 United Arab Emirates  2006 0.428
10     84975.    3050128 United Arab Emirates  2000 0.259
# ℹ 4,499 more rows
# Restrict the data to the four economies compared in the first chart.
nations_chosen <- nations_picked |>
  filter(country %in% c("China", "Germany", "Japan", "United States"))
nations_chosen
# A tibble: 100 × 5
   gdp_percap population country  year   GDP
        <dbl>      <dbl> <chr>   <dbl> <dbl>
 1      1260. 1164970000 China    1992  1.47
 2      5053. 1303720000 China    2005  6.59
 3      2915. 1262645000 China    2000  3.68
 4      1091. 1150780000 China    1991  1.26
 5     12219. 1357380000 China    2013 16.6 
 6      2650. 1252735000 China    1999  3.32
 7     13255. 1364270000 China    2014 18.1 
 8      3934. 1288400000 China    2003  5.07
 9      4423. 1296075000 China    2004  5.73
10      1453. 1178440000 China    1993  1.71
# ℹ 90 more rows

Plot 1

# Line-and-point chart of total GDP over time, one coloured series per country.
Plot_1 <- nations_chosen |>
  ggplot(aes(x = year, y = GDP, group = country, color = country)) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "China's Rise to Becoming the Largest Economy",
    x = "Year",
    y = "GDP ($Trillions)"
  )

Plot_1

# Same total-GDP calculation as for the country chart (per-capita GDP x
# population, divided by 1e12), but keeping the region instead of the country.
nations_picked2 <- nations_nona |>
  mutate(GDP = gdp_percap * population / 1e12) |>
  select(region, population, year, gdp_percap, GDP)
nations_picked2
# A tibble: 4,509 × 5
   region                     population  year gdp_percap   GDP
   <chr>                           <dbl> <dbl>      <dbl> <dbl>
 1 Middle East & North Africa    1913190  1991     73037. 0.140
 2 Middle East & North Africa    2127863  1993     71960. 0.153
 3 Middle East & North Africa    3217865  2001     83534. 0.269
 4 Middle East & North Africa    2019014  1992     73154. 0.148
 5 Middle East & North Africa    2238281  1994     74684. 0.167
 6 Middle East & North Africa    6010100  2007     75427. 0.453
 7 Middle East & North Africa    3975945  2004     87844. 0.349
 8 Middle East & North Africa    2467726  1996     79480. 0.196
 9 Middle East & North Africa    5171255  2006     82754. 0.428
10 Middle East & North Africa    3050128  2000     84975. 0.259
# ℹ 4,499 more rows

Group by year and region

# Mark every region/year combination as a group so later summaries are
# computed per region and year.
nations_picked2 <- group_by(nations_picked2, region, year)
nations_picked2
# A tibble: 4,509 × 5
# Groups:   region, year [175]
   region                     population  year gdp_percap   GDP
   <chr>                           <dbl> <dbl>      <dbl> <dbl>
 1 Middle East & North Africa    1913190  1991     73037. 0.140
 2 Middle East & North Africa    2127863  1993     71960. 0.153
 3 Middle East & North Africa    3217865  2001     83534. 0.269
 4 Middle East & North Africa    2019014  1992     73154. 0.148
 5 Middle East & North Africa    2238281  1994     74684. 0.167
 6 Middle East & North Africa    6010100  2007     75427. 0.453
 7 Middle East & North Africa    3975945  2004     87844. 0.349
 8 Middle East & North Africa    2467726  1996     79480. 0.196
 9 Middle East & North Africa    5171255  2006     82754. 0.428
10 Middle East & North Africa    3050128  2000     84975. 0.259
# ℹ 4,499 more rows
# Collapse each group to a single row holding the sum of its GDP values;
# missing values are ignored by the sum.
nations_chosen2 <- summarise(
  nations_picked2,
  GDP_total = sum(GDP, na.rm = TRUE)
)
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.
# Print the summarised table of total GDP to check the result.
nations_chosen2 
# A tibble: 175 × 3
# Groups:   region [7]
   region               year GDP_total
   <chr>               <dbl>     <dbl>
 1 East Asia & Pacific  1990      5.52
 2 East Asia & Pacific  1991      6.03
 3 East Asia & Pacific  1992      6.50
 4 East Asia & Pacific  1993      7.04
 5 East Asia & Pacific  1994      7.64
 6 East Asia & Pacific  1995      8.29
 7 East Asia & Pacific  1996      8.96
 8 East Asia & Pacific  1997      9.55
 9 East Asia & Pacific  1998      9.60
10 East Asia & Pacific  1999     10.1 
# ℹ 165 more rows

Plot 2

# Stacked area chart of total GDP over time, one filled band per region.
# The white outline between bands is set as a fixed value on geom_area()
# rather than inside aes(): a constant placed in aes() is treated as a data
# mapping (a one-level variable), not as a literal colour, and would be
# inherited by any layer added later.
Plot_2 <- ggplot(
  nations_chosen2,
  aes(x = year, y = GDP_total, group = region, fill = region)
) +
  labs(title = "GDP by World Bank Region") +
  geom_area(color = "white") +
  xlab("Year") +
  ylab("GDP ($Trillions)") +
  scale_fill_brewer(palette = "Set2")

Plot_2