Nations GDP

Author

Davi Krause

Starting libraries and Data set

Any assignment must start with the tools you will need. Here are the libraries used and the initialization of the data set.

#Libraries
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
# Load the nations data set (column-type message suppressed), then keep only
# the rows where both gdp_percap and population are present, since both are
# needed to compute total GDP.
nations <- read_csv("nations.csv", show_col_types = FALSE)
nations_nona <- filter(nations, !(is.na(gdp_percap) | is.na(population)))

Creating GDP in trillions.

# Keep the four columns used by the country chart and add GDP: per-capita GDP
# multiplied by population, scaled to trillions.
nations_use <- nations_nona |>
  select(gdp_percap, population, year, country) |>
  mutate(GDP = gdp_percap * population / 1e12)

nations_use
# A tibble: 4,509 × 5
   gdp_percap population  year country                GDP
        <dbl>      <dbl> <dbl> <chr>                <dbl>
 1     73037.    1913190  1991 United Arab Emirates 0.140
 2     71960.    2127863  1993 United Arab Emirates 0.153
 3     83534.    3217865  2001 United Arab Emirates 0.269
 4     73154.    2019014  1992 United Arab Emirates 0.148
 5     74684.    2238281  1994 United Arab Emirates 0.167
 6     75427.    6010100  2007 United Arab Emirates 0.453
 7     87844.    3975945  2004 United Arab Emirates 0.349
 8     79480.    2467726  1996 United Arab Emirates 0.196
 9     82754.    5171255  2006 United Arab Emirates 0.428
10     84975.    3050128  2000 United Arab Emirates 0.259
# ℹ 4,499 more rows
# Restrict the data to the four economies compared in the chart.
nations_selected <- nations_use |>
  filter(country %in% c("China", "Germany", "Japan", "United States"))
nations_selected
# A tibble: 100 × 5
   gdp_percap population  year country   GDP
        <dbl>      <dbl> <dbl> <chr>   <dbl>
 1      1260. 1164970000  1992 China    1.47
 2      5053. 1303720000  2005 China    6.59
 3      2915. 1262645000  2000 China    3.68
 4      1091. 1150780000  1991 China    1.26
 5     12219. 1357380000  2013 China   16.6 
 6      2650. 1252735000  1999 China    3.32
 7     13255. 1364270000  2014 China   18.1 
 8      3934. 1288400000  2003 China    5.07
 9      4423. 1296075000  2004 China    5.73
10      1453. 1178440000  1993 China    1.71
# ℹ 90 more rows

Now let's look at the chart.

# Line chart of GDP over time for the selected countries: one coloured line
# (with point markers) per country.
GDPs_chart <- ggplot(
  nations_selected,
  aes(x = year, y = GDP, group = country, colour = country)
) +
  geom_line() +
  geom_point() +
  labs(
    title = "China's Rise to Become the Largest Economy",
    x = "Year",
    y = "GDP ($Trillions)"
  ) +
  scale_color_brewer(palette = "Set1")

GDPs_chart

Second Plot

Now let's look at world GDP and how it is divided among regions. For that we need the original data set again, this time keeping the region column instead of the country column.

# Compute GDP in trillions for every row, keeping region (rather than country)
# so that the values can be aggregated per region.
nations_use2 <- nations_nona |>
  select(region, population, year, gdp_percap) |>
  mutate(GDP = gdp_percap * population / 1e12)
head(nations_use2)
# A tibble: 6 × 5
  region                     population  year gdp_percap   GDP
  <chr>                           <dbl> <dbl>      <dbl> <dbl>
1 Middle East & North Africa    1913190  1991     73037. 0.140
2 Middle East & North Africa    2127863  1993     71960. 0.153
3 Middle East & North Africa    3217865  2001     83534. 0.269
4 Middle East & North Africa    2019014  1992     73154. 0.148
5 Middle East & North Africa    2238281  1994     74684. 0.167
6 Middle East & North Africa    6010100  2007     75427. 0.453

Let's group the rows by the region they belong to and by year, so that we can see the trend over time:

# Group by region and year so that a following summary yields one row per
# region/year pair.
nations_use2 <- group_by(nations_use2, region, year)
nations_use2
# A tibble: 4,509 × 5
# Groups:   region, year [175]
   region                     population  year gdp_percap   GDP
   <chr>                           <dbl> <dbl>      <dbl> <dbl>
 1 Middle East & North Africa    1913190  1991     73037. 0.140
 2 Middle East & North Africa    2127863  1993     71960. 0.153
 3 Middle East & North Africa    3217865  2001     83534. 0.269
 4 Middle East & North Africa    2019014  1992     73154. 0.148
 5 Middle East & North Africa    2238281  1994     74684. 0.167
 6 Middle East & North Africa    6010100  2007     75427. 0.453
 7 Middle East & North Africa    3975945  2004     87844. 0.349
 8 Middle East & North Africa    2467726  1996     79480. 0.196
 9 Middle East & North Africa    5171255  2006     82754. 0.428
10 Middle East & North Africa    3050128  2000     84975. 0.259
# ℹ 4,499 more rows
# Total GDP for every group of nations_use2; missing values are skipped by
# sum().
nations_selected2 <- nations_use2 |>
  summarise(GDP_total = sum(GDP, na.rm = TRUE))
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.
nations_selected2
# A tibble: 175 × 3
# Groups:   region [7]
   region               year GDP_total
   <chr>               <dbl>     <dbl>
 1 East Asia & Pacific  1990      5.52
 2 East Asia & Pacific  1991      6.03
 3 East Asia & Pacific  1992      6.50
 4 East Asia & Pacific  1993      7.04
 5 East Asia & Pacific  1994      7.64
 6 East Asia & Pacific  1995      8.29
 7 East Asia & Pacific  1996      8.96
 8 East Asia & Pacific  1997      9.55
 9 East Asia & Pacific  1998      9.60
10 East Asia & Pacific  1999     10.1 
# ℹ 165 more rows

Graph

Finally, we can plot a graph that lets us see global GDP divided by region.

# Area chart of total GDP per region over time. The white outline between
# regions is a fixed setting on geom_area(); a constant does not belong inside
# aes(), where it would be treated as a data mapping.
GDPs_chart2 <- ggplot(
  nations_selected2,
  aes(x = year, y = GDP_total, group = region, fill = region)
) +
  geom_area(colour = "white") +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($Trillions)"
  ) +
  scale_fill_brewer(palette = "Set2")

GDPs_chart2