Nations Assignment

Author

Aline Mayrink

Load Libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(RColorBrewer)
library(ggfortify)
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

Load the Data

# Read the nations data through an explicit path rather than calling setwd(),
# so loading the file does not change the session's working directory.
# NOTE(review): the folder is still specific to one machine; consider keeping
# nations.csv next to this document and using a relative path.
data_dir <- "~/Desktop/DATA/Data Visualization 110/DataSets"
nations <- read_csv(file.path(data_dir, "nations.csv"))
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Data Transformation: GDP in Trillions

# Add a `gdp` column holding each country-year's total GDP in trillions:
# per-capita GDP multiplied by population, then divided by 1e12.
gdp_trillions <- mutate(
  nations,
  gdp = gdp_percap * population / 1e12
)

New Dataset - Top 4 Countries

# Sum GDP (trillions) per country over all rows, ignoring missing values,
# then keep the four countries with the largest totals, largest first.
top_4_gdp <- summarise(
  group_by(gdp_trillions, country),
  total_gdp = sum(gdp, na.rm = TRUE)
) |>
  arrange(desc(total_gdp)) |>
  head(n = 4)
top_4_gdp
# A tibble: 4 × 2
  country       total_gdp
  <chr>             <dbl>
1 United States     283. 
2 China             165. 
3 Japan              88.8
4 India              78.0

Filter Original Data to the Top 4 Countries (GDP in Trillions)

# Keep only the rows of the full data set whose country appears in the
# top-4 table; columns and row order of `gdp_trillions` are retained.
top_4_gdp_data <- semi_join(gdp_trillions, top_4_gdp, by = "country")
top_4_gdp_data
# A tibble: 100 × 11
   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
 1 CN    CHN   China    1992      1260. 1164970000       18.3               29.4
 2 CN    CHN   China    2005      5053. 1303720000       12.4               14  
 3 CN    CHN   China    2000      2915. 1262645000       14.0               21.2
 4 CN    CHN   China    1991      1091. 1150780000       19.7               29.7
 5 CN    CHN   China    2013     12219. 1357380000       12.1                6.3
 6 CN    CHN   China    1999      2650. 1252735000       14.6               22.2
 7 CN    CHN   China    2014     13255. 1364270000       12.4                5.9
 8 CN    CHN   China    2003      3934. 1288400000       12.4               17.1
 9 CN    CHN   China    2004      4423. 1296075000       12.3               15.5
10 CN    CHN   China    1993      1453. 1178440000       18.1               28.8
# ℹ 90 more rows
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>

Chart 1: GDP Over Time for Top 4 Countries

# Chart 1: yearly GDP (trillions) for the four selected countries, drawn as
# lines with point markers and coloured per country with Brewer "Set1".
ggplot(
  data = top_4_gdp_data,
  mapping = aes(x = year, y = gdp, color = country)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  theme_minimal() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "GDP Over Time for Top 4 Countries",
    x = "Year",
    y = "GDP (Trillions)",
    color = "Country"
  )

Chart 2: Regional GDP Over Time

# Total GDP (trillions) for every region-year pair, ignoring missing values.
# `.groups = "drop"` returns an ungrouped tibble, so no leftover grouping by
# `region` leaks into later steps and summarise() no longer emits its
# "grouped output" message.
regional_gdp <- gdp_trillions |>
  group_by(region, year) |>
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")

# Chart 2: area chart of regional GDP over time, one fill colour per region
# (Brewer "Set2"), with thin white outlines separating the areas.
ggplot(regional_gdp, aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white", linewidth = 0.2) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Regional GDP Over Time",
    x = "Year",
    y = "GDP (Trillions)",
    fill = "Region"
  ) +
  theme_minimal()

Interactive Plot - Chart 1

# Interactive Chart 1: build the same line-and-point GDP plot for the four
# selected countries and convert it to a plotly widget in one step.
top_4_gdp_interactive <- ggplotly(
  ggplot(
    data = top_4_gdp_data,
    mapping = aes(x = year, y = gdp, color = country)
  ) +
    geom_line(linewidth = 1) +
    geom_point(size = 2) +
    scale_color_brewer(palette = "Set1") +
    labs(
      title = "GDP Over Time for Top 4 Countries",
      x = "Year",
      y = "GDP (Trillions)",
      color = "Country"
    ) +
    theme_minimal(base_size = 12)
)
top_4_gdp_interactive