Nations Assignment

Author

Ryan Seabold

Import dataset and libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
# Load the nations dataset from the working directory; readr infers the
# column types and reports them in a message.
data <- read_csv(file = "nations.csv")
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Create a GDP column

# GDP = GDP per capita times population, divided by one trillion so the new
# `gdp` column is expressed in trillions.
data <- mutate(data, gdp = gdp_percap * population / 10^12)

Filter out the unused nations

# Selected nations: Kuwait, Bahrain, Qatar, and United Arab Emirates.
# `%in%` tests membership in the whole set at once, so adding or removing a
# nation only means editing this vector.
selected_nations <- c("Kuwait", "Bahrain", "Qatar", "United Arab Emirates")

# Keep only the rows belonging to the selected nations.
nations_filtered <- data |>
  filter(country %in% selected_nations)

Sort the filtered nations by year

# Order rows by country, then by year within each country. Sorting on both
# keys directly yields the same row order as a grouped arrange, but does not
# leave `nations_filtered` grouped for the steps that follow.
nations_filtered <- nations_filtered |>
  arrange(country, year)

Create a line plot

# Plot GDP over time for the selected nations: one colored line per country,
# with a point at every observed year. Rows with missing values are dropped
# by ggplot2 with a warning.
lineplot <- nations_filtered |>
  ggplot(aes(x = year, y = gdp, color = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  # Human-readable labels instead of raw column names; `gdp` was scaled to
  # trillions when the column was created.
  labs(
    title = "GDP by Country and Year",
    x = "Year",
    y = "GDP (in trillions)",
    color = "Country"
  )

# Print the plot so it is rendered in the document.
lineplot
Warning: Removed 15 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 15 rows containing missing values or values outside the scale range
(`geom_line()`).

Create an area plot

# Create a tibble with the total GDP for every region/year combination.
nations_grouped <- data |>
  group_by(region, year) |>
  summarise(
    # Remove NAs so they don't affect the plot
    GDP = sum(gdp, na.rm = TRUE),
    # Return an ungrouped tibble; this also silences the "grouped output"
    # message that summarise() emits when `.groups` is left unspecified.
    .groups = "drop"
  )
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.
# Stacked area chart of total GDP per year, one filled band per region.
areaplot <- ggplot(
  data = nations_grouped,
  mapping = aes(x = year, y = GDP, fill = region)
) +
  # Thin white outline separates neighbouring areas
  geom_area(color = "white", linewidth = 0.1) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by Region and Year",
    x = "Year",
    y = "Total GDP (in trillions)",
    fill = "Region"
  ) +
  theme_minimal()

# Print the plot so it is rendered in the document.
areaplot