# Load libraries and prepare the data

library(readr)
## Warning: package 'readr' was built under R version 4.5.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the nations data set by its explicit location instead of calling
# setwd(). Changing the working directory is a session-wide side effect that
# silently alters how every later relative path resolves; building the path
# here keeps that effect out of the script.
# path.expand() resolves the leading "~" to the user's home directory.
nations <- read_csv(
  file.path(path.expand("~/Desktop/datasets"), "nations.csv")
)
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Report how many missing values each column contains.
nations |>
  is.na() |>
  colSums()
##              iso2c              iso3c            country               year 
##                 25                  0                  0                  0 
##         gdp_percap         population         birth_rate neonat_mortal_rate 
##                766                 14                295                525 
##             region             income 
##                  0                  0
# Keep only the rows where both GDP per capita and population are present.
nations <- nations |>
  filter(!(is.na(gdp_percap) | is.na(population)))

# GDP: total GDP per row, in trillions of dollars

# Add a `gdp` column: GDP per capita times population, scaled down by 10^12.
nations <- mutate(
  nations,
  gdp = gdp_percap * population / 10^12
)

# Graph 1: GDP over time for China, Germany, Japan and the United States

# Restrict the data to the four countries shown in the line chart.
filtered_nations <- nations |>
  filter(country %in% c("China", "Germany", "Japan", "United States"))

# One line (with point markers) per country, coloured by country.
ggplot(
  data = filtered_nations,
  mapping = aes(x = year, y = gdp, color = country)
) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "GDP Over Time by Country",
    x = "Year",
    y = "GDP ($ trillions)",
    color = "Country"
  )

# Graph 2: GDP by region over time

# Sum `gdp` over all rows of each region in each year.
# `.groups = "drop"` returns an ungrouped tibble: without it the result stays
# grouped by region, which can silently change how later verbs behave, and
# summarise() prints a regrouping message on every run.
filtered_regions <- nations |>
  group_by(region, year) |>
  summarise(
    gdp = sum(gdp, na.rm = TRUE),
    .groups = "drop"
  )
# Stacked area chart of regional GDP by year; white outlines separate the
# bands.
ggplot(
  data = filtered_regions,
  mapping = aes(x = year, y = gdp, fill = region)
) +
  geom_area(color = "white") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by Region Over Time",
    x = "Year",
    y = "GDP ($ trillions)",
    fill = "Region"
  )