Load Packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Load Dataset and add a column showing GDP in trillions of dollars

 # Read the nations data by full path instead of calling setwd(), so the
# working directory is left untouched and the chunk has no side effects.
data_dir <- "/Users/smhenderson/Desktop/DATA110/R/Datasets"
nations <- read_csv(file.path(data_dir, "nations.csv")) %>%
  # gdp_percap * population gives total GDP; dividing by 1e12 expresses it
  # in trillions of dollars.
  mutate(gdp_tn = gdp_percap * population / 1e12)
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#unique(nations$country)

Find the 4 countries with the highest GDP recorded in any year

# Sort every country-year row from largest to smallest GDP; order() places
# rows whose gdp_tn is NA at the end.
nations2 <- nations[with(nations, order(gdp_tn, decreasing = TRUE)), ]
# Walking down that ranking, keep the first four distinct country names.
top_countries <- unique(nations2[["country"]])[seq_len(4)]
print(top_countries)
## [1] "China"         "United States" "India"         "Japan"

Filter the dataset to keep only the top 4 countries

# Keep only the rows for the top-GDP countries, ordered by year. Filtering on
# `top_countries` instead of retyping the names keeps this step in sync with
# the ranking if the underlying data changes.
nations3 <- nations2 %>%
  filter(country %in% top_countries) %>%
  arrange(year)

Create a line chart showing neonatal mortality rates in countries with the highest GDPs (China, India, Japan, & USA)

# Line chart of neonatal mortality by year, one line per country; each
# country gets its own Set1 colour and its own point shape.
ggplot(
  data = nations3,
  mapping = aes(
    x = year,
    y = neonat_mortal_rate,
    colour = country,
    shape = country,
    fill = country
  )
) +
  geom_line() +
  geom_point(size = 3) +
  xlab("Year") +
  ylab("Neonatal Mortality Rate") +
  ggtitle("Neonatal Mortality Rates in Countries with the Highest GDPs") +
  scale_colour_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  scale_shape_manual(
    values = c("China" = 16, "India" = 15, "Japan" = 17, "United States" = 18)
  ) +
  # theme_bw() first, then override: no panel border, no legend title,
  # white legend keys, centred plot title.
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.key = element_rect(fill = "white"),
    panel.border = element_blank()
  )

Aggregate the dataset to get total GDP by region and year

# Total GDP (in trillions) for each region in each year, ignoring missing
# values; the result is an ungrouped table sorted by year, then region.
nations4 <- nations %>%
  summarise(
    gdp = sum(gdp_tn, na.rm = TRUE),
    .by = c(region, year)
  ) %>%
  arrange(year, region)

# Keep only the years 2000 and later. nations4 is already an ungrouped table
# with one row per region-year, so no group_by() is needed here; leaving it
# out also keeps nations5 from carrying stray grouping into later steps.
nations5 <- nations4 %>%
  filter(year >= 2000) %>%
  arrange(year, region)
#unique(nations4$region)

Area chart showing GDP by World Bank Regions

# Area chart of GDP per region over time, with thin white outlines
# separating the regions and the major/minor grid lines removed.
ggplot(data = nations5, mapping = aes(x = year, y = gdp, fill = region)) +
  geom_area(colour = "white", linewidth = 0.5) +
  xlab("Year") +
  ylab("GDP ($trillion)") +
  ggtitle("GDP by World Bank Regions between 2000 - 2014") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )