Load Packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Load Dataset and add a column showing GDP in trillions of
dollars
# Read the nations data and add gdp_tn: GDP in trillions of dollars,
# computed as GDP per capita times population, divided by 1e12.
# The file is addressed by its full path instead of calling setwd(), so the
# session's working directory is not changed as a side effect.
data_dir <- "/Users/smhenderson/Desktop/DATA110/R/Datasets"
nations <- read_csv(file.path(data_dir, "nations.csv")) %>%
  mutate(gdp_tn = gdp_percap * population / 1e12)
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#unique(nations$country)
Find the 4 countries with the highest GDP (ranked by each country's largest single-year value)
# Rank every country-year row from largest to smallest GDP; order() places
# rows with a missing gdp_tn last.
nations2 <- nations[order(nations$gdp_tn, decreasing = TRUE), ]
# Take the first four distinct country names in that ranking. head() returns
# at most four names and never pads with NA, unlike `[1:4]`, when fewer than
# four distinct countries are present.
top_countries <- head(unique(nations2$country), 4)
print(top_countries)
## [1] "China" "United States" "India" "Japan"
Filter dataset by the top 4 countries
# Keep only the rows for the four highest-GDP countries, ordered by year.
# The names are taken from `top_countries` rather than a hand-typed list, so
# this filter cannot drift from the ranking computed earlier.
# NOTE(review): the line chart's scale_shape_manual() still names the four
# countries explicitly; update it if the ranking ever changes.
nations3 <- nations2 %>%
  filter(country %in% top_countries) %>%
  arrange(year)
Create a line chart showing neonatal mortality rates in countries
with the highest GDPs (China, India, Japan, & USA)
# Line chart of neonatal mortality rate by year, one series per country.
# Color, point shape and fill are all mapped to `country`.
ggplot(
  data = nations3,
  mapping = aes(
    x = year,
    y = neonat_mortal_rate,
    color = country,
    shape = country,
    fill = country
  )
) +
  geom_line() +
  geom_point(size = 3) +
  labs(
    title = "Neonatal Mortality Rates in Countries with the Highest GDPs",
    x = "Year",
    y = "Neonatal Mortality Rate"
  ) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  # One fixed point shape per country.
  scale_shape_manual(
    values = c("China" = 16, "India" = 15, "Japan" = 17, "United States" = 18)
  ) +
  # theme_bw() must come before theme() so the overrides below are kept.
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    legend.key = element_rect(fill = "white"),
    panel.border = element_blank()
  )

Summarize the dataset to get total GDP by region and year
# Total GDP (gdp_tn summed, ignoring missing values) for every region-year
# pair, returned as an ungrouped tibble sorted by year and then region.
nations4 <- nations %>%
  summarise(gdp = sum(gdp_tn, na.rm = TRUE), .by = c(region, year)) %>%
  arrange(year, region)
# Keep only the years from 2000 onward.
# nations4 already holds one ungrouped row per region-year, so the former
# group_by(region, year) had no effect other than leaving nations5 grouped;
# it is dropped so the result is a plain, ungrouped tibble.
nations5 <- nations4 %>%
  filter(year >= 2000) %>%
  arrange(year, region)
#unique(nations4$region)
Area chart showing GDP by World Bank Regions
# Stacked area chart of GDP per year, one filled band per region, with thin
# white outlines separating the bands.
ggplot(data = nations5, mapping = aes(x = year, y = gdp, fill = region)) +
  geom_area(color = "white", linewidth = 0.5) +
  labs(
    title = "GDP by World Bank Regions between 2000 - 2014",
    x = "Year",
    y = "GDP ($trillion)"
  ) +
  scale_fill_brewer(palette = "Set2") +
  # theme_minimal() must come before theme() so the overrides below are kept.
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
