Load Libraries & Data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.6
## ✔ ggplot2 4.0.2 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
# Read the nations dataset by its full path instead of calling setwd():
# setwd() changes the working directory for the whole session, while
# file.path() only affects this one read.
# NOTE(review): the directory is still machine-specific; consider a
# project-relative path so the script runs on other machines.
data_dir <- "C:/Users/tonge/Desktop/Data 110"
nations <- read_csv(file.path(data_dir, "nations.csv"))
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Removing NAs
# Tally the missing values in every column of nations.
nations |>
  is.na() |>
  colSums()
## iso2c iso3c country year
## 25 0 0 0
## gdp_percap population birth_rate neonat_mortal_rate
## 766 14 295 525
## region income
## 0 0
# Keep only rows where both GDP per capita and population are present;
# both are needed to compute total GDP.
nations <- nations |>
  filter(!is.na(gdp_percap), !is.na(population))
Create a new variable, gdp_trill, giving each country's GDP in trillions:
gdp_trill = (gdp_percap * population) / 10^12
# Total GDP for each row (per-capita GDP times population), scaled to
# trillions by dividing by 1e12.
nations1 <- mutate(
  nations,
  gdp_trill = gdp_percap * population / 1e12
)
GDP by Country
# Restrict the data to the four countries being compared.
nations_filtered <- nations1 |>
  filter(
    country %in% c("China", "Germany", "Japan", "United States")
  )

# One GDP series per country over time: a line with a point at each row,
# coloured by country using the Brewer "Set1" palette.
ggplot(
  data = nations_filtered,
  mapping = aes(x = year, y = gdp_trill, color = country)
) +
  geom_line() +
  geom_point() +
  labs(title = "GDP by World Bank", x = "Year", y = "GDP($trillion)") +
  scale_color_brewer(palette = "Set1")

GDP by Region
# Sum GDP (in trillions) within each region for every year.
# .groups = "drop" returns an ungrouped tibble; without it the result stays
# grouped by region, which silently affects any later dplyr verbs applied to
# regions_filtered.
regions_filtered <- nations1 |>
  group_by(region, year) |>
  summarise(GDP = sum(gdp_trill, na.rm = TRUE), .groups = "drop")
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by region and year.
## ℹ Output is grouped by region.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(region, year))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
# Area chart of regional GDP by year, filled by region with the Brewer
# "Set2" palette; the white outline separates adjacent regions' bands.
ggplot(
  data = regions_filtered,
  mapping = aes(x = year, y = GDP, fill = region)
) +
  geom_area(color = "white") +
  labs(
    title = "GDP by Region Over Time",
    x = "Year",
    y = "GDP ($ trillions)",
    fill = "Region"
  ) +
  scale_fill_brewer(palette = "Set2")
