library(RColorBrewer)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
# Load the nations dataset from the course-materials folder.
# The file is addressed by its full path rather than via setwd(), so reading
# it does not change the session's working directory as a side effect.
data_dir <- "C:/Users/MCuser/Desktop/DATA110_CourseMaterials"
nations <- read_csv(file.path(data_dir, "nations.csv"))
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact summary of the loaded data: dimensions, column names,
# column types and the first few values of each column.
str(nations)
## spec_tbl_df [5,275 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ iso2c : chr [1:5275] "AD" "AD" "AD" "AD" ...
## $ iso3c : chr [1:5275] "AND" "AND" "AND" "AND" ...
## $ country : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
## $ year : num [1:5275] 1996 1994 2003 1990 2009 ...
## $ gdp_percap : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
## $ population : num [1:5275] 64291 62707 74783 54511 85474 ...
## $ birth_rate : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
## $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
## $ region : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
## $ income : chr [1:5275] "High income" "High income" "High income" "High income" ...
## - attr(*, "spec")=
## .. cols(
## .. iso2c = col_character(),
## .. iso3c = col_character(),
## .. country = col_character(),
## .. year = col_double(),
## .. gdp_percap = col_double(),
## .. population = col_double(),
## .. birth_rate = col_double(),
## .. neonat_mortal_rate = col_double(),
## .. region = col_character(),
## .. income = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Add total GDP in trillions of dollars: per-capita GDP times population,
# divided by 1e12. Rows with a missing gdp_percap or population yield NA.
nations2 <- nations %>%
  mutate(gdp_tn = gdp_percap * population / 1e12)
# Keep only the rows whose ISO3 code is CHN, DEU, JPN or USA,
# then order them chronologically.
top4 <- nations2 %>%
  filter(iso3c %in% c("CHN", "DEU", "JPN", "USA")) %>%
  arrange(year)
# Point-and-line chart of total GDP over time for the countries in top4,
# one colour per country. Points are added first, lines are drawn on top.
p1 <- ggplot(top4, aes(x = year, y = gdp_tn, color = country)) +
  geom_point(aes(shape = country), size = 1.5) +
  geom_line(size = 0.5, alpha = 0.5) +
  scale_color_brewer(palette = "Set1") +
  # Every country gets the same marker (shape 16).
  scale_shape_manual(values = rep(16, 4)) +
  labs(
    x = "Year",
    y = "GDP ($ trillions)",
    title = "China's Rise to Become the Largest Economy"
  ) +
  theme_light() +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))
p1
## Group By Region and Year
# Total GDP (trillions) per region per year; missing values are skipped via
# na.rm = TRUE. `.groups = "drop"` returns a plain, ungrouped tibble so later
# steps are not silently affected by a leftover grouping on `year` (it also
# silences the "grouped output" message summarise() would otherwise print).
regions <- nations2 %>%
  group_by(year, region) %>%
  summarise(gdp_tn = sum(gdp_tn, na.rm = TRUE), .groups = "drop") %>%
  arrange(year, region)
# Stacked area chart of total GDP per year, one filled band per region.
# The former `color = "white"` and `lwd = 2` arguments to ggplot() were
# dropped: ggplot() does not use extra arguments, so they had no effect. The
# white outline between bands comes from geom_area() below.
p2 <- ggplot(regions, aes(x = year, y = gdp_tn, fill = region)) +
  geom_area(alpha = 1, size = 0.5, colour = "white") +
  scale_fill_brewer(palette = "Set2") +
  xlab("Year") +
  ylab("GDP ($ trillions)") +
  ggtitle("GDP by World Bank Region") +
  theme_light() +
  # Centre the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))
p2