library(ggplot2)
library(RColorBrewer)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the nations dataset from CSV into a data frame.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of nations.csv (or a project-relative path) before running.
nations <- read.csv("/Users/michellenguyen/Downloads/DATASETS 2/nations.csv")
# Preview the first rows to confirm the columns were parsed as expected.
head(nations)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## region income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income
# Heavily penalise scientific notation so numbers print in fixed notation.
options(scipen = 1000)

# Derive each row's total GDP in trillions: per-capita GDP multiplied by
# population, divided by 1e12. Rows with a missing input remain NA.
nations <- mutate(
  nations,
  gdp_eachcountry = gdp_percap * population / 1e12
)
head(nations)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## region income gdp_eachcountry
## 1 Europe & Central Asia High income NA
## 2 Europe & Central Asia High income NA
## 3 Europe & Central Asia High income NA
## 4 Europe & Central Asia High income NA
## 5 Europe & Central Asia High income NA
## 6 Europe & Central Asia High income NA
# Keep only the four economies compared in the country-level line chart.
# `%in%` tests membership in the set and never yields NA, unlike chained `==`.
nations_filtered <- nations %>%
  filter(country %in% c("China", "Germany", "Japan", "United States"))
head(nations_filtered)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 CN CHN China 1992 1260.162 1164970000 18.27 29.4
## 2 CN CHN China 2005 5053.379 1303720000 12.40 14.0
## 3 CN CHN China 2000 2915.415 1262645000 14.03 21.2
## 4 CN CHN China 1991 1091.449 1150780000 19.68 29.7
## 5 CN CHN China 2013 12218.521 1357380000 12.08 6.3
## 6 CN CHN China 1999 2649.745 1252735000 14.64 22.2
## region income gdp_eachcountry
## 1 East Asia & Pacific Upper middle income 1.468052
## 2 East Asia & Pacific Upper middle income 6.588191
## 3 East Asia & Pacific Upper middle income 3.681134
## 4 East Asia & Pacific Upper middle income 1.256017
## 5 East Asia & Pacific Upper middle income 16.585176
## 6 East Asia & Pacific Upper middle income 3.319429
# Line-and-point chart of total GDP over time, one coloured series per
# country. The y-axis label carries its closing parenthesis.
nations_chart <- ggplot(
  data = nations_filtered,
  aes(x = year, y = gdp_eachcountry)
) +
  geom_line(aes(color = country)) +
  geom_point(aes(color = country)) +
  labs(title = "China's Rise to Become the Largest Economy") +
  xlab("year") +
  ylab("GDP ($trillion)") +
  theme_minimal(base_size = 14) +
  scale_color_brewer(palette = "Set1")
# Printing the plot object at top level renders it.
nations_chart

# Total GDP per region and year; countries with a missing GDP value are
# skipped in the sum (na.rm = TRUE). `.groups = "drop"` returns an ungrouped
# tibble, so no grouping leaks into later steps and summarise() emits no
# regrouping message.
nations_groupby <- nations %>%
  group_by(region, year) %>%
  summarize(GDP = sum(gdp_eachcountry, na.rm = TRUE), .groups = "drop")
head(nations_groupby)
## # A tibble: 6 × 3
## region year GDP
## <chr> <int> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
# Stacked area chart of total GDP over time, one filled band per region.
# The y-axis label carries its closing parenthesis.
nations_chart2 <- ggplot(
  data = nations_groupby,
  aes(x = year, y = GDP, fill = region)
) +
  labs(title = "GDP by World Bank Region") +
  xlab("year") +
  ylab("GDP ($trillion)") +
  geom_area() +
  scale_fill_brewer(palette = "Set2")
# Printing the plot object at top level renders it.
nations_chart2
