# install.packages("tidyverse")
# install.packages("dslabs")
# install.packages("dplyr")
# install.packages("ggplot2")
library(tidyverse)
## ── Attaching packages ───────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dslabs)
library(dplyr)
library(ggplot2)
library(readxl)
# Read the nations data set from the local Downloads folder into a data frame,
# then print its first six rows as a quick look at the columns.
nations <- utils::read.csv(file = "~/Downloads/nations.csv")
utils::head(nations, n = 6L)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## region income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income
# Derive total GDP in trillions (GDP per capita x population / 1e12) and keep
# only the four countries compared in the chart.
# The piped data frame is already mutate()'s first argument, so it is not
# passed again; %in% replaces the chain of `==` / `|` comparisons.
nations2 <- nations %>%
  mutate(gdp = (gdp_percap * population) / 1e12) %>%
  filter(country %in% c("China", "Germany", "Japan", "United States"))

# Line chart with point markers: one coloured series per country, GDP by year.
ggplot(nations2, aes(x = year, y = gdp, color = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  ylab("GDP ($Trillions)") +
  ggtitle("China's Rise to become the Largest Economy")

# Print the first 20 rows of the raw data for a closer look.
head(x = nations, n = 20L)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2.0
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## 11 AD AND Andorra 1997 NA 64147 11.2 2.6
## 12 AD AND Andorra 1993 NA 61003 11.4 3.4
## 13 AD AND Andorra 2008 NA 85616 10.4 1.8
## 14 AD AND Andorra 1999 NA 64161 12.6 2.3
## 15 AD AND Andorra 2014 NA 72786 NA 1.5
## 16 AD AND Andorra 2005 NA 81223 10.7 1.9
## 17 AD AND Andorra 2012 NA 79316 9.5 1.6
## 18 AD AND Andorra 2013 NA 75902 NA 1.5
## 19 AD AND Andorra 1992 NA 58904 12.1 3.7
## 20 AD AND Andorra 1995 NA 63854 11.0 3.0
## region income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income
## 7 Europe & Central Asia High income
## 8 Europe & Central Asia High income
## 9 Europe & Central Asia High income
## 10 Europe & Central Asia High income
## 11 Europe & Central Asia High income
## 12 Europe & Central Asia High income
## 13 Europe & Central Asia High income
## 14 Europe & Central Asia High income
## 15 Europe & Central Asia High income
## 16 Europe & Central Asia High income
## 17 Europe & Central Asia High income
## 18 Europe & Central Asia High income
## 19 Europe & Central Asia High income
## 20 Europe & Central Asia High income
# Total GDP in trillions for each region and year.
# - gdp is GDP per capita x population / 1e12, computed per country first.
# - Both grouping columns go into a single group_by(): a second group_by()
#   call replaces the earlier grouping instead of adding to it.
# - Missing country values are skipped inside the sum (na.rm = TRUE), so the
#   plot below receives one complete row per region/year and no rows are
#   dropped by ggplot.
# - .groups = "drop" returns an ungrouped tibble.
nations3 <- nations %>%
  mutate(gdp = (gdp_percap * population) / 1e12) %>%
  group_by(region, year) %>%
  summarise(gdp = sum(gdp, na.rm = TRUE), .groups = "drop")
view(nations3)

# Stacked area chart of regional GDP over time. The outline is drawn with
# position = "stack" so each line follows the top edge of its own area
# (geom_line() is unstacked by default and would not line up with the areas).
ggplot(nations3, aes(x = year, y = gdp, fill = region)) +
  geom_area() +
  geom_line(position = "stack") +
  ylab("GDP ($Trillions)") +
  ggtitle("GDP by World Bank Region") +
  scale_fill_brewer(palette = "Set2")
