Variables in nations.csv:
- iso2c, iso3c: Two- and three-letter codes for each country, assigned by the International Organization for Standardization.
- country: Country name.
- year: Year of the observation.
- population: Estimated total population at mid-year, including all residents apart from refugees.
- gdp_percap: Gross Domestic Product per capita in current international dollars, corrected for purchasing power in different territories.
- life_expect: Life expectancy at birth, in years.
- birth_rate: Live births during the year per 1,000 people, based on the mid-year population estimate.
- neonat_mortal_rate: Neonatal mortality rate, i.e. babies dying before reaching 28 days of age, per 1,000 live births in a given year.
- region, income: World Bank regions and income groups (explained in the World Bank country classification).
library(tidyverse)
library(ggplot2)
library(psych)
library(RColorBrewer)
library(readr)
library(plotly)
require(mgcv) # for GAM
# Read the nations data set from the author's local copy of the CSV.
# The path is machine-specific; point it at your own copy when re-running.
nations <- read_csv(
  file = "/Users/raulmiranda/Desktop/DATA 110 Fall 2020/Databases/nations.csv"
)
# Preview the first rows to check that the columns were parsed as expected.
nations %>% head()
## # A tibble: 6 x 11
## iso2c iso3c country year gdp_percap life_expect population birth_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 2007 NA NA 82683 10.1
## 2 AD AND Andorra 2011 NA NA 83751 NA
## 3 AD AND Andorra 2013 NA NA 80788 NA
## 4 AD AND Andorra 2008 NA NA 83861 10.4
## 5 AD AND Andorra 1992 NA NA 58888 12.1
## 6 AD AND Andorra 2006 NA NA 80991 10.6
## # … with 3 more variables: neonat_mortal_rate <dbl>, region <chr>, income <chr>
# Subset of `nations` for the countries compared in the trend plots, with
# total GDP added as `gdp` (per-capita GDP times population, in $ trillion).
# Countries are selected by name pattern; "^China" is anchored to the start of
# the name so that names containing "China" later in the string do not match.
# NOTE(review): "United States", "Germany" and "Korea" are unanchored and will
# match any country name that contains them -- confirm this is intended.
# Rows stay in file order: an argument-less `arrange()` does not sort, so no
# sort step is applied here. Add `arrange(country, year)` if a sorted table is
# wanted.
uschgrkr <- nations %>%
  filter(grepl("United States|^China|Germany|Korea", country)) %>%
  mutate(gdp = gdp_percap * population / 1e12)
uschgrkr
## # A tibble: 108 x 12
## iso2c iso3c country year gdp_percap life_expect population birth_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CN CHN China 1996 2071. 70.4 1217550000 17.0
## 2 CN CHN China 2005 5093. 74.0 1303720000 12.4
## 3 CN CHN China 2007 6864. 74.6 1317885000 12.1
## 4 CN CHN China 1995 1869. 70.2 1204855000 17.1
## 5 CN CHN China 2008 7635. 74.8 1324655000 12.1
## 6 CN CHN China 1997 2277. 70.7 1230075000 16.6
## 7 CN CHN China 1990 987. 69.3 1135185000 21.1
## 8 CN CHN China 1991 1099. 69.4 1150780000 19.7
## 9 CN CHN China 1992 1268. 69.6 1164970000 18.3
## 10 CN CHN China 2001 3227. 72.4 1271850000 13.4
## # … with 98 more rows, and 4 more variables: neonat_mortal_rate <dbl>,
## # region <chr>, income <chr>, gdp <dbl>
# Total GDP ($ trillion) for each region and year.
# Country-level GDP is per-capita GDP times population; missing values are
# dropped inside the sum (na.rm = TRUE), so a region-year with no usable data
# sums to 0 rather than NA.
regional <- nations %>%
  mutate(gdp = gdp_percap * population / 1e12) %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE)) %>%
  # summarise() only peels off the last grouping variable, so the result would
  # still be grouped by `region`; ungroup so later verbs act on the whole table.
  ungroup()
regional
## # A tibble: 189 x 3
## region year GDP
## <chr> <dbl> <dbl>
## 1 East Asia & Pacific 1990 5.59
## 2 East Asia & Pacific 1991 6.10
## 3 East Asia & Pacific 1992 6.57
## 4 East Asia & Pacific 1993 7.11
## 5 East Asia & Pacific 1994 7.71
## 6 East Asia & Pacific 1995 8.39
## 7 East Asia & Pacific 1996 9.09
## 8 East Asia & Pacific 1997 9.66
## 9 East Asia & Pacific 1998 9.75
## 10 East Asia & Pacific 1999 10.3
## # … with 179 more rows
# Builds the GDP-over-time scatter plot with one smoothed trend line per
# country. The three plots below differ only in the smoother, so the shared
# layers, palette and labels live here once.
#   smooth_method  - smoother name passed to geom_smooth(), e.g. "loess"
#   smooth_formula - model formula for that smoother, written in terms of x, y
#   data           - data frame with `year`, `gdp` and `country` columns
# Returns a ggplot object; nothing is drawn until it is printed.
plot_gdp_trend <- function(smooth_method, smooth_formula, data = uschgrkr) {
  # The aesthetics are declared once on the plot so both layers inherit them.
  ggplot(data, aes(x = year, y = gdp, color = country)) +
    geom_point() +
    geom_smooth(method = smooth_method, formula = smooth_formula, se = FALSE) +
    scale_color_brewer(palette = "Set1") +
    labs(
      title = "China is the Fastest Growing Economy in this Century",
      x = "Year",
      y = "GDP ($trillion)"
    )
}

# Same data and styling, three smoothers for comparison.
p1 <- plot_gdp_trend("loess", y ~ x) # local regression
p2 <- plot_gdp_trend("lm", y ~ poly(x, 4)) # 4th-degree polynomial fit
p3 <- plot_gdp_trend("gam", y ~ s(x)) # GAM smooth; s() comes from mgcv

p1
p2
p3
# Interactive (plotly) version of the GAM plot.
ggplotly(p3)
# Stacked area chart of total GDP by region over time.
p4 <- ggplot(regional, aes(x = year, y = GDP, fill = region)) +
  # The constant white outline separates the stacked bands.
  geom_area(color = "white") +
  # Regions are mapped to `fill`, so the palette has to be set on the fill
  # scale; a colour scale has nothing to act on here because `color` is a
  # constant, and the areas would keep the default fill colours.
  scale_fill_brewer(palette = "Set2") +
  labs(title = "GDP by World Bank Region", x = "Year", y = "GDP ($trillion)")
p4
# Interactive (plotly) version of the same chart.
ggplotly(p4)