# Switch the working directory so that relative paths used later in this
# script (e.g. read.csv("nations.csv")) resolve against this folder.
# NOTE(review): hard-coded, machine-specific path; the script will fail on any
# other machine. Consider an RStudio project with relative paths instead. The
# spaces before "/" and before the closing quote look accidental; confirm they
# match the real folder names.
setwd("~/Desktop/MC Data Science /DATA 110 /DataSets ")
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(RColorBrewer)
# Read the nations data set from the current working directory into a base
# data frame, then preview its first six rows.
nat <- read.csv(file = "nations.csv")
head(nat, n = 6L)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## region income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income
Use dplyr's mutate() to compute each country's GDP in trillions of dollars: multiply gdp_percap by population and divide by one trillion.
# Add gdp_trill: total GDP in trillions of dollars, i.e. per-capita GDP times
# population, scaled down by 1e12. Rows with a missing gdp_percap yield NA.
nat1 <- mutate(
  nat,
  gdp_trill = gdp_percap * population / 1e12
)
head(nat1, n = 6L)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## region income gdp_trill
## 1 Europe & Central Asia High income NA
## 2 Europe & Central Asia High income NA
## 3 Europe & Central Asia High income NA
## 4 Europe & Central Asia High income NA
## 5 Europe & Central Asia High income NA
## 6 Europe & Central Asia High income NA
Filter the data for the four desired countries (China, Germany, Japan, United States). The chart will need geom_point() and geom_line() layers. Use the Set1 ColorBrewer palette via scale_color_brewer(palette = "Set1").
# Filter ----
# Keep only the rows belonging to the four economies compared in the first
# chart, then preview the result.
nat2 <- filter(
  nat1,
  is.element(country, c("China", "Germany", "Japan", "United States"))
)
head(nat2, n = 6L)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 CN CHN China 1992 1260.162 1164970000 18.27 29.4
## 2 CN CHN China 2005 5053.379 1303720000 12.40 14.0
## 3 CN CHN China 2000 2915.415 1262645000 14.03 21.2
## 4 CN CHN China 1991 1091.449 1150780000 19.68 29.7
## 5 CN CHN China 2013 12218.521 1357380000 12.08 6.3
## 6 CN CHN China 1999 2649.745 1252735000 14.64 22.2
## region income gdp_trill
## 1 East Asia & Pacific Upper middle income 1.468052
## 2 East Asia & Pacific Upper middle income 6.588191
## 3 East Asia & Pacific Upper middle income 3.681134
## 4 East Asia & Pacific Upper middle income 1.256017
## 5 East Asia & Pacific Upper middle income 16.585176
## 6 East Asia & Pacific Upper middle income 3.319429
# Chart ----
# GDP over time for the four selected countries: one colour per country,
# points drawn first and connecting lines on top, Set1 ColorBrewer palette,
# and no legend title.
c1 <- ggplot(
  data = nat2,
  mapping = aes(x = year, y = gdp_trill, colour = country)
)
c1 +
  geom_point() +
  geom_line() +
  scale_colour_brewer(palette = "Set1") +
  labs(
    x = "Year",
    y = "GDP ($Trillion)",
    title = "China's Rise to Become the Largest Economy"
  ) +
  theme(legend.title = element_blank())
Group by region and year, then summarise the mutated GDP value using summarise(GDP = sum(gdp_trill, na.rm = TRUE)). Each region's area is drawn with geom_area(). Use the Set2 ColorBrewer palette via scale_fill_brewer(palette = "Set2"). Put a very thin white line around each area.
# Work on data ----
# Total GDP (in trillions) for every region/year pair; missing values are
# skipped when summing. The result remains grouped by region, as the echoed
# message below reports.
nat3 <- nat1 %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp_trill, na.rm = TRUE))
## `summarise()` regrouping output by 'region' (override with `.groups` argument)
head(nat3, n = 6L)
## # A tibble: 6 x 3
## # Groups: region [1]
## region year GDP
## <chr> <int> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
# Chart ----
# Stacked area chart of summed GDP per region over time. Each region's area is
# filled from the Set2 ColorBrewer palette and outlined in white.
# Fix: the y-axis label was missing its closing parenthesis ("GDP($trillion");
# it now reads "GDP ($Trillion)", matching the label used on the first chart.
# NOTE(review): the outline uses the default line width; if a thinner border
# is wanted, set the line-width argument appropriate to the installed ggplot2.
c2 <- ggplot(nat3, aes(x = year, y = GDP, fill = region))
c2 +
  geom_area(colour = "white") +
  scale_fill_brewer(palette = "Set2") +
  xlab("Year") +
  ylab("GDP ($Trillion)") +
  ggtitle("GDP by World Bank Region")