library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggfortify)
library(htmltools)
library(plotly)
## 
## 다음의 패키지를 부착합니다: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the Nations dataset (path is relative to the working directory).
# NOTE(review): read.csv() treats the literal text "NA" as missing by default;
# the 25 missing iso2c values reported further down may be a real country code
# rather than absent data -- confirm against the CSV.
nations <- read.csv(file = "nations.csv")

Use dplyr and ggplot2 to process data and draw these two charts (shown below) from the Nations dataset. You do NOT need to incorporate interactivity, but you can, if you want to challenge yourself.

For both charts, you will first need to create a new variable in the data, using mutate from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion.

Draw both charts with ggplot2.

For the first chart, you will need to filter the data with dplyr for the four desired countries. When making the chart with ggplot2 you will need to add both geom_point and geom_line layers, and use the Set1 ColorBrewer palette using: scale_color_brewer(palette = "Set1").

For the second chart, using dplyr you will need to group_by region and year, and then summarize on your mutated value for gdp using

summarise(GDP = sum(gdp, na.rm = TRUE)). (There will be null values, or NAs, in this data, so you will need to use na.rm = TRUE.)

Each region’s area will be generated by the command geom_area().

When drawing the chart with ggplot2, you will need to use the Set2 ColorBrewer palette using scale_fill_brewer(palette = "Set2").

Think about the difference between fill and color when making the chart, and where the above fill command needs to go in order for the regions to fill with the different colors when making the chart, and put a very thin white line around each area.

# Preview the first six rows to check the columns that were read in.
head(nations, n = 6L)
##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    AD   AND Andorra 1996         NA      64291       10.9                2.8
## 2    AD   AND Andorra 1994         NA      62707       10.9                3.2
## 3    AD   AND Andorra 2003         NA      74783       10.3                2.0
## 4    AD   AND Andorra 1990         NA      54511       11.9                4.3
## 5    AD   AND Andorra 2009         NA      85474        9.9                1.7
## 6    AD   AND Andorra 2011         NA      82326         NA                1.6
##                  region      income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income
# Count missing values per column and report the columns that contain any.
# colSums(is.na(.)) is applied to the whole data frame instead of going through
# sapply(), whose result is an empty list (and makes colSums() fail) when no
# column has an NA.
na_counts <- colSums(is.na(nations))
na.cols <- which(na_counts > 0)
sort(na_counts[na.cols], decreasing = TRUE)
##         gdp_percap neonat_mortal_rate         birth_rate              iso2c 
##                766                525                295                 25 
##         population 
##                 14
paste("Number of columns with missing values:", length(na.cols))
## [1] "Number of columns with missing values: 5"
# Add each country's total GDP in trillions of dollars:
# GDP per capita times population, divided by 1e12.
nations2 <- nations %>%
  mutate(GDP = gdp_percap * population / 1e12)

# Keep only the four countries plotted in the first chart.
nationsfilter <- nations2 %>%
  filter(country %in% c("United States", "Japan", "China", "Germany"))
head(nationsfilter)
##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    CN   CHN   China 1992   1260.162 1164970000      18.27               29.4
## 2    CN   CHN   China 2005   5053.379 1303720000      12.40               14.0
## 3    CN   CHN   China 2000   2915.415 1262645000      14.03               21.2
## 4    CN   CHN   China 1991   1091.449 1150780000      19.68               29.7
## 5    CN   CHN   China 2013  12218.521 1357380000      12.08                6.3
## 6    CN   CHN   China 1999   2649.745 1252735000      14.64               22.2
##                region              income       GDP
## 1 East Asia & Pacific Upper middle income  1.468052
## 2 East Asia & Pacific Upper middle income  6.588191
## 3 East Asia & Pacific Upper middle income  3.681134
## 4 East Asia & Pacific Upper middle income  1.256017
## 5 East Asia & Pacific Upper middle income 16.585176
## 6 East Asia & Pacific Upper middle income  3.319429
# Point-and-line chart of GDP over time for the selected countries,
# coloured per country with the ColorBrewer "Set1" palette.
nationchart <- ggplot(
  data = nationsfilter,
  mapping = aes(x = year, y = GDP, color = country)
) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Trends of GDP", x = "Year", y = "GDP in $trillions")

# Convert the static ggplot into an interactive plotly widget and display it.
nationchart <- ggplotly(nationchart)
nationchart
# Total GDP per region and year; countries with a missing GDP are skipped
# via na.rm = TRUE.
nationsgroup <- nations2 %>%
  group_by(region, year)
# `.groups = "drop"` returns an ungrouped result, so no leftover grouping by
# region carries into later steps and summarise() emits no regrouping message.
regiongroup <- nationsgroup %>%
  summarise(groupGDP = sum(GDP, na.rm = TRUE), .groups = "drop")
# Area chart of regional GDP over time. `fill` is mapped to region so each
# area takes its own "Set2" colour; the constant `color` draws the outline.
regionchart <- ggplot(
  data = regiongroup,
  mapping = aes(x = year, y = groupGDP, fill = region)
) +
  geom_area(color = "White", size = 0.7, alpha = 0.7) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "GDP by Region", x = "Year", y = "GDP in $trillions")

# Convert the static ggplot into an interactive plotly widget and display it.
regionchart <- ggplotly(regionchart)
regionchart