library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggfortify)
library(htmltools)
library(plotly)
##
## 다음의 패키지를 부착합니다: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Load the Nations dataset; the relative path is resolved against the current
# working directory.
# NOTE(review): the missing-value check further down reports 25 NAs in iso2c.
# That is presumably Namibia's ISO code "NA" being parsed as a missing value
# by read.csv's default na.strings -- confirm against the raw CSV.
nations <- read.csv(file = "nations.csv")
Use dplyr and ggplot2 to process data and draw these two charts (shown below) from the Nations dataset. You do NOT need to incorporate interactivity, but you can, if you want to challenge yourself.
For both charts, you will first need to create a new variable in the data, using mutate from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion.
Draw both charts with ggplot2.
For the first chart, you will need to filter the data with dplyr for the four desired countries. When making the chart with ggplot2, you will need to add both geom_point and geom_line layers, and use the Set1 ColorBrewer palette using: scale_color_brewer(palette = "Set1").
For the second chart, using dplyr you will need to group_by region and year, and then summarize on your mutated value for gdp using
summarise(GDP = sum(gdp, na.rm = TRUE)). (There will be null values, or NAs, in this data, so you will need to use na.rm = TRUE).
Each region's area will be generated by the command geom_area().
When drawing the chart with ggplot2, you will need to use the Set2 ColorBrewer palette using scale_fill_brewer(palette = "Set2").
Think about the difference between fill and color when making the chart, and where the above fill command needs to go in order for the regions to fill with the different colors when making the chart, and put a very thin white line around each area.
# Preview the first six rows to check the columns that were read in.
head(nations, n = 6L)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## region income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income
# Check for missing values (NA).
# Count the NAs in every column once and reuse the result below.
na_counts <- colSums(is.na(nations))
# Named positions of the columns that contain at least one NA.
na.cols <- which(na_counts > 0)
# Per-column NA counts, largest first.
sort(na_counts[na.cols], decreasing = TRUE)
## gdp_percap neonat_mortal_rate birth_rate iso2c
## 766 525 295 25
## population
## 14
# These columns have *some* missing values, not "no values", so label the
# count accordingly.
paste("Number of columns with missing values:", length(na.cols))
## [1] "Number of columns with missing values: 5"
# Add each country's total GDP in trillions of dollars:
# per-capita GDP times population, divided by one trillion.
nations2 <- nations %>%
  mutate(GDP = gdp_percap * population / 1e12)
# Keep only the four countries shown in the first chart.
nationsfilter <- nations2 %>%
  filter(country %in% c("United States", "Japan", "China", "Germany"))
head(nationsfilter)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 CN CHN China 1992 1260.162 1164970000 18.27 29.4
## 2 CN CHN China 2005 5053.379 1303720000 12.40 14.0
## 3 CN CHN China 2000 2915.415 1262645000 14.03 21.2
## 4 CN CHN China 1991 1091.449 1150780000 19.68 29.7
## 5 CN CHN China 2013 12218.521 1357380000 12.08 6.3
## 6 CN CHN China 1999 2649.745 1252735000 14.64 22.2
## region income GDP
## 1 East Asia & Pacific Upper middle income 1.468052
## 2 East Asia & Pacific Upper middle income 6.588191
## 3 East Asia & Pacific Upper middle income 3.681134
## 4 East Asia & Pacific Upper middle income 1.256017
## 5 East Asia & Pacific Upper middle income 16.585176
## 6 East Asia & Pacific Upper middle income 3.319429
# Chart 1: GDP over time for the four selected countries, drawn as points
# joined by lines and coloured by country with the Set1 ColorBrewer palette.
# The ggplot object is passed straight to ggplotly() to make it interactive.
nationchart <- ggplotly(
  ggplot(nationsfilter, aes(x = year, y = GDP, color = country)) +
    geom_point() +
    geom_line() +
    scale_color_brewer(palette = "Set1") +
    labs(title = "Trends of GDP", x = "Year", y = "GDP in $trillions")
)
nationchart
# Group the country-level rows by region and year.
nationsgroup <- nations2 %>%
  group_by(region, year)
# Total GDP per region per year. na.rm = TRUE skips rows whose GDP is NA
# (gdp_percap or population missing). .groups = "drop" returns an ungrouped
# table, so later verbs are not silently applied per region and summarise()
# no longer prints its "grouped output" message.
regiongroup <- nationsgroup %>%
  summarise(groupGDP = sum(GDP, na.rm = TRUE), .groups = "drop")
# Chart 2: area chart of total GDP per region over time. `fill` maps region
# to the Set2 ColorBrewer palette; the constant `color` outside aes() draws a
# white outline around each area. The ggplot object is passed to ggplotly()
# to make it interactive.
regionchart <- ggplotly(
  ggplot(regiongroup, aes(x = year, y = groupGDP, fill = region)) +
    geom_area(color = "White", size = 0.7, alpha = 0.7) +
    scale_fill_brewer(palette = "Set2") +
    labs(title = "GDP by Region", x = "Year", y = "GDP in $trillions")
)
regionchart