Load the tidyverse and plotly libraries
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Import the data
# Read the nations dataset from the working directory into a tibble.
nations <- read_csv(file = "nations.csv")
## Parsed with column specification:
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
Take a look at the first few rows
# Preview the first six rows to sanity-check the import.
head(nations, n = 6)
## # A tibble: 6 x 10
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_r…
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## # … with 2 more variables: region <chr>, income <chr>
Create a new variable in the data, using mutate from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion.
# Add total GDP in trillions of dollars: per-capita GDP times population,
# scaled down by 1e12. Rows with a missing gdp_percap produce an NA GDP.
nations_with_GDP <- nations %>%
  mutate(GDP = gdp_percap * population / 1e12)
nations_with_GDP
## # A tibble: 5,275 x 11
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_r…
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # … with 5,265 more rows, and 3 more variables: region <chr>, income <chr>,
## # GDP <dbl>
### Draw dot and line chart with ggplot2
First, filter the data to select four countries
# Keep only the rows for the four economies compared in the chart.
# %in% tests membership against the whole set in one expression, replacing a
# chain of `==` comparisons joined with `|`; the selected rows are the same.
fourcountries <- nations_with_GDP %>%
  filter(
    country %in% c("Russian Federation", "China", "United States", "Germany")
  )
Use ggplot2 to chart the fourcountries data
# Base plot for the four-country comparison: year on x, GDP on y, and one
# colour per country. No geoms yet; they are layered on in the next step.
fourcountries_chart <- ggplot(
  data = fourcountries,
  mapping = aes(x = year, y = GDP, color = country)
) +
  labs(
    x = "year",
    y = "GDP ($ trillion)",
    title = "China's Rise to Become the World's Largest Economy"
  ) +
  theme_minimal(base_size = 14) +
  # Applied after theme_minimal() so the complete theme does not override it.
  theme(legend.title = element_blank())
# Layer lines and points onto the base chart, colour the countries with the
# ColorBrewer "Set1" palette, then convert the result to an interactive
# plotly widget and display it.
f <- ggplotly(
  fourcountries_chart +
    geom_line() +
    geom_point() +
    scale_color_brewer(palette = "Set1")
)
f
### Draw Area-Filled Chart
Using dplyr, group_by region and year, and then summarise your mutated GDP value using
summarise(GDP = sum(GDP, na.rm = TRUE)). (There are missing values, or NAs, in this data, so use na.rm = TRUE.)
# Total GDP for each region in each year; na.rm = TRUE skips countries whose
# GDP is missing. summarise() peels off only the last grouping level (year),
# so the result is still grouped by region.
nations_grouped <- nations_with_GDP %>%
  group_by(region, year) %>%
  summarise(GDP = sum(GDP, na.rm = TRUE))
## `summarise()` regrouping output by 'region' (override with `.groups` argument)
# Preview the first six region/year totals.
head(nations_grouped, n = 6)
## # A tibble: 6 x 3
## # Groups: region [1]
## region year GDP
## <chr> <dbl> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
Make the plot and generate each region’s area
# Area chart of GDP over time with one filled band per region; the white
# outline separates adjacent bands. Fill colours come from the ColorBrewer
# "Set2" palette.
regional_plot <- ggplot(
  data = nations_grouped,
  mapping = aes(x = year, y = GDP)
) +
  geom_area(mapping = aes(fill = region), color = "white") +
  labs(
    x = "year",
    y = "GDP ($ trillion)",
    title = "GDP by World Bank Region"
  ) +
  theme_minimal(base_size = 14) +
  scale_fill_brewer(palette = "Set2")
# Convert the static area chart into an interactive plotly widget and show it.
r <- ggplotly(p = regional_plot)
r