Load the appropriate libraries and the Nations dataset
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(RColorBrewer)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(htmltools)
library(ggfortify)
# Read the Nations data from the CSV file (path is relative to the working directory).
nations <- read.csv(file = "nations.csv")
Observe the number of variables and the variable attributes
# Preview the first six rows to inspect the columns and their values.
head(nations, n = 6L)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## region income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income
Create the new variable gdp: total GDP in trillions (GDP per capita times population, divided by 10^12)
# Derive total GDP in trillions: per-capita GDP times population, divided by 1e12.
# Rows with a missing gdp_percap or population yield NA for gdp.
nations2 <- mutate(nations, gdp = (gdp_percap * population) / 1e12)
Observe the new variable in the dataset
# Preview the first six rows to confirm the new gdp column was added.
head(nations2, n = 6L)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## region income gdp
## 1 Europe & Central Asia High income NA
## 2 Europe & Central Asia High income NA
## 3 Europe & Central Asia High income NA
## 4 Europe & Central Asia High income NA
## 5 Europe & Central Asia High income NA
## 6 Europe & Central Asia High income NA
Filter the data to show 4 desired countries
# Countries to keep for the first plot.
countries <- c("United States", "India", "Indonesia", "China")

# Keep only the rows whose country is one of the selected names.
# dplyr::filter is spelled out because several attached packages mask filter().
nations_filtered <- dplyr::filter(nations2, country %in% countries)
Plot 1
# Plot 1: GDP over time for the selected countries, drawn as a line with
# point markers per country and coloured with the Brewer "Set1" palette.
ggplot(data = nations_filtered, mapping = aes(x = year, y = gdp, color = country)) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "GDP of the Top 4 Most Populated Countries"
  )

Group the data then create a summary table
# Total GDP per region and year; missing country-level gdp values are
# excluded from each sum (na.rm = TRUE).
# `.groups = "drop"` returns an ungrouped tibble, so no leftover grouping by
# region leaks into later steps and summarise() no longer emits its
# "grouped output" message.
nations3 <- nations2 %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.
# Display the regional summary table.
print(nations3)
## # A tibble: 175 × 3
## # Groups: region [7]
## region year GDP
## <chr> <int> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
## 7 East Asia & Pacific 1996 8.96
## 8 East Asia & Pacific 1997 9.55
## 9 East Asia & Pacific 1998 9.60
## 10 East Asia & Pacific 1999 10.1
## # … with 165 more rows
Plot 2
# Plot 2: stacked area chart of regional GDP over time.
# Regions are mapped to `fill`, so the Brewer "Set2" palette is applied through
# the fill scale. A colour scale would have no effect here because `color` is
# set to the constant "white" (the outline between areas), not mapped to data.
p <- ggplot(nations3, aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white") +
  scale_fill_brewer(name = "Regions", palette = "Set2") +
  labs(x = "Year", y = "GDP (trillions)", title = "GDP by Regions")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)