In this assignment, I am going to explore the nations dataset, which provides information about a number of different countries from 1990 to 2014. The data is tidy and has 10 columns to begin with, such as country (name), year, gdp_percap, population, and birth_rate.

Loading the tidyverse library

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.3
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Setting current working directory and loading dataset

# Read the CSV by its full path instead of calling setwd(): changing the
# working directory is a session-wide side effect, while a path handed to
# read_csv() affects only this call. path.expand() resolves the leading "~".
nations_path <- path.expand("~/Desktop/DATA110/nations.csv")
nations <- read_csv(nations_path)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )
# Print the tibble to preview its rows and column types.
nations
## # A tibble: 5,275 x 10
##    iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_r…
##    <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>            <dbl>
##  1 AD    AND   Andorra  1996         NA      64291       10.9              2.8
##  2 AD    AND   Andorra  1994         NA      62707       10.9              3.2
##  3 AD    AND   Andorra  2003         NA      74783       10.3              2  
##  4 AD    AND   Andorra  1990         NA      54511       11.9              4.3
##  5 AD    AND   Andorra  2009         NA      85474        9.9              1.7
##  6 AD    AND   Andorra  2011         NA      82326       NA                1.6
##  7 AD    AND   Andorra  2004         NA      78337       10.9              2  
##  8 AD    AND   Andorra  2010         NA      84419        9.8              1.7
##  9 AD    AND   Andorra  2001         NA      67770       11.8              2.1
## 10 AD    AND   Andorra  2002         NA      71046       11.2              2.1
## # … with 5,265 more rows, and 2 more variables: region <chr>, income <chr>

Creating a new column called gdp (total GDP in trillions of dollars) from the gdp_percap and population columns, and saving the result as nations_GDP

# Add a gdp column: per-capita GDP times population, divided by 1e12.
nations_GDP <- mutate(
  nations,
  gdp = gdp_percap * population / 1e12
)

Filtering for four European countries and creating a point-and-line plot of GDP by year from 1990 to 2014

# Keep only the rows for the four countries being compared. %in% tests
# membership in the whole set at once, so the list is easy to extend, and it
# never yields NA for a missing country name.
european_countries <- c("France", "Germany", "Italy", "Spain")

nations1 <- nations_GDP %>%
  filter(country %in% european_countries)

# Point-and-line chart of gdp against year, one colour per country.
plot1 <- ggplot(nations1, aes(x = year, y = gdp, colour = country)) +
  geom_point() +
  geom_line() +
  scale_colour_brewer(palette = "Set1") +
  xlab("Year") +
  ylab("GDP ($ Trillions)") +
  ggtitle("Largest European Economies from 1990 - 2014")

# Render the chart.
plot1

Creating an area plot of GDP by year for each region of the world

# Total gdp for each region in each year. na.rm = TRUE skips rows whose gdp is
# missing instead of letting a single NA turn the whole sum into NA.
# summarise() drops only the last grouping level (year), so the result would
# still be grouped by region; ungroup() removes that leftover grouping so later
# steps operate on a plain tibble.
nations2 <- nations_GDP %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE)) %>%
  ungroup()
## `summarise()` has grouped output by 'region'. You can override using the `.groups` argument.
# Area chart of the regional GDP totals by year, one fill colour per region;
# the white outline separates neighbouring regions.
# The y-axis label previously read "GPD ($ Trillion)"; it now matches the
# "GDP ($ Trillions)" label used on the country-level chart.
plot2 <- nations2 %>%
  ggplot(aes(x = year, y = GDP, fill = region)) +
  geom_area(alpha = 0.7, size = 0.6, color = "white") +
  scale_fill_brewer(palette = "Set2") +
  labs(x = "Year",
       y = "GDP ($ Trillions)",
       title = "GDP by Regions of the World")

# Render the chart.
plot2