In this assignment, I am going to explore the nations dataset, which provides information about a number of different countries from 1990 to 2014. The data is tidy and has 10 columns to begin with, such as country (name), year, gdp_percap, population and birth_rate.
Loading library
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.3
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Setting current working directory and loading dataset
# Load the nations data by explicit path rather than via setwd(): changing the
# working directory is a global side effect that silently affects every later
# relative path and makes the script depend on where it is run from.
# NOTE(review): the directory is still specific to one machine; prefer a
# project-relative path if nations.csv can live next to this file.
nations <- read_csv(
  file.path(path.expand("~/Desktop/DATA110"), "nations.csv")
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
# Print the tibble to preview its first rows and column types.
nations
## # A tibble: 5,275 x 10
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_r…
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # … with 5,265 more rows, and 2 more variables: region <chr>, income <chr>
Creating and saving a new column called gdp (total GDP in trillions of dollars), computed as GDP per capita multiplied by population and divided by 1e12
# Add `gdp`: per-capita GDP times population, scaled by 1e12 so the values
# read in trillions. Rows with a missing gdp_percap keep gdp = NA.
nations_GDP <- mutate(
  nations,
  gdp = (gdp_percap * population) / 1e12
)
Filtering the four European countries of interest and plotting their GDP by year (points connected by lines) from 1990 to 2014
# Keep only the rows for the four countries compared in the first chart.
nations1 <- filter(
  nations_GDP,
  country %in% c("France", "Germany", "Italy", "Spain")
)
# Yearly GDP per country: points for each observation, joined by a line,
# with one Set1 colour per country.
plot1 <- ggplot(
  data = nations1,
  mapping = aes(x = year, y = gdp, color = country)
) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Largest European Economies from 1990 - 2014",
    x = "Year",
    y = "GDP ($ Trillions)"
  )

# Render the chart.
plot1

Creating a stacked area plot of total GDP by year for each region of the world
# Total GDP per region and year. na.rm = TRUE skips countries whose gdp is
# missing so a single NA does not turn the whole region-year total into NA.
# .groups = "drop" returns an ungrouped tibble: without it the result stays
# grouped by region (and summarise() emits a message saying so), which can
# silently change how later verbs behave.
nations2 <- nations_GDP %>%
  group_by(region, year) %>%
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
# Stacked area chart of total GDP by year, one filled band per region.
# The thin white outline separates adjacent bands.
plot2 <- nations2 %>%
  ggplot(aes(x = year, y = GDP, fill = region)) +
  geom_area(alpha = 0.7, size = 0.6, color = "white") +
  scale_fill_brewer(palette = "Set2") +
  # Axis label fixed from "GPD ($ Trillion)"; wording now matches plot1.
  labs(
    x = "Year",
    y = "GDP ($ Trillions)",
    title = "GDP by Regions of the World"
  )

# Render the chart.
plot2
