Step 1. Load the required packages and read in the nations.csv dataset.
library(readr)
## Warning: package 'readr' was built under R version 3.5.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 3.5.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.3
## v tidyr 1.0.0 v stringr 1.4.0
## v tibble 2.1.3 v forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the nations data from the working directory into a tibble.
nations <- readr::read_csv(file = "nations.csv")
## Parsed with column specification:
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
Inspect the dimensions, summary statistics, and last few rows of the nations dataset.
# Report the number of rows and columns in nations.
dim(nations)
## [1] 5275 10
# Print a per-column summary: quartiles, mean and NA counts for numeric
# columns; length, class and mode for character columns.
summary(nations)
## iso2c iso3c country year
## Length:5275 Length:5275 Length:5275 Min. :1990
## Class :character Class :character Class :character 1st Qu.:1996
## Mode :character Mode :character Mode :character Median :2002
## Mean :2002
## 3rd Qu.:2008
## Max. :2014
##
## gdp_percap population birth_rate neonat_mortal_rate
## Min. : 239.7 Min. :9.004e+03 Min. : 6.90 Min. : 0.70
## 1st Qu.: 2263.6 1st Qu.:7.175e+05 1st Qu.:13.40 1st Qu.: 6.70
## Median : 6563.2 Median :5.303e+06 Median :21.60 Median :15.00
## Mean : 12788.8 Mean :2.958e+07 Mean :24.16 Mean :19.40
## 3rd Qu.: 17195.0 3rd Qu.:1.757e+07 3rd Qu.:33.88 3rd Qu.:29.48
## Max. :141968.1 Max. :1.364e+09 Max. :55.12 Max. :73.10
## NA's :766 NA's :14 NA's :295 NA's :525
## region income
## Length:5275 Length:5275
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Show the last six rows of nations.
tail(nations)
## # A tibble: 6 x 10
## iso2c iso3c country year gdp_percap population birth_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ZW ZWE Zimbab~ 2009 1229. 13720997 36.1
## 2 ZW ZWE Zimbab~ 2010 1361. 13973897 36.3
## 3 ZW ZWE Zimbab~ 2011 1524. 14255592 36.3
## 4 ZW ZWE Zimbab~ 2012 1679. 14565482 36.1
## 5 ZW ZWE Zimbab~ 2013 1743. 14898092 35.7
## 6 ZW ZWE Zimbab~ 2014 1798. 15245855 35.2
## # ... with 3 more variables: neonat_mortal_rate <dbl>, region <chr>,
## # income <chr>
Step 2. Use mutate() to add a variable called GDP that holds each country's total GDP in trillions: multiply GDP per capita by population, then divide by 1 trillion (1e12).
# Add a GDP column: per-capita GDP times population, scaled down by 1e12
# (one trillion). Rows with a missing gdp_percap or population yield NA.
nations <- nations %>%
  mutate(GDP = gdp_percap * population / 1e12)
nations
## # A tibble: 5,275 x 11
## iso2c iso3c country year gdp_percap population birth_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9
## 2 AD AND Andorra 1994 NA 62707 10.9
## 3 AD AND Andorra 2003 NA 74783 10.3
## 4 AD AND Andorra 1990 NA 54511 11.9
## 5 AD AND Andorra 2009 NA 85474 9.9
## 6 AD AND Andorra 2011 NA 82326 NA
## 7 AD AND Andorra 2004 NA 78337 10.9
## 8 AD AND Andorra 2010 NA 84419 9.8
## 9 AD AND Andorra 2001 NA 67770 11.8
## 10 AD AND Andorra 2002 NA 71046 11.2
## # ... with 5,265 more rows, and 4 more variables:
## # neonat_mortal_rate <dbl>, region <chr>, income <chr>, GDP <dbl>