library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(RColorBrewer)
# Load the nations data set into `nations`.
#
# The file is resolved relative to the current working directory, so run the
# script from the folder that contains nations.csv (for example by opening the
# project there). A hard-coded, user-specific setwd() is deliberately avoided
# because it breaks the script on every other machine.
nations_path <- "nations.csv"
if (!file.exists(nations_path)) {
  # Fail early with the directory that was searched instead of a bare
  # "file does not exist" error from the reader.
  stop(
    "Cannot find '", nations_path, "' in ", getwd(),
    "; start R in the folder that contains it.",
    call. = FALSE
  )
}
nations <- read_csv(nations_path)
## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Exploratory checks on the freshly loaded `nations` table. Each call only
# prints; nothing is assigned. The `##` lines are the output recorded when the
# script was rendered.

# Per-column summary statistics (including NA counts for numeric columns).
nations %>% summary()
## iso2c iso3c country year
## Length:5275 Length:5275 Length:5275 Min. :1990
## Class :character Class :character Class :character 1st Qu.:1996
## Mode :character Mode :character Mode :character Median :2002
## Mean :2002
## 3rd Qu.:2008
## Max. :2014
##
## gdp_percap population birth_rate neonat_mortal_rate
## Min. : 239.7 Min. :9.004e+03 Min. : 6.90 Min. : 0.70
## 1st Qu.: 2263.6 1st Qu.:7.175e+05 1st Qu.:13.40 1st Qu.: 6.70
## Median : 6563.2 Median :5.303e+06 Median :21.60 Median :15.00
## Mean : 12788.8 Mean :2.958e+07 Mean :24.16 Mean :19.40
## 3rd Qu.: 17195.0 3rd Qu.:1.757e+07 3rd Qu.:33.88 3rd Qu.:29.48
## Max. :141968.1 Max. :1.364e+09 Max. :55.12 Max. :73.10
## NA's :766 NA's :14 NA's :295 NA's :525
## region income
## Length:5275 Length:5275
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##

# Internal structure: column types plus the stored column specification.
nations %>% str()
## spec_tbl_df [5,275 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ iso2c : chr [1:5275] "AD" "AD" "AD" "AD" ...
## $ iso3c : chr [1:5275] "AND" "AND" "AND" "AND" ...
## $ country : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
## $ year : num [1:5275] 1996 1994 2003 1990 2009 ...
## $ gdp_percap : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
## $ population : num [1:5275] 64291 62707 74783 54511 85474 ...
## $ birth_rate : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
## $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
## $ region : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
## $ income : chr [1:5275] "High income" "High income" "High income" "High income" ...
## - attr(*, "spec")=
## .. cols(
## .. iso2c = col_character(),
## .. iso3c = col_character(),
## .. country = col_character(),
## .. year = col_double(),
## .. gdp_percap = col_double(),
## .. population = col_double(),
## .. birth_rate = col_double(),
## .. neonat_mortal_rate = col_double(),
## .. region = col_character(),
## .. income = col_character()
## .. )
## - attr(*, "problems")=<externalptr>

# Number of rows and columns.
nations %>% dim()
## [1] 5275 10

# Compact, one-line-per-column preview of the first values.
nations %>% glimpse()
## Rows: 5,275
## Columns: 10
## $ iso2c <chr> "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD…
## $ iso3c <chr> "AND", "AND", "AND", "AND", "AND", "AND", "AND", "A…
## $ country <chr> "Andorra", "Andorra", "Andorra", "Andorra", "Andorr…
## $ year <dbl> 1996, 1994, 2003, 1990, 2009, 2011, 2004, 2010, 200…
## $ gdp_percap <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ population <dbl> 64291, 62707, 74783, 54511, 85474, 82326, 78337, 84…
## $ birth_rate <dbl> 10.900, 10.900, 10.300, 11.900, 9.900, NA, 10.900, …
## $ neonat_mortal_rate <dbl> 2.8, 3.2, 2.0, 4.3, 1.7, 1.6, 2.0, 1.7, 2.1, 2.1, 2…
## $ region <chr> "Europe & Central Asia", "Europe & Central Asia", "…
## $ income <chr> "High income", "High income", "High income", "High …
# Derive total GDP per country-year and select the countries for plot 1.
#
# `nations` already holds the contents of nations.csv, so reuse it rather than
# parsing the same file a second time with a different reader.
# as.data.frame() keeps `gdp` a plain data frame, which is what read.csv()
# used to return.
gdp <- as.data.frame(nations)

# Total GDP = GDP per capita * population, divided by 1e12 so the values are
# in trillions (the charts label this axis in $ trillion).
gdp_second <- mutate(gdp, GDP = (gdp_percap * population) / 1e12)

# Countries compared in the first chart; %in% replaces a chain of `==`/`|`.
plot_countries <- c("Guatemala", "Dominican Republic", "Jamaica", "Belize")
gdp3 <- filter(gdp_second, country %in% plot_countries)
# Plot 1: GDP over time for the selected countries in `gdp3`, one coloured
# series per country drawn as points joined by lines.
ggplot(gdp3, aes(x = year, y = GDP, color = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Comparison Between Some of the Poorest Countries GDPS in North America",
    # The previous label had an unbalanced ")" and read "per $ trillion";
    # the GDP column is total GDP divided by 1e12, i.e. in trillions.
    y = "GDP ($ trillion)"
  ) +
  theme_minimal(base_size = 12)
# Create Plot 2 using region and year
#
# Sum GDP over all countries within each region and year; rows with a missing
# GDP are ignored by the sum. `.groups = "drop"` returns an ungrouped tibble,
# which also silences the "grouped output" message summarise() would
# otherwise print.
gdp4 <- gdp_second %>%
  group_by(region, year) %>%
  summarise(GDP = sum(GDP, na.rm = TRUE), .groups = "drop")
# Plot 2: area chart of the regional GDP totals in `gdp4` over time, one
# filled band per region, separated by thin white outlines.
ggplot(gdp4, aes(x = year, y = GDP)) +
  geom_area(aes(fill = region), colour = "white") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by Region",
    x = "year",
    # The previous label had an unbalanced ")" and read "per $ trillion".
    y = "GDP ($ trillion)"
  ) +
  theme_minimal(base_size = 12)