library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(RColorBrewer)

Load nations dataset

# Report the working directory; "nations.csv" is read relative to it.
getwd()

## [1] "C:/Users/Mitcheyla$/Desktop/DATA110 -VISUALISATION"

# No setwd() call: the session already starts in the data directory (see the
# getwd() output above), and a hard-coded absolute path would make the script
# fail on any other machine.

Examine the data

# Read the nations data from the working directory into a tibble.
nations <- read_csv(file = "nations.csv")

## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

View the dataset

# Per-column summary: quartiles and NA counts for numeric columns,
# length/class/mode for character columns.
nations %>% summary()

##     iso2c              iso3c             country               year     
##  Length:5275        Length:5275        Length:5275        Min.   :1990  
##  Class :character   Class :character   Class :character   1st Qu.:1996  
##  Mode  :character   Mode  :character   Mode  :character   Median :2002  
##                                                           Mean   :2002  
##                                                           3rd Qu.:2008  
##                                                           Max.   :2014  
##                                                                         
##    gdp_percap         population          birth_rate    neonat_mortal_rate
##  Min.   :   239.7   Min.   :9.004e+03   Min.   : 6.90   Min.   : 0.70     
##  1st Qu.:  2263.6   1st Qu.:7.175e+05   1st Qu.:13.40   1st Qu.: 6.70     
##  Median :  6563.2   Median :5.303e+06   Median :21.60   Median :15.00     
##  Mean   : 12788.8   Mean   :2.958e+07   Mean   :24.16   Mean   :19.40     
##  3rd Qu.: 17195.0   3rd Qu.:1.757e+07   3rd Qu.:33.88   3rd Qu.:29.48     
##  Max.   :141968.1   Max.   :1.364e+09   Max.   :55.12   Max.   :73.10     
##  NA's   :766        NA's   :14          NA's   :295     NA's   :525       
##     region             income         
##  Length:5275        Length:5275       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
##

View the structure and dimensions of the data

# Compact display of each column's type and first few values.
nations %>% str()

## spec_tbl_df [5,275 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ iso2c             : chr [1:5275] "AD" "AD" "AD" "AD" ...
##  $ iso3c             : chr [1:5275] "AND" "AND" "AND" "AND" ...
##  $ country           : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
##  $ year              : num [1:5275] 1996 1994 2003 1990 2009 ...
##  $ gdp_percap        : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
##  $ population        : num [1:5275] 64291 62707 74783 54511 85474 ...
##  $ birth_rate        : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
##  $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
##  $ region            : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
##  $ income            : chr [1:5275] "High income" "High income" "High income" "High income" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   iso2c = col_character(),
##   ..   iso3c = col_character(),
##   ..   country = col_character(),
##   ..   year = col_double(),
##   ..   gdp_percap = col_double(),
##   ..   population = col_double(),
##   ..   birth_rate = col_double(),
##   ..   neonat_mortal_rate = col_double(),
##   ..   region = col_character(),
##   ..   income = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Number of rows and columns.
nations %>% dim()

## [1] 5275   10

# Transposed preview: one line per column with its type and leading values.
nations %>% glimpse()

## Rows: 5,275
## Columns: 10
## $ iso2c              <chr> "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD…
## $ iso3c              <chr> "AND", "AND", "AND", "AND", "AND", "AND", "AND", "A…
## $ country            <chr> "Andorra", "Andorra", "Andorra", "Andorra", "Andorr…
## $ year               <dbl> 1996, 1994, 2003, 1990, 2009, 2011, 2004, 2010, 200…
## $ gdp_percap         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ population         <dbl> 64291, 62707, 74783, 54511, 85474, 82326, 78337, 84…
## $ birth_rate         <dbl> 10.900, 10.900, 10.300, 11.900, 9.900, NA, 10.900, …
## $ neonat_mortal_rate <dbl> 2.8, 3.2, 2.0, 4.3, 1.7, 1.6, 2.0, 1.7, 2.1, 2.1, 2…
## $ region             <chr> "Europe & Central Asia", "Europe & Central Asia", "…
## $ income             <chr> "High income", "High income", "High income", "High …

Create a new variable in the data using `mutate()` from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion.

# Add total GDP in trillions of dollars: per-capita GDP times population,
# divided by 1e12. Rows with a missing gdp_percap or population yield NA.
# The `nations` tibble read earlier is reused instead of parsing nations.csv
# a second time with base read.csv().
gdp <- nations
gdp_second <- gdp %>%
  mutate(GDP = (gdp_percap * population) / 1e12)

Create Plot 1 with four of the poorest countries in North America

# Plot 1: GDP over time for the four countries compared in the chart title.
# `%in%` replaces a chain of `==` / `|` tests and selects the same rows.
poorest_countries <- c("Guatemala", "Dominican Republic", "Jamaica", "Belize")
gdp3 <- filter(gdp_second, country %in% poorest_countries)

# One coloured series per country, drawn as points joined by lines.
# The y-axis label states the unit (GDP is expressed in trillions of dollars)
# and no longer carries an unbalanced closing parenthesis.
ggplot(gdp3, aes(x = year, y = GDP, color = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  ylab("GDP ($ trillion)") +
  theme_minimal(base_size = 12) +
  ggtitle("Comparison Between Some of the Poorest Countries GDPS in North America")

# Create Plot 2 using region and year

# Total GDP per region and year; missing GDP values are skipped via
# na.rm = TRUE. `.groups = "drop"` returns an ungrouped tibble and silences
# summarise()'s "grouped output" message.
gdp4 <- gdp_second %>%
  group_by(region, year) %>%
  summarise(GDP = sum(GDP, na.rm = TRUE), .groups = "drop")

# Area chart of regional GDP over time, one fill colour per region, with
# white outlines separating the areas. The y-axis label states the unit and
# no longer carries an unbalanced closing parenthesis.
ggplot(gdp4, aes(x = year, y = GDP)) +
  geom_area(aes(fill = region), colour = "white") +
  scale_fill_brewer(palette = "Set2") +
  xlab("year") +
  ylab("GDP ($ trillion)") +
  theme_minimal(base_size = 12) +
  ggtitle("GDP by Region")

Homework6

Sheyla Daccarett

2022-10-09

Load nations dataset

Examine the data

View the dataset

View the structure and dimensions of the data

Create a new variable in the data using `mutate()` from dplyr, giving the GDP of each country in trillions of dollars, by multiplying gdp_percap by population and dividing by a trillion.

Create Plot 1 with four of the poorest countries in North America