Data 110 Nations Assignment

Load the appropriate libraries and the Nations dataset

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(plotly)

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(RColorBrewer)
library(scales)

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

library(htmltools)
library(ggfortify)
# Read the Nations dataset (a CSV in the working directory) into a data frame.
nations <- read.csv(file = "nations.csv")

Observe the number of variables and the variable attributes

# Preview the first rows to check the column names and types.
nations %>%
  head()

##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    AD   AND Andorra 1996         NA      64291       10.9                2.8
## 2    AD   AND Andorra 1994         NA      62707       10.9                3.2
## 3    AD   AND Andorra 2003         NA      74783       10.3                2.0
## 4    AD   AND Andorra 1990         NA      54511       11.9                4.3
## 5    AD   AND Andorra 2009         NA      85474        9.9                1.7
## 6    AD   AND Andorra 2011         NA      82326         NA                1.6
##                  region      income
## 1 Europe & Central Asia High income
## 2 Europe & Central Asia High income
## 3 Europe & Central Asia High income
## 4 Europe & Central Asia High income
## 5 Europe & Central Asia High income
## 6 Europe & Central Asia High income

Create the new variable GDP

# Add total GDP in trillions: per-capita GDP times population, divided by 1e12.
# Rows with a missing gdp_percap get a missing gdp.
nations2 <- mutate(nations, gdp = (gdp_percap * population) / 1e12)

Observe the new variable in the dataset

# Preview the first rows again to confirm the new gdp column is present.
nations2 %>%
  head()

##   iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1    AD   AND Andorra 1996         NA      64291       10.9                2.8
## 2    AD   AND Andorra 1994         NA      62707       10.9                3.2
## 3    AD   AND Andorra 2003         NA      74783       10.3                2.0
## 4    AD   AND Andorra 1990         NA      54511       11.9                4.3
## 5    AD   AND Andorra 2009         NA      85474        9.9                1.7
## 6    AD   AND Andorra 2011         NA      82326         NA                1.6
##                  region      income gdp
## 1 Europe & Central Asia High income  NA
## 2 Europe & Central Asia High income  NA
## 3 Europe & Central Asia High income  NA
## 4 Europe & Central Asia High income  NA
## 5 Europe & Central Asia High income  NA
## 6 Europe & Central Asia High income  NA

Filter the data to show 4 desired countries

# Countries to compare in the first plot.
countries <- c(
  "United States",
  "India",
  "Indonesia",
  "China"
)

# Keep only the rows whose country is one of the four selected above.
nations_filtered <- filter(nations2, country %in% countries)

Plot 1

# Plot 1: GDP over time for the selected countries, drawn as lines with point
# markers and one ColorBrewer "Set1" colour per country.
ggplot(
  data = nations_filtered,
  mapping = aes(x = year, y = gdp, color = country)
) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = "Year",
    y = "GDP (trillions)",
    title = "GDP of the Top 4 Most Populated Countries"
  )

Group the data then create a summary table

# Total GDP per region and year; missing country-level values are skipped so
# they do not turn a whole region-year total into NA.
# The result stays grouped by region (summarise drops only the last grouping
# level by default).
nations3 <- summarise(
  group_by(nations2, region, year),
  GDP = sum(gdp, na.rm = TRUE)
)

## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.

# Print the summary table to inspect the per-region, per-year GDP totals.
nations3

## # A tibble: 175 × 3
## # Groups:   region [7]
##    region               year   GDP
##    <chr>               <int> <dbl>
##  1 East Asia & Pacific  1990  5.52
##  2 East Asia & Pacific  1991  6.03
##  3 East Asia & Pacific  1992  6.50
##  4 East Asia & Pacific  1993  7.04
##  5 East Asia & Pacific  1994  7.64
##  6 East Asia & Pacific  1995  8.29
##  7 East Asia & Pacific  1996  8.96
##  8 East Asia & Pacific  1997  9.55
##  9 East Asia & Pacific  1998  9.60
## 10 East Asia & Pacific  1999 10.1 
## # … with 165 more rows

Plot 2

# Plot 2: stacked area chart of total GDP per region over time, rendered as an
# interactive plotly widget.
#
# The ColorBrewer "Set2" palette is applied through the fill scale because
# `fill` is the aesthetic mapped to region. The area outline colour is the
# constant "white", so a colour scale would have nothing to act on.
p <- ggplot(nations3, aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white") +
  scale_fill_brewer(name = "Regions", palette = "Set2") +
  xlab("Year") +
  ylab("GDP (trillions)") +
  ggtitle("GDP by Regions")

# Convert the static ggplot into an interactive plotly chart.
ggplotly(p)

Data 110 Nations Assignment

Shadeja Fuentes

2023-03-07

Load the appropriate libraries and the Nations dataset

Observe the number of variables and the variable attributes

Create the new variable GDP

Observe the new variable in the dataset

Filter the data to show 4 desired countries

Plot 1

Group the data then create a summary table

Plot 2