Loading Libraries

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor

Data Preparation

library(readr)
# Read the nations dataset into `nations`.
# The path is anchored at `~`, so it does not depend on the current working
# directory; no setwd() call is needed (a hard-coded, machine-specific
# setwd() would fail on any computer without that folder).
nations <- read_csv("~/Montgomery College/Fall 2021/DATA 110/Datasets/nations.csv")
## Rows: 5275 Columns: 10
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
#view(nations)

Data Preparation Summary

# Render the first rows of `nations` (head()'s default) as a formatted table.
knitr::kable(head(nations))
iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate region income
AD AND Andorra 1996 NA 64291 10.9 2.8 Europe & Central Asia High income
AD AND Andorra 1994 NA 62707 10.9 3.2 Europe & Central Asia High income
AD AND Andorra 2003 NA 74783 10.3 2.0 Europe & Central Asia High income
AD AND Andorra 1990 NA 54511 11.9 4.3 Europe & Central Asia High income
AD AND Andorra 2009 NA 85474 9.9 1.7 Europe & Central Asia High income
AD AND Andorra 2011 NA 82326 NA 1.6 Europe & Central Asia High income

Mutating Variable

# Add a GDP column: per-capita GDP times population, divided by 10^12 so the
# values are expressed in trillions.
nations1 <- nations %>%
  mutate(GDP = gdp_percap * population / 10^12)

Chart One

# Keep only the rows for the four countries compared in the first chart.
nations2 <- nations1 %>%
  filter(country %in% c("Brazil", "Japan", "France", "United States"))

# Chart one: GDP against year for the selected countries, drawn as points
# with connecting lines, one colour per country from the "Set1" palette.
nations2 %>%
  ggplot(mapping = aes(x = year, y = GDP, color = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "National Economic Performance",
    x = "Years",
    y = "GDP ($Trillions)"
  )

Grouping and Summarizing Variables

# Total GDP per region and year. Missing GDP values are ignored in the sum
# (na.rm = TRUE). summarise() leaves the result grouped by `region`, so
# ungroup() is applied to hand a plain, ungrouped tibble to later steps.
nations3 <- nations1 %>%
  group_by(region, year) %>%
  summarise(GDP = sum(GDP, na.rm = TRUE)) %>%
  ungroup()
## `summarise()` has grouped output by 'region'. You can override using the `.groups` argument.

Chart Two

# Chart two: area chart of summed GDP over time, one filled area per region,
# with white outlines between the areas.
# The regions are mapped to the `fill` aesthetic (the outline colour is a
# fixed "white"), so the Brewer palette must be attached to the fill scale;
# a colour scale would have no mapped aesthetic to act on and the "Set2"
# palette would be silently ignored.
ggplot(nations3, aes(x = year, y = GDP)) +
  geom_area(color = "white", aes(fill = region)) +
  ggtitle("GDP by Region") +
  labs(x = "Years",
       y = "GDP ($ Trillions)") +
  scale_fill_brewer(palette = "Set2")