HW 9, Michael Simms

HW 9, Michael Simms

library(tidyverse)
# Load the nations dataset. The file is read through an explicit path rather
# than via setwd(), so the script no longer changes the session's working
# directory as a side effect.
data_dir <- path.expand("~/MC Data Science/Data 110/Datasets")
nations <- read_csv(file.path(data_dir, "nations.csv"))
# Preview the first rows.
head(nations)
# A tibble: 6 × 10
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 2 more variables: region <chr>, income <chr>
# Show the column types and the first few values of each column.
str(nations)
spc_tbl_ [5,275 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ iso2c             : chr [1:5275] "AD" "AD" "AD" "AD" ...
 $ iso3c             : chr [1:5275] "AND" "AND" "AND" "AND" ...
 $ country           : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
 $ year              : num [1:5275] 1996 1994 2003 1990 2009 ...
 $ gdp_percap        : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
 $ population        : num [1:5275] 64291 62707 74783 54511 85474 ...
 $ birth_rate        : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
 $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
 $ region            : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
 $ income            : chr [1:5275] "High income" "High income" "High income" "High income" ...
 - attr(*, "spec")=
  .. cols(
  ..   iso2c = col_character(),
  ..   iso3c = col_character(),
  ..   country = col_character(),
  ..   year = col_double(),
  ..   gdp_percap = col_double(),
  ..   population = col_double(),
  ..   birth_rate = col_double(),
  ..   neonat_mortal_rate = col_double(),
  ..   region = col_character(),
  ..   income = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
# Add gdp_tn: per-capita GDP times population, divided by 1e12 (plotted
# below as GDP in trillions of dollars).
nations2 <- mutate(nations, gdp_tn = gdp_percap * population / 1e12)
nations2 |>
  head()
# A tibble: 6 × 11
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 3 more variables: region <chr>, income <chr>, gdp_tn <dbl>
# Keep only the rows for the four countries compared in the first chart.
nations_aging <- nations2 |>
  filter(country %in% c("Germany", "Italy", "Japan", "Spain"))
nations_aging |>
  head()
# A tibble: 6 × 11
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 DE    DEU   Germany  1997     24184.   82034771        9.9                2.9
2 DE    DEU   Germany  1990     19033.   79433029       11.4                3.4
3 DE    DEU   Germany  1996     23656.   81914831        9.7                3  
4 DE    DEU   Germany  1992     21230.   80624598       10                  3.5
5 DE    DEU   Germany  1993     21387.   81156363        9.8                3.3
6 DE    DEU   Germany  2003     29362.   82534176        8.6                2.7
# ℹ 3 more variables: region <chr>, income <chr>, gdp_tn <dbl>
# List the distinct years present for the selected countries.
nations_aging |>
  pull(year) |>
  unique()
 [1] 1997 1990 1996 1992 1993 2003 1995 1994 2010 2007 2005 2013 2009 1998 2011
[16] 2001 2002 1991 2006 2004 2012 2014 2008 2000 1999
# The data cover the years 1990 through 2014.
# Chart 1: GDP over time for Germany, Italy, Japan, and Spain.
# `color = country` is mapped in the top-level aes() so that both the lines
# and the points use the Set1 palette; when the mapping lived only in
# geom_line(), every point was drawn in the default black.
# Lines are added before points so the points are drawn on top of them.
p1 <- ggplot(
  data = nations_aging,
  mapping = aes(x = year, y = gdp_tn, color = country)
) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    x = "Year",
    y = "GDP (in trillions of dollars)",
    title = "GDP (in trillions of $), for Germany, Italy, Japan, and Spain"
  ) +
  theme_minimal(base_size = 12)
p1

# Total GDP (gdp_tn summed) for each region and year; rows with a missing
# gdp_tn are ignored by na.rm = TRUE.
# `.groups = "drop"` returns an ungrouped tibble, which silences the
# "summarise() has grouped output" message and keeps later verbs from
# silently operating per region.
nations3 <- nations2 |>
  group_by(region, year) |>
  summarise(GDP = sum(gdp_tn, na.rm = TRUE), .groups = "drop")
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.
# Preview the first rows of the regional totals.
nations3 |>
  head()
# A tibble: 6 × 3
# Groups:   region [1]
  region               year   GDP
  <chr>               <dbl> <dbl>
1 East Asia & Pacific  1990  5.52
2 East Asia & Pacific  1991  6.03
3 East Asia & Pacific  1992  6.50
4 East Asia & Pacific  1993  7.04
5 East Asia & Pacific  1994  7.64
6 East Asia & Pacific  1995  8.29
# List the distinct regions present in the summary.
nations3 |>
  pull(region) |>
  unique()
[1] "East Asia & Pacific"        "Europe & Central Asia"     
[3] "Latin America & Caribbean"  "Middle East & North Africa"
[5] "North America"              "South Asia"                
[7] "Sub-Saharan Africa"        
# Chart 2: area chart of the regional GDP totals by year, filled by region,
# with semi-transparent fills and white outlines.
p2 <- nations3 |>
  ggplot(mapping = aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white", alpha = 0.5) +
  xlab("Year") +
  ylab("GDP (in trillions of dollars)") +
  ggtitle("GDP by World Bank Region") +
  scale_fill_brewer(palette = "Set2")
p2