# HW 9, Michael Simms

# Attach the tidyverse packages used by this script.
library(tidyverse)

# NOTE(review): machine-specific working directory. The relative path passed
# to read_csv() below is resolved against it, so the call is kept as-is;
# consider a project-relative path instead of setwd() to make the script
# portable.
setwd("~/MC Data Science/Data 110/Datasets")
# Read the nations data set from the current working directory.
nations <- read_csv("nations.csv")

# Preview the first rows (recorded console output kept as `#>` comments).
nations |>
  head()
#> # A tibble: 6 × 10
#> iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 AD AND Andorra 1996 NA 64291 10.9 2.8
#> 2 AD AND Andorra 1994 NA 62707 10.9 3.2
#> 3 AD AND Andorra 2003 NA 74783 10.3 2
#> 4 AD AND Andorra 1990 NA 54511 11.9 4.3
#> 5 AD AND Andorra 2009 NA 85474 9.9 1.7
#> 6 AD AND Andorra 2011 NA 82326 NA 1.6
#> # ℹ 2 more variables: region <chr>, income <chr>
# Inspect the structure of the data: column names, types and sample values
# (recorded console output kept as `#>` comments).
str(nations)
#> spc_tbl_ [5,275 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
#> $ iso2c : chr [1:5275] "AD" "AD" "AD" "AD" ...
#> $ iso3c : chr [1:5275] "AND" "AND" "AND" "AND" ...
#> $ country : chr [1:5275] "Andorra" "Andorra" "Andorra" "Andorra" ...
#> $ year : num [1:5275] 1996 1994 2003 1990 2009 ...
#> $ gdp_percap : num [1:5275] NA NA NA NA NA NA NA NA NA NA ...
#> $ population : num [1:5275] 64291 62707 74783 54511 85474 ...
#> $ birth_rate : num [1:5275] 10.9 10.9 10.3 11.9 9.9 NA 10.9 9.8 11.8 11.2 ...
#> $ neonat_mortal_rate: num [1:5275] 2.8 3.2 2 4.3 1.7 1.6 2 1.7 2.1 2.1 ...
#> $ region : chr [1:5275] "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" "Europe & Central Asia" ...
#> $ income : chr [1:5275] "High income" "High income" "High income" "High income" ...
#> - attr(*, "spec")=
#> .. cols(
#> .. iso2c = col_character(),
#> .. iso3c = col_character(),
#> .. country = col_character(),
#> .. year = col_double(),
#> .. gdp_percap = col_double(),
#> .. population = col_double(),
#> .. birth_rate = col_double(),
#> .. neonat_mortal_rate = col_double(),
#> .. region = col_character(),
#> .. income = col_character()
#> .. )
#> - attr(*, "problems")=<externalptr>
# Add gdp_tn: per-capita GDP times population, scaled down by 1e12.
# Rows with a missing gdp_percap or population yield NA.
nations2 <- nations |>
  mutate(gdp_tn = gdp_percap * population / 1e12)

# Preview the result (recorded console output kept as `#>` comments).
head(nations2)
#> # A tibble: 6 × 11
#> iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 AD AND Andorra 1996 NA 64291 10.9 2.8
#> 2 AD AND Andorra 1994 NA 62707 10.9 3.2
#> 3 AD AND Andorra 2003 NA 74783 10.3 2
#> 4 AD AND Andorra 1990 NA 54511 11.9 4.3
#> 5 AD AND Andorra 2009 NA 85474 9.9 1.7
#> 6 AD AND Andorra 2011 NA 82326 NA 1.6
#> # ℹ 3 more variables: region <chr>, income <chr>, gdp_tn <dbl>
# Keep only the four countries of interest.
nations_aging <- nations2 |>
  filter(country %in% c("Germany", "Italy", "Japan", "Spain"))

# Preview the subset (recorded console output kept as `#>` comments).
head(nations_aging)
#> # A tibble: 6 × 11
#> iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 DE DEU Germany 1997 24184. 82034771 9.9 2.9
#> 2 DE DEU Germany 1990 19033. 79433029 11.4 3.4
#> 3 DE DEU Germany 1996 23656. 81914831 9.7 3
#> 4 DE DEU Germany 1992 21230. 80624598 10 3.5
#> 5 DE DEU Germany 1993 21387. 81156363 9.8 3.3
#> 6 DE DEU Germany 2003 29362. 82534176 8.6 2.7
#> # ℹ 3 more variables: region <chr>, income <chr>, gdp_tn <dbl>

# List the distinct years present in the subset.
unique(nations_aging$year)
#> [1] 1997 1990 1996 1992 1993 2003 1995 1994 2010 2007 2005 2013 2009 1998 2011
#> [16] 2001 2002 1991 2006 2004 2012 2014 2008 2000 1999
# Years present in nations_aging: 1990-2014

# GDP over time for the four selected countries: one point per country-year,
# plus a line per country coloured with the Brewer "Set1" palette.
# Layer order is kept (points first, lines drawn on top).
p1 <- ggplot(data = nations_aging, mapping = aes(x = year, y = gdp_tn)) +
  geom_point() +
  xlab("Year") +
  theme_minimal(base_size = 12) +
  ylab("GDP (in trillions of dollars)") +
  ggtitle("GDP (in trillions of $), for Germany, Italy, Japan, and Spain") +
  scale_color_brewer(palette = "Set1") +
  geom_line(mapping = aes(color = country))
p1

# Total gdp_tn per region and year; missing values are ignored in the sum.
# `.groups = "drop_last"` spells out summarise()'s default here (the result
# stays grouped by region) and silences the informational message
# "`summarise()` has grouped output by 'region'".
nations3 <- nations2 |>
  group_by(region, year) |>
  summarise(GDP = sum(gdp_tn, na.rm = TRUE), .groups = "drop_last")
# Preview the regional totals (recorded console output kept as `#>` comments).
head(nations3)
#> # A tibble: 6 × 3
#> # Groups: region [1]
#> region year GDP
#> <chr> <dbl> <dbl>
#> 1 East Asia & Pacific 1990 5.52
#> 2 East Asia & Pacific 1991 6.03
#> 3 East Asia & Pacific 1992 6.50
#> 4 East Asia & Pacific 1993 7.04
#> 5 East Asia & Pacific 1994 7.64
#> 6 East Asia & Pacific 1995 8.29

# List the distinct regions in the summary.
unique(nations3$region)
#> [1] "East Asia & Pacific" "Europe & Central Asia"
#> [3] "Latin America & Caribbean" "Middle East & North Africa"
#> [5] "North America" "South Asia"
#> [7] "Sub-Saharan Africa"
# Area chart of GDP per year, filled by region using the Brewer "Set2"
# palette, with semi-transparent areas outlined in white.
p2 <- nations3 |>
  ggplot(mapping = aes(x = year, y = GDP, fill = region)) +
  geom_area(color = "white", alpha = 0.5) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP (in trillions of dollars)"
  )

# Display the chart.
p2