library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.4.1 ──
## ✔ broom 1.0.9 ✔ recipes 1.3.1
## ✔ dials 1.4.2 ✔ rsample 1.3.1
## ✔ dplyr 1.1.4 ✔ tailor 0.1.0
## ✔ ggplot2 4.0.0 ✔ tidyr 1.3.1
## ✔ infer 1.0.9 ✔ tune 2.0.0
## ✔ modeldata 1.5.1 ✔ workflows 1.3.0
## ✔ parsnip 1.3.3 ✔ workflowsets 1.1.1
## ✔ purrr 1.1.0 ✔ yardstick 1.3.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ recipes::step() masks stats::step()
# Load the nations data set. The file is addressed by its full path rather
# than by calling setwd(), so the session's working directory is left alone.
nations_path <- file.path("C:/Users/sarah/Downloads", "nations.csv")
nations <- read.csv(nations_path)

# Total GDP per country-year, scaled to trillions (the plots below label this
# axis "GDP ($trillion)"): per-capita GDP times population, divided by 1e12.
# Rows where gdp_percap is NA produce an NA gdp.
nations <- nations |>
  mutate(gdp = (gdp_percap * population) / 1e12)

# Preview the first ten rows, including the new gdp column.
head(nations, 10)
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2.0
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2.0
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## region income gdp
## 1 Europe & Central Asia High income NA
## 2 Europe & Central Asia High income NA
## 3 Europe & Central Asia High income NA
## 4 Europe & Central Asia High income NA
## 5 Europe & Central Asia High income NA
## 6 Europe & Central Asia High income NA
## 7 Europe & Central Asia High income NA
## 8 Europe & Central Asia High income NA
## 9 Europe & Central Asia High income NA
## 10 Europe & Central Asia High income NA
# Keep only the four economies compared in the first chart.
firstplot <- filter(
  nations,
  country %in% c("China", "Japan", "Germany", "United States")
)

# GDP over time, one point-and-line series per country, coloured with the
# ColorBrewer "Set1" palette.
ggplot(
  data = firstplot,
  mapping = aes(x = year, y = gdp, group = country, color = country)
) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "China's Rise to Become the Largest Economy",
    y = "GDP ($trillion)"
  )
# Total GDP for each region and year. na.rm = TRUE skips countries with a
# missing gdp instead of turning the whole regional total into NA.
# .groups = "drop" returns an ungrouped tibble: no leftover `region` grouping
# carries into later steps, and summarise() no longer prints its
# "grouped output" message. The rows and values are unchanged.
secondplot <- nations |>
  group_by(region, year) |>
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
secondplot
## # A tibble: 175 × 3
## region year GDP
## <chr> <int> <dbl>
## 1 East Asia & Pacific 1990 5.52
## 2 East Asia & Pacific 1991 6.03
## 3 East Asia & Pacific 1992 6.50
## 4 East Asia & Pacific 1993 7.04
## 5 East Asia & Pacific 1994 7.64
## 6 East Asia & Pacific 1995 8.29
## 7 East Asia & Pacific 1996 8.96
## 8 East Asia & Pacific 1997 9.55
## 9 East Asia & Pacific 1998 9.60
## 10 East Asia & Pacific 1999 10.1
## # ℹ 165 more rows
# Area chart of regional GDP over time, one band per region. `fill` colours
# each band from the ColorBrewer "Set2" palette; color = "white" draws a
# white outline around every band.
ggplot(
  data = secondplot,
  mapping = aes(x = year, y = GDP, group = region, fill = region)
) +
  geom_area(color = "white") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Region",
    y = "GDP ($trillion)"
  )
The difference between fill and color when making a graph is that fill colors in the entire interior of a shape, whereas color sets the color of the shape's border (its outline).