Nations Dataset Homework

Author

D Shima

Load the required packages

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)

Access the dataset

# Read the nations data. The full path is passed to read.csv() directly so the
# session's working directory is left unchanged (no setwd() side effect).
nationsdataset <- read.csv(
  file.path(
    "/Users/doriashima/Desktop/data visualization course",
    "nations.csv"
  )
)
# No data() call is needed: data() looks up datasets shipped with packages,
# not objects already in the session, so data(nationsdataset) only produced
# the warning "data set 'nationsdataset' not found".
# Preview the first six rows to check the columns that were read in.
head(nationsdataset, n = 6L)
  iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
1    AD   AND Andorra 1996         NA      64291       10.9                2.8
2    AD   AND Andorra 1994         NA      62707       10.9                3.2
3    AD   AND Andorra 2003         NA      74783       10.3                2.0
4    AD   AND Andorra 1990         NA      54511       11.9                4.3
5    AD   AND Andorra 2009         NA      85474        9.9                1.7
6    AD   AND Andorra 2011         NA      82326         NA                1.6
                 region      income
1 Europe & Central Asia High income
2 Europe & Central Asia High income
3 Europe & Central Asia High income
4 Europe & Central Asia High income
5 Europe & Central Asia High income
6 Europe & Central Asia High income

Chart 1

# Add total GDP per row: GDP per capita times population, scaled by 1e12.
nationsdataset <- mutate(
  nationsdataset,
  gdp = (gdp_percap * population) / 1e12
)

Filter four countries

# Keep only the rows for the four countries compared in chart 1.
countries <- filter(
  nationsdataset,
  country %in% c("China","Germany", "Japan", "United States")
)

Draw chart 1

# Chart 1: GDP by year for the four selected countries, drawn as one line per
# country with a semi-transparent point at each observation.
ggplot(countries, aes(x = year, y = gdp, color = country)) +
  geom_point(size = 2, alpha = 0.5) +
  # `linewidth` replaces `size` for lines; `size` on lines has been
  # deprecated since ggplot2 3.4.0 and triggered a warning.
  geom_line(linewidth = 1) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "China's Rise to Become the Largest Economy",
    caption = "Nations Dataset",
    x = "Year",
    y = "GDP(Trillions of Dollars)"
  ) +
  theme_minimal()

Chart 2

Group the Data by Region and Year, Then Summarize GDP

# Total GDP for each region and year; missing gdp values are skipped.
# `.groups = "drop"` returns an ungrouped result, so no leftover grouping by
# region carries into later steps and summarise() no longer prints its
# regrouping message.
gdp_summary <- nationsdataset |>
  group_by(region, year) |>
  summarise(GDP = sum(gdp, na.rm = TRUE), .groups = "drop")

Draw Chart 2

# Chart 2: stacked area chart of summed GDP by year, one filled band per
# region.
ggplot(
  data = gdp_summary,
  mapping = aes(x = year, y = GDP, fill = region)
) +
  geom_area(alpha = 0.7) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP by World Bank Region",
    caption = "Nations Dataset",
    x = "Year",
    y = "GDP(Trillions of Dollars)",
    fill = "Region"
  ) +
  theme_minimal()