Nations Chart Assignment

Get Working Directory

# Print the current working directory (data files below are read relative to it)
getwd()
[1] "/Users/marieadelegrosso/Desktop/Desktop - Marie’s MacBook Air (2)/Data"

Load in Library and Data

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the original nations data set.
# read_csv() resolves a bare file name against the current working directory,
# which getwd() above already reports as the data folder, so the hard-coded,
# machine-specific setwd() call was redundant and has been dropped.
nations <- read_csv("nations.csv")
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the newer nations data set from nations_new.csv in the working directory
newnations <- read_csv("nations_new.csv")
Rows: 13184 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): country, iso3c, region, wb_income
dbl (5): year, birth_rate, neonatal_death_rate, population, gdp_per_cap

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Preview Data

# Preview the first rows of the original data set
nations |>
  head()
# A tibble: 6 × 10
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 2 more variables: region <chr>, income <chr>
# Preview the first rows of the newer data set
newnations |>
  head()
# A tibble: 6 × 9
  country iso3c  year birth_rate neonatal_death_rate population gdp_per_cap
  <chr>   <chr> <dbl>      <dbl>               <dbl>      <dbl>       <dbl>
1 Aruba   ABW    1960       33.9                  NA      54608          NA
2 Aruba   ABW    1961       32.8                  NA      55811          NA
3 Aruba   ABW    1962       31.6                  NA      56682          NA
4 Aruba   ABW    1963       30.4                  NA      57475          NA
5 Aruba   ABW    1964       29.1                  NA      58178          NA
6 Aruba   ABW    1965       27.9                  NA      58782          NA
# ℹ 2 more variables: region <chr>, wb_income <chr>

Clean Data for nations

# Keep only rows that have both a GDP-per-capita value and a population value;
# filter() combines comma-separated conditions with a logical AND
nations_nona <- nations |>
  filter(!is.na(gdp_percap), !is.na(population))

Mutate to create GDP in Trillions for nations

# Total GDP = GDP per capita x population, divided by 1e12 to express it
# in trillions; then preview the result
nationsgdp <- nations_nona |>
  mutate(gdp = gdp_percap * population / 1e12)
nationsgdp |>
  head()
# A tibble: 6 × 11
  iso2c iso3c country   year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>    <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AE    ARE   United …  1991     73037.    1913190       24.6                7.9
2 AE    ARE   United …  1993     71960.    2127863       22.4                7.3
3 AE    ARE   United …  2001     83534.    3217865       15.8                5.5
4 AE    ARE   United …  1992     73154.    2019014       23.5                7.6
5 AE    ARE   United …  1994     74684.    2238281       21.3                6.9
6 AE    ARE   United …  2007     75427.    6010100       12.8                4.7
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>

Clean Data for newnations

# Keep only rows that have both a GDP-per-capita value and a population value;
# filter() combines comma-separated conditions with a logical AND
newnations_nona <- newnations |>
  filter(!is.na(gdp_per_cap), !is.na(population))

Mutate to create GDP in Trillions for newnations

# Total GDP = GDP per capita x population, divided by 1e12 to express it
# in trillions; then preview the result
newnationsgdp <- newnations_nona |>
  mutate(gdp = gdp_per_cap * population / 1e12)
newnationsgdp |>
  head()
# A tibble: 6 × 10
  country iso3c  year birth_rate neonatal_death_rate population gdp_per_cap
  <chr>   <chr> <dbl>      <dbl>               <dbl>      <dbl>       <dbl>
1 Aruba   ABW    1986       20.6                  NA      64553       6283.
2 Aruba   ABW    1987       20.3                  NA      64450       7567.
3 Aruba   ABW    1988       19.8                  NA      64332       9275.
4 Aruba   ABW    1989       19.2                  NA      64596      10767.
5 Aruba   ABW    1990       18.7                  NA      65712      11639.
6 Aruba   ABW    1991       17.7                  NA      67864      12850.
# ℹ 3 more variables: region <chr>, wb_income <chr>, gdp <dbl>

Drawing Graphs

Filter desired countries

# Restrict the GDP data to the four countries of interest, then preview it
fournations <- nationsgdp |>
  filter(
    country %in% c("France", "United States", "United Kingdom", "China")
  )
fournations |>
  head()
# A tibble: 6 × 11
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 CN    CHN   China    1992      1260. 1164970000       18.3               29.4
2 CN    CHN   China    2005      5053. 1303720000       12.4               14  
3 CN    CHN   China    2000      2915. 1262645000       14.0               21.2
4 CN    CHN   China    1991      1091. 1150780000       19.7               29.7
5 CN    CHN   China    2013     12219. 1357380000       12.1                6.3
6 CN    CHN   China    1999      2650. 1252735000       14.6               22.2
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>

Chart 1

# Chart 1: total GDP over time as points joined by lines, one colour per
# country.
# fournations contains France, the United States, the United Kingdom and
# China, i.e. four of the five P5 members, so the title says "Four of the P5"
# instead of implying that all five are plotted.
plot1 <- fournations |>
  ggplot(aes(x = year, y = gdp, group = country, colour = country)) +
  geom_point() +
  geom_line() +
  scale_colour_brewer(palette = "Set1") +
  labs(
    title = "GDP Over Time for Four of the P5 Countries",
    x = "Years (1990-2015)",
    y = "GDP (in $ Trillions)",
    caption = "Source: Class Data Source"
  ) +
  theme_bw()
plot1

Summarise GDP by region and year

# Total GDP (in trillions) per region and year, plus each region's share of
# that year's combined GDP (a proportion between 0 and 1).
# - .groups = "drop" returns an ungrouped tibble and makes the grouping of
#   the summarise() result explicit instead of relying on the default.
# - The share is computed with .by = year. Previously the summarised data was
#   still grouped by region, so sum(gdp) added up one region's GDP across all
#   years rather than all regions' GDP within a single year.
nationsregion <- newnationsgdp |>
  group_by(region, year) |>
  summarise(gdp = sum(gdp, na.rm = TRUE), .groups = "drop") |>
  mutate(percentage = gdp / sum(gdp), .by = year)
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.
# Print the region-by-year summary table
nationsregion
# A tibble: 448 × 4
# Groups:   region [7]
   region              year    gdp percentage
   <chr>              <dbl>  <dbl>      <dbl>
 1 eastAsiaAndPacific  1960 0.0867   0.000299
 2 eastAsiaAndPacific  1961 0.0989   0.000341
 3 eastAsiaAndPacific  1962 0.105    0.000361
 4 eastAsiaAndPacific  1963 0.118    0.000407
 5 eastAsiaAndPacific  1964 0.135    0.000465
 6 eastAsiaAndPacific  1965 0.148    0.000508
 7 eastAsiaAndPacific  1966 0.167    0.000575
 8 eastAsiaAndPacific  1967 0.197    0.000677
 9 eastAsiaAndPacific  1968 0.226    0.000778
10 eastAsiaAndPacific  1969 0.262    0.000902
# ℹ 438 more rows

Chart 2

# Chart 2: stacked area chart of total GDP over time, one filled band per
# region, with thin white outlines separating the bands.
plot2 <- nationsregion |>
  ggplot(aes(x = year, y = gdp, fill = region)) +
  # linewidth replaces `size`, which is deprecated for lines since ggplot2 3.4.0
  geom_area(alpha = 1, linewidth = 0.25, colour = "white") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "GDP Over Time by Region",
    # The summarised data starts in 1960, so the axis label no longer claims
    # a 1990-2015 range
    x = "Year",
    y = "GDP (in $ Trillions)",
    caption = "Source: Class Data Source"
  ) +
  theme_bw()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# Render the regional GDP chart
plot2

# I used this site, https://r-graph-gallery.com/136-stacked-area-chart.html, for reference when I got lost