NationsHW

Author

E Lott

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.1     ✔ stringr   1.5.2
✔ ggplot2   4.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the nations data set.
# The file is read via an explicit path instead of calling setwd(), so the
# session's working directory is not changed as a side effect.
# NOTE: `data_dir` is machine-specific; point it at the folder that holds
# nations.csv when running elsewhere.
data_dir <- "C:/Users/Erika/OneDrive/Desktop/DATA 110"
nations <- read_csv(file.path(data_dir, "nations.csv"))
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Normalise the column names (lower-case, no spaces), then preview the
# first rows.
names(nations) <- names(nations) |>
  tolower() |>
  gsub(pattern = " ", replacement = "")
head(nations)
# A tibble: 6 × 10
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 2 more variables: region <chr>, income <chr>
# Per-column summary of the data frame (ranges and NA counts for the
# numeric columns).
summary(nations)
    iso2c              iso3c             country               year     
 Length:5275        Length:5275        Length:5275        Min.   :1990  
 Class :character   Class :character   Class :character   1st Qu.:1996  
 Mode  :character   Mode  :character   Mode  :character   Median :2002  
                                                          Mean   :2002  
                                                          3rd Qu.:2008  
                                                          Max.   :2014  
                                                                        
   gdp_percap         population          birth_rate    neonat_mortal_rate
 Min.   :   239.7   Min.   :9.004e+03   Min.   : 6.90   Min.   : 0.70     
 1st Qu.:  2263.6   1st Qu.:7.175e+05   1st Qu.:13.40   1st Qu.: 6.70     
 Median :  6563.2   Median :5.303e+06   Median :21.60   Median :15.00     
 Mean   : 12788.8   Mean   :2.958e+07   Mean   :24.16   Mean   :19.40     
 3rd Qu.: 17195.0   3rd Qu.:1.757e+07   3rd Qu.:33.88   3rd Qu.:29.48     
 Max.   :141968.1   Max.   :1.364e+09   Max.   :55.12   Max.   :73.10     
 NA's   :766        NA's   :14          NA's   :295     NA's   :525       
    region             income         
 Length:5275        Length:5275       
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      
# Discard rows that have no GDP-per-capita value.
nations_nona <- drop_na(nations, gdp_percap)

# Total GDP in trillions: per-capita GDP times population, scaled by 1e12.
new_nations <- mutate(nations_nona, gdp = gdp_percap * population / 1e12)
library(ggplot2)
library(RColorBrewer)
# Keep only the four countries of interest, then print the result.
new_nations2 <- filter(
  new_nations,
  country %in% c("Bolivia", "Chile", "Ecuador", "Peru")
)
new_nations2
# A tibble: 100 × 11
   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
 1 BO    BOL   Bolivia  1996      3119.    7717445       32.9               35.6
 2 BO    BOL   Bolivia  1993      2726.    7273824       34.1               39  
 3 BO    BOL   Bolivia  2002      3648.    8653343       29.8               28.4
 4 BO    BOL   Bolivia  2008      4987.    9599916       26.5               24.4
 5 BO    BOL   Bolivia  2009      5109.    9758799       26.0               23.4
 6 BO    BOL   Bolivia  2001      3570.    8496378       30.4               29  
 7 BO    BOL   Bolivia  2014      6654.   10561887       23.9               20.1
 8 BO    BOL   Bolivia  1991      2555.    6992521       35.0               40.9
 9 BO    BOL   Bolivia  1992      2605.    7131699       34.5               39.9
10 BO    BOL   Bolivia  1997      3265.    7870860       32.4               34  
# ℹ 90 more rows
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>
# Line chart with point markers: total GDP over time, one coloured series
# per country.
plot1 <- ggplot(
  new_nations2,
  aes(x = year, y = gdp, color = country, group = country)
) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  labs(
    title = "GDP in South America",
    x = "Year",
    y = "GDP ($ Trillion)",
    caption = "Nations Dataset"
  )
plot1

# Total GDP per region and year (missing GDP values are ignored in the sum).
# `.groups = "drop"` returns an ungrouped tibble, so later steps do not
# silently inherit the `region` grouping, and it also silences the
# "grouped output" message that summarise() otherwise prints.
new_nations_gdp <- new_nations |>
  group_by(region, year) |>
  summarize(sum_GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.
# Print the per-region, per-year GDP totals to inspect them.
new_nations_gdp
# A tibble: 175 × 3
# Groups:   region [7]
   region               year sum_GDP
   <chr>               <dbl>   <dbl>
 1 East Asia & Pacific  1990    5.52
 2 East Asia & Pacific  1991    6.03
 3 East Asia & Pacific  1992    6.50
 4 East Asia & Pacific  1993    7.04
 5 East Asia & Pacific  1994    7.64
 6 East Asia & Pacific  1995    8.29
 7 East Asia & Pacific  1996    8.96
 8 East Asia & Pacific  1997    9.55
 9 East Asia & Pacific  1998    9.60
10 East Asia & Pacific  1999   10.1 
# ℹ 165 more rows
# Stacked area chart of total GDP per region over time.
# A single geom_area() layer draws both the fills and the white outlines;
# the earlier extra layer was completely painted over by this one.
# `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0.
plot2 <- new_nations_gdp |>
  ggplot(aes(x = year, y = sum_GDP, fill = region, group = region)) +
  geom_area(position = "stack", color = "white", linewidth = 0.5) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ Trillion)",
    fill = "Region",
    caption = "Nations Dataset"
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# Display the stacked area chart.
plot2

I used this Stack Overflow question (https://stackoverflow.com/questions/12323060/geom-area-plot-with-areas-and-outlines-ggplot) to work out how to draw the white borders around the filled areas.