Nations Charts Assignment

Author

David Burkart

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
Warning: package 'plotly' was built under R version 4.4.3

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
# Load the nations dataset from CSV into a tibble.
# NOTE(review): this is an absolute path on one user's Desktop, so the script
# will only run on that machine — consider a project-relative path.
nations <- read_csv("C:/Users/dburkart/Desktop/DATA 110/data/nations.csv")
Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(nations)
# A tibble: 6 × 10
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 2 more variables: region <chr>, income <chr>
summary(nations)
    iso2c              iso3c             country               year     
 Length:5275        Length:5275        Length:5275        Min.   :1990  
 Class :character   Class :character   Class :character   1st Qu.:1996  
 Mode  :character   Mode  :character   Mode  :character   Median :2002  
                                                          Mean   :2002  
                                                          3rd Qu.:2008  
                                                          Max.   :2014  
                                                                        
   gdp_percap         population          birth_rate    neonat_mortal_rate
 Min.   :   239.7   Min.   :9.004e+03   Min.   : 6.90   Min.   : 0.70     
 1st Qu.:  2263.6   1st Qu.:7.175e+05   1st Qu.:13.40   1st Qu.: 6.70     
 Median :  6563.2   Median :5.303e+06   Median :21.60   Median :15.00     
 Mean   : 12788.8   Mean   :2.958e+07   Mean   :24.16   Mean   :19.40     
 3rd Qu.: 17195.0   3rd Qu.:1.757e+07   3rd Qu.:33.88   3rd Qu.:29.48     
 Max.   :141968.1   Max.   :1.364e+09   Max.   :55.12   Max.   :73.10     
 NA's   :766        NA's   :14          NA's   :295     NA's   :525       
    region             income         
 Length:5275        Length:5275       
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
                                      
# Add a total-GDP column: per-capita GDP multiplied by population, scaled
# down by 10^12 (the charts below label this axis "GDP ($ trillion)").
# Rows where either input is NA get an NA `gdp`.
nations_gdp <- mutate(nations, gdp = (gdp_percap * population) / 1e12)
head(nations_gdp)
# A tibble: 6 × 11
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>
# Keep only the rows for the four countries shown in the first chart.
# Names must match the `country` column exactly (note "Venezuela, RB").
nations_p1 <- filter(
  nations_gdp,
  country %in% c("Brazil", "Colombia", "Mexico", "Venezuela, RB")
)
head(nations_p1)
# A tibble: 6 × 11
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 BR    BRA   Brazil   2002      9468.  181045592       20.0               13.9
2 BR    BRA   Brazil   1995      8029.  162755054       22.0               20  
3 BR    BRA   Brazil   1996      8227.  165303155       21.8               19.3
4 BR    BRA   Brazil   1993      7221.  157812220       22.6               21.5
5 BR    BRA   Brazil   1994      7649.  160260508       22.2               20.7
6 BR    BRA   Brazil   2001      9182.  178419396       20.5               14.9
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>
# Chart 1: GDP over time for the selected countries, drawn as a line plus
# points per country, coloured with the Brewer "Set1" palette.
p1 <- nations_p1 |>
  ggplot(aes(x = year, y = gdp, color = country)) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "GDP of Top Oil Producing Countries in Latin America, 1990-2014",
    x = "Year",
    y = "GDP ($ trillion)"
  ) +
  theme_minimal(base_size = 12)

# Convert the static ggplot into an interactive plotly widget and display it.
p1_plotly <- ggplotly(p1)
p1_plotly
# Group the data by region and year so the following summarise() step
# produces one row per region-year combination.
nations_p2 <- group_by(nations_gdp, region, year)
head(nations_p2)
# A tibble: 6 × 11
# Groups:   region, year [6]
  iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
  <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
3 AD    AND   Andorra  2003         NA      74783       10.3                2  
4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
# ℹ 3 more variables: region <chr>, income <chr>, gdp <dbl>
# Total GDP per region and year. Rows with a missing `gdp` are skipped
# (na.rm = TRUE) rather than turning the whole region-year sum into NA.
# `.groups = "drop"` returns an ungrouped tibble, so no leftover `region`
# grouping leaks into later steps and the "grouped output" message is
# not emitted.
nations_p2GDP <- nations_p2 |>
  summarise(sum_GDP = sum(gdp, na.rm = TRUE), .groups = "drop")
`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.
head(nations_p2GDP)
# A tibble: 6 × 3
# Groups:   region [1]
  region               year sum_GDP
  <chr>               <dbl>   <dbl>
1 East Asia & Pacific  1990    5.52
2 East Asia & Pacific  1991    6.03
3 East Asia & Pacific  1992    6.50
4 East Asia & Pacific  1993    7.04
5 East Asia & Pacific  1994    7.64
6 East Asia & Pacific  1995    8.29
# Chart 2: stacked area chart of summed GDP per region over time.
# `fill = region` defines the stacked groups; the outline colour is a fixed
# white set in geom_area(), so no colour mapping is needed in aes().
p2 <- ggplot(nations_p2GDP, aes(x = year, y = sum_GDP, fill = region)) +
  labs(
    title = "GDP by World Bank Regions, 1990-2014",
    x = "Year",
    y = "GDP ($ trillion)"
  ) +
  theme_minimal(base_size = 12) +
  # `linewidth` replaces `size` for line widths (deprecated in ggplot2 3.4.0).
  geom_area(alpha = 0.6, linewidth = 0.5, color = "white") +
  scale_fill_brewer(palette = "Set2")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
p2