JL_Nations_Charts

Author

Joyce Liang

Load library and Data Set

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(knitr)
library(plotly)


Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

library(scales)


Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor

# Load the nations data set from a CSV file in the working directory.
nations <- readr::read_csv(file = "nations.csv")

Rows: 5275 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): iso2c, iso3c, country, region, income
dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Remove Rows with Missing Values and Add a GDP Column

# Keep only the rows that have no missing value in any column.
# Approach taken from:
# https://bookdown.org/rwnahhas/IntroToR/exclude-observations-with-missing-data.html
clean_nations <- nations |>
  na.omit()

# Add total GDP in US dollars (per-capita GDP times population).
# The divisor used to be typed as `1,000,000,000,000`; R reads those commas as
# argument separators, so the division was by 1 and mutate() also created a
# stray column named `0`. GDP is kept in dollars here, and the charts rescale
# it for display through their axis-label formatters.
clean_nations2 <- clean_nations |>
  mutate(GDP = gdp_percap * population)

First Chart of Four Countries

# Countries compared in the first chart.
desired_countries <- c("Afghanistan", "Chile", "Costa Rica", "Cameroon")

# Subset the cleaned data to the rows belonging to those countries.
clean_nations3 <- filter(clean_nations2, country %in% desired_countries)

clean_nations3

# A tibble: 88 × 12
   iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
 1 AF    AFG   Afghan…  2009      1526.   27207291       40.3               39.1
 2 AF    AFG   Afghan…  2011      1713.   28809167       37.6               38.1
 3 AF    AFG   Afghan…  2002       896.   21487079       47.2               43.9
 4 AF    AFG   Afghan…  2012      1934.   29726803       36.4               37.4
 5 AF    AFG   Afghan…  2005      1039.   24399948       44.9               41.7
 6 AF    AFG   Afghan…  2010      1629.   27962207       38.9               38.7
 7 AF    AFG   Afghan…  2006      1096.   25183615       43.9               41  
 8 AF    AFG   Afghan…  2004       940.   23499850       45.8               42.5
 9 AF    AFG   Afghan…  2014      1940.   31627506       34.2               36.1
10 AF    AFG   Afghan…  2003       946.   22507368       46.5               43.2
# ℹ 78 more rows
# ℹ 4 more variables: region <chr>, income <chr>, GDP <dbl>, `0` <dbl>

# Point-and-line chart of total GDP over time, one colour per country.
# GDP is stored in US dollars, so the axis labels multiply by 1e-9 to show
# billions. The earlier 1e-10 scale displayed tens of billions while the axis
# title claimed trillions, so the labels and the title disagreed.
p1 <- ggplot(clean_nations3, aes(x = year, y = GDP, color = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous(
    labels = label_number(scale = 1e-9, prefix = "$", accuracy = 1)
  ) +
  labs(
    title = "Gross Domestic Product Trend in Four Countries (1990-2015)",
    caption = "Source: World Bank",
    x = "Year",
    y = "Gross Domestic Product (Billions in USD)"
  ) +
  theme_grey(base_size = 12)
# Label formatting adapted from:
# https://www.datakwery.com/post/2020-07-11-scientific-notation-in-r/

p1

Second Chart of Countries Grouped by Region and Year

# Penalise scientific notation so numbers are printed in fixed notation
# where the printing method honours this option.
options(scipen = 999)

# Number of rows and total GDP for every region-year combination.
# `.groups = "drop"` returns an ungrouped tibble: it silences the summarise()
# regrouping message and keeps the `region` grouping from leaking into any
# later step that uses this table.
nations_4_p2 <- clean_nations2 |>
  group_by(region, year) |>
  summarise(
    count = n(),
    GDP_sum = sum(GDP),
    .groups = "drop"
  )

`summarise()` has grouped output by 'region'. You can override using the
`.groups` argument.

nations_4_p2

# A tibble: 175 × 4
# Groups:   region [7]
   region               year count GDP_sum
   <chr>               <dbl> <int>   <dbl>
 1 East Asia & Pacific  1990    22 5.41e12
 2 East Asia & Pacific  1991    23 5.91e12
 3 East Asia & Pacific  1992    23 6.37e12
 4 East Asia & Pacific  1993    24 6.90e12
 5 East Asia & Pacific  1994    24 7.49e12
 6 East Asia & Pacific  1995    24 8.13e12
 7 East Asia & Pacific  1996    25 8.80e12
 8 East Asia & Pacific  1997    24 9.37e12
 9 East Asia & Pacific  1998    24 9.43e12
10 East Asia & Pacific  1999    25 9.97e12
# ℹ 165 more rows

# Stacked area chart of summed GDP per region over time.
# The y-axis labels multiply GDP_sum by 1e-12 and prefix a dollar sign.
p2 <- ggplot(nations_4_p2, aes(x = year, y = GDP_sum, fill = region)) +
  geom_area() +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(
    labels = label_number(scale = 1e-12, prefix = "$", accuracy = 1)
  ) +
  theme_grey(base_size = 12) +
  labs(
    title = "Gross Domestic Product Trend in a Region",
    x = "Year",
    y = "Gross Domestic Product (Trillions in USD)",
    caption = "Source: World Bank"
  )
p2

# Label formatting adapted from:
# https://www.datakwery.com/post/2020-07-11-scientific-notation-in-r/

# Interactivity through plotly was attempted without success, so the call
# below stays disabled.
# ggplotly(p2)