Nations Dataset

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(ggplot2)
library(plotly)

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(ggplot2)
library(dplyr)

library(knitr)

# Read nations.csv (path relative to the working directory) into a tibble.
nations <- read_csv("nations.csv")

## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# The nations.csv data contains the following fields:

## iso2c, iso3c: Two- and three-letter codes for each country, assigned by the International Organization for Standardization.
## country: Country name.
## year: Year of the observation (1990 to 2014 in this file).
## population: Estimated total population at mid-year, including all residents apart from refugees.
## gdp_percap: Gross Domestic Product per capita in current international dollars, corrected for purchasing power in different territories.
## birth_rate: Live births during the year per 1,000 people, based on mid-year population estimate.
## neonat_mortal_rate: Neonatal mortality rate: babies dying before reaching 28 days of age, per 1,000 live births in a given year.
## region, income: World Bank regions and income groups.
## gdp_tn: Total GDP in trillions of dollars. This is not a column of nations.csv; the code below computes it and names it `gdp`.

# Preview the first six rows of the raw data.
head(nations)

## # A tibble: 6 × 10
##   iso2c iso3c country  year gdp_percap population birth_…¹ neona…² region income
##   <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>    <dbl>   <dbl> <chr>  <chr> 
## 1 AD    AND   Andorra  1996         NA      64291     10.9     2.8 Europ… High …
## 2 AD    AND   Andorra  1994         NA      62707     10.9     3.2 Europ… High …
## 3 AD    AND   Andorra  2003         NA      74783     10.3     2   Europ… High …
## 4 AD    AND   Andorra  1990         NA      54511     11.9     4.3 Europ… High …
## 5 AD    AND   Andorra  2009         NA      85474      9.9     1.7 Europ… High …
## 6 AD    AND   Andorra  2011         NA      82326     NA       1.6 Europ… High …
## # … with abbreviated variable names ¹birth_rate, ²neonat_mortal_rate

# Calculate total GDP, in trillions of dollars, for each country and year. Total GDP by region over time is computed from this further below.

# Reload the CSV and append `gdp`: GDP per capita times population,
# scaled to trillions (divided by 1e12).
nations <- mutate(
  read_csv("nations.csv"),
  gdp = gdp_percap * population / 1e12
)

## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Compact overview: one line per column showing its type and first values.
glimpse(nations)

## Rows: 5,275
## Columns: 11
## $ iso2c              <chr> "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD…
## $ iso3c              <chr> "AND", "AND", "AND", "AND", "AND", "AND", "AND", "A…
## $ country            <chr> "Andorra", "Andorra", "Andorra", "Andorra", "Andorr…
## $ year               <dbl> 1996, 1994, 2003, 1990, 2009, 2011, 2004, 2010, 200…
## $ gdp_percap         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ population         <dbl> 64291, 62707, 74783, 54511, 85474, 82326, 78337, 84…
## $ birth_rate         <dbl> 10.900, 10.900, 10.300, 11.900, 9.900, NA, 10.900, …
## $ neonat_mortal_rate <dbl> 2.8, 3.2, 2.0, 4.3, 1.7, 1.6, 2.0, 1.7, 2.1, 2.1, 2…
## $ region             <chr> "Europe & Central Asia", "Europe & Central Asia", "…
## $ income             <chr> "High income", "High income", "High income", "High …
## $ gdp                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…

# Print the data. `nations` is already a tibble, so wrapping it in tibble()
# is unnecessary; printing the object directly gives the same display.
nations

## # A tibble: 5,275 × 11
##    iso2c iso3c country  year gdp_p…¹ popul…² birth…³ neona…⁴ region income   gdp
##    <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>  <chr>  <dbl>
##  1 AD    AND   Andorra  1996      NA   64291    10.9     2.8 Europ… High …    NA
##  2 AD    AND   Andorra  1994      NA   62707    10.9     3.2 Europ… High …    NA
##  3 AD    AND   Andorra  2003      NA   74783    10.3     2   Europ… High …    NA
##  4 AD    AND   Andorra  1990      NA   54511    11.9     4.3 Europ… High …    NA
##  5 AD    AND   Andorra  2009      NA   85474     9.9     1.7 Europ… High …    NA
##  6 AD    AND   Andorra  2011      NA   82326    NA       1.6 Europ… High …    NA
##  7 AD    AND   Andorra  2004      NA   78337    10.9     2   Europ… High …    NA
##  8 AD    AND   Andorra  2010      NA   84419     9.8     1.7 Europ… High …    NA
##  9 AD    AND   Andorra  2001      NA   67770    11.8     2.1 Europ… High …    NA
## 10 AD    AND   Andorra  2002      NA   71046    11.2     2.1 Europ… High …    NA
## # … with 5,265 more rows, and abbreviated variable names ¹gdp_percap,
## #   ²population, ³birth_rate, ⁴neonat_mortal_rate

# filter() is called without any condition, so every row is kept and the
# printed result matches the full data set (5,275 rows).
# NOTE(review): a filter condition may have been intended here - confirm.
filter(nations)

## # A tibble: 5,275 × 11
##    iso2c iso3c country  year gdp_p…¹ popul…² birth…³ neona…⁴ region income   gdp
##    <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>  <chr>  <dbl>
##  1 AD    AND   Andorra  1996      NA   64291    10.9     2.8 Europ… High …    NA
##  2 AD    AND   Andorra  1994      NA   62707    10.9     3.2 Europ… High …    NA
##  3 AD    AND   Andorra  2003      NA   74783    10.3     2   Europ… High …    NA
##  4 AD    AND   Andorra  1990      NA   54511    11.9     4.3 Europ… High …    NA
##  5 AD    AND   Andorra  2009      NA   85474     9.9     1.7 Europ… High …    NA
##  6 AD    AND   Andorra  2011      NA   82326    NA       1.6 Europ… High …    NA
##  7 AD    AND   Andorra  2004      NA   78337    10.9     2   Europ… High …    NA
##  8 AD    AND   Andorra  2010      NA   84419     9.8     1.7 Europ… High …    NA
##  9 AD    AND   Andorra  2001      NA   67770    11.8     2.1 Europ… High …    NA
## 10 AD    AND   Andorra  2002      NA   71046    11.2     2.1 Europ… High …    NA
## # … with 5,265 more rows, and abbreviated variable names ¹gdp_percap,
## #   ²population, ³birth_rate, ⁴neonat_mortal_rate

# Per-column summary: min/quartiles/mean/max and NA counts for numeric
# columns; length, class and mode for character columns.
summary(nations)

##     iso2c              iso3c             country               year     
##  Length:5275        Length:5275        Length:5275        Min.   :1990  
##  Class :character   Class :character   Class :character   1st Qu.:1996  
##  Mode  :character   Mode  :character   Mode  :character   Median :2002  
##                                                           Mean   :2002  
##                                                           3rd Qu.:2008  
##                                                           Max.   :2014  
##                                                                         
##    gdp_percap         population          birth_rate    neonat_mortal_rate
##  Min.   :   239.7   Min.   :9.004e+03   Min.   : 6.90   Min.   : 0.70     
##  1st Qu.:  2263.6   1st Qu.:7.175e+05   1st Qu.:13.40   1st Qu.: 6.70     
##  Median :  6563.2   Median :5.303e+06   Median :21.60   Median :15.00     
##  Mean   : 12788.8   Mean   :2.958e+07   Mean   :24.16   Mean   :19.40     
##  3rd Qu.: 17195.0   3rd Qu.:1.757e+07   3rd Qu.:33.88   3rd Qu.:29.48     
##  Max.   :141968.1   Max.   :1.364e+09   Max.   :55.12   Max.   :73.10     
##  NA's   :766        NA's   :14          NA's   :295     NA's   :525       
##     region             income               gdp         
##  Length:5275        Length:5275        Min.   : 0.0000  
##  Class :character   Class :character   1st Qu.: 0.0077  
##  Mode  :character   Mode  :character   Median : 0.0324  
##                                        Mean   : 0.3259  
##                                        3rd Qu.: 0.1849  
##                                        Max.   :18.0829  
##                                        NA's   :766

# Data for the first chart: rows for China, Germany, Japan and the
# United States only, sorted by year. Despite its name, `plot1` holds the
# data frame, not the plot itself.
plot1 <- nations %>%
  filter(iso3c %in% c("CHN", "DEU", "JPN", "USA")) %>%
  arrange(year)

# plot
# Line chart of total GDP over time for the countries in `plot1`,
# one colored line (with point markers) per country.
ggplot(plot1, aes(x = year, y = gdp, color = country)) +
  geom_line() +
  geom_point() +
  # Title and axis labels
  labs(
    title = "China's Rise to Become the Largest Economy",
    x = "year",
    y = "GDP ($ trillion)"
  ) +
  # Color Brewer palette
  scale_color_brewer(palette = "Set1") +
  theme(
    # Text sizes for the title, axis titles and axis tick labels
    plot.title = element_text(size = 14),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 8),
    # No legend title; legend keys drawn without border or fill
    legend.title = element_blank(),
    legend.key = element_rect(color = NA, fill = NA),
    # White panel background
    panel.background = element_rect(fill = "white", color = "white"),
    # Light gray grid lines. `linewidth` replaces the `size` argument of
    # element_line(), which is deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      linewidth = 0.5, linetype = "solid", color = "#d8d8d8"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.5, linetype = "solid", color = "#d8d8d8"
    )
  )

# Print the filtered data used for the line chart (100 rows).
plot1

## # A tibble: 100 × 11
##    iso2c iso3c country  year gdp_p…¹ popul…² birth…³ neona…⁴ region income   gdp
##    <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>  <chr>  <dbl>
##  1 CN    CHN   China    1990    980.  1.14e9    21.1    29.7 East … Upper…  1.11
##  2 DE    DEU   Germany  1990  19033.  7.94e7    11.4     3.4 Europ… High …  1.51
##  3 JP    JPN   Japan    1990  19230.  1.24e8    10       2.5 East … High …  2.38
##  4 US    USA   United…  1990  23954.  2.50e8    16.7     5.8 North… High …  5.98
##  5 CN    CHN   China    1991   1091.  1.15e9    19.7    29.7 East … Upper…  1.26
##  6 DE    DEU   Germany  1991  20521.  8.00e7    10.4     3.5 Europ… High …  1.64
##  7 JP    JPN   Japan    1991  20467.  1.24e8     9.9     2.5 East … High …  2.54
##  8 US    USA   United…  1991  24405.  2.53e8    16.2     5.6 North… High …  6.17
##  9 CN    CHN   China    1992   1260.  1.16e9    18.3    29.4 East … Upper…  1.47
## 10 DE    DEU   Germany  1992  21230.  8.06e7    10       3.5 Europ… High …  1.71
## # … with 90 more rows, and abbreviated variable names ¹gdp_percap, ²population,
## #   ³birth_rate, ⁴neonat_mortal_rate

# Re-read the raw CSV. This overwrites `nations`, so the `gdp` column added
# earlier is gone (the file has 10 columns) and is recomputed below.
nations <- read_csv("nations.csv")

## Rows: 5275 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): iso2c, iso3c, country, region, income
## dbl (5): year, gdp_percap, population, birth_rate, neonat_mortal_rate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Data for the area chart: total GDP, in trillions of dollars, for each
# region in each year, sorted by year and then region.
plot2 <- nations %>%
  mutate(gdp = gdp_percap * population / 1e12) %>%
  group_by(year, region) %>%
  # na.rm = TRUE: rows with missing GDP are left out of the regional total.
  # .groups = "drop" returns an ungrouped tibble, so no grouping leaks into
  # later steps and summarise() does not print its regrouping message.
  summarise(sum = sum(gdp, na.rm = TRUE), .groups = "drop") %>%
  arrange(year, region)

plot2

## # A tibble: 175 × 3
##     year region                       sum
##    <dbl> <chr>                      <dbl>
##  1  1990 East Asia & Pacific        5.52 
##  2  1990 Europe & Central Asia      9.36 
##  3  1990 Latin America & Caribbean  2.40 
##  4  1990 Middle East & North Africa 1.66 
##  5  1990 North America              6.54 
##  6  1990 South Asia                 1.35 
##  7  1990 Sub-Saharan Africa         0.787
##  8  1991 East Asia & Pacific        6.03 
##  9  1991 Europe & Central Asia      9.71 
## 10  1991 Latin America & Caribbean  2.55 
## # … with 165 more rows

# Area chart of total GDP over time, one filled band per World Bank region.
altgraph <- ggplot(plot2, aes(x = year, y = sum, fill = region)) +
  # Area layer with a white outline around each region
  geom_area(color = "white") +
  # Title and axis labels
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ trillion)"
  ) +
  # Color Brewer palette
  scale_fill_brewer(palette = "Set2") +
  theme(
    # White panel background
    panel.background = element_rect(fill = "white", color = "white"),
    # Gray grid lines. `linewidth` replaces the `size` argument of
    # element_line(), which is deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      linewidth = 0.5, linetype = "solid", color = "#d0d0d0"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.5, linetype = "solid", color = "#d0d0d0"
    )
  )
# Display the chart
altgraph

Nations Dataset

Grayce Wiggins

2023-03-05