Nations Dataset

#Loading up some more packages essential for the work ahead

pacman::p_load(tidyverse,devtools, zoo, ggsci,plotly, DT, highcharter,BiocManager, RColorBrewer, viridis, babynames)

#Loading up the dataset essential for the work ahead

# Read the nations data through an explicit path rather than calling setwd(),
# so loading the file does not change the session's working directory as a
# side effect. The directory and file name are the ones the report has
# always used.
nations <- read_csv(
  file.path("C:/Users/clovi/OneDrive/Desktop/DATA 110", "nations.csv")
)

## 
## -- Column specification --------------------------------------------------------
## cols(
##   iso2c = col_character(),
##   iso3c = col_character(),
##   country = col_character(),
##   year = col_double(),
##   gdp_percap = col_double(),
##   population = col_double(),
##   birth_rate = col_double(),
##   neonat_mortal_rate = col_double(),
##   region = col_character(),
##   income = col_character()
## )

nations

## # A tibble: 5,275 x 10
##    iso2c iso3c country  year gdp_percap population birth_rate neonat_mortal_rate
##    <chr> <chr> <chr>   <dbl>      <dbl>      <dbl>      <dbl>              <dbl>
##  1 AD    AND   Andorra  1996         NA      64291       10.9                2.8
##  2 AD    AND   Andorra  1994         NA      62707       10.9                3.2
##  3 AD    AND   Andorra  2003         NA      74783       10.3                2  
##  4 AD    AND   Andorra  1990         NA      54511       11.9                4.3
##  5 AD    AND   Andorra  2009         NA      85474        9.9                1.7
##  6 AD    AND   Andorra  2011         NA      82326       NA                  1.6
##  7 AD    AND   Andorra  2004         NA      78337       10.9                2  
##  8 AD    AND   Andorra  2010         NA      84419        9.8                1.7
##  9 AD    AND   Andorra  2001         NA      67770       11.8                2.1
## 10 AD    AND   Andorra  2002         NA      71046       11.2                2.1
## # ... with 5,265 more rows, and 2 more variables: region <chr>, income <chr>

#Mutating a particular Variable of the dataset

# Keep the columns needed for the charts, discard rows without a GDP per
# capita value, derive total GDP in trillions (per-capita GDP times
# population, divided by 1e12) and order the rows by year.
nations_Updated <- nations %>%
  select(country, year, gdp_percap, population, region) %>%
  filter(!is.na(gdp_percap)) %>%
  mutate(gdp_tln = gdp_percap * population / 1e12) %>%
  arrange(year)

# Show the structure of the derived table.
str(nations_Updated)

## tibble [4,509 x 6] (S3: tbl_df/tbl/data.frame)
##  $ country   : chr [1:4509] "United Arab Emirates" "Antigua and Barbuda" "Albania" "Armenia" ...
##  $ year      : num [1:4509] 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
##  $ gdp_percap: num [1:4509] 74017 11087 2749 2414 3107 ...
##  $ population: num [1:4509] 1811458 61906 3286542 3544695 11127870 ...
##  $ region    : chr [1:4509] "Middle East & North Africa" "Latin America & Caribbean" "Europe & Central Asia" "Europe & Central Asia" ...
##  $ gdp_tln   : num [1:4509] 0.134079 0.000686 0.009033 0.008558 0.034569 ...

The Five Biggest Economies in Africa. We filter and arrange the data. Here, we filter the five countries with the largest economies in Africa as of the first quarter of 2021.

These five countries are listed as follows: 1. Nigeria 2. Egypt 3. South Africa 4. Algeria 5. Morocco

There are three different types of palettes: sequential palettes, diverging palettes, and qualitative palettes.

# Show all of the RColorBrewer palettes so one can be picked for the charts.
RColorBrewer::display.brewer.all()

The palette “Dark2” is one of the qualitative palettes.

# Five colours taken from the "Dark2" ColorBrewer palette.
My_colors <- brewer.pal(5, "Dark2")

# Base plot (light theme) of total GDP over time for the five selected
# African economies; geoms and colour scales are added when it is rendered.
#
# Membership must be tested with `%in%`. `country == c(...)` recycles the
# five names element-wise down the column, so a row is kept only when its
# country happens to line up with the recycled position. That silently drops
# most of the matching rows and raises a "longer object length is not a
# multiple of shorter object length" warning.
nationsUpdated_chart <- nations_Updated %>%
  filter(
    country %in% c(
      "Nigeria", "Egypt, Arab Rep.", "South Africa", "Algeria", "Morocco"
    )
  ) %>%
  ggplot(aes(x = year, y = gdp_tln)) +
  xlim(1990, 2014) +
  labs(
    title = "The Five Largest Economies in Africa",
    caption = "Source: The Nations dataset"
  ) +
  xlab("Year") +
  ylab("GDP ($ trillion)") +
  theme_light(base_size = 10)

# Render the base plot: a line per country coloured by country, then a point
# layer, with the "Dark2" ColorBrewer palette and a "Country" legend title.
nationsUpdated_chart +
  geom_line(mapping = aes(colour = country)) +
  geom_point() +
  scale_colour_brewer(palette = "Dark2") +
  labs(colour = "Country")

# Rebuild the base plot for the same five African economies, this time with
# the dark theme.
#
# `%in%` is used for the country filter: comparing with `==` against a
# five-element vector recycles the names element-wise, which silently drops
# most matching rows and triggers a "longer object length is not a multiple
# of shorter object length" warning.
nationsUpdated_chart <- nations_Updated %>%
  filter(
    country %in% c(
      "Nigeria", "Egypt, Arab Rep.", "South Africa", "Algeria", "Morocco"
    )
  ) %>%
  ggplot(aes(x = year, y = gdp_tln)) +
  xlim(1990, 2014) +
  labs(
    title = "The Five Largest Economies in Africa",
    caption = "Source: The Nations dataset"
  ) +
  xlab("Year") +
  ylab("GDP ($ trillion)") +
  theme_dark(base_size = 10)

# Render the base plot: a line per country coloured by country, then a point
# layer, with the "Spectral" ColorBrewer palette and a "Country" legend title.
nationsUpdated_chart +
  geom_line(mapping = aes(colour = country)) +
  geom_point() +
  scale_colour_brewer(palette = "Spectral") +
  labs(colour = "Country")

Area Chart Stacked with Viridis “Discrete TRUE” colors

 # Total GDP, in trillions of dollars, for each year and region.
 #
 # GDP per capita has to be multiplied by population before summing; adding
 # up `gdp_percap` directly yields a sum of per-capita figures, not the
 # "GDP ($ trillion)" that the area charts label this column as. Rows where
 # either input is missing are skipped via `na.rm = TRUE`.
 #
 # `.groups = "keep"` leaves the result grouped by year and region (the same
 # grouping the table had before) and silences summarise()'s grouping message.
 regions <- nations %>%
   group_by(year, region) %>%
   summarise(
     gdp_tln = sum(gdp_percap * population / 1e12, na.rm = TRUE),
     .groups = "keep"
   )

# Stacked area plot of `gdp_tln` by year, filled by region and outlined in
# white; kept in `x` so different colour scales can be layered on afterwards.
x <- ggplot(regions, aes(x = year, y = gdp_tln, fill = region)) +
  geom_area(colour = "white")

# Render the stacked area chart with the discrete viridis fill scale, the
# classic theme, and all plot labels set in a single labs() call.
x +
  scale_fill_viridis(discrete = TRUE) +
  theme_classic(base_size = 10) +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ trillion)",
    fill = "Regions",
    caption = "Source: The Nations dataset"
  )

Area Chart Stacked with Dark2 colors

# Render the stacked area chart with the "Dark2" ColorBrewer fill scale,
# x-axis breaks every five years from 1990 to 2015, the classic theme, and
# all plot labels set in a single labs() call.
x +
  scale_x_continuous(breaks = seq(1990, 2015, 5)) +
  scale_fill_brewer(palette = "Dark2") +
  theme_classic(base_size = 10) +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ trillion)",
    fill = "Regions",
    caption = "Source: The Nations dataset"
  )

# Print the babynames dataset (provided by the babynames package loaded
# earlier) to preview its columns.
babynames

## # A tibble: 1,924,665 x 5
##     year sex   name          n   prop
##    <dbl> <chr> <chr>     <int>  <dbl>
##  1  1880 F     Mary       7065 0.0724
##  2  1880 F     Anna       2604 0.0267
##  3  1880 F     Emma       2003 0.0205
##  4  1880 F     Elizabeth  1939 0.0199
##  5  1880 F     Minnie     1746 0.0179
##  6  1880 F     Margaret   1578 0.0162
##  7  1880 F     Ida        1472 0.0151
##  8  1880 F     Alice      1414 0.0145
##  9  1880 F     Bertha     1320 0.0135
## 10  1880 F     Sarah      1288 0.0132
## # ... with 1,924,655 more rows

# Keep only the female records for the five names being compared.
Names <- babynames %>%
  filter(
    sex == "F",
    name %in% c("Margaret", "Anna", "Emma", "Bertha", "Sarah")
  )

 # Stacked area chart of the yearly counts for the selected names, filled by
 # name with the discrete viridis scale.
 #
 # theme_dark() is a complete theme, so it must be added BEFORE the
 # theme(legend.position = ...) tweak. In the opposite order the complete
 # theme replaces the tweak and the legend falls back to its default position
 # instead of appearing at the top.
 Names %>%
   ggplot(aes(x = year, y = n, fill = name, text = name)) +
   geom_area() +
   scale_fill_viridis(discrete = TRUE) +
   theme_dark() +
   theme(legend.position = "top") +
   labs(caption = "Source: Default babynames dataset") +
   xlab("Year") +
   ylab("Total number of females (n)") +
   ggtitle("Yearwise american baby names popularity")

Nations Dataset

Onya Clovis

3/22/2021