#Loading up some more packages essential for the work ahead
pacman::p_load(tidyverse,devtools, zoo, ggsci,plotly, DT, highcharter,BiocManager, RColorBrewer, viridis, babynames)
# Load the nations dataset from nations.csv into the `nations` tibble.
# NOTE(review): setwd() ties the script to one machine's directory layout and
# changes the working directory for everything that runs afterwards; consider
# a project-relative path instead.
setwd("C:/Users/clovi/OneDrive/Desktop/DATA 110")
nations <- read_csv("nations.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
# Print the tibble to preview its columns and first rows.
nations
## # A tibble: 5,275 x 10
## iso2c iso3c country year gdp_percap population birth_rate neonat_mortal_rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AD AND Andorra 1996 NA 64291 10.9 2.8
## 2 AD AND Andorra 1994 NA 62707 10.9 3.2
## 3 AD AND Andorra 2003 NA 74783 10.3 2
## 4 AD AND Andorra 1990 NA 54511 11.9 4.3
## 5 AD AND Andorra 2009 NA 85474 9.9 1.7
## 6 AD AND Andorra 2011 NA 82326 NA 1.6
## 7 AD AND Andorra 2004 NA 78337 10.9 2
## 8 AD AND Andorra 2010 NA 84419 9.8 1.7
## 9 AD AND Andorra 2001 NA 67770 11.8 2.1
## 10 AD AND Andorra 2002 NA 71046 11.2 2.1
## # ... with 5,265 more rows, and 2 more variables: region <chr>, income <chr>
# Build the table used by the country-level charts: discard rows without a
# per-capita GDP figure, keep only the columns needed for plotting, derive
# total GDP in trillions (per-capita GDP times population, divided by 1e12),
# and order the rows chronologically.
nations_Updated <- nations %>%
  filter(!is.na(gdp_percap)) %>%
  select(country, year, gdp_percap, population, region) %>%
  mutate(gdp_tln = gdp_percap * population / 1e12) %>%
  arrange(year)

# Inspect the structure of the result.
str(nations_Updated)
## tibble [4,509 x 6] (S3: tbl_df/tbl/data.frame)
## $ country : chr [1:4509] "United Arab Emirates" "Antigua and Barbuda" "Albania" "Armenia" ...
## $ year : num [1:4509] 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
## $ gdp_percap: num [1:4509] 74017 11087 2749 2414 3107 ...
## $ population: num [1:4509] 1811458 61906 3286542 3544695 11127870 ...
## $ region : chr [1:4509] "Middle East & North Africa" "Latin America & Caribbean" "Europe & Central Asia" "Europe & Central Asia" ...
## $ gdp_tln : num [1:4509] 0.134079 0.000686 0.009033 0.008558 0.034569 ...
The Five Biggest Economies in Africa. We filter and arrange the data: here, we keep the five countries with the largest economies in Africa as of the first quarter of 2021.
These five countries are as follows: 1. Nigeria 2. Egypt 3. South Africa 4. Algeria 5. Morocco
There are three different types of palettes: sequential palettes, diverging palettes, and qualitative palettes.
# Display the available RColorBrewer palettes so one can be chosen by name.
RColorBrewer::display.brewer.all()
The “Dark2” palette is one of the qualitative palettes.
# Five colours from the "Dark2" Brewer palette; the chart below requests the
# same palette by name through scale_color_brewer().
My_colors <- brewer.pal(5, "Dark2")

# Base chart (light theme) of total GDP over time for the five selected
# countries.
# Set membership must be tested with %in%. `country == c(...)` recycles the
# five names down the column, so each row is compared against only one of
# them; most matching rows are silently dropped and R warns that the longer
# object length is not a multiple of the shorter one.
nationsUpdated_chart <- nations_Updated %>%
  filter(
    country %in% c(
      "Nigeria", "Egypt, Arab Rep.", "South Africa", "Algeria", "Morocco"
    )
  ) %>%
  ggplot(aes(x = year, y = gdp_tln)) +
  xlim(1990, 2014) +
  labs(
    title = "The Five Largest Economies in Africa",
    caption = "Source: The Nations dataset"
  ) +
  xlab("Year") +
  ylab("GDP ($ trillion)") +
  theme_light(base_size = 10)

# Draw one coloured line per country with a point at every observation.
nationsUpdated_chart +
  geom_line(aes(color = country)) +
  geom_point() +
  labs(color = "Country") +
  scale_color_brewer(palette = "Dark2")
# Base chart (dark theme) of total GDP over time for the five selected
# countries.
# Set membership must be tested with %in%. `country == c(...)` recycles the
# five names down the column, so each row is compared against only one of
# them; most matching rows are silently dropped and R warns that the longer
# object length is not a multiple of the shorter one.
nationsUpdated_chart <- nations_Updated %>%
  filter(
    country %in% c(
      "Nigeria", "Egypt, Arab Rep.", "South Africa", "Algeria", "Morocco"
    )
  ) %>%
  ggplot(aes(x = year, y = gdp_tln)) +
  xlim(1990, 2014) +
  labs(
    title = "The Five Largest Economies in Africa",
    caption = "Source: The Nations dataset"
  ) +
  xlab("Year") +
  ylab("GDP ($ trillion)") +
  theme_dark(base_size = 10)

# Draw one coloured line per country with a point at every observation,
# this time using the "Spectral" Brewer palette.
nationsUpdated_chart +
  geom_line(aes(color = country)) +
  geom_point() +
  labs(color = "Country") +
  scale_color_brewer(palette = "Spectral")
Stacked area chart with the discrete viridis colour scale
# Total GDP per year and region, in trillions.
# A country-year's GDP is gdp_percap * population (the same definition used
# for nations_Updated$gdp_tln). Summing gdp_percap on its own adds up
# per-capita figures, which is not the quantity the "GDP ($ trillion)" axis
# of the area charts reports. Products that are NA (either factor missing)
# are skipped by na.rm = TRUE.
# .groups = "keep" leaves the result grouped by year and region, as before,
# without the regrouping message or a second group_by() call.
regions <- nations %>%
  group_by(year, region) %>%
  summarise(
    gdp_tln = sum(gdp_percap * population, na.rm = TRUE) / 1e12,
    .groups = "keep"
  )
# Base area layer shared by the regional charts: one filled band per region,
# outlined in white.
x <- ggplot(regions, aes(x = year, y = gdp_tln, fill = region)) +
  geom_area(color = "white")

# Render the base layer with the discrete viridis fill scale.
x +
  scale_fill_viridis(discrete = TRUE) +
  theme_classic(base_size = 10) +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ trillion)",
    fill = "Regions",
    caption = "Source: The Nations dataset"
  )
Stacked area chart with the Dark2 colour palette
# Render the regional base layer `x` with the "Dark2" Brewer fill palette and
# an x-axis tick every five years from 1990 to 2015.
x +
  scale_fill_brewer(palette = "Dark2") +
  scale_x_continuous(breaks = seq(1990, 2015, by = 5)) +
  theme_classic(base_size = 10) +
  labs(
    title = "GDP by World Bank Region",
    x = "Year",
    y = "GDP ($ trillion)",
    fill = "Regions",
    caption = "Source: The Nations dataset"
  )
# Print the babynames tibble to preview its columns and first rows.
babynames
## # A tibble: 1,924,665 x 5
## year sex name n prop
## <dbl> <chr> <chr> <int> <dbl>
## 1 1880 F Mary 7065 0.0724
## 2 1880 F Anna 2604 0.0267
## 3 1880 F Emma 2003 0.0205
## 4 1880 F Elizabeth 1939 0.0199
## 5 1880 F Minnie 1746 0.0179
## 6 1880 F Margaret 1578 0.0162
## 7 1880 F Ida 1472 0.0151
## 8 1880 F Alice 1414 0.0145
## 9 1880 F Bertha 1320 0.0135
## 10 1880 F Sarah 1288 0.0132
## # ... with 1,924,655 more rows
# Keep the rows for five selected names, restricted to records with sex "F".
Names <- babynames %>%
  filter(name %in% c("Margaret", "Anna", "Emma", "Bertha", "Sarah")) %>%
  filter(sex == "F")

# Area chart of the yearly count `n`, one filled band per name.
# NOTE(review): the `text` aesthetic is not used by geom_area(); presumably it
# is meant for a plotly tooltip -- confirm or remove.
Names %>%
  ggplot(aes(x = year, y = n, fill = name, text = name)) +
  geom_area() +
  scale_fill_viridis(discrete = TRUE) +
  # theme_dark() is a complete theme and replaces any theme() settings added
  # before it, so the legend position has to be set afterwards to take effect.
  theme_dark() +
  theme(legend.position = "top") +
  labs(caption = "Source: Default babynames dataset") +
  xlab("Year") +
  ylab("Total number of females (n)") +
  ggtitle("Yearwise american baby names popularity")