Gapminder dataset

Health and income outcomes for 184 countries from 1960 to 2016. Also includes two character vectors, oecd and opec, with the names of OECD and OPEC countries from 2016.

Two charts are created using functions from the ggplot2, ggthemes, ggrepel and tidyverse packages.

Chunk1 - Loading libraries

library(ggthemes)
library(ggrepel)
## Loading required package: ggplot2
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dslabs)

Chunk2 - Listing the data-preparation scripts bundled with the dslabs package

# Print the file names found in the "script" directory of the installed
# dslabs package.
system.file("script", package = "dslabs") %>%
  list.files()
##  [1] "make-admissions.R"                   
##  [2] "make-brca.R"                         
##  [3] "make-brexit_polls.R"                 
##  [4] "make-death_prob.R"                   
##  [5] "make-divorce_margarine.R"            
##  [6] "make-gapminder-rdas.R"               
##  [7] "make-greenhouse_gases.R"             
##  [8] "make-historic_co2.R"                 
##  [9] "make-mnist_27.R"                     
## [10] "make-movielens.R"                    
## [11] "make-murders-rda.R"                  
## [12] "make-na_example-rda.R"               
## [13] "make-nyc_regents_scores.R"           
## [14] "make-olive.R"                        
## [15] "make-outlier_example.R"              
## [16] "make-polls_2008.R"                   
## [17] "make-polls_us_election_2016.R"       
## [18] "make-reported_heights-rda.R"         
## [19] "make-research_funding_rates.R"       
## [20] "make-stars.R"                        
## [21] "make-temp_carbon.R"                  
## [22] "make-tissue-gene-expression.R"       
## [23] "make-trump_tweets.R"                 
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"

Chunk3 - Locating the gapminder example script

# Full paths of every file in the "script" directory of the installed dslabs
# package.
wrangle_files <- list.files(
  system.file("script", package = "dslabs"),
  full.names = TRUE
)

# Select the gapminder example script by file name rather than by position:
# a fixed index silently points at a different file (or errors) as soon as
# the package ships a different set of scripts.
wrangle_files[basename(wrangle_files) == "save-gapminder-example-csv.R"]
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/dslabs/script/save-gapminder-example-csv.R"

Chunk4 - Loading a specific dataset by name - gapminder

# Load the `gapminder` dataset from dslabs into the workspace, then replace
# it with its tibble version.
data(list = "gapminder", package = "dslabs")
gapminder <- as_tibble(gapminder)

Chunk5 - Inspecting data

# Preview the first two rows. `gapminder` was already converted to a tibble
# right after it was loaded, so it is not converted a second time here.
gapminder %>% head(n = 2)
## # A tibble: 2 x 9
##   country  year infant_mortality life_expectancy fertility population      gdp
##   <fct>   <int>            <dbl>           <dbl>     <dbl>      <dbl>    <dbl>
## 1 Albania  1960             115.            62.9      6.19    1636054 NA      
## 2 Algeria  1960             148.            47.5      7.65   11124892  1.38e10
## # … with 2 more variables: continent <fct>, region <fct>

#### Chunk6 - Summarizing mean fertility and life expectancy of the continents and labeling years using ggrepel

# Years to plot, one per decade. Stored as integers so the comparison with
# the integer `year` column does not rely on implicit type coercion.
years <- seq(1960L, 2010L, by = 10L)

# Scatter plot of mean fertility (x) against mean life expectancy (y), one
# point per continent and selected year, coloured by continent and labelled
# with the year.
plot <- gapminder %>%
  dplyr::filter(year %in% years) %>%
  group_by(continent, year) %>%
  summarise(
    # na.rm = TRUE so one missing country value does not turn the whole
    # continent/year mean into NA (which would drop the point from the plot).
    mean_life_expectancy = mean(life_expectancy, na.rm = TRUE),
    mean_fertility = mean(fertility, na.rm = TRUE),
    # Return an ungrouped tibble; this also silences the "grouped output"
    # message summarise() would otherwise emit.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = mean_fertility, y = mean_life_expectancy)) +
  ggtitle("Summarizing mean of fertility and life expectancy of the continents ") +
  geom_point(aes(colour = continent), alpha = 0.7) +
  geom_label_repel(aes(label = year), size = 2.5, box.padding = 0.5) +
  # Draw the legend keys fully opaque even though the points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1)))

plot + theme_dark()

Chunk7 - Total population and mean life expectancy of the continents per year

# Line-and-point chart of log10(total population) per continent for each
# selected year; point size encodes the (unweighted) mean life expectancy of
# the continent's countries. Uses `years`, which is defined earlier in the
# file.
plot2 <- gapminder %>%
  # Filter first so only the selected years are grouped and summarised.
  dplyr::filter(year %in% years) %>%
  dplyr::group_by(continent, year) %>%
  summarise(
    sum_population = sum(population, na.rm = TRUE),
    mean_life_exp = mean(life_expectancy, na.rm = TRUE),
    # Return an ungrouped tibble; this also silences the "grouped output"
    # message summarise() would otherwise emit.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = year, y = log10(sum_population))) +
  # `alpha` is a constant, so it is set outside aes(). Inside aes() it is
  # treated as a data mapping and adds a meaningless "alpha" legend entry.
  geom_point(aes(colour = continent, size = mean_life_exp), alpha = 0.5) +
  geom_line(aes(group = continent, colour = continent)) +
  # Draw the colour legend keys fully opaque even though points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  ggtitle("Summarizing mean life expectancy of total population per year of the continents ")

plot2 + theme_linedraw()