source("https://raw.githubusercontent.com/traffordDataLab/assets/601e80334e0d78dfe913685561196b8b6fc278a7/theme/ggplot2/theme_lab.R")

# Custom ggplot2 theme: theme_grey() in Roboto with selected elements replaced
# (via %+replace%) to give a borderless plot with dotted major grid lines and
# grey (#757575) text. Takes no arguments and returns a complete theme object.
theme_nath <- function() {
  ink <- "#757575"
  see_through <- element_rect(fill = "transparent", colour = NA)
  label_font <- "Open Sans"

  overrides <- theme(
    # 0.5 cm of padding on all four sides of the plot
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"),

    # no plot/panel background and no panel border
    plot.background = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),

    # transparent legend and facet-strip backgrounds
    legend.background = see_through,
    legend.key = see_through,
    strip.background = see_through,

    # thin dotted major grid lines; no minor grid lines
    panel.grid.major = element_line(linetype = "dotted", colour = ink, size = 0.3),
    panel.grid.minor = element_blank(),

    # no tick marks; axis lines are drawn in white
    axis.ticks = element_blank(),
    axis.line = element_line(color = "#FFFFFF", size = 0.3),

    # title, subtitle and caption: left/left/right aligned with custom margins
    plot.title = element_text(size = 18, colour = ink, hjust = 0, margin = margin(b = 4)),
    plot.subtitle = element_text(size = 12, colour = ink, hjust = 0, margin = margin(b = 10)),
    plot.caption = element_text(size = 10, colour = ink, hjust = 1, margin = margin(t = 15)),

    # Open Sans is set explicitly on axis titles, axis tick labels and strip
    # text; the legend title and legend text only set size and colour
    axis.title = element_text(family = label_font, size = 11, colour = ink, face = "plain", hjust = 1),
    axis.text = element_text(family = label_font, size = 10, colour = ink, face = "plain"),
    strip.text = element_text(family = label_font, size = 12, colour = ink, face = "plain"),
    legend.title = element_text(size = 12, colour = ink),
    legend.text = element_text(size = 10, colour = ink)
  )

  theme_grey(base_size = 11.5, base_family = "Roboto") %+replace% overrides
}

#lista de cores da paleta
#Set2 = c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3")
library(tidyverse)
#library(patchwork)

data

Dados do gapminder, complementados com o dslabs::gapminder, que possui algumas informações a mais

dslabs::gapminder

# Fertility lookup built from dslabs::gapminder, restricted to the five-yearly
# years 1952, 1957, ..., 2007. One row per continent/country/region/year.
#
# `fertility` is deliberately NOT a grouping key: it is the measure being
# summarised. `.groups = "drop"` returns an ungrouped tibble directly.
#
# Columns intentionally left out:
#   - population, gdp, life_expectancy: odd values, quite different from the
#     official gapminder package data
#   - infant_mortality: many NAs after the merge
dslabs <- dslabs::gapminder %>%
  filter(year %in% seq(1952, 2007, by = 5)) %>%
  group_by(continent, country, region, year) %>%
  summarise(fertility = mean(fertility, na.rm = TRUE), .groups = "drop") %>%
  glimpse()
## Rows: 1,850
## Columns: 5
## $ continent <fct> Africa, Africa, Africa, Africa, Africa, Africa, Africa, Afri…
## $ country   <fct> "Algeria", "Algeria", "Algeria", "Algeria", "Algeria", "Alge…
## $ region    <fct> Northern Africa, Northern Africa, Northern Africa, Northern …
## $ year      <int> 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002, 2007, …
## $ fertility <dbl> 7.65, 7.66, 7.59, 7.24, 6.58, 5.51, 4.24, 2.99, 2.41, 2.66, …

view and obs

infant_mortality e gdp têm muitos NAs depois do match

# Interactive table view of the fertility lookup.
DT::datatable(dslabs)

original

# Column overview of the untouched dslabs::gapminder data set.
glimpse(dslabs::gapminder)
## Rows: 10,545
## Columns: 9
## $ country          <fct> "Albania", "Algeria", "Angola", "Antigua and Barbuda"…
## $ year             <int> 1960, 1960, 1960, 1960, 1960, 1960, 1960, 1960, 1960,…
## $ infant_mortality <dbl> 115.40, 148.20, 208.00, NA, 59.87, NA, NA, 20.30, 37.…
## $ life_expectancy  <dbl> 62.87, 47.50, 35.98, 62.97, 65.39, 66.86, 65.66, 70.8…
## $ fertility        <dbl> 6.19, 7.65, 7.32, 4.43, 3.11, 4.55, 4.82, 3.45, 2.70,…
## $ population       <dbl> 1636054, 11124892, 5270844, 54681, 20619075, 1867396,…
## $ gdp              <dbl> NA, 13828152297, NA, NA, 108322326649, NA, NA, 966778…
## $ continent        <fct> Europe, Africa, Africa, Americas, Americas, Asia, Ame…
## $ region           <fct> Southern Europe, Northern Africa, Middle Africa, Cari…

years

# Number of rows per year in the raw dslabs data.
# count(year) returns an ungrouped tibble, so no grouping lingers on the result.
dslabs::gapminder %>%
  count(year)
## # A tibble: 57 × 2
## # Groups:   year [57]
##     year     n
##    <int> <int>
##  1  1960   185
##  2  1961   185
##  3  1962   185
##  4  1963   185
##  5  1964   185
##  6  1965   185
##  7  1966   185
##  8  1967   185
##  9  1968   185
## 10  1969   185
## # ℹ 47 more rows

regions

# Distinct continent/region pairs present in the fertility lookup.
# distinct() states the intent directly (no count column to throw away) and
# returns an ungrouped tibble; arrange() keeps the sorted order.
dslabs %>%
  distinct(continent, region) %>%
  arrange(continent, region)
## # A tibble: 22 × 2
## # Groups:   continent, region [22]
##    continent region          
##    <fct>     <fct>           
##  1 Africa    Eastern Africa  
##  2 Africa    Middle Africa   
##  3 Africa    Northern Africa 
##  4 Africa    Southern Africa 
##  5 Africa    Western Africa  
##  6 Americas  Caribbean       
##  7 Americas  Central America 
##  8 Americas  Northern America
##  9 Americas  South America   
## 10 Asia      Central Asia    
## # ℹ 12 more rows

NA

# Per year, count the missing values in each numeric column of the raw dslabs
# data (the grouping column `year` itself is not summarised).
dslabs::gapminder %>%
  group_by(year) %>%
  summarise(across(where(is.numeric), function(col) sum(is.na(col))))
## # A tibble: 57 × 6
##     year infant_mortality life_expectancy fertility population   gdp
##    <int>            <int>           <int>     <int>      <int> <int>
##  1  1960               45               0         0          0    90
##  2  1961               75               0         0          0    89
##  3  1962               74               0         0          0    89
##  4  1963               70               0         0          0    89
##  5  1964               68               0         0          0    89
##  6  1965               66               0         0          0    83
##  7  1966               66               0         0          0    81
##  8  1967               62               0         0          0    80
##  9  1968               58               0         0          0    79
## 10  1969               51               0         0          0    79
## # ℹ 47 more rows

gapminder

# Enriched gapminder table: gapminder::gapminder plus ISO codes plus region and
# fertility taken from the `dslabs` lookup built earlier in this file.
#
# Join keys are spelled out so the joins neither emit "Joining with `by = ...`"
# messages nor silently change if a column is added to either side.
gapminder <- gapminder::gapminder %>%
  left_join(gapminder::country_codes, by = "country") %>%
  left_join(dslabs, by = c("country", "continent", "year")) %>%
  # Countries that find no match in the lookup get a region by hand.
  # Labels follow the dslabs region vocabulary ("Middle Africa",
  # "South-Eastern Asia") so no one-country pseudo-regions are created.
  mutate(region = case_when(
    country == "Afghanistan" ~ "Central Asia",
    country %in% c("Korea, Dem. Rep.", "Korea, Rep.", "Taiwan") ~ "Eastern Asia",
    country == "Myanmar" ~ "South-Eastern Asia",
    country %in% c("Reunion", "Somalia") ~ "Eastern Africa",
    country == "Sao Tome and Principe" ~ "Middle Africa",
    country %in% c("Turkey", "Yemen, Rep.") ~ "Western Asia",
    TRUE ~ as.character(region)
  )) %>%
  # The lookup has no rows for 1952 and 1957, so the join leaves `region` NA in
  # those years for every matched country. Region does not vary over time, so
  # copy it across each country's rows.
  group_by(country) %>%
  tidyr::fill(region, .direction = "downup") %>%
  ungroup() %>%
  janitor::clean_names() %>%
  # Continent labels are shown in Portuguese in the charts.
  mutate(continent = recode(continent,
                            "Asia" = "Ásia",
                            "Europe" = "Europa",
                            "Africa" = "África",
                            "Americas" = "América",
                            "Oceania" = "Oceania")) %>%
  relocate(region, .after = continent) %>%
  glimpse()
## Rows: 1,704
## Columns: 10
## $ country    <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",…
## $ continent  <fct> Ásia, Ásia, Ásia, Ásia, Ásia, Ásia, Ásia, Ásia, Ásia, Ásia,…
## $ region     <chr> "Central Asia", "Central Asia", "Central Asia", "Central As…
## $ year       <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997,…
## $ life_exp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.…
## $ pop        <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 1…
## $ gdp_percap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134,…
## $ iso_alpha  <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AF…
## $ iso_num    <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8,…
## $ fertility  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5.9…
  #código complementar para identificar países sem região atribuída:
  #gapminder %>%   filter(is.na(region)) %>% select(country, continent, region) %>%  unique() 

view and obs

A base gapminder tratada tem um total de 142 países para cada um dos anos, com length(unique(gapminder$year)) anos distintos, entre min(gapminder$year) e max(gapminder$year) .

# Interactive table view of the enriched gapminder table.
DT::datatable(gapminder)

countries

# Countries per continent in the most recent year of the table.
# count(continent) returns an ungrouped tibble, so no grouping lingers.
gapminder %>%
  slice_max(year) %>%
  count(continent)
## # A tibble: 5 × 2
## # Groups:   continent [5]
##   continent     n
##   <fct>     <int>
## 1 África       52
## 2 América      25
## 3 Ásia         33
## 4 Europa       30
## 5 Oceania       2
# Year(s) with the largest number of rows; ties are all kept by slice_max().
gapminder %>%
  count(year) %>%
  slice_max(n)
## # A tibble: 12 × 2
##     year     n
##    <int> <int>
##  1  1952   142
##  2  1957   142
##  3  1962   142
##  4  1967   142
##  5  1972   142
##  6  1977   142
##  7  1982   142
##  8  1987   142
##  9  1992   142
## 10  1997   142
## 11  2002   142
## 12  2007   142

gapminder_full

# Same enrichment as `gapminder`, but starting from gapminder_unfiltered (more
# countries and years). Region and fertility come from the `dslabs` lookup
# built earlier in this file.
#
# Join keys are explicit so the joins emit no "Joining with `by = ...`"
# messages and cannot silently change if either side gains a column.
gapminder_full <- gapminder::gapminder_unfiltered %>%
  left_join(gapminder::country_codes, by = "country") %>%
  left_join(dslabs, by = c("country", "continent", "year")) %>%
  # Countries that find no match in the lookup get a region by hand (a single
  # case_when instead of two back-to-back ones). Labels follow the dslabs
  # region vocabulary ("Middle Africa", "South-Eastern Asia").
  # Countries whose continent is "FSU" are not patched here and keep region NA.
  mutate(region = case_when(
    country == "Afghanistan" ~ "Central Asia",
    country %in% c("Korea, Dem. Rep.", "Korea, Rep.", "Taiwan") ~ "Eastern Asia",
    country == "Myanmar" ~ "South-Eastern Asia",
    country %in% c("Reunion", "Somalia") ~ "Eastern Africa",
    country == "Sao Tome and Principe" ~ "Middle Africa",
    country %in% c("Turkey", "Yemen, Rep.", "Cyprus") ~ "Western Asia",
    country %in% c("French Guiana", "Netherlands Antilles") ~ "South America",
    country %in% c("Guadeloupe", "Martinique") ~ "Caribbean",
    TRUE ~ as.character(region)
  )) %>%
  # The lookup only covers some years (none for 1952 and 1957), so the join
  # leaves `region` NA on the remaining rows. Region does not vary over time,
  # so copy it across each country's rows.
  group_by(country) %>%
  tidyr::fill(region, .direction = "downup") %>%
  ungroup() %>%
  janitor::clean_names() %>%
  # Continent labels are shown in Portuguese in the charts ("FSU" is kept).
  mutate(continent = recode(continent,
                            "Asia" = "Ásia",
                            "Europe" = "Europa",
                            "Africa" = "África",
                            "Americas" = "América",
                            "Oceania" = "Oceania")) %>%
  relocate(region, .after = continent) %>%
  glimpse()
## Rows: 3,313
## Columns: 10
## $ country    <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",…
## $ continent  <fct> Ásia, Ásia, Ásia, Ásia, Ásia, Ásia, Ásia, Ásia, Ásia, Ásia,…
## $ region     <chr> "Central Asia", "Central Asia", "Central Asia", "Central As…
## $ year       <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997,…
## $ life_exp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.…
## $ pop        <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 1…
## $ gdp_percap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134,…
## $ iso_alpha  <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AF…
## $ iso_num    <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8,…
## $ fertility  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 5.9…
#código complementar para identificar países sem região atribuída:
#  gapminder_full %>%   filter(is.na(region)) %>% select(country, continent, region) %>%  unique() 
    # País  Continente  Subcontinente
      # Armenia Europa  Europa Oriental
      # Belarus Europa  Europa Oriental
      # Geórgia Europa  Europa Oriental
      # Cazaquistão Ásia    Ásia Central
      # Letônia Europa  Europa Oriental
      # Lituânia    Europa  Europa Oriental
      # Rússia  Europa  Europa Oriental
      # Ucrânia Europa  Europa Oriental
      # Uzbequistão Ásia    Ásia Central

view and obs

Já a base gapminder_full tem variação no número de países ao longo de length(unique(gapminder_full$year)) anos distintos, entre min(gapminder_full$year) e max(gapminder_full$year). O ano com a maior quantidade de países é 2002, com 187 países.

# Interactive table view of the full (unfiltered) enriched table.
DT::datatable(gapminder_full)

how many countries

# Year(s) with the largest number of rows; ties are all kept by slice_max().
gapminder_full %>%
  count(year) %>%
  slice_max(n)
## # A tibble: 1 × 2
##    year     n
##   <int> <int>
## 1  2002   187

FSU

# Rows for 2002 whose continent is "FSU".
# `year` is an integer column, so it is compared with a number rather than the
# string "2002" (which only worked through implicit coercion).
gapminder_full %>%
  filter(year == 2002, continent == "FSU")
## # A tibble: 9 × 10
##   country    continent region  year life_exp    pop gdp_percap iso_alpha iso_num
##   <chr>      <fct>     <chr>  <int>    <dbl>  <int>      <dbl> <chr>       <int>
## 1 Armenia    FSU       <NA>    2002     71.4 3.01e6      2692. ARM            51
## 2 Belarus    FSU       <NA>    2002     68.2 1.03e7      6546. BLR           112
## 3 Georgia    FSU       <NA>    2002     70.5 4.73e6      2638. GEO           268
## 4 Kazakhstan FSU       <NA>    2002     64.9 1.51e7      6667. KAZ           398
## 5 Latvia     FSU       <NA>    2002     70.7 2.34e6     10094. LVA           428
## 6 Lithuania  FSU       <NA>    2002     71.9 3.63e6     11009. LTU           440
## 7 Russia     FSU       <NA>    2002     65.0 1.45e8      9568. RUS           643
## 8 Ukraine    FSU       <NA>    2002     67.8 4.81e7      4335. UKR           804
## 9 Uzbekistan FSU       <NA>    2002     66.5 2.56e7      1724. UZB           860
## # ℹ 1 more variable: fertility <dbl>
# Countries per continent in 2002, leaving out the "FSU" rows.
# Numeric comparison on the integer `year`; count() returns an ungrouped tibble.
gapminder_full %>%
  filter(continent != "FSU", year == 2002) %>%
  count(continent)
## # A tibble: 5 × 2
## # Groups:   continent [5]
##   continent     n
##   <fct>     <int>
## 1 África       53
## 2 América      36
## 3 Ásia         43
## 4 Europa       35
## 5 Oceania      11

NA

# For the five-yearly years 1952-2007, count the missing values in each numeric
# column, per year.
gapminder_full %>%
  filter(year %in% seq(1952, 2007, by = 5)) %>%
  group_by(year) %>%
  summarise(across(where(is.numeric), function(col) sum(is.na(col))))
## # A tibble: 12 × 6
##     year life_exp   pop gdp_percap iso_num fertility
##    <int>    <int> <int>      <int>   <int>     <int>
##  1  1952        0     0          0       0       144
##  2  1957        0     0          0       0       144
##  3  1962        0     0          0       0        10
##  4  1967        0     0          0       0        12
##  5  1972        0     0          0       0        14
##  6  1977        0     0          0       0        14
##  7  1982        0     0          0       0        14
##  8  1987        0     0          0       0        14
##  9  1992        0     0          0       0        21
## 10  1997        0     0          0       0        21
## 11  2002        0     0          0       0        24
## 12  2007        0     0          0       0        21
# For the five-yearly years 1952-2007, mean of each numeric column per year,
# ignoring NAs (a column that is entirely NA in a year yields NaN).
# The NA handling is written inside the lambda: passing extra arguments through
# across() is deprecated, and `TRUE` replaces the reassignable `T`.
gapminder_full %>%
  filter(year %in% seq(1952, 2007, by = 5)) %>%
  group_by(year) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))
## # A tibble: 12 × 6
##     year life_exp       pop gdp_percap iso_num fertility
##    <int>    <dbl>     <dbl>      <dbl>   <dbl>     <dbl>
##  1  1952     49.2 16718135.      3777.    424.    NaN   
##  2  1957     51.6 18506198.      4359.    424.    NaN   
##  3  1962     54.0 19231450.      4771.    416.      5.50
##  4  1967     56.3 20701012.      5641.    413.      5.35
##  5  1972     58.5 21388403.      7612.    416.      5.15
##  6  1977     60.4 23087709.      8411.    421.      4.88
##  7  1982     62.4 25199169.      8553.    421.      4.61
##  8  1987     64.0 27561857.      8799.    421.      4.29
##  9  1992     65.0 29579355.      8961.    427.      3.89
## 10  1997     65.9 31624026.      9769.    425.      3.55
## 11  2002     66.8 33103895.     10658.    424.      3.27
## 12  2007     67.9 35812677.     12403.    425.      3.09

Magnitude

Análises feitas com base nos códigos de @traffordDataLab

Qtd países por continente

# Bar chart: number of countries per continent in 2002 ("FSU" left out).
# Variant with value labels above the bars and no grid lines or y-axis text.
#
# The stray `color = "white"` that used to be passed to ggplot() is removed:
# ggplot() ignores it, so it never affected the bars.
gapminder_full %>%
  filter(continent != "FSU", year == 2002) %>%
  count(continent) %>%
  ggplot(aes(x = continent, y = n, fill = continent)) +
  geom_col(alpha = 0.8) +
  geom_text(aes(label = n), vjust = -0.5, size = 4, colour = "#757575") +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_fill_brewer(palette = "Set2") +
  # fixed 0-60 range so the count charts in this section share one scale
  scale_y_continuous(limits = c(0, 60)) +
  labs(title = "",
       subtitle = "Contagem de Países por Continente, ano de 2002",
       caption = "Fonte: gapminder.org |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_blank(),
        axis.text.y = element_blank(),
        legend.position = "none")

# Bar chart: number of countries per continent in 2002 ("FSU" left out).
# Variant without value labels; horizontal grid lines and y-axis text are kept.
#
# The stray `color = "white"` that used to be passed to ggplot() is removed:
# ggplot() ignores it, so it never affected the bars.
gapminder_full %>%
  filter(continent != "FSU", year == 2002) %>%
  count(continent) %>%
  ggplot(aes(x = continent, y = n, fill = continent)) +
  geom_col(alpha = 0.8) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_fill_brewer(palette = "Set2") +
  # fixed 0-60 range so the count charts in this section share one scale
  scale_y_continuous(limits = c(0, 60)) +
  labs(title = "",
       subtitle = "Contagem de Países por Continente, ano de 2002",
       caption = "Fonte: gapminder.org |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "none")

# Bar chart: number of countries per continent in 2002 ("FSU" left out).
# Variant with both value labels and horizontal grid lines / y-axis text.
#
# The stray `color = "white"` that used to be passed to ggplot() is removed:
# ggplot() ignores it, so it never affected the bars.
gapminder_full %>%
  filter(continent != "FSU", year == 2002) %>%
  count(continent) %>%
  ggplot(aes(x = continent, y = n, fill = continent)) +
  geom_col(alpha = 0.8) +
  geom_text(aes(label = n), vjust = -0.5, size = 4, colour = "#757575") +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_fill_brewer(palette = "Set2") +
  # fixed 0-60 range so the count charts in this section share one scale
  scale_y_continuous(limits = c(0, 60)) +
  labs(title = "",
       subtitle = "Contagem de Países por Continente, ano de 2002",
       caption = "Fonte: gapminder.org |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "none")

Life_exp

# Bar chart: mean life expectancy per continent in 2002 ("FSU" left out),
# all bars in one fixed colour.
#
# The former `fill = continent` mapping (and its `fill = NULL` label) is
# dropped: the constant fill on geom_col() overrides it and the legend is
# hidden, so it had no visible effect.
gapminder_full %>%
  filter(continent != "FSU", year == 2002) %>%
  group_by(continent) %>%
  summarise(life_exp = mean(life_exp)) %>%
  ggplot(aes(x = continent, y = life_exp)) +
  geom_col(fill = "#FFD92F", alpha = 0.8) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  labs(title = "",
       subtitle = "Média Expectativa de Vida por Continente, ano de 2002",
       caption = "Fonte: gapminder.org |  @traffordDataLab",
       x = NULL,
       y = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "none")

# Bar chart: mean life expectancy per continent in 2002 ("FSU" left out),
# one Set2 colour per continent.
# `year` is an integer column, so it is compared with a number, not "2002".
gapminder_full %>%
  filter(continent != "FSU", year == 2002) %>%
  group_by(continent) %>%
  summarise(life_exp = mean(life_exp)) %>%
  ggplot(aes(x = continent, y = life_exp, fill = continent)) +
  geom_col(alpha = 0.8) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "",
       subtitle = "Média Expectativa de Vida por Continente, ano de 2002",
       caption = "Fonte: gapminder.org |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "none")

Comparação

absoluta

agrupado

# Dodged bars: countries per continent, 1952 vs 2002 ("FSU" left out).
#
# The y scale is declared once. Previously a second scale_y_continuous() was
# added after the theme, which replaces the first one, so `labels` and
# `expand` were silently discarded. All three settings now live in one call.
gapminder_full %>%
  filter(continent != "FSU", year %in% c(1952, 2002)) %>%
  count(year, continent) %>%
  ggplot(aes(x = year, y = n, group = continent, fill = continent)) +
  geom_col(position = "dodge", colour = "white", size = 0.2, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_x_continuous(breaks = c(1952, 2002), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::comma, limits = c(0, 60), expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "",
       subtitle = "Contagem de Países por Continente, comparação por período",
       caption = "Source: gapminder.org  |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "bottom")

# p1 - dodged bars: countries per continent for every fifth year 1952-2002
# ("FSU" left out). The outer parentheses print the plot after assigning it.
#
# The y scale is declared once. Previously a second scale_y_continuous() was
# added after the theme, which replaces the first one, so `labels` and
# `expand` were silently discarded. All three settings now live in one call.
(p1 <- gapminder_full %>%
  filter(continent != "FSU", year %in% seq(1952, 2002, by = 5)) %>%
  count(year, continent) %>%
  ggplot(aes(x = year, y = n, group = continent, fill = continent)) +
  geom_col(position = "dodge", colour = "white", size = 0.2, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_y_continuous(labels = scales::comma, limits = c(0, 60), expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "",
       subtitle = "Contagem de Países por Continente, comparação por período",
       caption = "Source: gapminder.org  |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "bottom"))

agrupado + continent by years

# p0 - countries per continent for every fifth year 1952-2002 ("FSU" left
# out), one facet per continent. The outer parentheses print the plot.
#
# The y scale is declared once. Previously a second scale_y_continuous() was
# added after the theme, which replaces the first one, so `labels` and
# `expand` were silently discarded. All three settings now live in one call.
(p0 <- gapminder_full %>%
  filter(continent != "FSU", year %in% seq(1952, 2002, by = 5)) %>%
  count(year, continent) %>%
  ggplot(aes(x = year, y = n, group = continent, fill = continent)) +
  geom_col(position = "dodge", colour = "white", size = 0.2, alpha = 0.8) +
  facet_wrap(~ continent) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_y_continuous(labels = scales::comma, limits = c(0, 60), expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "",
       subtitle = "Contagem de Países por Continente, comparação por período",
       caption = "Source: gapminder.org  |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "bottom"))

# Dodged bars: countries per continent, one bar per year (every fifth year
# 1952-2002, "FSU" left out), with year as the fill.
#
# Fill palette: the year factor has 11 levels, but the Brewer "Set2" palette
# only provides 8 colours, so the last three years were drawn with the NA
# colour. A discrete viridis scale handles any number of levels and suits an
# ordered variable such as year.
gapminder_full %>%
  filter(continent != "FSU", year %in% seq(1952, 2002, by = 5)) %>%
  count(year, continent) %>%
  mutate(year = as_factor(year)) %>%
  ggplot(aes(x = continent, y = n, group = year, fill = year)) +
  geom_col(position = "dodge", colour = "white", size = 0.2, alpha = 0.8) +
  scale_y_continuous(labels = scales::comma, expand = c(0, 0)) +
  scale_fill_viridis_d() +
  labs(title = "",
       subtitle = "Contagem de Países por Continente, de 1952 a 2002",
       caption = "Source: Gapminder.org  |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "bottom")

# Dodged bars: countries per continent, one bar per year (every fifth year
# 1952-2002, "FSU" left out), with year as a grey-scale fill.
#
# The y scale is declared once. Previously a second scale_y_continuous() was
# added after the theme, which replaces the first one, so `labels` and
# `expand` were silently discarded. All three settings now live in one call.
gapminder_full %>%
  filter(continent != "FSU", year %in% seq(1952, 2002, by = 5)) %>%
  count(year, continent) %>%
  # factor copy of year for the discrete fill; ordered by the numeric year
  mutate(year2 = as_factor(year)) %>%
  ggplot(aes(x = continent, y = n, group = year2, fill = fct_reorder(year2, year))) +
  geom_col(position = "dodge", colour = "white", size = 0.2, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_y_continuous(labels = scales::comma, limits = c(0, 60), expand = c(0, 0)) +
  scale_fill_grey() +
  labs(title = "",
       subtitle = "Contagem de Países por Continente, comparação por período",
       caption = "Source: Gapminder.org  |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank())

absoluta e relativa não normalizada

# p2 - stacked bars (absolute counts): countries per continent for every fifth
# year 1952-2002 ("FSU" left out). The outer parentheses print the plot.
# `FALSE` replaces the reassignable shorthand `F`; count() replaces
# group_by() + count() so the plotted data is not left grouped.
# NOTE(review): the subtitle says "Proporção" although this chart stacks
# absolute counts - confirm whether the wording is intended.
(p2 <- gapminder_full %>%
  filter(continent != "FSU", year %in% seq(1952, 2002, by = 5)) %>%
  count(year, continent) %>%
  ggplot(aes(x = year, y = n, fill = continent)) +
  geom_col(colour = "white", size = 0.2, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_x_continuous(breaks = seq(1952, 2002, by = 5), expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  guides(fill = guide_legend(reverse = FALSE)) +
  labs(title = "",
       subtitle = "Proporção da contagem de países por continente, comparação por período",
       caption = "Source: gapminder.org  |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "right"))

relativa

# p3 - 100% stacked bars: share of countries per continent for every fifth
# year 1952-2002 ("FSU" left out). The outer parentheses print the plot.
# `FALSE` replaces the reassignable shorthand `F`; count() replaces
# group_by() + count() so the plotted data is not left grouped.
(p3 <- gapminder_full %>%
  filter(continent != "FSU", year %in% seq(1952, 2002, by = 5)) %>%
  count(year, continent) %>%
  ggplot(aes(x = year, y = n, fill = continent)) +
  geom_col(position = "fill", colour = "white", size = 0.2, alpha = 0.8) +
  geom_hline(yintercept = 0, color = "lightgrey") +
  scale_x_continuous(breaks = seq(1952, 2002, by = 5), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set2") +
  guides(fill = guide_legend(reverse = FALSE)) +
  labs(title = "",
       subtitle = "Proporção da contagem de países por continente, comparação por período",
       caption = "Source: gapminder.org  |  @traffordDataLab",
       x = NULL,
       y = NULL,
       fill = NULL) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank(),
        legend.position = "right"))

clean versions

#patchwork::

# "Clean" variants of p1, p2 and p3 for the combined figure: subtitle and
# caption are blanked and the legend is hidden. Each variant is printed
# right after it is built.
p11 <- p1 +
  labs(subtitle = "", caption = "") +
  theme(legend.position = "none")
p11

p22 <- p2 +
  labs(subtitle = "", caption = "") +
  theme(legend.position = "none")
p22

p33 <- p3 +
  labs(subtitle = "", caption = "") +
  theme(legend.position = "none")
p33

# Stack the three panels vertically; `/` on plot objects relies on the
# patchwork package being attached.
p11 / p22 / p33

Relação

fertility x life_exp

# Coverage check for `fertility` in 1952, per continent: how many values are
# missing and what the mean of the non-missing ones is.
gapminder_full %>%
  # `year` is compared as a number rather than relying on implicit coercion
  # to the string "1952"
  filter(continent != "FSU", year == 1952) %>%
  group_by(continent) %>%
  summarise(
    # counted first, while `fertility` still refers to the raw column
    NA_fertility = sum(is.na(fertility)),
    # NaN when every value in the group is missing
    fertility = mean(fertility, na.rm = TRUE)
  )
## # A tibble: 5 × 3
##   continent NA_fertility fertility
##   <fct>            <int>     <dbl>
## 1 África              53       NaN
## 2 América             25       NaN
## 3 Ásia                33       NaN
## 4 Europa              31       NaN
## 5 Oceania              2       NaN
# Fertility vs. life expectancy for 1952 and 2002, coloured by year.
# The years are matched as numbers instead of character strings.
gapminder_full %>%
  filter(year %in% c(1952, 2002)) %>%
  ggplot(aes(x = fertility, y = life_exp, color = as_factor(year))) +
  geom_point()

# Fertility vs. life expectancy in 2002, one panel per continent, with a
# fitted line (method = "glm") in each panel.
p <- gapminder_full %>%
  # numeric comparison on `year`; 2002 alone makes a year-list filter redundant
  filter(continent != "FSU", year == 2002) %>%
  group_by(year, country, continent) %>%
  summarise(
    life_exp = mean(life_exp, na.rm = TRUE),
    fertility = mean(fertility, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(year = as_factor(year)) %>%
  ggplot(aes(x = fertility, y = life_exp, color = continent)) +
  geom_point() +
  geom_smooth(method = "glm") +
  facet_grid(. ~ continent, scales = "free_x") +
  theme_nath() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )

# print the chart
p

# plotly::ggplotly(p)
# Same fertility vs. life expectancy view as above, restricted to Oceania in
# 2002, with every point labelled by country.
gapminder_full %>%
  # numeric comparison on `year`; 2002 alone makes a year-list filter redundant
  filter(continent != "FSU", year == 2002, continent == "Oceania") %>%
  group_by(year, country, continent) %>%
  summarise(
    life_exp = mean(life_exp, na.rm = TRUE),
    fertility = mean(fertility, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(year = as_factor(year)) %>%
  ggplot(aes(x = fertility, y = life_exp, color = continent)) +
  geom_point() +
  geom_smooth(method = "glm") +
  facet_grid(. ~ continent, scales = "free_x") +
  # repelled labels so country names do not overlap the points
  ggrepel::geom_label_repel(aes(label = country)) +
  theme_nath() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )

# GDP per capita (log10 x axis, dollar labels) vs. life expectancy in 2002,
# coloured by continent. Overwrites the earlier `p`.
p <- gapminder_full %>%
  # numeric comparison on `year`; 2002 alone makes a year-list filter redundant
  filter(continent != "FSU", year == 2002) %>%
  mutate(year = as_factor(year)) %>%
  ggplot(aes(x = gdp_percap, y = life_exp, color = continent)) +
  geom_point() +
  scale_color_brewer(palette = "Set2") +
  scale_x_log10(labels = scales::dollar) +
  theme_nath() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )

# print the chart
p

# plotly::ggplotly(p)

gdp_percap x life_exp

# GDP per capita vs. life expectancy for 1952 and 2002, coloured by year.
# The years are matched as numbers instead of character strings.
gapminder %>%
  filter(year %in% c(1952, 2002)) %>%
  ggplot(aes(x = gdp_percap, y = life_exp, colour = as_factor(year))) +
  geom_point()

# Exploratory GDP per capita vs. life expectancy scatters over all of
# `gapminder`, each adding one idea to the previous one.

# 1. All points, heavily transparent to show density.
ggplot(gapminder, aes(x = gdp_percap, y = life_exp)) +
  geom_point(alpha = 0.03)

# 2. Points coloured by continent.
ggplot(gapminder, aes(x = gdp_percap, y = life_exp)) +
  geom_point(aes(color = continent), alpha = 0.1)

# 3. One column of panels per continent.
ggplot(gapminder, aes(x = gdp_percap, y = life_exp)) +
  geom_point(aes(color = continent), alpha = 0.1) +
  facet_grid(. ~ continent)

# 4. Wrapped panels, one per continent.
ggplot(gapminder, aes(x = gdp_percap, y = life_exp)) +
  geom_point(aes(color = continent), alpha = 0.1) +
  facet_wrap(continent ~ .)

# 5. Every point labelled with its country name.
ggplot(gapminder, aes(x = gdp_percap, y = life_exp, color = continent)) +
  geom_point() +
  geom_text(aes(label = country))

Evolução

year x life_exp

# Life expectancy over time: one semi-transparent point per row of
# `gapminder`, coloured by continent.
gapminder %>%
  ggplot(aes(x = year, y = life_exp, color = continent)) +
  geom_point(alpha = 0.3) +
  scale_colour_brewer(palette = "Set2") +
  # The mapped aesthetic is colour, so the legend guide and the legend title
  # are addressed through `colour`; a `fill` entry would match nothing.
  guides(colour = guide_legend(reverse = FALSE)) +
  labs(
    title = "",
    subtitle = "...",
    caption = "Source: Gapminder.org",
    x = NULL,
    y = NULL,
    colour = NULL
  ) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank())

# Life expectancy over time: points plus one line per country, both coloured
# by continent.
gapminder %>%
  ggplot(aes(x = year, y = life_exp, color = continent)) +
  geom_point(alpha = 0.3) +
  # group by country so each country gets its own trajectory
  geom_line(aes(group = country), alpha = 0.3) +
  scale_colour_brewer(palette = "Set2") +
  # The mapped aesthetic is colour, so the legend guide and the legend title
  # are addressed through `colour`; a `fill` entry would match nothing.
  guides(colour = guide_legend(reverse = FALSE)) +
  labs(
    title = "",
    subtitle = "...",
    caption = "Source: Gapminder.org",
    x = NULL,
    y = NULL,
    colour = NULL
  ) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank())

# Life expectancy over time: country points and trajectories in light grey
# as background, with a smoothed trend per continent on top.
gapminder %>%
  ggplot(aes(x = year, y = life_exp, color = continent)) +
  # fixed colours override the continent mapping for the background layers
  geom_point(alpha = 0.3, color = "lightgray") +
  geom_line(aes(group = country), alpha = 0.3, color = "lightgray") +
  geom_smooth() +
  scale_colour_brewer(palette = "Set2") +
  # The mapped aesthetic is colour, so the legend guide and the legend title
  # are addressed through `colour`; a `fill` entry would match nothing.
  guides(colour = guide_legend(reverse = FALSE)) +
  labs(
    title = "",
    subtitle = "...",
    caption = "Source: Gapminder.org",
    x = NULL,
    y = NULL,
    colour = NULL
  ) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank())

# Life expectancy over time: points, per-country trajectories and a smoothed
# trend, all coloured by continent.
gapminder %>%
  ggplot(aes(x = year, y = life_exp, color = continent)) +
  geom_point(alpha = 0.3) +
  # group by country so each country gets its own trajectory
  geom_line(aes(group = country), alpha = 0.3) +
  geom_smooth() +
  scale_colour_brewer(palette = "Set2") +
  # The mapped aesthetic is colour, so the legend guide and the legend title
  # are addressed through `colour`; a `fill` entry would match nothing.
  guides(colour = guide_legend(reverse = FALSE)) +
  labs(
    title = "",
    subtitle = "...",
    caption = "Source: Gapminder.org",
    x = NULL,
    y = NULL,
    colour = NULL
  ) +
  theme_nath() +
  theme(panel.grid.major.x = element_blank())

# Life expectancy over time, one row of panels per continent: points,
# per-country trajectories and a smoothed trend. The legend is hidden
# because the panels already identify the continent.
ggplot(gapminder, aes(x = year, y = life_exp, color = continent)) +
  geom_point(alpha = 0.3) +
  geom_line(aes(group = country), alpha = 0.3) +
  geom_smooth() +
  facet_grid(continent ~ .) +
  scale_colour_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "...",
    caption = "Source: Gapminder.org",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_nath() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )

# Same faceted life-expectancy chart, built from `gapminder_full` restricted
# to the five-yearly years 1952-2002 and without the "FSU" continent level.
gapminder_full %>%
  filter(continent != "FSU", year %in% seq(1952, 2002, by = 5)) %>%
  ggplot(aes(x = year, y = life_exp, color = continent)) +
  geom_point(alpha = 0.3) +
  geom_line(aes(group = country), alpha = 0.3) +
  geom_smooth() +
  facet_grid(continent ~ .) +
  scale_colour_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "...",
    caption = "Source: Gapminder.org",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_nath() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )

Distribution

# Density of life expectancy in 2002 for Europa and Ásia, one panel per
# continent stacked vertically.
gapminder_full %>%
  # `year` is compared as a number rather than relying on implicit coercion
  # to the string "2002"
  filter(
    continent != "FSU",
    year == 2002,
    continent %in% c("Europa", "Ásia")
  ) %>%
  ggplot(aes(x = life_exp, color = continent, fill = continent)) +
  geom_density(alpha = 0.6) +
  facet_grid(continent ~ ., scales = "free_x") +
  scale_color_brewer(palette = "Set2") +
  scale_fill_brewer(palette = "Set2") +
  theme_nath() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )

# Ridgeline plot: density of life expectancy in 2002, one ridge per
# continent.
gapminder_full %>%
  # `year` is compared as a number rather than relying on implicit coercion
  # to the string "2002"
  filter(continent != "FSU", year == 2002) %>%
  ggplot(aes(x = life_exp, y = continent, color = continent, fill = continent)) +
  ggridges::geom_density_ridges(alpha = 0.8) +
  scale_fill_brewer(palette = "Set2") +
  scale_color_brewer(palette = "Set2") +
  theme_nath() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )

Drafts

Treemap

# Treemap of total GDP in 2002: one tile per country, grouped and coloured
# by continent, tile area proportional to pop * gdp_percap.
gapminder_full %>%
  filter(continent != "FSU", year == 2002) %>%
  # total GDP gets its own column instead of overwriting `gdp_percap`
  mutate(gdp_total = pop * gdp_percap) %>%
  group_by(year, country, continent) %>%
  summarise(
    life_exp = mean(life_exp, na.rm = TRUE),
    fertility = mean(fertility, na.rm = TRUE),
    gdp_total = mean(gdp_total, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  ggplot(aes(area = gdp_total, fill = continent, subgroup = continent, label = country)) +
  treemapify::geom_treemap() +
  treemapify::geom_treemap_subgroup_border(colour = "black") +
  # continent name in the bottom-left corner of each subgroup
  treemapify::geom_treemap_subgroup_text(
    fontface = "bold",
    colour = "#f0f0f0",
    alpha = 0.7,
    place = "bottomleft"
  ) +
  # country name centred in its tile, wrapped to fit
  treemapify::geom_treemap_text(colour = "white", place = "centre", reflow = TRUE) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "",
    subtitle = "Country GDP by continent, 2002",
    caption = "Source: Gapminder.org  |  @traffordDataLab",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_nath() +
  theme(legend.position = "none")

## Lollipop + América

# Dot plot of life expectancy in 2002 for the rows of continent "América",
# placed on the row of their `region`; regions are ordered by
# fct_reorder(region, life_exp).
# NOTE(review): the subtitle mentions a per-region mean, but no aggregation
# is done here - every filtered row is drawn as its own point. Confirm which
# of the two is intended.
gapminder_full %>%
  filter(
    continent != "FSU",
    year == 2002,
    continent == "América"
  ) %>%
  ggplot(aes(x = life_exp, y = fct_reorder(region, life_exp))) +
  geom_point(color = "#FC8D62", alpha = 0.8) +
  labs(
    title = "",
    subtitle = "Expectativa de Vida, média dos países por região, América, 2002",
    caption = "Source: Gapminder.org",
    x = NULL,
    y = NULL,
    fill = NULL
  ) +
  theme_nath() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )

Da Relação à Evolução – Oceania

# GDP per capita vs. life expectancy for Australia across the five-yearly
# years 1952-2002, with a fitted line (method = "glm").
# NOTE(review): every point is labelled with `country`, so all labels read
# the same; `year` is converted to a factor but not mapped - possibly the
# label was meant to be the year. Confirm.
gapminder %>%
  filter(
    continent != "FSU",
    year %in% seq(1952, 2002, by = 5),
    continent == "Oceania",
    country == "Australia"
  ) %>%
  mutate(year = as_factor(year)) %>%
  ggplot(aes(x = gdp_percap, y = life_exp, color = continent)) +
  geom_point() +
  geom_smooth(method = "glm") +
  facet_grid(. ~ country, scales = "free_x") +
  ggrepel::geom_label_repel(aes(label = country)) +
  theme_nath() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )

# Life expectancy over time for Australia (five-yearly years 1952-2002),
# drawn as connected points with a repelled country label on each point.
gapminder %>%
  filter(
    continent != "FSU",
    year %in% seq(1952, 2002, by = 5),
    continent == "Oceania",
    country == "Australia"
  ) %>%
  ggplot(aes(x = year, y = life_exp, color = continent)) +
  geom_point() +
  geom_line() +
  ggrepel::geom_label_repel(aes(label = country)) +
  theme_nath() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_blank()
  )