# Global Map - demo

From Riinu Pius - The University of Edinburgh

<https://media.ed.ac.uk/media/HealthyR+demoA+Global+Map/1_sz1b44q4>

Day 04 of HealthyR demo

Global Map - demo

library(tidyverse)
library(ggthemes)
library(countrycode)
library(mapproj)
library(maps)
# Load the match outcomes and the team-code look-up table from the
# TidyTuesday 2019-07-09 data release.
wwc_outcomes = read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-07-09/wwc_outcomes.csv"
)
team_lookup = read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-07-09/codes.csv"
)

Explore data

Count years

# Tally the rows recorded for each tournament year.
count(wwc_outcomes, year)
## # A tibble: 8 × 2
##    year     n
##   <dbl> <int>
## 1  1991    52
## 2  1995    52
## 3  1999    64
## 4  2003    64
## 5  2007    64
## 6  2011    64
## 7  2015   104
## 8  2019   104

Games per country

# For the 2019 tournament only, tally the rows recorded for each team.
wwc_outcomes %>%
  filter(year == 2019) %>%
  group_by(team) %>%
  tally()
## # A tibble: 24 × 2
##    team      n
##    <chr> <int>
##  1 ARG       3
##  2 AUS       4
##  3 BRA       4
##  4 CAN       4
##  5 CHI       3
##  6 CHN       4
##  7 CMR       4
##  8 ENG       7
##  9 ESP       4
## 10 FRA       5
## # ℹ 14 more rows

Teams per year

# Teams per tournament: within each year keep one row per team, then tally
# the rows that remain. The result is still grouped by year.
wwc_outcomes %>%
  group_by(year) %>%
  distinct(team) %>%
  count(year)
## # A tibble: 8 × 2
## # Groups:   year [8]
##    year     n
##   <dbl> <int>
## 1  1991    12
## 2  1995    12
## 3  1999    16
## 4  2003    16
## 5  2007    16
## 6  2011    16
## 7  2015    24
## 8  2019    24

Riinu’s version

# Reduce the data to unique year-team pairs, then tally the pairs per year.
# No group_by() is involved, so the result is an ungrouped tibble.
wwc_outcomes %>% 
  distinct(year, team) %>% 
  count(year)
## # A tibble: 8 × 2
##    year     n
##   <dbl> <int>
## 1  1991    12
## 2  1995    12
## 3  1999    16
## 4  2003    16
## 5  2007    16
## 6  2011    16
## 7  2015    24
## 8  2019    24

Understand round

# List the unique values found in the `round` column.
distinct(wwc_outcomes, round)
## # A tibble: 6 × 1
##   round              
##   <chr>              
## 1 Group              
## 2 Quarter Final      
## 3 Semi Final         
## 4 Third Place Playoff
## 5 Final              
## 6 Round of 16

Niall - who won each year?

# Keep only the rows for the winning side of each Final.
wwc_outcomes %>%
  filter(round == "Final", win_status == "Won")
## # A tibble: 8 × 7
##    year team  score round yearly_game_id team_num win_status
##   <dbl> <chr> <dbl> <chr>          <dbl>    <dbl> <chr>     
## 1  1991 USA       2 Final             26        2 Won       
## 2  1995 NOR       2 Final             26        2 Won       
## 3  1999 USA       0 Final             32        1 Won       
## 4  2003 GER       2 Final             32        1 Won       
## 5  2007 GER       2 Final             32        1 Won       
## 6  2011 JPN       2 Final             32        1 Won       
## 7  2015 USA       5 Final             52        1 Won       
## 8  2019 USA       2 Final             52        1 Won

Mapping

Create summary table

# For every team, count the number of distinct tournament years it appears in.
countries_takenpart = wwc_outcomes %>%
  group_by(team) %>%
  summarise(n = n_distinct(year))

Change 3 letter codes to country names

# First attempt: treat the team codes as ISO 3-letter codes. The result is
# only printed, not stored; codes that cannot be matched come back as NA.
mutate(
  countries_takenpart,
  country = countrycode(team, origin = "iso3c", destination = "country.name")
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `country = countrycode(team, origin = "iso3c", destination =
##   "country.name")`.
## Caused by warning:
## ! Some values were not matched unambiguously: CHI, CRC, DEN, ENG, EQG, GER, NED, RSA, SCO, SUI, TPE
## # A tibble: 36 × 3
##    team      n country      
##    <chr> <int> <chr>        
##  1 ARG       3 Argentina    
##  2 AUS       7 Australia    
##  3 BRA       8 Brazil       
##  4 CAN       7 Canada       
##  5 CHI       1 <NA>         
##  6 CHN       7 China        
##  7 CIV       1 Côte d’Ivoire
##  8 CMR       2 Cameroon     
##  9 COL       2 Colombia     
## 10 CRC       1 <NA>         
## # ℹ 26 more rows

Countries in our dataset are not ISO coded, so we will use a separate look-up.

Join look-up

# Attach the look-up table's columns to each team via the shared `team` code.
countries_takenpart = countries_takenpart %>%
  left_join(team_lookup, by = "team")

Can we map yet?!

# World outline data, keeping only points with longitude of at most 180.
# NOTE(review): presumably this drops points that wrap past the antimeridian
# and would distort the projected map - confirm.
world_map = map_data("world") %>%
  filter(long <= 180)

# One row per region name in the map data, numbered with a running row id.
countries = rowid_to_column(distinct(world_map, region))

Add our data

# Keep every map region and attach the team summary wherever the region name
# equals the team's country name; regions without a team get NA.
mycountries = countries %>%
  left_join(countries_takenpart, by = c("region" = "country"))

Check our data

# Show only the regions that picked up a team, i.e. where the count is present.
filter(mycountries, !is.na(n))
##    rowid            region team n
## 1      9         Argentina  ARG 3
## 2     13         Australia  AUS 7
## 3     33            Brazil  BRA 8
## 4     39            Canada  CAN 7
## 5     40       Switzerland  SUI 1
## 6     41             Chile  CHI 1
## 7     44          Cameroon  CMR 2
## 8     48          Colombia  COL 2
## 9     51        Costa Rica  CRC 1
## 10    57           Germany  GER 8
## 11    60           Denmark  DEN 4
## 12    63           Ecuador  ECU 1
## 13    67             Spain  ESP 2
## 14    77            France  FRA 4
## 15    84             Ghana  GHA 3
## 16    88 Equatorial Guinea  EQG 1
## 17   111             Italy  ITA 3
## 18   113           Jamaica  JAM 1
## 19   116             Japan  JPN 8
## 20   125       South Korea  KOR 3
## 21   145            Mexico  MEX 3
## 22   164           Nigeria  NGA 8
## 23   170       Netherlands  NED 2
## 24   171            Norway  NOR 8
## 25   174       New Zealand  NZL 5
## 26   185       North Korea  PRK 4
## 27   194            Russia  RUS 2
## 28   216            Sweden  SWE 8
## 29   224          Thailand  THA 2
## 30   250      South Africa  RSA 1

Only 30 of our 36 teams matched a map region - the join has lost 6 countries.

Do an anti-join!

# List the teams whose country name has no identically named region in the map.
countries_takenpart %>%
  anti_join(countries, by = c("country" = "region"))
## # A tibble: 6 × 3
##   team      n country                    
##   <chr> <int> <chr>                      
## 1 CHN       7 China PR                   
## 2 CIV       1 Ivory Coast (Côte d'Ivoire)
## 3 ENG       5 England                    
## 4 SCO       1 Scotland                   
## 5 TPE       1 Chinese Taipei             
## 6 USA       8 United States

Solve mismatches (England and Scotland are not remapped, so they remain unmatched)

# Rename the look-up country names that differ from the map's region names.
# Names without an entry in the lookup (including England and Scotland) are
# left unchanged.
countries_takenpart = countries_takenpart %>%
  mutate(country = coalesce(
    # Index the old -> new lookup by the current name; no entry gives NA ...
    unname(c(
      "China PR" = "China",
      "Ivory Coast (Côte d'Ivoire)" = "Ivory Coast",
      "Chinese Taipei" = "Taiwan",
      "United States" = "USA"
    )[country]),
    # ... in which case the original name is kept.
    country
  ))

Recreate

# Rebuild the region/team table so it uses the current country names in
# `countries_takenpart`.
mycountries = countries %>%
  left_join(countries_takenpart, by = c("region" = "country"))

Map

# Draw the world map with each region filled by its count `n`; regions are
# linked to their outlines in `world_map` through `map_id`.
ggplot(mycountries, aes(map_id = region, fill = n)) +
  geom_map(map = world_map) +
  # No x/y aesthetics are mapped above, so take the plot limits from the
  # outline coordinates.
  expand_limits(x = world_map$long, y = world_map$lat) +
  # NOTE(review): "moll" presumably selects the Mollweide projection - confirm.
  coord_map("moll") +
  theme_map()