Reading in data

# Tate artwork table from the TidyTuesday 2021-01-12 release.
artwork <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-12/artwork.csv"
)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_character(),
##   id = col_double(),
##   artistId = col_double(),
##   year = col_double(),
##   acquisitionYear = col_double(),
##   width = col_double(),
##   height = col_double(),
##   depth = col_double(),
##   thumbnailCopyright = col_logical()
## )
## i Use `spec()` for the full column specifications.
# Artist metadata straight from the Tate collection repository.
artists <- readr::read_csv(
  file = "https://github.com/tategallery/collection/raw/master/artist_data.csv"
)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   id = col_double(),
##   name = col_character(),
##   gender = col_character(),
##   dates = col_character(),
##   yearOfBirth = col_double(),
##   yearOfDeath = col_double(),
##   placeOfBirth = col_character(),
##   placeOfDeath = col_character(),
##   url = col_character()
## )

Extracting country

# Standardise the column names and keep only artists for whom both the place
# of birth and the place of death are recorded.
artists <- artists %>%
  janitor::clean_names() %>%
  filter(
    !is.na(place_of_birth),
    !is.na(place_of_death)
  )

# Derive the country of birth from the free-text place.
# Places look like "City, Country" or just "Country"; taking everything after
# the LAST comma (or the whole string when there is no comma) also copes with
# entries that have more than two comma-separated parts, which the previous
# first-comma extraction turned into "Region, Country".
artists <- artists %>%
  mutate(
    country_of_birth = place_of_birth %>%
      str_extract("[^,]*$") %>%
      str_squish() %>%
      factor()
  )
  
  

# Bar chart of the most common birthplaces (top ten by count; slice_max keeps
# ties, so more than ten bars are possible). Bars are ordered by frequency.
artists %>%
  count(place_of_birth, sort = TRUE) %>%
  slice_max(n, n = 10) %>%
  mutate(place_of_birth = fct_reorder(place_of_birth, n)) %>%
  ggplot(aes(x = n, y = place_of_birth)) +
  geom_col()

Country of death

# Frequency table of raw death places, most common first.
artists %>%
  count(place_of_death, sort = TRUE)
## # A tibble: 502 x 2
##    place_of_death                n
##    <chr>                     <int>
##  1 London, United Kingdom      415
##  2 Paris, France                81
##  3 New York, United States      45
##  4 Roma, Italia                 22
##  5 Edinburgh, United Kingdom    16
##  6 England, United Kingdom      16
##  7 Hampstead, United Kingdom    13
##  8 Milano, Italia               13
##  9 Kent, United Kingdom          9
## 10 Dublin, Éire                  8
## # ... with 492 more rows
# Derive the country of death from the free-text place.
# Everything after the LAST comma is taken as the country (the whole string
# when there is no comma), so places with more than two comma-separated parts
# no longer end up as "Region, Country".
artists <- artists %>%
  mutate(
    country_of_death = place_of_death %>%
      str_extract("[^,]*$") %>%
      str_squish() %>%
      factor()
  )
  
# Frequency table of the derived death countries, most common first.
artists %>%
  count(country_of_death, sort = TRUE)
## # A tibble: 82 x 2
##    country_of_death     n
##    <fct>            <int>
##  1 United Kingdom     807
##  2 France             155
##  3 United States      126
##  4 Italia              58
##  5 Deutschland         31
##  6 Schweiz             21
##  7 Éire                14
##  8 Nederland           13
##  9 España              11
## 10 Australia            9
## # ... with 72 more rows

Circos plot of migration patterns

p_load(circlize)

# Edge list for the chord diagram: one row per (birth country, death country)
# pair with the number of artists who made that move.
circo <- artists %>%
  # Bare "London" entries are places without a country part, so drop them.
  filter(
    country_of_death != "London",
    country_of_birth != "London"
  ) %>%
  count(country_of_birth, country_of_death, sort = TRUE) %>%
  filter(
    # Both columns are factors with their own level sets; `!=` between two
    # factors errors when the level sets differ, so compare the labels as
    # character instead. This is vectorised, so no rowwise() is needed.
    as.character(country_of_birth) != as.character(country_of_death),
    # Keep only routes taken by more than three artists.
    n > 3
  )

# Reset any circlize layout state left over from a previous plot.
circos.clear()

# Uncomment the jpeg()/dev.off() pair to write the figure to disk.
# jpeg(
#     filename="figures/tate_circo.jpeg",
#     width=8,
#     height=6,
#     units="in",
#     res=1000)

# Draw birth country -> death country flows; scale = FALSE leaves sectors
# unscaled rather than forcing them all to the same width.
chordDiagram(circo, scale = FALSE)
title("Where do Tate's artists migrate?")

# dev.off()

Geocoding locations

Many artists died in London. Where did they come from?

# Birthplaces of artists whose place of death mentions London, with the
# number of artists per birthplace (most common first).
destination_london <- artists %>%
  filter(str_detect(place_of_death, "London")) %>%
  count(place_of_birth, sort = TRUE)

Geocoding using tidygeocoder

# Load tidygeocoder, which provides the geocode() call used below.
p_load(tidygeocoder)

# The geocoding step is left commented out: it looks up coordinates for every
# birthplace in destination_london. Uncomment to regenerate the results.
# geo_code <- destination_london %>% 
#   geocode(address = place_of_birth, method = "cascade")

# Saves the geocoded table to the CSV that the mapping code reads back in.
# write.csv(geo_code, file = "data/tate_geocode.csv")

Map of Europe

# Load the cached geocoding results from disk.
geo_code <- read.csv(file = "data/tate_geocode.csv")

# Split the free-text birthplace into city and country. When only one part is
# present, fill = "left" puts it in `country` and leaves `city` as NA.
destination_london <- separate(
  destination_london,
  col = place_of_birth,
  into = c("city", "country"),
  sep = ", ",
  fill = "left"
)

# jpeg(
#     filename="figures/tate_map.jpeg",
#     width=8,
#     height=6,
#     units="in",
#     res=1000)

# Map of the European birthplaces of artists who died in London.
# Point size encodes the number of artists from each location and colour
# encodes the country.
geo_code %>%
  # Birthplaces with only one part get city = NA (fill = "left"), so those
  # rows have no text label.
  separate(place_of_birth, c("city", "country"), sep = ", ", fill = "left") %>%
  # Keep only points inside the plotted window (same limits as coord_sf).
  filter(
    between(lat, 45, 60),
    between(long, -10, 15)
  ) %>%
  ggplot(aes(long, lat)) +
  borders(colour = "gray85", fill = "gray70") +
  coord_sf(ylim = c(45, 60), xlim = c(-10, 15)) +
  geom_point(aes(size = n, colour = country), alpha = .5) +
  # x/y are inherited from ggplot(); only the label needs mapping here.
  geom_text(aes(label = city), check_overlap = TRUE) +
  scale_size_continuous(range = c(3, 15), breaks = waiver()) +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank()
  ) +
  labs(
    caption = "Size represents number of artists from location",
    title = "Where are the Tate's artists born, who died in London?",
    x = "",
    y = ""
  )
## Warning: Removed 8 rows containing missing values (geom_text).

# dev.off()