Reading in the data
# Artwork table from the TidyTuesday 2021-01-12 data release.
artwork <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-12/artwork.csv"
)
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_character(),
## id = col_double(),
## artistId = col_double(),
## year = col_double(),
## acquisitionYear = col_double(),
## width = col_double(),
## height = col_double(),
## depth = col_double(),
## thumbnailCopyright = col_logical()
## )
## i Use `spec()` for the full column specifications.
# Artist table from the tategallery/collection repository on GitHub.
artists <- readr::read_csv(
  file = "https://github.com/tategallery/collection/raw/master/artist_data.csv"
)
##
## -- Column specification --------------------------------------------------------
## cols(
## id = col_double(),
## name = col_character(),
## gender = col_character(),
## dates = col_character(),
## yearOfBirth = col_double(),
## yearOfDeath = col_double(),
## placeOfBirth = col_character(),
## placeOfDeath = col_character(),
## url = col_character()
## )
# Switch the column names to snake_case, then keep only the artists for whom
# both the place of birth and the place of death are recorded.
artists <- janitor::clean_names(artists) %>%
  filter(!(is.na(place_of_birth) | is.na(place_of_death)))
# Derive the country of birth from `place_of_birth` (e.g. "City, Country").
# Everything up to and including the LAST comma is dropped, so a place with
# more than two comma-separated parts still yields only its final component
# (the previous first-comma logic left "Region, Country" in that case).
# A value without any comma is kept whole. Whitespace is then normalised and
# the result stored as a factor.
artists <- artists %>%
  mutate(
    country_of_birth = place_of_birth %>%
      str_remove("^.*,") %>%
      str_squish() %>%
      factor()
  )
# Bar chart of the ten most frequent birthplaces (slice_max() keeps ties at
# the cut-off, so more than ten bars are possible).
artists %>%
  count(place_of_birth, sort = TRUE) %>%
  slice_max(n, n = 10) %>%
  # Order the bars by frequency instead of alphabetically.
  mutate(place_of_birth = fct_reorder(place_of_birth, n)) %>%
  ggplot(aes(n, place_of_birth)) +
  geom_col()
Country of death
# Tabulate the places of death, most frequent first.
artists %>%
  count(place_of_death, sort = TRUE)
## # A tibble: 502 x 2
## place_of_death n
## <chr> <int>
## 1 London, United Kingdom 415
## 2 Paris, France 81
## 3 New York, United States 45
## 4 Roma, Italia 22
## 5 Edinburgh, United Kingdom 16
## 6 England, United Kingdom 16
## 7 Hampstead, United Kingdom 13
## 8 Milano, Italia 13
## 9 Kent, United Kingdom 9
## 10 Dublin, Éire 8
## # ... with 492 more rows
# Derive the country of death from `place_of_death` (e.g. "London, United
# Kingdom"). Everything up to and including the LAST comma is dropped, so a
# place with more than two comma-separated parts still yields only its final
# component (the previous first-comma logic left "Region, Country" in that
# case). A value without any comma is kept whole. Whitespace is then
# normalised and the result stored as a factor.
artists <- artists %>%
  mutate(
    country_of_death = place_of_death %>%
      str_remove("^.*,") %>%
      str_squish() %>%
      factor()
  )
# Tabulate the derived countries of death, most frequent first.
artists %>%
  count(country_of_death, sort = TRUE)
## # A tibble: 82 x 2
## country_of_death n
## <fct> <int>
## 1 United Kingdom 807
## 2 France 155
## 3 United States 126
## 4 Italia 58
## 5 Deutschland 31
## 6 Schweiz 21
## 7 Éire 14
## 8 Nederland 13
## 9 España 11
## 10 Australia 9
## # ... with 72 more rows
p_load(circlize)

# Birth-country -> death-country flows used as input for the chord diagram.
circo <- artists %>%
  # Drop rows whose derived country is the bare string "London".
  filter(
    country_of_death != "London",
    country_of_birth != "London"
  ) %>%
  count(country_of_birth, country_of_death, sort = TRUE) %>%
  # Keep only moves between two different countries. Both columns are factors,
  # and `!=` on two factors errors unless their level sets match, so the
  # labels are compared as character vectors. The comparison is vectorised,
  # which makes the former rowwise() step unnecessary.
  filter(as.character(country_of_birth) != as.character(country_of_death)) %>%
  # Only show flows made up of more than three artists.
  filter(n > 3)
# Reset circlize before drawing a new plot.
circos.clear()
# Uncomment the jpeg()/dev.off() pair to write the figure to disk.
# jpeg(
# filename="figures/tate_circo.jpeg",
# width=8,
# height=6,
# units="in",
# res=1000)
# Chord diagram of the flows in `circo`; sectors are not rescaled
# (scale = FALSE, see ?chordDiagram).
chordDiagram(circo, scale = FALSE)
title("Where do Tate's artists migrate?")
# dev.off()
Many artists died in London. Where did they come from?
# Birthplaces of the artists whose place of death mentions London, most
# common first. The word boundaries stop the pattern from also matching longer
# names that merely contain "London" (for example "Londonderry").
destination_london <- artists %>%
  filter(str_detect(place_of_death, "\\bLondon\\b")) %>%
  count(place_of_birth, sort = TRUE)
Geocoding using tidygeocoder
# Load the tidygeocoder package via p_load().
p_load(tidygeocoder)
# The geocoding call and the CSV export below are kept commented out; the
# file "data/tate_geocode.csv" they would write is read back with read.csv()
# further down in this script.
# NOTE(review): presumably disabled to avoid repeating the lookup on every
# run -- confirm.
# geo_code <- destination_london %>%
# geocode(address = place_of_birth, method = "cascade")
# write.csv(geo_code, file = "data/tate_geocode.csv")
Map of Europe
# Load the stored geocoding results from disk.
geo_code <- read.csv(file = "data/tate_geocode.csv")
# Split each birthplace into `city` and `country`. With fill = "left", a value
# that has only one part goes to `country` and `city` becomes NA.
destination_london <- separate(
  destination_london,
  col = place_of_birth,
  into = c("city", "country"),
  sep = ", ",
  fill = "left"
)
# jpeg(
# filename="figures/tate_map.jpeg",
# width=8,
# height=6,
# units="in",
# res=1000)
# Map of geocoded birthplaces: one point per place, sized by the number of
# artists and coloured by country, with the city name as a text label.
geo_code %>%
  # With fill = "left", single-part places end up in `country` and get an NA
  # `city`, so they are drawn as points without a label.
  separate(place_of_birth, c("city", "country"), sep = ", ", fill = "left") %>%
  # Keep only points inside the drawn window (same limits as coord_sf below).
  filter(
    between(lat, 45, 60),
    between(long, -10, 15)
  ) %>%
  ggplot(aes(long, lat)) +
  borders(colour = "gray85", fill = "gray70") +
  coord_sf(ylim = c(45, 60), xlim = c(-10, 15)) +
  geom_point(aes(size = n, colour = country), alpha = .5) +
  geom_text(aes(long, lat, label = city), check_overlap = TRUE) +
  scale_size_continuous(range = c(3, 15), breaks = waiver()) +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank()
  ) +
  labs(
    caption = "Size represents number of artists from location",
    title = "Where are the Tate's artists born, who died in London?",
    x = "",
    y = ""
  )
## Warning: Removed 8 rows containing missing values (geom_text).
# dev.off()