Code
library(osmdata)
library(tidyverse)
library(sf)
library(gender)
library(genderdata)
library(leaflet)
library(units)
# remotes::install_github("fozy81/opendatascotland")
library(opendatascotland)Maps for street names in Edinburgh, kings and queens, male and female given names
For illustrative purposes…caveats apply.
library(osmdata)
library(tidyverse)
library(sf)
library(gender)
library(genderdata)
library(leaflet)
library(units)
# remotes::install_github("fozy81/opendatascotland")
library(opendatascotland)Overpass query to download highway data and display a map
bb <- getbb("edinburgh uk", format_out = "polygon")
king <- opq(bbox = bb) %>%
add_osm_feature(key = "name", value = "King", value_exact = FALSE) %>%
osmdata_sf()
queen <- opq("Edinburgh, U.K.") %>%
add_osm_feature(key = "name", value = "Queen", value_exact = FALSE) %>%
osmdata_sf()
king$osm_points$gender <- "King"
queen$osm_points$gender <- "Queen"
king$osm_lines$gender <- "King"
queen$osm_lines$gender <- "Queen"
royalty <- c(king, queen)
royalty <- royalty$osm_lines
royalty <- filter(royalty, !is.na(name)) %>%
filter(grepl("(?<!\\w)Queen\\W|(?<!\\w)King\\W", name, perl = TRUE))
# The trim_osmdata() function not working, use polygon of Edinburgh City Council downloaded using opendatascotland
datasets <- opendatascotland::ods_search("Open data - scottish local authority boundaries", refresh = TRUE)
areas <- opendatascotland::ods_get(datasets)
area <- areas$`Open_data_-_scottish_local_authority_boundaries_Stirling_Council`
edinburgh <- area %>%
filter(local_auth == "City of Edinburgh")
# trim
royalty_cut <- st_intersects(edinburgh, royalty)[[1]]
royalty_cut <- royalty[royalty_cut, ]
p <- ggplot() +
geom_sf(data = edinburgh, fill = "lightgrey") +
geom_sf(data = royalty_cut, aes(color = gender)) +
theme_void() +
scale_colour_manual(name = "", values = c("purple", "gold"))
ppal <- colorFactor(
palette = c("purple","gold"),
domain = royalty_cut$gender)
leaflet(royalty_cut) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolylines(color = ~pal(gender), label = royalty_cut$name)Total highway length
royalty_cut$length <- st_length(royalty_cut$geometry)
royal_length <- royalty_cut %>%
group_by(gender) %>%
summarise(length = sum(length))
rp <- ggplot(royal_length, aes(gender, length)) +
geom_col(aes(fill = gender)) +
scale_fill_manual(name = "", values = c("purple", "gold")) +
theme(legend.position="none")
rpdata <- opq(bbox = bb) %>%
add_osm_feature(key = "highway") %>%
osmdata_sf()
osm_lines <- data$osm_lines
# trim
cropped_ids <- st_intersects(edinburgh, osm_lines)[[1]]
cropped_data <- osm_lines[cropped_ids, ]
# Get given names from records
names <- genderdata::napp
names <- filter(names, country == "United Kingdom")
# Find popular names
names_sum <- names %>%
group_by(name) %>%
mutate("sum_female" = sum(female), "sum_male" = sum(male)) %>%
select(-year, -female, -male) %>%
distinct() %>%
mutate("total" = sum_female + sum_male)
# Function to find given name in highway name ------------
find_name <- function(place_name) {
regex_word <- paste0("(?<!\\w)", given_name, "\\W")
found_name <- stringr::str_extract(place_name,
regex(regex_word,
ignore_case = TRUE))
if (!all(is.na(found_name))) {
found <- data.frame("place_name" = place_name,
"given_name" = trimws(tolower(na.omit(found_name))),
check.names = FALSE)
} else {
return()
}
}
# Find names (unique)
given_name <- unique(names$name)
place_name <- unique(cropped_data$name)
found_names <- purrr::map_df(place_name, find_name, .progress = TRUE)
found_names$given_name <- gsub("'", "", found_names$given_name)
# join rows which match a given name
check <- inner_join(found_names, names_sum, by = join_by(given_name == name), multiple = "all", relationship = "many-to-many")
# If more than one match return given name which is more popular
check_unique <- purrr::map_df(split(check, check$place_name), function(place) {
place <- filter(place, total == max(total))
place <- place[1, ]
})
# join to matching geodata
finish <- inner_join(cropped_data, check_unique, by = join_by(name == place_name))
# get gender of name
gender_name <- gender(finish$given_name, method = "napp") %>% distinct()
finish_gender <- inner_join(finish, gender_name, by = join_by(given_name == name))
# Use only more common given name - trial and error > 3 seems to work okay
finish_gender <- finish_gender %>% filter(total > 3)
g <- ggplot() +
geom_sf(data = edinburgh, fill = "lightgrey") +
geom_sf(data = finish_gender, aes(color = gender)) +
theme_void() +
scale_colour_manual(name = "", values = c("gold", "purple")) +
ggtitle("Highways categorised by male or female given names in Edinburgh", subtitle = "Map data from OpenStreetMap, highway names filtered by regular expression \nbased on names from North Atlantic Population Project")
gpal <- colorFactor(
palette = c("gold", "purple"),
domain = finish_gender$gender)
leaflet(finish_gender) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolylines(color = ~pal(gender), label = finish_gender$name)Total highway length
finish_gender$length <- st_length(finish_gender$geometry)
gender_length <- finish_gender %>%
group_by(gender) %>%
summarise(length = sum(length))
gl <- ggplot(gender_length, aes(gender, length)) +
geom_col(aes(fill = gender)) +
scale_fill_manual(name = "", values = c("gold","purple")) +
theme(legend.position="none")
gl