#====================================================
# St. Lucia Tourism + Income Analysis
# Google Places + District Boundary + Income Data
#====================================================
#-------------------------------
# 0. Packages
#-------------------------------
library(googleway)
library(dplyr)
library(purrr)
library(sf)
library(stringr)
library(tidyr)
library(geodata)
## Warning: package 'geodata' was built under R version 4.5.3
## Loading required package: terra
## terra 1.8.70
##
## Attaching package: 'terra'
## The following object is masked from 'package:tidyr':
##
## extract
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(readxl)
library(ggplot2)
#-------------------------------
# 1. Set folder path and API key
#-------------------------------
base_path <- "C:/Users/ddtmd/OneDrive - The Pennsylvania State University/Desktop/ST.Lucia_R"
dir.create(file.path(base_path, "DATA"), showWarnings = FALSE, recursive = TRUE)
api_key <- "AIzaSyCRImFk0YgNURmOAmoapkIYGxp4_iqoG9Q "
#-------------------------------
# 2. Google Places function
#-------------------------------
get_all_places <- function(search_string, key, max_pages = 3) {
res <- google_places(
search_string = search_string,
key = key
)
results_list <- list(res$results)
token <- res$next_page_token
i <- 1
while (!is.null(token) && i < max_pages) {
Sys.sleep(2)
res_next <- google_places(
search_string = search_string,
key = key,
page_token = token
)
results_list <- append(results_list, list(res_next$results))
token <- res_next$next_page_token
i <- i + 1
}
bind_rows(results_list)
}
clean_places <- function(results_df) {
results_df %>%
mutate(
lat = geometry$location$lat,
lng = geometry$location$lng
) %>%
transmute(
place_id,
name,
address = formatted_address,
rating,
user_ratings_total,
lat,
lng,
search_term
)
}
#-------------------------------
# 3. Retrieve Google Places data
#-------------------------------
search_terms <- c(
"resort in St Lucia",
"beach resort in St Lucia",
"beachfront resort in St Lucia",
"all inclusive resort in St Lucia",
"luxury resort in St Lucia",
"hotel in St Lucia",
"beachfront hotel in St Lucia"
)
sl_all_raw <- map_dfr(search_terms, function(term) {
message("Searching: ", term)
get_all_places(
search_string = term,
key = api_key,
max_pages = 3
) %>%
mutate(search_term = term)
})
## Searching: resort in St Lucia
## Searching: beach resort in St Lucia
## Searching: beachfront resort in St Lucia
## Searching: all inclusive resort in St Lucia
## Searching: luxury resort in St Lucia
## Searching: hotel in St Lucia
## Searching: beachfront hotel in St Lucia
sl_places <- clean_places(sl_all_raw)
sl_places_unique <- sl_places %>%
distinct(place_id, .keep_all = TRUE) %>%
mutate(
type = case_when(
str_detect(str_to_lower(search_term), "all inclusive") ~ "all_inclusive",
str_detect(str_to_lower(search_term), "beachfront") ~ "beachfront",
str_detect(str_to_lower(search_term), "beach resort") ~ "beach_resort",
str_detect(str_to_lower(search_term), "luxury") ~ "luxury_resort",
str_detect(str_to_lower(search_term), "resort") ~ "resort",
str_detect(str_to_lower(search_term), "hotel") ~ "hotel",
TRUE ~ "other"
)
)
sl_places <- sl_places_unique
write.csv(
sl_places,
file.path(base_path, "DATA", "94_slucia_tourism_google_unique.csv"),
row.names = FALSE
)
#-------------------------------
# 4. District boundary
#-------------------------------
sl_district <- geodata::gadm(
country = "LCA",
level = 1,
path = file.path(base_path, "DATA")
)
sl_district <- st_as_sf(sl_district)
sl_district_clean <- sl_district %>%
mutate(
District = case_when(
NAME_1 == "SoufriĆØre" ~ "Soufriere",
TRUE ~ NAME_1
)
) %>%
group_by(District) %>%
summarise(
geometry = st_union(geometry),
.groups = "drop"
)
#-------------------------------
# 5. Tourism points by district
#-------------------------------
sl_places_sf <- st_as_sf(
sl_places,
coords = c("lng", "lat"),
crs = 4326
)
sl_places_sf <- st_transform(sl_places_sf, st_crs(sl_district_clean))
places_district <- st_join(
sl_places_sf,
sl_district_clean,
join = st_within
)
district_counts <- places_district %>%
st_drop_geometry() %>%
filter(!is.na(District)) %>%
group_by(District, type) %>%
summarise(n = n(), .groups = "drop") %>%
pivot_wider(
names_from = type,
values_from = n,
values_fill = 0
) %>%
mutate(
total_tourism_establishments = rowSums(across(where(is.numeric)))
)
library(mapview)
mapview(sl_places_sf)
# 6. Coastal tourism density
#-------------------------------
sl_coastline <- sl_district_clean %>%
st_union() %>%
st_boundary()
sl_district_proj <- st_transform(sl_district_clean, 32620)
sl_coastline_proj <- st_transform(sl_coastline, 32620)
coast_buffer <- st_buffer(sl_coastline_proj, dist = 2000)
district_coastal <- st_intersection(sl_district_proj, coast_buffer)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
district_coastal_area <- district_coastal %>%
mutate(
coastal_area_km2 = as.numeric(st_area(geometry)) / 1000000
) %>%
st_drop_geometry() %>%
select(District, coastal_area_km2)
district_counts <- district_counts %>%
left_join(district_coastal_area, by = "District") %>%
mutate(
coastal_tourism_density_km2 =
total_tourism_establishments / coastal_area_km2
)
tourism_map_sf <- sl_district_clean %>%
left_join(district_counts, by = "District")
mapview(tourism_map_sf, zcol = "coastal_tourism_density_km2")
# 7. Income data
#-------------------------------
income_2022 <- read_excel(
"C:/Users/ddtmd/OneDrive - The Pennsylvania State University/ST.Lucia_Data/Income_2022.xlsx"
)
# Castries weighted average
castries_fixed <- income_2022 %>%
filter(grepl("Castries", District)) %>%
mutate(
total_pop =
`A_1 to 9,999 EC$` +
`B_10,000 to 19,999 EC$` +
`C_20,000 to 29,999 EC$` +
`D_30,000 to 39,999 EC$` +
`E_40,000 to 49,999 EC$` +
`F_50,000 to 74,999 EC$` +
`G_75,000 EC$ and above`
)
castries_income <- sum(
castries_fixed$total_pop * castries_fixed$Total,
na.rm = TRUE
) / sum(castries_fixed$total_pop, na.rm = TRUE)
income_clean <- income_2022 %>%
filter(District != "Total") %>%
mutate(
District = case_when(
District == "Anse La Raye" ~ "Anse-la-Raye",
TRUE ~ District
)
) %>%
filter(!grepl("Castries", District)) %>%
select(
District,
avg_income_2022 = Total
) %>%
bind_rows(
tibble(
District = "Castries",
avg_income_2022 = castries_income
)
)
#-------------------------------
# 8. Income map
#-------------------------------
income_map_sf <- sl_district_clean %>%
left_join(income_clean, by = "District") %>%
mutate(
income_class = cut(
avg_income_2022,
breaks = c(26883.16, 27964.68, 30981.22, 36523.34, 37256.84, 45412.04),
include.lowest = TRUE,
labels = c(
"26883.16 - 27964.68",
"30055.47 - 30981.22",
"31454.91 - 36523.34",
"36677.16 - 37256.84",
"37260.17 - 45412.04"
)
)
)
income_map_sf %>%
filter(is.na(avg_income_2022))
## Simple feature collection with 0 features and 3 fields
## Bounding box: xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS: WGS 84
## # A tibble: 0 Ć 4
## # ⹠4 variables: District <chr>, geometry <GEOMETRY [°]>,
## # avg_income_2022 <dbl>, income_class <fct>
p <- ggplot(income_map_sf) +
geom_sf(aes(fill = income_class), color = "black", linewidth = 0.3) +
scale_fill_manual(
values = c(
"26883.16 - 27964.68" = "#d9ffff",
"30055.47 - 30981.22" = "#99c2ff",
"31454.91 - 36523.34" = "#5f7ee6",
"36677.16 - 37256.84" = "#2f47d6",
"37260.17 - 45412.04" = "#0000cc"
),
name = "Average gross annual income in EC dollars",
drop = FALSE,
na.value = "grey80"
) +
labs(
title = "Saint Lucia: Districts",
subtitle = "Average gross annual income in EC dollars",
caption = "2022 Census processed with Redatam WebServer\nUNECLAC and the Government of Saint Lucia"
) +
theme_void() +
theme(legend.position = "left")
p

ggsave(
file.path(base_path, "st_lucia_income_map_fixed.pdf"),
plot = p,
width = 6,
height = 8
)
ggsave(
file.path(base_path, "st_lucia_income_map_fixed.png"),
plot = p,
width = 6,
height = 8,
dpi = 300
)
#-------------------------------
# 9. Final analysis dataset
#-------------------------------
analysis_df <- income_map_sf %>%
st_drop_geometry() %>%
left_join(district_counts, by = "District")
names(analysis_df)
## [1] "District" "avg_income_2022"
## [3] "income_class" "hotel"
## [5] "resort" "beach_resort"
## [7] "luxury_resort" "all_inclusive"
## [9] "beachfront" "total_tourism_establishments"
## [11] "coastal_area_km2" "coastal_tourism_density_km2"
# 10. Tourism density and income plot
#-------------------------------
ggplot(analysis_df, aes(x = coastal_tourism_density_km2, y = avg_income_2022)) +
geom_point(size = 3) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Coastal tourism density per km²",
y = "Average gross annual income in EC dollars",
title = "Tourism Density and Average Income by District"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
