Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  sf,
  readxl,
  googlesheets4,
  GGally
)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
theme_set(theme_bw())

options(
    digits = 3
)

Data

#read internet data from 3 files
#table 1 from https://www.speedtest.net/global-index, mobile
speedtest_mobile = read_csv("data/table 1.csv") %>% 
  miss_filter()
## New names:
## Rows: 222 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): ...2, Country dbl (2): #, Mbps
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
#table 2 from ditto, broadband
speedtest_broadband = read_csv("data/table 2.csv") %>% 
  miss_filter()
## New names:
## Rows: 322 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): ...2, Country dbl (2): #, Mbps
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
#table from https://www.cable.co.uk/broadband/speed/worldwide-speed-league/
cable_couk = read_excel("data/cable co uk worldwide_speed_league_data.xlsx")
## Warning: Expecting numeric in K6 / R6C11: got a date
## New names:
## • `` -> `...9`
## • `` -> `...11`
#add ISO 3 codes for joining
#first dataset
speedtest_mobile %<>% mutate(
  ISO = pu_translate(Country),
  speedtest_mobile = Mbps
)
## No exact match: Hong Kong (SAR)
## Best fuzzy match found: Hong Kong (SAR) -> Hong Kong (CHN) with distance 3.00
#second
speedtest_broadband %<>% mutate(
  ISO = pu_translate(Country),
  speedtest_broadband = Mbps
)
## No exact match: Hong Kong (SAR)
## No exact match: Macau (SAR)
## No exact match: The Bahamas
## Best fuzzy match found: Hong Kong (SAR) -> Hong Kong (CHN) with distance 3.00
## Best fuzzy match found: Macau (SAR) -> Macau (CHN) with distance 3.00
## Best fuzzy match found: The Bahamas -> Bahamas with distance 4.00
#third
cable_couk %<>%
  df_legalize_names() %>% 
  mutate(
  ISO = pu_translate(country),
  cable_couk_broadband = Mean_download_speed_Mbps
)
## No exact match: St Kitts and Nevis
## No exact match: Ã…land Islands
## No exact match: St Vincent and Grenadines
## No exact match: Bonaire Sint Eustatius and Saba
## No exact match: Congo Republic
## Best fuzzy match found: St Kitts and Nevis -> St. Kitts and Nevis with distance 1.00
## Best fuzzy match found: Ã…land Islands -> Aaland Islands with distance 2.00
## Best fuzzy match found: St Vincent and Grenadines -> Saint Vincent and Grenadines with distance 3.00
## Best fuzzy match found: Bonaire Sint Eustatius and Saba -> Bonaire, Sint Eustatius and Saba with distance 1.00
## Best fuzzy match found: Congo Republic -> Congo Republic of with distance 3.00
#join
d = full_join(
  speedtest_mobile %>% select(ISO, speedtest_mobile),
  speedtest_broadband %>% select(ISO, speedtest_broadband),
  by = "ISO"
) %>% 
  full_join(
    cable_couk %>% select(ISO, cable_couk_broadband),
    by = "ISO"
  )

#no errors check
assert_that(!anyDuplicated(d$ISO))
## [1] TRUE
d$ISO[duplicated(d$ISO)]
## character(0)
#national IQ data
# gs4_deauth()
# NIQ_emil = read_sheet("https://docs.google.com/spreadsheets/d/1cReoeIZLlbxOrN4_wnj52Q6UWFepXSJGi3Gj5m6ZAZg/edit?gid=0#gid=0")

#Seb's improved version
NIQ_seb = read_csv("data/jensen kirk 2024 Niqresults.csv")
## New names:
## Rows: 198 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): name, alpha3 dbl (3): ...1, NIQ, seadj
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#join
d = full_join(
  d,
  NIQ_seb %>% rename(ISO = alpha3) %>% select(ISO, NIQ),
  by = "ISO"
)

#no errors check
assert_that(!anyDuplicated(d$ISO))
## [1] TRUE
d$ISO[duplicated(d$ISO)]
## character(0)
#population size and population density from https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population_density
wiki_pop_density = read_csv("data/wiki pop density.csv") %>% 
  df_legalize_names() %>% 
  mutate(
    country = str_remove(Location, " https.*"),
    ISO = pu_translate(country)
  ) %>% 
  #filter bad units
  filter(
    !str_detect(country, "France \\(metropolitan & overseas\\)"),
    !str_detect(country, "World \\(all land\\)")
  )
## Rows: 252 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Location, Notes
## num (5): Pop./km2, Pop./sq mi, Population, Land area(km2), Land area(sq mi)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## No exact match: Gibraltar (UK)
## 
## No exact match: Sint Maarten (NL)
## 
## No exact match: Bermuda (UK)
## 
## No exact match: Guernsey (UK)
## 
## No exact match: Jersey (UK)
## 
## No exact match: Palestine(Gaza Strip, West Bank)
## 
## No exact match: Aruba (NL)
## 
## No exact match: Curaçao (NL)
## 
## No exact match: Réunion (France)
## 
## No exact match: Puerto Rico (US)
## 
## No exact match: Guam (US)
## 
## No exact match: U.S. Virgin Islands (US)
## 
## No exact match: Cayman Islands (UK)
## 
## No exact match: American Samoa (US)
## 
## No exact match: British Virgin Islands (UK)
## 
## No exact match: Anguilla (UK)
## 
## No exact match: Northern Mariana Islands (US)
## 
## No exact match: France (metropolitan & overseas)
## 
## No exact match: Caribbean Netherlands (Netherlands)
## 
## No exact match: Wallis and Futuna (France)
## 
## No exact match: Akrotiri and Dhekelia (UK)
## 
## No exact match: World (excluding Antarctica)
## 
## No exact match: Norfolk Island (Australia)
## 
## No exact match: World (all land)
## 
## No exact match: Turks and Caicos Islands (UK)
## 
## No exact match: Saint Helena, Ascension and Tristan da Cunha (UK)
## 
## No exact match: Cocos (Keeling) Islands (Australia)
## 
## No exact match: Montserrat (UK)
## 
## No exact match: Saint Pierre and Miquelon (France)
## 
## No exact match: Christmas Island (Australia)
## 
## No exact match: Western Sahara (disputed)
## 
## No exact match: Pitcairn Islands (UK)
## 
## No exact match: Falkland Islands (UK)
## 
## Best fuzzy match found: Gibraltar (UK) -> Gibraltar (GBR) with distance 3.00
## 
## Best fuzzy match found: Sint Maarten (NL) -> Sint Maarten (NLD) with distance 1.00
## 
## Best fuzzy match found: Bermuda (UK) -> Bermuda with distance 5.00
## 
## Best fuzzy match found: Guernsey (UK) -> Guernsey (GBR) with distance 3.00
## 
## Best fuzzy match found: Jersey (UK) -> Jersey (GBR) with distance 3.00
## 
## Best fuzzy match found: Palestine(Gaza Strip, West Bank) -> Palestine, State of with distance 18.00
## 
## Best fuzzy match found: Aruba (NL) -> Aruba (NLD) with distance 1.00
## 
## Best fuzzy match found: Curaçao (NL) -> Curaçao with distance 5.00
## 
## Best fuzzy match found: Réunion (France) -> Reunion (France) with distance 1.00
## 
## Best fuzzy match found: Puerto Rico (US) -> Puerto Rico (USA) with distance 1.00
## 
## Best fuzzy match found: Guam (US) -> Guam (USA) with distance 1.00
## 
## Best fuzzy match found: U.S. Virgin Islands (US) -> U.S. Virgin Islands (USA) with distance 1.00
## 
## Best fuzzy match found: Cayman Islands (UK) -> Cayman Islands with distance 5.00
## 
## Best fuzzy match found: American Samoa (US) -> American Samoa with distance 5.00
## 
## Best fuzzy match found: British Virgin Islands (UK) -> British Virgin Islands (GBR) with distance 3.00
## 
## Best fuzzy match found: Anguilla (UK) -> Anguilla (GBR) with distance 3.00
## 
## Best fuzzy match found: Northern Mariana Islands (US) -> Northern Mariana Islands with distance 5.00
## 
## Best fuzzy match found: France (metropolitan & overseas) -> France (metropolitan) with distance 11.00
## 
## Best fuzzy match found: Caribbean Netherlands (Netherlands) -> Caribbean Netherlands with distance 14.00
## 
## Best fuzzy match found: Wallis and Futuna (France) -> Wallis and Futuna Islands with distance 6.00
## 
## Best fuzzy match found: Akrotiri and Dhekelia (UK) -> Akrotiri and Dhekelia with distance 5.00
## 
## Best fuzzy match found: World (excluding Antarctica) -> Antarctica with distance 18.00
## 
## Best fuzzy match found: Norfolk Island (Australia) -> Norfolk Island with distance 12.00
## 
## Best fuzzy match found: World (all land) -> Ã…land (Finland) with distance 8.00
## 
## Best fuzzy match found: Turks and Caicos Islands (UK) -> Turks and Caicos Islands with distance 5.00
## 
## Best fuzzy match found: Saint Helena, Ascension and Tristan da Cunha (UK) -> Saint Helena, Ascension and Tristan da Cunha with distance 5.00
## 
## Best fuzzy match found: Cocos (Keeling) Islands (Australia) -> Cocos (Keeling) Islands with distance 12.00
## 
## Best fuzzy match found: Montserrat (UK) -> Montserrat with distance 5.00
## 
## Best fuzzy match found: Saint Pierre and Miquelon (France) -> Saint Pierre and Miquelon with distance 9.00
## 
## Best fuzzy match found: Christmas Island (Australia) -> Christmas Island with distance 12.00
## 
## Best fuzzy match found: Western Sahara (disputed) -> Western Sahara with distance 11.00
## 
## Best fuzzy match found: Pitcairn Islands (UK) -> Pitcairn Islands with distance 5.00
## 
## Best fuzzy match found: Falkland Islands (UK) -> Falkland Islands (GBR) with distance 3.00
#urbanization data from https://en.wikipedia.org/wiki/Urbanization_by_sovereign_state
wiki_urbanization = read_csv("data/wiki urbanization.csv") %>% 
  df_legalize_names()  %>% 
  mutate(
    country = str_remove(Country_or_territory, " https.*"),
    ISO = pu_translate(country)
  )
## Rows: 228 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Country or territory, Urban population  (2022)[1], Urbanization rat...
## dbl (1): % of total population  (2023)[2]
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## No exact match: Anguilla (UK)
## 
## No exact match: Bermuda (UK)
## 
## No exact match: Cayman Islands (UK)
## 
## No exact match: Gibraltar (UK)
## 
## No exact match: U.S. Virgin Islands (US)
## 
## No exact match: Guam (US)
## 
## No exact match: Turks and Caicos Islands (UK)
## 
## No exact match: Puerto Rico (US)
## 
## No exact match: Northern Mariana Islands (US)
## 
## No exact match: Saint Pierre and Miquelon (France)
## 
## No exact match: American Samoa (US)
## 
## No exact match: Falkland Islands (UK)
## 
## No exact match: World
## 
## No exact match: Isle of Man (UK)
## 
## No exact match: British Virgin Islands (UK)
## 
## No exact match: Saint Helena, Ascension and Tristan da Cunha (UK)
## 
## No exact match: Guernsey (UK)
## 
## No exact match: Jersey (UK)
## 
## No exact match: Montserrat (UK)
## 
## No exact match: Wallis and Futuna (France)
## 
## Best fuzzy match found: Anguilla (UK) -> Anguilla (GBR) with distance 3.00
## 
## Best fuzzy match found: Bermuda (UK) -> Bermuda with distance 5.00
## 
## Best fuzzy match found: Cayman Islands (UK) -> Cayman Islands with distance 5.00
## 
## Best fuzzy match found: Gibraltar (UK) -> Gibraltar (GBR) with distance 3.00
## 
## Best fuzzy match found: U.S. Virgin Islands (US) -> U.S. Virgin Islands (USA) with distance 1.00
## 
## Best fuzzy match found: Guam (US) -> Guam (USA) with distance 1.00
## 
## Best fuzzy match found: Turks and Caicos Islands (UK) -> Turks and Caicos Islands with distance 5.00
## 
## Best fuzzy match found: Puerto Rico (US) -> Puerto Rico (USA) with distance 1.00
## 
## Best fuzzy match found: Northern Mariana Islands (US) -> Northern Mariana Islands with distance 5.00
## 
## Best fuzzy match found: Saint Pierre and Miquelon (France) -> Saint Pierre and Miquelon with distance 9.00
## 
## Best fuzzy match found: American Samoa (US) -> American Samoa with distance 5.00
## 
## Best fuzzy match found: Falkland Islands (UK) -> Falkland Islands (GBR) with distance 3.00
## 
## Best fuzzy match found: World -> world with distance 1.00
## 
## Best fuzzy match found: Isle of Man (UK) -> Isle of Man (GBR) with distance 3.00
## 
## Best fuzzy match found: British Virgin Islands (UK) -> British Virgin Islands (GBR) with distance 3.00
## 
## Best fuzzy match found: Saint Helena, Ascension and Tristan da Cunha (UK) -> Saint Helena, Ascension and Tristan da Cunha with distance 5.00
## 
## Best fuzzy match found: Guernsey (UK) -> Guernsey (GBR) with distance 3.00
## 
## Best fuzzy match found: Jersey (UK) -> Jersey (GBR) with distance 3.00
## 
## Best fuzzy match found: Montserrat (UK) -> Montserrat with distance 5.00
## 
## Best fuzzy match found: Wallis and Futuna (France) -> Wallis and Futuna Islands with distance 6.00
#join but only keep overlap with existing, lot of aggregate units we want to avoid
d %<>% left_join(
  wiki_pop_density %>% select(ISO, Population, Pop_km2),
  by = "ISO"
) %>% left_join(
  wiki_urbanization %>% select(ISO, pct_of_total_population_2023_2) %>% rename(urbanization = pct_of_total_population_2023_2),
  by = "ISO"
)

#no errors check
assert_that(!anyDuplicated(d$ISO))
## [1] TRUE
d$ISO[duplicated(d$ISO)]
## character(0)
#log version of pop density because it has insane distribution
d$Pop_km2_log = log(d$Pop_km2)

#income data from https://worldpopulationreview.com/country-rankings/median-income-by-country
incomes = read_csv("data/median-income-by-country-2024.csv") %>% 
  mutate(
    ISO = pu_translate(country)
  )
## Rows: 165 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (4): MedianIncome_IntDollars_2020, MeanIncome_IntDollars_2020, GDPPPP_In...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#check for dups
assert_that(!anyDuplicated(incomes$ISO))
## [1] TRUE
#join
d = left_join(
  d,
  incomes %>% select(-GDPPPPYear, -country),
  by = "ISO"
)

#check for dups
assert_that(!anyDuplicated(d$ISO))
## [1] TRUE
#country names
d$name = pu_translate(d$ISO, reverse = T)

#population density groups
d$Pop_km2_ord = discretize(d$Pop_km2, breaks = 7, equal_range = F)
d$Pop_km2_ord %>% table2()
#spatial data
world_map = read_sf("data/World_Countries/World_Countries.shp") %>% 
  mutate(
    ISO = pu_translate(COUNTRY)
  ) %>% 
  #remove bad subnational units
  filter(
    !str_detect(ISO, "SJM")
  )
## No exact match: American Samoa (US)
## No exact match: Anguilla (UK)
## No exact match: Bermuda (UK)
## No exact match: Cayman Islands (UK)
## No exact match: Cocos (Keeling) Islands (Australia)
## No exact match: Northern Mariana Islands (US)
## No exact match: Jarvis Island (US)
## No exact match: Falkland Islands (UK)
## No exact match: Baker Island (US)
## No exact match: French Southern & Antarctic Lands (France)
## No exact match: Gibraltar (UK)
## No exact match: Guernsey (UK)
## No exact match: Guam (US)
## No exact match: Heard Island & McDonald Islands (Australia)
## No exact match: Howland Island (US)
## No exact match: Isle of Man (UK)
## No exact match: British Indian Ocean Territory (UK)
## No exact match: Jersey (UK)
## No exact match: Jan Mayen (Norway)
## No exact match: Johnston Atoll (US)
## No exact match: Christmas Island (Australia)
## No exact match: Montserrat (UK)
## No exact match: Midway Islands (US)
## No exact match: Norfolk Island (Australia)
## No exact match: Pitcairn Islands (UK)
## No exact match: Spratly Islands (Disputed)
## No exact match: Puerto Rico (US)
## No exact match: St. Pierre and Miquelon (France)
## No exact match: St. Helena (UK)
## No exact match: South Georgia and the South Sandwich Is (UK)
## No exact match: Turks and Caicos Islands (UK)
## No exact match: British Virgin Islands(UK)
## No exact match: American Virgin Islands (US)
## No exact match: Wallis and Futuna (France)
## No exact match: Wake Island (US)
## No exact match: Curacao (Netherlands)
## Best fuzzy match found: American Samoa (US) -> American Samoa with distance 5.00
## Best fuzzy match found: Anguilla (UK) -> Anguilla (GBR) with distance 3.00
## Best fuzzy match found: Bermuda (UK) -> Bermuda with distance 5.00
## Best fuzzy match found: Cayman Islands (UK) -> Cayman Islands with distance 5.00
## Best fuzzy match found: Cocos (Keeling) Islands (Australia) -> Cocos (Keeling) Islands with distance 12.00
## Best fuzzy match found: Northern Mariana Islands (US) -> Northern Mariana Islands with distance 5.00
## Best fuzzy match found: Jarvis Island (US) -> Jarvis Island (USA) with distance 1.00
## Best fuzzy match found: Falkland Islands (UK) -> Falkland Islands (GBR) with distance 3.00
## Best fuzzy match found: Baker Island (US) -> Baker Island (USA) with distance 1.00
## Best fuzzy match found: French Southern & Antarctic Lands (France) -> French Southern Territories (France) with distance 14.00
## Best fuzzy match found: Gibraltar (UK) -> Gibraltar (GBR) with distance 3.00
## Best fuzzy match found: Guernsey (UK) -> Guernsey (GBR) with distance 3.00
## Best fuzzy match found: Guam (US) -> Guam (USA) with distance 1.00
## Best fuzzy match found: Heard Island & McDonald Islands (Australia) -> Heard Island and McDonald Islands with distance 15.00
## Best fuzzy match found: Howland Island (US) -> Howland Island (USA) with distance 1.00
## Best fuzzy match found: Isle of Man (UK) -> Isle of Man (GBR) with distance 3.00
## Best fuzzy match found: British Indian Ocean Territory (UK) -> British Indian Ocean Territory (GBR) with distance 3.00
## Best fuzzy match found: Jersey (UK) -> Jersey (GBR) with distance 3.00
## Best fuzzy match found: Jan Mayen (Norway) -> Svalbard (Norway) with distance 8.00
## Best fuzzy match found: Johnston Atoll (US) -> Johnston Atoll (USA) with distance 1.00
## Best fuzzy match found: Christmas Island (Australia) -> Christmas Island with distance 12.00
## Best fuzzy match found: Montserrat (UK) -> Montserrat with distance 5.00
## Best fuzzy match found: Midway Islands (US) -> Midway Islands (USA) with distance 1.00
## Best fuzzy match found: Norfolk Island (Australia) -> Norfolk Island with distance 12.00
## Best fuzzy match found: Pitcairn Islands (UK) -> Pitcairn Islands with distance 5.00
## Best fuzzy match found: Spratly Islands (Disputed) -> Spratly Islands with distance 11.00
## Best fuzzy match found: Puerto Rico (US) -> Puerto Rico (USA) with distance 1.00
## Best fuzzy match found: St. Pierre and Miquelon (France) -> St. Pierre and Miquelon with distance 9.00
## Best fuzzy match found: St. Helena (UK) -> St. Helena with distance 5.00
## Best fuzzy match found: South Georgia and the South Sandwich Is (UK) -> South Georgia and the South Sandwich Islands with distance 5.00
## Best fuzzy match found: Turks and Caicos Islands (UK) -> Turks and Caicos Islands with distance 5.00
## Best fuzzy match found: British Virgin Islands(UK) -> British Virgin Islands with distance 4.00
## Best fuzzy match found: American Virgin Islands (US) -> U.S. Virgin Islands (USA) with distance 9.00
## Best fuzzy match found: Wallis and Futuna (France) -> Wallis and Futuna Islands with distance 6.00
## Best fuzzy match found: Wake Island (US) -> Wake Island (USA) with distance 1.00
## Best fuzzy match found: Curacao (Netherlands) -> Curaçao (Netherlands) with distance 1.00
#any duplicates?
assert_that(!anyDuplicated(world_map$ISO))
## [1] TRUE
world_map$ISO[duplicated(world_map$ISO)]
## character(0)
#derive lat and lon from centroids
world_map_latlon = world_map %>% 
  st_centroid() %>% 
  st_coordinates() %>% 
  as_tibble() %>% 
  rename(
    lon = X,
    lat = Y
  ) %>% mutate(
    ISO = world_map$ISO
  )
## Warning: st_centroid assumes attributes are constant over geometries
#copy back
world_map$lat = world_map_latlon$lat
world_map$lon = world_map_latlon$lon

#join
d_spatial = left_join(
  world_map %>% select(ISO, geometry, lat, lon),
  d,
  by = "ISO"
)

#internet speed lags
d_spatial %<>% 
  spatial_knn(
    k = 3,
    vars = c("speedtest_mobile", "speedtest_broadband", "cable_couk_broadband")
  )

#check lags
spatial_lag_cors(d_spatial)
##     speedtest_mobile  speedtest_broadband cable_couk_broadband 
##                0.531                0.525                0.672

Analysis

#correlation matrix
d %>% 
  select(NIQ, GDPPPP_IntDollars, MedianIncome_IntDollars_2020, speedtest_mobile, speedtest_broadband, cable_couk_broadband, Population, Pop_km2, Pop_km2_log, urbanization) %>%
  GG_heatmap()

#better version
d %>% 
  select(NIQ, GDPPPP_IntDollars, MedianIncome_IntDollars_2020, speedtest_mobile, speedtest_broadband, cable_couk_broadband, Population, Pop_km2, Pop_km2_log, urbanization) %>%
  ggpairs(columnLabels = c("NIQ", "GDPpc", "median\nincome", "speedtest\nmobile", "speedtest\nbroadband", "Google\nbroadband", "population", "density", "density_log", "urban%"))
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 77 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 140 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 98 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 76 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 77 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 140 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 98 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 76 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 125 rows containing missing values
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 98 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 98 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning: Removed 43 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 76 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 76 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 125 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_density()`).

GG_save("figs/correlation_matrix.png")
## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 77 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 140 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 98 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 76 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 77 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 140 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 98 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 76 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 125 rows containing missing values
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 98 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 98 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning: Removed 43 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 76 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 76 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 125 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_density()`).
#summary stats
d %>% 
  select(NIQ, GDPPPP_IntDollars, MedianIncome_IntDollars_2020, speedtest_mobile, speedtest_broadband, cable_couk_broadband, Population, Pop_km2, Pop_km2_log, urbanization) %>%
  describe2()
#maps
#urbanization
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = urbanization)) +
  scale_fill_viridis_c(guide = ) +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "Urbanization",
    fill = "% of population living in urban areas"
  )

GG_save("figs/urbanization_map.png")

#pop density
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = Pop_km2)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "Population density",
    fill = "People per km^2"
  )

GG_save("figs/pop_density_map.png")

#pop density again, but with ordinal scale
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = Pop_km2_ord)) +
  scale_fill_viridis_d() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "Population density, ordinal scale",
    fill = "People per km^2"
  )

GG_save("figs/pop_density_map_ordinal.png")

#log version
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = Pop_km2_log)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "Population density (log)",
    fill = "Log people per km^2"
  )

GG_save("figs/pop_density_log_map.png")

#internet speed google
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = cable_couk_broadband)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "cable.co.uk (Google M labs) broadband speed",
    fill = "Mbps"
  )

GG_save("figs/M labs broadband speed.png")

#Europe zoom
# Zoom in on Europe
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = cable_couk_broadband)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "cable.co.uk (Google M labs) broadband speed",
    fill = "Mbps"
  ) +
  coord_sf(xlim = c(-22, 35), ylim = c(35, 70)) +
  # Add labels on centroids
  geom_sf_label(aes(label = round(cable_couk_broadband, 1)), size = 3)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_label()`).

GG_save("figs/M labs broadband speed_europe.png")
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): Removed 31 rows containing missing values or values outside the scale range
## (`geom_label()`).
#speedtest net broadband
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = speedtest_broadband)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "speedtest.net broadband speed",
    fill = "Mbps"
  )

#scatterplot
d_spatial %>% 
  GG_scatter("cable_couk_broadband", "speedtest_broadband", case_names = "name") +
  labs(
    title = "Broadband speed comparison",
    x = "cable.co.uk (Google M labs) broadband speed",
    y = "speedtest.net broadband speed"
  )
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/M labs vs. speedtest.png")
## `geom_smooth()` using formula = 'y ~ x'
#density vs. urbanization
d_spatial %>% 
  GG_scatter("Pop_km2_log", "urbanization", case_names = "name") +
  labs(
    title = "Population density vs. urbanization",
    x = "Population density, log",
    y = "Urbanization"
  )
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/density_vs_urbanization.png")
## `geom_smooth()` using formula = 'y ~ x'
#standardized variables
d_spatial_z = d_spatial %>% 
  df_standardize()
## Skipped ISO because it is a character (string)
## Skipped geometry because it is class sfc_MULTIPOLYGON, sfc
## Skipped name because it is a character (string)
## Skipped Pop_km2_ord because it is class factor
#regression models
model_list_google = list(
  urban_alone = lm(cable_couk_broadband ~ urbanization, data = d_spatial_z),
  NIQ_alone = lm(cable_couk_broadband ~ NIQ, data = d_spatial_z),
  median_income_alone = lm(cable_couk_broadband ~ MedianIncome_IntDollars_2020, data = d_spatial_z),
  spatial_lag_alone = lm(cable_couk_broadband ~ cable_couk_broadband_lag, data = d_spatial_z),
  all_predictors = lm(cable_couk_broadband ~ urbanization + NIQ + MedianIncome_IntDollars_2020, data = d_spatial_z),
  with_spatial_lag = lm(cable_couk_broadband ~ urbanization + NIQ + MedianIncome_IntDollars_2020 + cable_couk_broadband_lag, data = d_spatial_z)
)

model_list_google %>% 
  summarize_models(asterisks_only = F)
#plot models
model_list_google %>% 
  get_model_coefs() %>% 
  GG_plot_models()

#compare models automatically
compare_predictors(d_spatial_z,
                   outcome = "cable_couk_broadband",
                   predictors = c("urbanization", "NIQ", "MedianIncome_IntDollars_2020", "cable_couk_broadband_lag")
) %>% 
  kirkegaard::GG_plot_models() +
  labs(
    title = "Model comparison",
    subtitle = "cable.co.uk (Google M labs) broadband speed"
  )

GG_save("figs/compare_models_google.png")

#same thing, but use speedtest broadband data
#models
model_list_speedtest = list(
  urban_alone = lm(speedtest_broadband ~ urbanization, data = d_spatial_z),
  NIQ_alone = lm(speedtest_broadband ~ NIQ, data = d_spatial_z),
  median_income_alone = lm(speedtest_broadband ~ MedianIncome_IntDollars_2020, data = d_spatial_z),
  spatial_lag_alone = lm(speedtest_broadband ~ speedtest_broadband_lag, data = d_spatial_z),
  all_predictors = lm(speedtest_broadband ~ urbanization + NIQ + MedianIncome_IntDollars_2020, data = d_spatial_z),
  with_spatial_lag = lm(speedtest_broadband ~ urbanization + NIQ + MedianIncome_IntDollars_2020 + speedtest_broadband_lag, data = d_spatial_z)
)

#summary
model_list_speedtest %>% 
  summarize_models(asterisks_only = F)
#in a plot
compare_predictors(d_spatial_z,
                   outcome = "speedtest_broadband",
                   predictors = c("urbanization", "NIQ", "MedianIncome_IntDollars_2020", "cable_couk_broadband_lag")
) %>% 
  kirkegaard::GG_plot_models() +
  labs(
    title = "Model comparison",
    subtitle = "speedtest.net broadband speed"
  )

GG_save("figs/compare_models_speedtest.png")

Meta

#versions
write_sessioninfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] GGally_2.2.1          googlesheets4_1.1.1   readxl_1.4.3         
##  [4] sf_1.0-16             kirkegaard_2024-09-17 psych_2.4.6.26       
##  [7] assertthat_0.2.1      weights_1.0.4         Hmisc_5.1-3          
## [10] magrittr_2.0.3        lubridate_1.9.3       forcats_1.0.0        
## [13] stringr_1.5.1         dplyr_1.1.4           purrr_1.0.2          
## [16] readr_2.1.5           tidyr_1.3.1           tibble_3.2.1         
## [19] ggplot2_3.5.1         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] DBI_1.2.3          mnormt_2.1.1       gridExtra_2.3     
##   [4] s2_1.1.7           rlang_1.1.4        e1071_1.7-14      
##   [7] compiler_4.4.1     mgcv_1.9-1         gdata_3.0.0       
##  [10] systemfonts_1.1.0  vctrs_0.6.5        wk_0.9.2          
##  [13] crayon_1.5.3       pkgconfig_2.0.3    shape_1.4.6.1     
##  [16] fastmap_1.2.0      backports_1.5.0    labeling_0.4.3    
##  [19] utf8_1.2.4         rmarkdown_2.28     tzdb_0.4.0        
##  [22] nloptr_2.1.1       ragg_1.3.2         bit_4.0.5         
##  [25] xfun_0.47          glmnet_4.1-8       jomo_2.7-6        
##  [28] cachem_1.1.0       jsonlite_1.8.8     highr_0.11        
##  [31] pan_1.9            terra_1.7-78       broom_1.0.6       
##  [34] parallel_4.4.1     cluster_2.1.6      R6_2.5.1          
##  [37] bslib_0.8.0        stringi_1.8.4      RColorBrewer_1.1-3
##  [40] boot_1.3-30        rpart_4.1.23       jquerylib_0.1.4   
##  [43] cellranger_1.1.0   Rcpp_1.0.13        iterators_1.0.14  
##  [46] knitr_1.48         base64enc_0.1-3    Matrix_1.6-5      
##  [49] splines_4.4.1      nnet_7.3-19        timechange_0.3.0  
##  [52] tidyselect_1.2.1   stringdist_0.9.12  rstudioapi_0.16.0 
##  [55] yaml_2.3.10        codetools_0.2-19   plyr_1.8.9        
##  [58] lattice_0.22-5     withr_3.0.1        evaluate_0.24.0   
##  [61] foreign_0.8-86     survival_3.7-0     ggstats_0.6.0     
##  [64] units_0.8-5        proxy_0.4-27       pillar_1.9.0      
##  [67] mice_3.16.0        KernSmooth_2.23-24 checkmate_2.3.2   
##  [70] foreach_1.5.2      generics_0.1.3     vroom_1.6.5       
##  [73] hms_1.1.3          munsell_0.5.1      scales_1.3.0      
##  [76] minqa_1.2.8        gtools_3.9.5       class_7.3-22      
##  [79] glue_1.7.0         tools_4.4.1        data.table_1.16.0 
##  [82] lme4_1.1-35.5      fs_1.6.4           grid_4.4.1        
##  [85] colorspace_2.1-1   nlme_3.1-165       htmlTable_2.4.3   
##  [88] googledrive_2.1.1  Formula_1.2-5      cli_3.6.3         
##  [91] textshaping_0.4.0  fansi_1.0.6        viridisLite_0.4.2 
##  [94] gargle_1.5.2       gtable_0.3.5       sass_0.4.9        
##  [97] digest_0.6.37      classInt_0.4-10    farver_2.1.2      
## [100] htmlwidgets_1.6.4  htmltools_0.5.8.1  lifecycle_1.0.4   
## [103] mitml_0.4-5        bit64_4.0.5        MASS_7.3-61
#write data to file for reuse
d_spatial %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}