Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  sf,
  readxl,
  googlesheets4,
  GGally
)

## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

theme_set(theme_bw())

options(
    digits = 3
)

Data

#read internet data from 3 files
#table 1 from https://www.speedtest.net/global-index, mobile
speedtest_mobile = read_csv("data/table 1.csv") %>% 
  miss_filter()

## New names:
## Rows: 222 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): ...2, Country dbl (2): #, Mbps
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`

#table 2 from ditto, broadband
speedtest_broadband = read_csv("data/table 2.csv") %>% 
  miss_filter()

## New names:
## Rows: 322 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): ...2, Country dbl (2): #, Mbps
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`

#table from https://www.cable.co.uk/broadband/speed/worldwide-speed-league/
cable_couk = read_excel("data/cable co uk worldwide_speed_league_data.xlsx")

## Warning: Expecting numeric in K6 / R6C11: got a date

## New names:
## • `` -> `...9`
## • `` -> `...11`

#add ISO 3 codes for joining
#first dataset
speedtest_mobile %<>% mutate(
  ISO = pu_translate(Country),
  speedtest_mobile = Mbps
)

## No exact match: Hong Kong (SAR)
## Best fuzzy match found: Hong Kong (SAR) -> Hong Kong (CHN) with distance 3.00

#second
speedtest_broadband %<>% mutate(
  ISO = pu_translate(Country),
  speedtest_broadband = Mbps
)

## No exact match: Hong Kong (SAR)
## No exact match: Macau (SAR)
## No exact match: The Bahamas
## Best fuzzy match found: Hong Kong (SAR) -> Hong Kong (CHN) with distance 3.00
## Best fuzzy match found: Macau (SAR) -> Macau (CHN) with distance 3.00
## Best fuzzy match found: The Bahamas -> Bahamas with distance 4.00

#third
cable_couk %<>%
  df_legalize_names() %>% 
  mutate(
  ISO = pu_translate(country),
  cable_couk_broadband = Mean_download_speed_Mbps
)

## No exact match: St Kitts and Nevis
## No exact match: Åland Islands
## No exact match: St Vincent and Grenadines
## No exact match: Bonaire Sint Eustatius and Saba
## No exact match: Congo Republic
## Best fuzzy match found: St Kitts and Nevis -> St. Kitts and Nevis with distance 1.00
## Best fuzzy match found: Åland Islands -> Aaland Islands with distance 2.00
## Best fuzzy match found: St Vincent and Grenadines -> Saint Vincent and Grenadines with distance 3.00
## Best fuzzy match found: Bonaire Sint Eustatius and Saba -> Bonaire, Sint Eustatius and Saba with distance 1.00
## Best fuzzy match found: Congo Republic -> Congo Republic of with distance 3.00

#join
d = full_join(
  speedtest_mobile %>% select(ISO, speedtest_mobile),
  speedtest_broadband %>% select(ISO, speedtest_broadband),
  by = "ISO"
) %>% 
  full_join(
    cable_couk %>% select(ISO, cable_couk_broadband),
    by = "ISO"
  )

#no errors check
assert_that(!anyDuplicated(d$ISO))

## [1] TRUE

d$ISO[duplicated(d$ISO)]

## character(0)

#national IQ data
# gs4_deauth()
# NIQ_emil = read_sheet("https://docs.google.com/spreadsheets/d/1cReoeIZLlbxOrN4_wnj52Q6UWFepXSJGi3Gj5m6ZAZg/edit?gid=0#gid=0")

#Seb's improved version
NIQ_seb = read_csv("data/jensen kirk 2024 Niqresults.csv")

## New names:
## Rows: 198 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): name, alpha3 dbl (3): ...1, NIQ, seadj
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

#join
d = full_join(
  d,
  NIQ_seb %>% rename(ISO = alpha3) %>% select(ISO, NIQ),
  by = "ISO"
)

#no errors check
assert_that(!anyDuplicated(d$ISO))

## [1] TRUE

d$ISO[duplicated(d$ISO)]

## character(0)

#population size and population density from https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population_density
wiki_pop_density = read_csv("data/wiki pop density.csv") %>% 
  df_legalize_names() %>% 
  mutate(
    country = str_remove(Location, " https.*"),
    ISO = pu_translate(country)
  ) %>% 
  #filter bad units
  filter(
    !str_detect(country, "France \\(metropolitan & overseas\\)"),
    !str_detect(country, "World \\(all land\\)")
  )

## Rows: 252 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Location, Notes
## num (5): Pop./km2, Pop./sq mi, Population, Land area(km2), Land area(sq mi)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## No exact match: Gibraltar (UK)
## 
## No exact match: Sint Maarten (NL)
## 
## No exact match: Bermuda (UK)
## 
## No exact match: Guernsey (UK)
## 
## No exact match: Jersey (UK)
## 
## No exact match: Palestine(Gaza Strip, West Bank)
## 
## No exact match: Aruba (NL)
## 
## No exact match: Curaçao (NL)
## 
## No exact match: Réunion (France)
## 
## No exact match: Puerto Rico (US)
## 
## No exact match: Guam (US)
## 
## No exact match: U.S. Virgin Islands (US)
## 
## No exact match: Cayman Islands (UK)
## 
## No exact match: American Samoa (US)
## 
## No exact match: British Virgin Islands (UK)
## 
## No exact match: Anguilla (UK)
## 
## No exact match: Northern Mariana Islands (US)
## 
## No exact match: France (metropolitan & overseas)
## 
## No exact match: Caribbean Netherlands (Netherlands)
## 
## No exact match: Wallis and Futuna (France)
## 
## No exact match: Akrotiri and Dhekelia (UK)
## 
## No exact match: World (excluding Antarctica)
## 
## No exact match: Norfolk Island (Australia)
## 
## No exact match: World (all land)
## 
## No exact match: Turks and Caicos Islands (UK)
## 
## No exact match: Saint Helena, Ascension and Tristan da Cunha (UK)
## 
## No exact match: Cocos (Keeling) Islands (Australia)
## 
## No exact match: Montserrat (UK)
## 
## No exact match: Saint Pierre and Miquelon (France)
## 
## No exact match: Christmas Island (Australia)
## 
## No exact match: Western Sahara (disputed)
## 
## No exact match: Pitcairn Islands (UK)
## 
## No exact match: Falkland Islands (UK)
## 
## Best fuzzy match found: Gibraltar (UK) -> Gibraltar (GBR) with distance 3.00
## 
## Best fuzzy match found: Sint Maarten (NL) -> Sint Maarten (NLD) with distance 1.00
## 
## Best fuzzy match found: Bermuda (UK) -> Bermuda with distance 5.00
## 
## Best fuzzy match found: Guernsey (UK) -> Guernsey (GBR) with distance 3.00
## 
## Best fuzzy match found: Jersey (UK) -> Jersey (GBR) with distance 3.00
## 
## Best fuzzy match found: Palestine(Gaza Strip, West Bank) -> Palestine, State of with distance 18.00
## 
## Best fuzzy match found: Aruba (NL) -> Aruba (NLD) with distance 1.00
## 
## Best fuzzy match found: Curaçao (NL) -> Curaçao with distance 5.00
## 
## Best fuzzy match found: Réunion (France) -> Reunion (France) with distance 1.00
## 
## Best fuzzy match found: Puerto Rico (US) -> Puerto Rico (USA) with distance 1.00
## 
## Best fuzzy match found: Guam (US) -> Guam (USA) with distance 1.00
## 
## Best fuzzy match found: U.S. Virgin Islands (US) -> U.S. Virgin Islands (USA) with distance 1.00
## 
## Best fuzzy match found: Cayman Islands (UK) -> Cayman Islands with distance 5.00
## 
## Best fuzzy match found: American Samoa (US) -> American Samoa with distance 5.00
## 
## Best fuzzy match found: British Virgin Islands (UK) -> British Virgin Islands (GBR) with distance 3.00
## 
## Best fuzzy match found: Anguilla (UK) -> Anguilla (GBR) with distance 3.00
## 
## Best fuzzy match found: Northern Mariana Islands (US) -> Northern Mariana Islands with distance 5.00
## 
## Best fuzzy match found: France (metropolitan & overseas) -> France (metropolitan) with distance 11.00
## 
## Best fuzzy match found: Caribbean Netherlands (Netherlands) -> Caribbean Netherlands with distance 14.00
## 
## Best fuzzy match found: Wallis and Futuna (France) -> Wallis and Futuna Islands with distance 6.00
## 
## Best fuzzy match found: Akrotiri and Dhekelia (UK) -> Akrotiri and Dhekelia with distance 5.00
## 
## Best fuzzy match found: World (excluding Antarctica) -> Antarctica with distance 18.00
## 
## Best fuzzy match found: Norfolk Island (Australia) -> Norfolk Island with distance 12.00
## 
## Best fuzzy match found: World (all land) -> Åland (Finland) with distance 8.00
## 
## Best fuzzy match found: Turks and Caicos Islands (UK) -> Turks and Caicos Islands with distance 5.00
## 
## Best fuzzy match found: Saint Helena, Ascension and Tristan da Cunha (UK) -> Saint Helena, Ascension and Tristan da Cunha with distance 5.00
## 
## Best fuzzy match found: Cocos (Keeling) Islands (Australia) -> Cocos (Keeling) Islands with distance 12.00
## 
## Best fuzzy match found: Montserrat (UK) -> Montserrat with distance 5.00
## 
## Best fuzzy match found: Saint Pierre and Miquelon (France) -> Saint Pierre and Miquelon with distance 9.00
## 
## Best fuzzy match found: Christmas Island (Australia) -> Christmas Island with distance 12.00
## 
## Best fuzzy match found: Western Sahara (disputed) -> Western Sahara with distance 11.00
## 
## Best fuzzy match found: Pitcairn Islands (UK) -> Pitcairn Islands with distance 5.00
## 
## Best fuzzy match found: Falkland Islands (UK) -> Falkland Islands (GBR) with distance 3.00

#urbanization data from https://en.wikipedia.org/wiki/Urbanization_by_sovereign_state
wiki_urbanization = read_csv("data/wiki urbanization.csv") %>% 
  df_legalize_names()  %>% 
  mutate(
    country = str_remove(Country_or_territory, " https.*"),
    ISO = pu_translate(country)
  )

## Rows: 228 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Country or territory, Urban population  (2022)[1], Urbanization rat...
## dbl (1): % of total population  (2023)[2]
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## No exact match: Anguilla (UK)
## 
## No exact match: Bermuda (UK)
## 
## No exact match: Cayman Islands (UK)
## 
## No exact match: Gibraltar (UK)
## 
## No exact match: U.S. Virgin Islands (US)
## 
## No exact match: Guam (US)
## 
## No exact match: Turks and Caicos Islands (UK)
## 
## No exact match: Puerto Rico (US)
## 
## No exact match: Northern Mariana Islands (US)
## 
## No exact match: Saint Pierre and Miquelon (France)
## 
## No exact match: American Samoa (US)
## 
## No exact match: Falkland Islands (UK)
## 
## No exact match: World
## 
## No exact match: Isle of Man (UK)
## 
## No exact match: British Virgin Islands (UK)
## 
## No exact match: Saint Helena, Ascension and Tristan da Cunha (UK)
## 
## No exact match: Guernsey (UK)
## 
## No exact match: Jersey (UK)
## 
## No exact match: Montserrat (UK)
## 
## No exact match: Wallis and Futuna (France)
## 
## Best fuzzy match found: Anguilla (UK) -> Anguilla (GBR) with distance 3.00
## 
## Best fuzzy match found: Bermuda (UK) -> Bermuda with distance 5.00
## 
## Best fuzzy match found: Cayman Islands (UK) -> Cayman Islands with distance 5.00
## 
## Best fuzzy match found: Gibraltar (UK) -> Gibraltar (GBR) with distance 3.00
## 
## Best fuzzy match found: U.S. Virgin Islands (US) -> U.S. Virgin Islands (USA) with distance 1.00
## 
## Best fuzzy match found: Guam (US) -> Guam (USA) with distance 1.00
## 
## Best fuzzy match found: Turks and Caicos Islands (UK) -> Turks and Caicos Islands with distance 5.00
## 
## Best fuzzy match found: Puerto Rico (US) -> Puerto Rico (USA) with distance 1.00
## 
## Best fuzzy match found: Northern Mariana Islands (US) -> Northern Mariana Islands with distance 5.00
## 
## Best fuzzy match found: Saint Pierre and Miquelon (France) -> Saint Pierre and Miquelon with distance 9.00
## 
## Best fuzzy match found: American Samoa (US) -> American Samoa with distance 5.00
## 
## Best fuzzy match found: Falkland Islands (UK) -> Falkland Islands (GBR) with distance 3.00
## 
## Best fuzzy match found: World -> world with distance 1.00
## 
## Best fuzzy match found: Isle of Man (UK) -> Isle of Man (GBR) with distance 3.00
## 
## Best fuzzy match found: British Virgin Islands (UK) -> British Virgin Islands (GBR) with distance 3.00
## 
## Best fuzzy match found: Saint Helena, Ascension and Tristan da Cunha (UK) -> Saint Helena, Ascension and Tristan da Cunha with distance 5.00
## 
## Best fuzzy match found: Guernsey (UK) -> Guernsey (GBR) with distance 3.00
## 
## Best fuzzy match found: Jersey (UK) -> Jersey (GBR) with distance 3.00
## 
## Best fuzzy match found: Montserrat (UK) -> Montserrat with distance 5.00
## 
## Best fuzzy match found: Wallis and Futuna (France) -> Wallis and Futuna Islands with distance 6.00

#join but only keep overlap with existing, lot of aggregate units we want to avoid
d %<>% left_join(
  wiki_pop_density %>% select(ISO, Population, Pop_km2),
  by = "ISO"
) %>% left_join(
  wiki_urbanization %>% select(ISO, pct_of_total_population_2023_2) %>% rename(urbanization = pct_of_total_population_2023_2),
  by = "ISO"
)

#no errors check
assert_that(!anyDuplicated(d$ISO))

## [1] TRUE

d$ISO[duplicated(d$ISO)]

## character(0)

#log version of pop density because it has insane distribution
d$Pop_km2_log = log(d$Pop_km2)

#income data from https://worldpopulationreview.com/country-rankings/median-income-by-country
incomes = read_csv("data/median-income-by-country-2024.csv") %>% 
  mutate(
    ISO = pu_translate(country)
  )

## Rows: 165 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (4): MedianIncome_IntDollars_2020, MeanIncome_IntDollars_2020, GDPPPP_In...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#check for dups
assert_that(!anyDuplicated(incomes$ISO))

## [1] TRUE

#join
d = left_join(
  d,
  incomes %>% select(-GDPPPPYear, -country),
  by = "ISO"
)

#check for dups
assert_that(!anyDuplicated(d$ISO))

## [1] TRUE

#country names
d$name = pu_translate(d$ISO, reverse = T)

#population density groups
d$Pop_km2_ord = discretize(d$Pop_km2, breaks = 7, equal_range = F)
d$Pop_km2_ord %>% table2()

#spatial data
world_map = read_sf("data/World_Countries/World_Countries.shp") %>% 
  mutate(
    ISO = pu_translate(COUNTRY)
  ) %>% 
  #remove bad subnational units
  filter(
    !str_detect(ISO, "SJM")
  )

## No exact match: American Samoa (US)
## No exact match: Anguilla (UK)
## No exact match: Bermuda (UK)
## No exact match: Cayman Islands (UK)
## No exact match: Cocos (Keeling) Islands (Australia)
## No exact match: Northern Mariana Islands (US)
## No exact match: Jarvis Island (US)
## No exact match: Falkland Islands (UK)
## No exact match: Baker Island (US)
## No exact match: French Southern & Antarctic Lands (France)
## No exact match: Gibraltar (UK)
## No exact match: Guernsey (UK)
## No exact match: Guam (US)
## No exact match: Heard Island & McDonald Islands (Australia)
## No exact match: Howland Island (US)
## No exact match: Isle of Man (UK)
## No exact match: British Indian Ocean Territory (UK)
## No exact match: Jersey (UK)
## No exact match: Jan Mayen (Norway)
## No exact match: Johnston Atoll (US)
## No exact match: Christmas Island (Australia)
## No exact match: Montserrat (UK)
## No exact match: Midway Islands (US)
## No exact match: Norfolk Island (Australia)
## No exact match: Pitcairn Islands (UK)
## No exact match: Spratly Islands (Disputed)
## No exact match: Puerto Rico (US)
## No exact match: St. Pierre and Miquelon (France)
## No exact match: St. Helena (UK)
## No exact match: South Georgia and the South Sandwich Is (UK)
## No exact match: Turks and Caicos Islands (UK)
## No exact match: British Virgin Islands(UK)
## No exact match: American Virgin Islands (US)
## No exact match: Wallis and Futuna (France)
## No exact match: Wake Island (US)
## No exact match: Curacao (Netherlands)
## Best fuzzy match found: American Samoa (US) -> American Samoa with distance 5.00
## Best fuzzy match found: Anguilla (UK) -> Anguilla (GBR) with distance 3.00
## Best fuzzy match found: Bermuda (UK) -> Bermuda with distance 5.00
## Best fuzzy match found: Cayman Islands (UK) -> Cayman Islands with distance 5.00
## Best fuzzy match found: Cocos (Keeling) Islands (Australia) -> Cocos (Keeling) Islands with distance 12.00
## Best fuzzy match found: Northern Mariana Islands (US) -> Northern Mariana Islands with distance 5.00
## Best fuzzy match found: Jarvis Island (US) -> Jarvis Island (USA) with distance 1.00
## Best fuzzy match found: Falkland Islands (UK) -> Falkland Islands (GBR) with distance 3.00
## Best fuzzy match found: Baker Island (US) -> Baker Island (USA) with distance 1.00
## Best fuzzy match found: French Southern & Antarctic Lands (France) -> French Southern Territories (France) with distance 14.00
## Best fuzzy match found: Gibraltar (UK) -> Gibraltar (GBR) with distance 3.00
## Best fuzzy match found: Guernsey (UK) -> Guernsey (GBR) with distance 3.00
## Best fuzzy match found: Guam (US) -> Guam (USA) with distance 1.00
## Best fuzzy match found: Heard Island & McDonald Islands (Australia) -> Heard Island and McDonald Islands with distance 15.00
## Best fuzzy match found: Howland Island (US) -> Howland Island (USA) with distance 1.00
## Best fuzzy match found: Isle of Man (UK) -> Isle of Man (GBR) with distance 3.00
## Best fuzzy match found: British Indian Ocean Territory (UK) -> British Indian Ocean Territory (GBR) with distance 3.00
## Best fuzzy match found: Jersey (UK) -> Jersey (GBR) with distance 3.00
## Best fuzzy match found: Jan Mayen (Norway) -> Svalbard (Norway) with distance 8.00
## Best fuzzy match found: Johnston Atoll (US) -> Johnston Atoll (USA) with distance 1.00
## Best fuzzy match found: Christmas Island (Australia) -> Christmas Island with distance 12.00
## Best fuzzy match found: Montserrat (UK) -> Montserrat with distance 5.00
## Best fuzzy match found: Midway Islands (US) -> Midway Islands (USA) with distance 1.00
## Best fuzzy match found: Norfolk Island (Australia) -> Norfolk Island with distance 12.00
## Best fuzzy match found: Pitcairn Islands (UK) -> Pitcairn Islands with distance 5.00
## Best fuzzy match found: Spratly Islands (Disputed) -> Spratly Islands with distance 11.00
## Best fuzzy match found: Puerto Rico (US) -> Puerto Rico (USA) with distance 1.00
## Best fuzzy match found: St. Pierre and Miquelon (France) -> St. Pierre and Miquelon with distance 9.00
## Best fuzzy match found: St. Helena (UK) -> St. Helena with distance 5.00
## Best fuzzy match found: South Georgia and the South Sandwich Is (UK) -> South Georgia and the South Sandwich Islands with distance 5.00
## Best fuzzy match found: Turks and Caicos Islands (UK) -> Turks and Caicos Islands with distance 5.00
## Best fuzzy match found: British Virgin Islands(UK) -> British Virgin Islands with distance 4.00
## Best fuzzy match found: American Virgin Islands (US) -> U.S. Virgin Islands (USA) with distance 9.00
## Best fuzzy match found: Wallis and Futuna (France) -> Wallis and Futuna Islands with distance 6.00
## Best fuzzy match found: Wake Island (US) -> Wake Island (USA) with distance 1.00
## Best fuzzy match found: Curacao (Netherlands) -> Curaçao (Netherlands) with distance 1.00

#any duplicates?
assert_that(!anyDuplicated(world_map$ISO))

## [1] TRUE

world_map$ISO[duplicated(world_map$ISO)]

## character(0)

#derive lat and lon from centroids
world_map_latlon = world_map %>% 
  st_centroid() %>% 
  st_coordinates() %>% 
  as_tibble() %>% 
  rename(
    lon = X,
    lat = Y
  ) %>% mutate(
    ISO = world_map$ISO
  )

## Warning: st_centroid assumes attributes are constant over geometries

#copy back
world_map$lat = world_map_latlon$lat
world_map$lon = world_map_latlon$lon

#join
d_spatial = left_join(
  world_map %>% select(ISO, geometry, lat, lon),
  d,
  by = "ISO"
)

#internet speed lags
d_spatial %<>% 
  spatial_knn(
    k = 3,
    vars = c("speedtest_mobile", "speedtest_broadband", "cable_couk_broadband")
  )

#check lags
spatial_lag_cors(d_spatial)

##     speedtest_mobile  speedtest_broadband cable_couk_broadband 
##                0.531                0.525                0.672

Analysis

#correlation matrix
d %>% 
  select(NIQ, GDPPPP_IntDollars, MedianIncome_IntDollars_2020, speedtest_mobile, speedtest_broadband, cable_couk_broadband, Population, Pop_km2, Pop_km2_log, urbanization) %>%
  GG_heatmap()

#better version
d %>% 
  select(NIQ, GDPPPP_IntDollars, MedianIncome_IntDollars_2020, speedtest_mobile, speedtest_broadband, cable_couk_broadband, Population, Pop_km2, Pop_km2_log, urbanization) %>%
  ggpairs(columnLabels = c("NIQ", "GDPpc", "median\nincome", "speedtest\nmobile", "speedtest\nbroadband", "Google\nbroadband", "population", "density", "density_log", "urban%"))

## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 77 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 140 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 98 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 76 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 77 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 140 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 98 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 76 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 125 rows containing missing values

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 98 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 98 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning: Removed 43 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 76 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 76 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values

## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values

## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values

## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values

## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 125 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_density()`).

GG_save("figs/correlation_matrix.png")

## Warning: Removed 37 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 39 rows containing missing values

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 77 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 140 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 98 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 76 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 77 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 140 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 98 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 76 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 140 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 124 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 125 rows containing missing values

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 98 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 98 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 74 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 75 rows containing missing values

## Warning: Removed 43 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 76 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 76 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 6 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 10 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values

## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values

## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values

## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 124 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 14 rows containing missing values

## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 74 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 125 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 75 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_density()`).

#summary stats
d %>% 
  select(NIQ, GDPPPP_IntDollars, MedianIncome_IntDollars_2020, speedtest_mobile, speedtest_broadband, cable_couk_broadband, Population, Pop_km2, Pop_km2_log, urbanization) %>%
  describe2()

#maps
#urbanization
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = urbanization)) +
  scale_fill_viridis_c(guide = ) +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "Urbanization",
    fill = "% of population living in urban areas"
  )

GG_save("figs/urbanization_map.png")

#pop density
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = Pop_km2)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "Population density",
    fill = "People per km^2"
  )

GG_save("figs/pop_density_map.png")

#pop density again, but with ordinal scale
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = Pop_km2_ord)) +
  scale_fill_viridis_d() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "Population density, ordinal scale",
    fill = "People per km^2"
  )

GG_save("figs/pop_density_map_ordinal.png")

#log version
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = Pop_km2_log)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "Population density (log)",
    fill = "Log people per km^2"
  )

GG_save("figs/pop_density_log_map.png")

#internet speed google
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = cable_couk_broadband)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "cable.co.uk (Google M labs) broadband speed",
    fill = "Mbps"
  )

GG_save("figs/M labs broadband speed.png")

#Europe zoom
# Zoom in on Europe
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = cable_couk_broadband)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "cable.co.uk (Google M labs) broadband speed",
    fill = "Mbps"
  ) +
  coord_sf(xlim = c(-22, 35), ylim = c(35, 70)) +
  # Add labels on centroids
  geom_sf_label(aes(label = round(cable_couk_broadband, 1)), size = 3)

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_label()`).

GG_save("figs/M labs broadband speed_europe.png")

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): Removed 31 rows containing missing values or values outside the scale range
## (`geom_label()`).

#speedtest net broadband
d_spatial %>% 
  ggplot() +
  geom_sf(aes(fill = speedtest_broadband)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  #move legend to bottom
  theme(legend.position = "bottom") +
  labs(
    title = "speedtest.net broadband speed",
    fill = "Mbps"
  )

#scatterplot
d_spatial %>% 
  GG_scatter("cable_couk_broadband", "speedtest_broadband", case_names = "name") +
  labs(
    title = "Broadband speed comparison",
    x = "cable.co.uk (Google M labs) broadband speed",
    y = "speedtest.net broadband speed"
  )

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/M labs vs. speedtest.png")

## `geom_smooth()` using formula = 'y ~ x'

#density vs. urbanization
d_spatial %>% 
  GG_scatter("Pop_km2_log", "urbanization", case_names = "name") +
  labs(
    title = "Population density vs. urbanization",
    x = "Population density, log",
    y = "Urbanization"
  )

## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/density_vs_urbanization.png")

## `geom_smooth()` using formula = 'y ~ x'

#standardized variables
d_spatial_z = d_spatial %>% 
  df_standardize()

## Skipped ISO because it is a character (string)
## Skipped geometry because it is class sfc_MULTIPOLYGON, sfc
## Skipped name because it is a character (string)
## Skipped Pop_km2_ord because it is class factor

#regression models
model_list_google = list(
  urban_alone = lm(cable_couk_broadband ~ urbanization, data = d_spatial_z),
  NIQ_alone = lm(cable_couk_broadband ~ NIQ, data = d_spatial_z),
  median_income_alone = lm(cable_couk_broadband ~ MedianIncome_IntDollars_2020, data = d_spatial_z),
  spatial_lag_alone = lm(cable_couk_broadband ~ cable_couk_broadband_lag, data = d_spatial_z),
  all_predictors = lm(cable_couk_broadband ~ urbanization + NIQ + MedianIncome_IntDollars_2020, data = d_spatial_z),
  with_spatial_lag = lm(cable_couk_broadband ~ urbanization + NIQ + MedianIncome_IntDollars_2020 + cable_couk_broadband_lag, data = d_spatial_z)
)

model_list_google %>% 
  summarize_models(asterisks_only = F)

#plot models
model_list_google %>% 
  get_model_coefs() %>% 
  GG_plot_models()

#compare models automatically
compare_predictors(d_spatial_z,
                   outcome = "cable_couk_broadband",
                   predictors = c("urbanization", "NIQ", "MedianIncome_IntDollars_2020", "cable_couk_broadband_lag")
) %>% 
  kirkegaard::GG_plot_models() +
  labs(
    title = "Model comparison",
    subtitle = "cable.co.uk (Google M labs) broadband speed"
  )

GG_save("figs/compare_models_google.png")

#same thing, but use speedtest broadband data
#models
model_list_speedtest = list(
  urban_alone = lm(speedtest_broadband ~ urbanization, data = d_spatial_z),
  NIQ_alone = lm(speedtest_broadband ~ NIQ, data = d_spatial_z),
  median_income_alone = lm(speedtest_broadband ~ MedianIncome_IntDollars_2020, data = d_spatial_z),
  spatial_lag_alone = lm(speedtest_broadband ~ speedtest_broadband_lag, data = d_spatial_z),
  all_predictors = lm(speedtest_broadband ~ urbanization + NIQ + MedianIncome_IntDollars_2020, data = d_spatial_z),
  with_spatial_lag = lm(speedtest_broadband ~ urbanization + NIQ + MedianIncome_IntDollars_2020 + speedtest_broadband_lag, data = d_spatial_z)
)

#summary
model_list_speedtest %>% 
  summarize_models(asterisks_only = F)

#in a plot
compare_predictors(d_spatial_z,
                   outcome = "speedtest_broadband",
                   predictors = c("urbanization", "NIQ", "MedianIncome_IntDollars_2020", "cable_couk_broadband_lag")
) %>% 
  kirkegaard::GG_plot_models() +
  labs(
    title = "Model comparison",
    subtitle = "speedtest.net broadband speed"
  )

GG_save("figs/compare_models_speedtest.png")

Internet speed: global variation explained by NIQ, wealth, urbanization

17 September, 2024

Init

Data

Analysis

Meta