library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
#json
jsonlite,
#raster
raster,
#plots
patchwork,
#tables
flextable
)
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:purrr':
##
## flatten
##
## Loading required package: sp
##
## Attaching package: 'raster'
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'patchwork'
##
## The following object is masked from 'package:raster':
##
## area
##
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:raster':
##
## rotate
##
## The following object is masked from 'package:purrr':
##
## compose
theme_set(theme_bw())
options(
digits = 3
)
#right functions
select = dplyr::select
#read wiki data json
wiki_cities = fromJSON("data/wiki-climate.json") %>%
df_legalize_names()
#add Mount Everest as a fake city to this dataset
wiki_cities = bind_rows(
wiki_cities,
data.frame(
name = "Mount Everest",
country = "Nepal",
gps_lat = 27.9881,
gps_lon = 86.9250
)
)
#filter to variables of interest
d = wiki_cities %>%
select(name, country, gps_lat, gps_lon, year_mean_C) %>%
mutate(
year_mean_C = as.numeric(year_mean_C),
abs_lat = abs(gps_lat)
) %>% filter(
!is.na(gps_lat), !is.na(gps_lon)
)
#geo tiff
#temps, 12x
temp_dir = "data/wc2.1_10m_tavg"
#read files
temps = map(list.files(temp_dir, full.names = T), raster)
#elevation
elev = raster("data/wc2.1_30s_elev.tif")
# Define points of interest (longitude, latitude in decimal degrees)
points <- data.frame(
lon = d$gps_lon, # Longitude first
lat = d$gps_lat # Latitude second
)
# Extract values for each point
extracted_temps <- map_dfc(temps, ~extract(., points[, c("lon", "lat")]))
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
extracted_elev <- extract(elev, points[, c("lon", "lat")])
# Combine results into a data frame
results <- data.frame(
Temperature_C = extracted_temps %>% rowMeans(),
Altitude_m = extracted_elev
)
#join
d = bind_cols(d, results)
#label for Mount Everest alone
d$label = ifelse(d$name == "Mount Everest", "Mount Everest", "")
d$Everest = d$name == "Mount Everest"
#sort so Everest is first or last for plotting names
d = d %>% arrange(!Everest)
#cors
d %>%
select(Temperature_C, Altitude_m, gps_lat, gps_lon, abs_lat) %>%
GG_heatmap()
GG_save("figs/cors.png")
#plot vars vs. temp
p_lat_temp = GG_scatter(d, "gps_lat", "Temperature_C", case_names = "name", alpha = 0.1) +
geom_smooth() +
labs(
x = "Latitude",
y = "Temperature (C)"
) +
#highlight Everest
# encircling points where col1 value is more than 6
geom_point(data = d %>% filter(Everest),
color="purple",
pch=21,
size=30
)
#lon
p_lon_temp = d %>% GG_scatter("gps_lon", "Temperature_C", case_names = "name", alpha = 0.1) +
geom_smooth() +
labs(
x = "Longitude",
y = "Temperature (C)"
) +
#highlight Everest
# encircling points where col1 value is more than 6
geom_point(data = d %>% filter(Everest),
color="purple",
pch=21,
size=30
)
#abs lat
p_abs_lat_temp = d %>% arrange(!Everest) %>% GG_scatter("abs_lat", "Temperature_C", case_names = "name", alpha = 0.1) +
geom_smooth() +
labs(
x = "Absolute Latitude",
y = "Temperature (C)"
) +
#highlight Everest
# encircling points where col1 value is more than 6
geom_point(data = d %>% filter(Everest),
color="purple",
pch=21,
size=30
)
#altitude
p_alt_temp = d %>% arrange(!Everest) %>% GG_scatter("Altitude_m", "Temperature_C", case_names = "name", alpha = 0.1) +
geom_smooth() +
labs(
x = "Altitude (m)",
y = "Temperature (C)"
) +
#highlight Everest
# encircling points where col1 value is more than 6
geom_point(data = d %>% filter(Everest),
color="purple",
pch=21,
size=30
)
#combined plot
p_lat_temp + p_lon_temp + p_abs_lat_temp + p_alt_temp
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
GG_save("figs/vars_vs_temp.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#models
d_std = d %>% df_standardize()
## Skipped name because it is a character (string)
## Skipped country because it is a character (string)
## Skipped label because it is a character (string)
## Skipped Everest because it is a logical (boolean)
mods = list(
altitude = lm(Temperature_C ~ Altitude_m, data = d_std),
abs_latitude = lm(Temperature_C ~ abs_lat, data = d_std),
both = lm(Temperature_C ~ Altitude_m + abs_lat, data = d_std),
interaction = lm(Temperature_C ~ Altitude_m * abs_lat, data = d_std)
)
mods %>% summarize_models() %>% flextable() %>% set_caption("Models for predicting annual mean temperature")
Predictor/Model | altitude | abs_latitude | both | interaction |
---|---|---|---|---|
(Intercept) | -0.01 (0.010) | 0.00 (0.005) | 0.00 (0.004) | 0.01 (0.004***) |
abs_lat | -0.88 (0.005***) | -0.94 (0.004***) | -0.94 (0.004***) | |
Altitude_m | -0.04 (0.010***) | -0.27 (0.004***) | -0.21 (0.005***) | |
Altitude_m:abs_lat | 0.07 (0.004***) | |||
R2 adj. | 0.001 | 0.771 | 0.836 | 0.841 |
N | 10123 | 10241 | 10123 | 10123 |
#predictions
ggeffects::ggpredict(mods[[4]], terms = c("Altitude_m", "abs_lat")) %>%
plot(show_data = T) +
labs(
x = "Altitude (m)",
y = "Temperature (C)"
) +
#highlight Everest
geom_point(data = tibble(x = d$Altitude_m[1], predicted = -7, group_col = 1),
color="purple",
fill = NA,
pch=21,
size=30
) +
geom_text(data = tibble(x = d$Altitude_m[1], predicted = -7, group_col = 1, label = "Mount Everest"), aes(label = label), size = 3)
## Data points may overlap. Use the `jitter` argument to add some amount of
## random variation to the location of data points and avoid overplotting.
GG_save("figs/interaction_plot.png")
#versions
write_sessioninfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] flextable_0.9.7 patchwork_1.2.0 raster_3.6-31
## [4] sp_2.1-4 jsonlite_1.8.8 kirkegaard_2025-01-08
## [7] psych_2.4.6.26 assertthat_0.2.1 weights_1.0.4
## [10] Hmisc_5.1-3 magrittr_2.0.3 lubridate_1.9.3
## [13] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [16] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [19] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 mnormt_2.1.1 gridExtra_2.3
## [4] rlang_1.1.4 e1071_1.7-14 compiler_4.4.2
## [7] mgcv_1.9-1 gdata_3.0.0 systemfonts_1.1.0
## [10] vctrs_0.6.5 pkgconfig_2.0.3 shape_1.4.6.1
## [13] fastmap_1.2.0 backports_1.5.0 labeling_0.4.3
## [16] utf8_1.2.4 rmarkdown_2.28 tzdb_0.4.0
## [19] haven_2.5.4 nloptr_2.1.1 ragg_1.3.2
## [22] xfun_0.47 glmnet_4.1-8 jomo_2.7-6
## [25] cachem_1.1.0 highr_0.11 ggeffects_1.7.0
## [28] uuid_1.2-1 pan_1.9 terra_1.8-21
## [31] broom_1.0.6 parallel_4.4.2 cluster_2.1.8
## [34] R6_2.5.1 bslib_0.8.0 stringi_1.8.4
## [37] boot_1.3-31 rpart_4.1.24 jquerylib_0.1.4
## [40] Rcpp_1.0.13 iterators_1.0.14 knitr_1.48
## [43] base64enc_0.1-3 Matrix_1.7-2 splines_4.4.2
## [46] nnet_7.3-20 timechange_0.3.0 tidyselect_1.2.1
## [49] rstudioapi_0.16.0 yaml_2.3.10 codetools_0.2-19
## [52] plyr_1.8.9 lattice_0.22-5 withr_3.0.1
## [55] askpass_1.2.0 evaluate_0.24.0 foreign_0.8-88
## [58] sf_1.0-16 survival_3.8-3 units_0.8-5
## [61] proxy_0.4-27 zip_2.3.1 xml2_1.3.6
## [64] pillar_1.9.0 mice_3.16.0 KernSmooth_2.23-26
## [67] checkmate_2.3.2 foreach_1.5.2 insight_0.20.3
## [70] generics_0.1.3 hms_1.1.3 munsell_0.5.1
## [73] scales_1.3.0 minqa_1.2.8 gtools_3.9.5
## [76] class_7.3-23 glue_1.7.0 gdtools_0.4.1
## [79] tools_4.4.2 data.table_1.16.0 lme4_1.1-35.5
## [82] grid_4.4.2 datawizard_0.12.2 colorspace_2.1-1
## [85] nlme_3.1-167 htmlTable_2.4.3 Formula_1.2-5
## [88] cli_3.6.3 textshaping_0.4.0 officer_0.6.7
## [91] fontBitstreamVera_0.1.1 fansi_1.0.6 gtable_0.3.5
## [94] sass_0.4.9 digest_0.6.37 fontquiver_0.2.1
## [97] classInt_0.4-10 farver_2.1.2 htmlwidgets_1.6.4
## [100] htmltools_0.5.8.1 lifecycle_1.0.4 mitml_0.4-5
## [103] fontLiberation_0.1.0 openssl_2.2.1 MASS_7.3-64
#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}