library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
readxl,
googlesheets4,
rms,
ggeffects,
parameters,
patchwork
)
theme_set(theme_bw())
options(
digits = 3
)
#multithreading
#library(future)
#plan(multisession(workers = 8))
#prices
#https://worldpopulationreview.com/country-rankings/cost-of-electricity-by-country
gs4_deauth()
elec_prices = read_sheet("https://docs.google.com/spreadsheets/d/1-CfYzgcsWXeHVmWkpMqxDPYiEctMqcu7WmXY5auyTPs/edit?gid=0#gid=0", sheet = 1, skip = 1) %>%
df_legalize_names() %>%
mutate(
ISO = pu_translate(country)
)
## ✔ Reading from "Electricity price and renewables".
## ✔ Range ''prices'!2:10000000'.
#electricity sources
#https://en.wikipedia.org/wiki/List_of_countries_by_renewable_electricity_production#Renewable_production_(percent)
elec_sources = read_sheet("https://docs.google.com/spreadsheets/d/1-CfYzgcsWXeHVmWkpMqxDPYiEctMqcu7WmXY5auyTPs/edit?gid=0#gid=0", sheet = 2, skip = 1) %>%
df_legalize_names() %>%
mutate(
ISO = pu_translate(Location)
)
## ✔ Reading from "Electricity price and renewables".
## ✔ Range ''renewables'!2:10000000'.
## No exact match: World
## Best fuzzy match found: World -> world with distance 1.00
#gdp data
gdp_per_person = read_csv("data/gdp-per-capita-worldbank/gdp-per-capita-worldbank.csv") %>% df_legalize_names() %>%
rename(
ISO = Code
)
## Rows: 7063 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, GDP per capita, PPP (constant 2021 international $)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gdp_per_worker = read_csv("data/gdp-per-person-employed-constant-ppp/gdp-per-person-employed-constant-ppp.csv") %>% df_legalize_names() %>%
rename(
ISO = Code
)
## Rows: 6240 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, GDP per person employed (constant 2021 PPP $)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#median consumption per day
median_consump = read_csv("data/daily-median-income/daily-median-income.csv") %>%
df_legalize_names() %>%
rename(
ISO = Code
)
## Rows: 2635 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Entity, Code, 990177-annotations
## dbl (2): Year, Median income or consumption
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#too sparse
#2023 or 2024 data
d = inner_join(
elec_prices %>% select(-country),
elec_sources %>% select(-Location, -Year),
by = "ISO"
) %>%
left_join(
gdp_per_person %>% filter(Year == 2023) %>% select(ISO, GDP_per_capita_PPP_constant_2021_international, -Entity, -Year),
by = "ISO"
) %>%
left_join(
gdp_per_worker %>% filter(Year == 2023) %>% select(ISO, GDP_per_person_employed_constant_2021_PPP, -Entity, -Year),
by = "ISO"
)
#rename
d %<>% rename(
GDPpc = GDP_per_capita_PPP_constant_2021_international,
GDPpw = GDP_per_person_employed_constant_2021_PPP
)
#other variables
d %<>% mutate(
#prices, to cents
USD_kw_march2024 = USD_kw_march2024 * 100,
USD_kw_sep2024 = USD_kw_sep2024 * 100,
# USD_kw_abs_diff = abs(USD_kw_sep2024 - USD_kw_march2024),
# USD_kw_minmax_ratio = USD_kw_abs_diff / USD_kw,
#log10 gdp
log10_GDPpc = log10(GDPpc),
log10_GDPpw = log10(GDPpw),
#GDP to 1000s
GDPpc = GDPpc / 1000,
GDPpw = GDPpw / 1000,
#to fractions, not percent
Renew = Renew / 100,
Hydro = Hydro / 100,
Wind = Wind / 100,
Solar = Solar / 100,
Bio = Bio / 100,
Geo = Geo / 100,
#solar wind
SolarWind = Solar + Wind,
) %>%
#need a second mutate to update variables...
mutate(
#other price variables
USD_kw = select(., USD_kw_march2024:USD_kw_sep2024) %>% rowMeans(na.rm = T),
#recalculate totals due avoid rounding issues
Renew = Hydro + Wind + Solar + Bio + Geo,
notRenew = 1 - Renew,
)
attr(d$SolarWind, "label") = NULL
#no missing
d2 = d %>% miss_filter()
#correlations
d2 %>%
select(
-ISO, -GDPpw, -log10_GDPpw, -log10_GDPpc, -USD_kw_march2024, -USD_kw_sep2024
) %>%
GG_heatmap(remove_diag = T, cross_out_nonsig = T)
GG_save("figs/correlations.png")
#simple plots
patchwork::wrap_plots(
d2 %>% GG_scatter("GDPpc", "USD_kw", case_names = "ISO") +
scale_y_continuous(limits = c(0, 60), breaks = seq(0, 100, 10)) +
labs(
x = "GDP per capita (1000s)",
y = "Price (cents per kWh)"
),
d2 %>% GG_scatter("SolarWind", "USD_kw", case_names = "ISO") +
scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_y_continuous(limits = c(0, 60), breaks = seq(0, 100, 10)) +
labs(
x = "Solar + Wind (% of total)",
y = "Price (cents per kWh)"
),
axes = "collect_y"
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/scatters_prices.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#show distribution of elec sources by country
#stacked bars
d2 %>%
select(ISO, Hydro:Geo, notRenew) %>%
pivot_longer(
cols = -ISO,
names_to = "Renewable",
values_to = "Fraction"
) %>%
#filter to richest countries
filter(
ISO %in% (d %>% filter(GDPpc > 50) %>% pull(ISO))
) %>%
#add names
mutate(
country = pu_translate(ISO, reverse = T)
) %>%
ggplot(aes(y = country, x = Fraction, fill = Renewable)) +
geom_bar(stat = "identity") +
#add text of the values inside the bars in the center
#dont show values of 0%
geom_text(aes(label = ifelse(Fraction > 0.005, scales::percent(Fraction, accuracy = 1), "")), position = position_stack(vjust = 0.5)) +
labs(
x = "Fraction of total electricity",
y = "Country",
title = "Electricity sources in richest countries"
)
GG_save("figs/sources_stacked_bars.png")
#regressions
mods_price = list(
ols(USD_kw ~ GDPpc, data = d2),
# ols(USD_kw ~ GDPpw, data = d2),
# ols(USD_kw ~ log10_GDPpc, data = d2),
# ols(USD_kw ~ log10_GDPpw, data = d2),
# ols(USD_kw ~ GDPpc + GDPpw, data = d2),
ols(USD_kw ~ SolarWind, data = d2),
ols(USD_kw ~ Renew, data = d2),
ols(USD_kw ~ Hydro + Hydro + Wind + Solar + Bio + Geo, data = d2),
ols(USD_kw ~ GDPpc + Hydro + Hydro + Wind + Solar + Bio + Geo, data = d2),
ols(USD_kw ~ Wind + Solar, data = d2),
ols(USD_kw ~ GDPpc + Wind + Solar, data = d2),
ols(USD_kw ~ GDPpc + SolarWind, data = d2),
ols(USD_kw ~ GDPpc * SolarWind, data = d2),
ols(USD_kw ~ rcs(GDPpc, 3) + SolarWind, data = d2),
ols(USD_kw ~ GDPpc + rcs(SolarWind, 3), data = d2)
# ols(USD_kw ~ rcs(GDPpc, 3) + rcs(SolarWind, 3), data = d2)
)
mods_price %>%
summarize_models() %>%
#nice table
flextable::flextable()
Predictor/Model | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
---|---|---|---|---|---|---|---|---|---|---|---|
Intercept | 10.58 (1.361***) | 10.05 (1.015***) | 13.79 (1.690***) | 10.34 (1.431***) | 7.60 (1.695***) | 10.26 (1.077***) | 8.07 (1.260***) | 8.00 (1.174***) | 7.28 (1.409***) | 7.08 (1.695***) | 6.28 (1.465***) |
GDPpc | 0.18 (0.033***) | 0.09 (0.031*) | 0.09 (0.030**) | 0.09 (0.030**) | 0.11 (0.036**) | (nonlinear) | 0.10 (0.029***) | ||||
SolarWind | 55.96 (5.906***) | 48.37 (6.188***) | 56.46 (10.693***) | 47.90 (6.230***) | (nonlinear) | ||||||
Renew | 5.43 (3.080) | ||||||||||
Hydro | -2.18 (2.625) | -0.17 (2.652) | |||||||||
Wind | 51.45 (8.914***) | 43.51 (9.117***) | 59.57 (8.383***) | 49.25 (8.769***) | |||||||
Solar | 45.87 (15.150**) | 46.69 (14.749**) | 47.46 (15.174**) | 46.48 (14.696**) | |||||||
Bio | 25.17 (11.126) | 21.83 (10.893) | |||||||||
Geo | 8.41 (13.638) | 10.50 (13.295) | |||||||||
GDPpc * SolarWind | -0.15 (0.166) | ||||||||||
R2 adj. | 0.185 | 0.402 | 0.016 | 0.414 | 0.444 | 0.399 | 0.437 | 0.441 | 0.440 | 0.439 | 0.452 |
N | 133 | 133 | 133 | 133 | 133 | 133 | 133 | 133 | 133 | 133 | 133 |
#model tests
#linear interaction GDPpc * SolarWind
lrtest(
mods_price[[8]],
mods_price[[9]]
)
##
## Model 1: USD_kw ~ GDPpc + SolarWind
## Model 2: USD_kw ~ GDPpc * SolarWind
##
## L.R. Chisq d.f. P
## 0.885 1.000 0.347
#nonlinear gdppc
lrtest(
mods_price[[8]],
mods_price[[10]]
)
##
## Model 1: USD_kw ~ GDPpc + SolarWind
## Model 2: USD_kw ~ rcs(GDPpc, 3) + SolarWind
##
## L.R. Chisq d.f. P
## 0.587 1.000 0.443
#nonlinear solarwind
lrtest(
mods_price[[8]],
mods_price[[11]]
)
##
## Model 1: USD_kw ~ GDPpc + SolarWind
## Model 2: USD_kw ~ GDPpc + rcs(SolarWind, 3)
##
## L.R. Chisq d.f. P
## 3.7699 1.0000 0.0522
#plot nonlinear gdp + solarwind
ggpredict(
mods_price[[8]],
terms = c("SolarWind", "GDPpc"),
) %>%
plot() +
scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(
x = "Solar + Wind (% of total)",
y = "Price (cents per kWh)",
title = "Electricity price as function GDPpc + SolarWind"
) +
guides(
color = guide_legend(title = "GDP per capita (1000s)", position = "bottom")
)
GG_save("figs/electricity_price_gdp_solarwind.png")
#compare effect sizes
model_parameters(mods_price[[8]], standardize = "refit")
#price for Denmark with 0% SolarWind
predict(
mods_price[[8]],
newdata = bind_rows(
d %>% filter(ISO == "DNK") %>% mutate(adjustment = "current"),
d %>% filter(ISO == "DNK") %>% mutate(SolarWind = 0, adjustment = "0% SolarWind"),
d %>% filter(ISO == "DNK") %>% mutate(SolarWind = 1, adjustment = "100% SolarWind")
)
)
## 1 2 3
## 47.7 14.8 63.2
#versions
write_sessioninfo()
## R version 4.5.0 (2025-04-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.3.0 parameters_0.25.0 ggeffects_2.2.1
## [4] rms_8.0-0 googlesheets4_1.1.1 readxl_1.4.5
## [7] kirkegaard_2025-05-02 psych_2.5.3 assertthat_0.2.1
## [10] weights_1.0.4 Hmisc_5.2-3 magrittr_2.0.3
## [13] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
## [16] dplyr_1.1.4 purrr_1.0.4 readr_2.1.5
## [19] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.2
## [22] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.17.1 jsonlite_2.0.0 shape_1.4.6.1
## [4] datawizard_1.0.2 TH.data_1.1-3 jomo_2.7-6
## [7] farver_2.1.2 nloptr_2.2.1 rmarkdown_2.29
## [10] fs_1.6.6 ragg_1.4.0 vctrs_0.6.5
## [13] minqa_1.2.8 askpass_1.2.1 base64enc_0.1-3
## [16] htmltools_0.5.8.1 polspline_1.1.25 haven_2.5.4
## [19] curl_6.2.2 broom_1.0.8 cellranger_1.1.0
## [22] Formula_1.2-5 mitml_0.4-5 sass_0.4.10
## [25] bslib_0.9.0 htmlwidgets_1.6.4 plyr_1.8.9
## [28] sandwich_3.1-1 zoo_1.8-14 cachem_1.1.0
## [31] uuid_1.2-1 lifecycle_1.0.4 iterators_1.0.14
## [34] pkgconfig_2.0.3 Matrix_1.7-3 R6_2.6.1
## [37] fastmap_1.2.0 rbibutils_2.3 digest_0.6.37
## [40] colorspace_2.1-1 textshaping_1.0.0 labeling_0.4.3
## [43] timechange_0.3.0 gdata_3.0.1 httr_1.4.7
## [46] mgcv_1.9-1 compiler_4.5.0 gargle_1.5.2
## [49] bit64_4.6.0-1 fontquiver_0.2.1 withr_3.0.2
## [52] htmlTable_2.4.3 backports_1.5.0 pan_1.9
## [55] MASS_7.3-65 quantreg_6.1 openssl_2.3.2
## [58] gtools_3.9.5 tools_4.5.0 foreign_0.8-90
## [61] googledrive_2.1.1 zip_2.3.2 nnet_7.3-20
## [64] glue_1.8.0 nlme_3.1-168 grid_4.5.0
## [67] stringdist_0.9.15 checkmate_2.3.2 cluster_2.1.8.1
## [70] generics_0.1.3 gtable_0.3.6 tzdb_0.5.0
## [73] data.table_1.17.0 hms_1.1.3 xml2_1.3.8
## [76] foreach_1.5.2 pillar_1.10.2 vroom_1.6.5
## [79] splines_4.5.0 lattice_0.22-5 survival_3.8-3
## [82] bit_4.6.0 SparseM_1.84-2 tidyselect_1.2.1
## [85] fontLiberation_0.1.0 knitr_1.50 fontBitstreamVera_0.1.1
## [88] reformulas_0.4.0 gridExtra_2.3 xfun_0.52
## [91] stringi_1.8.7 yaml_2.3.10 boot_1.3-31
## [94] evaluate_1.0.3 codetools_0.2-19 officer_0.6.8
## [97] gdtools_0.4.2 cli_3.6.4 rpart_4.1.24
## [100] systemfonts_1.2.2 Rdpack_2.6.4 munsell_0.5.1
## [103] jquerylib_0.1.4 Rcpp_1.0.14 parallel_4.5.0
## [106] MatrixModels_0.5-4 bayestestR_0.15.3 lme4_1.1-37
## [109] glmnet_4.1-8 mvtnorm_1.3-3 scales_1.3.0
## [112] insight_1.2.0 crayon_1.5.3 flextable_0.9.7
## [115] rlang_1.1.6 multcomp_1.4-28 mnormt_2.1.1
## [118] mice_3.17.0
#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")
#OSF
if (F) {
library(osfr)
#login
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload all files in project
#overwrite existing (versioning)
osf_upload(
osf_proj,
path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"),
conflicts = "overwrite"
)
}