Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  readxl,
  googlesheets4,
  rms,
  ggeffects,
  parameters,
  patchwork
)

theme_set(theme_bw())

options(
    digits = 3
)

#multithreading
#library(future)
#plan(multisession(workers = 8))

Functions

Data

#prices
#https://worldpopulationreview.com/country-rankings/cost-of-electricity-by-country
gs4_deauth()
elec_prices = read_sheet("https://docs.google.com/spreadsheets/d/1-CfYzgcsWXeHVmWkpMqxDPYiEctMqcu7WmXY5auyTPs/edit?gid=0#gid=0", sheet = 1, skip = 1) %>% 
  df_legalize_names() %>% 
  mutate(
    ISO = pu_translate(country)
  )
## ✔ Reading from "Electricity price and renewables".
## ✔ Range ''prices'!2:10000000'.
#electricity sources
#https://en.wikipedia.org/wiki/List_of_countries_by_renewable_electricity_production#Renewable_production_(percent)
elec_sources = read_sheet("https://docs.google.com/spreadsheets/d/1-CfYzgcsWXeHVmWkpMqxDPYiEctMqcu7WmXY5auyTPs/edit?gid=0#gid=0", sheet = 2, skip = 1) %>% 
  df_legalize_names() %>% 
  mutate(
    ISO = pu_translate(Location)
  )
## ✔ Reading from "Electricity price and renewables".
## ✔ Range ''renewables'!2:10000000'.
## No exact match: World
## Best fuzzy match found: World -> world with distance 1.00
#gdp data
gdp_per_person = read_csv("data/gdp-per-capita-worldbank/gdp-per-capita-worldbank.csv") %>% df_legalize_names() %>% 
  rename(
    ISO = Code
  )
## Rows: 7063 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, GDP per capita, PPP (constant 2021 international $)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gdp_per_worker = read_csv("data/gdp-per-person-employed-constant-ppp/gdp-per-person-employed-constant-ppp.csv") %>% df_legalize_names() %>% 
  rename(
    ISO = Code
  )
## Rows: 6240 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, GDP per person employed (constant 2021 PPP $)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#median consumption per day
median_consump = read_csv("data/daily-median-income/daily-median-income.csv") %>% 
  df_legalize_names() %>% 
  rename(
    ISO = Code
  )
## Rows: 2635 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Entity, Code, 990177-annotations
## dbl (2): Year, Median income or consumption
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#too sparse

Join

#2023 or 2024 data
d = inner_join(
  elec_prices %>% select(-country),
  elec_sources %>% select(-Location, -Year),
  by = "ISO"
) %>% 
  left_join(
    gdp_per_person %>% filter(Year == 2023) %>% select(ISO, GDP_per_capita_PPP_constant_2021_international, -Entity, -Year),
    by = "ISO"
  ) %>% 
  left_join(
    gdp_per_worker %>% filter(Year == 2023) %>% select(ISO, GDP_per_person_employed_constant_2021_PPP, -Entity, -Year),
    by = "ISO"
  )

#rename
d %<>% rename(
  GDPpc = GDP_per_capita_PPP_constant_2021_international,
  GDPpw = GDP_per_person_employed_constant_2021_PPP
)

#other variables
d %<>% mutate(
  #prices, to cents
  USD_kw_march2024 = USD_kw_march2024 * 100,
  USD_kw_sep2024 = USD_kw_sep2024 * 100,

  # USD_kw_abs_diff = abs(USD_kw_sep2024 - USD_kw_march2024),
  # USD_kw_minmax_ratio = USD_kw_abs_diff / USD_kw,
  
  #log10 gdp
  log10_GDPpc = log10(GDPpc),
  log10_GDPpw = log10(GDPpw),
  
  #GDP to 1000s
  GDPpc = GDPpc / 1000,
  GDPpw = GDPpw / 1000,
  
  #to fractions, not percent
  Renew = Renew / 100,
  Hydro = Hydro / 100,
  Wind = Wind / 100,
  Solar = Solar / 100,
  Bio = Bio / 100,
  Geo = Geo / 100,
  
  #solar wind
  SolarWind = Solar + Wind,
) %>% 
  #need a second mutate to update variables...
  mutate(
    #other price variables
    USD_kw = select(., USD_kw_march2024:USD_kw_sep2024) %>% rowMeans(na.rm = T),
    
    #recalculate totals due avoid rounding issues
    Renew = Hydro + Wind + Solar + Bio + Geo,
    notRenew = 1 - Renew,
  )
attr(d$SolarWind, "label") = NULL

Analysis

#no missing
d2 = d %>% miss_filter()

#correlations
d2 %>% 
  select(
    -ISO, -GDPpw, -log10_GDPpw, -log10_GDPpc, -USD_kw_march2024, -USD_kw_sep2024
  ) %>% 
  GG_heatmap(remove_diag = T, cross_out_nonsig = T)

GG_save("figs/correlations.png")

#simple plots
patchwork::wrap_plots(
  d2 %>% GG_scatter("GDPpc", "USD_kw", case_names = "ISO") +
    scale_y_continuous(limits = c(0, 60), breaks = seq(0, 100, 10)) +
    labs(
      x = "GDP per capita (1000s)",
      y = "Price (cents per kWh)"
    ),

  d2 %>% GG_scatter("SolarWind", "USD_kw", case_names = "ISO") +
    scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
    scale_y_continuous(limits = c(0, 60), breaks = seq(0, 100, 10)) +
    labs(
      x = "Solar + Wind (% of total)",
      y = "Price (cents per kWh)"
    ),
  axes = "collect_y"
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/scatters_prices.png")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#show distribution of elec sources by country
#stacked bars
d2 %>% 
  select(ISO, Hydro:Geo, notRenew) %>%
  pivot_longer(
    cols = -ISO,
    names_to = "Renewable",
    values_to = "Fraction"
  ) %>% 
  #filter to richest countries
  filter(
    ISO %in% (d %>% filter(GDPpc > 50) %>% pull(ISO))
  ) %>% 
  #add names
  mutate(
    country = pu_translate(ISO, reverse = T)
  ) %>% 
  ggplot(aes(y = country, x = Fraction, fill = Renewable)) +
  geom_bar(stat = "identity") +
  #add text of the values inside the bars in the center
  #dont show values of 0%
  geom_text(aes(label = ifelse(Fraction > 0.005, scales::percent(Fraction, accuracy = 1), "")), position = position_stack(vjust = 0.5)) +
  labs(
    x = "Fraction of total electricity",
    y = "Country",
    title = "Electricity sources in richest countries"
  )

GG_save("figs/sources_stacked_bars.png")

#regressions
mods_price = list(
  ols(USD_kw ~ GDPpc, data = d2),
  # ols(USD_kw ~ GDPpw, data = d2),
  # ols(USD_kw ~ log10_GDPpc, data = d2),
  # ols(USD_kw ~ log10_GDPpw, data = d2),
  # ols(USD_kw ~ GDPpc + GDPpw, data = d2),
  ols(USD_kw ~ SolarWind, data = d2),
  ols(USD_kw ~ Renew, data = d2),
  ols(USD_kw ~ Hydro + Hydro + Wind + Solar + Bio + Geo, data = d2),
  ols(USD_kw ~ GDPpc + Hydro + Hydro + Wind + Solar + Bio + Geo, data = d2),
  ols(USD_kw ~ Wind + Solar, data = d2),
  ols(USD_kw ~ GDPpc + Wind + Solar, data = d2),
  ols(USD_kw ~ GDPpc + SolarWind, data = d2),
  ols(USD_kw ~ GDPpc * SolarWind, data = d2),
  ols(USD_kw ~ rcs(GDPpc, 3) + SolarWind, data = d2),
  ols(USD_kw ~ GDPpc + rcs(SolarWind, 3), data = d2)
  # ols(USD_kw ~ rcs(GDPpc, 3) + rcs(SolarWind, 3), data = d2)
)

mods_price %>% 
  summarize_models() %>% 
  #nice table
  flextable::flextable()

Predictor/Model

1

2

3

4

5

6

7

8

9

10

11

Intercept

10.58 (1.361***)

10.05 (1.015***)

13.79 (1.690***)

10.34 (1.431***)

7.60 (1.695***)

10.26 (1.077***)

8.07 (1.260***)

8.00 (1.174***)

7.28 (1.409***)

7.08 (1.695***)

6.28 (1.465***)

GDPpc

0.18 (0.033***)

0.09 (0.031*)

0.09 (0.030**)

0.09 (0.030**)

0.11 (0.036**)

(nonlinear)

0.10 (0.029***)

SolarWind

55.96 (5.906***)

48.37 (6.188***)

56.46 (10.693***)

47.90 (6.230***)

(nonlinear)

Renew

5.43 (3.080)

Hydro

-2.18 (2.625)

-0.17 (2.652)

Wind

51.45 (8.914***)

43.51 (9.117***)

59.57 (8.383***)

49.25 (8.769***)

Solar

45.87 (15.150**)

46.69 (14.749**)

47.46 (15.174**)

46.48 (14.696**)

Bio

25.17 (11.126)

21.83 (10.893)

Geo

8.41 (13.638)

10.50 (13.295)

GDPpc * SolarWind

-0.15 (0.166)

R2 adj.

0.185

0.402

0.016

0.414

0.444

0.399

0.437

0.441

0.440

0.439

0.452

N

133

133

133

133

133

133

133

133

133

133

133

#model tests
#linear interaction GDPpc * SolarWind
lrtest(
  mods_price[[8]],
  mods_price[[9]]
)
## 
## Model 1: USD_kw ~ GDPpc + SolarWind
## Model 2: USD_kw ~ GDPpc * SolarWind
## 
## L.R. Chisq       d.f.          P 
##      0.885      1.000      0.347
#nonlinear gdppc
lrtest(
  mods_price[[8]],
  mods_price[[10]]
)
## 
## Model 1: USD_kw ~ GDPpc + SolarWind
## Model 2: USD_kw ~ rcs(GDPpc, 3) + SolarWind
## 
## L.R. Chisq       d.f.          P 
##      0.587      1.000      0.443
#nonlinear solarwind
lrtest(
  mods_price[[8]],
  mods_price[[11]]
)
## 
## Model 1: USD_kw ~ GDPpc + SolarWind
## Model 2: USD_kw ~ GDPpc + rcs(SolarWind, 3)
## 
## L.R. Chisq       d.f.          P 
##     3.7699     1.0000     0.0522
#plot nonlinear gdp + solarwind
ggpredict(
  mods_price[[8]],
  terms = c("SolarWind", "GDPpc"),
) %>% 
  plot() +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Solar + Wind (% of total)",
    y = "Price (cents per kWh)",
    title = "Electricity price as function GDPpc + SolarWind"
  ) +
  guides(
    color = guide_legend(title = "GDP per capita (1000s)", position = "bottom")
  )

GG_save("figs/electricity_price_gdp_solarwind.png")

#compare effect sizes
model_parameters(mods_price[[8]], standardize = "refit")
#price for Denmark with 0% SolarWind
predict(
  mods_price[[8]],
  newdata = bind_rows(
    d %>% filter(ISO == "DNK") %>% mutate(adjustment = "current"),
    d %>% filter(ISO == "DNK") %>% mutate(SolarWind = 0, adjustment = "0% SolarWind"),
    d %>% filter(ISO == "DNK") %>% mutate(SolarWind = 1, adjustment = "100% SolarWind")
  )
)
##    1    2    3 
## 47.7 14.8 63.2

Meta

#versions
write_sessioninfo()
## R version 4.5.0 (2025-04-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.3.0       parameters_0.25.0     ggeffects_2.2.1      
##  [4] rms_8.0-0             googlesheets4_1.1.1   readxl_1.4.5         
##  [7] kirkegaard_2025-05-02 psych_2.5.3           assertthat_0.2.1     
## [10] weights_1.0.4         Hmisc_5.2-3           magrittr_2.0.3       
## [13] lubridate_1.9.4       forcats_1.0.0         stringr_1.5.1        
## [16] dplyr_1.1.4           purrr_1.0.4           readr_2.1.5          
## [19] tidyr_1.3.1           tibble_3.2.1          ggplot2_3.5.2        
## [22] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1       jsonlite_2.0.0          shape_1.4.6.1          
##   [4] datawizard_1.0.2        TH.data_1.1-3           jomo_2.7-6             
##   [7] farver_2.1.2            nloptr_2.2.1            rmarkdown_2.29         
##  [10] fs_1.6.6                ragg_1.4.0              vctrs_0.6.5            
##  [13] minqa_1.2.8             askpass_1.2.1           base64enc_0.1-3        
##  [16] htmltools_0.5.8.1       polspline_1.1.25        haven_2.5.4            
##  [19] curl_6.2.2              broom_1.0.8             cellranger_1.1.0       
##  [22] Formula_1.2-5           mitml_0.4-5             sass_0.4.10            
##  [25] bslib_0.9.0             htmlwidgets_1.6.4       plyr_1.8.9             
##  [28] sandwich_3.1-1          zoo_1.8-14              cachem_1.1.0           
##  [31] uuid_1.2-1              lifecycle_1.0.4         iterators_1.0.14       
##  [34] pkgconfig_2.0.3         Matrix_1.7-3            R6_2.6.1               
##  [37] fastmap_1.2.0           rbibutils_2.3           digest_0.6.37          
##  [40] colorspace_2.1-1        textshaping_1.0.0       labeling_0.4.3         
##  [43] timechange_0.3.0        gdata_3.0.1             httr_1.4.7             
##  [46] mgcv_1.9-1              compiler_4.5.0          gargle_1.5.2           
##  [49] bit64_4.6.0-1           fontquiver_0.2.1        withr_3.0.2            
##  [52] htmlTable_2.4.3         backports_1.5.0         pan_1.9                
##  [55] MASS_7.3-65             quantreg_6.1            openssl_2.3.2          
##  [58] gtools_3.9.5            tools_4.5.0             foreign_0.8-90         
##  [61] googledrive_2.1.1       zip_2.3.2               nnet_7.3-20            
##  [64] glue_1.8.0              nlme_3.1-168            grid_4.5.0             
##  [67] stringdist_0.9.15       checkmate_2.3.2         cluster_2.1.8.1        
##  [70] generics_0.1.3          gtable_0.3.6            tzdb_0.5.0             
##  [73] data.table_1.17.0       hms_1.1.3               xml2_1.3.8             
##  [76] foreach_1.5.2           pillar_1.10.2           vroom_1.6.5            
##  [79] splines_4.5.0           lattice_0.22-5          survival_3.8-3         
##  [82] bit_4.6.0               SparseM_1.84-2          tidyselect_1.2.1       
##  [85] fontLiberation_0.1.0    knitr_1.50              fontBitstreamVera_0.1.1
##  [88] reformulas_0.4.0        gridExtra_2.3           xfun_0.52              
##  [91] stringi_1.8.7           yaml_2.3.10             boot_1.3-31            
##  [94] evaluate_1.0.3          codetools_0.2-19        officer_0.6.8          
##  [97] gdtools_0.4.2           cli_3.6.4               rpart_4.1.24           
## [100] systemfonts_1.2.2       Rdpack_2.6.4            munsell_0.5.1          
## [103] jquerylib_0.1.4         Rcpp_1.0.14             parallel_4.5.0         
## [106] MatrixModels_0.5-4      bayestestR_0.15.3       lme4_1.1-37            
## [109] glmnet_4.1-8            mvtnorm_1.3-3           scales_1.3.0           
## [112] insight_1.2.0           crayon_1.5.3            flextable_0.9.7        
## [115] rlang_1.1.6             multcomp_1.4-28         mnormt_2.1.1           
## [118] mice_3.17.0
#write data to file for reuse
d %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}