Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

## Loading required package: magrittr

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

## Loading required package: weights

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: assertthat

## 
## Attaching package: 'assertthat'

## The following object is masked from 'package:tibble':
## 
##     has_name

## Loading required package: psych

## 
## Attaching package: 'psych'

## The following object is masked from 'package:Hmisc':
## 
##     describe

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## 
## Attaching package: 'kirkegaard'

## The following object is masked from 'package:psych':
## 
##     rescale

## The following object is masked from 'package:assertthat':
## 
##     are_equal

## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric

## The following object is masked from 'package:base':
## 
##     +

load_packages(
    patchwork,
    metafor,
    broom
)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loading required package: metadat

## 
## Loading the 'metafor' package (version 3.4-0). For an
## introduction to the package please type: help(metafor)

theme_set(theme_bw())

options(
    digits = 3
)

Analysis 1

#yearly gain rate is 10/100 years, so 0.1 cm/year
#simulate uniformly distributed studies of different sample sizes
height_intercept = 170
height_slope = 0.1
height_sd = 7

sim_study = function(n) {
  study_year = runif(1, 1900, 2000)
  true_height_mean = height_intercept + ((study_year-1900)*height_slope)
  study_data = rnorm(n = n, mean = true_height_mean, sd = height_sd)
  
  describe2(study_data, all_vars = T) %>% mutate(study_year = study_year)
}

#sim studies
set.seed(1)
small_studies = map_df(rep(5, 100), sim_study)
large_studies = map_df(rep(1000, 100), sim_study)

#meta-analysis
(meta_small_studies = small_studies %$% rma(yi = mean, sei = se, mods = ~ study_year))

## 
## Mixed-Effects Model (k = 100; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     4.9431 (SE = 1.5266)
## tau (square root of estimated tau^2 value):             2.2233
## I^2 (residual heterogeneity / unaccounted variability): 59.59%
## H^2 (unaccounted variability / sampling variability):   2.47
## R^2 (amount of heterogeneity accounted for):            53.25%
## 
## Test for Residual Heterogeneity:
## QE(df = 98) = 310.7530, p-val < .0001
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 62.2808, p-val < .0001
## 
## Model Results:
## 
##             estimate       se     zval    pval     ci.lb    ci.ub      
## intrcpt     -13.5916  23.8425  -0.5701  0.5686  -60.3221  33.1389      
## study_year    0.0967   0.0123   7.8918  <.0001    0.0727   0.1208  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(meta_large_studies = large_studies %$% rma(yi = mean, sei = se, mods = ~ study_year))

## 
## Mixed-Effects Model (k = 100; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0 (SE = 0.0070)
## tau (square root of estimated tau^2 value):             0
## I^2 (residual heterogeneity / unaccounted variability): 0.00%
## H^2 (unaccounted variability / sampling variability):   1.00
## R^2 (amount of heterogeneity accounted for):            100.00%
## 
## Test for Residual Heterogeneity:
## QE(df = 98) = 89.0232, p-val = 0.7304
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 15728.4171, p-val < .0001
## 
## Model Results:
## 
##             estimate      se      zval    pval     ci.lb     ci.ub      
## intrcpt     -20.5775  1.5601  -13.1899  <.0001  -23.6352  -17.5197  *** 
## study_year    0.1003  0.0008  125.4130  <.0001    0.0987    0.1019  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#cors
(small_studies_r = cor.test(small_studies$mean, small_studies$study_year) %>% tidy())

(large_studies_r = cor.test(large_studies$mean, large_studies$study_year) %>% tidy())

#plots
ggplot(small_studies, aes(study_year, mean)) +
  geom_point() +
  scale_y_continuous("Average height (men)") +
  scale_x_continuous("Study year") +
  GG_text(text = str_glue("r = {str_round(small_studies_r$estimate, 3)}")) +
  geom_smooth(method = lm) ->
  plot_small

ggplot(large_studies, aes(study_year, mean)) +
  geom_point() +
  scale_y_continuous("Average height (men)") +
  scale_x_continuous("Study year") +
  GG_text(text = str_glue("r = {str_round(large_studies_r$estimate, 3)}")) +
  geom_smooth(method = lm) ->
  plot_large

plot_small + plot_large

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

GG_save("meta_signal_noise.png")

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Analysis 2

#yearly gain rate is 10/100 years, so 0.1 cm/year
#simulate uniformly distributed studies of different sample sizes
r_intercept = .1
r_slope = 0.001

sim_study2 = function(n) {
  study_year = runif(1, 1900, 2000)
  true_r = r_intercept + ((study_year-1900)*r_slope)
  study_data = MASS::mvrnorm(
    n = n, 
    mu = rep(0, 2),
    Sigma = matrix(c(1, true_r, true_r, 1), nrow = 2)
    )
  
  cor.test(study_data[, 1], study_data[, 2]) %>% tidy() %>% mutate(study_year = study_year, n = n)
}

#sim studies
set.seed(2)
small_studies = map_df(rep(50, 1000), sim_study2)
large_studies = map_df(rep(2000, 1000), sim_study2)

#estimate the SE/var
#https://www.metafor-project.org/doku.php/tips:hunter_schmidt_method
small_studies = escalc(measure="COR", ri=estimate, ni=n, data=small_studies, vtype="AV")
large_studies = escalc(measure="COR", ri=estimate, ni=n, data=large_studies, vtype="AV")

#meta-analysis
(meta_small_studies = small_studies %$% rma(yi = estimate, vi = vi, mods = ~ study_year))

## 
## Mixed-Effects Model (k = 1000; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0 (SE = 0.0009)
## tau (square root of estimated tau^2 value):             0
## I^2 (residual heterogeneity / unaccounted variability): 0.00%
## H^2 (unaccounted variability / sampling variability):   1.00
## R^2 (amount of heterogeneity accounted for):            0.00%
## 
## Test for Residual Heterogeneity:
## QE(df = 998) = 931.6312, p-val = 0.9338
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 31.6845, p-val < .0001
## 
## Model Results:
## 
##             estimate      se     zval    pval    ci.lb    ci.ub      
## intrcpt      -1.5270  0.2978  -5.1280  <.0001  -2.1106  -0.9434  *** 
## study_year    0.0009  0.0002   5.6289  <.0001   0.0006   0.0012  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(meta_large_studies = large_studies %$% rma(yi = estimate, vi = vi, mods = ~ study_year))

## 
## Mixed-Effects Model (k = 1000; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0 (SE = 0.0000)
## tau (square root of estimated tau^2 value):             0
## I^2 (residual heterogeneity / unaccounted variability): 0.00%
## H^2 (unaccounted variability / sampling variability):   1.00
## R^2 (amount of heterogeneity accounted for):            100.00%
## 
## Test for Residual Heterogeneity:
## QE(df = 998) = 994.6136, p-val = 0.5243
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 1666.3299, p-val < .0001
## 
## Model Results:
## 
##             estimate      se      zval    pval    ci.lb    ci.ub      
## intrcpt      -1.7544  0.0467  -37.5719  <.0001  -1.8460  -1.6629  *** 
## study_year    0.0010  0.0000   40.8207  <.0001   0.0009   0.0010  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#cors
(small_studies_r = cor.test(small_studies$estimate, small_studies$study_year) %>% tidy())

(large_studies_r = cor.test(large_studies$estimate, large_studies$study_year) %>% tidy())

#plots
ggplot(small_studies, aes(study_year, estimate)) +
  geom_point(alpha = .2) +
  scale_y_continuous("Height-income correlation", breaks = seq(-1, 1, .1)) +
  scale_x_continuous("Study year") +
  GG_text(text = str_glue("r = {str_round(small_studies_r$estimate, 3)}")) +
  geom_smooth(method = lm) ->
  plot_small

ggplot(large_studies, aes(study_year, estimate)) +
  geom_point(alpha = .2) +
  scale_y_continuous("Height-income correlation", breaks = seq(-1, 1, .1)) +
  scale_x_continuous("Study year") +
  GG_text(text = str_glue("r = {str_round(large_studies_r$estimate, 3)}")) +
  geom_smooth(method = lm) ->
  plot_large

plot_small + plot_large

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

GG_save("meta_signal_noise2.png")

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Meta

write_sessioninfo()

## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] broom_0.8.0           metafor_3.4-0         metadat_1.2-0        
##  [4] Matrix_1.4-1          patchwork_1.1.1       kirkegaard_2022-08-04
##  [7] psych_2.2.3           assertthat_0.2.1      weights_1.0.4        
## [10] Hmisc_4.7-0           Formula_1.2-4         survival_3.2-13      
## [13] lattice_0.20-45       magrittr_2.0.3        forcats_0.5.1        
## [16] stringr_1.4.0         dplyr_1.0.9           purrr_0.3.4          
## [19] readr_2.1.2           tidyr_1.2.0           tibble_3.1.7         
## [22] ggplot2_3.3.6         tidyverse_1.3.1      
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-157        fs_1.5.2            lubridate_1.8.0    
##  [4] RColorBrewer_1.1-3  httr_1.4.3          tools_4.2.0        
##  [7] backports_1.4.1     bslib_0.3.1         utf8_1.2.2         
## [10] R6_2.5.1            rpart_4.1.16        mgcv_1.8-40        
## [13] DBI_1.1.2           colorspace_2.0-3    nnet_7.3-17        
## [16] withr_2.5.0         mnormt_2.0.2        tidyselect_1.1.2   
## [19] gridExtra_2.3       compiler_4.2.0      cli_3.3.0          
## [22] rvest_1.0.2         htmlTable_2.4.0     mice_3.14.0        
## [25] xml2_1.3.3          labeling_0.4.2      sass_0.4.1         
## [28] scales_1.2.0        checkmate_2.1.0     digest_0.6.29      
## [31] foreign_0.8-82      minqa_1.2.4         rmarkdown_2.14     
## [34] base64enc_0.1-3     jpeg_0.1-9          pkgconfig_2.0.3    
## [37] htmltools_0.5.2     lme4_1.1-29         highr_0.9          
## [40] dbplyr_2.1.1        fastmap_1.1.0       htmlwidgets_1.5.4  
## [43] rlang_1.0.2         readxl_1.4.0        rstudioapi_0.13    
## [46] farver_2.1.0        jquerylib_0.1.4     generics_0.1.2     
## [49] jsonlite_1.8.0      gtools_3.9.2        Rcpp_1.0.8.3       
## [52] munsell_0.5.0       fansi_1.0.3         lifecycle_1.0.1    
## [55] stringi_1.7.6       yaml_2.3.5          mathjaxr_1.6-0     
## [58] MASS_7.3-57         grid_4.2.0          parallel_4.2.0     
## [61] gdata_2.18.0        crayon_1.5.1        haven_2.5.0        
## [64] splines_4.2.0       hms_1.1.1           tmvnsim_1.0-2      
## [67] knitr_1.39          pillar_1.7.0        boot_1.3-28        
## [70] reprex_2.0.1        glue_1.6.2          evaluate_0.15      
## [73] latticeExtra_0.6-29 data.table_1.14.2   modelr_0.1.8       
## [76] nloptr_2.0.1        png_0.1-7           vctrs_0.4.1        
## [79] tzdb_0.3.0          cellranger_1.1.0    gtable_0.3.0       
## [82] xfun_0.30           cluster_2.1.3       ellipsis_0.3.2

The signal and the noise in meta-analysis

06 August, 2022

Init

Analysis 1

Analysis 2

Meta