Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: weights
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: assertthat
## 
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
## 
##     has_name
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## Loading required package: metafor
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading the 'metafor' package (version 3.0-2). For an
## introduction to the package please type: help(metafor)
## Loading required package: rlang
## 
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
## 
##     set_names
## The following object is masked from 'package:assertthat':
## 
##     has_name
## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
##     splice
## 
## Attaching package: 'kirkegaard'
## The following object is masked from 'package:rlang':
## 
##     is_logical
## The following object is masked from 'package:psych':
## 
##     rescale
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  mirt,
  rms,
  GGally,
  readxl
)
## Loading required package: stats4
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## Attaching package: 'rms'
## The following object is masked from 'package:metafor':
## 
##     vif
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
theme_set(theme_bw())

options(
  contrasts=c("contr.treatment", "contr.treatment")
)
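
#note on the contrasts option: setting the second entry (used for ordered factors) to
#"contr.treatment" makes the ordinal predictors below (age, income, etc.) enter the
#regressions as plain dummy variables rather than the default polynomial contrasts.
#minimal illustration with a made-up factor, not part of the analysis:
if (F) {
  f_demo = ordered(c("low", "mid", "high"), levels = c("low", "mid", "high"))
  contrasts(f_demo) #dummy coding against "low", no .L/.Q columns
}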

Data

#read admix data
d = read_tsv("data/Aggregate-data_Subtypes_of_Native_American_Ancestry_and_Death.txt", comment = "#")
## Rows: 1805 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (6): gender, region, ageclass, socioecost, education, salary
## dbl (10): hgdp, ceu_3, yri_3, mapaym, ceu_3z, yri_3z, aym, map, ceu_4, yri_4
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#recode: row id, ordered factors for the ordinal variables, and a combined Mapuche+Aymara ancestry
d %<>% mutate(
  id = 1:n(),
  income = salary %>% mapvalues(from = c("z<350 000", "350-450", "450+", "Missing"), to = c(1, 2, 3, NA)) %>% ordered(),
  education = education %>% ordered(levels = c("Primary/secondary school", "Technical", "University/postgrade")),
  social_class = socioecost %>% mapvalues(from = c("ABC1", "C2", "C3", "D", "Missing"), to = c(1, 2, 3, 4, NA)) %>% ordered() %>% fct_rev(),
  age = ageclass %>% mapvalues(from = "z>32", to = ">32") %>% ordered(levels = c("<24", "24-26", "27-32", ">32")),
  mapaym2 = map+aym
)

# codebook:
# gender (male, female)
# region (Arica, Tarapaca, Antofagasta, Atacama, Coquimbo, Valparaiso, ZMetropolitana, OHiggins, Maule, Biobio, Araucania, Rios, Lagos, Aisen, Magallanes) 
# ageclass (<24, 24-26, 27-32, z>32)
# socioecost (ABC1, C2, C3, D, Missing)
# education (Primary/secondary school, Technical, University/postgrade)
# salary (z<350 000, 350-450, 450+, Missing)
# hgdp (numeric values from 0 to 1): Native American ancestry proportions from supervised ADMIXTURE with 3 references (CEU, YRI, HGDP)
# ceu_3 (numeric values from 0 to 1): European ancestry proportions from supervised ADMIXTURE with 3 references (CEU, YRI, HGDP)
# yri_3 (numeric values from 0 to 1): African ancestry proportions from supervised ADMIXTURE with 3 references (CEU, YRI, HGDP)
# mapaym (numeric values from 0 to 1): Native American ancestry proportions from supervised ADMIXTURE with 3 references (CEU, YRI, Mapuche and Aymara grouped together)
# ceu_3z (numeric values from 0 to 1): European ancestry proportions from supervised ADMIXTURE with 3 references (CEU, YRI, Mapuche and Aymara grouped together)
# yri_3z (numeric values from 0 to 1): African ancestry proportions from supervised ADMIXTURE with 3 references (CEU, YRI, Mapuche and Aymara grouped together)
# aym (numeric values from 0 to 1): Aymara ancestry proportions from supervised ADMIXTURE with 4 references (CEU, YRI, Mapuche, Aymara)
# map (numeric values from 0 to 1): Mapuche ancestry proportions from supervised ADMIXTURE with 4 references (CEU, YRI, Mapuche, Aymara)
# ceu_4 (numeric values from 0 to 1): European ancestry proportions from supervised ADMIXTURE with 4 references (CEU, YRI, Mapuche, Aymara)
# yri_4 (numeric values from 0 to 1): African ancestry proportions from supervised ADMIXTURE with 4 references (CEU, YRI, Mapuche, Aymara)
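
#sanity check of the recoding against the codebook above: the ordered factor levels
#should appear in the intended order (illustrative, output not shown)
d %>% 
  select(income, education, social_class, age) %>% 
  map(levels)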

#ancestry variable names and the three supervised ADMIXTURE reference sets
ancestry_vars = d %>% 
  select(hgdp:yri_4) %>% 
  names()

ancestry_set_1 = c("hgdp", "ceu_3", "yri_3")
ancestry_set_2 = c("mapaym", "ceu_3z", "yri_3z")
ancestry_set_3 = c("aym", "map", "ceu_4", "yri_4")

#regional data
regional = read_excel("data/regional data.xlsx")

Functions

# custom lower-panel function for ggpairs: scatterplot with a loess fit
#https://stackoverflow.com/questions/30856602/how-to-add-line-fit-on-ggally-correlation-plot-matrix
lowerfun <- function(data, mapping) {
  ggplot(data = data, mapping = mapping)+ 
    geom_point(alpha = .25) + 
    geom_smooth(method = "loess", formula = y ~ x, 
                fill = "blue", color = "red", size = 0.5)
}
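
# minimal usage sketch: the function is passed to ggpairs() via wrap(), as in the
# plots further below (illustrative pair of columns only, not run here)
if (F) {
  d %>% 
    select(map, ceu_4) %>% 
    ggpairs(lower = list(continuous = wrap(lowerfun)))
}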

Analyses

S factor

#the SES indicators, all ordinal
SES_vars = c("income", "education", "social_class")

#polychoric correlations among the SES indicators
d %>% 
  select(!!SES_vars) %>% 
  map_df(as.numeric) %>% 
  mixedCor()
## Warning in polychoric(data[, p], smooth = smooth, global = global, weight =
## weight, : The items do not have an equal number of response alternatives, global
## set to FALSE.
## Warning in matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy): 2 cells were adjusted
## for 0 values using the correction for continuity. Examine your data carefully.
## Call: mixedCor(data = .)
##              incom edctn scl_c
## income       1.00             
## education    0.45  1.00       
## social_class 0.80  0.73  1.00
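#for comparison (illustrative, output not shown): plain Pearson correlations of the
#same indicators; the polychoric values above are typically somewhat larger in magnitude
d %>% 
  select(!!SES_vars) %>% 
  map_df(as.numeric) %>% 
  cor(use = "pairwise.complete.obs") %>% 
  round(2)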
#factor analysis
s_factor = irt.fa(
  x = d %>% 
  select(!!SES_vars) %>% 
  map_df(as.numeric),
  nfactors = 1
)
## Warning in polychoric(x): The items do not have an equal number of response
## alternatives, global set to FALSE.

## Warning in polychoric(x): 2 cells were adjusted for 0 values using the
## correction for continuity. Examine your data carefully.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in sqrt(1 - t$loadings[, i]^2): NaNs produced
## Warning in sqrt(1 - t$loadings^2): NaNs produced
## Warning in irt.fa(x = d %>% select(!!SES_vars) %>% map_df(as.numeric), nfactors = 1): A discrimination  with a NaN value was replaced with the maximum discrimination for factor 1 and  item(s)  3
## examine the factor analysis object (fa)  to identify the Heywood case. 
## The item informations are probably suspect as well for this factor.  
## You might try a different factor extraction technique.

s_factor
## Item Response Analysis using Factor Analysis  
## 
## Call: irt.fa(x = d %>% select(!!SES_vars) %>% map_df(as.numeric), nfactors = 1)
## Item Response Analysis using Factor Analysis  
## 
##  Summary information by factor and item
##  Factor =  1 
##                 -3    -2   -1    0    1    2     3
## income        0.18  0.38 0.55 0.51 0.30 0.13  0.05
## education     0.05  0.11 0.23 0.37 0.42 0.32  0.18
## social_class  0.07  0.19 0.41 0.65 0.75 0.63  0.36
## Test Info     0.30  0.68 1.18 1.52 1.47 1.07  0.59
## SEM           1.83  1.22 0.92 0.81 0.83 0.96  1.31
## Reliability  -2.37 -0.48 0.16 0.34 0.32 0.07 -0.71
## 
## Factor analysis with Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## 
## Test of the hypothesis that 1 factor is sufficient.
## The degrees of freedom for the model is 0  and the objective function was  0.11 
## The number of observations was  1805  with Chi Square =  194.85  with prob <  NA 
## 
## The root mean square of the residuals (RMSA) is  0.04 
## The df corrected root mean square of the residuals is  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
s_factor$fa
## Factor Analysis using method =  minres
## Call: fa(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, 
##     fm = fm)
## Standardized loadings (pattern matrix) based upon correlation matrix
##               MR1   h2     u2 com
## income       0.74 0.55  0.449   1
## education    0.68 0.46  0.542   1
## social_class 1.02 1.05 -0.048   1
## 
##                 MR1
## SS loadings    2.06
## Proportion Var 0.69
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  3  and the objective function was  1.89 with Chi Square of  3400.07
## The degrees of freedom for the model are 0  and the objective function was  0.11 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  NA 
## 
## The harmonic number of observations is  1805 with the empirical chi square  20.98  with prob <  NA 
## The total number of observations was  1805  with Likelihood Chi Square =  194.85  with prob <  NA 
## 
## Tucker Lewis Index of factoring reliability =  -Inf
## Fit based upon off diagonal values = 1
#refit as an IRT model with mirt to obtain factor scores (the fa above produced an ultra-Heywood case)
s_factor_mirt = mirt(
  data = d %>% 
  select(!!SES_vars) %>% 
  map_df(as.numeric),
  model = 1,
  itemtype = c("nominal", "nominal", "nominal")
)
## Iteration: 1, Log-Lik: -4621.448, Max-Change: 4.13311
## Iteration: 2, Log-Lik: -3225.701, Max-Change: 1.77520
## Iteration: 3, Log-Lik: -3116.486, Max-Change: 0.78061
## Iteration: 4, Log-Lik: -3072.821, Max-Change: 0.92416
## Iteration: 5, Log-Lik: -3051.338, Max-Change: 0.70424
## Iteration: 6, Log-Lik: -3040.009, Max-Change: 0.58114
## Iteration: 7, Log-Lik: -3033.767, Max-Change: 0.50749
## Iteration: 8, Log-Lik: -3030.280, Max-Change: 0.33793
## Iteration: 9, Log-Lik: -3027.882, Max-Change: 0.40246
## Iteration: 10, Log-Lik: -3024.082, Max-Change: 0.16176
## Iteration: 11, Log-Lik: -3022.968, Max-Change: 0.38202
## Iteration: 12, Log-Lik: -3022.472, Max-Change: 0.46090
## Iteration: 13, Log-Lik: -3021.022, Max-Change: 0.14928
## Iteration: 14, Log-Lik: -3020.701, Max-Change: 0.49693
## Iteration: 15, Log-Lik: -3020.483, Max-Change: 0.46776
## Iteration: 16, Log-Lik: -3019.766, Max-Change: 0.38457
## Iteration: 17, Log-Lik: -3019.641, Max-Change: 0.39636
## Iteration: 18, Log-Lik: -3019.552, Max-Change: 0.39882
## Iteration: 19, Log-Lik: -3019.195, Max-Change: 0.41482
## Iteration: 20, Log-Lik: -3019.120, Max-Change: 0.40918
## Iteration: 21, Log-Lik: -3019.068, Max-Change: 0.40251
## Iteration: 22, Log-Lik: -3018.845, Max-Change: 0.38846
## Iteration: 23, Log-Lik: -3018.806, Max-Change: 0.37775
## Iteration: 24, Log-Lik: -3018.776, Max-Change: 0.37795
## Iteration: 25, Log-Lik: -3018.638, Max-Change: 0.00368
## Iteration: 26, Log-Lik: -3018.633, Max-Change: 0.00334
## Iteration: 27, Log-Lik: -3018.632, Max-Change: 0.00000
s_factor_mirt@Fit
## $G2
## [1] NaN
## 
## $p
## [1] NaN
## 
## $TLI
## [1] NaN
## 
## $CFI
## [1] NaN
## 
## $RMSEA
## [1] NaN
## 
## $df
## [1] 21
## 
## $AIC
## [1] 6065.264
## 
## $AICc
## [1] 6065.499
## 
## $BIC
## [1] 6142.241
## 
## $SABIC
## [1] 6097.763
## 
## $HQ
## [1] 6093.675
## 
## $logLik
## [1] -3018.632
## 
## $logPrior
## [1] 0
## 
## $SElogLik
## [1] 0
## 
## $F
##                     F1
## income       0.7994839
## education    0.5714313
## social_class 0.9915645
## 
## $h2
##       income    education social_class 
##    0.6391745    0.3265338    0.9832002
#extract factor scores
s_factor_mirt_scores = mirt::fscores(
  s_factor_mirt,
  full.scores = T,
  full.scores.SE = T
)

#add standardized scores to the main data
d$S = s_factor_mirt_scores[, 1] %>% standardize()

#unit-weighted S factor: mean of the standardized indicators
d$S_unit = d %>% 
  select(!!SES_vars) %>% 
  map_df(as.numeric) %>% 
  df_standardize() %>% 
  rowMeans() %>% 
  standardize()

#relations between the indicators and the two S scores
d %>% 
  select(!!SES_vars, S, S_unit) %>% 
  map_df(as.numeric) %>% 
  mixedCor()
## Warning in polychoric(data[, p], smooth = smooth, global = global, weight =
## weight, : The items do not have an equal number of response alternatives, global
## set to FALSE.
## Warning in matpLower(x, nvar, gminx, gmaxx, gminy, gmaxy): 2 cells were adjusted
## for 0 values using the correction for continuity. Examine your data carefully.
## Call: mixedCor(data = .)
##              incom edctn scl_c S    S_unt
## income       1.00                        
## education    0.45  1.00                  
## social_class 0.80  0.73  1.00            
## S            0.92  0.96  1.00  1.00      
## S_unit       0.94  0.96  1.00  0.97 1.00

Descriptive stats

d %>% 
  select(S, !!ancestry_vars) %>% 
  describe2()
#categorical variables
d$age %>% table2()
d$gender %>% table2()
d$region %>% table2()

Plots

#S and the ancestry components plotted against each other
d %>% 
  select(S, !!ancestry_set_3) %>% 
  ggpairs(
    lower = list(continuous = wrap(lowerfun)),
    columnLabels = c("Social status", "Aymara ancestry", "Mapuche ancestry", "European ancestry", "African ancestry")
    )

GG_save("figs/ggpairs.png")

#check that each ancestry set sums to 1
d %>% 
  select(!!ancestry_set_1) %>% 
  rowSums() %>% 
  describe2()
d %>% 
  select(!!ancestry_set_2) %>% 
  rowSums() %>% 
  describe2()
d %>% 
  select(!!ancestry_set_3) %>% 
  rowSums() %>% 
  describe2()
#are ancestry estimates consistent across sets?
d %>% 
  select(hgdp, mapaym, aym, map, mapaym2) %>% 
  wtd.cors()
##               hgdp     mapaym        aym        map    mapaym2
## hgdp     1.0000000  0.9992532  0.8824527 -0.3326785  0.9804084
## mapaym   0.9992532  1.0000000  0.8788837 -0.3238963  0.9833253
## aym      0.8824527  0.8788837  1.0000000 -0.7356212  0.7791240
## map     -0.3326785 -0.3238963 -0.7356212  1.0000000 -0.1485028
## mapaym2  0.9804084  0.9833253  0.7791240 -0.1485028  1.0000000

Regressions

#regression helper: fits three nested OLS models (ancestries only, + demographics, + region)
do_reg = function(outcome, ancestries, control_sets, data = d) {
  #build the three nested model formulas
  f1 = str_glue("{outcome} ~ {str_c(ancestries, collapse = ' + ')}") %>% as.formula()
  f2 = str_glue("{outcome} ~ {str_c(ancestries, collapse = ' + ')} + {str_c(control_sets[[1]], collapse = ' + ')}") %>% as.formula()
  f3 = str_glue("{outcome} ~ {str_c(ancestries, collapse = ' + ')} + {str_c(control_sets[[1]], collapse = ' + ')} + {str_c(control_sets[[2]], collapse = ' + ')}") %>% as.formula()

  #fit the models on the supplied data (defaults to the main dataset d)
  regs = list(
    simple = ols(f1, data = data),
    basic = ols(f2, data = data),
    regions = ols(f3, data = data)
  )
  
  regs
}
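
#for clarity (illustrative, not run): the simplest of the three formulas built for the
#primary call below is "S ~ aym + map + yri_4"
if (F) {
  str_glue("S ~ {str_c(ancestry_set_3[-3], collapse = ' + ')}")
}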

#primary analysis: European ancestry (ceu_4) is the omitted reference category, since the ancestry proportions sum to 1
do_reg(
  outcome = "S",
  ancestries = ancestry_set_3[-3],
  control_sets = list(c("age", "gender"), "region")) %>% 
  summarize_models(asterisks_only = F, collapse_factors = "region")
#robustness: the other two sets of ancestry variables (European ancestry again omitted)
do_reg(
  outcome = "S",
  ancestries = ancestry_set_1[-2],
  control_sets = list(c("age", "gender"), "region")) %>% 
  summarize_models(asterisks_only = F, collapse_factors = "region")
do_reg(
  outcome = "S",
  ancestries = ancestry_set_2[-2],
  control_sets = list(c("age", "gender"), "region")) %>% 
  summarize_models(asterisks_only = F, collapse_factors = "region")
#manually combined Mapuche+Aymara ancestry (mapaym2 = map + aym)
do_reg(
  outcome = "S",
  ancestries = c("mapaym2", "yri_4"),
  control_sets = list(c("age", "gender"), "region")) %>% 
  summarize_models(asterisks_only = F, collapse_factors = "region")
#individual ancestry slopes, one predictor at a time
list(
  Mapuche = ols(S ~ map, data = d),
  Aymara = ols(S ~ aym, data = d),
  European = ols(S ~ ceu_4, data = d),
  African = ols(S ~ yri_4, data = d)
) %>% summarize_models(asterisks_only = F)

Regional

#aggregate to regional
d$region %>% table2()
#merge Rios into Lagos, and Arica into Tarapaca
#the regions are grouped this way here: https://en.wikipedia.org/wiki/Ranked_lists_of_Chilean_regions#By_international_HDI
#and the original report groups them the same way
d$region2 = d$region %>% mapvalues(from = c("Rios", "Arica"), to = c("Lagos", "Tarapaca"))

#compute means
regions_merged = d %>% 
  plyr::ddply("region2", function(dd) {
    tibble(
      euro = mean(dd$ceu_4),
      aym = mean(dd$aym),
      map = mean(dd$map),
      african = mean(dd$yri_4),
      S = mean(dd$S),
      n = nrow(dd),
      sqrtn = sqrt(n)
    )
  }) %>% 
  full_join(regional, by = c("region2" = "Region2"))
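
#the same aggregation written with dplyr, for readers unfamiliar with plyr::ddply
#(equivalent sketch, not run; the join to the regional data would follow as above)
if (F) {
  d %>% 
    group_by(region2) %>% 
    summarise(
      euro = mean(ceu_4),
      aym = mean(aym),
      map = mean(map),
      african = mean(yri_4),
      S = mean(S),
      n = n(),
      sqrtn = sqrt(n)
    )
}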

#regional-level correlations and scatterplots
regions_merged %>% 
  select(euro:S, HDI, SIMCE, PSU) %>% 
    ggpairs(
    lower = list(continuous = wrap(lowerfun)),
    columnLabels = c("European ancestry", "Aymara ancestry", "Mapuche ancestry", "African ancestry", "Social status", "Human development index", "SIMCE", "PSU")
    ) +
  theme(text = element_text(size = 8))

GG_save("figs/regional_cors.png")

#with p values
regions_merged %>% 
  select(euro:S, HDI, SIMCE, PSU) %>% 
  cor_matrix(p_val = T)
##         euro    aym        map        african    S       HDI       SIMCE  
## euro    "1"     "-0.42"    "-0.17"    "-0.03"    "0.25"  "0.16"    "0.38" 
## aym     "-0.42" "1"        "-0.82***" "0.77**"   "0.09"  "0.59"    "-0.16"
## map     "-0.17" "-0.82***" "1"        "-0.85***" "-0.26" "-0.75**" "-0.07"
## african "-0.03" "0.77**"   "-0.85***" "1"        "0.23"  "0.64"    "0.02" 
## S       "0.25"  "0.09"     "-0.26"    "0.23"     "1"     "0.59"    "0.63" 
## HDI     "0.16"  "0.59"     "-0.75**"  "0.64"     "0.59"  "1"       "0.60" 
## SIMCE   "0.38"  "-0.16"    "-0.07"    "0.02"     "0.63"  "0.60"    "1"    
## PSU     "0.69*" "-0.42"    "0.02"     "-0.05"    "0.05"  "0.04"    "0.42" 
##         PSU    
## euro    "0.69*"
## aym     "-0.42"
## map     "0.02" 
## african "-0.05"
## S       "0.05" 
## HDI     "0.04" 
## SIMCE   "0.42" 
## PSU     "1"
#regress regional outcomes on the ancestry components (unweighted)
ols(HDI ~ aym + map + african, data = regions_merged) %>% 
  list() %>% 
  summarize_models()
ols(SIMCE ~ aym + map + african, data = regions_merged) %>% 
  list() %>% 
  summarize_models()
ols(PSU ~ aym + map + african, data = regions_merged) %>% 
  list() %>% 
  summarize_models()
#weighted analyses, using sqrt(n) as weights
#correlations with p values
regions_merged %>% 
  select(euro:S, SIMCE:HDI) %>% 
  cor_matrix(asterisks_only = F, p_val = T, weights = regions_merged$sqrtn)
##         euro                aym                     map                    
## euro    "1"                 "-0.68%a [p=0.011]"     "0.23%a [p=0.447]"     
## aym     "-0.68%a [p=0.011]" "1"                     "-0.87%a [p=<0.001***]"
## map     "0.23%a [p=0.447]"  "-0.87%a [p=<0.001***]" "1"                    
## african "-0.27%a [p=0.379]" "0.78%a [p=0.002**]"    "-0.88%a [p=<0.001***]"
## S       "0.09%a [p=0.771]"  "0.34%a [p=0.258]"      "-0.51%a [p=0.076]"    
## SIMCE   "0.59%a [p=0.033]"  "-0.31%a [p=0.306]"     "0.01%a [p=0.969]"     
## HDI     "-0.03%a [p=0.923]" "0.63%a [p=0.022]"      "-0.81%a [p=<0.001***]"
##         african                 S                   SIMCE              
## euro    "-0.27%a [p=0.379]"     "0.09%a [p=0.771]"  "0.59%a [p=0.033]" 
## aym     "0.78%a [p=0.002**]"    "0.34%a [p=0.258]"  "-0.31%a [p=0.306]"
## map     "-0.88%a [p=<0.001***]" "-0.51%a [p=0.076]" "0.01%a [p=0.969]" 
## african "1"                     "0.46%a [p=0.112]"  "-0.01%a [p=0.984]"
## S       "0.46%a [p=0.112]"      "1"                 "0.41%a [p=0.165]" 
## SIMCE   "-0.01%a [p=0.984]"     "0.41%a [p=0.165]"  "1"                
## HDI     "0.73%a [p=0.004**]"    "0.70%a [p=0.007*]" "0.49%a [p=0.093]" 
##         HDI                    
## euro    "-0.03%a [p=0.923]"    
## aym     "0.63%a [p=0.022]"     
## map     "-0.81%a [p=<0.001***]"
## african "0.73%a [p=0.004**]"   
## S       "0.70%a [p=0.007*]"    
## SIMCE   "0.49%a [p=0.093]"     
## HDI     "1"
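#cross-check of a single weighted coefficient, computed directly with wtd.cors() from
#the weights package loaded above (illustrative, not run; should be close to the
#corresponding value in the weighted matrix)
if (F) {
  wtd.cors(regions_merged$map, regions_merged$HDI, weight = regions_merged$sqrtn)
}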
#weighted regression
ols(HDI ~ aym + map + african, data = regions_merged, weights = regions_merged$sqrtn)
## Linear Regression Model
##  
##  ols(formula = HDI ~ aym + map + african, data = regions_merged, 
##      weights = regions_merged$sqrtn)
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs      13    LR chi2     15.03    R2       0.685    
##  sigma0.0758    d.f.            3    R2 adj   0.580    
##  d.f.      9    Pr(> chi2) 0.0018    g        0.038    
##  
##  Residuals
##  
##         Min         1Q     Median         3Q        Max 
##  -0.0499846 -0.0133877 -0.0005058  0.0072193  0.0443796 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept  1.0313 0.1409  7.32 <0.0001 
##  aym       -0.1305 0.1443 -0.90 0.3892  
##  map       -0.4612 0.2301 -2.00 0.0760  
##  african    0.4734 1.7850  0.27 0.7968  
## 

Meta

write_sessioninfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] readxl_1.3.1          GGally_2.1.2          rms_6.2-0            
##  [4] SparseM_1.81          mirt_1.34             kirkegaard_2021-12-11
##  [7] rlang_0.4.12          metafor_3.0-2         Matrix_1.4-0         
## [10] psych_2.1.9           magrittr_2.0.1        assertthat_0.2.1     
## [13] weights_1.0.4         Hmisc_4.6-0           Formula_1.2-4        
## [16] survival_3.2-13       lattice_0.20-45       forcats_0.5.1        
## [19] stringr_1.4.0         dplyr_1.0.7           purrr_0.3.4          
## [22] readr_2.1.0           tidyr_1.1.4           tibble_3.1.6         
## [25] ggplot2_3.3.5         tidyverse_1.3.1      
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.3.0       plyr_1.8.6            GPArotation_2014.11-1
##   [4] splines_4.1.2         listenv_0.8.0         TH.data_1.1-0        
##   [7] digest_0.6.28         foreach_1.5.1         htmltools_0.5.2      
##  [10] gdata_2.18.0          fansi_0.5.0           checkmate_2.0.0      
##  [13] cluster_2.1.2         tzdb_0.2.0            recipes_0.1.17       
##  [16] globals_0.14.0        modelr_0.1.8          gower_0.2.2          
##  [19] matrixStats_0.61.0    vroom_1.5.6           sandwich_3.0-1       
##  [22] jpeg_0.1-9            colorspace_2.0-2      rvest_1.0.2          
##  [25] haven_2.4.3           xfun_0.28             crayon_1.4.2         
##  [28] jsonlite_1.7.2        lme4_1.1-27.1         zoo_1.8-9            
##  [31] iterators_1.0.13      glue_1.5.0            gtable_0.3.0         
##  [34] ipred_0.9-12          MatrixModels_0.5-0    future.apply_1.8.1   
##  [37] dcurver_0.9.2         scales_1.1.1          mvtnorm_1.1-3        
##  [40] DBI_1.1.1             Rcpp_1.0.7            htmlTable_2.3.0      
##  [43] tmvnsim_1.0-2         bit_4.0.4             foreign_0.8-81       
##  [46] lava_1.6.10           prodlim_2019.11.13    htmlwidgets_1.5.4    
##  [49] httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2       
##  [52] mice_3.13.0           farver_2.1.0          reshape_0.8.8        
##  [55] pkgconfig_2.0.3       nnet_7.3-16           sass_0.4.0           
##  [58] dbplyr_2.1.1          utf8_1.2.2            caret_6.0-90         
##  [61] labeling_0.4.2        tidyselect_1.1.1      reshape2_1.4.4       
##  [64] munsell_0.5.0         cellranger_1.1.0      tools_4.1.2          
##  [67] cli_3.1.0             generics_0.1.1        broom_0.7.10         
##  [70] mathjaxr_1.4-0        evaluate_0.14         fastmap_1.1.0        
##  [73] yaml_2.2.1            bit64_4.0.5           ModelMetrics_1.2.2.2 
##  [76] knitr_1.36            fs_1.5.0              future_1.23.0        
##  [79] nlme_3.1-152          quantreg_5.86         xml2_1.3.2           
##  [82] compiler_4.1.2        rstudioapi_0.13       png_0.1-7            
##  [85] reprex_2.0.1          bslib_0.3.1           stringi_1.7.5        
##  [88] highr_0.9             nloptr_1.2.2.3        vegan_2.5-7          
##  [91] permute_0.9-5         vctrs_0.3.8           pillar_1.6.4         
##  [94] lifecycle_1.0.1       jquerylib_0.1.4       data.table_1.14.2    
##  [97] conquer_1.2.1         R6_2.5.1              latticeExtra_0.6-29  
## [100] gridExtra_2.3         parallelly_1.28.1     codetools_0.2-18     
## [103] polspline_1.1.19      boot_1.3-28           MASS_7.3-54          
## [106] gtools_3.9.2          withr_2.4.2           mnormt_2.0.2         
## [109] Deriv_4.1.3           multcomp_1.4-17       mgcv_1.8-38          
## [112] parallel_4.1.2        hms_1.1.1             grid_4.1.2           
## [115] rpart_4.1-15          timeDate_3043.102     class_7.3-19         
## [118] minqa_1.2.4           rmarkdown_2.11        pROC_1.18.0          
## [121] lubridate_1.8.0       base64enc_0.1-3
#output the regional data as a table
regions_merged %>% select(Region, n, everything(), -sqrtn, -region2)
#export to file for reuse
d %>% write_rds("data/data_for_reuse.rds")

#upload files to OSF (not run automatically)
if (F) {
  library(osfr)
  osf_auth(read_lines("~/.config/osf_token"))
  osf_proj = osf_retrieve_node("https://osf.io/dn4rw/")
  osf_upload(osf_proj, conflicts = "overwrite", path = 
               c(
                 "data", "figs",
                 "notebook.html", "notebook.Rmd",
                 "sessions_info.txt"
               ))
}