Main analyses
Now we come to the main analyses.
Does mining-activity confound the results?
First, we examine whether the S analyses are robust to adjustments for mining-activity.
### EXPLORE MINING DATA TO SEE IF IT CONFOUNDS ASSOCIATIONS
# mining data -------------------------------------------------------------
d_mining = d_main[c("Active_mines", "Mining_volume", "Mining_value", "Mining_employees")]
wtd.cors(d_mining)
## Active_mines Mining_volume Mining_value Mining_employees
## Active_mines 1.00 0.38 0.62 0.53
## Mining_volume 0.38 1.00 0.63 0.81
## Mining_value 0.62 0.63 1.00 0.83
## Mining_employees 0.53 0.81 0.83 1.00
# per capita --------------------------------------------------------------
d_mining_pc = d_mining / d_main$pop
wtd.cors(d_mining_pc)
## Active_mines Mining_volume Mining_value Mining_employees
## Active_mines 1.00 0.15 0.17 0.27
## Mining_volume 0.15 1.00 0.32 0.76
## Mining_value 0.17 0.32 1.00 0.80
## Mining_employees 0.27 0.76 0.80 1.00
fa_mining = fa(d_mining_pc)
d_main$mining = fa_mining$scores %>% as.vector
# residualize -------------------------------------------------------------------
wtd.cors(d_main[c(v_primary_vars, "mining")]) %>% round(2)
## CA S European White Skin_brightness European_SIRE
## CA 1.00 0.87 0.44 0.49 0.65 0.50
## S 0.87 1.00 0.42 0.43 0.71 0.50
## European 0.44 0.42 1.00 0.72 0.22 0.69
## White 0.49 0.43 0.72 1.00 0.41 0.97
## Skin_brightness 0.65 0.71 0.22 0.41 1.00 0.48
## European_SIRE 0.50 0.50 0.69 0.97 0.48 1.00
## Latitude 0.48 0.64 0.12 0.25 0.56 0.38
## Temperature -0.49 -0.61 0.08 -0.08 -0.57 -0.19
## mining -0.10 -0.05 -0.38 -0.42 0.09 -0.35
## Latitude Temperature mining
## CA 0.48 -0.49 -0.10
## S 0.64 -0.61 -0.05
## European 0.12 0.08 -0.38
## White 0.25 -0.08 -0.42
## Skin_brightness 0.56 -0.57 0.09
## European_SIRE 0.38 -0.19 -0.35
## Latitude 1.00 -0.93 0.16
## Temperature -0.93 1.00 -0.20
## mining 0.16 -0.20 1.00
d_S_mining = d_S2
d_S_mining$mining = d_main$mining
#name vars
#otherwise the residualization function fails
colnames(d_S_mining) %<>% make.names
d_S_mining = df_residualize(d_S_mining, resid.vars = "mining", return.resid.vars = F)
#set names back
colnames(d_S_mining) = colnames(d_S2)
# factor analyze ----------------------------------------------------------
mining_s_fa = list(standard = fa_s_prov, mining_controlled = fa(d_S_mining, scores = "Bartlett", weight = d_main$pop %>% sqrt))
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
fa_plot_loadings(mining_s_fa)
fa_congruence_matrix(mining_s_fa)
## MR1 MR1
## MR1 1 1
## MR1 1 1
Individual-level analyses
Second, the main individual-level analyses.
### INDIVIDUAL-LEVEL ANALYSES
#Here we analyze the relationship between CA, S, SIRE and skin color
# some initial plots ------------------------------------------------------
GG_group_means(d_lapop, var = "Skin_brightness", groupvar = "SIRE", type = "violin") +
ylab("Skin brightness")
## Missing values were removed.
ggsave("figures/individual/Skin_brightness_by_SIRE.png")
## Saving 7 x 5 in image
#table
psych::describeBy(d_lapop$Skin_brightness, d_lapop$SIRE, mat = T) %>%
as_data_frame() %>%
dplyr::select(group1, n, mean, sd) %>%
write_clipboard()
## group1 n mean sd
## X11 Blanca 2845.00 7.93 1.26
## X12 IndÃÂgena 51.00 5.73 1.64
## X13 Mestiza 1246.00 6.73 1.48
## X14 Mulata 7.00 6.29 2.29
## X15 Negra 37.00 4.73 1.61
## X16 not stated 173.00 7.25 1.66
## X17 Otro 60.00 6.55 1.29
GG_scatter(d_lapop, "CA", "S", case_names = F) +
xlab("Cognitive ability (political knowledge proxy)") +
ylab("General socioeconomic status (15 indicators)")
ggplot(d_lapop, aes(CA, S, color = year)) +
geom_point() +
geom_smooth(method = lm) +
xlab("Cognitive ability (political knowledge proxy)") +
ylab("S, general socioeconomic factor (15 indicators)") +
theme_bw()
ggsave("figures/individual/CA_S_year.png")
## Saving 7 x 5 in image
count.pairwise(d_lapop[c("CA", "S")])
## CA S
## CA 5920 5920
## S 5920 5920
# compare cognitive scoring methods ---------------------------------------
#we find out which scoring method is best by comparing them within years for their correlation to S
d_ca_s_cors = ddply(d_lapop, .variables = "year", .fun = function(block) {
cors = wtd.cors(block[c("CA", "S")]) %>% round(2)
data_frame("CA_S" = cors[1, 2])
})
#add reliability
d_ca_s_cors$reliability = lapop_ca_rel
lapop_ca_rel
## 2008 2010 2012 2014
## 0.67 0.69 0.43 0.82
#estimated true score cors
d_ca_s_cors$CA_S_true = aaply(d_ca_s_cors, .margins = 1, .fun = function(x) {
x[["CA_S"]] / sqrt(x[["reliability"]])
}, .expand = F)
#output
d_ca_s_cors %>% write_clipboard()
## year CA S reliability CA S true
## 1 2008 0.52 0.67 0.64
## 2 2010 0.44 0.69 0.53
## 3 2012 0.33 0.43 0.50
## 4 2014 0.48 0.82 0.53
mean(d_ca_s_cors$CA_S_true) %>% round(2)
## [1] 0.55
#pooled estimate
wtd.cors(d_lapop$S, d_lapop$CA)
## [,1]
## [1,] 0.44
mean(lapop_ca_rel)
## [1] 0.65
wtd.cors(d_lapop$S, d_lapop$CA) / sqrt(mean(lapop_ca_rel))
## [,1]
## [1,] 0.55
# raw correlations --------------------------------------------------------
v_reliabilities = c(mean(lapop_ca_rel), 1, 1)
d_lapop[c("CA", "S", "Skin_brightness")] %>% wtd.cors() %>% correct.cor(v_reliabilities) %>% write_clipboard()
## CA S Skin brightness
## CA 0.65 0.55 0.20
## S 0.44 1.00 0.26
## Skin brightness 0.16 0.26 1.00
# skin and CA/S within SIRE -------------------------------------------------
ggplot(d_lapop, aes(Skin_brightness, CA, color = SIRE)) +
geom_point() +
geom_smooth(method = lm, se = F)
## Warning: Removed 1501 rows containing non-finite values (stat_smooth).
## Warning: Removed 1501 rows containing missing values (geom_point).
#nothing too useful to be seen here
#correlations within SIREs with CIs
cors_SIRE_CI = ddply(d_lapop, .variables = "SIRE", .fun = function(dat) {
cors = cor_matrix(dat[c("CA", "S", "Skin_brightness")], CI = .95, reliabilities = v_reliabilities)
data.frame(SB_CA = cors[3, 1],
SB_S = cors[3, 2],
CA_S = cors[1, 2],
n = count.pairwise(dat[c("CA", "S", "Skin_brightness")]) %>% MAT_half %>% max
)
})
cors_SIRE_CI %>% write_clipboard()
## SIRE SB CA SB S CA S
## 1 Blanca 0.21 [0.17 0.24] 0.20 [0.17 0.24] 0.55 [0.53 0.57]
## 2 IndÃÂgena -0.09 [-0.36 0.19] 0.34 [0.07 0.56] 0.58 [0.41 0.71]
## 3 Mestiza 0.12 [0.06 0.17] 0.22 [0.17 0.27] 0.52 [0.48 0.55]
## 4 Mulata -0.29 [-0.86 0.59] 0.65 [-0.21 0.94] -0.21 [-0.65 0.34]
## 5 Negra 0.03 [-0.30 0.35] 0.09 [-0.24 0.41] 0.38 [0.12 0.59]
## 6 not stated 0.22 [0.08 0.36] 0.35 [0.21 0.47] 0.56 [0.47 0.65]
## 7 Otro 0.20 [-0.05 0.43] 0.49 [0.26 0.66] 0.47 [0.27 0.63]
## n
## 1 3869.00
## 2 77.00
## 3 1596.00
## 4 15.00
## 5 54.00
## 6 236.00
## 7 73.00
cors_SIRE_CI %>% write_tsv("output/within_SIRE_cors.tsv")
#weighted mean
#recalculate without CIs, and get all pairwise n's
cors_SIRE = ddply(d_lapop, .variables = "SIRE", .fun = function(dat) {
cors = cor_matrix(dat[c("CA", "S", "Skin_brightness")], reliabilities = v_reliabilities)
cors_n = count.pairwise(dat[c("CA", "S", "Skin_brightness")])
data.frame(SB_CA = cors[3, 1],
SB_S = cors[3, 2],
CA_S = cors[1, 2],
n_SB_CA = cors_n[3, 1],
n_SB_S = cors_n[3, 2],
n_CA_S = cors_n[1, 2]
)
})
wtd_mean(cors_SIRE$SB_CA, w = cors_SIRE$n_SB_CA)
## [1] 0.18
wtd_mean(cors_SIRE$SB_S, w = cors_SIRE$n_SB_S)
## [1] 0.22
wtd_mean(cors_SIRE$CA_S, w = cors_SIRE$n_CA_S)
## [1] 0.54
#models -- does SIRE have incremental validity above SB and CA?
(fit = lm("CA ~ SIRE", data = d_lapop) %>% MOD_summary(progress = F))
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## SIRE: Blanca 0.00 NA NA NA
## SIRE: IndÃÂgena -0.56 0.114 -0.78 -0.333
## SIRE: Mestiza -0.15 0.030 -0.21 -0.094
## SIRE: Mulata -0.66 0.257 -1.17 -0.160
## SIRE: Negra -0.68 0.136 -0.95 -0.418
## SIRE: not stated -0.38 0.067 -0.51 -0.247
## SIRE: Otro -0.34 0.117 -0.57 -0.113
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 CA 5920 5913 0.017 0.016 0.014
##
##
## Etas from analysis of variance
## Eta Eta_partial
## SIRE 0.13 0.13
(fit = lm("CA ~ SIRE + Skin_brightness", data = d_lapop) %>% MOD_summary(progress = F))
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## SIRE: Blanca 0.000 NA NA NA
## SIRE: IndÃÂgena -0.273 0.141 -0.550 0.0041
## SIRE: Mestiza 0.025 0.036 -0.045 0.0958
## SIRE: Mulata -0.567 0.373 -1.298 0.1650
## SIRE: Negra -0.190 0.167 -0.517 0.1364
## SIRE: not stated -0.292 0.077 -0.444 -0.1401
## SIRE: Otro -0.244 0.129 -0.497 0.0098
## Skin_brightness 0.153 0.016 0.121 0.1850
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 CA 4419 4411 0.032 0.03 0.027
##
##
## Etas from analysis of variance
## Eta Eta_partial
## SIRE 0.076 0.077
## Skin_brightness 0.138 0.139
(fit = lm("S ~ SIRE", data = d_lapop) %>% MOD_summary(progress = F))
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## SIRE: Blanca 0.00 NA NA NA
## SIRE: IndÃÂgena -0.71 0.113 -0.93 -0.492
## SIRE: Mestiza -0.39 0.029 -0.45 -0.336
## SIRE: Mulata -0.43 0.253 -0.93 0.063
## SIRE: Negra -0.74 0.134 -1.00 -0.474
## SIRE: not stated -0.45 0.066 -0.57 -0.317
## SIRE: Otro -0.45 0.116 -0.67 -0.218
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 5920 5913 0.043 0.042 0.04
##
##
## Etas from analysis of variance
## Eta Eta_partial
## SIRE 0.21 0.21
(fit = lm("S ~ SIRE + Skin_brightness", data = d_lapop) %>% MOD_summary(progress = F))
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## SIRE: Blanca 0.00 NA NA NA
## SIRE: IndÃÂgena -0.14 0.138 -0.41 0.134
## SIRE: Mestiza -0.13 0.035 -0.20 -0.060
## SIRE: Mulata 0.15 0.365 -0.56 0.870
## SIRE: Negra -0.19 0.163 -0.51 0.131
## SIRE: not stated -0.19 0.076 -0.34 -0.041
## SIRE: Otro -0.23 0.126 -0.48 0.020
## Skin_brightness 0.24 0.016 0.21 0.270
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 4419 4411 0.074 0.073 0.071
##
##
## Etas from analysis of variance
## Eta Eta_partial
## SIRE 0.065 0.067
## Skin_brightness 0.216 0.219
(fit = lm("S ~ SIRE + Skin_brightness + CA", data = d_lapop) %>% MOD_summary(progress = F))
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## SIRE: Blanca 0.000 NA NA NA
## SIRE: IndÃÂgena -0.032 0.127 -0.28 0.217
## SIRE: Mestiza -0.139 0.032 -0.20 -0.075
## SIRE: Mulata 0.373 0.335 -0.28 1.031
## SIRE: Negra -0.115 0.150 -0.41 0.179
## SIRE: not stated -0.077 0.070 -0.21 0.060
## SIRE: Otro -0.135 0.116 -0.36 0.093
## Skin_brightness 0.180 0.015 0.15 0.209
## CA 0.385 0.014 0.36 0.412
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 4419 4410 0.22 0.22 0.21
##
##
## Etas from analysis of variance
## Eta Eta_partial
## SIRE 0.061 0.069
## Skin_brightness 0.161 0.179
## CA 0.379 0.394
#output
write_clipboard(fit$coefs)
## Beta SE CI lower CI upper
## SIRE: Blanca 0.00
## SIRE: IndÃÂgena -0.03 0.13 -0.28 0.22
## SIRE: Mestiza -0.14 0.03 -0.20 -0.08
## SIRE: Mulata 0.37 0.34 -0.28 1.03
## SIRE: Negra -0.11 0.15 -0.41 0.18
## SIRE: not stated -0.08 0.07 -0.21 0.06
## SIRE: Otro -0.13 0.12 -0.36 0.09
## Skin brightness 0.18 0.01 0.15 0.21
## CA 0.39 0.01 0.36 0.41
# correlations within provinces -------------------------------------------
ggplot(d_lapop, aes(Skin_brightness, CA, color = province_name)) +
geom_point() +
geom_smooth(method = lm, se = F)
## Warning: Removed 1501 rows containing non-finite values (stat_smooth).
## Warning: Removed 1501 rows containing missing values (geom_point).
#nothing too useful to be seen here
#correlations within SIREs with CIs
cors_prov_CI = ddply(d_lapop, .variables = "province_name", .fun = function(dat) {
cors = cor_matrix(dat[c("CA", "S", "Skin_brightness")], CI = .95, reliabilities = v_reliabilities)
data.frame(SB_CA = cors[3, 1],
SB_S = cors[3, 2],
CA_S = cors[1, 2],
n = count.pairwise(dat[c("CA", "S", "Skin_brightness")]) %>% MAT_half %>% max
)
})
cors_prov_CI %>% write_clipboard(print=T)
## province name SB CA SB S
## 1 Buenos Aires 0.27 [0.19 0.35] 0.30 [0.22 0.38]
## 2 Buenos Aires City (DC) 0.14 [0.09 0.19] 0.17 [0.12 0.22]
## 3 Catamarca -0.56 [-0.85 -0.01] 0.45 [-0.13 0.80]
## 4 Chaco 0.08 [-0.03 0.19] 0.21 [0.09 0.31]
## 5 Chubut -0.22 [-0.63 0.29] -0.22 [-0.63 0.29]
## 6 Córdoba 0.06 [-0.05 0.18] 0.05 [-0.07 0.16]
## 7 Corrientes 0.20 [-0.03 0.41] 0.28 [0.05 0.48]
## 8 Entre RÃÂos -0.15 [-0.36 0.07] 0.13 [-0.09 0.34]
## 9 Formosa -0.39 [-0.72 0.10] -0.10 [-0.54 0.38]
## 10 Jujuy 0.30 [-0.11 0.62] 0.07 [-0.34 0.45]
## 11 La Pampa 0.21 [0.02 0.38] 0.11 [-0.08 0.29]
## 12 La Rioja -0.02 [-0.59 0.56] -0.09 [-0.63 0.51]
## 13 Mendoza 0.16 [0.03 0.28] 0.30 [0.18 0.41]
## 14 Misiones -0.03 [-0.36 0.31] 0.00 [-0.33 0.34]
## 15 Neuquén -0.11 [-0.35 0.15] 0.13 [-0.12 0.37]
## 16 RÃÂo Negro 0.28 [0.12 0.43] 0.23 [0.07 0.39]
## 17 Salta 0.14 [-0.03 0.29] -0.12 [-0.28 0.04]
## 18 San Juan 0.52 [0.30 0.68] 0.41 [0.18 0.60]
## 19 San Luis 0.41 [-0.13 0.76] -0.46 [-0.79 0.07]
## 20 Santa Fe 0.21 [0.11 0.30] 0.26 [0.17 0.36]
## 21 Santiago del Estero -0.05 [-0.21 0.12] 0.18 [0.02 0.34]
## 22 Tierra del Fuego -0.15 [-0.69 0.49] -0.12 [-0.67 0.52]
## 23 Tucumán 0.05 [-0.08 0.18] 0.25 [0.13 0.37]
## CA S n
## 1 0.46 [0.39 0.53] 504.00
## 2 0.48 [0.45 0.52] 2242.00
## 3 0.04 [-0.35 0.42] 26.00
## 4 0.60 [0.53 0.67] 328.00
## 5 0.43 [0.11 0.67] 34.00
## 6 0.59 [0.52 0.65] 429.00
## 7 0.92 [0.88 0.94] 111.00
## 8 0.56 [0.43 0.67] 128.00
## 9 0.60 [0.34 0.77] 37.00
## 10 0.62 [0.41 0.76] 52.00
## 11 0.58 [0.44 0.70] 108.00
## 12 0.87 [0.72 0.94] 26.00
## 13 0.60 [0.53 0.67] 312.00
## 14 0.71 [0.58 0.81] 74.00
## 15 0.51 [0.33 0.65] 87.00
## 16 0.40 [0.27 0.52] 170.00
## 17 0.38 [0.25 0.50] 186.00
## 18 0.66 [0.51 0.76] 84.00
## 19 0.14 [-0.24 0.48] 29.00
## 20 0.52 [0.45 0.58] 493.00
## 21 0.53 [0.41 0.63] 168.00
## 22 0.46 [0.05 0.73] 23.00
## 23 0.36 [0.26 0.46] 269.00
#weighted mean
#recalculate without CIs, and get all pairwise n's
cors_prov = ddply(d_lapop, .variables = "province_name", .fun = function(dat) {
cors = cor_matrix(dat[c("CA", "S", "Skin_brightness")], reliabilities = v_reliabilities)
cors_n = count.pairwise(dat[c("CA", "S", "Skin_brightness")])
data.frame(SB_CA = cors[3, 1],
SB_S = cors[3, 2],
CA_S = cors[1, 2],
n_SB_CA = cors_n[3, 1],
n_SB_S = cors_n[3, 2],
n_CA_S = cors_n[1, 2]
)
})
wtd_mean(cors_prov$SB_CA, w = cors_prov$n_SB_CA)
## [1] 0.14
wtd_mean(cors_prov$SB_S, w = cors_prov$n_SB_S)
## [1] 0.19
wtd_mean(cors_prov$CA_S, w = cors_prov$n_CA_S)
## [1] 0.51
#models -- does prov have incremental validity above SB and CA?
(fit = lm("CA ~ province_name", data = d_lapop) %>% MOD_summary(progress = F))
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## province_name: Buenos Aires 0.000 NA NA NA
## province_name: Buenos Aires City (DC) 0.135 0.048 0.040 0.2296
## province_name: Catamarca -0.250 0.197 -0.637 0.1364
## province_name: Chaco -0.507 0.070 -0.644 -0.3710
## province_name: Chubut -0.384 0.174 -0.725 -0.0436
## province_name: Córdoba 0.075 0.064 -0.052 0.2010
## province_name: Corrientes -0.093 0.103 -0.294 0.1090
## province_name: Entre RÃÂos -0.102 0.097 -0.293 0.0880
## province_name: Formosa -0.427 0.167 -0.755 -0.1000
## province_name: Jujuy -0.564 0.143 -0.844 -0.2839
## province_name: La Pampa -0.143 0.104 -0.347 0.0608
## province_name: La Rioja -0.105 0.197 -0.491 0.2820
## province_name: Mendoza 0.109 0.071 -0.029 0.2477
## province_name: Misiones -0.049 0.122 -0.288 0.1904
## province_name: Neuquén 0.310 0.114 0.086 0.5327
## province_name: RÃÂo Negro 0.022 0.087 -0.148 0.1927
## province_name: Salta 0.100 0.084 -0.065 0.2645
## province_name: San Juan -0.130 0.116 -0.357 0.0962
## province_name: San Luis 0.324 0.187 -0.043 0.6916
## province_name: Santa Fe -0.113 0.062 -0.235 0.0088
## province_name: Santiago del Estero -0.553 0.087 -0.724 -0.3814
## province_name: Tierra del Fuego 0.345 0.209 -0.065 0.7550
## province_name: Tucumán -0.148 0.074 -0.293 -0.0031
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 CA 5920 5897 0.042 0.038 0.033
##
##
## Etas from analysis of variance
## Eta Eta_partial
## province_name 0.2 0.2
(fit = lm("CA ~ province_name + Skin_brightness + SIRE", data = d_lapop) %>% MOD_summary(progress = F))
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## province_name: Buenos Aires 0.0000 NA NA NA
## province_name: Buenos Aires City (DC) 0.1097 0.050 0.0113 0.208
## province_name: Catamarca -0.1671 0.272 -0.7004 0.366
## province_name: Chaco -0.4319 0.072 -0.5737 -0.290
## province_name: Chubut -0.3934 0.239 -0.8612 0.074
## province_name: Córdoba 0.2312 0.071 0.0919 0.370
## province_name: Corrientes -0.1602 0.123 -0.4021 0.082
## province_name: Entre RÃÂos -0.0601 0.117 -0.2885 0.168
## province_name: Formosa -0.5264 0.232 -0.9819 -0.071
## province_name: Jujuy -0.7543 0.198 -1.1431 -0.366
## province_name: La Pampa -0.1570 0.103 -0.3588 0.045
## province_name: La Rioja 0.2257 0.283 -0.3292 0.781
## province_name: Mendoza 0.0838 0.076 -0.0644 0.232
## province_name: Misiones 0.3073 0.169 -0.0249 0.640
## province_name: Neuquén 0.3495 0.131 0.0918 0.607
## province_name: RÃÂo Negro 0.0538 0.093 -0.1293 0.237
## province_name: Salta 0.2585 0.092 0.0784 0.439
## province_name: San Juan 0.0270 0.132 -0.2325 0.286
## province_name: San Luis 0.5098 0.254 0.0109 1.009
## province_name: Santa Fe -0.1889 0.067 -0.3199 -0.058
## province_name: Santiago del Estero -0.4518 0.095 -0.6373 -0.266
## province_name: Tierra del Fuego 0.5697 0.295 -0.0083 1.148
## province_name: Tucumán -0.1228 0.078 -0.2757 0.030
## Skin_brightness 0.1014 0.017 0.0682 0.135
## SIRE: Blanca 0.0000 NA NA NA
## SIRE: IndÃÂgena -0.3589 0.140 -0.6325 -0.085
## SIRE: Mestiza -0.0081 0.036 -0.0792 0.063
## SIRE: Mulata -0.6620 0.369 -1.3848 0.061
## SIRE: Negra -0.2123 0.164 -0.5346 0.110
## SIRE: not stated -0.3079 0.076 -0.4579 -0.158
## SIRE: Otro -0.2381 0.128 -0.4889 0.013
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 CA 4419 4389 0.073 0.066 0.058
##
##
## Etas from analysis of variance
## Eta Eta_partial
## province_name 0.202 0.206
## Skin_brightness 0.087 0.090
## SIRE 0.079 0.081
(fit = lm("S ~ province_name", data = d_lapop) %>% MOD_summary(progress = F))
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## province_name: Buenos Aires 0.0000 NA NA NA
## province_name: Buenos Aires City (DC) 0.0564 0.047 -0.036 0.148
## province_name: Catamarca -0.9334 0.192 -1.309 -0.558
## province_name: Chaco -0.7419 0.068 -0.874 -0.609
## province_name: Chubut -0.0167 0.169 -0.347 0.314
## province_name: Córdoba 0.0088 0.063 -0.114 0.131
## province_name: Corrientes -0.2764 0.100 -0.472 -0.081
## province_name: Entre RÃÂos -0.2326 0.094 -0.417 -0.048
## province_name: Formosa -0.5336 0.162 -0.852 -0.216
## province_name: Jujuy -0.8279 0.139 -1.100 -0.556
## province_name: La Pampa -0.0016 0.101 -0.200 0.196
## province_name: La Rioja -0.4986 0.192 -0.874 -0.123
## province_name: Mendoza 0.0896 0.069 -0.045 0.224
## province_name: Misiones -0.3903 0.119 -0.623 -0.158
## province_name: Neuquén -0.0391 0.111 -0.256 0.178
## province_name: RÃÂo Negro -0.0594 0.084 -0.225 0.106
## province_name: Salta -0.4606 0.082 -0.621 -0.300
## province_name: San Juan -0.3587 0.112 -0.579 -0.139
## province_name: San Luis -0.6560 0.182 -1.013 -0.299
## province_name: Santa Fe -0.0465 0.060 -0.165 0.072
## province_name: Santiago del Estero -1.2435 0.085 -1.410 -1.077
## province_name: Tierra del Fuego 0.3639 0.203 -0.034 0.762
## province_name: Tucumán -0.4234 0.072 -0.564 -0.282
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 5920 5897 0.096 0.093 0.089
##
##
## Etas from analysis of variance
## Eta Eta_partial
## province_name 0.31 0.31
(fit = lm("S ~ province_name + Skin_brightness", data = d_lapop) %>% MOD_summary(progress = F))
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## province_name: Buenos Aires 0.000 NA NA NA
## province_name: Buenos Aires City (DC) 0.057 0.048 -0.037 0.152
## province_name: Catamarca -1.104 0.261 -1.615 -0.592
## province_name: Chaco -0.571 0.069 -0.707 -0.435
## province_name: Chubut -0.207 0.229 -0.656 0.242
## province_name: Córdoba 0.021 0.068 -0.112 0.155
## province_name: Corrientes -0.259 0.118 -0.490 -0.029
## province_name: Entre RÃÂos -0.122 0.111 -0.341 0.096
## province_name: Formosa -0.163 0.223 -0.599 0.274
## province_name: Jujuy -0.893 0.190 -1.266 -0.520
## province_name: La Pampa 0.022 0.099 -0.172 0.215
## province_name: La Rioja -0.124 0.271 -0.656 0.408
## province_name: Mendoza 0.076 0.072 -0.066 0.218
## province_name: Misiones -0.079 0.162 -0.398 0.239
## province_name: Neuquén 0.037 0.126 -0.210 0.284
## province_name: RÃÂo Negro 0.031 0.090 -0.145 0.206
## province_name: Salta -0.434 0.087 -0.605 -0.263
## province_name: San Juan -0.329 0.127 -0.578 -0.081
## province_name: San Luis -0.816 0.243 -1.293 -0.339
## province_name: Santa Fe -0.064 0.064 -0.190 0.061
## province_name: Santiago del Estero -1.138 0.091 -1.315 -0.960
## province_name: Tierra del Fuego 0.741 0.283 0.186 1.295
## province_name: Tucumán -0.286 0.075 -0.432 -0.139
## Skin_brightness 0.189 0.015 0.161 0.218
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 4419 4395 0.14 0.14 0.13
##
##
## Etas from analysis of variance
## Eta Eta_partial
## province_name 0.27 0.28
## Skin_brightness 0.18 0.19
(fit = lm("S ~ province_name + Skin_brightness + CA", data = d_lapop) %>% MOD_summary(progress = F))
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## province_name: Buenos Aires 0.0000 NA NA NA
## province_name: Buenos Aires City (DC) 0.0157 0.044 -0.071 0.10297
## province_name: Catamarca -1.0533 0.241 -1.526 -0.58011
## province_name: Chaco -0.4175 0.064 -0.544 -0.29115
## province_name: Chubut -0.0731 0.212 -0.489 0.34239
## province_name: Córdoba -0.0622 0.063 -0.186 0.06140
## province_name: Corrientes -0.2130 0.109 -0.426 0.00024
## province_name: Entre RÃÂos -0.1025 0.103 -0.305 0.09975
## province_name: Formosa 0.0159 0.206 -0.388 0.42020
## province_name: Jujuy -0.6267 0.176 -0.972 -0.28094
## province_name: La Pampa 0.0691 0.091 -0.110 0.24797
## province_name: La Rioja -0.2181 0.251 -0.710 0.27420
## province_name: Mendoza 0.0379 0.067 -0.094 0.16933
## province_name: Misiones -0.1953 0.150 -0.490 0.09941
## province_name: Neuquén -0.0966 0.117 -0.325 0.13215
## province_name: RÃÂo Negro 0.0095 0.083 -0.153 0.17191
## province_name: Salta -0.5314 0.081 -0.690 -0.37303
## province_name: San Juan -0.3494 0.117 -0.580 -0.11909
## province_name: San Luis -0.9953 0.225 -1.437 -0.55380
## province_name: Santa Fe 0.0043 0.059 -0.112 0.12034
## province_name: Santiago del Estero -0.9776 0.084 -1.142 -0.81277
## province_name: Tierra del Fuego 0.5258 0.262 0.012 1.03944
## province_name: Tucumán -0.2475 0.069 -0.383 -0.11195
## Skin_brightness 0.1470 0.014 0.120 0.17396
## CA 0.3634 0.013 0.337 0.38961
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 4419 4394 0.27 0.26 0.26
##
##
## Etas from analysis of variance
## Eta Eta_partial
## province_name 0.23 0.26
## Skin_brightness 0.14 0.16
## CA 0.35 0.38
#full models
lm("CA ~ SIRE + province_name + Skin_brightness", data = d_lapop) %>% MOD_summary(progress = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## SIRE: Blanca 0.0000 NA NA NA
## SIRE: IndÃÂgena -0.3589 0.140 -0.6325 -0.085
## SIRE: Mestiza -0.0081 0.036 -0.0792 0.063
## SIRE: Mulata -0.6620 0.369 -1.3848 0.061
## SIRE: Negra -0.2123 0.164 -0.5346 0.110
## SIRE: not stated -0.3079 0.076 -0.4579 -0.158
## SIRE: Otro -0.2381 0.128 -0.4889 0.013
## province_name: Buenos Aires 0.0000 NA NA NA
## province_name: Buenos Aires City (DC) 0.1097 0.050 0.0113 0.208
## province_name: Catamarca -0.1671 0.272 -0.7004 0.366
## province_name: Chaco -0.4319 0.072 -0.5737 -0.290
## province_name: Chubut -0.3934 0.239 -0.8612 0.074
## province_name: Córdoba 0.2312 0.071 0.0919 0.370
## province_name: Corrientes -0.1602 0.123 -0.4021 0.082
## province_name: Entre RÃÂos -0.0601 0.117 -0.2885 0.168
## province_name: Formosa -0.5264 0.232 -0.9819 -0.071
## province_name: Jujuy -0.7543 0.198 -1.1431 -0.366
## province_name: La Pampa -0.1570 0.103 -0.3588 0.045
## province_name: La Rioja 0.2257 0.283 -0.3292 0.781
## province_name: Mendoza 0.0838 0.076 -0.0644 0.232
## province_name: Misiones 0.3073 0.169 -0.0249 0.640
## province_name: Neuquén 0.3495 0.131 0.0918 0.607
## province_name: RÃÂo Negro 0.0538 0.093 -0.1293 0.237
## province_name: Salta 0.2585 0.092 0.0784 0.439
## province_name: San Juan 0.0270 0.132 -0.2325 0.286
## province_name: San Luis 0.5098 0.254 0.0109 1.009
## province_name: Santa Fe -0.1889 0.067 -0.3199 -0.058
## province_name: Santiago del Estero -0.4518 0.095 -0.6373 -0.266
## province_name: Tierra del Fuego 0.5697 0.295 -0.0083 1.148
## province_name: Tucumán -0.1228 0.078 -0.2757 0.030
## Skin_brightness 0.1014 0.017 0.0682 0.135
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 CA 4419 4389 0.073 0.066 0.058
##
##
## Etas from analysis of variance
## Eta Eta_partial
## SIRE 0.079 0.081
## province_name 0.202 0.206
## Skin_brightness 0.087 0.090
lm("CA ~ SIRE + province_name + Skin_brightness", data = d_lapop) %>% MOD_summary(progress = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## SIRE: Blanca 0.0000 NA NA NA
## SIRE: IndÃÂgena -0.3589 0.140 -0.6325 -0.085
## SIRE: Mestiza -0.0081 0.036 -0.0792 0.063
## SIRE: Mulata -0.6620 0.369 -1.3848 0.061
## SIRE: Negra -0.2123 0.164 -0.5346 0.110
## SIRE: not stated -0.3079 0.076 -0.4579 -0.158
## SIRE: Otro -0.2381 0.128 -0.4889 0.013
## province_name: Buenos Aires 0.0000 NA NA NA
## province_name: Buenos Aires City (DC) 0.1097 0.050 0.0113 0.208
## province_name: Catamarca -0.1671 0.272 -0.7004 0.366
## province_name: Chaco -0.4319 0.072 -0.5737 -0.290
## province_name: Chubut -0.3934 0.239 -0.8612 0.074
## province_name: Córdoba 0.2312 0.071 0.0919 0.370
## province_name: Corrientes -0.1602 0.123 -0.4021 0.082
## province_name: Entre RÃÂos -0.0601 0.117 -0.2885 0.168
## province_name: Formosa -0.5264 0.232 -0.9819 -0.071
## province_name: Jujuy -0.7543 0.198 -1.1431 -0.366
## province_name: La Pampa -0.1570 0.103 -0.3588 0.045
## province_name: La Rioja 0.2257 0.283 -0.3292 0.781
## province_name: Mendoza 0.0838 0.076 -0.0644 0.232
## province_name: Misiones 0.3073 0.169 -0.0249 0.640
## province_name: Neuquén 0.3495 0.131 0.0918 0.607
## province_name: RÃÂo Negro 0.0538 0.093 -0.1293 0.237
## province_name: Salta 0.2585 0.092 0.0784 0.439
## province_name: San Juan 0.0270 0.132 -0.2325 0.286
## province_name: San Luis 0.5098 0.254 0.0109 1.009
## province_name: Santa Fe -0.1889 0.067 -0.3199 -0.058
## province_name: Santiago del Estero -0.4518 0.095 -0.6373 -0.266
## province_name: Tierra del Fuego 0.5697 0.295 -0.0083 1.148
## province_name: Tucumán -0.1228 0.078 -0.2757 0.030
## Skin_brightness 0.1014 0.017 0.0682 0.135
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 CA 4419 4389 0.073 0.066 0.058
##
##
## Etas from analysis of variance
## Eta Eta_partial
## SIRE 0.079 0.081
## province_name 0.202 0.206
## Skin_brightness 0.087 0.090
lm("S ~ SIRE + province_name + Skin_brightness + CA", data = d_lapop) %>% MOD_summary(progress = F)
## The model data contains characters. These were automatically converteed but you should probably do this before calling this function.
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## SIRE: Blanca 0.0000 NA NA NA
## SIRE: IndÃÂgena 0.0034 0.124 -0.240 0.2464
## SIRE: Mestiza -0.1137 0.032 -0.177 -0.0506
## SIRE: Mulata 0.4913 0.327 -0.150 1.1330
## SIRE: Negra -0.1327 0.146 -0.419 0.1534
## SIRE: not stated -0.1246 0.068 -0.258 0.0088
## SIRE: Otro -0.1695 0.114 -0.392 0.0532
## province_name: Buenos Aires 0.0000 NA NA NA
## province_name: Buenos Aires City (DC) 0.0096 0.045 -0.078 0.0970
## province_name: Catamarca -1.0147 0.241 -1.488 -0.5413
## province_name: Chaco -0.4248 0.064 -0.551 -0.2984
## province_name: Chubut -0.0546 0.212 -0.470 0.3607
## province_name: Córdoba -0.0579 0.063 -0.182 0.0659
## province_name: Corrientes -0.2642 0.110 -0.479 -0.0495
## province_name: Entre RÃÂos -0.1336 0.103 -0.336 0.0692
## province_name: Formosa -0.0334 0.206 -0.438 0.3711
## province_name: Jujuy -0.6241 0.176 -0.970 -0.2784
## province_name: La Pampa 0.0472 0.091 -0.132 0.2265
## province_name: La Rioja -0.1809 0.251 -0.673 0.3116
## province_name: Mendoza 0.0301 0.067 -0.102 0.1617
## province_name: Misiones -0.2283 0.150 -0.523 0.0667
## province_name: Neuquén -0.1116 0.117 -0.340 0.1173
## province_name: RÃÂo Negro 0.0028 0.083 -0.160 0.1653
## province_name: Salta -0.5051 0.082 -0.665 -0.3451
## province_name: San Juan -0.3538 0.117 -0.584 -0.1235
## province_name: San Luis -1.0478 0.226 -1.491 -0.6047
## province_name: Santa Fe 0.0011 0.059 -0.115 0.1175
## province_name: Santiago del Estero -0.9862 0.084 -1.151 -0.8211
## province_name: Tierra del Fuego 0.5513 0.262 0.038 1.0647
## province_name: Tucumán -0.2590 0.069 -0.395 -0.1232
## Skin_brightness 0.1266 0.015 0.097 0.1562
## CA 0.3626 0.013 0.336 0.3889
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 4419 4388 0.27 0.26 0.26
##
##
## Etas from analysis of variance
## Eta Eta_partial
## SIRE 0.055 0.065
## province_name 0.227 0.257
## Skin_brightness 0.108 0.126
## CA 0.349 0.378
Province-level analyses
Third, the main province-level analyses.
### PREDICTOR ANALYSIS
# aliases/vars -----------------------------------------------------------------
#aliases
d_main$S_knoema = d_main$S_wtd
d_main$CA_ONE = d_main$CA
# correlations ------------------------------------------------------------
v_primary_vars = c("CA_ONE", "CA_lapop", "CA_lapop_aggr", "S_knoema", "S_lapop", "S_lapop_aggr", "S_comb", "HDI2011", "HDI1996", "European", "European_SIRE", "White", "Skin_brightness", "Latitude", "Temperature")
wtd.cors(d_main[v_primary_vars]) %>% round(2)
## CA_ONE CA_lapop CA_lapop_aggr S_knoema S_lapop
## CA_ONE 1.00 0.56 0.60 0.81 0.76
## CA_lapop 0.56 1.00 0.97 0.64 0.63
## CA_lapop_aggr 0.60 0.97 1.00 0.66 0.69
## S_knoema 0.81 0.64 0.66 1.00 0.68
## S_lapop 0.76 0.63 0.69 0.68 1.00
## S_lapop_aggr 0.75 0.68 0.75 0.66 0.99
## S_comb 0.87 0.71 0.75 0.95 0.86
## HDI2011 0.82 0.61 0.64 0.88 0.78
## HDI1996 0.76 0.52 0.56 0.83 0.65
## European 0.44 0.31 0.40 0.36 0.43
## European_SIRE 0.50 0.40 0.52 0.31 0.66
## White 0.49 0.35 0.47 0.26 0.58
## Skin_brightness 0.65 0.52 0.55 0.62 0.68
## Latitude 0.48 0.50 0.53 0.58 0.69
## Temperature -0.49 -0.55 -0.54 -0.55 -0.66
## S_lapop_aggr S_comb HDI2011 HDI1996 European European_SIRE
## CA_ONE 0.75 0.87 0.82 0.76 0.44 0.50
## CA_lapop 0.68 0.71 0.61 0.52 0.31 0.40
## CA_lapop_aggr 0.75 0.75 0.64 0.56 0.40 0.52
## S_knoema 0.66 0.95 0.88 0.83 0.36 0.31
## S_lapop 0.99 0.86 0.78 0.65 0.43 0.66
## S_lapop_aggr 1.00 0.86 0.76 0.63 0.47 0.69
## S_comb 0.86 1.00 0.90 0.82 0.42 0.50
## HDI2011 0.76 0.90 1.00 0.87 0.18 0.36
## HDI1996 0.63 0.82 0.87 1.00 0.38 0.33
## European 0.47 0.42 0.18 0.38 1.00 0.69
## European_SIRE 0.69 0.50 0.36 0.33 0.69 1.00
## White 0.60 0.43 0.28 0.30 0.72 0.97
## Skin_brightness 0.68 0.71 0.68 0.54 0.22 0.48
## Latitude 0.69 0.64 0.70 0.57 0.12 0.38
## Temperature -0.65 -0.61 -0.71 -0.49 0.08 -0.19
## White Skin_brightness Latitude Temperature
## CA_ONE 0.49 0.65 0.48 -0.49
## CA_lapop 0.35 0.52 0.50 -0.55
## CA_lapop_aggr 0.47 0.55 0.53 -0.54
## S_knoema 0.26 0.62 0.58 -0.55
## S_lapop 0.58 0.68 0.69 -0.66
## S_lapop_aggr 0.60 0.68 0.69 -0.65
## S_comb 0.43 0.71 0.64 -0.61
## HDI2011 0.28 0.68 0.70 -0.71
## HDI1996 0.30 0.54 0.57 -0.49
## European 0.72 0.22 0.12 0.08
## European_SIRE 0.97 0.48 0.38 -0.19
## White 1.00 0.41 0.25 -0.08
## Skin_brightness 0.41 1.00 0.56 -0.57
## Latitude 0.25 0.56 1.00 -0.93
## Temperature -0.08 -0.57 -0.93 1.00
wtd.cors(d_main[v_primary_vars], weight = d_main$n_mean %>% sqrt) %>% round(2)
## CA_ONE CA_lapop CA_lapop_aggr S_knoema S_lapop
## CA_ONE 1.00 0.65 0.71 0.91 0.77
## CA_lapop 0.65 1.00 0.97 0.69 0.75
## CA_lapop_aggr 0.71 0.97 1.00 0.72 0.81
## S_knoema 0.91 0.69 0.72 1.00 0.74
## S_lapop 0.77 0.75 0.81 0.74 1.00
## S_lapop_aggr 0.75 0.75 0.83 0.70 0.99
## S_comb 0.92 0.76 0.81 0.97 0.88
## HDI2011 0.92 0.69 0.73 0.93 0.77
## HDI1996 0.86 0.53 0.59 0.87 0.59
## European 0.58 0.32 0.43 0.56 0.52
## European_SIRE 0.53 0.45 0.59 0.46 0.70
## White 0.54 0.40 0.55 0.46 0.64
## Skin_brightness 0.75 0.65 0.68 0.76 0.79
## Latitude 0.50 0.50 0.54 0.56 0.66
## Temperature -0.37 -0.54 -0.51 -0.40 -0.59
## S_lapop_aggr S_comb HDI2011 HDI1996 European European_SIRE
## CA_ONE 0.75 0.92 0.92 0.86 0.58 0.53
## CA_lapop 0.75 0.76 0.69 0.53 0.32 0.45
## CA_lapop_aggr 0.83 0.81 0.73 0.59 0.43 0.59
## S_knoema 0.70 0.97 0.93 0.87 0.56 0.46
## S_lapop 0.99 0.88 0.77 0.59 0.52 0.70
## S_lapop_aggr 1.00 0.85 0.74 0.56 0.53 0.73
## S_comb 0.85 1.00 0.93 0.82 0.58 0.58
## HDI2011 0.74 0.93 1.00 0.89 0.43 0.49
## HDI1996 0.56 0.82 0.89 1.00 0.55 0.39
## European 0.53 0.58 0.43 0.55 1.00 0.69
## European_SIRE 0.73 0.58 0.49 0.39 0.69 1.00
## White 0.67 0.56 0.46 0.41 0.75 0.97
## Skin_brightness 0.77 0.82 0.77 0.60 0.40 0.57
## Latitude 0.65 0.62 0.56 0.44 0.29 0.44
## Temperature -0.57 -0.48 -0.44 -0.19 0.06 -0.15
## White Skin_brightness Latitude Temperature
## CA_ONE 0.54 0.75 0.50 -0.37
## CA_lapop 0.40 0.65 0.50 -0.54
## CA_lapop_aggr 0.55 0.68 0.54 -0.51
## S_knoema 0.46 0.76 0.56 -0.40
## S_lapop 0.64 0.79 0.66 -0.59
## S_lapop_aggr 0.67 0.77 0.65 -0.57
## S_comb 0.56 0.82 0.62 -0.48
## HDI2011 0.46 0.77 0.56 -0.44
## HDI1996 0.41 0.60 0.44 -0.19
## European 0.75 0.40 0.29 0.06
## European_SIRE 0.97 0.57 0.44 -0.15
## White 1.00 0.53 0.34 -0.05
## Skin_brightness 0.53 1.00 0.52 -0.46
## Latitude 0.34 0.52 1.00 -0.84
## Temperature -0.05 -0.46 -0.84 1.00
wtd.cors(d_main[v_primary_vars], weight = d_main$pop %>% sqrt) %>% round(2)
## CA_ONE CA_lapop CA_lapop_aggr S_knoema S_lapop
## CA_ONE 1.00 0.60 0.65 0.85 0.76
## CA_lapop 0.60 1.00 0.97 0.64 0.67
## CA_lapop_aggr 0.65 0.97 1.00 0.65 0.72
## S_knoema 0.85 0.64 0.65 1.00 0.72
## S_lapop 0.76 0.67 0.72 0.72 1.00
## S_lapop_aggr 0.75 0.70 0.77 0.69 0.99
## S_comb 0.88 0.71 0.75 0.96 0.88
## HDI2011 0.87 0.62 0.65 0.86 0.72
## HDI1996 0.81 0.49 0.54 0.80 0.57
## European 0.48 0.35 0.44 0.49 0.53
## European_SIRE 0.47 0.39 0.53 0.33 0.62
## White 0.46 0.35 0.49 0.32 0.58
## Skin_brightness 0.69 0.56 0.57 0.69 0.72
## Latitude 0.47 0.46 0.50 0.57 0.66
## Temperature -0.43 -0.52 -0.48 -0.50 -0.61
## S_lapop_aggr S_comb HDI2011 HDI1996 European European_SIRE
## CA_ONE 0.75 0.88 0.87 0.81 0.48 0.47
## CA_lapop 0.70 0.71 0.62 0.49 0.35 0.39
## CA_lapop_aggr 0.77 0.75 0.65 0.54 0.44 0.53
## S_knoema 0.69 0.96 0.86 0.80 0.49 0.33
## S_lapop 0.99 0.88 0.72 0.57 0.53 0.62
## S_lapop_aggr 1.00 0.86 0.71 0.55 0.56 0.67
## S_comb 0.86 1.00 0.87 0.77 0.54 0.48
## HDI2011 0.71 0.87 1.00 0.86 0.26 0.37
## HDI1996 0.55 0.77 0.86 1.00 0.42 0.32
## European 0.56 0.54 0.26 0.42 1.00 0.67
## European_SIRE 0.67 0.48 0.37 0.32 0.67 1.00
## White 0.62 0.46 0.32 0.31 0.72 0.97
## Skin_brightness 0.71 0.76 0.68 0.51 0.33 0.48
## Latitude 0.65 0.61 0.58 0.46 0.24 0.36
## Temperature -0.59 -0.55 -0.54 -0.30 0.02 -0.07
## White Skin_brightness Latitude Temperature
## CA_ONE 0.46 0.69 0.47 -0.43
## CA_lapop 0.35 0.56 0.46 -0.52
## CA_lapop_aggr 0.49 0.57 0.50 -0.48
## S_knoema 0.32 0.69 0.57 -0.50
## S_lapop 0.58 0.72 0.66 -0.61
## S_lapop_aggr 0.62 0.71 0.65 -0.59
## S_comb 0.46 0.76 0.61 -0.55
## HDI2011 0.32 0.68 0.58 -0.54
## HDI1996 0.31 0.51 0.46 -0.30
## European 0.72 0.33 0.24 0.02
## European_SIRE 0.97 0.48 0.36 -0.07
## White 1.00 0.44 0.26 0.00
## Skin_brightness 0.44 1.00 0.51 -0.50
## Latitude 0.26 0.51 1.00 -0.88
## Temperature 0.00 -0.50 -0.88 1.00
#output
combine_upperlower(wtd.cors(d_main[v_primary_vars], weight = d_main$pop %>% sqrt),
wtd.cors(d_main[v_primary_vars])) %>%
write_clipboard()
## CA ONE CA lapop CA lapop aggr S knoema S lapop
## CA ONE 0.60 0.65 0.85 0.76
## CA lapop 0.56 0.97 0.64 0.67
## CA lapop aggr 0.60 0.97 0.65 0.72
## S knoema 0.81 0.64 0.66 0.72
## S lapop 0.76 0.63 0.69 0.68
## S lapop aggr 0.75 0.68 0.75 0.66 0.99
## S comb 0.87 0.71 0.75 0.95 0.86
## HDI2011 0.82 0.61 0.64 0.88 0.78
## HDI1996 0.76 0.52 0.56 0.83 0.65
## European 0.44 0.31 0.40 0.36 0.43
## European SIRE 0.50 0.40 0.52 0.31 0.66
## White 0.49 0.35 0.47 0.26 0.58
## Skin brightness 0.65 0.52 0.55 0.62 0.68
## Latitude 0.48 0.50 0.53 0.58 0.69
## Temperature -0.49 -0.55 -0.54 -0.55 -0.66
## S lapop aggr S comb HDI2011 HDI1996 European European SIRE
## CA ONE 0.75 0.88 0.87 0.81 0.48 0.47
## CA lapop 0.70 0.71 0.62 0.49 0.35 0.39
## CA lapop aggr 0.77 0.75 0.65 0.54 0.44 0.53
## S knoema 0.69 0.96 0.86 0.80 0.49 0.33
## S lapop 0.99 0.88 0.72 0.57 0.53 0.62
## S lapop aggr 0.86 0.71 0.55 0.56 0.67
## S comb 0.86 0.87 0.77 0.54 0.48
## HDI2011 0.76 0.90 0.86 0.26 0.37
## HDI1996 0.63 0.82 0.87 0.42 0.32
## European 0.47 0.42 0.18 0.38 0.67
## European SIRE 0.69 0.50 0.36 0.33 0.69
## White 0.60 0.43 0.28 0.30 0.72 0.97
## Skin brightness 0.68 0.71 0.68 0.54 0.22 0.48
## Latitude 0.69 0.64 0.70 0.57 0.12 0.38
## Temperature -0.65 -0.61 -0.71 -0.49 0.08 -0.19
## White Skin brightness Latitude Temperature
## CA ONE 0.46 0.69 0.47 -0.43
## CA lapop 0.35 0.56 0.46 -0.52
## CA lapop aggr 0.49 0.57 0.50 -0.48
## S knoema 0.32 0.69 0.57 -0.50
## S lapop 0.58 0.72 0.66 -0.61
## S lapop aggr 0.62 0.71 0.65 -0.59
## S comb 0.46 0.76 0.61 -0.55
## HDI2011 0.32 0.68 0.58 -0.54
## HDI1996 0.31 0.51 0.46 -0.30
## European 0.72 0.33 0.24 0.02
## European SIRE 0.97 0.48 0.36 -0.07
## White 0.44 0.26 0.00
## Skin brightness 0.41 0.51 -0.50
## Latitude 0.25 0.56 -0.88
## Temperature -0.08 -0.57 -0.93
# OLS ---------------------------------------------------------------------
#some wtf error with standardized betas not fitting with correlations
#CA
(fit = lm("CA ~ European + Temperature", data = d_main) %>%
MOD_summary(runs = 200, progress = F))
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## European 0.48 0.16 0.15 0.81
## Temperature -0.53 0.16 -0.86 -0.20
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 CA 24 21 0.47 0.42 -0.85
##
##
## Etas from analysis of variance
## Eta Eta_partial
## European 0.48 0.55
## Temperature 0.53 0.59
(fit = lm("CA ~ European + Temperature", data = d_main, weights = d_main$pop %>% sqrt) %>%
MOD_summary(runs = 200, progress = F))
## Warning in sqrt(.): NaNs produced
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## European 0.5 0.17 0.15 0.85
## Temperature -0.6 0.22 -1.06 -0.13
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 CA 24 21 0.43 0.37 -2.3
##
##
## Etas from analysis of variance
## Eta Eta_partial
## European NaN 1
## Temperature NaN 1
fit %>% `[[`("coefs") %>% write_clipboard(print = T, clean_names = T)
## Beta SE CI lower CI upper
## European 0.50 0.17 0.15 0.85
## Temperature -0.60 0.22 -1.06 -0.13
#S
(fit = lm("S ~ CA + European + Temperature", data = d_main) %>%
MOD_summary(runs = 200, progress = F))
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## CA 0.64 0.13 0.37 0.920
## European 0.16 0.12 -0.08 0.401
## Temperature -0.31 0.12 -0.56 -0.063
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 24 20 0.82 0.79 0.3
##
##
## Etas from analysis of variance
## Eta Eta_partial
## CA 0.47 0.74
## European 0.13 0.30
## Temperature 0.25 0.50
(fit = lm("S ~ CA + European + Temperature", data = d_main, weights = d_main$pop %>% sqrt) %>%
MOD_summary(runs = 200, progress = F))
## Warning in sqrt(.): NaNs produced
##
## ---- Model summary ----
## Model coefficients
## Beta SE CI_lower CI_upper
## CA 0.65 0.12 0.408 0.900
## European 0.24 0.11 0.013 0.465
## Temperature -0.39 0.14 -0.678 -0.093
##
##
## Model meta-data
## outcome N df R2 R2-adj. R2-cv
## 1 S 24 20 0.84 0.82 0.4
##
##
## Etas from analysis of variance
## Eta Eta_partial
## CA NaN 1
## European NaN 1
## Temperature NaN 1
fit %>% `[[`("coefs") %>% write_clipboard(print = T, clean_names = T)
## Beta SE CI lower CI upper
## CA 0.65 0.12 0.41 0.90
## European 0.24 0.11 0.01 0.47
## Temperature -0.39 0.14 -0.68 -0.09
# LASSO -------------------------------------------------------------------
#CA
LASSO_CA = MOD_LASSO(d_main, dependent = "CA", predictors = c("European", "Temperature"),
runs = 500, weights_ = d_main$pop %>% sqrt, progress = F, nfolds = 5)
## Data standardized.
MOD_summarize_models(LASSO_CA) %>% write_clipboard()
## European Temperature
## mean 0.04 -0.04
## median 0.00 0.00
## sd 0.08 0.08
## mad 0.00 0.00
## fraction zeroNA 0.74 0.77
#S
LASSO_S = MOD_LASSO(d_main, dependent = "S", predictors = c("CA", "European", "Temperature"),
runs = 500, weights_ = d_main$pop %>% sqrt, progress = F, nfolds = 5)
## Data standardized.
MOD_summarize_models(LASSO_S) %>% write_clipboard()
## CA European Temperature
## mean 0.64 0.13 -0.24
## median 0.64 0.14 -0.25
## sd 0.01 0.05 0.06
## mad 0.00 0.04 0.05
## fraction zeroNA 0.00 0.01 0.00
#OLS fit
fit = lm("S ~ CA + European + Mean.C", data = d_main, weights = d_main$pop %>% sqrt) %>%
MOD_summary(runs = 200, progress = F)
## Warning in sqrt(.): NaNs produced
fit$coefs %>% write_clipboard(clean_names = T, print = T)
## Beta SE CI lower CI upper
## CA 0.65 0.12 0.41 0.90
## European 0.24 0.11 0.01 0.47
## Mean C -0.39 0.14 -0.68 -0.09
# path model -------------------------------------------------------------
wtd.cors(d_main[c("S", "CA", "Temperature", "Latitude", "European")])
## S CA Temperature Latitude European
## S 1.00 0.87 -0.612 0.64 0.418
## CA 0.87 1.00 -0.489 0.48 0.440
## Temperature -0.61 -0.49 1.000 -0.93 0.084
## Latitude 0.64 0.48 -0.933 1.00 0.120
## European 0.42 0.44 0.084 0.12 1.000
#make path model
#simple
str_mod = "
S ~ CA + European
CA ~ European
"
#complex
str_mod = "
S ~ CA + European + Temperature
CA ~ European + Temperature
European ~ Temperature
"
#fit unwtd, make design, fit wtd
fit = sem(model = str_mod, data = d_main, std.ov = T)
parameterestimates(fit)
## lhs op rhs est se z pvalue ci.lower ci.upper
## 1 S ~ CA 0.645 0.120 5.37 0.000 0.409 0.881
## 2 S ~ European 0.161 0.105 1.53 0.127 -0.045 0.367
## 3 S ~ Temperature -0.310 0.108 -2.87 0.004 -0.523 -0.098
## 4 CA ~ European 0.484 0.149 3.25 0.001 0.192 0.776
## 5 CA ~ Temperature -0.529 0.149 -3.55 0.000 -0.821 -0.237
## 6 European ~ Temperature 0.084 0.203 0.41 0.681 -0.315 0.482
## 7 S ~~ S 0.176 0.051 3.46 0.001 0.076 0.275
## 8 CA ~~ CA 0.507 0.146 3.46 0.001 0.220 0.793
## 9 European ~~ European 0.952 0.275 3.46 0.001 0.413 1.490
## 10 Temperature ~~ Temperature 0.958 0.000 NA NA 0.958 0.958
#weighted
design = svydesign(ids = ~1,
data = d_main,
weights = d_main$pop %>% sqrt)
fit_wtd = lavaan.survey(fit, design)
## Warning in lavData(data = data, group = group, cluster = cluster, ov.names
## = ov.names, : lavaan WARNING: std.ov argument is ignored if only sample
## statistics are provided.
parameterestimates(fit_wtd, standardized = T)
## lhs op rhs est se z pvalue ci.lower ci.upper
## 1 S ~ CA 0.628 0.098 6.41 0.000 0.436 0.820
## 2 S ~ European 1.494 0.677 2.21 0.027 0.167 2.821
## 3 S ~ Temperature -0.092 0.027 -3.45 0.001 -0.145 -0.040
## 4 CA ~ European 3.246 1.558 2.08 0.037 0.192 6.301
## 5 CA ~ Temperature -0.149 0.047 -3.16 0.002 -0.241 -0.057
## 6 European ~ Temperature 0.001 0.007 0.18 0.857 -0.012 0.015
## 7 S ~~ S 0.143 0.033 4.30 0.000 0.078 0.208
## 8 CA ~~ CA 0.557 0.222 2.51 0.012 0.122 0.992
## 9 European ~~ European 0.022 0.010 2.33 0.020 0.004 0.041
## 10 Temperature ~~ Temperature 8.493 0.000 NA NA 8.493 8.493
## 11 S ~1 0.728 0.470 1.55 0.122 -0.194 1.650
## 12 CA ~1 0.655 1.268 0.52 0.606 -1.830 3.140
## 13 European ~1 0.628 0.128 4.92 0.000 0.378 0.878
## 14 Temperature ~1 17.550 0.000 NA NA 17.550 17.550
## std.lv std.all std.nox
## 1 0.628 0.645 0.645
## 2 1.494 0.232 0.232
## 3 -0.092 -0.281 -0.096
## 4 3.246 0.492 0.492
## 5 -0.149 -0.441 -0.151
## 6 0.001 0.025 0.008
## 7 0.143 0.156 0.156
## 8 0.557 0.575 0.575
## 9 0.022 0.999 0.999
## 10 8.493 1.000 8.493
## 11 0.728 0.759 0.759
## 12 0.655 0.665 0.665
## 13 0.628 4.214 4.214
## 14 17.550 6.022 17.550
# plots -------------------------------------------------------------------
GG_scatter(d_main, "CA", "S", weights = d_main$pop %>% sqrt, case_names = d_main$province_name) +
xlab("Cognitive ability (mean of multiple scholastic tests)") +
ylab("S, General socioeconomic factor (34 indicators)")
ggsave("figures/province/CA_S.png")
## Saving 7 x 5 in image
GG_scatter(d_main, "European", "CA", weights = d_main$pop %>% sqrt, case_names = d_main$province_name) +
xlab("European genomic ancestry (estimated from multiple studies)") +
ylab("Cognitive ability (mean of multiple scholastic tests)")
ggsave("figures/province/Euro_CA.png")
## Saving 7 x 5 in image
GG_scatter(d_main, "European", "S", weights = d_main$pop %>% sqrt, case_names = d_main$province_name) +
xlab("European genomic ancestry (estimated from multiple studies)") +
ylab("S, General socioeconomic factor (34 indicators)")
ggsave("figures/province/Euro_S.png")
## Saving 7 x 5 in image
# multiple ancestries -----------------------------------------------------
MOD_APSLM("CA", predictors = c("African", "Amerindian", "European"), data = d_main, .weights = d_main$pop %>% sqrt)
##
|
| | 0%
|
|=========== | 17%
|
|====================== | 33%
|
|================================ | 50%
|
|=========================================== | 67%
|
|====================================================== | 83%
|
|=================================================================| 100%
## $beta_matrix
## African Amerindian European AIC BIC r2 r2.adj. r2_cv R R.adj. N
## 1 -0.46 NA NA 72 75 0.18 0.14 -0.63 0.43 0.38 24
## 2 NA -0.40 NA 73 76 0.15 0.11 -1.37 0.39 0.34 24
## 3 NA NA 0.49 70 74 0.23 0.20 -1.95 0.48 0.44 24
## 4 -0.42 -0.36 NA 70 75 0.30 0.23 -2.07 0.55 0.48 24
## 5 -0.31 NA 0.38 70 75 0.30 0.23 -2.06 0.55 0.48 24
## 6 NA 0.99 1.43 70 75 0.30 0.23 -2.06 0.55 0.48 24
## 7 -0.52 -0.70 -0.36 72 78 0.30 0.20 -3.36 0.55 0.44 24
##
## $all_models
## $all_models[[1]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) African
## 0.064 -0.457
##
##
## $all_models[[2]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) Amerindian
## 0.0743 -0.4011
##
##
## $all_models[[3]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) European
## 0.0355 0.4881
##
##
## $all_models[[4]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) African Amerindian
## 0.00199 -0.41717 -0.35900
##
##
## $all_models[[5]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) African European
## 0.00234 -0.30639 0.37973
##
##
## $all_models[[6]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) Amerindian European
## 0.00336 0.99051 1.42778
##
##
## $all_models[[7]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) African Amerindian European
## 0.00167 -0.52190 -0.69833 -0.35899
MOD_APSLM("S", predictors = c("African", "Amerindian", "European"), data = d_main, .weights = d_main$pop %>% sqrt)
##
|
| | 0%
|
|=========== | 17%
|
|====================== | 33%
|
|================================ | 50%
|
|=========================================== | 67%
|
|====================================================== | 83%
|
|=================================================================| 100%
## $beta_matrix
## African Amerindian European AIC BIC r2 r2.adj. r2_cv R R.adj. N
## 1 -0.38 NA NA 74 78 0.12 0.082 -0.333 0.35 0.29 24
## 2 NA -0.49 NA 71 75 0.22 0.185 -0.099 0.47 0.43 24
## 3 NA NA 0.55 69 73 0.29 0.254 -0.106 0.54 0.50 24
## 4 -0.33 -0.46 NA 70 75 0.31 0.246 -0.321 0.56 0.50 24
## 5 -0.19 NA 0.48 70 75 0.31 0.247 -0.318 0.56 0.50 24
## 6 NA 0.62 1.14 70 75 0.31 0.248 -0.317 0.56 0.50 24
## 7 1.75 6.27 7.12 72 78 0.32 0.213 -1.906 0.56 0.46 24
##
## $all_models
## $all_models[[1]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) African
## 0.106 -0.381
##
##
## $all_models[[2]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) Amerindian
## 0.0843 -0.4905
##
##
## $all_models[[3]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) European
## 0.0478 0.5513
##
##
## $all_models[[4]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) African Amerindian
## 0.027 -0.331 -0.457
##
##
## $all_models[[5]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) African European
## 0.0273 -0.1892 0.4843
##
##
## $all_models[[6]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) Amerindian European
## 0.0276 0.6209 1.1403
##
##
## $all_models[[7]]
##
## Call:
## lm(formula = models[model.idx], data = data, weights = .weights)
##
## Coefficients:
## (Intercept) African Amerindian European
## 0.0333 1.7466 6.2730 7.1201
Maps
Fourth, we create the maps for a nice visual presentation.
### CREATE MAPS OF VARIABLES
#https://cran.r-project.org/web/packages/choroplethr/vignettes/i-creating-admin1-maps.html
# load regions ------------------------------------------------------------
d_argentina_admin1 = get_admin1_regions("argentina")
#add data
d_argentina_admin1$Province = str_replace(d_argentina_admin1$region, pattern = "provincia de ", replacement = "") %>%
str_replace(pattern = "provincia del ", replacement = "") %>% pu_translate(superunit="ARG", messages = 0)
#attempt to match using fuzzymatch
# fuzzyjoin::stringdist_left_join(x = d_argentina_admin1,
# y = d_main[1],
# by = c("region_short" = "Wiki_name"),
# method = "lcs",
# max_dist = 7)
#this never works better than manual solutions!
#sort by short choro name
# sort_df(d_argentina_admin1, vars = "region_short")
d_argentina_admin1 = dplyr::left_join(d_argentina_admin1, d_main[c("Province", "name", "S", "European", "CA", "mining", "Temperature")], by = c("Province" = "Province"))
# plot --------------------------------------------------------------------
#neutral
gg = admin1_map("argentina")
#get average lat lon by AD
d_geo = ddply(gg$data, .variables = "region", .fun = function(block) {
c("long" = averages(block$long, types = "midrange") %>% unname,
"lat" = averages(block$lat, types = "midrange") %>% unname
)
})
#merge these into the main dataset
d_argentina_admin1 = dplyr::full_join(d_argentina_admin1, d_geo, by = c("region" = "region"))
#CA
d_argentina_admin1$value = d_argentina_admin1$CA
admin1_choropleth(country.name = "argentina",
df = d_argentina_admin1,
legend = "Cognitive ability (Z-scale)",
num_colors = 1) +
geom_text(data = d_argentina_admin1, aes(long, lat, label = name, group = NULL), size = 2.5)
ggsave("figures/province/maps/map_CA.png")
## Saving 7 x 5 in image
#S
d_argentina_admin1$value = d_argentina_admin1$S
admin1_choropleth(country.name = "argentina",
df = d_argentina_admin1,
legend = "General socioeconomic factor (Z-scale)",
num_colors = 1) +
geom_text(data = d_argentina_admin1, aes(long, lat, label = name, group = NULL), size = 2.5)
ggsave("figures/province/maps/map_S.png")
## Saving 7 x 5 in image
#euro
d_argentina_admin1$value = d_argentina_admin1$European
admin1_choropleth(country.name = "argentina",
df = d_argentina_admin1,
legend = "European genetic ancestry (proportion)",
num_colors = 1) +
geom_text(data = d_argentina_admin1, aes(long, lat, label = name, group = NULL), size = 2.5)
ggsave("figures/province/maps/map_euro.png")
## Saving 7 x 5 in image
#temperature (control)
d_argentina_admin1$value = d_argentina_admin1$Temperature
admin1_choropleth(country.name = "argentina",
df = d_argentina_admin1,
legend = "Mean temperature (C)",
num_colors = 1) +
geom_text(data = d_argentina_admin1, aes(long, lat, label = name, group = NULL), size = 2.5)
ggsave("figures/province/maps/map_temp.png")
## Saving 7 x 5 in image
#mining (control)
d_argentina_admin1$value = d_argentina_admin1$mining
admin1_choropleth(country.name = "argentina",
df = d_argentina_admin1,
legend = "Mining (factor score)",
num_colors = 1) +
geom_text(data = d_argentina_admin1, aes(long, lat, label = name, group = NULL), size = 2.5)
ggsave("figures/province/maps/map_mining.png")
## Saving 7 x 5 in image
#output data for reprex
write_rds(d_argentina_admin1, "data/map_data.rds")