Cognitive ability
#age heaping transformed
#simple approach with z scores
d$ah_z = d %>% select(`1861`:`1881`) %>% map_df(standardize) %>% rowMeans(na.rm = T)
#beta reg
#transform to 0-1 scale
d %<>% mutate(
`1861_01` = 1 - (`1861` - 100)/400,
`1871_01` = 1 - (`1871` - 100)/400,
`1881_01` = 1 - (`1881` - 100)/400
)
#long format
ah_01_long = d %>%
select(ID, `1861_01`:`1881_01`) %>%
pivot_longer(cols = -ID, names_to = "year", values_to = "ah") %>%
mutate(year = str_replace(year, "_01", "") %>% as.numeric())
#fit model
beta_fit = betareg::betareg(
ah ~ as.factor(year) + ID,
data = ah_01_long
)
beta_fit
##
## Call:
## betareg::betareg(formula = ah ~ as.factor(year) + ID, data = ah_01_long)
##
## Coefficients (mean model with logit link):
## (Intercept) as.factor(year)1871 as.factor(year)1881
## 1.37470 0.04390 0.42310
## IDAL IDAN IDAP
## 0.67699 0.52821 0.64612
## IDAQ IDAR IDAV
## 0.63535 0.47283 0.07664
## IDBA IDBG IDBL
## -0.01140 1.40801 1.46270
## IDBN IDBO IDBS
## 0.17663 1.20417 1.24536
## IDCA IDCB IDCE
## -0.03942 0.29563 0.28132
## IDCH IDCL IDCN
## 0.59586 0.20548 0.68477
## IDCO IDCR IDCS
## 1.29317 1.32744 0.04079
## IDCT IDCZ IDFC
## 0.17732 -0.07492 0.79957
## IDFE IDFG IDFI
## 0.98700 0.14338 0.73404
## IDGE IDGR IDIM
## 0.71936 0.56131 0.84254
## IDLE IDLI IDMC
## 0.22601 0.88920 0.46360
## IDME IDMI IDMN
## 0.26687 1.26225 1.32278
## IDMO IDMS IDMT
## 1.09730 0.71416 0.05858
## IDNA IDNO IDPA
## 0.14340 1.04477 0.52624
## IDPC IDPD IDPG
## 0.91170 1.39392 0.07202
## IDPI IDPR IDPU
## 0.75781 1.10051 0.45386
## IDPV IDPZ IDRA
## 0.97739 0.05858 0.90377
## IDRC IDRE IDRM
## 0.05108 1.10026 0.48969
## IDRO IDSA IDSI
## 1.13831 0.09151 0.79362
## IDSO IDSR IDSS
## 1.24661 -0.02591 -0.14861
## IDTE IDTO IDTP
## 0.54536 0.92838 0.15522
## IDTV IDUD IDVE
## 1.47647 1.66523 1.36097
## IDVI IDVR
## 1.36682 1.56095
##
## Phi coefficients (precision model with identity link):
## (phi)
## 536
beta_fit %>% summary()
##
## Call:
## betareg::betareg(formula = ah ~ as.factor(year) + ID, data = ah_01_long)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.9292 -0.8155 -0.0702 0.6777 4.7654
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.37470 0.06632 20.727 < 2e-16 ***
## as.factor(year)1871 0.04390 0.02352 1.867 0.06196 .
## as.factor(year)1881 0.42310 0.02529 16.733 < 2e-16 ***
## IDAL 0.67699 0.10522 6.434 1.24e-10 ***
## IDAN 0.52821 0.10163 5.198 2.02e-07 ***
## IDAP 0.64612 0.10444 6.186 6.15e-10 ***
## IDAQ 0.63535 0.10417 6.099 1.07e-09 ***
## IDAR 0.47283 0.10039 4.710 2.48e-06 ***
## IDAV 0.07664 0.09304 0.824 0.41011
## IDBA -0.01140 0.09172 -0.124 0.90107
## IDBG 1.40801 0.12985 10.844 < 2e-16 ***
## IDBL 1.46270 0.15995 9.145 < 2e-16 ***
## IDBN 0.17663 0.09466 1.866 0.06206 .
## IDBO 1.20417 0.12169 9.895 < 2e-16 ***
## IDBS 1.24536 0.12325 10.104 < 2e-16 ***
## IDCA -0.03942 0.09133 -0.432 0.66598
## IDCB 0.29563 0.09679 3.054 0.00226 **
## IDCE 0.28132 0.09653 2.914 0.00356 **
## IDCH 0.59586 0.10321 5.773 7.77e-09 ***
## IDCL 0.20548 0.09516 2.159 0.03083 *
## IDCN 0.68477 0.10542 6.495 8.28e-11 ***
## IDCO 1.29317 0.12512 10.336 < 2e-16 ***
## IDCR 1.32744 0.12649 10.494 < 2e-16 ***
## IDCS 0.04079 0.09249 0.441 0.65923
## IDCT 0.17732 0.09468 1.873 0.06108 .
## IDCZ -0.07492 0.09084 -0.825 0.40955
## IDFC 0.79957 0.10851 7.369 1.72e-13 ***
## IDFE 0.98700 0.11415 8.646 < 2e-16 ***
## IDFG 0.14338 0.09411 1.524 0.12761
## IDFI 0.73404 0.10672 6.878 6.05e-12 ***
## IDGE 0.71936 0.10633 6.766 1.33e-11 ***
## IDGR 0.56131 0.10239 5.482 4.20e-08 ***
## IDIM 0.84254 0.10974 7.678 1.62e-14 ***
## IDLE 0.22601 0.09552 2.366 0.01798 *
## IDLI 0.88920 0.11111 8.003 1.22e-15 ***
## IDMC 0.46360 0.10019 4.627 3.71e-06 ***
## IDME 0.26687 0.09626 2.772 0.00556 **
## IDMI 1.26225 0.12390 10.187 < 2e-16 ***
## IDMN 1.32278 0.15202 8.701 < 2e-16 ***
## IDMO 1.09730 0.11784 9.312 < 2e-16 ***
## IDMS 0.71416 0.10619 6.725 1.75e-11 ***
## IDMT 0.05858 0.09276 0.632 0.52767
## IDNA 0.14340 0.09411 1.524 0.12755
## IDNO 1.04477 0.11605 9.003 < 2e-16 ***
## IDPA 0.52624 0.10158 5.181 2.21e-07 ***
## IDPC 0.91170 0.11179 8.155 3.48e-16 ***
## IDPD 1.39392 0.15597 8.937 < 2e-16 ***
## IDPG 0.07202 0.09297 0.775 0.43855
## IDPI 0.75781 0.10736 7.059 1.68e-12 ***
## IDPR 1.10051 0.11795 9.330 < 2e-16 ***
## IDPU 0.45386 0.09998 4.539 5.64e-06 ***
## IDPV 0.97739 0.11384 8.585 < 2e-16 ***
## IDPZ 0.05858 0.09276 0.632 0.52767
## IDRA 0.90377 0.11155 8.102 5.41e-16 ***
## IDRC 0.05108 0.09265 0.551 0.58139
## IDRE 1.10026 0.11794 9.329 < 2e-16 ***
## IDRM 0.48969 0.11713 4.181 2.91e-05 ***
## IDRO 1.13831 0.14256 7.985 1.41e-15 ***
## IDSA 0.09151 0.09327 0.981 0.32654
## IDSI 0.79362 0.10834 7.325 2.39e-13 ***
## IDSO 1.24661 0.12330 10.110 < 2e-16 ***
## IDSR -0.02591 0.09152 -0.283 0.77707
## IDSS -0.14861 0.08988 -1.653 0.09824 .
## IDTE 0.54536 0.10202 5.346 9.01e-08 ***
## IDTO 0.92838 0.11230 8.267 < 2e-16 ***
## IDTP 0.15522 0.09430 1.646 0.09976 .
## IDTV 1.47647 0.16077 9.184 < 2e-16 ***
## IDUD 1.66523 0.17266 9.644 < 2e-16 ***
## IDVE 1.36097 0.15412 8.831 < 2e-16 ***
## IDVI 1.36682 0.15445 8.850 < 2e-16 ***
## IDVR 1.56095 0.16593 9.407 < 2e-16 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 536.00 54.03 9.921 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 578.8 on 72 Df
## Pseudo R-squared: 0.9335
## Number of iterations: 90 (BFGS) + 2 (Fisher scoring)
#save coefs back
beta_coef = beta_fit %>% broom::tidy() %>%
filter(str_detect(term, "^ID")) %>%
mutate(
ID = str_replace(term, "^ID", "")
)
#manually add 0 value for the index ID
beta_coef = bind_rows(
tibble(
ID = ah_01_long$ID %>% factor() %>% levels() %>% head(1),
estimate = 0
),
beta_coef
)
#join to main
d$ah_coef = NULL
d = left_join(
d,
beta_coef %>% mutate(ah_coef = estimate) %>% select(ah_coef, ID),
by = "ID"
)
#compare methods
GG_scatter(d, "ah_coef", "ah_z")
## `geom_smooth()` using formula 'y ~ x'

#ability historical and present
invalsi_label = "INVALSI achievement test score from 2010's"
ah_label = "Age heaping score from 1880's (beta regression coefficient)"
GG_scatter(d, "ah_coef", "ach", case_names = "name") +
ylab(invalsi_label) +
xlab(ah_label)
## `geom_smooth()` using formula 'y ~ x'

ggsave("figures/cognitive_stability.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
Main results
#main vars
main_vars = c("ah_coef", "ach", "S", "QOL_2021", "lat", "lon", "Altitudine")
#correlations
d %>% select(!!main_vars) %>% cor_matrix(p_val = T, asterisks = .01)
## ah_coef ach S QOL_2021 lat lon Altitudine
## ah_coef "1" "0.73*" "0.78*" "0.75*" "0.86*" "-0.61*" "-0.20"
## ach "0.73*" "1" "0.82*" "0.80*" "0.85*" "-0.51*" "-0.09"
## S "0.78*" "0.82*" "1" "0.88*" "0.86*" "-0.65*" "-0.11"
## QOL_2021 "0.75*" "0.80*" "0.88*" "1" "0.81*" "-0.62*" "-0.13"
## lat "0.86*" "0.85*" "0.86*" "0.81*" "1" "-0.64*" "-0.17"
## lon "-0.61*" "-0.51*" "-0.65*" "-0.62*" "-0.64*" "1" "0.07"
## Altitudine "-0.20" "-0.09" "-0.11" "-0.13" "-0.17" "0.07" "1"
#n pairs
d %>% select(!!main_vars) %>% pairwiseCount()
## ah_coef ach S QOL_2021 lat lon Altitudine
## ah_coef 69 69 69 68 69 69 69
## ach 69 107 107 106 107 107 101
## S 69 107 107 106 107 107 101
## QOL_2021 68 106 106 107 106 106 100
## lat 69 107 107 106 107 107 101
## lon 69 107 107 106 107 107 101
## Altitudine 69 101 101 100 101 101 101
#ggpairs
d %>% select(!!main_vars) %>%
GGally::ggpairs()
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Warning: Removed 43 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 44 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values
## Warning: Removed 43 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 6 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 11 rows containing missing values
## Warning: Removed 43 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 6 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 11 rows containing missing values
## Warning: Removed 44 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Removed 6 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 6 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 6 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 12 rows containing missing values
## Warning: Removed 43 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Removed 5 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 11 rows containing missing values
## Warning: Removed 43 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Removed 5 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 11 rows containing missing values
## Warning: Removed 43 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Removed 11 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Removed 11 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing non-finite values (stat_density).

#labs
S_ylab = ylab("General factor of socioeconomic variables")
#achievement and S
GG_scatter(d, "ach", "S", case_names = "name") +
xlab(invalsi_label) +
S_ylab
## `geom_smooth()` using formula 'y ~ x'

ggsave("figures/ach_S.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
#age heaping and S
GG_scatter(d, "ah_coef", "S", case_names = "name") +
xlab(ah_label) +
S_ylab
## `geom_smooth()` using formula 'y ~ x'

ggsave("figures/ah_S.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
#regressions
#subset with ah data
d_z = map_df(d %>% select(!!main_vars), standardize)
d_ah = d_z %>% filter(!is.na(ah_coef))
list(
ols(S ~ lat + lon + Altitudine, data = d_z),
ols(S ~ ach, data = d_z),
ols(S ~ ach + lat + lon + Altitudine, data = d_z),
#ah subset
ols(S ~ lat + lon + Altitudine, data = d_ah),
ols(S ~ ah_coef, data = d_ah),
ols(S ~ ah_coef + lat + lon + Altitudine, data = d_ah)
) %>%
summarize_models(asterisks_only = F)
# Geography of intelligence
list(
ols(ach ~ lat + lon + Altitudine, data = d_z),
ols(ah_coef ~ lat + lon + Altitudine, data = d_z)
) %>%
summarize_models(asterisks_only = F)
Maps
#plot map with names
ggplot(sf_provinces3) +
geom_sf() +
geom_text(aes(x = lon, y = lat, label = DEN_UTS))

#join shapefile
#keep all overlapping data
d_sf = full_join(sf_provinces3, d, by = "ID")
#remove empty units
# d_sf %<>% filter(!is.na(geometry))
d_sf %<>% filter(!st_is_empty(.))
#dups
d_sf$ID %>% anyDuplicated()
## [1] 0
#plot with values
ggplot(d_sf, aes(fill = S)) +
geom_sf() +
scale_fill_continuous("Well-being index") +
theme(legend.position = "top") ->
map_S
map_S

ggplot(d_sf, aes(fill = ach)) +
geom_sf() +
scale_fill_continuous(invalsi_label) +
theme(legend.position = "top") ->
map_invalsi
map_invalsi

ggplot(d_sf, aes(fill = ah_coef)) +
geom_sf() +
scale_fill_continuous(ah_label) +
theme(legend.position = "top") ->
map_ah
map_ah

#together
map_S + map_invalsi + map_ah

#terrible! how to deal with the bad legends?
#tmap maybe easier
tmap_S = tm_shape(d_sf) +
tm_polygons("S", title = "Well-being index", n = 10)
tmap_S
## Variable(s) "S" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tmap_S, filename = "figures/map_S.png")
## Variable(s) "S" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/8tb_ssd_3/projects/Italian provinces/figures/map_S.png
## Resolution: 1850.329 by 2383.359 pixels
## Size: 6.167765 by 7.944531 inches (300 dpi)
d_sf$ah_coef_z = d_sf$ah_coef %>% standardize()
tmap_ah = tm_shape(d_sf) +
tm_polygons("ah_coef_z", title = "Age heaping", n = 10)
tmap_ah
## Variable(s) "ah_coef_z" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tmap_ah, filename = "figures/map_ah.png")
## Variable(s) "ah_coef_z" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/8tb_ssd_3/projects/Italian provinces/figures/map_ah.png
## Resolution: 1850.329 by 2383.359 pixels
## Size: 6.167765 by 7.944531 inches (300 dpi)
tmap_invalsi = tm_shape(d_sf) +
tm_polygons("ach", title = "INVALSI", n = 10)
tmap_invalsi
## Variable(s) "ach" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tmap_invalsi, filename = "figures/map_invalsi.png")
## Variable(s) "ach" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/8tb_ssd_3/projects/Italian provinces/figures/map_invalsi.png
## Resolution: 1850.329 by 2383.359 pixels
## Size: 6.167765 by 7.944531 inches (300 dpi)
#side by side!!
tmap_combo = tmap_arrange(tmap_S, tmap_ah, tmap_invalsi, nrow = 1)
tmap_combo
## Variable(s) "S" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "ah_coef_z" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "ach" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tmap_combo, filename = "figures/maps_combined.png")
## Variable(s) "S" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "ah_coef_z" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "ach" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/8tb_ssd_3/projects/Italian provinces/figures/maps_combined.png
## Resolution: 2100 by 2100 pixels
## Size: 7 by 7 inches (300 dpi)
Social and geographical