Initial

#packages
library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

## Loading required package: magrittr

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

## Loading required package: weights

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: assertthat

## 
## Attaching package: 'assertthat'

## The following object is masked from 'package:tibble':
## 
##     has_name

## Loading required package: psych

## 
## Attaching package: 'psych'

## The following object is masked from 'package:Hmisc':
## 
##     describe

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## 
## Attaching package: 'kirkegaard'

## The following object is masked from 'package:psych':
## 
##     rescale

## The following object is masked from 'package:assertthat':
## 
##     are_equal

## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric

## The following object is masked from 'package:base':
## 
##     +

load_packages(
  betareg,
  rms,
  lavaan,
  readxl,
  sf,
  tmap,
  patchwork
)

## Loading required package: SparseM

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

## This is lavaan 0.6-12
## lavaan is FREE software! Please report any bugs.

## 
## Attaching package: 'lavaan'

## The following object is masked from 'package:psych':
## 
##     cor2cov

## Linking to GEOS 3.6.2, GDAL 2.2.3, PROJ 4.9.3; sf_use_s2() is TRUE

#theme
theme_set(theme_bw())

Data

Social and geographical

#phenotype data
inc = read_csv("data/income.csv", na = "")

## Rows: 107 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Province, ID
## dbl (3): Euro, Euro_index, Euro_change
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#this file had a lot of errors!
geo = read_csv("data/geo.csv", na = "") %>%
  #combine lat and lon values to a single number
  mutate(
  lat_old = Latitudine + (Latitudine2/60),
  lon_old = Longitudine + (Longitudine2/60)
)

## New names:
## Rows: 101 Columns: 31
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): ID, Provincia dbl (29): Altitudine, Latitudine, Latitudine2, Longitudine,
## Longitudine2, Ge...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `Gennaio` -> `Gennaio...8`
## • `Gennaio` -> `Gennaio...9`
## • `Febbraio` -> `Febbraio...10`
## • `Febbraio` -> `Febbraio...11`
## • `Marzo` -> `Marzo...12`
## • `Marzo` -> `Marzo...13`
## • `Aprile` -> `Aprile...14`
## • `Aprile` -> `Aprile...15`
## • `Maggio` -> `Maggio...16`
## • `Maggio` -> `Maggio...17`
## • `Giugno` -> `Giugno...18`
## • `Giugno` -> `Giugno...19`
## • `Luglio` -> `Luglio...20`
## • `Luglio` -> `Luglio...21`
## • `Agosto` -> `Agosto...22`
## • `Agosto` -> `Agosto...23`
## • `Settembre` -> `Settembre...24`
## • `Settembre` -> `Settembre...25`
## • `Ottobre` -> `Ottobre...26`
## • `Ottobre` -> `Ottobre...27`
## • `Novembre` -> `Novembre...28`
## • `Novembre` -> `Novembre...29`
## • `Dicembre` -> `Dicembre...30`
## • `Dicembre` -> `Dicembre...31`

#age heaping data from PDF
ah_all = read_csv("data/age_heaping.csv", na = "")

## Rows: 86 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Name, ID, Type, notes
## dbl (3): 1861, 1871, 1881
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

ah = ah_all %>% filter(Type == "Province") #only provinces, not regions

#numerical age heaping data file from Brian
ah2 = haven::read_stata("data/new data 2022-07-04.dta") %>% 
  filter(group == "total") %>% 
  mutate(
    age_heaping = 1 - (w - 100)/400,
    provstr = mapvalues(provstr, "porto_maurizio", "Imperia"),
    ID = pu_translate(str_to_sentence(provstr), superunit = "ITA", ad_level = 2)
  )

## No exact match: Ascoli_piceno
## No exact match: Forli
## No exact match: Massa_carrara
## No exact match: Reggio_calabria
## No exact match: Reggio_emilia
## Best fuzzy match found: Ascoli_piceno -> Ascoli Piceno with distance 2.00
## Best fuzzy match found: Forli -> Forlì with distance 1.00
## Best fuzzy match found: Massa_carrara -> Massa Carrara with distance 2.00
## Best fuzzy match found: Reggio_calabria -> Reggio Calabria with distance 2.00
## Best fuzzy match found: Reggio_emilia -> Reggio Emilia with distance 2.00

#no dups
assert_that(!anyDuplicated(ah2$ID))

## [1] TRUE

#these data are OK; but since they are not split by year, we use the previously reported values

#own compilation
social = read_excel("data/merged_social_data.xlsx") %>% df_legalize_names() %>% filter(!is.na(ID))

#quality of life final scores (their method)
qof2021_final = read_csv("data/tabellaClassifica.csv") %>% rename(
  "QOL_2021" = "Punteggio 2021"
) %>% mutate(
  QOL_2021 = standardize(QOL_2021)
)

## New names:
## Rows: 107 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): Var.'21/'20, Provincia, ID dbl (3): Rank, Punteggio 2021, Medaglie
## 1990-2021 lgl (1): ...2
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`

#quality of life 2021
qof2021 = read_csv("data/quality of life 2021 - 20211213_QDV2021_001.csv")

## Rows: 9630 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): NOME PROVINCIA (ISTAT), CODICE NUTS 3 2021, CODICE PROVINCIA ISTAT ...
## dbl (1): VALORE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#select variables for use
qof2021_selected = qof2021 %>% 
  filter(INDICATORE %in% c(
    "Laureati e altri titoli terziari",
    "Anni di studio",
    "Quoziente di natalità",
    "Rapine",
    "Pagamenti oltre i 30 giorni",
    "Start up innovative",
    "Qualità dell'aria"
  )) %>% 
  pivot_wider(names_from = INDICATORE,
              values_from = VALORE,
              id_cols = c(`NOME PROVINCIA (ISTAT)`))

#rename
qof2021_selected %<>% rename(
  "tertiary_education" = "Laureati e altri titoli terziari",
  "years_of_education" = "Anni di studio",
  "birth_rate" = "Quoziente di natalità",
  "robberies" = "Rapine",
  "bad_payers" = "Pagamenti oltre i 30 giorni",
  "startups" = "Start up innovative",
  "air_quality" = "Qualità dell'aria"
)

#ad IDs
qof2021_ids = read_excel("data/cross_ids.xlsx")

## New names:
## • `` -> `...3`

qof2021_selected$ID = mapvalues(
  qof2021_selected$`NOME PROVINCIA (ISTAT)`,
  from = qof2021_ids$`Name QOL21`,
  to = qof2021_ids$`ID QOL21`
)

## The following `from` values were not present in `x`: NA, NA, NA

Invalsi

#ach = read_csv("data/achievement.csv", na = "")
invalsi = read_excel("data/Matrice_medie_provinciali.xlsx")

#invalsi data needs to be averaged across years and subjects
invalsi %>% 
  #make an overall in
  pivot_wider(
    id_cols = c(Sigla_provincia, Nome_provincia),
    names_from = c(Anno, Grado, Materia),
    values_from = c(punteggio_medio_wle)
  ) ->
  invalsi_long

#cors
GG_heatmap(invalsi_long[-c(1:2)], add_values = F, short_x_labels = T, reorder_vars = T)

#some data are messed up!

invalsi_fa = fa(invalsi_long[-c(1:2)] %>% miss_impute())

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

invalsi_fa_scores = tibble(
  ID = invalsi_long$Sigla_provincia,
  province = invalsi_long$Nome_provincia,
  ach = invalsi_fa$scores[, 1] %>% standardize())

#factor analysis with more factors, do we get group factors for grades, subject?
invalsi_fa_2 = fa(invalsi_long[-c(1:2)] %>% miss_impute(), nfactors = 2)

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done

## Loading required namespace: GPArotation

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

invalsi_fa_2

## Factor Analysis using method =  minres
## Call: fa(r = invalsi_long[-c(1:2)] %>% miss_impute(), nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         MR1   MR2    h2    u2 com
## 2012-13_2_Italiano     0.73  0.27 0.719 0.281 1.3
## 2012-13_2_Matematica   0.67  0.38 0.756 0.244 1.6
## 2012-13_5_Italiano     0.84  0.18 0.824 0.176 1.1
## 2012-13_5_Matematica   0.89  0.14 0.893 0.107 1.0
## 2012-13_8_Italiano     0.64  0.37 0.697 0.303 1.6
## 2012-13_8_Matematica   0.84  0.10 0.774 0.226 1.0
## 2012-13_10_Italiano    0.84  0.01 0.707 0.293 1.0
## 2012-13_10_Matematica  0.98 -0.12 0.906 0.094 1.0
## 2013-14_2_Italiano     0.51  0.47 0.631 0.369 2.0
## 2013-14_2_Matematica   0.19  0.58 0.443 0.557 1.2
## 2013-14_5_Italiano     0.79  0.26 0.812 0.188 1.2
## 2013-14_5_Matematica   0.80  0.23 0.811 0.189 1.2
## 2013-14_8_Italiano     0.81  0.13 0.744 0.256 1.1
## 2013-14_8_Matematica   0.93  0.06 0.907 0.093 1.0
## 2013-14_10_Italiano    0.87 -0.11 0.715 0.285 1.0
## 2013-14_10_Matematica  0.97 -0.16 0.868 0.132 1.1
## 2014-15_2_Italiano     0.56  0.47 0.701 0.299 1.9
## 2014-15_2_Matematica   0.61  0.41 0.699 0.301 1.7
## 2014-15_5_Italiano     0.65  0.41 0.753 0.247 1.7
## 2014-15_5_Matematica   0.79  0.18 0.751 0.249 1.1
## 2014-15_8_Italiano     0.88  0.13 0.867 0.133 1.0
## 2014-15_8_Matematica   0.94  0.06 0.932 0.068 1.0
## 2014-15_10_Italiano    0.85 -0.03 0.703 0.297 1.0
## 2014-15_10_Matematica  0.90 -0.10 0.758 0.242 1.0
## 2015-16_2_Italiano     0.48  0.55 0.697 0.303 2.0
## 2015-16_2_Matematica   0.30  0.69 0.691 0.309 1.4
## 2015-16_5_Italiano     0.54  0.56 0.796 0.204 2.0
## 2015-16_5_Matematica   0.27  0.63 0.573 0.427 1.3
## 2015-16_8_Italiano     0.83  0.22 0.841 0.159 1.1
## 2015-16_8_Matematica   0.87  0.11 0.825 0.175 1.0
## 2015-16_10_Italiano    0.90 -0.11 0.753 0.247 1.0
## 2015-16_10_Matematica  0.91 -0.10 0.780 0.220 1.0
## 2016-17_2_Italiano     0.42  0.64 0.752 0.248 1.7
## 2016-17_2_Matematica   0.15  0.80 0.736 0.264 1.1
## 2016-17_5_Italiano     0.68  0.44 0.835 0.165 1.7
## 2016-17_5_Matematica   0.53  0.59 0.812 0.188 2.0
## 2016-17_8_Italiano     0.83  0.23 0.864 0.136 1.2
## 2016-17_8_Matematica   0.94  0.05 0.911 0.089 1.0
## 2016-17_10_Italiano    0.87  0.00 0.758 0.242 1.0
## 2016-17_10_Matematica  0.94 -0.03 0.870 0.130 1.0
## 2017-18_2_Italiano    -0.19  0.85 0.661 0.339 1.1
## 2017-18_2_Matematica  -0.53  0.79 0.641 0.359 1.8
## 2017-18_5_Inglese L    0.82  0.03 0.687 0.313 1.0
## 2017-18_5_Inglese R    0.81  0.12 0.731 0.269 1.0
## 2017-18_5_Italiano     0.53  0.63 0.880 0.120 1.9
## 2017-18_5_Matematica   0.12  0.74 0.615 0.385 1.0
## 2017-18_8_Inglese L    0.99 -0.12 0.931 0.069 1.0
## 2017-18_8_Inglese R    0.97 -0.03 0.933 0.067 1.0
## 2017-18_8_Italiano     0.83  0.27 0.911 0.089 1.2
## 2017-18_8_Matematica   0.93  0.13 0.958 0.042 1.0
## 2017-18_10_Italiano    0.96 -0.05 0.905 0.095 1.0
## 2017-18_10_Matematica  0.97 -0.04 0.924 0.076 1.0
## 2018-19_2_Italiano    -0.25  0.94 0.802 0.198 1.1
## 2018-19_2_Matematica   0.08  0.85 0.779 0.221 1.0
## 2018-19_5_Inglese L    0.74  0.13 0.633 0.367 1.1
## 2018-19_5_Inglese R    0.14  0.24 0.095 0.905 1.6
## 2018-19_5_Italiano     0.45  0.68 0.854 0.146 1.7
## 2018-19_5_Matematica   0.33  0.70 0.738 0.262 1.4
## 2018-19_8_Inglese L    0.99 -0.07 0.948 0.052 1.0
## 2018-19_8_Inglese R    0.96  0.04 0.943 0.057 1.0
## 2018-19_8_Italiano     0.80  0.33 0.903 0.097 1.3
## 2018-19_8_Matematica   0.92  0.15 0.962 0.038 1.1
## 2018-19_10_Italiano    0.95 -0.03 0.891 0.109 1.0
## 2018-19_10_Matematica  0.99 -0.06 0.949 0.051 1.0
## 2018-19_13_Inglese L   1.03 -0.27 0.962 0.038 1.1
## 2018-19_13_Inglese R   0.99 -0.25 0.888 0.112 1.1
## 2018-19_13_Italiano    1.00 -0.16 0.928 0.072 1.1
## 2018-19_13_Matematica  1.00 -0.14 0.929 0.071 1.0
## 2020-21_2_Italiano    -0.18  0.80 0.589 0.411 1.1
## 2020-21_2_Matematica  -0.40  0.81 0.613 0.387 1.5
## 2020-21_5_Inglese L    0.88  0.01 0.779 0.221 1.0
## 2020-21_5_Inglese R    0.92 -0.06 0.813 0.187 1.0
## 2020-21_5_Italiano     0.51  0.55 0.727 0.273 2.0
## 2020-21_5_Matematica   0.54  0.55 0.783 0.217 2.0
## 2020-21_8_Inglese L    0.99 -0.08 0.940 0.060 1.0
## 2020-21_8_Inglese R    0.97  0.00 0.943 0.057 1.0
## 2020-21_8_Italiano     0.81  0.31 0.917 0.083 1.3
## 2020-21_8_Matematica   0.92  0.15 0.954 0.046 1.1
## 2020-21_13_Inglese L   1.02 -0.26 0.945 0.055 1.1
## 2020-21_13_Inglese R   1.00 -0.24 0.901 0.099 1.1
## 2020-21_13_Italiano    0.97 -0.11 0.890 0.110 1.0
## 2020-21_13_Matematica  1.00 -0.15 0.923 0.077 1.0
## 
##                         MR1   MR2
## SS loadings           51.62 13.95
## Proportion Var         0.63  0.17
## Cumulative Var         0.63  0.80
## Proportion Explained   0.79  0.21
## Cumulative Proportion  0.79  1.00
## 
##  With factor correlations of 
##      MR1  MR2
## MR1 1.00 0.31
## MR2 0.31 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  3321  and the objective function was  325.43 with Chi Square of  26630.85
## The degrees of freedom for the model are 3158  and the objective function was  184.76 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  108 with the empirical chi square  1257.83  with prob <  1 
## The total number of observations was  111  with Likelihood Chi Square =  14873.47  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.461
## RMSEA index =  0.183  and the 90 % confidence intervals are  0.181 0.187
## BIC =  0.77
## Fit based upon off diagonal values = 1

#no we just get a factor for years of bad data!

Join

#no dups
inc$ID[duplicated(inc$ID)]

## character(0)

invalsi_fa_scores$ID[duplicated(invalsi_fa_scores$ID)]

## character(0)

geo$ID[duplicated(geo$ID)]

## character(0)

ah$ID[duplicated(ah$ID)]

## character(0)

social$ID[duplicated(social$ID)]

## character(0)

qof2021_selected$ID[duplicated(qof2021_selected$ID)]

## character(0)

qof2021_final$ID[duplicated(qof2021_final$ID)]

## character(0)

#merge
d = full_join(inc, invalsi_fa_scores, by = "ID") %>% 
  full_join(geo, by = "ID") %>%
  full_join(ah, by = "ID") %>% 
  full_join(social %>% select(-contains("name")), by = "ID") %>% 
  full_join(qof2021_selected %>% select(-`NOME PROVINCIA (ISTAT)`), by = "ID") %>% 
  full_join(qof2021_final %>% select(Provincia, QOL_2021, ID), by = "ID")

#no dups!
assert_that(!anyDuplicated(d$ID))

## [1] TRUE

d$ID[duplicated(d$ID)]

## character(0)

#combine province names to a full set
#correct capitalization
d$name = miss_fill(d$Province, d$Provincia.x, d$province, d$Provincia.y) %>% 
  str_to_title() %>% 
  str_replace_all(" E ", " e ") %>% 
  str_replace_all(" Di ", " di ")

Shapefile

#https://www.eea.europa.eu/data-and-maps/data/eea-reference-grids-2/gis-files/italy-shapefile
#useless file
# # italy_shapefile = read_sf("data/Italy_shapefile/it_1km.shp")
# sf_provinces = read_sf("data/stanford-mn871sp9778-shapefile/mn871sp9778.shp")
# 
# #fix merge code
# sf_provinces %<>% mutate(
#   ID = str_replace(hasc_2, "IT.", "")
# )

#also outdated
#https://maps.princeton.edu/catalog/stanford-mn871sp9778

#https://gadm.org/download_country.html
#outdated, issue wiht merged and splitting since 2016, esp. sicily
sf_provinces2 = read_sf("data/gadm41_ITA_shp/gadm41_ITA_2.shp")
sf_provinces2$HASC_2 %>% anyDuplicated()

## [1] 0

#fix merge code
sf_provinces2 %<>% mutate(
  ID = str_replace(HASC_2, "IT.", "")
)

#latitude and longitude as centroids
sf_provinces2$centroid = sf_provinces2 %>% st_centroid() %>% st_geometry()

## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x

sf_provinces2$lat = sf_provinces2$centroid %>% sf::st_coordinates() %>% .[, 2]
sf_provinces2$lon = sf_provinces2$centroid %>% sf::st_coordinates() %>% .[, 1]

#they messed up some of the codes too
sf_provinces2 %>% select(ID, NAME_2) %>% sf::st_drop_geometry() %>% filter(!ID %in% d$ID)

sf_provinces2$ID %<>% plyr::revalue(
  replace = c(
    "BB" = "BA",
    "FA" = "FG",
    "FO" = "FC",
    "MA" = "MI",
    "MZ" = "MB",
    "AC" = "AP",
    "PS" = "PU",
    "CG" = "CA",
    "MD" = "VS",
    "NR" = "NU",
    "ON" = "OR",
    "SX" = "SS"
  )
)

#https://www.istat.it/it/archivio/222527
#officlal 2022 version?!
sf_provinces3 = read_sf("data/Limiti01012022_g/ProvCM01012022_g/ProvCM01012022_g_WGS84.shp") %>% 
  mutate(
    ID = SIGLA
  )

#latitude and longitude as centroids
sf_provinces3$centroid = sf_provinces3 %>% st_centroid() %>% st_geometry()

## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x

sf_provinces3$lat = sf_provinces3$centroid %>% sf::st_coordinates() %>% .[, 2]
sf_provinces3$lon = sf_provinces3$centroid %>% sf::st_coordinates() %>% .[, 1]

#join with main, derive the lat and lon, then remove geo again
#we join them again later when we have created the other derived variables
d_sf = full_join(sf_provinces3, d, by = "ID")

#back to regular
d = d_sf %>% st_drop_geometry()

Analysis

S factor

S_vars = c(
  #ISTAT own compilation
  "Average_hourly_earnings_1_decile",
  "Average_hourly_earnings_5_decile",
  "Average_hourly_earnings_9_decile",
  "standardized_mortality_rate_2019",
  "mortality_rate_2019",
  "Infant_mortality_rate_2019",
  "unemployment_rate_2021",
  
  #quality of life data
  "tertiary_education",
  "years_of_education",
  "birth_rate",
  "robberies",
  "bad_payers",
  "startups",
  "air_quality"
  )

#impute
d_S = d %>% select(!!S_vars) %>% miss_impute()

#FA robustness
S_fa_robust = fa_all_methods(d_S, messages = F)
S_fa_robust$scores %>% GG_heatmap()

#default S
S_fa = fa(d_S)
S_fa

## Factor Analysis using method =  minres
## Call: fa(r = d_S)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                    MR1     h2   u2 com
## Average_hourly_earnings_1_decile  0.77 0.5874 0.41   1
## Average_hourly_earnings_5_decile  0.92 0.8396 0.16   1
## Average_hourly_earnings_9_decile  0.81 0.6509 0.35   1
## standardized_mortality_rate_2019 -0.69 0.4752 0.52   1
## mortality_rate_2019               0.05 0.0026 1.00   1
## Infant_mortality_rate_2019       -0.31 0.0955 0.90   1
## unemployment_rate_2021           -0.86 0.7420 0.26   1
## tertiary_education                0.65 0.4218 0.58   1
## years_of_education                0.65 0.4217 0.58   1
## birth_rate                       -0.17 0.0275 0.97   1
## robberies                         0.17 0.0278 0.97   1
## bad_payers                       -0.83 0.6960 0.30   1
## startups                          0.39 0.1528 0.85   1
## air_quality                       0.60 0.3546 0.65   1
## 
##                 MR1
## SS loadings    5.50
## Proportion Var 0.39
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  91  and the objective function was  12.05 with Chi Square of  1271.29
## The degrees of freedom for the model are 77  and the objective function was  6.06 
## 
## The root mean square of the residuals (RMSR) is  0.14 
## The df corrected root mean square of the residuals is  0.16 
## 
## The harmonic number of observations is  107 with the empirical chi square  399.26  with prob <  5.3e-45 
## The total number of observations was  112  with Likelihood Chi Square =  635.22  with prob <  1e-88 
## 
## Tucker Lewis Index of factoring reliability =  0.437
## RMSEA index =  0.254  and the 90 % confidence intervals are  0.237 0.274
## BIC =  271.89
## Fit based upon off diagonal values = 0.88
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.99
## Multiple R square of scores with factors          0.98
## Minimum correlation of possible factor scores     0.96

#plot
S_fa %>% fa_plot_loadings() + theme(text = element_text(size = 14))

GG_save("figures/S_loadings.png")

#save scores
d$S = S_fa$scores[, 1] %>% standardize()

Cognitive ability

#age heaping transformed
#simple approach with z scores
d$ah_z = d %>% select(`1861`:`1881`) %>% map_df(standardize) %>% rowMeans(na.rm = T)

#beta reg
#transform to 0-1 scale
d %<>% mutate(
  `1861_01` = 1 - (`1861` - 100)/400,
  `1871_01` = 1 - (`1871` - 100)/400,
  `1881_01` = 1 - (`1881` - 100)/400
)

#long format
ah_01_long = d %>% 
  select(ID, `1861_01`:`1881_01`) %>% 
  pivot_longer(cols = -ID, names_to = "year", values_to = "ah") %>% 
  mutate(year = str_replace(year, "_01", "") %>% as.numeric())

#fit model
beta_fit = betareg::betareg(
  ah ~ as.factor(year) + ID,
  data = ah_01_long
)

beta_fit

## 
## Call:
## betareg::betareg(formula = ah ~ as.factor(year) + ID, data = ah_01_long)
## 
## Coefficients (mean model with logit link):
##         (Intercept)  as.factor(year)1871  as.factor(year)1881  
##             1.37470              0.04390              0.42310  
##                IDAL                 IDAN                 IDAP  
##             0.67699              0.52821              0.64612  
##                IDAQ                 IDAR                 IDAV  
##             0.63535              0.47283              0.07664  
##                IDBA                 IDBG                 IDBL  
##            -0.01140              1.40801              1.46270  
##                IDBN                 IDBO                 IDBS  
##             0.17663              1.20417              1.24536  
##                IDCA                 IDCB                 IDCE  
##            -0.03942              0.29563              0.28132  
##                IDCH                 IDCL                 IDCN  
##             0.59586              0.20548              0.68477  
##                IDCO                 IDCR                 IDCS  
##             1.29317              1.32744              0.04079  
##                IDCT                 IDCZ                 IDFC  
##             0.17732             -0.07492              0.79957  
##                IDFE                 IDFG                 IDFI  
##             0.98700              0.14338              0.73404  
##                IDGE                 IDGR                 IDIM  
##             0.71936              0.56131              0.84254  
##                IDLE                 IDLI                 IDMC  
##             0.22601              0.88920              0.46360  
##                IDME                 IDMI                 IDMN  
##             0.26687              1.26225              1.32278  
##                IDMO                 IDMS                 IDMT  
##             1.09730              0.71416              0.05858  
##                IDNA                 IDNO                 IDPA  
##             0.14340              1.04477              0.52624  
##                IDPC                 IDPD                 IDPG  
##             0.91170              1.39392              0.07202  
##                IDPI                 IDPR                 IDPU  
##             0.75781              1.10051              0.45386  
##                IDPV                 IDPZ                 IDRA  
##             0.97739              0.05858              0.90377  
##                IDRC                 IDRE                 IDRM  
##             0.05108              1.10026              0.48969  
##                IDRO                 IDSA                 IDSI  
##             1.13831              0.09151              0.79362  
##                IDSO                 IDSR                 IDSS  
##             1.24661             -0.02591             -0.14861  
##                IDTE                 IDTO                 IDTP  
##             0.54536              0.92838              0.15522  
##                IDTV                 IDUD                 IDVE  
##             1.47647              1.66523              1.36097  
##                IDVI                 IDVR  
##             1.36682              1.56095  
## 
## Phi coefficients (precision model with identity link):
## (phi)  
##   536

beta_fit %>% summary()

## 
## Call:
## betareg::betareg(formula = ah ~ as.factor(year) + ID, data = ah_01_long)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -2.9292 -0.8155 -0.0702  0.6777  4.7654 
## 
## Coefficients (mean model with logit link):
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          1.37470    0.06632  20.727  < 2e-16 ***
## as.factor(year)1871  0.04390    0.02352   1.867  0.06196 .  
## as.factor(year)1881  0.42310    0.02529  16.733  < 2e-16 ***
## IDAL                 0.67699    0.10522   6.434 1.24e-10 ***
## IDAN                 0.52821    0.10163   5.198 2.02e-07 ***
## IDAP                 0.64612    0.10444   6.186 6.15e-10 ***
## IDAQ                 0.63535    0.10417   6.099 1.07e-09 ***
## IDAR                 0.47283    0.10039   4.710 2.48e-06 ***
## IDAV                 0.07664    0.09304   0.824  0.41011    
## IDBA                -0.01140    0.09172  -0.124  0.90107    
## IDBG                 1.40801    0.12985  10.844  < 2e-16 ***
## IDBL                 1.46270    0.15995   9.145  < 2e-16 ***
## IDBN                 0.17663    0.09466   1.866  0.06206 .  
## IDBO                 1.20417    0.12169   9.895  < 2e-16 ***
## IDBS                 1.24536    0.12325  10.104  < 2e-16 ***
## IDCA                -0.03942    0.09133  -0.432  0.66598    
## IDCB                 0.29563    0.09679   3.054  0.00226 ** 
## IDCE                 0.28132    0.09653   2.914  0.00356 ** 
## IDCH                 0.59586    0.10321   5.773 7.77e-09 ***
## IDCL                 0.20548    0.09516   2.159  0.03083 *  
## IDCN                 0.68477    0.10542   6.495 8.28e-11 ***
## IDCO                 1.29317    0.12512  10.336  < 2e-16 ***
## IDCR                 1.32744    0.12649  10.494  < 2e-16 ***
## IDCS                 0.04079    0.09249   0.441  0.65923    
## IDCT                 0.17732    0.09468   1.873  0.06108 .  
## IDCZ                -0.07492    0.09084  -0.825  0.40955    
## IDFC                 0.79957    0.10851   7.369 1.72e-13 ***
## IDFE                 0.98700    0.11415   8.646  < 2e-16 ***
## IDFG                 0.14338    0.09411   1.524  0.12761    
## IDFI                 0.73404    0.10672   6.878 6.05e-12 ***
## IDGE                 0.71936    0.10633   6.766 1.33e-11 ***
## IDGR                 0.56131    0.10239   5.482 4.20e-08 ***
## IDIM                 0.84254    0.10974   7.678 1.62e-14 ***
## IDLE                 0.22601    0.09552   2.366  0.01798 *  
## IDLI                 0.88920    0.11111   8.003 1.22e-15 ***
## IDMC                 0.46360    0.10019   4.627 3.71e-06 ***
## IDME                 0.26687    0.09626   2.772  0.00556 ** 
## IDMI                 1.26225    0.12390  10.187  < 2e-16 ***
## IDMN                 1.32278    0.15202   8.701  < 2e-16 ***
## IDMO                 1.09730    0.11784   9.312  < 2e-16 ***
## IDMS                 0.71416    0.10619   6.725 1.75e-11 ***
## IDMT                 0.05858    0.09276   0.632  0.52767    
## IDNA                 0.14340    0.09411   1.524  0.12755    
## IDNO                 1.04477    0.11605   9.003  < 2e-16 ***
## IDPA                 0.52624    0.10158   5.181 2.21e-07 ***
## IDPC                 0.91170    0.11179   8.155 3.48e-16 ***
## IDPD                 1.39392    0.15597   8.937  < 2e-16 ***
## IDPG                 0.07202    0.09297   0.775  0.43855    
## IDPI                 0.75781    0.10736   7.059 1.68e-12 ***
## IDPR                 1.10051    0.11795   9.330  < 2e-16 ***
## IDPU                 0.45386    0.09998   4.539 5.64e-06 ***
## IDPV                 0.97739    0.11384   8.585  < 2e-16 ***
## IDPZ                 0.05858    0.09276   0.632  0.52767    
## IDRA                 0.90377    0.11155   8.102 5.41e-16 ***
## IDRC                 0.05108    0.09265   0.551  0.58139    
## IDRE                 1.10026    0.11794   9.329  < 2e-16 ***
## IDRM                 0.48969    0.11713   4.181 2.91e-05 ***
## IDRO                 1.13831    0.14256   7.985 1.41e-15 ***
## IDSA                 0.09151    0.09327   0.981  0.32654    
## IDSI                 0.79362    0.10834   7.325 2.39e-13 ***
## IDSO                 1.24661    0.12330  10.110  < 2e-16 ***
## IDSR                -0.02591    0.09152  -0.283  0.77707    
## IDSS                -0.14861    0.08988  -1.653  0.09824 .  
## IDTE                 0.54536    0.10202   5.346 9.01e-08 ***
## IDTO                 0.92838    0.11230   8.267  < 2e-16 ***
## IDTP                 0.15522    0.09430   1.646  0.09976 .  
## IDTV                 1.47647    0.16077   9.184  < 2e-16 ***
## IDUD                 1.66523    0.17266   9.644  < 2e-16 ***
## IDVE                 1.36097    0.15412   8.831  < 2e-16 ***
## IDVI                 1.36682    0.15445   8.850  < 2e-16 ***
## IDVR                 1.56095    0.16593   9.407  < 2e-16 ***
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   536.00      54.03   9.921   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 578.8 on 72 Df
## Pseudo R-squared: 0.9335
## Number of iterations: 90 (BFGS) + 2 (Fisher scoring)

#save coefs back
beta_coef = beta_fit %>% broom::tidy() %>% 
  filter(str_detect(term, "^ID")) %>% 
  mutate(
    ID = str_replace(term, "^ID", "")
  )

#manually add 0 value for the index ID
beta_coef = bind_rows(
  tibble(
    ID = ah_01_long$ID %>% factor() %>% levels() %>% head(1),
    estimate = 0
  ),
  beta_coef
)

#join to main
d$ah_coef = NULL
d = left_join(
  d,
  beta_coef %>% mutate(ah_coef = estimate) %>% select(ah_coef, ID),
  by = "ID"
  )

#compare methods
GG_scatter(d, "ah_coef", "ah_z")

## `geom_smooth()` using formula 'y ~ x'

#ability historical and present
invalsi_label = "INVALSI achievement test score from 2010's"
ah_label = "Age heaping score from 1880's (beta regression coefficient)"

GG_scatter(d, "ah_coef", "ach", case_names = "name") +
  ylab(invalsi_label) +
  xlab(ah_label)

## `geom_smooth()` using formula 'y ~ x'

ggsave("figures/cognitive_stability.png")

## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'

Main results

#main vars
main_vars = c("ah_coef", "ach", "S", "QOL_2021", "lat", "lon", "Altitudine")

#correlations
d %>% select(!!main_vars) %>% cor_matrix(p_val = T, asterisks = .01)

##            ah_coef  ach      S        QOL_2021 lat      lon      Altitudine
## ah_coef    "1"      "0.73*"  "0.78*"  "0.75*"  "0.86*"  "-0.61*" "-0.20"   
## ach        "0.73*"  "1"      "0.82*"  "0.80*"  "0.85*"  "-0.51*" "-0.09"   
## S          "0.78*"  "0.82*"  "1"      "0.88*"  "0.86*"  "-0.65*" "-0.11"   
## QOL_2021   "0.75*"  "0.80*"  "0.88*"  "1"      "0.81*"  "-0.62*" "-0.13"   
## lat        "0.86*"  "0.85*"  "0.86*"  "0.81*"  "1"      "-0.64*" "-0.17"   
## lon        "-0.61*" "-0.51*" "-0.65*" "-0.62*" "-0.64*" "1"      "0.07"    
## Altitudine "-0.20"  "-0.09"  "-0.11"  "-0.13"  "-0.17"  "0.07"   "1"

#n pairs
d %>% select(!!main_vars) %>% pairwiseCount()

##            ah_coef ach   S QOL_2021 lat lon Altitudine
## ah_coef         69  69  69       68  69  69         69
## ach             69 107 107      106 107 107        101
## S               69 107 107      106 107 107        101
## QOL_2021        68 106 106      107 106 106        100
## lat             69 107 107      106 107 107        101
## lon             69 107 107      106 107 107        101
## Altitudine      69 101 101      100 101 101        101

#ggpairs
d %>% select(!!main_vars) %>% 
  GGally::ggpairs()

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

## Warning: Removed 43 rows containing non-finite values (stat_density).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 44 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 43 rows containing missing values

## Warning: Removed 43 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing non-finite values (stat_density).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 6 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 11 rows containing missing values

## Warning: Removed 43 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing non-finite values (stat_density).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 6 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 11 rows containing missing values

## Warning: Removed 44 rows containing missing values (geom_point).

## Warning: Removed 6 rows containing missing values (geom_point).
## Removed 6 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing non-finite values (stat_density).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 6 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 6 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 12 rows containing missing values

## Warning: Removed 43 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing missing values (geom_point).
## Removed 5 rows containing missing values (geom_point).

## Warning: Removed 6 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing non-finite values (stat_density).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 5 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 11 rows containing missing values

## Warning: Removed 43 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing missing values (geom_point).
## Removed 5 rows containing missing values (geom_point).

## Warning: Removed 6 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing missing values (geom_point).

## Warning: Removed 5 rows containing non-finite values (stat_density).

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 11 rows containing missing values

## Warning: Removed 43 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).
## Removed 11 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (geom_point).
## Removed 11 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing non-finite values (stat_density).

#labs
S_ylab = ylab("General factor of socioeconomic variables")

#achievement and S
GG_scatter(d, "ach", "S", case_names = "name") +
  xlab(invalsi_label) +
  S_ylab

## `geom_smooth()` using formula 'y ~ x'

ggsave("figures/ach_S.png")

## Saving 7 x 5 in image

## `geom_smooth()` using formula 'y ~ x'

#age heaping and S
GG_scatter(d, "ah_coef", "S", case_names = "name") +
  xlab(ah_label) +
  S_ylab

## `geom_smooth()` using formula 'y ~ x'

ggsave("figures/ah_S.png")

## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'

#regressions
#subset with ah data
d_z = map_df(d %>% select(!!main_vars), standardize)
d_ah = d_z %>% filter(!is.na(ah_coef))

list(
  ols(S ~ lat + lon + Altitudine, data = d_z),
  ols(S ~ ach, data = d_z),
  ols(S ~ ach + lat + lon + Altitudine, data = d_z),
  #ah subset
  ols(S ~ lat + lon + Altitudine, data = d_ah),
  ols(S ~ ah_coef, data = d_ah),
  ols(S ~ ah_coef + lat + lon + Altitudine, data = d_ah)
) %>% 
  summarize_models(asterisks_only = F)

# Geography of intelligence
list(
  ols(ach ~ lat + lon + Altitudine, data = d_z),
  ols(ah_coef ~ lat + lon + Altitudine, data = d_z)
) %>% 
  summarize_models(asterisks_only = F)

Maps

#plot map with names
ggplot(sf_provinces3) +
  geom_sf() +
  geom_text(aes(x = lon, y = lat, label = DEN_UTS))

#join shapefile
#keep all overlapping data
d_sf = full_join(sf_provinces3, d, by = "ID")

#remove empty units
# d_sf %<>% filter(!is.na(geometry))
d_sf %<>% filter(!st_is_empty(.))

#dups
d_sf$ID %>% anyDuplicated()

## [1] 0

#plot with values
ggplot(d_sf, aes(fill = S)) +
  geom_sf() +
  scale_fill_continuous("Well-being index") +
  theme(legend.position = "top") ->
  map_S

map_S

ggplot(d_sf, aes(fill = ach)) +
  geom_sf() +
  scale_fill_continuous(invalsi_label) +
  theme(legend.position = "top") ->
  map_invalsi

map_invalsi

ggplot(d_sf, aes(fill = ah_coef)) +
  geom_sf() +
  scale_fill_continuous(ah_label) +
  theme(legend.position = "top") ->
  map_ah

map_ah

#together
map_S + map_invalsi + map_ah

#terrible! how to deal with the bad legends?

#tmap maybe easier
tmap_S = tm_shape(d_sf) +
  tm_polygons("S", title = "Well-being index", n = 10)

tmap_S

## Variable(s) "S" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tmap_S, filename = "figures/map_S.png")

## Variable(s) "S" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Map saved to /media/8tb_ssd_3/projects/Italian provinces/figures/map_S.png

## Resolution: 1850.329 by 2383.359 pixels

## Size: 6.167765 by 7.944531 inches (300 dpi)

d_sf$ah_coef_z = d_sf$ah_coef %>% standardize()
tmap_ah = tm_shape(d_sf) +
  tm_polygons("ah_coef_z", title = "Age heaping", n = 10)

tmap_ah

## Variable(s) "ah_coef_z" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tmap_ah, filename = "figures/map_ah.png")

## Variable(s) "ah_coef_z" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Map saved to /media/8tb_ssd_3/projects/Italian provinces/figures/map_ah.png

## Resolution: 1850.329 by 2383.359 pixels

## Size: 6.167765 by 7.944531 inches (300 dpi)

tmap_invalsi = tm_shape(d_sf) +
  tm_polygons("ach", title = "INVALSI", n = 10)

tmap_invalsi

## Variable(s) "ach" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tmap_invalsi, filename = "figures/map_invalsi.png")

## Variable(s) "ach" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Map saved to /media/8tb_ssd_3/projects/Italian provinces/figures/map_invalsi.png

## Resolution: 1850.329 by 2383.359 pixels

## Size: 6.167765 by 7.944531 inches (300 dpi)

#side by side!!
tmap_combo = tmap_arrange(tmap_S, tmap_ah, tmap_invalsi, nrow = 1)

tmap_combo

## Variable(s) "S" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "ah_coef_z" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "ach" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(tmap_combo, filename = "figures/maps_combined.png")

## Variable(s) "S" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "ah_coef_z" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Variable(s) "ach" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

## Map saved to /media/8tb_ssd_3/projects/Italian provinces/figures/maps_combined.png

## Resolution: 2100 by 2100 pixels

## Size: 7 by 7 inches (300 dpi)

Italian provinces study notebook

Initial

Data

Social and geographical

Invalsi

Join

Shapefile

Analysis

S factor

Cognitive ability

Main results

Maps