# Working directory
# The project folder can be overridden through the PW_DATA_DIR environment
# variable; the original path is kept as the default so existing runs behave
# as before. When the folder does not exist (e.g. on another machine) the
# current working directory is left untouched instead of aborting on setwd().
data_dir <- Sys.getenv(
  "PW_DATA_DIR",
  unset = "/Users/_ilarossi/Documents/LUISS/Corsi/Magistrale/II semestre/Analisi dei dati/PW"
)
if (dir.exists(data_dir)) {
  setwd(data_dir)
}

# Dataset
# Fail early with an explicit message when the CSV is not where we expect it.
if (!file.exists("Puglia.csv")) {
  stop("Puglia.csv not found in ", getwd(), call. = FALSE)
}
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df <- read.csv("Puglia.csv", header = TRUE, sep = ",")
# Pacchetti
library(dplyr)      
library(readr)
library(tidyr)
library(ggplot2)
library(scales)
library(skimr)
library(FactoMineR)
library(factoextra)
library(caret)

# DATA EXPLORATION (EDA) ----

# Total population: min, quartiles, median, mean and max of df$pop_total
summary(df[["pop_total"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     172    3684    7626   15708   15919  315933
## FASCIA D'ETA' DOMINANTE PER PROVINCIA

# Age-band columns of df, listed from the youngest to the oldest band
age_cols <- paste0(
  "pop_",
  c(
    "under5", "5to9", "10to14", "15to19",
    "20to24", "25to29", "30to34", "35to39",
    "40to44", "45to49", "50to54", "55to59",
    "60to64", "65to69", "70to74", "over74"
  )
)

# Display labels, matched to age_cols by position
age_labels <- c(
  "0–4", "5–9", "10–14", "15–19", "20–24", "25–29", "30–34", "35–39",
  "40–44", "45–49", "50–54", "55–59", "60–64", "65–69", "70–74", "75+"
)

# Long format: one row per (row of df, age band). The band becomes a factor
# whose levels follow the order of age_cols and print as age_labels.
df_age_long <- df %>%
  dplyr::select(PROVINCIA, dplyr::all_of(age_cols)) %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(age_cols),
    names_to = "fascia",
    values_to = "pop"
  ) %>%
  dplyr::mutate(
    fascia = factor(fascia, levels = age_cols, labels = age_labels)
  )

# Age band with the largest summed population in each province
df_age_dom <- df_age_long %>%
  dplyr::group_by(PROVINCIA, fascia) %>%
  dplyr::summarise(pop = sum(pop, na.rm = TRUE), .groups = "drop") %>%
  dplyr::group_by(PROVINCIA) %>%
  # n = 1 is the default; with_ties = FALSE keeps a single band when two tie
  dplyr::slice_max(order_by = pop, n = 1, with_ties = FALSE) %>%
  dplyr::ungroup()

# Summary table: how many provinces have each band as dominant, largest first
df_age_summary <- dplyr::count(
  df_age_dom,
  fascia,
  name = "n_province",
  sort = TRUE
)
print(df_age_summary)
## # A tibble: 2 × 2
##   fascia n_province
##   <fct>       <int>
## 1 75+             5
## 2 40–44           1
# Faceted bar chart: one panel per province, a single bar for its dominant
# age band (bar height = population, colour = band)
ggplot2::ggplot(
  df_age_dom,
  ggplot2::aes(x = "", y = pop, fill = fascia)
) +
  ggplot2::geom_col() +
  ggplot2::facet_wrap(ggplot2::vars(PROVINCIA), ncol = 2, scales = "free_y") +
  ggplot2::labs(
    title = "Fascia d'età dominante per provincia",
    x = NULL,
    y = "Popolazione"
  ) +
  ggplot2::theme_minimal() +
  # The x axis carries no information (constant ""), so hide its text and ticks
  ggplot2::theme(
    legend.position = "bottom",
    axis.ticks.x = ggplot2::element_blank(),
    axis.text.x = ggplot2::element_blank()
  )

## DISTRIBUZIONE GENERE PER PROVINCIA

# Male and female share of the total population, aggregated by province.
# Shares are computed on the provincial sums, not averaged across rows.
df_sex <- df %>%
  dplyr::group_by(PROVINCIA) %>%
  dplyr::summarise(
    prop_male = sum(pop_male, na.rm = TRUE) / sum(pop_total, na.rm = TRUE),
    prop_female = sum(pop_female, na.rm = TRUE) / sum(pop_total, na.rm = TRUE),
    .groups = "drop"
  )

# Long format: one row per (province, sex), with Italian display labels
df_sex_long <- df_sex %>%
  tidyr::pivot_longer(
    cols = c(prop_male, prop_female),
    names_to = "sesso",
    values_to = "prop"
  ) %>%
  dplyr::mutate(
    # Lookup by column name; unname() leaves a plain character vector
    sesso = unname(c(prop_male = "Maschi", prop_female = "Femmine")[sesso])
  )

# Horizontal stacked bars: gender composition of each province
ggplot2::ggplot(
  df_sex_long,
  ggplot2::aes(x = PROVINCIA, y = prop, fill = sesso)
) +
  ggplot2::geom_col() +
  ggplot2::scale_y_continuous(labels = scales::percent_format()) +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Composizione di genere per provincia",
    x = "Provincia",
    y = "Proporzione"
  ) +
  ggplot2::theme_minimal()

## LIVELLO DI ISTRUZIONE DOMINANTE PER PROVINCIA

# Education-level columns of df and their short display labels
# (matched by position)
edu_cols <- paste0(
  "pop_",
  c("degree_holders", "high_school", "middle_school", "elementary")
)
edu_labels <- c("Laurea", "Superiore", "Media", "Elementare")

# Long format: one row per (row of df, education level); the level becomes a
# factor whose levels follow edu_cols and print as edu_labels
df_edu_long <- df %>%
  dplyr::select(PROVINCIA, dplyr::all_of(edu_cols)) %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(edu_cols),
    names_to = "livello",
    values_to = "pop"
  ) %>%
  dplyr::mutate(
    livello = factor(livello, levels = edu_cols, labels = edu_labels)
  )

# Education level with the largest summed population in each province
df_edu_dom <- df_edu_long %>%
  dplyr::group_by(PROVINCIA, livello) %>%
  dplyr::summarise(pop = sum(pop, na.rm = TRUE), .groups = "drop") %>%
  dplyr::group_by(PROVINCIA) %>%
  # Keep exactly one row per province, even when two levels tie
  dplyr::slice_max(order_by = pop, n = 1, with_ties = FALSE) %>%
  dplyr::ungroup()

# Summary table: number of provinces per dominant education level
df_edu_summary <- dplyr::count(
  df_edu_dom,
  livello,
  name = "n_province",
  sort = TRUE
)
print(df_edu_summary)
## # A tibble: 1 × 2
##   livello n_province
##   <fct>        <int>
## 1 Media            6
## LIVELLO DI ISTRUZIONE DOMINANTE PER COMUNE

# Education-level columns of df; the labels here are the longer variants
# ("Scuola ...") used for the municipality-level table and plot
edu_cols <- paste0(
  "pop_",
  c("degree_holders", "high_school", "middle_school", "elementary")
)
edu_labels <- c(
  "Laurea",
  "Scuola Superiore",
  "Scuola Media",
  "Scuola Elementare"
)

# Long format: one row per (municipality, education level)
df_long <- df %>%
  dplyr::select(COMUNE, dplyr::all_of(edu_cols)) %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(edu_cols),
    names_to = "livello",
    values_to = "pop"
  ) %>%
  dplyr::mutate(
    livello = factor(livello, levels = edu_cols, labels = edu_labels)
  )

# Education level with the largest population in each municipality.
# Grouping is by COMUNE name only, so rows sharing a name are treated as one
# group.
df_dom <- df_long %>%
  dplyr::group_by(COMUNE) %>%
  dplyr::slice_max(order_by = pop, n = 1, with_ties = FALSE) %>%
  dplyr::ungroup()

# Summary table: number of municipalities per dominant level, largest first
# (the count column keeps dplyr's default name, n)
df_summary <- dplyr::count(df_dom, livello, sort = TRUE)
print(df_summary)
## # A tibble: 3 × 2
##   livello               n
##   <fct>             <int>
## 1 Scuola Media        209
## 2 Scuola Superiore     40
## 3 Scuola Elementare     9
# Bar chart: number of municipalities for each dominant education level
ggplot2::ggplot(df_summary, ggplot2::aes(x = livello, y = n)) +
  ggplot2::geom_col(fill = "steelblue") +
  ggplot2::labs(
    title = "Livello di istruzione dominante per comune",
    x = "Livello di istruzione",
    y = "Numero di comuni"
  ) +
  ggplot2::theme_minimal() +
  # Tilt the category labels so the longer names do not overlap
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
  )

## OCCUPATI vs DISOCCUPATI PER PROVINCIA

# Employed and unemployed as a share of the TOTAL population, by province.
# Both shares use pop_total as denominator, so they do not add up to 1.
df_emp <- df %>%
  dplyr::group_by(PROVINCIA) %>%
  dplyr::summarise(
    prop_occ = sum(pop_employed, na.rm = TRUE) / sum(pop_total, na.rm = TRUE),
    prop_dis = sum(pop_unemployed, na.rm = TRUE) / sum(pop_total, na.rm = TRUE),
    .groups = "drop"
  )

# Long format: one row per (province, employment status), Italian labels
df_emp_long <- df_emp %>%
  tidyr::pivot_longer(
    cols = c(prop_occ, prop_dis),
    names_to = "stato",
    values_to = "prop"
  ) %>%
  dplyr::mutate(
    # Lookup by column name; unname() leaves a plain character vector
    stato = unname(c(prop_occ = "Occupati", prop_dis = "Disoccupati")[stato])
  )

# Horizontal stacked bars: employed vs unemployed share in each province
ggplot2::ggplot(
  df_emp_long,
  ggplot2::aes(x = PROVINCIA, y = prop, fill = stato)
) +
  ggplot2::geom_col() +
  # accuracy = 1 rounds the axis labels to whole percentages
  ggplot2::scale_y_continuous(
    labels = scales::percent_format(accuracy = 1)
  ) +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Occupati vs Disoccupati per provincia",
    x = "Provincia",
    y = "Proporzione"
  ) +
  ggplot2::theme_minimal()

## DISTRIBUZIONE STRANIERI PER LUOGO DI PROVENIENZA 

# Pie chart data
# Columns of df holding foreign residents by area of origin
country_cols <- grep(
  "^foreign_(europe|africa|america|asia|oceania)$",
  names(df),
  value = TRUE
)
# Without this guard an empty match only surfaces later as an obscure
# reshaping error.
if (length(country_cols) == 0) {
  stop("No foreign_<area> columns found in df", call. = FALSE)
}

# Regional totals per area and their share of all foreign residents.
# The lambda replaces across(cols, sum, na.rm = TRUE): passing extra
# arguments through `...` is deprecated since dplyr 1.1.0.
df_paesi <- df %>%
  dplyr::summarise(
    dplyr::across(dplyr::all_of(country_cols), ~ sum(.x, na.rm = TRUE))
  ) %>%
  tidyr::pivot_longer(
    cols      = dplyr::everything(),
    names_to  = "area",
    values_to = "totale"
  ) %>%
  dplyr::mutate(
    # Drop the "foreign_" prefix so the legend shows only the area name
    area = sub("^foreign_", "", area),
    pct  = totale / sum(totale, na.rm = TRUE)
  )

# Pie chart: a single stacked bar (width = 1) bent into a circle by
# coord_polar(), with each slice labelled by its percentage
ggplot2::ggplot(df_paesi, ggplot2::aes(x = "", y = pct, fill = area)) +
  ggplot2::geom_col(width = 1, color = "white") +
  ggplot2::coord_polar(theta = "y") +
  ggplot2::geom_text(
    ggplot2::aes(label = scales::percent(pct, accuracy = 0.1)),
    # vjust = 0.5 centres each label inside its slice
    position = ggplot2::position_stack(vjust = 0.5),
    size = 3
  ) +
  ggplot2::scale_fill_brewer(palette = "Set3") +
  ggplot2::labs(
    title = "Distribuzione % degli stranieri per area di provenienza",
    fill = "Area"
  ) +
  ggplot2::theme_void() +
  ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))

## DISTRIBUZIONE REDDITO PER PROVINCIA

# Mean per-capita income by province: the plain (unweighted) mean of
# reddito_pro_capite over the rows of each province, sorted ascending
df_rpc <- df %>%
  dplyr::group_by(PROVINCIA) %>%
  dplyr::summarise(
    mean_rpc = mean(reddito_pro_capite, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::arrange(mean_rpc)

# Horizontal bars ordered by value: mean per-capita income of each province
ggplot2::ggplot(
  df_rpc,
  ggplot2::aes(x = stats::reorder(PROVINCIA, mean_rpc), y = mean_rpc)
) +
  ggplot2::geom_col(fill = "steelblue") +
  ggplot2::scale_y_continuous(labels = scales::comma) +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Reddito pro-capite medio per provincia",
    x = "Provincia",
    y = "Reddito pro-capite"
  ) +
  ggplot2::theme_minimal()

# Employee vs self-employed income per inhabitant, by province: provincial
# income totals divided by the provincial total population
df_rpc2 <- df %>%
  dplyr::group_by(PROVINCIA) %>%
  dplyr::summarise(
    rpc_dip = sum(reddito_lavoro_dipendente, na.rm = TRUE) /
      sum(pop_total, na.rm = TRUE),
    rpc_aut = sum(reddito_lavoro_autonomo, na.rm = TRUE) /
      sum(pop_total, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Long format: one row per (province, income type)
  tidyr::pivot_longer(
    cols = c(rpc_dip, rpc_aut),
    names_to = "tipo",
    values_to = "rpc"
  ) %>%
  dplyr::mutate(
    # Lookup by column name; any other name would map to NA
    tipo = unname(c(rpc_dip = "Dipendente", rpc_aut = "Autonomo")[tipo])
  )

# Side-by-side (dodged) horizontal bars: the two income types per province
ggplot2::ggplot(
  df_rpc2,
  ggplot2::aes(x = PROVINCIA, y = rpc, fill = tipo)
) +
  ggplot2::geom_col(position = "dodge") +
  ggplot2::scale_y_continuous(labels = scales::comma) +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Reddito pro-capite: Dipendente vs Autonomo per provincia",
    x = "Provincia",
    y = "Reddito pro-capite"
  ) +
  ggplot2::theme_minimal()

# ANALISI IN COMPONENTI PRINCIPALI (ACP) ----

# Dataset overview: row/column counts plus type and first values per column
dplyr::glimpse(x = df)
## Rows: 258
## Columns: 190
## $ PROCOM                          <int> 71001, 71002, 71003, 71004, 71005, 710…
## $ CODREG                          <int> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16…
## $ REGIONE                         <chr> "Puglia", "Puglia", "Puglia", "Puglia"…
## $ CODPRO                          <int> 71, 71, 71, 71, 71, 71, 71, 71, 71, 71…
## $ PROVINCIA                       <chr> "Foggia", "Foggia", "Foggia", "Foggia"…
## $ CODCOM                          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,…
## $ COMUNE                          <chr> "Accadia", "Alberona", "Anzano di Pugl…
## $ SEZ2011                         <dbl> 7.1001e+11, 7.1002e+11, 7.1003e+11, 7.…
## $ NSEZ                            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ ACE                             <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ CODLOC                          <int> 10001, 10001, 10001, 10001, 10001, 100…
## $ CODASC                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ pop_total                       <int> 2418, 1002, 1617, 13435, 6194, 2872, 3…
## $ pop_male                        <int> 1152, 478, 771, 6609, 3074, 1412, 1697…
## $ pop_female                      <int> 1266, 524, 846, 6826, 3120, 1460, 1865…
## $ pop_single                      <int> 955, 375, 656, 5496, 2596, 1145, 1333,…
## $ pop_married                     <int> 1151, 489, 746, 6742, 3048, 1388, 1759…
## $ pop_legally_separated           <int> 29, 9, 13, 152, 51, 21, 25, 59, 37, 84…
## $ pop_widowed                     <int> 270, 123, 193, 917, 449, 309, 405, 555…
## $ pop_divorced                    <int> 13, 6, 9, 128, 50, 9, 40, 63, 20, 59, …
## $ pop_single_male                 <int> 518, 204, 368, 2936, 1414, 626, 698, 1…
## $ pop_married_male                <int> 572, 247, 371, 3395, 1537, 703, 884, 1…
## $ pop_separated_male              <int> 11, 6, 4, 63, 22, 13, 13, 27, 15, 35, …
## $ pop_widowed_male                <int> 47, 18, 23, 170, 87, 66, 82, 102, 42, …
## $ pop_divorced_male               <int> 4, 3, 5, 45, 14, 4, 20, 34, 7, 24, 6, …
## $ pop_under5                      <int> 83, 28, 35, 656, 272, 97, 135, 329, 98…
## $ pop_5to9                        <int> 113, 32, 52, 732, 309, 126, 150, 366, …
## $ pop_10to14                      <int> 104, 48, 102, 723, 322, 110, 138, 447,…
## $ pop_15to19                      <int> 112, 49, 133, 835, 348, 136, 156, 425,…
## $ pop_20to24                      <int> 144, 61, 108, 874, 401, 164, 176, 479,…
## $ pop_25to29                      <int> 156, 55, 97, 870, 379, 191, 200, 402, …
## $ pop_30to34                      <int> 163, 55, 87, 907, 415, 195, 208, 421, …
## $ pop_35to39                      <int> 167, 63, 97, 959, 410, 194, 200, 496, …
## $ pop_40to44                      <int> 155, 70, 128, 1043, 450, 178, 256, 552…
## $ pop_45to49                      <int> 167, 78, 125, 1010, 483, 201, 245, 529…
## $ pop_50to54                      <int> 167, 69, 116, 765, 460, 190, 236, 480,…
## $ pop_55to59                      <int> 162, 57, 90, 743, 383, 193, 228, 402, …
## $ pop_60to64                      <int> 175, 54, 97, 782, 361, 216, 231, 451, …
## $ pop_65to69                      <int> 106, 50, 64, 686, 243, 136, 212, 412, …
## $ pop_70to74                      <int> 118, 61, 83, 706, 259, 138, 236, 409, …
## $ pop_over74                      <int> 326, 172, 203, 1144, 699, 407, 555, 85…
## $ pop_male_under5                 <int> 44, 14, 19, 338, 158, 48, 66, 177, 59,…
## $ pop_male_5to9                   <int> 69, 16, 31, 386, 170, 65, 83, 185, 85,…
## $ pop_male_10to14                 <int> 46, 28, 57, 364, 168, 61, 78, 227, 89,…
## $ pop_male_15to19                 <int> 55, 25, 73, 406, 176, 71, 83, 233, 99,…
## $ pop_male_20to24                 <int> 76, 33, 54, 434, 207, 91, 87, 243, 84,…
## $ pop_male_25to29                 <int> 82, 29, 48, 439, 195, 98, 92, 196, 68,…
## $ pop_male_30to34                 <int> 87, 28, 45, 454, 212, 100, 113, 210, 8…
## $ pop_male_35to39                 <int> 80, 29, 54, 500, 210, 107, 97, 269, 69…
## $ pop_male_40to44                 <int> 84, 40, 61, 520, 232, 89, 121, 268, 12…
## $ pop_male_45to49                 <int> 84, 34, 56, 503, 239, 97, 128, 291, 11…
## $ pop_male_50to54                 <int> 66, 37, 59, 377, 214, 99, 110, 249, 87…
## $ pop_male_55to59                 <int> 81, 29, 50, 346, 190, 89, 104, 202, 67…
## $ pop_male_60to64                 <int> 89, 21, 48, 371, 200, 108, 101, 219, 6…
## $ pop_male_65to69                 <int> 40, 20, 27, 335, 120, 74, 109, 194, 50…
## $ pop_male_70to74                 <int> 53, 30, 29, 358, 117, 57, 119, 180, 58…
## $ pop_male_over74                 <int> 116, 65, 60, 478, 266, 158, 206, 360, …
## $ pop_6plus                       <int> 2321, 968, 1575, 12630, 5866, 2754, 34…
## $ pop_degree_holders              <int> 195, 57, 61, 1029, 361, 234, 275, 366,…
## $ pop_high_school                 <int> 633, 211, 294, 2913, 1394, 718, 1087, …
## $ pop_middle_school               <int> 688, 289, 573, 3912, 2081, 838, 763, 2…
## $ pop_elementary                  <int> 421, 242, 360, 3189, 1237, 611, 786, 2…
## $ pop_literate                    <int> 299, 136, 196, 1308, 643, 292, 421, 86…
## $ pop_illiterate                  <int> 85, 33, 91, 279, 150, 61, 73, 251, 69,…
## $ pop_male_6plus                  <int> 1100, 461, 747, 6201, 2885, 1351, 1617…
## $ pop_male_degree                 <int> 83, 17, 25, 469, 163, 104, 102, 177, 5…
## $ pop_male_high_school            <int> 360, 109, 145, 1440, 729, 376, 580, 73…
## $ pop_male_middle_school          <int> 345, 168, 313, 2185, 1144, 474, 420, 1…
## $ pop_male_elementary             <int> 181, 101, 181, 1425, 551, 275, 339, 93…
## $ pop_male_literate               <int> 115, 55, 72, 596, 263, 113, 160, 382, …
## $ pop_male_illiterate             <int> 16, 11, 11, 86, 35, 9, 16, 100, 21, 35…
## $ pop_labor_force                 <int> 1002, 400, 601, 4607, 2544, 992, 1241,…
## $ pop_employed                    <int> 771, 325, 502, 3887, 2167, 853, 1074, …
## $ pop_unemployed                  <int> 149, 44, 69, 412, 227, 79, 99, 367, 98…
## $ pop_male_labor_force            <int> 584, 249, 359, 3176, 1591, 636, 789, 1…
## $ pop_male_employed               <int> 479, 206, 306, 2799, 1401, 559, 708, 1…
## $ pop_male_unemployed             <int> 69, 26, 34, 255, 110, 49, 55, 181, 50,…
## $ pop_not_in_labor_force          <int> 1116, 494, 827, 6717, 2747, 1547, 1898…
## $ pop_male_not_in_labor           <int> 409, 171, 305, 2345, 987, 602, 681, 13…
## $ pop_housekeepers                <int> 166, 67, 111, 2672, 725, 290, 451, 555…
## $ pop_students                    <int> 187, 86, 163, 1003, 444, 206, 261, 534…
## $ pop_male_students               <int> 80, 36, 76, 415, 190, 86, 117, 259, 10…
## $ pop_other_condition             <int> 101, 54, 103, 785, 351, 252, 159, 435,…
## $ pop_male_other_condition        <int> 64, 22, 62, 554, 197, 145, 83, 261, 10…
## $ pop_commute_within              <int> 549, 194, 257, 4285, 1798, 586, 919, 2…
## $ pop_commute_outside             <int> 306, 135, 218, 1141, 903, 434, 431, 61…
## $ pop_income_earners              <int> 662, 287, 450, 2257, 1227, 799, 1027, …
## $ pop_male_income_earners         <int> 264, 113, 167, 1341, 586, 371, 471, 85…
## $ foreign_total                   <int> 65, 11, 23, 582, 181, 80, 94, 78, 97, …
## $ foreign_male                    <int> 32, 6, 7, 294, 79, 39, 40, 30, 43, 301…
## $ foreign_0to29                   <int> 31, 2, 14, 279, 76, 38, 33, 29, 42, 31…
## $ foreign_30to54                  <int> 29, 8, 6, 278, 94, 35, 53, 42, 47, 269…
## $ foreign_over54                  <int> 5, 1, 3, 25, 11, 7, 8, 7, 8, 28, 0, 13…
## $ foreign_male_0to29              <int> 18, 1, 5, 137, 34, 20, 18, 12, 22, 156…
## $ foreign_male_30to54             <int> 11, 5, 2, 146, 42, 17, 21, 16, 17, 132…
## $ foreign_male_over54             <int> 3, 0, 0, 11, 3, 2, 1, 2, 4, 13, 0, 7, …
## $ foreign_europe                  <int> 44, 9, 18, 530, 162, 55, 92, 66, 73, 5…
## $ foreign_africa                  <int> 18, 0, 4, 38, 15, 23, 0, 1, 18, 112, 0…
## $ foreign_america                 <int> 3, 1, 1, 0, 3, 2, 1, 2, 3, 1, 0, 2, 1,…
## $ foreign_asia                    <int> 0, 1, 0, 14, 1, 0, 1, 9, 3, 1, 0, 0, 0…
## $ foreign_oceania                 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ stateless                       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ foreign_residents               <int> 65, 11, 23, 582, 181, 80, 94, 78, 97, …
## $ dwellings_occupied              <int> 1059, 448, 660, 5123, 2311, 1158, 1532…
## $ dwellings_empty_or_nonres       <int> 905, 632, 563, 743, 2121, 1515, 1917, …
## $ other_housing_occupied          <int> 0, 0, 1, 9, 8, 0, 1, 3, 1, 0, 0, 0, 0,…
## $ dwellings_empty                 <int> 905, 632, 563, 742, 2121, 1515, 1916, …
## $ dwellings_nonresident           <int> 0, 0, 0, 1, 0, 0, 1, 0, 0, 8, 0, 0, 0,…
## $ area_occupied_dwellings         <int> 89594, 35885, 53726, 524032, 204912, 1…
## $ families_renting                <int> 163, 23, 38, 700, 374, 76, 151, 186, 1…
## $ families_owning                 <int> 788, 349, 537, 3842, 1775, 930, 1240, …
## $ families_other_title            <int> 112, 87, 90, 600, 178, 154, 145, 510, …
## $ families_total                  <int> 1063, 459, 665, 5142, 2327, 1160, 1536…
## $ family_members_total            <int> 2413, 1002, 1612, 13432, 6171, 2870, 3…
## $ families_1member                <int> 405, 188, 244, 1322, 591, 362, 520, 77…
## $ families_2members               <int> 259, 122, 146, 1305, 577, 301, 439, 84…
## $ families_3members               <int> 178, 49, 98, 1030, 451, 179, 246, 534,…
## $ families_4members               <int> 163, 79, 113, 1123, 504, 240, 248, 526…
## $ families_5members               <int> 48, 19, 56, 280, 170, 61, 73, 204, 59,…
## $ families_6plus_members          <int> 10, 2, 8, 82, 34, 17, 10, 42, 20, 47, …
## $ members_in_large_families       <int> 64, 12, 50, 518, 207, 104, 65, 264, 12…
## $ buildings_total                 <int> 854, 772, 1057, 3048, 3110, 1790, 2073…
## $ buildings_in_use                <int> 828, 765, 988, 2953, 3046, 1602, 1966,…
## $ residential_buildings           <int> 752, 741, 939, 2769, 2616, 1471, 1843,…
## $ nonres_buildings                <int> 76, 24, 49, 184, 430, 131, 123, 191, 3…
## $ res_masonry_buildings           <dbl> 353, 697, 469, 1558, 2003, 1160, 1430,…
## $ res_concrete_buildings          <dbl> 365, 25, 383, 851, 340, 155, 400, 583,…
## $ res_other_material              <dbl> 34, 19, 87, 360, 273, 156, 13, 20, 326…
## $ res_pre1919                     <int> 160, 593, 34, 229, 847, 477, 1121, 481…
## $ res_1919to1945                  <int> 148, 78, 99, 375, 282, 232, 210, 70, 1…
## $ res_1946to1960                  <int> 55, 10, 169, 602, 650, 333, 99, 208, 9…
## $ res_1961to1970                  <int> 146, 11, 169, 577, 373, 128, 119, 452,…
## $ res_1971to1980                  <int> 129, 23, 158, 413, 187, 107, 80, 606, …
## $ res_1981to1990                  <int> 81, 8, 192, 265, 169, 169, 142, 284, 1…
## $ res_1991to2000                  <int> 29, 12, 72, 165, 42, 21, 35, 209, 5, 6…
## $ res_2001to2005                  <int> 3, 4, 44, 69, 38, 2, 19, 28, 0, 37, 5,…
## $ res_post2005                    <int> 1, 2, 2, 74, 28, 2, 18, 25, 1, 50, 2, …
## $ res_1floor                      <int> 103, 94, 134, 636, 1148, 401, 537, 928…
## $ res_2floors                     <int> 270, 319, 592, 1249, 1127, 745, 956, 6…
## $ res_3floors                     <int> 303, 297, 212, 743, 246, 288, 227, 263…
## $ res_4plus_floors                <int> 76, 31, 1, 141, 95, 37, 123, 481, 9, 1…
## $ res_1unit                       <int> 250, 276, 762, 1187, 1670, 531, 1151, …
## $ res_2units                      <int> 183, 285, 104, 968, 354, 414, 380, 144…
## $ res_3to4units                   <int> 230, 149, 62, 414, 424, 372, 189, 286,…
## $ res_5to8units                   <int> 77, 25, 11, 155, 122, 131, 99, 277, 53…
## $ res_9to15units                  <int> 10, 6, 0, 26, 32, 19, 19, 61, 11, 83, …
## $ res_16plus_units                <int> 2, 0, 0, 19, 14, 4, 5, 9, 4, 22, 1, 0,…
## $ res_total_units                 <int> 2075, 1540, 1256, 6102, 5174, 3681, 34…
## $ res_excellent_condition         <int> 212, 296, 97, 467, 868, 296, 347, 0, 2…
## $ res_good_condition              <int> 491, 403, 564, 1637, 1401, 869, 1040, …
## $ res_fair_condition              <int> 43, 42, 232, 580, 319, 269, 391, 925, …
## $ res_poor_condition              <int> 6, 0, 46, 85, 28, 37, 65, 103, 12, 10,…
## $ contribuenti                    <dbl> 1676, 626, 885, 7640, 4082, 1851, 2233…
## $ contribuenti_da_lavoro_autonomo <dbl> 18, 0, 5, 74, 27, 18, 11, 24, 13, 12, …
## $ reddito_lavoro_dipendente       <dbl> 11024498, 2770067, 5148272, 63924533, …
## $ reddito_lavoro_autonomo         <dbl> 23389735, 6579912, 9991552, 112018580,…
## $ indice_vecchiaia                <dbl> 183.33333, 262.03704, 185.18519, 120.1…
## $ indice_dipendenza               <dbl> 66.07143, 78.29181, 71.11111, 68.92996…
## $ rapporto_genere                 <dbl> 90.99526, 91.22137, 91.13475, 96.82098…
## $ tasso_occupazione               <dbl> 36.40227, 36.35347, 35.15406, 34.32533…
## $ tasso_disoccupazione            <dbl> 14.870259, 11.000000, 11.480865, 8.942…
## $ indice_istruzione               <dbl> 35.67428, 27.68595, 22.53968, 31.21140…
## $ tasso_analfabetismo             <dbl> 3.662215, 3.409091, 5.777778, 2.209026…
## $ affollamento_abitativo          <dbl> 2.283286, 2.236607, 2.450000, 2.622487…
## $ dimensione_media_abitazione     <dbl> 84.60246, 80.10045, 81.40303, 102.2900…
## $ perc_abitazioni_proprieta       <dbl> 74.12982, 76.03486, 80.75188, 74.71801…
## $ perc_abitazioni_affitto         <dbl> 15.333960, 5.010893, 5.714286, 13.6133…
## $ indice_vetusta                  <dbl> 40.957447, 90.553306, 14.164004, 21.81…
## $ indice_vulnerabilita            <dbl> 46.94149, 94.06208, 49.94675, 56.26580…
## $ perc_stranieri                  <dbl> 2.6881720, 1.0978044, 1.4223871, 4.331…
## $ indice_diversita_stranieri      <dbl> 13.0, 2.2, 4.6, 116.4, 36.2, 16.0, 18.…
## $ dimensione_media_famiglia       <dbl> 2.269991, 2.183007, 2.424060, 2.612213…
## $ perc_famiglie_monocomponente    <dbl> 38.09972, 40.95861, 36.69173, 25.70984…
## $ indice_complessita_familiare    <dbl> 20.79022, 21.78649, 26.61654, 28.87981…
## $ indice_mobilita_extracom        <dbl> 35.789474, 41.033435, 45.894737, 21.02…
## $ indice_attrattivita             <dbl> 3.704728, 3.554522, 2.709186, 3.170049…
## $ indice_sviluppo                 <dbl> 64.97795, 64.24027, 63.93690, 65.66217…
## $ perc_lavoratori_autonomi        <dbl> 1.0739857, 0.0000000, 0.5649718, 0.968…
## $ perc_lavoratori_dipendenti      <dbl> 98.92601, 100.00000, 99.43503, 99.0314…
## $ reddito_medio_dipendente        <dbl> 6649.275, 4425.027, 5850.309, 8448.921…
## $ reddito_medio_autonomo          <dbl> 1299429.7, 0.0, 1998310.4, 1513764.6, …
## $ divario_reddito                 <dbl> 1292780.4, 0.0, 1992460.1, 1505315.7, …
## $ rapporto_redditi                <dbl> 195.4243, 0.0000, 341.5735, 179.1666, …
## $ reddito_totale                  <dbl> 34414233, 9349979, 15139824, 175943113…
## $ reddito_pro_capite              <dbl> 14232.520, 9331.316, 9362.909, 13095.8…
## $ tasso_attivita_economica        <dbl> 0.6931348, 0.6247505, 0.5473098, 0.568…
## $ interazione_vecchiaia_autonomi  <dbl> 196.89737, 0.00000, 104.62440, 116.358…
## $ interazione_istruzione_reddito  <dbl> 507734.9, 258346.4, 211037.0, 408740.7…
## $ log_reddito_pro_capite          <dbl> 9.563355, 9.141239, 9.144618, 9.480129…
## $ interazione_proprieta_reddito   <dbl> 1055054.2, 709505.3, 756072.5, 978497.…
# Identifier / administrative-code columns that must not enter the PCA
ids <- c(
  "PROCOM", "CODREG", "REGIONE", "CODPRO", "PROVINCIA", "CODCOM",
  "COMUNE", "SEZ2011", "NSEZ", "ACE", "CODLOC", "CODASC"
)

# Drop the identifiers (any_of tolerates names missing from df), then drop
# every column that holds a single distinct value
df2 <- df %>%
  dplyr::select(!dplyr::any_of(ids)) %>%
  dplyr::select(
    tidyselect::where(function(col) dplyr::n_distinct(col) > 1)
  )

# Near-zero-variance filter: remove the columns flagged by caret
nzv_info <- caret::nearZeroVar(df2, saveMetrics = TRUE)
nzv_cols <- row.names(nzv_info)[nzv_info[["nzv"]]]
df3 <- dplyr::select(df2, !dplyr::all_of(nzv_cols))

# Keep only the numeric columns
df_num <- dplyr::select(df3, tidyselect::where(is.numeric))

# PCA on centred, unit-variance variables (i.e. on the correlation matrix)
pca_res <- stats::prcomp(x = df_num, center = TRUE, scale. = TRUE)

# Eigenvalues and explained variance, one row per component.
# Columns: eigenvalue, variance.percent, cumulative.variance.percent
eig <- factoextra::get_eigenvalue(pca_res)
print(eig)
##           eigenvalue variance.percent cumulative.variance.percent
## Dim.1   1.233158e+02     6.966996e+01                    69.66996
## Dim.2   1.029153e+01     5.814422e+00                    75.48438
## Dim.3   7.884881e+00     4.454735e+00                    79.93912
## Dim.4   6.246099e+00     3.528869e+00                    83.46799
## Dim.5   4.400878e+00     2.486372e+00                    85.95436
## Dim.6   3.230823e+00     1.825324e+00                    87.77968
## Dim.7   2.737434e+00     1.546573e+00                    89.32626
## Dim.8   1.947490e+00     1.100277e+00                    90.42653
## Dim.9   1.675989e+00     9.468862e-01                    91.37342
## Dim.10  1.396952e+00     7.892387e-01                    92.16266
## Dim.11  1.204522e+00     6.805209e-01                    92.84318
## Dim.12  1.075916e+00     6.078619e-01                    93.45104
## Dim.13  9.555187e-01     5.398411e-01                    93.99088
## Dim.14  9.423132e-01     5.323804e-01                    94.52326
## Dim.15  8.230532e-01     4.650018e-01                    94.98826
## Dim.16  7.996707e-01     4.517914e-01                    95.44006
## Dim.17  7.245600e-01     4.093560e-01                    95.84941
## Dim.18  6.817748e-01     3.851835e-01                    96.23459
## Dim.19  5.938145e-01     3.354884e-01                    96.57008
## Dim.20  5.101508e-01     2.882208e-01                    96.85830
## Dim.21  4.730363e-01     2.672521e-01                    97.12556
## Dim.22  3.972723e-01     2.244476e-01                    97.35000
## Dim.23  3.672973e-01     2.075126e-01                    97.55752
## Dim.24  3.373835e-01     1.906122e-01                    97.74813
## Dim.25  3.191126e-01     1.802896e-01                    97.92842
## Dim.26  2.836175e-01     1.602359e-01                    98.08865
## Dim.27  2.594206e-01     1.465653e-01                    98.23522
## Dim.28  2.487832e-01     1.405555e-01                    98.37577
## Dim.29  2.235484e-01     1.262985e-01                    98.50207
## Dim.30  2.099819e-01     1.186338e-01                    98.62071
## Dim.31  2.051486e-01     1.159032e-01                    98.73661
## Dim.32  1.886296e-01     1.065704e-01                    98.84318
## Dim.33  1.750622e-01     9.890518e-02                    98.94209
## Dim.34  1.523056e-01     8.604839e-02                    99.02813
## Dim.35  1.458337e-01     8.239194e-02                    99.11053
## Dim.36  1.366062e-01     7.717866e-02                    99.18770
## Dim.37  1.230929e-01     6.954402e-02                    99.25725
## Dim.38  1.208433e-01     6.827307e-02                    99.32552
## Dim.39  1.154398e-01     6.522021e-02                    99.39074
## Dim.40  1.027805e-01     5.806810e-02                    99.44881
## Dim.41  8.748594e-02     4.942708e-02                    99.49824
## Dim.42  8.197993e-02     4.631634e-02                    99.54455
## Dim.43  7.597030e-02     4.292107e-02                    99.58747
## Dim.44  6.706058e-02     3.788733e-02                    99.62536
## Dim.45  6.155871e-02     3.477893e-02                    99.66014
## Dim.46  5.330239e-02     3.011434e-02                    99.69026
## Dim.47  5.117539e-02     2.891265e-02                    99.71917
## Dim.48  4.817242e-02     2.721606e-02                    99.74638
## Dim.49  4.367843e-02     2.467708e-02                    99.77106
## Dim.50  4.186523e-02     2.365267e-02                    99.79471
## Dim.51  3.692874e-02     2.086370e-02                    99.81558
## Dim.52  3.127695e-02     1.767059e-02                    99.83325
## Dim.53  3.059484e-02     1.728522e-02                    99.85053
## Dim.54  2.977930e-02     1.682446e-02                    99.86736
## Dim.55  2.789855e-02     1.576189e-02                    99.88312
## Dim.56  2.337754e-02     1.320765e-02                    99.89633
## Dim.57  2.054284e-02     1.160613e-02                    99.90793
## Dim.58  1.693237e-02     9.566308e-03                    99.91750
## Dim.59  1.607443e-02     9.081599e-03                    99.92658
## Dim.60  1.446022e-02     8.169616e-03                    99.93475
## Dim.61  1.237800e-02     6.993221e-03                    99.94174
## Dim.62  9.991110e-03     5.644695e-03                    99.94739
## Dim.63  8.742631e-03     4.939340e-03                    99.95233
## Dim.64  7.552022e-03     4.266679e-03                    99.95659
## Dim.65  7.194254e-03     4.064550e-03                    99.96066
## Dim.66  6.376065e-03     3.602296e-03                    99.96426
## Dim.67  5.782249e-03     3.266807e-03                    99.96753
## Dim.68  5.120560e-03     2.892971e-03                    99.97042
## Dim.69  4.591624e-03     2.594138e-03                    99.97302
## Dim.70  3.948876e-03     2.231003e-03                    99.97525
## Dim.71  3.823495e-03     2.160167e-03                    99.97741
## Dim.72  3.485769e-03     1.969361e-03                    99.97938
## Dim.73  3.301769e-03     1.865406e-03                    99.98124
## Dim.74  2.873234e-03     1.623296e-03                    99.98286
## Dim.75  2.656441e-03     1.500814e-03                    99.98437
## Dim.76  2.480244e-03     1.401268e-03                    99.98577
## Dim.77  2.300360e-03     1.299638e-03                    99.98707
## Dim.78  1.907277e-03     1.077557e-03                    99.98814
## Dim.79  1.814516e-03     1.025150e-03                    99.98917
## Dim.80  1.548133e-03     8.746516e-04                    99.99004
## Dim.81  1.451844e-03     8.202509e-04                    99.99086
## Dim.82  1.370404e-03     7.742395e-04                    99.99164
## Dim.83  1.234112e-03     6.972384e-04                    99.99234
## Dim.84  1.129557e-03     6.381677e-04                    99.99297
## Dim.85  1.034442e-03     5.844305e-04                    99.99356
## Dim.86  9.287174e-04     5.246991e-04                    99.99408
## Dim.87  8.565289e-04     4.839147e-04                    99.99457
## Dim.88  7.861574e-04     4.441567e-04                    99.99501
## Dim.89  6.804923e-04     3.844589e-04                    99.99540
## Dim.90  6.706935e-04     3.789229e-04                    99.99577
## Dim.91  6.436469e-04     3.636423e-04                    99.99614
## Dim.92  5.481364e-04     3.096816e-04                    99.99645
## Dim.93  4.638412e-04     2.620571e-04                    99.99671
## Dim.94  4.519432e-04     2.553351e-04                    99.99697
## Dim.95  4.223519e-04     2.386169e-04                    99.99720
## Dim.96  3.760020e-04     2.124305e-04                    99.99742
## Dim.97  3.532681e-04     1.995865e-04                    99.99762
## Dim.98  3.481747e-04     1.967089e-04                    99.99781
## Dim.99  3.228148e-04     1.823813e-04                    99.99799
## Dim.100 2.871748e-04     1.622457e-04                    99.99816
## Dim.101 2.680411e-04     1.514356e-04                    99.99831
## Dim.102 2.505569e-04     1.415576e-04                    99.99845
## Dim.103 2.292420e-04     1.295153e-04                    99.99858
## Dim.104 2.244808e-04     1.268253e-04                    99.99871
## Dim.105 2.102503e-04     1.187855e-04                    99.99883
## Dim.106 1.867724e-04     1.055211e-04                    99.99893
## Dim.107 1.700169e-04     9.605475e-05                    99.99903
## Dim.108 1.501490e-04     8.482992e-05                    99.99911
## Dim.109 1.416613e-04     8.003466e-05                    99.99919
## Dim.110 1.283628e-04     7.252135e-05                    99.99926
## Dim.111 1.174896e-04     6.637830e-05                    99.99933
## Dim.112 1.064370e-04     6.013391e-05                    99.99939
## Dim.113 9.256833e-05     5.229849e-05                    99.99944
## Dim.114 9.141661e-05     5.164780e-05                    99.99949
## Dim.115 8.114715e-05     4.584585e-05                    99.99954
## Dim.116 7.227690e-05     4.083441e-05                    99.99958
## Dim.117 6.876721e-05     3.885153e-05                    99.99962
## Dim.118 6.645668e-05     3.754615e-05                    99.99966
## Dim.119 5.434615e-05     3.070404e-05                    99.99969
## Dim.120 5.106764e-05     2.885177e-05                    99.99972
## Dim.121 4.834102e-05     2.731131e-05                    99.99974
## Dim.122 4.494188e-05     2.539089e-05                    99.99977
## Dim.123 4.176302e-05     2.359492e-05                    99.99979
## Dim.124 3.829612e-05     2.163622e-05                    99.99982
## Dim.125 3.568581e-05     2.016148e-05                    99.99984
## Dim.126 3.241516e-05     1.831365e-05                    99.99985
## Dim.127 2.772431e-05     1.566345e-05                    99.99987
## Dim.128 2.645002e-05     1.494351e-05                    99.99988
## Dim.129 2.496053e-05     1.410199e-05                    99.99990
## Dim.130 2.401197e-05     1.356609e-05                    99.99991
## Dim.131 2.190961e-05     1.237831e-05                    99.99992
## Dim.132 1.989362e-05     1.123933e-05                    99.99994
## Dim.133 1.839611e-05     1.039328e-05                    99.99995
## Dim.134 1.655704e-05     9.354262e-06                    99.99996
## Dim.135 1.505431e-05     8.505261e-06                    99.99996
## Dim.136 1.399196e-05     7.905063e-06                    99.99997
## Dim.137 1.005000e-05     5.677968e-06                    99.99998
## Dim.138 9.308447e-06     5.259009e-06                    99.99998
## Dim.139 8.925748e-06     5.042795e-06                    99.99999
## Dim.140 7.124338e-06     4.025050e-06                    99.99999
## Dim.141 5.721837e-06     3.232676e-06                   100.00000
## Dim.142 3.538220e-06     1.998994e-06                   100.00000
## Dim.143 1.647421e-06     9.307464e-07                   100.00000
## Dim.144 1.375911e-06     7.773509e-07                   100.00000
## Dim.145 8.634539e-07     4.878271e-07                   100.00000
## Dim.146 4.656120e-07     2.630576e-07                   100.00000
## Dim.147 3.783075e-07     2.137330e-07                   100.00000
## Dim.148 3.508250e-07     1.982062e-07                   100.00000
## Dim.149 5.545593e-08     3.133104e-08                   100.00000
## Dim.150 4.244174e-08     2.397838e-08                   100.00000
## Dim.151 2.635747e-08     1.489122e-08                   100.00000
## Dim.152 3.440745e-09     1.943924e-09                   100.00000
## Dim.153 1.753395e-30     9.906187e-31                   100.00000
## Dim.154 1.205696e-30     6.811841e-31                   100.00000
## Dim.155 1.205696e-30     6.811841e-31                   100.00000
## Dim.156 1.205696e-30     6.811841e-31                   100.00000
## Dim.157 1.205696e-30     6.811841e-31                   100.00000
## Dim.158 1.205696e-30     6.811841e-31                   100.00000
## Dim.159 1.205696e-30     6.811841e-31                   100.00000
## Dim.160 1.205696e-30     6.811841e-31                   100.00000
## Dim.161 1.205696e-30     6.811841e-31                   100.00000
## Dim.162 1.205696e-30     6.811841e-31                   100.00000
## Dim.163 1.205696e-30     6.811841e-31                   100.00000
## Dim.164 1.205696e-30     6.811841e-31                   100.00000
## Dim.165 1.205696e-30     6.811841e-31                   100.00000
## Dim.166 1.205696e-30     6.811841e-31                   100.00000
## Dim.167 1.205696e-30     6.811841e-31                   100.00000
## Dim.168 1.205696e-30     6.811841e-31                   100.00000
## Dim.169 1.205696e-30     6.811841e-31                   100.00000
## Dim.170 1.205696e-30     6.811841e-31                   100.00000
## Dim.171 1.205696e-30     6.811841e-31                   100.00000
## Dim.172 1.205696e-30     6.811841e-31                   100.00000
## Dim.173 1.205696e-30     6.811841e-31                   100.00000
## Dim.174 1.205696e-30     6.811841e-31                   100.00000
## Dim.175 1.205696e-30     6.811841e-31                   100.00000
## Dim.176 7.990657e-31     4.514495e-31                   100.00000
## Dim.177 3.676557e-31     2.077151e-31                   100.00000
# Choose the number of principal components to retain: the first component
# at which the cumulative explained variance reaches 80%, falling back to the
# Kaiser rule (eigenvalue > 1) only if that threshold is never reached.
#
# NOTE(review): the previous lookup used `eig$cumulative.percent`. If `eig`
# comes from factoextra::get_eigenvalue() (the printed table has that layout),
# the column is named `cumulative.variance.percent`, so the old lookup returned
# NULL, `k80` was always NA and the Kaiser fallback was always taken. Outputs
# recorded in this file from earlier runs may need to be regenerated.
cum_pct <- eig[["cumulative.variance.percent"]]
if (is.null(cum_pct)) {
  # Column not present under that name: derive it from the eigenvalues.
  cum_pct <- 100 * cumsum(eig$eigenvalue) / sum(eig$eigenvalue)
}
k80 <- which(cum_pct >= 80)[1]
k_kaiser <- sum(eig$eigenvalue > 1)
# Keep at least one component in the fallback so later indexing stays valid.
k <- if (!is.na(k80)) k80 else max(k_kaiser, 1L)
message("Seleziono k = ", k, " componenti principali.")

# Extract and print the five largest-magnitude loadings for each retained PC.
# `drop = FALSE` keeps `loadings` a matrix even when k == 1, so the column
# indexing and rownames() below keep working; seq_len() is safe for k == 0.
loadings <- pca_res$rotation[, seq_len(k), drop = FALSE]
top_loadings <- dplyr::bind_rows(lapply(seq_len(k), function(i) {
  # Rank variables by absolute loading on component i; head() returns fewer
  # than five indices (instead of NA padding) when there are < 5 variables.
  idx <- head(order(abs(loadings[, i]), decreasing = TRUE), 5L)
  tibble::tibble(
    PC       = paste0("PC", i),
    Variable = rownames(loadings)[idx],
    Loading  = unname(loadings[idx, i])
  )
}))
print(top_loadings)
## # A tibble: 60 × 3
##    PC    Variable              Loading
##    <chr> <chr>                   <dbl>
##  1 PC1   pop_labor_force        0.0896
##  2 PC1   contribuenti           0.0895
##  3 PC1   pop_employed           0.0895
##  4 PC1   pop_35to39             0.0895
##  5 PC1   pop_40to44             0.0895
##  6 PC2   res_1floor             0.256 
##  7 PC2   res_1unit              0.251 
##  8 PC2   res_1981to1990         0.236 
##  9 PC2   res_1971to1980         0.227 
## 10 PC2   res_masonry_buildings  0.219 
## # ℹ 50 more rows
# Scree plot (elbow criterion), limited to the first k components and with
# the value printed on top of each bar.
factoextra::fviz_eig(
  X         = pca_res,
  ncp       = k,
  addlabels = TRUE
)