1 Introduzione

Analisi esplorativa del questionario Equita’ PLAY (export SoSciSurvey del 2026-04-22).

Come generare il report: in RStudio apri questo file .R e usa File -> Compile Report… (Ctrl+Shift+K) selezionando HTML.

2 Pacchetti

pkgs <- c("readxl", "dplyr", "tidyr", "stringr", "ggplot2",
          "forcats", "readr", "janitor", "knitr", "DT", "scales")
to_install <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(to_install)) install.packages(to_install, repos = "https://cloud.r-project.org")
invisible(lapply(pkgs, library, character.only = TRUE))
theme_set(theme_minimal(base_size = 11))

3 Caricamento dati

f_data     <- "data_Equityesplorativo_2026-04-22_19-25.xlsx"
f_codebook <- "codebook_Equityesplorativo_2026-04-22_19-25.xlsx"
f_vars     <- "variables_Equityesplorativo_2026-04-22_19-25.csv"

raw       <- read_excel(f_data, sheet = 1, col_types = "text")
dati      <- raw[-1, ]  # la prima riga contiene le etichette ripetute
codebook  <- read_excel(f_codebook, sheet = 1)
variables <- read_tsv(f_vars, locale = locale(encoding = "UTF-16LE"),
                      show_col_types = FALSE)

tibble::tibble(n_record = nrow(dati), n_variabili = ncol(dati)) |> kable()
n_record n_variabili
137 53

4 Pulizia e tipi

num_vars <- c("CASE","FA02","FI01","NE01","OR02","SD01",
              "SD02_01","SD02_02","SD02_03","SD02_04","SD02_05","SD02_06",
              "PR02_01","SD03_01","SD04","SD05","SD06","SD07","SD08",
              paste0("TIME00", 1:7), "TIME_SUM",
              "LASTPAGE","MAXPAGE","MISSING","MISSREL","TIME_RSI",
              "FINISHED","Q_VIEWER")
num_vars <- intersect(num_vars, names(dati))
dati[num_vars] <- lapply(dati[num_vars], function(x) suppressWarnings(as.numeric(x)))

parse_dt <- function(x) {
  x <- trimws(as.character(x))
  x[x == "" | x == "NA"] <- NA_character_
  out <- suppressWarnings(as.POSIXct(x, format = "%Y-%m-%d %H:%M:%S", tz = "Europe/Rome"))
  # fallback per formato senza secondi
  miss <- is.na(out) & !is.na(x)
  if (any(miss)) {
    out[miss] <- suppressWarnings(as.POSIXct(x[miss], format = "%Y-%m-%d %H:%M", tz = "Europe/Rome"))
  }
  out
}
if ("STARTED"  %in% names(dati)) dati$STARTED  <- parse_dt(dati$STARTED)
if ("LASTDATA" %in% names(dati)) dati$LASTDATA <- parse_dt(dati$LASTDATA)

# -9 = "Not answered" -> NA
dati <- dati |> mutate(across(all_of(num_vars), ~ ifelse(.x == -9, NA_real_, .x)))

4.1 Etichette di valore dal codebook

label_map <- codebook |>
  filter(!is.na(`Response Code`)) |>
  transmute(var = Variable,
            code = suppressWarnings(as.numeric(`Response Code`)),
            label = `Response Label`) |>
  filter(!is.na(code), !is.na(label))

apply_labels <- function(x, var_name) {
  m <- label_map |> filter(var == var_name)
  m <- m[!duplicated(m$code), ]
  if (nrow(m) == 0) return(x)
  factor(x, levels = m$code, labels = m$label)
}

to_factor <- intersect(unique(label_map$var), names(dati))
for (v in to_factor) dati[[paste0(v, "_lab")]] <- apply_labels(dati[[v]], v)

4.2 Filtro: completati + consenso

dati_ok <- dati |>
  filter(is.na(FINISHED) | FINISHED == 1,
         is.na(PR02_01)  | PR02_01 == 2)

tibble::tibble(
  record_totali = nrow(dati),
  finiti_con_consenso = nrow(dati_ok),
  pct = scales::percent(nrow(dati_ok) / nrow(dati), accuracy = 0.1)
) |> kable()
record_totali finiti_con_consenso pct
137 70 51.1%

5 Profilo socio-demografico

5.1 Eta’

summary(dati_ok$SD03_01)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   23.00   35.00   42.00   41.67   48.00   63.00       1
ggplot(dati_ok, aes(SD03_01)) +
  geom_histogram(binwidth = 5, fill = "#4C78A8", color = "white") +
  labs(title = "Distribuzione dell'eta'", x = "Eta' (anni)", y = "Frequenza")

5.2 Genere

if ("SD04_lab" %in% names(dati_ok)) {
  print(
    dati_ok |> filter(!is.na(SD04_lab)) |>
      tabyl(SD04_lab) |> adorn_pct_formatting(digits = 1) |>
      kable(caption = "SD04 - Genere")
  )
  p_g <- dati_ok |> filter(!is.na(SD04_lab)) |> count(SD04_lab) |>
    mutate(SD04_lab = fct_reorder(SD04_lab, n)) |>
    ggplot(aes(n, SD04_lab)) + geom_col(fill = "#AF7AA1") +
    labs(title = "Genere", x = "N.", y = NULL)
  print(p_g)
}
## 
## 
## Table: SD04 - Genere
## 
## |SD04_lab                                       |  n|percent |
## |:----------------------------------------------|--:|:-------|
## |Donna                                          | 15|21.4%   |
## |Uomo                                           | 53|75.7%   |
## |Persona non binaria / altra identità di genere |  0|0.0%    |
## |Preferisco non descrivermi                     |  1|1.4%    |
## |Preferisco non rispondere                      |  1|1.4%    |
## |Not answered                                   |  0|0.0%    |

5.3 Titolo di studio e occupazione

plot_cat <- function(v, titolo, fill = "#4E79A7") {
  dati_ok |> filter(!is.na(.data[[v]])) |> count(.data[[v]]) |> rename(cat = 1) |>
    mutate(cat = fct_reorder(cat, n)) |>
    ggplot(aes(n, cat)) + geom_col(fill = fill) +
    labs(title = titolo, x = "N.", y = NULL)
}
if ("SD07_lab" %in% names(dati_ok)) print(plot_cat("SD07_lab", "Titolo di studio"))

if ("SD08_lab" %in% names(dati_ok)) print(plot_cat("SD08_lab", "Occupazione", "#59A14F"))

6 Ruolo e pratiche di facilitazione

6.1 FA02 - Frequenza iniziale

lvl_fa02 <- c("Mai","Quasi mai","Raramente","A volte",
              "Spesso","Quasi sempre","Sempre")
if ("FA02_lab" %in% names(dati_ok)) {
  dati_ok |>
    filter(!is.na(FA02_lab)) |>
    mutate(FA02_lab = factor(FA02_lab, levels = lvl_fa02)) |>
    count(FA02_lab) |>
    ggplot(aes(FA02_lab, n)) +
    geom_col(fill = "#59A14F") +
    labs(title = "FA02 - Frequenza iniziale", x = NULL, y = "N. rispondenti") +
    theme(axis.text.x = element_text(angle = 30, hjust = 1))
}

6.2 FI01 - Ruolo nell’evento ludico

if ("FI01_lab" %in% names(dati_ok)) {
  dati_ok |> filter(!is.na(FI01_lab)) |> count(FI01_lab) |>
    mutate(FI01_lab = fct_reorder(FI01_lab, n)) |>
    ggplot(aes(n, FI01_lab)) +
    geom_col(fill = "#E15759") +
    labs(title = "FI01 - Ruolo", x = "N. rispondenti", y = NULL)
}

7 Organizzatori

7.1 OR02 - Domanda chiusa

lvl_or02 <- c("Nessuna persona","Quasi nessuna persona","Poche persone",
              "Alcune persone","Parecchie persone","Molte persone",
              "Moltissime persone")
if ("OR02_lab" %in% names(dati_ok)) {
  dati_ok |>
    filter(!is.na(OR02_lab)) |>
    mutate(OR02_lab = factor(OR02_lab, levels = lvl_or02)) |>
    count(OR02_lab) |>
    ggplot(aes(OR02_lab, n)) +
    geom_col(fill = "#F28E2B") +
    labs(title = "OR02 - Domanda chiusa organizzatori",
         x = NULL, y = "N. rispondenti") +
    theme(axis.text.x = element_text(angle = 30, hjust = 1))
}

8 Tipo di gioco e ambiti

8.1 SD01 - Tipo di gioco

if ("SD01_lab" %in% names(dati_ok)) {
  dati_ok |> filter(!is.na(SD01_lab)) |> count(SD01_lab) |>
    mutate(SD01_lab = fct_reorder(SD01_lab, n)) |>
    ggplot(aes(n, SD01_lab)) + geom_col(fill = "#76B7B2") +
    labs(title = "SD01 - Tipo di gioco", x = "N.", y = NULL)
}

8.2 SD02 - Ambiti professionali (multi-risposta)

ambito_vars <- grep("^SD02_0[1-6]$", names(dati_ok), value = TRUE)
ambito_labels <- c(SD02_01 = "Formazione",
                   SD02_02 = "Educazione",
                   SD02_03 = "Ricerca",
                   SD02_04 = "Ambiti sociali e terzo settore",
                   SD02_05 = "Animazione",
                   SD02_06 = "Nessuna delle precedenti")

amb <- dati_ok |>
  select(all_of(ambito_vars)) |>
  summarise(across(everything(), ~ mean(.x == 2, na.rm = TRUE) * 100)) |>
  pivot_longer(everything(), names_to = "var", values_to = "pct") |>
  mutate(ambito = ambito_labels[var],
         ambito = fct_reorder(ambito, pct))

amb |> mutate(pct = round(pct, 1)) |> kable(caption = "% di selezione per ambito")
% di selezione per ambito
var pct ambito
SD02_01 35.7 Formazione
SD02_02 32.9 Educazione
SD02_03 24.3 Ricerca
SD02_04 37.1 Ambiti sociali e terzo settore
SD02_05 11.4 Animazione
SD02_06 28.6 Nessuna delle precedenti
ggplot(amb, aes(pct, ambito)) +
  geom_col(fill = "#B07AA1") +
  geom_text(aes(label = paste0(round(pct, 1), "%")), hjust = -0.1) +
  scale_x_continuous(limits = c(0, NA), expand = expansion(mult = c(0, .15))) +
  labs(title = "Ambiti (% di selezione)", x = "% rispondenti", y = NULL)

9 Qualita’ del dato

tibble::tibble(
  indicatore = c("Tempo medio questionario (sec)",
                 "Tempo mediano (sec)",
                 "% missing medio",
                 "Speed index medio (TIME_RSI)"),
  valore = c(round(mean(dati_ok$TIME_SUM, na.rm = TRUE), 1),
             round(median(dati_ok$TIME_SUM, na.rm = TRUE), 1),
             paste0(round(mean(dati_ok$MISSING, na.rm = TRUE), 1), "%"),
             round(mean(dati_ok$TIME_RSI, na.rm = TRUE), 2))
) |> kable()
indicatore valore
Tempo medio questionario (sec) 438.3
Tempo mediano (sec) 338
% missing medio 4.9%
Speed index medio (TIME_RSI) 1.15

10 Risposte aperte

open_vars <- c("FA01_01","FA01_02","FA01_03","NE02_01",
               "OR01_01","OR01_02","OR03_01","SD05s","SD06s")
open_vars <- intersect(open_vars, names(dati_ok))

aperte <- dati_ok |>
  select(CASE, all_of(open_vars)) |>
  pivot_longer(-CASE, names_to = "variabile", values_to = "testo") |>
  filter(!is.na(testo), nchar(trimws(testo)) > 0)

DT::datatable(aperte, rownames = FALSE, filter = "top",
              options = list(pageLength = 10, scrollX = TRUE))

11 Session info

sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Italian_Italy.utf8  LC_CTYPE=Italian_Italy.utf8   
## [3] LC_MONETARY=Italian_Italy.utf8 LC_NUMERIC=C                  
## [5] LC_TIME=Italian_Italy.utf8    
## 
## time zone: Europe/Rome
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] scales_1.4.0  DT_0.33       knitr_1.51    janitor_2.2.1 readr_2.1.5  
##  [6] forcats_1.0.0 ggplot2_4.0.0 stringr_1.5.1 tidyr_1.3.1   dplyr_1.1.4  
## [11] readxl_1.4.5 
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9         generics_0.1.3     stringi_1.8.4      hms_1.1.3         
##  [5] digest_0.6.39      magrittr_2.0.3     evaluate_1.0.4     grid_4.4.1        
##  [9] timechange_0.3.0   RColorBrewer_1.1-3 fastmap_1.2.0      cellranger_1.1.0  
## [13] jsonlite_1.8.8     purrr_1.0.2        crosstalk_1.2.1    jquerylib_0.1.4   
## [17] cli_3.6.3          crayon_1.5.3       rlang_1.1.4        bit64_4.0.5       
## [21] withr_3.0.2        cachem_1.1.0       yaml_2.3.10        parallel_4.4.1    
## [25] tools_4.4.1        tzdb_0.4.0         vctrs_0.6.5        R6_2.6.1          
## [29] lifecycle_1.0.4    lubridate_1.9.4    snakecase_0.11.1   htmlwidgets_1.6.4 
## [33] bit_4.0.5          vroom_1.6.5        pkgconfig_2.0.3    pillar_1.10.1     
## [37] bslib_0.8.0        gtable_0.3.6       glue_1.7.0         xfun_0.52         
## [41] tibble_3.2.1       tidyselect_1.2.1   rstudioapi_0.16.0  farver_2.1.2      
## [45] htmltools_0.5.8.1  labeling_0.4.3     rmarkdown_2.28     compiler_4.4.1    
## [49] S7_0.2.0