Analisi esplorativa del questionario Equita’ PLAY (export SoSciSurvey del 2026-04-22).
Come generare il report: in RStudio apri questo file .R e usa File -> Compile Report… (Ctrl+Shift+K) selezionando HTML.
pkgs <- c("readxl", "dplyr", "tidyr", "stringr", "ggplot2",
"forcats", "readr", "janitor", "knitr", "DT", "scales")
to_install <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(to_install)) install.packages(to_install, repos = "https://cloud.r-project.org")
invisible(lapply(pkgs, library, character.only = TRUE))
theme_set(theme_minimal(base_size = 11))f_data <- "data_Equityesplorativo_2026-04-22_19-25.xlsx"
f_codebook <- "codebook_Equityesplorativo_2026-04-22_19-25.xlsx"
f_vars <- "variables_Equityesplorativo_2026-04-22_19-25.csv"
raw <- read_excel(f_data, sheet = 1, col_types = "text")
dati <- raw[-1, ] # la prima riga contiene le etichette ripetute
codebook <- read_excel(f_codebook, sheet = 1)
variables <- read_tsv(f_vars, locale = locale(encoding = "UTF-16LE"),
show_col_types = FALSE)
tibble::tibble(n_record = nrow(dati), n_variabili = ncol(dati)) |> kable()| n_record | n_variabili |
|---|---|
| 137 | 53 |
num_vars <- c("CASE","FA02","FI01","NE01","OR02","SD01",
"SD02_01","SD02_02","SD02_03","SD02_04","SD02_05","SD02_06",
"PR02_01","SD03_01","SD04","SD05","SD06","SD07","SD08",
paste0("TIME00", 1:7), "TIME_SUM",
"LASTPAGE","MAXPAGE","MISSING","MISSREL","TIME_RSI",
"FINISHED","Q_VIEWER")
num_vars <- intersect(num_vars, names(dati))
dati[num_vars] <- lapply(dati[num_vars], function(x) suppressWarnings(as.numeric(x)))
parse_dt <- function(x) {
x <- trimws(as.character(x))
x[x == "" | x == "NA"] <- NA_character_
out <- suppressWarnings(as.POSIXct(x, format = "%Y-%m-%d %H:%M:%S", tz = "Europe/Rome"))
# fallback per formato senza secondi
miss <- is.na(out) & !is.na(x)
if (any(miss)) {
out[miss] <- suppressWarnings(as.POSIXct(x[miss], format = "%Y-%m-%d %H:%M", tz = "Europe/Rome"))
}
out
}
if ("STARTED" %in% names(dati)) dati$STARTED <- parse_dt(dati$STARTED)
if ("LASTDATA" %in% names(dati)) dati$LASTDATA <- parse_dt(dati$LASTDATA)
# -9 = "Not answered" -> NA
dati <- dati |> mutate(across(all_of(num_vars), ~ ifelse(.x == -9, NA_real_, .x)))label_map <- codebook |>
filter(!is.na(`Response Code`)) |>
transmute(var = Variable,
code = suppressWarnings(as.numeric(`Response Code`)),
label = `Response Label`) |>
filter(!is.na(code), !is.na(label))
apply_labels <- function(x, var_name) {
m <- label_map |> filter(var == var_name)
m <- m[!duplicated(m$code), ]
if (nrow(m) == 0) return(x)
factor(x, levels = m$code, labels = m$label)
}
to_factor <- intersect(unique(label_map$var), names(dati))
for (v in to_factor) dati[[paste0(v, "_lab")]] <- apply_labels(dati[[v]], v)dati_ok <- dati |>
filter(is.na(FINISHED) | FINISHED == 1,
is.na(PR02_01) | PR02_01 == 2)
tibble::tibble(
record_totali = nrow(dati),
finiti_con_consenso = nrow(dati_ok),
pct = scales::percent(nrow(dati_ok) / nrow(dati), accuracy = 0.1)
) |> kable()| record_totali | finiti_con_consenso | pct |
|---|---|---|
| 137 | 70 | 51.1% |
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 23.00 35.00 42.00 41.67 48.00 63.00 1
ggplot(dati_ok, aes(SD03_01)) +
geom_histogram(binwidth = 5, fill = "#4C78A8", color = "white") +
labs(title = "Distribuzione dell'eta'", x = "Eta' (anni)", y = "Frequenza")if ("SD04_lab" %in% names(dati_ok)) {
print(
dati_ok |> filter(!is.na(SD04_lab)) |>
tabyl(SD04_lab) |> adorn_pct_formatting(digits = 1) |>
kable(caption = "SD04 - Genere")
)
p_g <- dati_ok |> filter(!is.na(SD04_lab)) |> count(SD04_lab) |>
mutate(SD04_lab = fct_reorder(SD04_lab, n)) |>
ggplot(aes(n, SD04_lab)) + geom_col(fill = "#AF7AA1") +
labs(title = "Genere", x = "N.", y = NULL)
print(p_g)
}##
##
## Table: SD04 - Genere
##
## |SD04_lab | n|percent |
## |:----------------------------------------------|--:|:-------|
## |Donna | 15|21.4% |
## |Uomo | 53|75.7% |
## |Persona non binaria / altra identità di genere | 0|0.0% |
## |Preferisco non descrivermi | 1|1.4% |
## |Preferisco non rispondere | 1|1.4% |
## |Not answered | 0|0.0% |
plot_cat <- function(v, titolo, fill = "#4E79A7") {
dati_ok |> filter(!is.na(.data[[v]])) |> count(.data[[v]]) |> rename(cat = 1) |>
mutate(cat = fct_reorder(cat, n)) |>
ggplot(aes(n, cat)) + geom_col(fill = fill) +
labs(title = titolo, x = "N.", y = NULL)
}
if ("SD07_lab" %in% names(dati_ok)) print(plot_cat("SD07_lab", "Titolo di studio"))lvl_fa02 <- c("Mai","Quasi mai","Raramente","A volte",
"Spesso","Quasi sempre","Sempre")
if ("FA02_lab" %in% names(dati_ok)) {
dati_ok |>
filter(!is.na(FA02_lab)) |>
mutate(FA02_lab = factor(FA02_lab, levels = lvl_fa02)) |>
count(FA02_lab) |>
ggplot(aes(FA02_lab, n)) +
geom_col(fill = "#59A14F") +
labs(title = "FA02 - Frequenza iniziale", x = NULL, y = "N. rispondenti") +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
}lvl_or02 <- c("Nessuna persona","Quasi nessuna persona","Poche persone",
"Alcune persone","Parecchie persone","Molte persone",
"Moltissime persone")
if ("OR02_lab" %in% names(dati_ok)) {
dati_ok |>
filter(!is.na(OR02_lab)) |>
mutate(OR02_lab = factor(OR02_lab, levels = lvl_or02)) |>
count(OR02_lab) |>
ggplot(aes(OR02_lab, n)) +
geom_col(fill = "#F28E2B") +
labs(title = "OR02 - Domanda chiusa organizzatori",
x = NULL, y = "N. rispondenti") +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
}if ("SD01_lab" %in% names(dati_ok)) {
dati_ok |> filter(!is.na(SD01_lab)) |> count(SD01_lab) |>
mutate(SD01_lab = fct_reorder(SD01_lab, n)) |>
ggplot(aes(n, SD01_lab)) + geom_col(fill = "#76B7B2") +
labs(title = "SD01 - Tipo di gioco", x = "N.", y = NULL)
}ambito_vars <- grep("^SD02_0[1-6]$", names(dati_ok), value = TRUE)
ambito_labels <- c(SD02_01 = "Formazione",
SD02_02 = "Educazione",
SD02_03 = "Ricerca",
SD02_04 = "Ambiti sociali e terzo settore",
SD02_05 = "Animazione",
SD02_06 = "Nessuna delle precedenti")
amb <- dati_ok |>
select(all_of(ambito_vars)) |>
summarise(across(everything(), ~ mean(.x == 2, na.rm = TRUE) * 100)) |>
pivot_longer(everything(), names_to = "var", values_to = "pct") |>
mutate(ambito = ambito_labels[var],
ambito = fct_reorder(ambito, pct))
amb |> mutate(pct = round(pct, 1)) |> kable(caption = "% di selezione per ambito")| var | pct | ambito |
|---|---|---|
| SD02_01 | 35.7 | Formazione |
| SD02_02 | 32.9 | Educazione |
| SD02_03 | 24.3 | Ricerca |
| SD02_04 | 37.1 | Ambiti sociali e terzo settore |
| SD02_05 | 11.4 | Animazione |
| SD02_06 | 28.6 | Nessuna delle precedenti |
ggplot(amb, aes(pct, ambito)) +
geom_col(fill = "#B07AA1") +
geom_text(aes(label = paste0(round(pct, 1), "%")), hjust = -0.1) +
scale_x_continuous(limits = c(0, NA), expand = expansion(mult = c(0, .15))) +
labs(title = "Ambiti (% di selezione)", x = "% rispondenti", y = NULL)tibble::tibble(
indicatore = c("Tempo medio questionario (sec)",
"Tempo mediano (sec)",
"% missing medio",
"Speed index medio (TIME_RSI)"),
valore = c(round(mean(dati_ok$TIME_SUM, na.rm = TRUE), 1),
round(median(dati_ok$TIME_SUM, na.rm = TRUE), 1),
paste0(round(mean(dati_ok$MISSING, na.rm = TRUE), 1), "%"),
round(mean(dati_ok$TIME_RSI, na.rm = TRUE), 2))
) |> kable()| indicatore | valore |
|---|---|
| Tempo medio questionario (sec) | 438.3 |
| Tempo mediano (sec) | 338 |
| % missing medio | 4.9% |
| Speed index medio (TIME_RSI) | 1.15 |
open_vars <- c("FA01_01","FA01_02","FA01_03","NE02_01",
"OR01_01","OR01_02","OR03_01","SD05s","SD06s")
open_vars <- intersect(open_vars, names(dati_ok))
aperte <- dati_ok |>
select(CASE, all_of(open_vars)) |>
pivot_longer(-CASE, names_to = "variabile", values_to = "testo") |>
filter(!is.na(testo), nchar(trimws(testo)) > 0)
DT::datatable(aperte, rownames = FALSE, filter = "top",
options = list(pageLength = 10, scrollX = TRUE))## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=Italian_Italy.utf8 LC_CTYPE=Italian_Italy.utf8
## [3] LC_MONETARY=Italian_Italy.utf8 LC_NUMERIC=C
## [5] LC_TIME=Italian_Italy.utf8
##
## time zone: Europe/Rome
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scales_1.4.0 DT_0.33 knitr_1.51 janitor_2.2.1 readr_2.1.5
## [6] forcats_1.0.0 ggplot2_4.0.0 stringr_1.5.1 tidyr_1.3.1 dplyr_1.1.4
## [11] readxl_1.4.5
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 generics_0.1.3 stringi_1.8.4 hms_1.1.3
## [5] digest_0.6.39 magrittr_2.0.3 evaluate_1.0.4 grid_4.4.1
## [9] timechange_0.3.0 RColorBrewer_1.1-3 fastmap_1.2.0 cellranger_1.1.0
## [13] jsonlite_1.8.8 purrr_1.0.2 crosstalk_1.2.1 jquerylib_0.1.4
## [17] cli_3.6.3 crayon_1.5.3 rlang_1.1.4 bit64_4.0.5
## [21] withr_3.0.2 cachem_1.1.0 yaml_2.3.10 parallel_4.4.1
## [25] tools_4.4.1 tzdb_0.4.0 vctrs_0.6.5 R6_2.6.1
## [29] lifecycle_1.0.4 lubridate_1.9.4 snakecase_0.11.1 htmlwidgets_1.6.4
## [33] bit_4.0.5 vroom_1.6.5 pkgconfig_2.0.3 pillar_1.10.1
## [37] bslib_0.8.0 gtable_0.3.6 glue_1.7.0 xfun_0.52
## [41] tibble_3.2.1 tidyselect_1.2.1 rstudioapi_0.16.0 farver_2.1.2
## [45] htmltools_0.5.8.1 labeling_0.4.3 rmarkdown_2.28 compiler_4.4.1
## [49] S7_0.2.0