suppressPackageStartupMessages({
library(readxl); library(dplyr); library(janitor); library(stringr)
library(psych); library(tidyr); library(broom); library(knitr)
library(readr); library(writexl); library(purrr); library(ggplot2)
})
set.seed(1234)
msg <- function(...) cat(paste0(...), "\n")
# --- Helpers ---------------------------------------------------------------
safe_find_excel <- function(path_or_file, pattern = "\\.xlsx$") {
if (file.exists(path_or_file) && !grepl("^~\\$", basename(path_or_file))) {
return(normalizePath(path_or_file, mustWork = TRUE))
}
dir <- if (dir.exists(path_or_file)) path_or_file else dirname(path_or_file)
cands <- list.files(dir, pattern = pattern, full.names = TRUE)
cands <- cands[!grepl("^~\\$", basename(cands))]
if (length(cands) == 0) stop("No .xlsx files found (non-lock) in: ", dir)
cands[which.max(file.info(cands)$size)]
}
to01 <- function(x) {
if (is.numeric(x)) return(x)
if (is.logical(x)) return(as.numeric(x))
y <- suppressWarnings(as.numeric(x))
if (all(is.na(y)) && is.character(x)) {
y <- dplyr::case_when(
str_detect(tolower(x), "yes|si|sí|true|correct|correcto|1") ~ 1,
str_detect(tolower(x), "no|false|incorrect|incorrecto|0") ~ 0,
TRUE ~ NA_real_
)
}
y
}
likert4 <- function(x) {
map <- c(
"Strongly agree"=4, "Agree"=3, "Disagree"=2, "Strongly disagree"=1,
"Totalmente de acuerdo"=4, "De acuerdo"=3, "En desacuerdo"=2, "Totalmente en desacuerdo"=1
)
out <- suppressWarnings(as.numeric(map[as.character(x)]))
if (all(is.na(out))) {
nm <- setNames(unname(map), tolower(names(map)))
out <- as.numeric(nm[tolower(as.character(x))])
}
out
}
apply_alpha_based_reverse <- function(df_items) {
df_items <- as.data.frame(df_items)
df_items[] <- lapply(df_items, function(v) suppressWarnings(as.numeric(v)))
keep <- purrr::map_lgl(df_items, ~ {
v <- .x; nz <- sum(!is.na(v)); if (nz < 3) return(FALSE)
length(unique(na.omit(v))) > 1
})
df_use <- df_items[, keep, drop = FALSE]
if (ncol(df_use) < 2) return(df_items)
a_raw <- tryCatch(psych::alpha(df_use, check.keys = TRUE), error = function(e) NULL)
if (is.null(a_raw) || is.null(a_raw$keys)) return(df_items)
keys <- a_raw$keys
for (nm in names(keys)) {
if (is.na(keys[[nm]]) || keys[[nm]] == 1) next
v <- df_items[[nm]]
if (all(is.na(v))) next
mxx <- suppressWarnings(max(v, na.rm = TRUE)); mnn <- suppressWarnings(min(v, na.rm = TRUE))
if (is.finite(mxx) && is.finite(mnn) && mxx > mnn) {
df_items[[nm]] <- (mxx + mnn) - v
} else {
df_items[[nm]] <- 1 - v
}
}
df_items
}
alpha_poly <- function(df_items) {
df_items <- as.data.frame(df_items)
keep <- purrr::map_lgl(df_items, ~ {
v <- .x; nz <- sum(!is.na(v)); if (nz < 3) return(FALSE)
length(unique(na.omit(v))) > 1
})
df_items <- df_items[, keep, drop = FALSE]
if (ncol(df_items) < 2) return(NULL)
R <- tryCatch(psych::polychoric(df_items)$rho, error = function(e) NULL)
if (is.null(R)) R <- suppressWarnings(cor(df_items, use = "pairwise.complete.obs"))
tryCatch(psych::alpha(R), error = function(e) NULL)
}
q_prefix <- function(n) regex(paste0("^(x)?", n, "([_.]|\\b)"), ignore_case = TRUE)
q_range_prefix <- function(a,b) regex(paste0("^(x)?(", paste(a:b, collapse="|"), ")([_.]|\\b)"), ignore_case = TRUE)
# --- Load data -------------------------------------------------------------
if (!file.exists(params$excel_path)) {
message("Path not found; open a file chooser…")
params$excel_path <- file.choose()
}
path <- tryCatch(safe_find_excel(params$excel_path),
error = function(e) normalizePath(params$excel_path, mustWork = TRUE))
if (grepl("^~\\$", basename(path))) stop("You're pointing at an Excel lock file (~$...). Close Excel and use the real .xlsx.")
msg("Using file: ", path)
## Using file: C:\Users\micha\Downloads\Final Analysis for ISELA submission\ISELA colombia Domains final clean.xlsx
raw <- readxl::read_excel(path, sheet = params$sheet)
dat <- raw %>% janitor::clean_names()
dat <- dat %>%
mutate(
across(where(is.numeric), ~ dplyr::na_if(., 999)),
across(where(is.character), ~ dplyr::na_if(trimws(.), "999"))
)
all_cols <- names(dat)
# --- Define item sets (per your spec) -------------------------------------
rel_q201 <- all_cols[
str_detect(all_cols, q_prefix(201)) &
( str_detect(all_cols, "select_all_that_apply_someone_at_home|home|community") |
str_detect(all_cols, "select_all_that_apply_friends|friends") |
str_detect(all_cols, "select_all_that_apply_teacher|teacher|adult_at_school|escuela") ) &
!str_detect(all_cols, "no_one|no_response")
]
rel_q202_all <- all_cols[
str_detect(all_cols, q_prefix(202)) &
!str_detect(all_cols, "no_one|no_response|read_")
]
persev_help_3 <- c(
"x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_someone_at_home_or_in_the_community",
"x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_friends",
"x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_teacher_o_adulto_en_la_escuela"
)
persev_help_3 <- persev_help_3[persev_help_3 %in% all_cols]
q202_stem <- grep("select_all_that_apply$", rel_q202_all, value = TRUE)
stress_rel_items <- setdiff(c(rel_q201, rel_q202_all), c(persev_help_3, q202_stem))
gm_items <- all_cols[str_detect(all_cols, q_range_prefix(215, 218))]
conf_items <- c(all_cols[str_detect(all_cols, q_prefix(212))],
all_cols[str_detect(all_cols, q_prefix(213))])
perf_items <- c(all_cols[str_detect(all_cols, q_prefix(210))],
all_cols[str_detect(all_cols, q_prefix(211))])
em_items <- all_cols[str_detect(all_cols, q_prefix(207)) | str_detect(all_cols, q_prefix(208))]
# --- Map & reverse-code ----------------------------------------------------
if (length(gm_items) > 0) {
dat <- dat %>% mutate(across(all_of(gm_items), ~ { y <- likert4(.x); if (all(is.na(y))) to01(.x) else y }))
}
to_num <- unique(c(persev_help_3, stress_rel_items, conf_items, perf_items, em_items))
dat <- dat %>% mutate(across(any_of(to_num), to01))
# Hard reverse x207 (negative wording)
if ("x207_i_feel_irritated_when_someone_cries" %in% names(dat)) {
v <- dat[["x207_i_feel_irritated_when_someone_cries"]]
mxx <- suppressWarnings(max(v, na.rm = TRUE)); mnn <- suppressWarnings(min(v, na.rm = TRUE))
if (is.finite(mxx) && is.finite(mnn) && mxx > mnn) {
dat[["x207_i_feel_irritated_when_someone_cries"]] <- (mxx + mnn) - v
} else {
dat[["x207_i_feel_irritated_when_someone_cries"]] <- 1 - v
}
}
# Auto reverse within-scale using alpha(check.keys=TRUE)
fix_and_alpha <- function(df_items) {
if (length(df_items) < 2) return(list(alpha_before = NA, alpha_after = NA, itemstats = NULL, fixed_items = df_items))
a_before <- alpha_poly(dat[, df_items])
fixed <- apply_alpha_based_reverse(dat[, df_items])
a_after <- alpha_poly(fixed)
dat[, df_items] <<- fixed
list(alpha_before = if (!is.null(a_before)) a_before$total$raw_alpha else NA,
alpha_after = if (!is.null(a_after)) a_after$total$raw_alpha else NA,
itemstats = if (!is.null(a_after)) a_after$item.stats else NULL,
fixed_items = df_items)
}
res_persev <- if (length(persev_help_3) >= 2) fix_and_alpha(persev_help_3) else NULL
## Some items ( x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_someone_at_home_or_in_the_community ) were negatively correlated with the first principal component and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_someone_at_home_or_in_the_community ) were negatively correlated with the first principal component and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
res_stress <- if (length(stress_rel_items) >= 2) fix_and_alpha(stress_rel_items) else NULL
res_gm <- if (length(gm_items) >= 2) fix_and_alpha(gm_items) else NULL
res_conf <- if (length(conf_items) >= 2) fix_and_alpha(conf_items) else NULL
res_perf <- if (length(perf_items) >= 2) fix_and_alpha(perf_items) else NULL
res_empathy <- if (length(em_items) >= 2) fix_and_alpha(em_items) else NULL
## Some items ( x207_i_feel_irritated_when_someone_cries ) were negatively correlated with the first principal component and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( x207_i_feel_irritated_when_someone_cries ) were negatively correlated with the first principal component and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
make_composite <- function(items, name) {
if (length(items) < 1) return(invisible(NULL))
dat[[paste0(name, "_sum")]] <- rowSums(dat[, items], na.rm = TRUE)
dat[[paste0(name, "_mean")]] <- rowMeans(dat[, items], na.rm = TRUE)
}
if (length(persev_help_3) >= 1) make_composite(persev_help_3, "perseverance_helpseek")
if (length(stress_rel_items) >= 1)make_composite(stress_rel_items, "stress_mgmt")
if (length(gm_items) >= 1) make_composite(gm_items, "growth_mindset")
if (length(conf_items) >= 1) make_composite(conf_items, "conflict")
if (length(perf_items) >= 1) make_composite(perf_items, "perseverance_perf")
if (length(em_items) >= 1) make_composite(em_items, "empathy")
cat("**Reliability (polychoric alpha) — before vs after reverse-fix**\n\n")
## **Reliability (polychoric alpha) — before vs after reverse-fix**
printer <- function(res, label){
if (is.null(res)) return(invisible(NULL))
cat(sprintf("%s: before = %s; after = %s\n",
label,
ifelse(is.na(res$alpha_before),"NA",sprintf("%.3f",res$alpha_before)),
ifelse(is.na(res$alpha_after), "NA",sprintf("%.3f",res$alpha_after))))
}
printer(res_persev, "Perseverance (help-seeking)")
## Perseverance (help-seeking): before = -0.225; after = -0.225
printer(res_stress, "Stress mgmt (relationships proxy)")
## Stress mgmt (relationships proxy): before = 0.504; after = 0.504
printer(res_gm, "Growth mindset")
## Growth mindset: before = 0.721; after = 0.721
printer(res_conf, "Conflict")
## Conflict: before = 0.816; after = 0.816
printer(res_perf, "Perseverance (performance)")
## Perseverance (performance): before = 1.000; after = 1.000
printer(res_empathy, "Empathy")
## Empathy: before = -0.524; after = -0.524
# --- Exploratory Factor Analysis (EFA) -------------------------------------
efa_output_dir <- "efa_outputs"
if (!dir.exists(efa_output_dir)) dir.create(efa_output_dir)
# Helper to run EFA on a set of items (polychoric), parallel analysis, then chosen nfactors
run_efa <- function(items, label, max_factors = 4, rotate = "oblimin") {
items <- intersect(items, names(dat))
if (length(items) < 3) {
msg("Skipping EFA for ", label, " (needs >=3 items).")
return(NULL)
}
df <- dat[, items]
# Polychoric correlation
pc <- tryCatch(psych::polychoric(df), error=function(e) NULL)
if (is.null(pc)) return(NULL)
R <- pc$rho
# KMO and Bartlett
kmo <- tryCatch(psych::KMO(R), error=function(e) NULL)
bart <- tryCatch(psych::cortest.bartlett(R, n = nrow(na.omit(df))), error=function(e) NULL)
# Parallel analysis
pa <- tryCatch(psych::fa.parallel(R, n.obs = nrow(df), fm = "minres", fa = "fa", plot = FALSE), error=function(e) NULL)
nf <- if (!is.null(pa) && !is.null(pa$nfact)) pa$nfact else 1
nf <- min(max(nf, 1), max_factors)
# EFA
fa_fit <- tryCatch(psych::fa(R, nfactors = nf, n.obs = nrow(df), fm = "minres", rotate = rotate), error=function(e) NULL)
# Save loadings table
if (!is.null(fa_fit)) {
loadings_tbl <- as.data.frame(unclass(fa_fit$loadings))
loadings_tbl$item <- rownames(loadings_tbl)
loadings_tbl <- loadings_tbl %>% select(item, everything())
readr::write_csv(loadings_tbl, file.path(efa_output_dir, paste0("efa_loadings_", label, ".csv")))
}
# Return list
list(label = label, R = R, kmo = kmo, bartlett = bart, parallel = pa, nfactors = nf, fit = fa_fit)
}
# Domain-specific EFAs
efa_persev <- run_efa(persev_help_3, "perseverance_helpseek", max_factors = 2)
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
efa_stress <- run_efa(stress_rel_items,"stress_mgmt_relproxy", max_factors = 2)
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
efa_gm <- run_efa(gm_items, "growth_mindset", max_factors = 2)
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
efa_conf <- run_efa(conf_items, "conflict", max_factors = 2)
## Skipping EFA for conflict (needs >=3 items).
efa_empathy <- run_efa(em_items, "empathy", max_factors = 2)
## Skipping EFA for empathy (needs >=3 items).
# Global EFA across all domain items (optional)
all_item_pool <- unique(c(persev_help_3, stress_rel_items, gm_items, conf_items, em_items))
efa_global <- run_efa(all_item_pool, "global_items", max_factors = 6)
## Parallel analysis suggests that the number of factors = 5 and the number of components = NA
# Pretty print summaries
summarise_efa <- function(ef) {
if (is.null(ef)) return(invisible(NULL))
cat("\n--- EFA summary:", ef$label, " ---\n")
if (!is.null(ef$kmo)) {
cat(sprintf("KMO MSA: %.3f (overall)\n", ef$kmo$MSA))
}
if (!is.null(ef$bartlett)) {
cat(sprintf("Bartlett's test: chi2=%.1f, df=%d, p=%.3g\n",
ef$bartlett$chisq, ef$bartlett$df, ef$bartlett$p.value))
}
cat("Parallel analysis suggested factors:", ef$nfactors, "\n")
if (!is.null(ef$fit)) {
cat(sprintf("fa(): RMSEA=%.3f, TLI=%.3f, BIC=%.1f\n",
ef$fit$RMSEA[1], ef$fit$TLI, ef$fit$BIC))
print(kable(as.data.frame(unclass(ef$fit$loadings)), caption=paste("Loadings —", ef$label)))
}
}
summarise_efa(efa_persev)
##
## --- EFA summary: perseverance_helpseek ---
## KMO MSA: 0.605 (overall)
## Bartlett's test: chi2=454.3, df=3, p=3.79e-98
## Parallel analysis suggested factors: 1
##
##
## Table: Loadings — perseverance_helpseek
##
## | | MR1|
## |:----------------------------------------------------------------------------------------------------------------------------------|----------:|
## |x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_someone_at_home_or_in_the_community | -0.4561905|
## |x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_friends | 0.7806212|
## |x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_teacher_o_adulto_en_la_escuela | 0.4935221|
summarise_efa(efa_stress)
##
## --- EFA summary: stress_mgmt_relproxy ---
## KMO MSA: 0.426 (overall)
## Bartlett's test: chi2=647.1, df=3, p=6.26e-140
## Parallel analysis suggested factors: 2
##
##
## Table: Loadings — stress_mgmt_relproxy
##
## | | MR1| MR2|
## |:--------------------------------------------------------------------------------------------------|---------:|----------:|
## |x201_when_you_are_sad_who_do_you_talk_to_select_all_that_apply_someone_at_home_or_in_the_community | 0.0094779| 0.6239838|
## |x201_when_you_are_sad_who_do_you_talk_to_select_all_that_apply_friends | 0.7774611| -0.2112948|
## |x201_when_you_are_sad_who_do_you_talk_to_select_all_that_apply_teacher_or_adult_at_school_escuela | 0.7255820| 0.2960064|
summarise_efa(efa_gm)
##
## --- EFA summary: growth_mindset ---
## KMO MSA: 0.713 (overall)
## Bartlett's test: chi2=1148.0, df=6, p=8.58e-245
## Parallel analysis suggested factors: 2
##
##
## Table: Loadings — growth_mindset
##
## | | MR1| MR2|
## |:-----------------------------------------------------------------------------------------------------------------------|----------:|----------:|
## |x215_i_know_how_to_use_my_time_and_organize_my_work_well_to_achieve_my_goals | 0.2941766| 0.3267326|
## |x216_when_i_encounter_a_challenge_or_a_difficulty_i_see_it_as_a_way_to_help_me_improve | 0.9982676| -0.0016210|
## |x217_i_believe_that_when_someone_corrects_me_it_helps_me_learn_and_grow | 0.1954897| 0.5293672|
## |x218_i_feel_inspired_and_motivated_to_work_harder_when_i_see_that_my_classmates_and_the_people_around_me_are_successful | -0.0581622| 0.7906614|
summarise_efa(efa_conf)
summarise_efa(efa_empathy)
summarise_efa(efa_global)
##
## --- EFA summary: global_items ---
## KMO MSA: 0.143 (overall)
## Bartlett's test: chi2=73041.0, df=91, p=0
## Parallel analysis suggested factors: 5
## fa(): RMSEA=1.149, TLI=-1.447, BIC=60488.9
##
##
## Table: Loadings — global_items
##
## | | MR1| MR3| MR2| MR4| MR5|
## |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------:|----------:|----------:|----------:|----------:|
## |x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_someone_at_home_or_in_the_community | -0.0550335| -0.0874991| 0.0239403| 1.0013657| 0.0214930|
## |x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_friends | 0.0162330| 0.9445137| 0.0791287| -0.1379645| -0.0344236|
## |x202_when_you_are_working_on_something_difficult_who_do_you_ask_for_help_select_all_that_apply_teacher_o_adulto_en_la_escuela | 0.9040500| 0.1270659| -0.1301733| -0.1418128| 0.0919235|
## |x201_when_you_are_sad_who_do_you_talk_to_select_all_that_apply_someone_at_home_or_in_the_community | 0.0725035| 0.1434470| 0.2258006| 0.5246073| -0.0698118|
## |x201_when_you_are_sad_who_do_you_talk_to_select_all_that_apply_friends | -0.0869723| 0.8154007| -0.0073390| 0.0723624| 0.0646907|
## |x201_when_you_are_sad_who_do_you_talk_to_select_all_that_apply_teacher_or_adult_at_school_escuela | 0.6542400| 0.3548071| -0.3485199| 0.2882818| -0.1355036|
## |x215_i_know_how_to_use_my_time_and_organize_my_work_well_to_achieve_my_goals | 0.1106986| 0.1666779| 0.5866748| 0.0410362| 0.0493924|
## |x216_when_i_encounter_a_challenge_or_a_difficulty_i_see_it_as_a_way_to_help_me_improve | 0.0515078| 0.1483915| 0.6436859| -0.0129393| 0.0178292|
## |x217_i_believe_that_when_someone_corrects_me_it_helps_me_learn_and_grow | 0.0045975| -0.0952408| 0.6999942| 0.1140626| 0.0175560|
## |x218_i_feel_inspired_and_motivated_to_work_harder_when_i_see_that_my_classmates_and_the_people_around_me_are_successful | -0.0205502| 0.1607524| 0.5026960| 0.2090772| 0.2254787|
## |x212_i_want_you_to_imagine_a_situation_where_you_are_playing_with_a_toy_you_like_one_of_your_friends_also_wants_to_play_with_the_toy_but_there_is_only_one_available_what_would_you_do_in_this_situation | 0.9389326| -0.1079674| 0.1425651| -0.0038556| 0.1720634|
## |x213_now_i_want_you_to_imagine_that_you_are_playing_with_a_toy_and_another_friend_wants_to_play_with_the_same_toy_but_there_is_only_one_available_this_time_your_friend_takes_the_toy_away_from_you_and_starts_playing_with_it_without_asking_you_what_wo | 0.8260540| -0.0971118| 0.1503404| 0.0369565| -0.3950305|
## |x207_i_feel_irritated_when_someone_cries | -0.0027770| 0.0573695| 0.0144694| -0.0708371| -0.3223842|
## |x208_usually_when_one_of_my_friends_is_sad_i_can_notice_it_without_them_needing_to_tell_me | 0.1240420| -0.0107389| 0.1566973| 0.0086696| 0.6863979|
# --- Save dataset and alpha/efa summaries ---------------------------------
out_xlsx <- "ISELA_adolescent_domains_REPLICA_fixedkeys_EFA_scored.xlsx"
writexl::write_xlsx(dat, out_xlsx)
msg("Saved enhanced dataset -> ", out_xlsx)
## Saved enhanced dataset -> ISELA_adolescent_domains_REPLICA_fixedkeys_EFA_scored.xlsx
alpha_summaries <- list(
perseverance_helpseek = res_persev,
stress_mgmt_relationships_proxy = res_stress,
growth_mindset = res_gm,
conflict = res_conf,
perseverance_performance = res_perf,
empathy = res_empathy
)
for (nm in names(alpha_summaries)) {
r <- alpha_summaries[[nm]]
if (is.null(r)) next
tibble::tibble(scale = nm,
alpha_before = r$alpha_before,
alpha_after = r$alpha_after) %>%
readr::write_csv(paste0("alpha_", nm, "_summary.csv"))
if (!is.null(r$itemstats)) {
r$itemstats %>% tibble::rownames_to_column("item") %>%
readr::write_csv(paste0("alpha_", nm, "_itemstats_afterfix.csv"))
}
}
# Also export EFA fit summaries to text
sink("efa_summaries.txt")
summarise_efa(efa_persev)
summarise_efa(efa_stress)
summarise_efa(efa_gm)
summarise_efa(efa_conf)
summarise_efa(efa_empathy)
summarise_efa(efa_global)
sink()
# Quick composite descriptives (after reverse-fix)
comps <- c("perseverance_helpseek_mean","stress_mgmt_mean","growth_mindset_mean","conflict_mean","perseverance_perf_mean","empathy_mean")
comps <- intersect(comps, names(dat))
if (length(comps) > 0) {
desc <- bind_rows(purrr::map(comps, ~ tibble::as_tibble(psych::describe(dat[[.x]])) %>% mutate(variable = .x))) %>%
select(variable, n, mean, sd, min, max, skew, kurtosis)
print(kable(desc, digits = 3, caption = "Composite descriptives (after reverse-fix)"))
}