Show code
setwd("~/data_analyses/multiplatform")
EUPeopleSp <- haven::read_sav("UNOVPT_265830_20240625_ESPAÑA.sav")
EUPeopleNet <- janitor::clean_names(EUPeopleSp)This document analyses the relationships between platform use and political engagement purpose.
setwd("~/data_analyses/multiplatform")
EUPeopleSp <- haven::read_sav("UNOVPT_265830_20240625_ESPAÑA.sav")
EUPeopleNet <- janitor::clean_names(EUPeopleSp)data_MP dataset# --- Prepare data_MP dataset ---
data_MP <- EUPeopleSp %>%
# Select the relevant variables
dplyr::select(DIG02_1:DIG02_8, DEM02, DEM02R, VOTEI02_ES, PANELIST_SEX, LERI, POLINT, POLINF_1:POLINF_3) %>%
# Convert variables to factors
dplyr::mutate(across(c(DIG02_1:DIG02_8, DEM02R, VOTEI02_ES, PANELIST_SEX, LERI, POLINT, POLINF_1:POLINF_3),
haven::as_factor)) %>%
# Order the 1–8 digital use frequency variables
dplyr::mutate(across(c(DIG02_1:DIG02_8),
~ factor(.x,
levels = c("Never", "Less often", "Several times a month",
"Several times a week", "Once a day",
"Several times a day", "Almost all the time"),
ordered = TRUE))) %>%
# Create numeric versions of these ordered factors
dplyr::mutate(across(c(DIG02_1:DIG02_8),
as.integer,
.names = "{.col}_num")) %>%
# Rename variables cleanly
dplyr::rename(
YouTube_ = DIG02_1,
WhatsApp_ = DIG02_2,
TwitterX_ = DIG02_3,
Instagram_ = DIG02_4,
TikTok_ = DIG02_5,
Facebook_ = DIG02_6,
LinkedIn_ = DIG02_7,
Telegram_ = DIG02_8,
YouTube = DIG02_1_num,
WhatsApp = DIG02_2_num,
Twitter_X = DIG02_3_num,
Instagram = DIG02_4_num,
TikTok = DIG02_5_num,
Facebook = DIG02_6_num,
Linkedin = DIG02_7_num,
Telegram = DIG02_8_num,
Age = DEM02,
Age_cat = DEM02R,
Vote = VOTEI02_ES,
Sex = PANELIST_SEX,
Left_Right = LERI,
Pol_int = POLINT,
Inf_disagr = POLINF_1,
Inf_check = POLINF_2,
Inf_confirm = POLINF_3
) %>%
# Recode the Vote factor levels
dplyr::mutate(Vote = forcats::fct_recode(Vote,
"Other party" = "EH-Bildu",
"Other party" = "EAJ-PNV",
"Other party" = "BNG",
"Other party" = "CCa",
"Other party" = "UPN",
"Did not vote" = "I was not old enough to vote"))data_USES dataset#| label: social_media_use
#| message: false
#| warning: false
# --- Load libraries ---
library(dplyr)
library(forcats)
# --- 1️⃣ Select only the social media use variables ---
data_USES <- EUPeopleNet %>%
dplyr::select(dplyr::starts_with("dig03_"))
# --- 2️⃣ Helper: compute "most frequent use reason" per platform ---
# safer to precompute matrices outside mutate to avoid NSE issues
#get_use_reason <- function(df, prefix) {
# m <- df %>%
# dplyr::select(dplyr::starts_with(prefix)) %>%
# as.matrix()
# suppressWarnings(max.col(m, ties.method = "first"))
#}
# --- 3️⃣ Apply helper for each platform ---
#data_USES <- data_USES %>%
# dplyr::mutate(
# use_Youtube = get_use_reason(., "dig03_1_number"),
# use_WhatsApp = get_use_reason(., "dig03_2_number"),
# use_TwitterX = get_use_reason(., "dig03_3_number"),
# use_Instagram= get_use_reason(., "dig03_4_number"),
# use_TikTok = get_use_reason(., "dig03_5_number"),
# use_Facebook = get_use_reason(., "dig03_6_number"),
# use_Linkedin = get_use_reason(., "dig03_7_number"),
# use_Telegram = get_use_reason(., "dig03_8_number")
# )
# --- 4️⃣ Convert to ordered factors ---
#labels_use <- c("friends", "family", "inf socially", "sup soc causes",
# "inf pol", "sup pol causes", "connect profess",
# "promote profess", "Other")
#data_USES <- data_USES %>%
# dplyr::mutate(across(
# c(use_Youtube:use_Telegram),
# ~ factor(.x, levels = 1:9, labels = labels_use, ordered = TRUE)
# ))
# --- 5️⃣ Derive political engagement indicators ---
#data_USES <- data_USES %>%
# dplyr::mutate(
# PolInf_smed = dplyr::if_else(
# rowSums(dplyr::across(
# c(use_Youtube, use_WhatsApp, use_TwitterX, use_Instagram,
# use_TikTok, use_Facebook, use_Linkedin, use_Telegram),
# ~ as.integer(.x) == 5
# ), na.rm = TRUE) > 0, 1, 0
# ),
# PolSup_smed = dplyr::if_else(
# rowSums(dplyr::across(
# c(use_Youtube, use_WhatsApp, use_TwitterX, use_Instagram,
# use_TikTok, use_Facebook, use_Linkedin, use_Telegram),
# ~ as.integer(.x) == 6
# ), na.rm = TRUE) > 0, 1, 0
# )
# ) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(
# PolSocial_smed = as.integer(any(as.integer(c_across(
# c(use_Youtube, use_WhatsApp, use_TwitterX, use_Instagram,
# use_TikTok, use_Facebook, use_Linkedin, use_Telegram)
# )) %in% 3:6)),
# PolInfSup_smed = as.integer(any(as.integer(c_across(
# c(use_Youtube, use_WhatsApp, use_TwitterX, use_Instagram,
# use_TikTok, use_Facebook, use_Linkedin, use_Telegram)
# )) %in% 5:6))
# ) %>%
# dplyr::ungroup() %>%
# dplyr::mutate(
# dplyr::across(
# c(PolInf_smed, PolSup_smed, PolSocial_smed, PolInfSup_smed),
# ~ factor(.x, levels = c(0, 1), labels = c("No", "Yes"))
# ),
# dplyr::across(
# c(PolInf_smed, PolSup_smed, PolSocial_smed, PolInfSup_smed),
# ~ forcats::fct_relevel(.x, "Yes")
# )
# )Summary of the newly created variables and their contingency table showing the use of social media platforms for political information and support.
#summary(dplyr::select(data_USES, PolInf_smed, PolSup_smed, PolSocial_smed, PolInfSup_smed))
#table(data_USES$PolInf_smed, data_USES$PolSup_smed)New corrected code (taking into account that the question on platform uses is a multiple choice question.
library(dplyr)
library(stringr)
library(tidyr)
library(ggplot2)
library(forcats)
# --- 1️⃣ Define platforms and purposes ---
platforms <- c("YouTube", "WhatsApp", "TwitterX", "Instagram",
"TikTok", "Facebook", "LinkedIn", "Telegram")
platform_prefixes <- paste0("dig03_", 1:8, "_number_")
purpose_labels <- c(
"friends", "family", "inf_socially", "sup_soc_causes",
"inf_pol", "sup_pol_causes", "connect_profess",
"promote_profess", "other"
)
purpose_suffixes <- c(1:8, 98) # '98' = 'other'
# --- 2️⃣ Compute binary indicators for each platform × purpose ---
platform_long <- list()
for (p in seq_along(platforms)) {
plat <- platforms[p]
prefix <- platform_prefixes[p]
for (i in seq_along(purpose_suffixes)) {
pattern_i <- paste0(prefix, purpose_suffixes[i], "$")
cols_i <- grep(pattern_i, names(data_USES), value = TRUE)
if (length(cols_i) > 0) {
platform_long[[length(platform_long) + 1]] <- data_USES %>%
mutate(
Platform = plat,
Purpose = purpose_labels[i],
Response = ifelse(rowSums(across(all_of(cols_i)), na.rm = TRUE) > 0, "Yes", "No")
) %>%
dplyr::select(Platform, Purpose, Response)
}
}
}
data_platform_use <- bind_rows(platform_long)
# --- 3️⃣ Summarize only “Yes” responses ---
summary_platform_use <- data_platform_use %>%
filter(Response == "Yes") %>%
group_by(Platform, Purpose) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(Platform) %>%
mutate(Percentage = n / sum(n) * 100) %>%
ungroup()
# --- 4️⃣ Enforce order of Purpose (same as questionnaire) ---
summary_platform_use <- summary_platform_use %>%
mutate(Purpose = factor(Purpose, levels = purpose_labels))
# --- 5️⃣ Plot faceted grid ---
ggplot(summary_platform_use,
aes(x = Purpose, y = Percentage)) +
geom_col(fill = "darkorange", color = "white") +
geom_text(aes(label = sprintf("%.1f%%", Percentage)),
vjust = -0.4, size = 3.3, color = "black") +
facet_wrap(~ Platform, ncol = 4, scales = "fixed") +
labs(
title = "Use of Social Media Platform by Purpose",
subtitle = "Percentage of respondents using each platform for each purpose (Yes responses)",
x = "Purpose of Use",
y = "Percentage of Respondents"
) +
theme_minimal(base_size = 13) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
strip.text = element_text(face = "bold", size = 12),
plot.title = element_text(face = "bold"),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)library(dplyr)
library(forcats)
library(stringr)
library(ggplot2)
# --- 1️⃣ Select only the social media use variables ---
data_USES <- EUPeopleNet %>%
dplyr::select(dplyr::starts_with("dig03_"))
# --- 2️⃣ Define prefixes and purpose labels ---
platform_prefixes <- paste0("dig03_", 1:8, "_number_")
# Purpose labels
labels_use <- c(
"friends", "family", "inf_socially", "sup_soc_causes",
"inf_pol", "sup_pol_causes", "connect_profess",
"promote_profess", "other"
)
# Purpose numeric suffixes (note: 98 instead of 9 for 'other')
purpose_suffixes <- c(1:8, 98)
# --- 3️⃣ Compute binary indicators: uses ANY platform for each purpose ---
for (i in seq_along(labels_use)) {
# Match all columns ending with "_number_<suffix>"
pattern_i <- paste0("_number_", purpose_suffixes[i], "$")
cols_i <- grep(pattern_i, names(data_USES), value = TRUE)
if (length(cols_i) > 0) {
data_USES[[labels_use[i]]] <- ifelse(
rowSums(data_USES[, cols_i], na.rm = TRUE) > 0, 1, 0
)
} else {
message("⚠️ No columns found for purpose: ", labels_use[i])
}
}
# --- 4️⃣ Convert to factors only for variables that exist ---
existing_labels <- intersect(labels_use, names(data_USES))
data_USES <- data_USES %>%
dplyr::mutate(
dplyr::across(
all_of(existing_labels),
~ factor(.x, levels = c(0, 1), labels = c("No", "Yes"))
)
)
# --- tidy summary table ---
data_USES %>%
tidyr::pivot_longer(
cols = all_of(existing_labels),
names_to = "Purpose", values_to = "Response"
) %>%
count(Purpose, Response) %>%
group_by(Purpose) %>%
mutate(Percentage = n / sum(n) * 100) %>%
ungroup()# A tibble: 18 × 4
Purpose Response n Percentage
<chr> <fct> <int> <dbl>
1 connect_profess No 992 66.1
2 connect_profess Yes 509 33.9
3 family No 333 22.2
4 family Yes 1168 77.8
5 friends No 187 12.5
6 friends Yes 1314 87.5
7 inf_pol No 1095 73.0
8 inf_pol Yes 406 27.0
9 inf_socially No 698 46.5
10 inf_socially Yes 803 53.5
11 other No 429 28.6
12 other Yes 1072 71.4
13 promote_profess No 1217 81.1
14 promote_profess Yes 284 18.9
15 sup_pol_causes No 1362 90.7
16 sup_pol_causes Yes 139 9.26
17 sup_soc_causes No 1268 84.5
18 sup_soc_causes Yes 233 15.5
# --- 6️⃣ : simple visualization ---
summary_uses <- data_USES %>%
tidyr::pivot_longer(
cols = all_of(existing_labels),
names_to = "Purpose", values_to = "Response"
) %>%
count(Purpose, Response) %>%
group_by(Purpose) %>%
mutate(Percentage = n / sum(n) * 100) %>%
ungroup()
ggplot(summary_uses, aes(x = Purpose, y = Percentage, fill = Response)) +
geom_col(position = "dodge") +
scale_fill_manual(values = c("grey70", "darkorange")) +
labs(
title = "Use of Any Social Media Platform by Purpose",
subtitle = "Percentages within each purpose (Yes vs No)",
x = "Purpose",
y = "Percentage of Respondents",
fill = "Response"
) +
theme_minimal(base_size = 10) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
plot.title = element_text(face = "bold")
)data_long <- data_MP %>%
pivot_longer(cols = c(YouTube_:Telegram_),
names_to = "variable", values_to = "value")
ggplot(data_long, aes(x = value)) +
geom_bar(fill = "darkorange") +
geom_text(stat = "count", aes(label = ..count..), vjust = -0.5, size = 2) +
facet_wrap(~ variable, scales = "free_y", ncol = ) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Social Media Frequency Use with Counts",
x = NULL, y = "Count") +
scale_y_continuous(expand = expansion(mult = c(0, 0.15))) # ← adds 15% space on topdata_long_pct <- data_long %>%
group_by(variable, value) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(variable) %>%
mutate(pct = 100 * n / sum(n))
ggplot(data_long_pct, aes(x = value, y = pct)) +
geom_col(fill = "darkorange") +
geom_text(aes(label = sprintf("%.1f%%", pct)), vjust = -0.3, size = 2) +
facet_wrap(~ variable, scales = "free_y") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "Social Media Frequency Use (Percentages)",
x = NULL, y = "Percentages") +
scale_y_continuous(expand = expansion(mult = c(0, 0.15))) # ← adds 15% space on top#data_long_1 <- data_USES %>%
# pivot_longer(cols = c(use_Youtube:use_Telegram),
# names_to = "variable", values_to = "value")
#ggplot(data_long_1, aes(x = value)) +
# geom_bar(fill = "darkorange") +
# geom_text(stat = "count", aes(label = ..count..), vjust = -0.3, size = 2) +
# facet_wrap(~ variable, scales = "free_y") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
# labs(title = "Uses of Social Media Platforms (with Counts)",
# x = NULL, y = "Count") +
# scale_y_continuous(expand = expansion(mult = c(0, 0.15))) # ← adds 15% space on top
#data_long_1pct <- data_long_1 %>%
# group_by(variable, value) %>%
# summarise(n = n(), .groups = "drop") %>%
# group_by(variable) %>%
# mutate(pct = 100 * n / sum(n))
#ggplot(data_long_1pct, aes(x = value, y = pct)) +
# geom_col(fill = "darkorange") +
# geom_text(aes(label = sprintf("%.1f%%", pct)), vjust = -0.3, size = 2) +
# facet_wrap(~ variable, scales = "free_y") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
# labs(title = "Uses of Social Media Platforms (Percentages)",
#x = NULL, y = "Percentages") +
# scale_y_continuous(expand = expansion(mult = c(0, 0.15))) # ← adds 15% space on top
# What was the purpose that you used the following platform for?
# To inform myself politically (YES = on any of the platforms)
#ggplot(data_USES, aes(x = PolInf_smed)) +
# geom_bar(fill = "darkgreen") +
# theme_minimal()
# To support political causes (YES = on any of the platforms)
#ggplot(data_USES, aes(x = PolSup_smed)) +
# geom_bar(fill = "darkgreen") +
# theme_minimal()
# To inform myself politically, OR To support political causes (YES = on any of the platforms)
#ggplot(data_USES, aes(x = PolInfSup_smed)) +
# geom_bar(fill = "darkgreen") +
# theme_minimal()
# To inform myself socially, To support social causes, To inform myself politically,
# or To support political causes, (YES = on any of the platforms)
#ggplot(data_USES, aes(x = PolSocial_smed)) +
# geom_bar(fill = "darkgreen") +
# theme_minimal()
#data_long_2 <- data_USES %>%
# pivot_longer(cols = c(PolInf_smed, PolSup_smed, PolSocial_smed, PolInfSup_smed),
# names_to = "variable", values_to = "value")
#ggplot(data_long_2, aes(x = value)) +
# geom_bar(fill = "darkorange") +
# facet_wrap(~ variable, scales = "free_y") +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 90, hjust = 1))ggplot(data_MP, aes(x = Age)) +
geom_histogram(binwidth = 5, fill = "darkorange", color = "white") +
theme_minimal() +
labs(
title = "Age",
x = "How old are you?",
y = "Count"
) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1)
)ggplot(data_MP, aes(x = Sex)) +
geom_bar(fill = "darkorange") +
theme_minimal() +
labs(
title = "Sex",
x = "Sex",
y = "Count"
) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1)
)ggplot(data_MP, aes(x = Vote)) +
geom_bar(fill = "darkorange") +
theme_minimal() +
labs(
title = "Vote in the 2024 Spanish General Elections",
x = "Which party did you vote in the last national elections?",
y = "Count"
) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1)
)ggplot(data_MP, aes(x = Left_Right)) +
geom_bar(fill = "darkorange") +
theme_minimal() +
labs(
title = "Left Right self-location",
x = "Left-Right",
y = "Count"
) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1)
)ggplot(data_MP, aes(x = Pol_int)) +
geom_bar(fill = "darkorange") +
theme_minimal() +
labs(
title = "Political interest",
x = "Political interest",
y = "Count"
) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1)
)ggplot(data_MP, aes(x = Inf_disagr)) +
geom_bar(fill = "darkorange") +
theme_minimal() +
labs(
title = "Frequency online exposure information disagree",
x = "How often read, whatch, listen to something you disagree with?",
y = "Count"
) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1)
)data_PCA1 <- data_MP %>% dplyr::select(
Age_cat:Telegram)
data_PCA1 <- data_PCA1 %>%
dplyr::mutate(Left_Right = dplyr::recode(Left_Right,
"0 Left" = "Ext_left",
"1" = "Ext_left",
"2" = "Left",
"3" = "Left",
"4" = "Center",
"5" = "Center",
"6" = "Center",
"7" = "Right",
"8" = "Right",
"9" = "Ext_right",
"10 Right" = "Ext_right",
"I do not position myself" = "DKDA",
"I don't know" = "DKDA"
)) %>%
dplyr::mutate(Left_Right = factor(Left_Right,
levels = c("Ext_left", "Left", "Center", "Right", "Ext_right", "DKDA")))psych::KMO(data_PCA1[,9:16])Kaiser-Meyer-Olkin factor adequacy
Call: psych::KMO(r = data_PCA1[, 9:16])
Overall MSA = 0.77
MSA for each item =
YouTube WhatsApp Twitter_X Instagram TikTok Facebook Linkedin Telegram
0.82 0.73 0.74 0.75 0.79 0.76 0.78 0.77
Classical interpretation:
| 0.70–0.79 | Middling (so acceptable) |
tests if the correlation matrix significantly differs from an identity matrix: if there are correlations. chi-square statistically significant.
psych::cortest.bartlett(data_PCA1[,9:16])$chisq
[1] 1400.615
$p.value
[1] 1.155677e-277
$df
[1] 28
If variables are too strongly correlated (r > 0.9), they might be redundant.
The correlation matrix shows that no variable pair have high correlations and no variable is redundant.
cor_matrix <- cor(data_PCA1[,9:16], use = "pairwise.complete.obs")
round(cor_matrix, 2) YouTube WhatsApp Twitter_X Instagram TikTok Facebook Linkedin
YouTube 1.00 0.17 0.25 0.21 0.23 0.14 0.16
WhatsApp 0.17 1.00 0.10 0.31 0.18 0.23 0.08
Twitter_X 0.25 0.10 1.00 0.27 0.19 0.07 0.33
Instagram 0.21 0.31 0.27 1.00 0.37 0.25 0.21
TikTok 0.23 0.18 0.19 0.37 1.00 0.18 0.17
Facebook 0.14 0.23 0.07 0.25 0.18 1.00 0.02
Linkedin 0.16 0.08 0.33 0.21 0.17 0.02 1.00
Telegram 0.23 0.06 0.37 0.28 0.22 0.10 0.25
Telegram
YouTube 0.23
WhatsApp 0.06
Twitter_X 0.37
Instagram 0.28
TikTok 0.22
Facebook 0.10
Linkedin 0.25
Telegram 1.00
caret::findCorrelation(cor_matrix, cutoff = 0.9)integer(0)
PCA1_result <- FactoMineR::PCA(data_PCA1,
quanti.sup = ,
quali.sup = 1:8,
graph = FALSE)summary(PCA1_result)
Call:
FactoMineR::PCA(X = data_PCA1, quali.sup = 1:8, graph = FALSE)
Eigenvalues
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
Variance 2.433 1.234 0.837 0.806 0.788 0.719 0.612
% of var. 30.407 15.422 10.464 10.079 9.850 8.990 7.645
Cumulative % of var. 30.407 45.829 56.293 66.372 76.222 85.212 92.857
Dim.8
Variance 0.571
% of var. 7.143
Cumulative % of var. 100.000
Individuals (the 10 first)
Dist Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3
1 | 2.810 | 1.592 0.069 0.321 | -0.248 0.003 0.008 | -0.427
2 | 2.676 | -1.614 0.071 0.364 | -0.299 0.005 0.012 | -1.113
3 | 1.691 | -1.598 0.070 0.892 | 0.412 0.009 0.059 | 0.198
4 | 2.903 | 0.197 0.001 0.005 | -0.948 0.048 0.107 | 0.333
5 | 1.860 | 0.643 0.011 0.120 | 0.338 0.006 0.033 | -0.463
6 | 2.391 | -2.012 0.111 0.708 | -0.249 0.003 0.011 | -0.256
7 | 1.453 | -0.071 0.000 0.002 | -0.695 0.026 0.229 | 0.153
8 | 2.121 | -0.862 0.020 0.165 | 1.428 0.110 0.453 | 0.551
9 | 3.780 | 0.930 0.024 0.061 | 1.817 0.178 0.231 | -1.525
10 | 1.973 | 1.171 0.038 0.353 | 0.185 0.002 0.009 | 0.478
ctr cos2
1 0.015 0.023 |
2 0.099 0.173 |
3 0.003 0.014 |
4 0.009 0.013 |
5 0.017 0.062 |
6 0.005 0.011 |
7 0.002 0.011 |
8 0.024 0.067 |
9 0.185 0.163 |
10 0.018 0.059 |
Variables
Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr cos2
YouTube | 0.538 11.915 0.290 | -0.020 0.034 0.000 | -0.637 48.499 0.406
WhatsApp | 0.434 7.737 0.188 | 0.537 23.356 0.288 | 0.365 15.926 0.133
Twitter_X | 0.611 15.357 0.374 | -0.437 15.485 0.191 | 0.046 0.258 0.002
Instagram | 0.690 19.573 0.476 | 0.233 4.398 0.054 | 0.206 5.049 0.042
TikTok | 0.592 14.402 0.350 | 0.185 2.782 0.034 | -0.097 1.119 0.009
Facebook | 0.392 6.308 0.153 | 0.567 26.011 0.321 | -0.135 2.186 0.018
Linkedin | 0.497 10.152 0.247 | -0.454 16.731 0.206 | 0.446 23.784 0.199
Telegram | 0.595 14.556 0.354 | -0.372 11.202 0.138 | -0.163 3.180 0.027
YouTube |
WhatsApp |
Twitter_X |
Instagram |
TikTok |
Facebook |
Linkedin |
Telegram |
Supplementary categories (the 10 first)
Dist Dim.1 cos2 v.test Dim.2 cos2 v.test
18_24 | 1.516 | 1.111 0.537 8.130 | -0.242 0.026 -2.489 |
25_34 | 0.781 | 0.711 0.827 8.264 | -0.051 0.004 -0.838 |
35_44 | 0.378 | 0.307 0.659 3.871 | 0.018 0.002 0.318 |
45_54 | 0.260 | -0.150 0.330 -1.756 | -0.023 0.008 -0.373 |
55_64 | 0.450 | -0.371 0.680 -3.745 | 0.153 0.115 2.164 |
65_+ | 0.994 | -0.948 0.909 -12.159 | 0.035 0.001 0.631 |
PP | 0.181 | -0.075 0.174 -0.795 | -0.015 0.007 -0.220 |
PSOE | 0.212 | -0.075 0.124 -0.942 | 0.152 0.511 2.682 |
VOX | 0.687 | 0.557 0.658 4.133 | 0.091 0.018 0.946 |
Sumar | 0.446 | 0.228 0.262 1.987 | -0.306 0.470 -3.738 |
Dim.3 cos2 v.test
18_24 0.045 0.001 0.564 |
25_34 0.115 0.022 2.284 |
35_44 -0.049 0.017 -1.062 |
45_54 0.035 0.018 0.702 |
55_64 0.002 0.000 0.034 |
65_+ -0.099 0.010 -2.169 |
PP 0.069 0.145 1.236 |
PSOE -0.007 0.001 -0.144 |
VOX -0.119 0.030 -1.508 |
Sumar 0.098 0.049 1.458 |
More detailed description:
PCA1_desc <- dimdesc(PCA1_result, axes = 1:2, proba = 0.05)
PCA1_desc$Dim.1
Link between the variable and the continuous variables (R-square)
=================================================================================
correlation p.value
Instagram 0.6900103 1.103105e-212
Twitter_X 0.6111961 1.941440e-154
Telegram 0.5950547 1.831279e-144
TikTok 0.5918950 1.411310e-142
YouTube 0.5383770 1.476217e-113
Linkedin 0.4969345 1.990065e-94
WhatsApp 0.4338399 6.318571e-70
Facebook 0.3917088 3.166138e-56
Link between the variable and the categorical variable (1-way anova)
=============================================
R2 p.value
Age_cat 0.173328865 1.805377e-59
Inf_disagr 0.065063049 6.938223e-21
Inf_check 0.064956786 7.541753e-21
Inf_confirm 0.058231566 1.442538e-18
Vote 0.017828157 1.461680e-03
Left_Right 0.008763054 2.182186e-02
Pol_int 0.005647770 3.704118e-02
Link between variable and the categories of the categorical variables
================================================================
Estimate p.value
Age_cat=25_34 0.6006780 6.496349e-17
Age_cat=18_24 1.0007284 2.081955e-16
Inf_disagr=Inf_disagr_Often 0.3718729 3.245581e-09
Inf_check=Inf_check_Often 0.3562369 1.138440e-08
Inf_check=Inf_check_Very often 0.7159107 1.555352e-08
Inf_disagr=Inf_disagr_Very often 0.7288150 2.143396e-08
Inf_confirm=Inf_confirm_Often 0.3530755 1.260255e-07
Inf_confirm=Inf_confirm_Very often 0.5610816 2.797012e-07
Vote=VOX 0.5925739 3.436378e-05
Age_cat=35_44 0.1969253 1.049325e-04
Pol_int=Very interested 0.2934429 6.245230e-03
Left_Right=Ext_right 0.2970816 1.156829e-02
Left_Right=Ext_left 0.1626145 4.051919e-02
Vote=Sumar 0.2640042 4.688392e-02
Inf_confirm=Inf_confirm_Rarely -0.3568404 1.038729e-03
Inf_check=Inf_check_Rarely -0.4198861 2.674686e-04
Age_cat=55_64 -0.4807461 1.754287e-04
Inf_disagr=Inf_disagr_Rarely -0.4718912 1.244277e-04
Inf_disagr=Inf_disagr_Never -0.5404764 1.176458e-09
Inf_check=Inf_check_Never -0.5831323 2.341727e-10
Inf_confirm=Inf_confirm_Never -0.5259557 4.009742e-11
Age_cat=65_+ -1.0580813 1.094565e-35
#function for extracting the barycentres of the supplementary qualitative variables
get_sup_coords <- function(pca_result, variable_name, dims = 2) {
# 1️⃣ Extract relevant data
quali_sup_data <- pca_result$call$quali.sup
if ("quali.sup" %in% names(quali_sup_data)) {
quali_sup_data <- quali_sup_data$quali.sup
}
# 2️⃣ Check variable name
if (!variable_name %in% names(quali_sup_data)) {
stop(paste("Variable", variable_name, "not found among supplementary qualitative variables."))
}
# 3️⃣ Get barycentres (means per category)
bary <- pca_result$call$quali.sup$barycentre
# Select rows for the desired variable
var_categories <- levels(quali_sup_data[[variable_name]])
bary_var <- bary[rownames(bary) %in% var_categories, , drop = FALSE]
if (nrow(bary_var) == 0) {
stop(paste("No barycentres found for variable", variable_name))
}
# 4️⃣ Retrieve PCA scaling information (center and scale)
means <- pca_result$call$centre
sds <- pca_result$call$ecart.type
# Standardize barycentres using PCA preprocessing
bary_scaled <- sweep(bary_var, 2, means, "-")
bary_scaled <- sweep(bary_scaled, 2, sds, "/")
# 5️⃣ Project onto PCA axes
loadings <- pca_result$var$coord
projected <- as.matrix(bary_scaled) %*% as.matrix(loadings[, 1:dims])
# 6️⃣ Format output
projected_df <- as.data.frame(projected)
colnames(projected_df) <- paste0("Dim.", 1:dims)
projected_df$Category <- rownames(projected_df)
projected_df$Variable <- variable_name
return(projected_df)
}
age_coords <- get_sup_coords(PCA1_result, "Age_cat")
vote_coords <- get_sup_coords(PCA1_result, "Vote")
Pol_int_coords<- get_sup_coords(PCA1_result, "Pol_int")
LRight_coords<- get_sup_coords(PCA1_result, "Left_Right")
Sex_coords<- get_sup_coords(PCA1_result, "Sex")
infdis_coords<- get_sup_coords(PCA1_result, "Inf_disagr")
infcheck_coords<- get_sup_coords(PCA1_result, "Inf_check")
infconf_coords<- get_sup_coords(PCA1_result, "Inf_confirm")Biplot showing the centroids of vote
p +
geom_point(data = vote_coords, aes(x = Dim.1, y = Dim.2),
color = "darkblue", size = 3, shape = 20) +
ggrepel::geom_text_repel(
data = vote_coords,
aes(x = Dim.1, y = Dim.2, label = Category),
color = "darkblue",
size = 3.5
)Biplot showing the centroids of age (recoded)
p +
geom_point(data = age_coords, aes(x = Dim.1, y = Dim.2),
color = "darkblue", size = 3, shape = 20) +
ggrepel::geom_text_repel(
data = age_coords,
aes(x = Dim.1, y = Dim.2, label = Category),
color = "darkblue",
size = 3.5
)p +
geom_point(data = Pol_int_coords, aes(x = Dim.1, y = Dim.2),
color = "darkblue", size = 3, shape = 20) +
ggrepel::geom_text_repel(
data = Pol_int_coords,
aes(x = Dim.1, y = Dim.2, label = Category),
color = "darkblue",
size = 3.5
)p +
geom_point(data = LRight_coords, aes(x = Dim.1, y = Dim.2),
color = "darkblue", size = 3, shape = 20) +
ggrepel::geom_text_repel(
data = LRight_coords,
aes(x = Dim.1, y = Dim.2, label = Category),
color = "darkblue",
size = 3.5
)p +
geom_point(data = Sex_coords, aes(x = Dim.1, y = Dim.2),
color = "darkblue", size = 3, shape = 20) +
ggrepel::geom_text_repel(
data = Sex_coords,
aes(x = Dim.1, y = Dim.2, label = Category),
color = "darkblue",
size = 3.5
)p +
geom_point(data = infdis_coords, aes(x = Dim.1, y = Dim.2),
color = "darkblue", size = 3, shape = 20) +
ggrepel::geom_text_repel(
data = infdis_coords,
aes(x = Dim.1, y = Dim.2, label = Category),
color = "darkblue",
size = 3.5
)p +
geom_point(data = infcheck_coords, aes(x = Dim.1, y = Dim.2),
color = "darkblue", size = 3, shape = 20) +
ggrepel::geom_text_repel(
data = infcheck_coords,
aes(x = Dim.1, y = Dim.2, label = Category),
color = "darkblue",
size = 3.5
)p +
geom_point(data = infconf_coords, aes(x = Dim.1, y = Dim.2),
color = "darkblue", size = 3, shape = 20) +
ggrepel::geom_text_repel(
data = infconf_coords,
aes(x = Dim.1, y = Dim.2, label = Category),
color = "darkblue",
size = 3.5
)# | label: prepare-participation
# | code-summary: "Convert dichotomous variables and handle missing values"
# --- 1. Select dichotomous variables (online + offline participation) ---
data_FACTOR <- EUPeopleNet %>%
dplyr::select(parton_1:partoff_13)
# --- 2. Recode missing values (99) to NA ---
data_FACTOR <- data_FACTOR %>%
dplyr::mutate(across(everything(),
~ dplyr::na_if(.x, 99)))
# --- 3. Convert to numeric (ensure 0 and 1 are treated as numeric values) ---
data_FACTOR <- data_FACTOR %>%
dplyr::mutate(across(everything(),
~ as.numeric(as.character(.x))))Rename political participation variables
# | label: rename-participation
# | code-summary: "Rename online and offline political participation variables"
data_FACTOR <- data_FACTOR %>%
dplyr::rename(
# --- Online political participation (13 vars) ---
pol_inf_on = parton_1,
liked_on = parton_2,
shared_on = parton_3,
posted_on = parton_4,
discuss_on = parton_5,
comment_on = parton_6,
profile_on = parton_7,
vol_party_on = parton_8,
donat_party_on = parton_9,
donat_NGO_on = parton_10,
vol_social_on = parton_11,
contact_on = parton_12,
petition_on = parton_13,
# --- Offline political participation (13 vars) ---
pol_meet_off = partoff_1,
strike_off = partoff_2,
demonst_off = partoff_3,
org_demonst_off = partoff_4,
discuss_off = partoff_5,
vol_party_off = partoff_6,
donat_party_off = partoff_7,
donat_NGO_off = partoff_8,
contact_off = partoff_9,
read_campaign_off = partoff_10,
petition_off = partoff_11,
boycott_off = partoff_12,
button_off = partoff_13
)# | label: pca-suitability
# | code-summary: "Check factor analysis suitability of online/offline political participation variables"
# --- 1️⃣ Replace missing codes (99) with NA and ensure numeric format ---
data_FACTOR <- data_FACTOR %>%
mutate(across(everything(),
~ na_if(as.numeric(.x), 99)))
# --- 2️⃣ Drop rows with too many missing value) ---
data_FACTOR <- data_FACTOR %>%
filter(rowMeans(is.na(.)) < 0.5)
# --- 3️⃣ Check remaining missing values ---
cat("Number of missing values per variable:\n")Number of missing values per variable:
print(colSums(is.na(data_FACTOR))) pol_inf_on liked_on shared_on posted_on
20 26 22 16
discuss_on comment_on profile_on vol_party_on
23 25 15 12
donat_party_on donat_NGO_on vol_social_on contact_on
14 20 20 16
petition_on pol_meet_off strike_off demonst_off
21 6 14 14
org_demonst_off discuss_off vol_party_off donat_party_off
14 15 10 19
donat_NGO_off contact_off read_campaign_off petition_off
17 8 11 11
boycott_off button_off
19 9
# --- 4️⃣ Compute the correlation matrix (pairwise complete) ---
cor_matrix <- cor(data_FACTOR, use = "pairwise.complete.obs")
# --- 5️⃣ Visualize correlation ---
if (requireNamespace("corrplot", quietly = TRUE)) {
corrplot::corrplot(cor_matrix, method = "color", type = "upper",
tl.cex = 0.7, tl.col = "black",
title = "Correlation matrix Online/Offline participation items",
mar = c(0,0,2,0))
}# --- 6️⃣ KMO (Kaiser-Meyer-Olkin) Test ---
if (requireNamespace("psych", quietly = TRUE)) {
kmo_result <- psych::KMO(cor_matrix)
print(kmo_result)
} else {
cat("Package 'psych' not installed. Please install it to run KMO test.\n")
}Kaiser-Meyer-Olkin factor adequacy
Call: psych::KMO(r = cor_matrix)
Overall MSA = 0.91
MSA for each item =
pol_inf_on liked_on shared_on posted_on
0.94 0.94 0.95 0.91
discuss_on comment_on profile_on vol_party_on
0.92 0.92 0.93 0.90
donat_party_on donat_NGO_on vol_social_on contact_on
0.83 0.82 0.92 0.92
petition_on pol_meet_off strike_off demonst_off
0.93 0.94 0.84 0.89
org_demonst_off discuss_off vol_party_off donat_party_off
0.92 0.90 0.90 0.83
donat_NGO_off contact_off read_campaign_off petition_off
0.80 0.92 0.89 0.94
boycott_off button_off
0.95 0.95
# --- 7️⃣ Bartlett’s Test of Sphericity ---
if (requireNamespace("psych", quietly = TRUE)) {
bartlett_result <- psych::cortest.bartlett(cor_matrix, n = nrow(data_FACTOR))
print(bartlett_result)
} else {
cat("Package 'psych' not installed. Please install it to run Bartlett test.\n")
}$chisq
[1] 11056.85
$p.value
[1] 0
$df
[1] 325
# --- 8️⃣: Check multicollinearity ---
if (requireNamespace("caret", quietly = TRUE)) {
high_corr <- caret::findCorrelation(cor_matrix, cutoff = 0.9)
if (length(high_corr) > 0) {
cat("Variables with high correlation (> 0.9):\n")
print(names(data_FACTOR)[high_corr])
} else {
cat("No variables show excessive multicollinearity.\n")
}
}No variables show excessive multicollinearity.
# | label: efa-participation
# | code-summary: "Run Exploratory Factor Analysis on political participation data"
# --- Load required packages ---
library(psych)
library(GPArotation)
library(factoextra)
# --- 1️⃣ Compute polychoric correlation matrix (good for binary or ordinal data) ---
cor_poly <- psych::polychoric(data_FACTOR)$rho
# --- 2️⃣ Determine number of factors ---
# Scree plot and parallel analysis help choose the number of latent factors
fa.parallel(cor_poly, n.obs = nrow(data_FACTOR), fm = "minres", fa = "fa")Parallel analysis suggests that the number of factors = 7 and the number of components = NA
# This plot shows eigenvalues from actual data (solid line) vs random data (dotted).
# The point where actual eigenvalues drop below the random ones indicates
# the ideal number of factors to retain.
# --- 3️⃣ Run the Exploratory Factor Analysis (EFA) ---
efa_result <- fa(data_FACTOR,
nfactors = 5, # change this based on parallel analysis
rotate = "oblimin", # allows correlated factors (common in social data)
fm = "minres") # minimum residual method, robust for polychoric data
# --- 4️⃣ Inspect results ---
print(efa_result$loadings, cutoff = 0.3) # Show only loadings > 0.3
Loadings:
MR1 MR2 MR3 MR4 MR5
pol_inf_on 0.468
liked_on 0.577
shared_on 0.606
posted_on 0.794
discuss_on 0.643
comment_on 0.758
profile_on
vol_party_on 0.505
donat_party_on 0.445
donat_NGO_on 0.740
vol_social_on
contact_on 0.484
petition_on 0.333
pol_meet_off 0.533
strike_off 0.644
demonst_off 0.667
org_demonst_off 0.553
discuss_off 0.375 0.360
vol_party_off 0.612
donat_party_off 0.552
donat_NGO_off 0.745
contact_off 0.599
read_campaign_off 0.347
petition_off
boycott_off
button_off 0.345
MR1 MR2 MR3 MR4 MR5
SS loadings 2.979 2.583 1.392 1.194 0.712
Proportion Var 0.115 0.099 0.054 0.046 0.027
Cumulative Var 0.115 0.214 0.267 0.313 0.341
efa_result$Vaccounted # Variance explained by each factor MR1 MR2 MR3 MR4 MR5
SS loadings 3.3640195 2.9436975 1.62198114 1.52976846 0.75101163
Proportion Var 0.1293854 0.1132191 0.06238389 0.05883725 0.02888506
Cumulative Var 0.1293854 0.2426045 0.30498839 0.36382564 0.39271070
Proportion Explained 0.3294674 0.2883016 0.15885457 0.14982339 0.07355303
Cumulative Proportion 0.3294674 0.6177690 0.77662358 0.92644697 1.00000000
efa_result$TLI # Tucker-Lewis Index (fit measure)[1] 0.8935915
efa_result$RMSEA # RMSEA (model fit index) RMSEA lower upper confidence
0.04899477 0.04582540 0.05225409 0.90000000
# --- 5️⃣ Visualize factor loadings ---
fa.diagram(efa_result,
main = "Exploratory Factor Analysis – Political Participation",
simple = FALSE)# --- 6️⃣ Visualize in 2D space ---
library(ggplot2)
loadings <- as.data.frame(efa_result$loadings[1:ncol(data_FACTOR), ])
loadings$variable <- rownames(loadings)
ggplot(loadings, aes(x = MR1, y = MR2, label = variable)) +
geom_point(color = "steelblue", size = 3) +
geom_text(vjust = -0.5, size = 3) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray60") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
theme_minimal() +
labs(title = "Factor Loadings (EFA)", x = "Factor 1", y = "Factor 2")Although the parallel analysis suggested a solution with eight latent factors, the five-factor solution has been selected. Beyond five factors, the factor loaddings become weak and exhibit excessive cross-loading, making the five-factor structure the most readily interpretable. Furthermore, the five-factor solution demonstrates good model fit, as shown by the statistics: TLI=0.894 (on the verge of the 0.9 threshold) and RMSEA=0.049 (clearly under the 0.08 threshold).
This component primarily summarizes online political communication (online posting and commenting are the variables with higher loadings). Interestingly, it also relates to one offline variable (offline discussion of political issues), suggesting that digital political engagement facilitates offline political activity, possibly by increasing the availability of political information.
This factor is characterized by a mix of online and offline practices that share the common thread of partisan mobilization. Significantly, the most important variables include both offline and online ‘volunteering for a political party or campaign’ and ‘contacting an elected official’ (parliamentarian, regional, or municipal councilor). This strong clustering highlights the perfect hybridization of contemporary political participation (cf. Theocharis et al., 2023).
This factor captures traditional unconventional offline political participation: ‘Participated in a strike’ and ‘participated in a demonstration’ (both offline). Interestingly, the variable ‘to organize a demonstration’ loads more highly onto the second factor, aligning it with partisan mobilization activities.
Finally, the fifth factor captures offline information and discussion activities.
Save the factor scores
efa_result <- fa(data_FACTOR,
nfactors = 5,
rotate = "oblimin",
fm = "minres",
scores = "regression") # this computes factor scoresExtract the factor scores
factor_scores <- as.data.frame(efa_result$scores)
head(factor_scores) MR1 MR2 MR3 MR4 MR5
1 0.5173575 0.03565827 -0.5458478 0.02419699 0.28567624
2 -0.6818774 -0.36195158 -0.5735275 -0.57171429 -0.38750231
3 -0.7040207 -0.26643535 0.6967554 -0.36142256 0.03979503
4 -0.4024355 -0.40636664 -0.5782954 -0.44300334 -0.32729814
5 -0.5148042 0.88442056 -0.4923241 -0.39430386 1.12378410
6 -0.6818774 -0.36195158 -0.5735275 -0.57171429 -0.38750231
Rename factors
factor_scores <- factor_scores %>%
rename(
Info_Online = MR1,
Partisan_mobil = MR2,
Civic_Engagement = MR3,
Protest_Action = MR4,
Info_Offline = MR5
)Add factor scores to main dataset
efa_result <- fa(data_FACTOR, nfactors = 5, rotate = "oblimin", fm = "minres", scores = "regression")
length(efa_result$scores) # number of rows with factor scores[1] 7300
# Extract factor scores from efa_result
factor_scores <- as.data.frame(efa_result$scores)
# Add column names
colnames(factor_scores) <- c("Info_Online", "Partisan_mobil",
"Civic_Engagement", "Protest_Action", "Info_Offline")
# Add NAs to all rows, then fill those that match
data_PCA3 <- data_PCA2
data_PCA3[paste0(rownames(factor_scores)),
colnames(factor_scores)] <- factor_scoresPCA3_result <- PCA(data_PCA3,
quanti.sup = c(17:25), # ... + part. factors
quali.sup = c(1:8), #age_cat, vote, sex, L-Righ, pol. interest
graph = FALSE)Summary of the PCA:
summary(PCA3_result)
Call:
PCA(X = data_PCA3, quanti.sup = c(17:25), quali.sup = c(1:8),
graph = FALSE)
Eigenvalues
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
Variance 2.433 1.234 0.837 0.806 0.788 0.719 0.612
% of var. 30.407 15.422 10.464 10.079 9.850 8.990 7.645
Cumulative % of var. 30.407 45.829 56.293 66.372 76.222 85.212 92.857
Dim.8
Variance 0.571
% of var. 7.143
Cumulative % of var. 100.000
Individuals (the 10 first)
Dist Dim.1 ctr cos2 Dim.2 ctr cos2
1 | 2.810 | 1.592 0.069 0.321 | -0.248 0.003 0.008 |
2 | 2.676 | -1.614 0.071 0.364 | -0.299 0.005 0.012 |
3 | 1.691 | -1.598 0.070 0.892 | 0.412 0.009 0.059 |
4 | 2.903 | 0.197 0.001 0.005 | -0.948 0.048 0.107 |
5 | 1.860 | 0.643 0.011 0.120 | 0.338 0.006 0.033 |
6 | 2.391 | -2.012 0.111 0.708 | -0.249 0.003 0.011 |
7 | 1.453 | -0.071 0.000 0.002 | -0.695 0.026 0.229 |
8 | 2.121 | -0.862 0.020 0.165 | 1.428 0.110 0.453 |
9 | 3.780 | 0.930 0.024 0.061 | 1.817 0.178 0.231 |
10 | 1.973 | 1.171 0.038 0.353 | 0.185 0.002 0.009 |
Dim.3 ctr cos2
1 -0.427 0.015 0.023 |
2 -1.113 0.099 0.173 |
3 0.198 0.003 0.014 |
4 0.333 0.009 0.013 |
5 -0.463 0.017 0.062 |
6 -0.256 0.005 0.011 |
7 0.153 0.002 0.011 |
8 0.551 0.024 0.067 |
9 -1.525 0.185 0.163 |
10 0.478 0.018 0.059 |
Variables
Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
YouTube | 0.538 11.915 0.290 | -0.020 0.034 0.000 | -0.637 48.499
WhatsApp | 0.434 7.737 0.188 | 0.537 23.356 0.288 | 0.365 15.926
Twitter_X | 0.611 15.357 0.374 | -0.437 15.485 0.191 | 0.046 0.258
Instagram | 0.690 19.573 0.476 | 0.233 4.398 0.054 | 0.206 5.049
TikTok | 0.592 14.402 0.350 | 0.185 2.782 0.034 | -0.097 1.119
Facebook | 0.392 6.308 0.153 | 0.567 26.011 0.321 | -0.135 2.186
Linkedin | 0.497 10.152 0.247 | -0.454 16.731 0.206 | 0.446 23.784
Telegram | 0.595 14.556 0.354 | -0.372 11.202 0.138 | -0.163 3.180
cos2
YouTube 0.406 |
WhatsApp 0.133 |
Twitter_X 0.002 |
Instagram 0.042 |
TikTok 0.009 |
Facebook 0.018 |
Linkedin 0.199 |
Telegram 0.027 |
Supplementary continuous variables
Dim.1 cos2 Dim.2 cos2 Dim.3 cos2
inf_socially | 0.317 0.100 | -0.028 0.001 | 0.027 0.001 |
inf_pol | 0.287 0.082 | -0.221 0.049 | 0.021 0.000 |
sup_soc_causes | 0.272 0.074 | -0.131 0.017 | -0.002 0.000 |
sup_pol_causes | 0.256 0.065 | -0.160 0.026 | -0.014 0.000 |
Info_Online | 0.057 0.003 | -0.019 0.000 | -0.020 0.000 |
Partisan_mobil | 0.038 0.001 | 0.007 0.000 | 0.013 0.000 |
Civic_Engagement | 0.026 0.001 | -0.004 0.000 | -0.017 0.000 |
Protest_Action | 0.042 0.002 | -0.022 0.000 | -0.020 0.000 |
Info_Offline | 0.003 0.000 | -0.009 0.000 | -0.008 0.000 |
Supplementary categories (the 10 first)
Dist Dim.1 cos2 v.test Dim.2 cos2 v.test
18_24 | 1.516 | 1.111 0.537 8.130 | -0.242 0.026 -2.489
25_34 | 0.781 | 0.711 0.827 8.264 | -0.051 0.004 -0.838
35_44 | 0.378 | 0.307 0.659 3.871 | 0.018 0.002 0.318
45_54 | 0.260 | -0.150 0.330 -1.756 | -0.023 0.008 -0.373
55_64 | 0.450 | -0.371 0.680 -3.745 | 0.153 0.115 2.164
65_+ | 0.994 | -0.948 0.909 -12.159 | 0.035 0.001 0.631
PP | 0.181 | -0.075 0.174 -0.795 | -0.015 0.007 -0.220
PSOE | 0.212 | -0.075 0.124 -0.942 | 0.152 0.511 2.682
VOX | 0.687 | 0.557 0.658 4.133 | 0.091 0.018 0.946
Sumar | 0.446 | 0.228 0.262 1.987 | -0.306 0.470 -3.738
Dim.3 cos2 v.test
18_24 | 0.045 0.001 0.564 |
25_34 | 0.115 0.022 2.284 |
35_44 | -0.049 0.017 -1.062 |
45_54 | 0.035 0.018 0.702 |
55_64 | 0.002 0.000 0.034 |
65_+ | -0.099 0.010 -2.169 |
PP | 0.069 0.145 1.236 |
PSOE | -0.007 0.001 -0.144 |
VOX | -0.119 0.030 -1.508 |
Sumar | 0.098 0.049 1.458 |
Description of the dimensions of the PCA:
First dimension description:
PCA3_desc <- dimdesc(PCA3_result, axes = 1:2, proba = 0.05)
# First dimension
PCA3_desc$Dim.1
Link between the variable and the continuous variables (R-square)
=================================================================================
correlation p.value
Instagram 0.6900103 1.103105e-212
Twitter_X 0.6111961 1.941440e-154
Telegram 0.5950547 1.831279e-144
TikTok 0.5918950 1.411310e-142
YouTube 0.5383770 1.476217e-113
Linkedin 0.4969345 1.990065e-94
WhatsApp 0.4338399 6.318571e-70
Facebook 0.3917088 3.166138e-56
inf_socially 0.3168419 2.369672e-36
inf_pol 0.2866560 8.772841e-30
sup_soc_causes 0.2720851 6.918357e-27
sup_pol_causes 0.2556981 7.809959e-24
Info_Online 0.0573243 2.635931e-02
Link between the variable and the categorical variable (1-way anova)
=============================================
R2 p.value
Age_cat 0.173328865 1.805377e-59
Inf_disagr 0.065063049 6.938223e-21
Inf_check 0.064956786 7.541753e-21
Inf_confirm 0.058231566 1.442538e-18
Vote 0.017828157 1.461680e-03
Left_Right 0.008763054 2.182186e-02
Pol_int 0.005647770 3.704118e-02
Link between variable and the categories of the categorical variables
================================================================
Estimate p.value
Age_cat=25_34 0.6006780 6.496349e-17
Age_cat=18_24 1.0007284 2.081955e-16
Inf_disagr=Inf_disagr_Often 0.3718729 3.245581e-09
Inf_check=Inf_check_Often 0.3562369 1.138440e-08
Inf_check=Inf_check_Very often 0.7159107 1.555352e-08
Inf_disagr=Inf_disagr_Very often 0.7288150 2.143396e-08
Inf_confirm=Inf_confirm_Often 0.3530755 1.260255e-07
Inf_confirm=Inf_confirm_Very often 0.5610816 2.797012e-07
Vote=VOX 0.5925739 3.436378e-05
Age_cat=35_44 0.1969253 1.049325e-04
Pol_int=Very interested 0.2934429 6.245230e-03
Left_Right=Ext_right 0.2970816 1.156829e-02
Left_Right=Ext_left 0.1626145 4.051919e-02
Vote=Sumar 0.2640042 4.688392e-02
Inf_confirm=Inf_confirm_Rarely -0.3568404 1.038729e-03
Inf_check=Inf_check_Rarely -0.4198861 2.674686e-04
Age_cat=55_64 -0.4807461 1.754287e-04
Inf_disagr=Inf_disagr_Rarely -0.4718912 1.244277e-04
Inf_disagr=Inf_disagr_Never -0.5404764 1.176458e-09
Inf_check=Inf_check_Never -0.5831323 2.341727e-10
Inf_confirm=Inf_confirm_Never -0.5259557 4.009742e-11
Age_cat=65_+ -1.0580813 1.094565e-35
Second dimension description:
PCA3_desc$Dim.2
Link between the variable and the continuous variables (R-square)
=================================================================================
correlation p.value
Facebook 0.5665042 3.815478e-128
WhatsApp 0.5368145 8.701018e-113
Instagram 0.2329317 6.077689e-20
TikTok 0.1852588 4.684170e-13
sup_soc_causes -0.1313659 3.266506e-07
sup_pol_causes -0.1599975 4.553176e-10
inf_pol -0.2209871 4.642983e-18
Telegram -0.3717719 2.106881e-50
Twitter_X -0.4370979 4.545840e-71
Linkedin -0.4543420 2.488840e-77
Link between the variable and the categorical variable (1-way anova)
=============================================
R2 p.value
Sex 0.08247299 6.846732e-30
Pol_int 0.03800281 1.557488e-12
Inf_confirm 0.02997522 3.042086e-09
Inf_check 0.01919939 7.741179e-06
Inf_disagr 0.01689364 3.979959e-05
Left_Right 0.01137803 4.269074e-03
Vote 0.01541966 5.712181e-03
Link between variable and the categories of the categorical variables
================================================================
Estimate p.value
Sex=Female 0.31904904 6.846732e-30
Inf_confirm=Inf_confirm_Never 0.27370684 4.621870e-07
Pol_int=Not at all interested 0.34001314 7.547599e-07
Inf_check=Inf_check_Never 0.26044394 1.123382e-05
Inf_disagr=Inf_disagr_Never 0.22785705 3.497317e-05
Left_Right=Ext_right 0.27040053 3.777178e-03
Vote=PSOE 0.19289964 7.268035e-03
Inf_confirm=Inf_confirm_Rarely 0.17604400 1.484558e-02
Pol_int=Not very interested 0.15716161 1.801361e-02
Age_cat=55_64 0.17104978 3.039276e-02
Left_Right=Right -0.17846679 4.513395e-02
Left_Right=Left -0.14978501 1.696177e-02
Inf_confirm=Inf_confirm_Very often -0.20375520 1.306372e-02
Age_cat=18_24 -0.22373834 1.276720e-02
Inf_disagr=Inf_disagr_Often -0.15975253 3.316981e-03
Pol_int=Somewhat interested -0.07692991 4.271881e-04
Inf_check=Inf_check_Often -0.19205758 1.859426e-04
Vote=Sumar -0.26493744 1.802757e-04
Inf_confirm=Inf_confirm_Often -0.22238397 1.204128e-05
Pol_int=Very interested -0.42024483 7.890404e-08
Sex=Male -0.31904904 6.846732e-30
New biplot with factors as supplementary numerical variables
fviz_pca_var(PCA3_result,
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE)The first PCA component, which represents general online platform use, is slightly positively associated with the first three EFA factors. However, only the first factor, ‘Info_Online’, which captures online political information engagement, achieves statistical significance. This suggests that individuals who are more active across social media are also more likely to seek political information online. Other forms of participation (e.g., civic, protest, partisan) show weaker associations, which is consistent with the notion that online media use facilitates information exposure more strongly than it drives offline mobilization.
A PCA of social media use frequency — measuring digital engagement patterns (how often respondents use platforms like YouTube, WhatsApp, X, etc.).
An EFA of political participation items — measuring latent constructs of political behavior (e.g., online activism, civic engagement, protest, etc.).
Here we are looking at how these latent political participation factors relate to the principal components from media use. We are correlating behavioral intensity of media use (PCA) with types of political engagement (EFA). These are conceptually related, but not identical domains.
While the first PCA component usually captures a general intensity of online media use — i.e., respondents who frequently use multiple social platforms, the first EFA factor (“Info_Online”) represents online political information-seeking or engagement, then: It conceptually aligns with high use of online platforms, so, a significant positive association is what theory would predict.
The other EFA factors (e.g. “Partisan Mobilization”, “Civic Engagement”, “Protest Action”, etc.) capture types of political participation, not media consumption frequency per se. Those may not depend directly on how much someone uses social media, but rather on motivation, ideology, or opportunity. Hence, their correlation with PCA components tends to be weak or non-significant.
Different latent spaces: PCA creates orthogonal components explaining variance in observed numeric variables. EFA estimates correlated latent factors explaining covariances among ordinal/binary items. These spaces don’t necessarily align geometrically.
Different variable scales & correlations:
PCA uses continuous measures (frequencies);
EFA used dichotomous (0/1) participation data, in this case via polychoric correlations. Mixing them naturally yields small linear correlations.
Low shared variance: Even if related conceptually, the proportion of shared variance between use frequency and political action is typically modest (<.3).
Estimate a canonical correlation analysis (CCA) between the PCA and EFA latent spaces (measuring shared multivariate structure).
SEM (structural equation modeling) to model “Media Use → Political Engagement Factors” explicitly
These analyses should test if the latent relationships are truly weak or hidden behind different scaling assumptions.
How much the two latent spaces (frequency of social media use and types of political engagement) are related?
Prepare data:
## function to match cases in both data sets
library(psych)
library(dplyr)
run_aligned_efa_pca <- function(data_factor, data_pca,
efa_vars, pca_vars,
n_factors_efa = 5,
n_components_pca = 3,
rotate_efa = "oblimin",
rotate_pca = "none") {
# --- 1️⃣ Ensure both datasets have rownames ---
if (is.null(rownames(data_factor))) rownames(data_factor) <- as.character(1:nrow(data_factor))
if (is.null(rownames(data_pca))) rownames(data_pca) <- as.character(1:nrow(data_pca))
# --- 2️⃣ Restrict to shared respondents (intersection by rownames)
shared_ids <- intersect(rownames(data_factor), rownames(data_pca))
data_factor <- data_factor[shared_ids, , drop = FALSE]
data_pca <- data_pca[shared_ids, , drop = FALSE]
# --- 3️⃣ Identify complete rows for each dataset
complete_factor <- complete.cases(data_factor[, efa_vars])
complete_pca <- complete.cases(data_pca[, pca_vars])
# --- 4️⃣ Keep only respondents complete in both datasets
common_complete <- complete_factor & complete_pca
data_factor_aligned <- data_factor[common_complete, efa_vars, drop = FALSE]
data_pca_aligned <- data_pca[common_complete, pca_vars, drop = FALSE]
message("✅ Using ", sum(common_complete), " respondents with complete data in both datasets.")
# --- 5️⃣ Run EFA ---
efa_result <- fa(
r = polychoric(data_factor_aligned)$rho,
nfactors = n_factors_efa,
rotate = rotate_efa,
fm = "minres"
)
# --- 6️⃣ Extract EFA scores ---
efa_scores <- factor.scores(data_factor_aligned, efa_result)$scores
# --- 7️⃣ Run PCA ---
pca_result <- psych::principal(
data_pca_aligned,
nfactors = n_components_pca,
rotate = rotate_pca,
scores = TRUE
)
# --- 8️⃣ Combine results ---
combined_scores <- bind_cols(
as.data.frame(pca_result$scores),
as.data.frame(efa_scores)
)
list(
efa = efa_result,
pca = pca_result,
combined = combined_scores,
n_used = sum(common_complete)
)
}result <- run_aligned_efa_pca(
data_factor = data_FACTOR,
data_pca = data_PCA3,
efa_vars = 1:26,
pca_vars = c("YouTube","WhatsApp","Twitter_X","Instagram",
"TikTok","Facebook","Linkedin","Telegram"),
n_factors_efa = 5,
n_components_pca = 3
)The raw linear relationships before deeper testing. Which PCA components correlate with which EFA factors? PC1 correlates with MR2, also MR1 and MR5.
cor_matrix_pca_efa <- cor(result$combined, use = "pairwise.complete.obs")
corrplot(cor_matrix_pca_efa,
method = "color",
type = "upper",
tl.col = "black",
tl.cex = 0.8,
addCoef.col = "black",
number.cex = 0.6,
title = "Correlations between PCA Components and EFA Factors",
mar = c(0,0,2,0))Formally testing the relationship between EFA-derived latent constructs (political participation / information factors) and PCA components (social media usage patterns).
Canonical Correlation Analysis (CCA) finds pairs of linear combinations (canonical variates) of the two sets that are maximally correlated. It’s the multivariate analog of correlating factors and components.
result$factor_scores → EFA latent factors (e.g. 5 factors)
result$pca_scores → PCA components (e.g. 3 components)
We want to run a Canonical Correlation Analysis (CCA) to assess how strongly these two latent structures are related.
library(CCA)
library(dplyr)
# Extract aligned datasets
aligned_df <- result$combined
# EFA factor scores (latent political engagement dimensions)
efa_scores <- dplyr::select(aligned_df, MR1, MR2, MR3, MR4, MR5)
# PCA components (social media use dimensions)
pca_scores <- dplyr::select(aligned_df, PC1, PC2, PC3)
dim(efa_scores)[1] 1222 5
dim(pca_scores)[1] 1222 3
run canonical correlation:
cca_result <- cc(pca_scores, efa_scores)Inspect the canonical correlations:
# Canonical correlations
cca_result$cor[1] 0.06695529 0.04351579 0.03136912
# Structure coefficients (loadings)
cca_result$xcoef # PCA variable coefficients [,1] [,2] [,3]
PC1 -0.973764166 -0.07815694 -0.2137167
PC2 0.227477116 -0.30904823 -0.9234410
PC3 0.006124545 -0.94782946 0.3187190
cca_result$ycoef # EFA variable coefficients [,1] [,2] [,3]
MR1 0.04822493 -1.23260351 -0.5532491
MR2 -0.96687103 0.57237765 -0.4424578
MR3 0.03371717 0.40388438 -0.5112884
MR4 0.35807075 0.05282431 0.2477920
MR5 -0.30467535 0.30349079 1.0866347
Test the significance (Wilks’ Lambda)
library(CCP)
rho <- cca_result$cor
n <- nrow(aligned_df)
p <- ncol(pca_scores)
q <- ncol(efa_scores)
# Significance test
cca_test <- p.asym(rho, n, p, q, tstat = "Wilks")Wilks' Lambda, using F-approximation (Rao's F):
stat approx df1 df2 p.value
1 to 3: 0.9926541 0.5975919 15 3351.721 0.8791192
2 to 3: 0.9971242 0.4377038 8 2430.000 0.8989235
3 to 3: 0.9990160 0.3992497 3 1216.000 0.7535676
cca_test$id
[1] "Wilks"
$stat
[1] 0.9926541 0.9971242 0.9990160
$approx
[1] 0.5975919 0.4377038 0.3992497
$df1
[1] 15 8 3
$df2
[1] 3351.721 2430.000 1216.000
$p.value
[1] 0.8791192 0.8989235 0.7535676
Visualize canonical variables:
library(ggplot2)
# Compute canonical variate scores
U <- as.data.frame(as.matrix(pca_scores) %*% cca_result$xcoef) # for PCA side
V <- as.data.frame(as.matrix(efa_scores) %*% cca_result$ycoef) # for EFA side
# Name them
colnames(U) <- paste0("U", 1:ncol(U))
colnames(V) <- paste0("V", 1:ncol(V))
# Combine into one dataframe
cca_df <- cbind(U, V)
# Plot the first pair of canonical variates
ggplot(cca_df, aes(x = U1, y = V1)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "coral") +
labs(
title = "Canonical Correlation between PCA (Social Media) and EFA (Political Engagement)",
x = "First Canonical Variate (PCA)",
y = "First Canonical Variate (EFA)"
) +
theme_minimal()library(ggplot2)
data.frame(
Canonical_Function = 1:length(cca_result$cor),
Correlation = cca_result$cor
) %>%
ggplot(aes(x = Canonical_Function, y = Correlation)) +
geom_col(fill = "coral") +
geom_text(aes(label = round(Correlation, 2)), vjust = -0.3) +
labs(title = "Canonical Correlations between EFA and PCA Latent Sets",
x = "Canonical Function", y = "Correlation") +
theme_minimal()Hierarchical clustering on PCA results:
hcpc_res <- FactoMineR::HCPC(PCA2_result, nb.clust = -1, graph = FALSE) # -1 lets HCPC choosestr(hcpc_res$call$X) # dataset used for clustering'data.frame': 1501 obs. of 6 variables:
$ Dim.1: num -3.99 -3.99 -3.99 -3.99 -3.99 ...
$ Dim.2: num -2.57 -2.57 -2.57 -2.57 -2.57 ...
$ Dim.3: num -0.95 -0.95 -0.95 -0.95 -0.95 ...
$ Dim.4: num -2.63 -2.63 -2.63 -2.63 -2.63 ...
$ Dim.5: num 0.659 0.659 0.659 0.659 0.659 ...
$ clust: Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
Cluster description
print(hcpc_res$desc.var$quanti) # quantitative variable description$`1`
v.test Mean in category Overall mean sd in category Overall sd
Linkedin -3.184222 1.5128513 1.9202797 1.0365744 1.3660273
Twitter_X -3.681680 1.7254013 2.3785079 1.3588406 1.8938670
TikTok -4.732098 1.6145297 2.5233582 1.2281800 2.0504048
inf_socially -5.191121 0.2924528 0.5349767 0.4548892 0.4987751
YouTube -6.439676 2.9904070 3.9966285 1.6338304 1.6681727
Facebook -8.544822 2.0678790 3.7317237 1.4488388 2.0788407
Instagram -10.900596 1.7367125 4.0228802 1.4176123 2.2390785
WhatsApp -29.736854 3.1698113 6.0588629 1.4173567 1.0372220
p.value
Linkedin 1.451434e-03
Twitter_X 2.317022e-04
TikTok 2.222108e-06
inf_socially 2.090321e-07
YouTube 1.197288e-10
Facebook 1.287341e-17
Instagram 1.145049e-27
WhatsApp 2.564964e-194
$`2`
v.test Mean in category Overall mean sd in category
WhatsApp 5.922126 6.21735746 6.05886288 0.4806972
Facebook -4.359055 3.49790532 3.73172368 2.0261150
sup_pol_causes -6.150088 0.04660453 0.09260493 0.2107903
inf_socially -6.287262 0.45406125 0.53497668 0.4978852
sup_soc_causes -6.352650 0.09587217 0.15522985 0.2944159
inf_pol -7.450791 0.18508655 0.27048634 0.3883678
YouTube -8.955711 3.61114468 3.99662846 1.6329018
Linkedin -11.260770 1.52336931 1.92027972 0.8928070
Instagram -15.164794 3.14674557 4.02288022 2.1328214
Twitter_X -15.907693 1.60114873 2.37850787 1.2207215
Telegram -16.612668 1.69179604 2.50204360 1.2800069
TikTok -21.979177 1.36052855 2.52335816 0.8665336
Overall sd p.value
WhatsApp 1.0372220 3.178054e-09
Facebook 2.0788407 1.306252e-05
sup_pol_causes 0.2898780 7.743991e-10
inf_socially 0.4987751 3.231149e-10
sup_soc_causes 0.3621237 2.116362e-10
inf_pol 0.4442111 9.278213e-14
YouTube 1.6681727 3.375542e-19
Linkedin 1.3660273 2.049603e-29
Instagram 2.2390785 6.048865e-52
Twitter_X 1.8938670 5.604096e-57
Telegram 1.8902241 5.642660e-62
TikTok 2.0504048 4.556445e-107
$`3`
v.test Mean in category Overall mean sd in category Overall sd
TikTok 24.942608 4.986916 2.523358 1.6845314 2.050405
Instagram 14.290134 5.564179 4.022880 1.4251160 2.239078
Facebook 8.253396 4.558209 3.731724 2.0271404 2.078841
WhatsApp 7.544666 6.435821 6.058863 0.5687644 1.037222
YouTube 5.279419 4.420865 3.996628 1.5372609 1.668173
Twitter_X -3.853601 2.026949 2.378508 1.4931714 1.893867
Linkedin -5.808792 1.538048 1.920280 0.8632652 1.366027
p.value
TikTok 2.568526e-137
Instagram 2.521229e-46
Facebook 1.539561e-16
WhatsApp 4.534479e-14
YouTube 1.295939e-07
Twitter_X 1.163930e-04
Linkedin 6.292506e-09
$`4`
v.test Mean in category Overall mean sd in category
Twitter_X 25.972838 4.8730001 2.37850787 1.6670814
Linkedin 21.924668 3.4390975 1.92027972 1.7451606
Telegram 19.610825 4.3818902 2.50204360 1.9075927
inf_pol 11.840499 0.5372168 0.27048634 0.4986130
Instagram 10.943665 5.2655203 4.02288022 1.6211833
sup_pol_causes 9.771247 0.2362460 0.09260493 0.4247750
YouTube 9.718219 4.8187593 3.99662846 1.4071669
inf_socially 9.172662 0.7669903 0.53497668 0.4227484
sup_soc_causes 8.464863 0.3106796 0.15522985 0.4627719
TikTok 4.492027 2.9904424 2.52335816 2.1530637
WhatsApp 3.748705 6.2560444 6.05886288 0.7122354
Facebook 2.305175 3.9747417 3.73172368 1.9894697
Overall sd p.value
Twitter_X 1.8938670 1.004124e-148
Linkedin 1.3660273 1.511360e-106
Telegram 1.8902241 1.249884e-85
inf_pol 0.4442111 2.410140e-32
Instagram 2.2390785 7.126010e-28
sup_pol_causes 0.2898780 1.495998e-22
YouTube 1.6681727 2.521534e-22
inf_socially 0.4987751 4.614779e-20
sup_soc_causes 0.3621237 2.564547e-17
TikTok 2.0504048 7.054839e-06
WhatsApp 1.0372220 1.777499e-04
Facebook 2.0788407 2.115676e-02
print(hcpc_res$desc.var$quali) # qualitative variable descriptionNULL
Clusters on PCA map:
plot(hcpc_res, choice = "map") # clusters on PCA mapDendrogram
plot(hcpc_res, choice = "tree") # dendrogram# --- Load required packages ---
library(FactoMineR)
library(factoextra)
library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)
library(forcats)
# --- 1️⃣ Extract cluster assignments ---
clusters <- hcpc_res$data.clust %>%
dplyr::select(clust) %>%
dplyr::mutate(ID = dplyr::row_number())
# --- 2️⃣ Merge clusters with your original dataset ---
data_clustered <- data_PCA3 %>%
dplyr::mutate(ID = dplyr::row_number()) %>%
dplyr::left_join(clusters, by = "ID")
# --- 3️⃣ Identify supplementary variables ---
# Adjust this selection to match the variables you want to use for description
num_vars <- names(dplyr::select(data_clustered, where(is.numeric))) %>%
setdiff(c("ID")) # exclude ID
cat_vars <- names(dplyr::select(data_clustered, where(is.factor)))# --- 4️⃣ Quantitative variables summary ---
cat("===== Quantitative Variables by Cluster =====\n")===== Quantitative Variables by Cluster =====
num_summary <- data_clustered %>%
group_by(clust) %>%
summarise(across(all_of(num_vars),
list(mean = mean, sd = sd),
.names = "{.col}_{.fn}"),
.groups = "drop")
print(num_summary)# A tibble: 4 × 35
clust YouTube_mean YouTube_sd WhatsApp_mean WhatsApp_sd Twitter_X_mean
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 NA NA 3.17 1.42 NA
2 2 NA NA NA NA NA
3 3 NA NA 6.44 0.570 NA
4 4 NA NA NA NA NA
# ℹ 29 more variables: Twitter_X_sd <dbl>, Instagram_mean <dbl>,
# Instagram_sd <dbl>, TikTok_mean <dbl>, TikTok_sd <dbl>,
# Facebook_mean <dbl>, Facebook_sd <dbl>, Linkedin_mean <dbl>,
# Linkedin_sd <dbl>, Telegram_mean <dbl>, Telegram_sd <dbl>,
# inf_socially_mean <dbl>, inf_socially_sd <dbl>, inf_pol_mean <dbl>,
# inf_pol_sd <dbl>, sup_soc_causes_mean <dbl>, sup_soc_causes_sd <dbl>,
# sup_pol_causes_mean <dbl>, sup_pol_causes_sd <dbl>, …
# --- ANOVA significance tests ---
anova_results <- sapply(num_vars, function(v) {
if (all(is.na(data_clustered[[v]]))) return(NA)
fit <- aov(data_clustered[[v]] ~ data_clustered$clust)
summary(fit)[[1]][["Pr(>F)"]][1]
})
anova_results <- sort(na.omit(anova_results))
cat("\n--- ANOVA p-values (quantitative vars) ---\n")
--- ANOVA p-values (quantitative vars) ---
print(anova_results) WhatsApp TikTok Twitter_X Instagram
2.833461e-293 2.165287e-231 2.457226e-196 2.606196e-125
Linkedin Telegram YouTube inf_pol
1.842067e-122 5.948653e-114 2.709095e-40 1.877210e-32
Facebook inf_socially sup_pol_causes sup_soc_causes
1.411693e-30 1.530211e-25 1.047215e-21 2.383617e-17
Info_Online Protest_Action Partisan_mobil Civic_Engagement
8.189722e-02 1.705594e-01 2.279796e-01 4.928133e-01
Info_Offline
7.536225e-01
# --- 5️⃣ Categorical variables description ---
cat("\n===== Categorical Variables by Cluster =====\n")
===== Categorical Variables by Cluster =====
for (v in cat_vars) {
cat("\n###", v, "###\n")
tab <- table(data_clustered[[v]], data_clustered$clust)
print(tab)
test <- suppressWarnings(chisq.test(tab))
print(test)
}
### Age_cat ###
1 2 3 4
0_17 0 0 0 0
18_24 6 23 53 38
25_34 7 99 81 83
35_44 13 138 77 80
45_54 20 148 51 55
55_64 18 128 38 29
65_+ 42 215 35 24
Pearson's Chi-squared test
data: tab
X-squared = NaN, df = 18, p-value = NA
### Vote ###
1 2 3 4
PP 19 122 39 49
PSOE 18 157 79 53
VOX 5 44 41 33
Sumar 9 78 28 49
ERC 2 23 11 6
JxCat-Junts 3 26 3 9
Other party 7 40 15 26
Did not vote 18 112 59 35
I voted blank / no ballot paper 5 20 10 9
I prefer not to answer 20 129 50 40
Pearson's Chi-squared test
data: tab
X-squared = 54.547, df = 27, p-value = 0.001304
### Sex ###
1 2 3 4
Male 73 350 118 195
Female 33 401 217 114
Pearson's Chi-squared test
data: tab
X-squared = 68.513, df = 3, p-value = 8.885e-15
### Left_Right ###
1 2 3 4
Ext_left 10 86 43 41
Left 22 171 56 85
Center 28 221 91 82
Right 14 86 37 41
Ext_right 3 52 35 22
DKDA 29 135 73 38
Pearson's Chi-squared test
data: tab
X-squared = 32.435, df = 15, p-value = 0.005615
### Pol_int ###
1 2 3 4
Very interested 10 63 13 48
Somewhat interested 36 232 94 121
Not very interested 35 296 133 98
Not at all interested 25 160 95 42
Pearson's Chi-squared test
data: tab
X-squared = 52.951, df = 9, p-value = 2.986e-08
### Inf_disagr ###
1 2 3 4
Very often 3 52 22 37
Often 16 149 76 114
sometimes 42 275 122 112
Rarely 16 100 44 20
Never 29 175 71 26
Pearson's Chi-squared test
data: tab
X-squared = 81.117, df = 12, p-value = 2.526e-12
### Inf_check ###
1 2 3 4
Very often 5 40 21 40
Often 18 125 61 91
sometimes 31 240 118 111
Rarely 18 156 52 43
Never 34 190 83 24
Pearson's Chi-squared test
data: tab
X-squared = 86.143, df = 12, p-value = 2.743e-13
### Inf_confirm ###
1 2 3 4
Very often 7 78 26 59
Often 23 151 89 110
sometimes 35 212 102 97
Rarely 13 119 40 23
Never 28 191 78 20
Pearson's Chi-squared test
data: tab
X-squared = 98.695, df = 12, p-value = 1.003e-15
### clust ###
1 2 3 4
1 106 0 0 0
2 0 751 0 0
3 0 0 335 0
4 0 0 0 309
Pearson's Chi-squared test
data: tab
X-squared = 4503, df = 9, p-value < 2.2e-16
# --- 6️⃣ Visualizations ---
## Boxplots for top 3 quantitative differentiators
top_num <- names(head(anova_results, 3))
for (v in top_num) {
ggplot(data_clustered, aes(x = clust, y = .data[[v]], fill = clust)) +
geom_boxplot(alpha = 0.7) +
theme_minimal() +
labs(title = paste("Distribution of", v, "by Cluster"),
x = "Cluster", y = v) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Dark2") -> p
print(p)
}## Stacked bar plots for categorical variables
for (v in cat_vars) {
ggplot(data_clustered, aes(x = clust, fill = .data[[v]])) +
geom_bar(position = "fill") +
scale_y_continuous(labels = percent) +
labs(title = paste(v, "distribution across clusters"),
x = "Cluster", y = "Percentage") +
theme_minimal() +
scale_fill_brewer(palette = "Set2") -> p
print(p)
}# --- 7️⃣ HCPC’s internal cluster descriptions ---
cat("\n===== HCPC Internal Descriptions =====\n")
===== HCPC Internal Descriptions =====
cat("\n--- Quantitative Variables ---\n")
--- Quantitative Variables ---
print(hcpc_res$desc.var$quanti)$`1`
v.test Mean in category Overall mean sd in category Overall sd
Linkedin -3.184222 1.5128513 1.9202797 1.0365744 1.3660273
Twitter_X -3.681680 1.7254013 2.3785079 1.3588406 1.8938670
TikTok -4.732098 1.6145297 2.5233582 1.2281800 2.0504048
inf_socially -5.191121 0.2924528 0.5349767 0.4548892 0.4987751
YouTube -6.439676 2.9904070 3.9966285 1.6338304 1.6681727
Facebook -8.544822 2.0678790 3.7317237 1.4488388 2.0788407
Instagram -10.900596 1.7367125 4.0228802 1.4176123 2.2390785
WhatsApp -29.736854 3.1698113 6.0588629 1.4173567 1.0372220
p.value
Linkedin 1.451434e-03
Twitter_X 2.317022e-04
TikTok 2.222108e-06
inf_socially 2.090321e-07
YouTube 1.197288e-10
Facebook 1.287341e-17
Instagram 1.145049e-27
WhatsApp 2.564964e-194
$`2`
v.test Mean in category Overall mean sd in category
WhatsApp 5.922126 6.21735746 6.05886288 0.4806972
Facebook -4.359055 3.49790532 3.73172368 2.0261150
sup_pol_causes -6.150088 0.04660453 0.09260493 0.2107903
inf_socially -6.287262 0.45406125 0.53497668 0.4978852
sup_soc_causes -6.352650 0.09587217 0.15522985 0.2944159
inf_pol -7.450791 0.18508655 0.27048634 0.3883678
YouTube -8.955711 3.61114468 3.99662846 1.6329018
Linkedin -11.260770 1.52336931 1.92027972 0.8928070
Instagram -15.164794 3.14674557 4.02288022 2.1328214
Twitter_X -15.907693 1.60114873 2.37850787 1.2207215
Telegram -16.612668 1.69179604 2.50204360 1.2800069
TikTok -21.979177 1.36052855 2.52335816 0.8665336
Overall sd p.value
WhatsApp 1.0372220 3.178054e-09
Facebook 2.0788407 1.306252e-05
sup_pol_causes 0.2898780 7.743991e-10
inf_socially 0.4987751 3.231149e-10
sup_soc_causes 0.3621237 2.116362e-10
inf_pol 0.4442111 9.278213e-14
YouTube 1.6681727 3.375542e-19
Linkedin 1.3660273 2.049603e-29
Instagram 2.2390785 6.048865e-52
Twitter_X 1.8938670 5.604096e-57
Telegram 1.8902241 5.642660e-62
TikTok 2.0504048 4.556445e-107
$`3`
v.test Mean in category Overall mean sd in category Overall sd
TikTok 24.942608 4.986916 2.523358 1.6845314 2.050405
Instagram 14.290134 5.564179 4.022880 1.4251160 2.239078
Facebook 8.253396 4.558209 3.731724 2.0271404 2.078841
WhatsApp 7.544666 6.435821 6.058863 0.5687644 1.037222
YouTube 5.279419 4.420865 3.996628 1.5372609 1.668173
Twitter_X -3.853601 2.026949 2.378508 1.4931714 1.893867
Linkedin -5.808792 1.538048 1.920280 0.8632652 1.366027
p.value
TikTok 2.568526e-137
Instagram 2.521229e-46
Facebook 1.539561e-16
WhatsApp 4.534479e-14
YouTube 1.295939e-07
Twitter_X 1.163930e-04
Linkedin 6.292506e-09
$`4`
v.test Mean in category Overall mean sd in category
Twitter_X 25.972838 4.8730001 2.37850787 1.6670814
Linkedin 21.924668 3.4390975 1.92027972 1.7451606
Telegram 19.610825 4.3818902 2.50204360 1.9075927
inf_pol 11.840499 0.5372168 0.27048634 0.4986130
Instagram 10.943665 5.2655203 4.02288022 1.6211833
sup_pol_causes 9.771247 0.2362460 0.09260493 0.4247750
YouTube 9.718219 4.8187593 3.99662846 1.4071669
inf_socially 9.172662 0.7669903 0.53497668 0.4227484
sup_soc_causes 8.464863 0.3106796 0.15522985 0.4627719
TikTok 4.492027 2.9904424 2.52335816 2.1530637
WhatsApp 3.748705 6.2560444 6.05886288 0.7122354
Facebook 2.305175 3.9747417 3.73172368 1.9894697
Overall sd p.value
Twitter_X 1.8938670 1.004124e-148
Linkedin 1.3660273 1.511360e-106
Telegram 1.8902241 1.249884e-85
inf_pol 0.4442111 2.410140e-32
Instagram 2.2390785 7.126010e-28
sup_pol_causes 0.2898780 1.495998e-22
YouTube 1.6681727 2.521534e-22
inf_socially 0.4987751 4.614779e-20
sup_soc_causes 0.3621237 2.564547e-17
TikTok 2.0504048 7.054839e-06
WhatsApp 1.0372220 1.777499e-04
Facebook 2.0788407 2.115676e-02
cat("\n--- Qualitative Variables ---\n")
--- Qualitative Variables ---
print(hcpc_res$desc.var$quali)NULL
# --- 📦 Load required packages ---
library(dplyr)
library(broom)
library(car)
library(ggplot2)
library(purrr)
library(tidyr)
library(stringr)
# --- 🧩 Define variables ---
social_vars <- c("YouTube","WhatsApp","Twitter_X","Instagram",
"TikTok","Facebook","Linkedin","Telegram")
predictors <- c("Age_cat", "Vote", "Sex", "Left_Right",
"Pol_int", "Info_Online", "Partisan_mobil")
# --- 🧠 Run OLS for each platform (Final Corrected Version) ---
ols_results <- map_df(social_vars, function(var) {
formula <- as.formula(
paste(var, "~", paste(predictors, collapse = " + "))
)
model <- lm(formula, data = data_PCA3)
# Extract coefficients and model fit info
coef_tbl <- broom::tidy(model, conf.int = TRUE) %>%
mutate(
Dependent = var,
Adj_R2 = summary(model)$adj.r.squared,
F_stat = broom::glance(model)$statistic,
p_model = broom::glance(model)$p.value
)
# --- ROBUST VIF MERGING BLOCK ---
vif_vals <- tryCatch({
car::vif(model)
}, error = function(e) NULL)
if (!is.null(vif_vals)) {
# Step 1: Create a reliable VIF data frame from either a matrix or a vector
# The key column here is named "predictor_key".
if (is.matrix(vif_vals)) {
vif_df <- tibble(
predictor_key = rownames(vif_vals),
VIF = vif_vals[, 1]
)
} else {
vif_df <- tibble(
predictor_key = names(vif_vals),
VIF = vif_vals
)
}
# Step 2: Create a matching "predictor_key" column in the main coefficient table
predictor_pattern <- paste(predictors, collapse = "|")
coef_tbl <- coef_tbl %>%
mutate(predictor_key = stringr::str_extract(term, predictor_pattern))
# Step 3: Join the tables using the correct key.
coef_tbl <- left_join(coef_tbl, vif_df, by = "predictor_key") %>%
dplyr::select(-predictor_key) # Remove the temporary key column
} else {
# If VIF calculation failed, just add an NA column
coef_tbl <- coef_tbl %>% mutate(VIF = NA_real_)
}
coef_tbl
})
# --- 🧾 Model Fit Summary ---
fit_summary <- ols_results %>%
group_by(Dependent) %>%
summarise(
Adj_R2 = unique(Adj_R2),
F_stat = unique(F_stat),
p_model = unique(p_model)
)
knitr::kable(fit_summary, digits = 3, caption = "Model Fit Statistics per Platform")| Dependent | Adj_R2 | F_stat | p_model |
|---|---|---|---|
| 0.049 | 3.676 | 0 | |
| 0.222 | 15.792 | 0 | |
| 0.081 | 5.377 | 0 | |
| Telegram | 0.071 | 4.928 | 0 |
| TikTok | 0.136 | 9.091 | 0 |
| Twitter_X | 0.133 | 8.837 | 0 |
| 0.048 | 3.645 | 0 | |
| YouTube | 0.074 | 5.142 | 0 |
# --- 📉 Coefficients Table ---
coef_table <- ols_results %>%
filter(term != "(Intercept)") %>%
mutate(Significant = ifelse(p.value < 0.05, "Yes", "No")) %>%
dplyr::select(Dependent, term, estimate, std.error, conf.low, conf.high, p.value, VIF, Significant)
knitr::kable(coef_table, digits = 3, caption = "OLS Regression Coefficients (95% CI, VIF, Significance)")| Dependent | term | estimate | std.error | conf.low | conf.high | p.value | VIF | Significant |
|---|---|---|---|---|---|---|---|---|
| YouTube | Age_cat25_34 | -0.025 | 0.192 | -0.402 | 0.352 | 0.897 | 1.184 | No |
| YouTube | Age_cat35_44 | -0.128 | 0.190 | -0.502 | 0.246 | 0.501 | 1.184 | No |
| YouTube | Age_cat45_54 | -0.570 | 0.195 | -0.953 | -0.188 | 0.004 | 1.184 | Yes |
| YouTube | Age_cat55_64 | -0.719 | 0.203 | -1.116 | -0.321 | 0.000 | 1.184 | Yes |
| YouTube | Age_cat65_+ | -0.956 | 0.194 | -1.336 | -0.575 | 0.000 | 1.184 | Yes |
| YouTube | VotePSOE | 0.037 | 0.181 | -0.318 | 0.392 | 0.839 | 3.507 | No |
| YouTube | VoteVOX | 0.145 | 0.205 | -0.258 | 0.547 | 0.481 | 3.507 | No |
| YouTube | VoteSumar | 0.065 | 0.217 | -0.361 | 0.490 | 0.766 | 3.507 | No |
| YouTube | VoteERC | -0.440 | 0.324 | -1.076 | 0.195 | 0.174 | 3.507 | No |
| YouTube | VoteJxCat-Junts | -0.486 | 0.313 | -1.100 | 0.128 | 0.121 | 3.507 | No |
| YouTube | VoteOther party | 0.426 | 0.228 | -0.021 | 0.873 | 0.062 | 3.507 | No |
| YouTube | VoteDid not vote | 0.262 | 0.191 | -0.113 | 0.637 | 0.171 | 3.507 | No |
| YouTube | VoteI voted blank / no ballot paper | -0.309 | 0.301 | -0.899 | 0.282 | 0.305 | 3.507 | No |
| YouTube | VoteI prefer not to answer | -0.119 | 0.187 | -0.485 | 0.247 | 0.523 | 3.507 | No |
| YouTube | SexFemale | -0.269 | 0.092 | -0.450 | -0.088 | 0.004 | 1.061 | Yes |
| YouTube | Left_RightLeft | -0.303 | 0.160 | -0.617 | 0.011 | 0.059 | 3.333 | No |
| YouTube | Left_RightCenter | 0.073 | 0.169 | -0.258 | 0.405 | 0.664 | 3.333 | No |
| YouTube | Left_RightRight | 0.025 | 0.214 | -0.394 | 0.444 | 0.907 | 3.333 | No |
| YouTube | Left_RightExt_right | -0.065 | 0.246 | -0.547 | 0.418 | 0.792 | 3.333 | No |
| YouTube | Left_RightDKDA | 0.116 | 0.196 | -0.268 | 0.500 | 0.555 | 3.333 | No |
| YouTube | Pol_intSomewhat interested | -0.280 | 0.171 | -0.615 | 0.055 | 0.102 | 1.538 | No |
| YouTube | Pol_intNot very interested | -0.327 | 0.173 | -0.667 | 0.014 | 0.060 | 1.538 | No |
| YouTube | Pol_intNot at all interested | -0.638 | 0.197 | -1.025 | -0.251 | 0.001 | 1.538 | Yes |
| YouTube | Info_Online | 0.091 | 0.059 | -0.026 | 0.207 | 0.126 | 1.482 | No |
| YouTube | Partisan_mobil | 0.004 | 0.063 | -0.120 | 0.128 | 0.949 | 1.477 | No |
| Age_cat25_34 | 0.093 | 0.117 | -0.137 | 0.323 | 0.428 | 1.183 | No | |
| Age_cat35_44 | 0.035 | 0.116 | -0.193 | 0.263 | 0.765 | 1.183 | No | |
| Age_cat45_54 | -0.034 | 0.119 | -0.267 | 0.199 | 0.774 | 1.183 | No | |
| Age_cat55_64 | -0.164 | 0.124 | -0.407 | 0.078 | 0.184 | 1.183 | No | |
| Age_cat65_+ | -0.486 | 0.118 | -0.717 | -0.254 | 0.000 | 1.183 | Yes | |
| VotePSOE | 0.076 | 0.110 | -0.140 | 0.291 | 0.491 | 3.502 | No | |
| VoteVOX | 0.154 | 0.125 | -0.090 | 0.399 | 0.216 | 3.502 | No | |
| VoteSumar | -0.057 | 0.132 | -0.316 | 0.202 | 0.666 | 3.502 | No | |
| VoteERC | 0.141 | 0.197 | -0.246 | 0.528 | 0.475 | 3.502 | No | |
| VoteJxCat-Junts | 0.166 | 0.189 | -0.204 | 0.537 | 0.378 | 3.502 | No | |
| VoteOther party | -0.073 | 0.139 | -0.346 | 0.199 | 0.597 | 3.502 | No | |
| VoteDid not vote | 0.006 | 0.116 | -0.222 | 0.234 | 0.961 | 3.502 | No | |
| VoteI voted blank / no ballot paper | -0.179 | 0.183 | -0.539 | 0.181 | 0.331 | 3.502 | No | |
| VoteI prefer not to answer | -0.083 | 0.113 | -0.304 | 0.139 | 0.465 | 3.502 | No | |
| SexFemale | 0.232 | 0.056 | 0.122 | 0.342 | 0.000 | 1.061 | Yes | |
| Left_RightLeft | -0.096 | 0.098 | -0.288 | 0.096 | 0.327 | 3.302 | No | |
| Left_RightCenter | -0.024 | 0.103 | -0.226 | 0.179 | 0.819 | 3.302 | No | |
| Left_RightRight | -0.055 | 0.130 | -0.311 | 0.201 | 0.672 | 3.302 | No | |
| Left_RightExt_right | 0.062 | 0.150 | -0.232 | 0.356 | 0.679 | 3.302 | No | |
| Left_RightDKDA | -0.206 | 0.119 | -0.439 | 0.027 | 0.084 | 3.302 | No | |
| Pol_intSomewhat interested | -0.018 | 0.104 | -0.223 | 0.186 | 0.860 | 1.515 | No | |
| Pol_intNot very interested | -0.028 | 0.106 | -0.235 | 0.180 | 0.795 | 1.515 | No | |
| Pol_intNot at all interested | -0.062 | 0.120 | -0.297 | 0.173 | 0.605 | 1.515 | No | |
| Info_Online | -0.019 | 0.036 | -0.089 | 0.051 | 0.597 | 1.469 | No | |
| Partisan_mobil | 0.074 | 0.039 | -0.001 | 0.150 | 0.055 | 1.467 | No | |
| Twitter_X | Age_cat25_34 | -0.604 | 0.211 | -1.019 | -0.189 | 0.004 | 1.187 | Yes |
| Twitter_X | Age_cat35_44 | -0.978 | 0.210 | -1.389 | -0.566 | 0.000 | 1.187 | Yes |
| Twitter_X | Age_cat45_54 | -1.185 | 0.214 | -1.605 | -0.765 | 0.000 | 1.187 | Yes |
| Twitter_X | Age_cat55_64 | -1.425 | 0.223 | -1.863 | -0.986 | 0.000 | 1.187 | Yes |
| Twitter_X | Age_cat65_+ | -1.756 | 0.214 | -2.176 | -1.337 | 0.000 | 1.187 | Yes |
| Twitter_X | VotePSOE | -0.124 | 0.199 | -0.514 | 0.266 | 0.533 | 3.489 | No |
| Twitter_X | VoteVOX | -0.255 | 0.226 | -0.699 | 0.189 | 0.260 | 3.489 | No |
| Twitter_X | VoteSumar | 0.099 | 0.238 | -0.368 | 0.566 | 0.677 | 3.489 | No |
| Twitter_X | VoteERC | -0.594 | 0.358 | -1.296 | 0.109 | 0.098 | 3.489 | No |
| Twitter_X | VoteJxCat-Junts | 0.197 | 0.343 | -0.476 | 0.869 | 0.566 | 3.489 | No |
| Twitter_X | VoteOther party | 0.414 | 0.249 | -0.076 | 0.903 | 0.097 | 3.489 | No |
| Twitter_X | VoteDid not vote | -0.010 | 0.210 | -0.422 | 0.403 | 0.964 | 3.489 | No |
| Twitter_X | VoteI voted blank / no ballot paper | 0.003 | 0.329 | -0.643 | 0.649 | 0.993 | 3.489 | No |
| Twitter_X | VoteI prefer not to answer | 0.000 | 0.206 | -0.404 | 0.403 | 0.999 | 3.489 | No |
| Twitter_X | SexFemale | -0.516 | 0.102 | -0.716 | -0.317 | 0.000 | 1.060 | Yes |
| Twitter_X | Left_RightLeft | -0.080 | 0.176 | -0.426 | 0.266 | 0.652 | 3.335 | No |
| Twitter_X | Left_RightCenter | -0.418 | 0.186 | -0.784 | -0.053 | 0.025 | 3.335 | Yes |
| Twitter_X | Left_RightRight | -0.358 | 0.236 | -0.821 | 0.105 | 0.129 | 3.335 | No |
| Twitter_X | Left_RightExt_right | -0.215 | 0.271 | -0.746 | 0.316 | 0.427 | 3.335 | No |
| Twitter_X | Left_RightDKDA | -0.497 | 0.215 | -0.919 | -0.075 | 0.021 | 3.335 | Yes |
| Twitter_X | Pol_intSomewhat interested | -0.682 | 0.187 | -1.049 | -0.316 | 0.000 | 1.538 | Yes |
| Twitter_X | Pol_intNot very interested | -1.057 | 0.190 | -1.430 | -0.684 | 0.000 | 1.538 | Yes |
| Twitter_X | Pol_intNot at all interested | -1.360 | 0.217 | -1.786 | -0.934 | 0.000 | 1.538 | Yes |
| Twitter_X | Info_Online | 0.131 | 0.065 | 0.004 | 0.259 | 0.043 | 1.468 | Yes |
| Twitter_X | Partisan_mobil | 0.014 | 0.072 | -0.128 | 0.155 | 0.850 | 1.474 | No |
| Age_cat25_34 | -0.633 | 0.236 | -1.097 | -0.170 | 0.007 | 1.186 | Yes | |
| Age_cat35_44 | -1.329 | 0.235 | -1.789 | -0.868 | 0.000 | 1.186 | Yes | |
| Age_cat45_54 | -1.968 | 0.239 | -2.438 | -1.498 | 0.000 | 1.186 | Yes | |
| Age_cat55_64 | -2.194 | 0.249 | -2.683 | -1.706 | 0.000 | 1.186 | Yes | |
| Age_cat65_+ | -3.094 | 0.239 | -3.562 | -2.626 | 0.000 | 1.186 | Yes | |
| VotePSOE | -0.320 | 0.222 | -0.756 | 0.115 | 0.149 | 3.502 | No | |
| VoteVOX | -0.006 | 0.252 | -0.501 | 0.489 | 0.982 | 3.502 | No | |
| VoteSumar | -0.125 | 0.267 | -0.648 | 0.398 | 0.639 | 3.502 | No | |
| VoteERC | -0.060 | 0.394 | -0.834 | 0.713 | 0.878 | 3.502 | No | |
| VoteJxCat-Junts | 0.626 | 0.385 | -0.129 | 1.381 | 0.104 | 3.502 | No | |
| VoteOther party | -0.291 | 0.280 | -0.840 | 0.258 | 0.299 | 3.502 | No | |
| VoteDid not vote | -0.321 | 0.235 | -0.782 | 0.140 | 0.172 | 3.502 | No | |
| VoteI voted blank / no ballot paper | -0.241 | 0.370 | -0.967 | 0.485 | 0.515 | 3.502 | No | |
| VoteI prefer not to answer | -0.358 | 0.229 | -0.807 | 0.092 | 0.119 | 3.502 | No | |
| SexFemale | 0.966 | 0.113 | 0.743 | 1.188 | 0.000 | 1.061 | Yes | |
| Left_RightLeft | -0.256 | 0.197 | -0.642 | 0.131 | 0.195 | 3.299 | No | |
| Left_RightCenter | -0.580 | 0.208 | -0.987 | -0.172 | 0.005 | 3.299 | Yes | |
| Left_RightRight | -0.568 | 0.263 | -1.083 | -0.053 | 0.031 | 3.299 | Yes | |
| Left_RightExt_right | -0.007 | 0.302 | -0.600 | 0.587 | 0.983 | 3.299 | No | |
| Left_RightDKDA | -0.683 | 0.240 | -1.153 | -0.212 | 0.004 | 3.299 | Yes | |
| Pol_intSomewhat interested | -0.041 | 0.210 | -0.453 | 0.371 | 0.844 | 1.527 | No | |
| Pol_intNot very interested | -0.012 | 0.214 | -0.431 | 0.407 | 0.955 | 1.527 | No | |
| Pol_intNot at all interested | 0.017 | 0.242 | -0.458 | 0.493 | 0.943 | 1.527 | No | |
| Info_Online | 0.072 | 0.072 | -0.069 | 0.214 | 0.317 | 1.460 | No | |
| Partisan_mobil | 0.093 | 0.079 | -0.063 | 0.248 | 0.242 | 1.459 | No | |
| TikTok | Age_cat25_34 | -1.134 | 0.229 | -1.584 | -0.685 | 0.000 | 1.185 | Yes |
| TikTok | Age_cat35_44 | -1.752 | 0.227 | -2.198 | -1.306 | 0.000 | 1.185 | Yes |
| TikTok | Age_cat45_54 | -2.062 | 0.232 | -2.517 | -1.606 | 0.000 | 1.185 | Yes |
| TikTok | Age_cat55_64 | -2.053 | 0.242 | -2.528 | -1.578 | 0.000 | 1.185 | Yes |
| TikTok | Age_cat65_+ | -2.454 | 0.232 | -2.909 | -1.998 | 0.000 | 1.185 | Yes |
| TikTok | VotePSOE | 0.288 | 0.216 | -0.135 | 0.711 | 0.182 | 3.471 | No |
| TikTok | VoteVOX | 1.056 | 0.245 | 0.576 | 1.537 | 0.000 | 3.471 | Yes |
| TikTok | VoteSumar | -0.021 | 0.259 | -0.529 | 0.487 | 0.935 | 3.471 | No |
| TikTok | VoteERC | 0.398 | 0.390 | -0.367 | 1.164 | 0.307 | 3.471 | No |
| TikTok | VoteJxCat-Junts | -0.251 | 0.374 | -0.984 | 0.482 | 0.502 | 3.471 | No |
| TikTok | VoteOther party | 0.023 | 0.274 | -0.514 | 0.560 | 0.933 | 3.471 | No |
| TikTok | VoteDid not vote | 0.044 | 0.228 | -0.404 | 0.492 | 0.847 | 3.471 | No |
| TikTok | VoteI voted blank / no ballot paper | -0.057 | 0.359 | -0.761 | 0.648 | 0.875 | 3.471 | No |
| TikTok | VoteI prefer not to answer | 0.081 | 0.222 | -0.355 | 0.517 | 0.715 | 3.471 | No |
| TikTok | SexFemale | 0.193 | 0.110 | -0.024 | 0.409 | 0.081 | 1.062 | No |
| TikTok | Left_RightLeft | -0.344 | 0.192 | -0.721 | 0.032 | 0.073 | 3.277 | No |
| TikTok | Left_RightCenter | 0.042 | 0.202 | -0.355 | 0.439 | 0.835 | 3.277 | No |
| TikTok | Left_RightRight | -0.181 | 0.256 | -0.682 | 0.321 | 0.479 | 3.277 | No |
| TikTok | Left_RightExt_right | 0.443 | 0.294 | -0.134 | 1.019 | 0.132 | 3.277 | No |
| TikTok | Left_RightDKDA | -0.051 | 0.234 | -0.509 | 0.407 | 0.828 | 3.277 | No |
| TikTok | Pol_intSomewhat interested | -0.064 | 0.204 | -0.464 | 0.336 | 0.754 | 1.540 | No |
| TikTok | Pol_intNot very interested | -0.017 | 0.207 | -0.424 | 0.390 | 0.934 | 1.540 | No |
| TikTok | Pol_intNot at all interested | 0.052 | 0.235 | -0.410 | 0.514 | 0.826 | 1.540 | No |
| TikTok | Info_Online | -0.004 | 0.071 | -0.143 | 0.135 | 0.958 | 1.486 | No |
| TikTok | Partisan_mobil | -0.028 | 0.076 | -0.177 | 0.121 | 0.712 | 1.482 | No |
| Age_cat25_34 | 0.949 | 0.242 | 0.473 | 1.425 | 0.000 | 1.183 | Yes | |
| Age_cat35_44 | 1.449 | 0.241 | 0.977 | 1.921 | 0.000 | 1.183 | Yes | |
| Age_cat45_54 | 1.106 | 0.246 | 0.624 | 1.588 | 0.000 | 1.183 | Yes | |
| Age_cat55_64 | 1.342 | 0.256 | 0.840 | 1.844 | 0.000 | 1.183 | Yes | |
| Age_cat65_+ | 1.144 | 0.244 | 0.665 | 1.623 | 0.000 | 1.183 | Yes | |
| VotePSOE | -0.006 | 0.228 | -0.453 | 0.440 | 0.978 | 3.489 | No | |
| VoteVOX | 0.210 | 0.259 | -0.298 | 0.718 | 0.417 | 3.489 | No | |
| VoteSumar | -0.354 | 0.273 | -0.890 | 0.182 | 0.195 | 3.489 | No | |
| VoteERC | -0.868 | 0.404 | -1.661 | -0.074 | 0.032 | 3.489 | Yes | |
| VoteJxCat-Junts | -0.099 | 0.395 | -0.873 | 0.676 | 0.803 | 3.489 | No | |
| VoteOther party | 0.009 | 0.287 | -0.555 | 0.573 | 0.975 | 3.489 | No | |
| VoteDid not vote | -0.224 | 0.241 | -0.697 | 0.249 | 0.354 | 3.489 | No | |
| VoteI voted blank / no ballot paper | -0.113 | 0.380 | -0.857 | 0.632 | 0.767 | 3.489 | No | |
| VoteI prefer not to answer | -0.151 | 0.234 | -0.610 | 0.309 | 0.520 | 3.489 | No | |
| SexFemale | 0.628 | 0.116 | 0.400 | 0.856 | 0.000 | 1.062 | Yes | |
| Left_RightLeft | -0.366 | 0.202 | -0.763 | 0.030 | 0.070 | 3.302 | No | |
| Left_RightCenter | -0.313 | 0.213 | -0.731 | 0.105 | 0.142 | 3.302 | No | |
| Left_RightRight | -0.530 | 0.269 | -1.058 | -0.001 | 0.049 | 3.302 | Yes | |
| Left_RightExt_right | -0.104 | 0.310 | -0.713 | 0.505 | 0.738 | 3.302 | No | |
| Left_RightDKDA | -0.345 | 0.246 | -0.828 | 0.137 | 0.161 | 3.302 | No | |
| Pol_intSomewhat interested | -0.084 | 0.216 | -0.507 | 0.339 | 0.697 | 1.536 | No | |
| Pol_intNot very interested | 0.001 | 0.219 | -0.429 | 0.430 | 0.998 | 1.536 | No | |
| Pol_intNot at all interested | 0.186 | 0.249 | -0.302 | 0.674 | 0.455 | 1.536 | No | |
| Info_Online | 0.040 | 0.074 | -0.106 | 0.186 | 0.589 | 1.477 | No | |
| Partisan_mobil | 0.076 | 0.080 | -0.080 | 0.233 | 0.339 | 1.473 | No | |
| Age_cat25_34 | 0.181 | 0.162 | -0.137 | 0.498 | 0.264 | 1.187 | No | |
| Age_cat35_44 | 0.018 | 0.160 | -0.296 | 0.332 | 0.911 | 1.187 | No | |
| Age_cat45_54 | -0.258 | 0.164 | -0.580 | 0.065 | 0.117 | 1.187 | No | |
| Age_cat55_64 | -0.399 | 0.171 | -0.734 | -0.064 | 0.020 | 1.187 | Yes | |
| Age_cat65_+ | -0.800 | 0.164 | -1.121 | -0.479 | 0.000 | 1.187 | Yes | |
| VotePSOE | -0.358 | 0.151 | -0.655 | -0.062 | 0.018 | 3.519 | Yes | |
| VoteVOX | -0.276 | 0.173 | -0.615 | 0.062 | 0.110 | 3.519 | No | |
| VoteSumar | -0.097 | 0.181 | -0.452 | 0.258 | 0.591 | 3.519 | No | |
| VoteERC | 0.113 | 0.271 | -0.419 | 0.644 | 0.678 | 3.519 | No | |
| VoteJxCat-Junts | 0.212 | 0.259 | -0.296 | 0.720 | 0.413 | 3.519 | No | |
| VoteOther party | 0.032 | 0.190 | -0.341 | 0.405 | 0.867 | 3.519 | No | |
| VoteDid not vote | -0.356 | 0.161 | -0.672 | -0.040 | 0.027 | 3.519 | Yes | |
| VoteI voted blank / no ballot paper | 0.116 | 0.253 | -0.380 | 0.611 | 0.647 | 3.519 | No | |
| VoteI prefer not to answer | -0.106 | 0.157 | -0.414 | 0.202 | 0.498 | 3.519 | No | |
| SexFemale | -0.220 | 0.077 | -0.372 | -0.068 | 0.005 | 1.061 | Yes | |
| Left_RightLeft | 0.042 | 0.134 | -0.220 | 0.305 | 0.753 | 3.312 | No | |
| Left_RightCenter | 0.063 | 0.141 | -0.214 | 0.340 | 0.655 | 3.312 | No | |
| Left_RightRight | 0.042 | 0.179 | -0.309 | 0.394 | 0.813 | 3.312 | No | |
| Left_RightExt_right | -0.059 | 0.206 | -0.463 | 0.344 | 0.774 | 3.312 | No | |
| Left_RightDKDA | 0.048 | 0.164 | -0.274 | 0.369 | 0.772 | 3.312 | No | |
| Pol_intSomewhat interested | -0.587 | 0.141 | -0.864 | -0.310 | 0.000 | 1.534 | Yes | |
| Pol_intNot very interested | -0.750 | 0.144 | -1.032 | -0.468 | 0.000 | 1.534 | Yes | |
| Pol_intNot at all interested | -0.889 | 0.165 | -1.212 | -0.566 | 0.000 | 1.534 | Yes | |
| Info_Online | 0.015 | 0.050 | -0.082 | 0.112 | 0.759 | 1.478 | No | |
| Partisan_mobil | 0.009 | 0.054 | -0.098 | 0.116 | 0.866 | 1.478 | No | |
| Telegram | Age_cat25_34 | 0.031 | 0.221 | -0.402 | 0.465 | 0.887 | 1.190 | No |
| Telegram | Age_cat35_44 | -0.120 | 0.219 | -0.551 | 0.310 | 0.583 | 1.190 | No |
| Telegram | Age_cat45_54 | -0.335 | 0.224 | -0.775 | 0.105 | 0.135 | 1.190 | No |
| Telegram | Age_cat55_64 | -0.698 | 0.234 | -1.157 | -0.240 | 0.003 | 1.190 | Yes |
| Telegram | Age_cat65_+ | -1.143 | 0.224 | -1.582 | -0.703 | 0.000 | 1.190 | Yes |
| Telegram | VotePSOE | 0.022 | 0.207 | -0.384 | 0.429 | 0.914 | 3.520 | No |
| Telegram | VoteVOX | 0.507 | 0.235 | 0.045 | 0.969 | 0.031 | 3.520 | Yes |
| Telegram | VoteSumar | 0.421 | 0.248 | -0.067 | 0.908 | 0.091 | 3.520 | No |
| Telegram | VoteERC | 0.008 | 0.375 | -0.727 | 0.743 | 0.983 | 3.520 | No |
| Telegram | VoteJxCat-Junts | 0.210 | 0.358 | -0.492 | 0.912 | 0.557 | 3.520 | No |
| Telegram | VoteOther party | 0.054 | 0.261 | -0.458 | 0.567 | 0.836 | 3.520 | No |
| Telegram | VoteDid not vote | -0.016 | 0.220 | -0.447 | 0.415 | 0.943 | 3.520 | No |
| Telegram | VoteI voted blank / no ballot paper | 0.005 | 0.348 | -0.677 | 0.687 | 0.989 | 3.520 | No |
| Telegram | VoteI prefer not to answer | 0.144 | 0.214 | -0.276 | 0.564 | 0.501 | 3.520 | No |
| Telegram | SexFemale | -0.374 | 0.106 | -0.582 | -0.166 | 0.000 | 1.062 | Yes |
| Telegram | Left_RightLeft | -0.169 | 0.184 | -0.531 | 0.192 | 0.358 | 3.322 | No |
| Telegram | Left_RightCenter | -0.019 | 0.194 | -0.400 | 0.362 | 0.922 | 3.322 | No |
| Telegram | Left_RightRight | 0.044 | 0.245 | -0.436 | 0.524 | 0.857 | 3.322 | No |
| Telegram | Left_RightExt_right | -0.139 | 0.283 | -0.694 | 0.416 | 0.623 | 3.322 | No |
| Telegram | Left_RightDKDA | -0.033 | 0.224 | -0.472 | 0.405 | 0.881 | 3.322 | No |
| Telegram | Pol_intSomewhat interested | -0.523 | 0.195 | -0.905 | -0.140 | 0.007 | 1.532 | Yes |
| Telegram | Pol_intNot very interested | -0.746 | 0.199 | -1.136 | -0.356 | 0.000 | 1.532 | Yes |
| Telegram | Pol_intNot at all interested | -0.796 | 0.226 | -1.239 | -0.354 | 0.000 | 1.532 | Yes |
| Telegram | Info_Online | 0.116 | 0.068 | -0.017 | 0.250 | 0.088 | 1.478 | No |
| Telegram | Partisan_mobil | -0.031 | 0.073 | -0.175 | 0.113 | 0.672 | 1.476 | No |
# --- 🎨 Plot Coefficients ---
coef_table %>%
mutate(
term = str_replace_all(term, c(
"Age_cat" = "Age: ",
"Vote" = "Vote: ",
"Sex" = "Sex: ",
"\\[T\\." = "", # Removes the "[T." prefix for dummy variables
"\\]" = "" # Removes the closing "]"
))
) %>%
ggplot(aes(y = reorder(term, estimate), x = estimate,
color = Significant, shape = Significant)) +
geom_point(position = position_dodge(width = 0.5), size = 1) +
geom_errorbar(aes(xmin = conf.low, xmax = conf.high),
height = 0.2, position = position_dodge(width = 0.5), orientation = "y") + geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") + # Use vline for vertical
facet_wrap(~ Dependent, scales = "free_y", ncol = 2) +
scale_color_manual(values = c("No" = "gray50", "Yes" = "#E69F00")) +
scale_shape_manual(values = c("No" = 16, "Yes" = 17)) +
theme_bw(base_size = 6) + # Switched to theme_bw for clarity
theme(
legend.position = "bottom",
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
strip.background = element_rect(fill = "gray90"),
axis.text.y = element_text(size = 4)
) +
labs(
title = "OLS Coefficients by Platform",
subtitle = "Highlighted points are statistically significant (p < .05)",
y = "Predictors", # Swapped labels
x = "Coefficient Estimate (±95% CI)", # Swapped labels
color = "Significant:",
shape = "Significant:"
)library(dplyr)
library(psych)
library(car)
library(stringr)
# --- 1️⃣ Identify all political-use variables from dig03_1 to dig03_8 ---
# Political purposes = 3, 4, 5, 6
political_suffixes <- paste0("_number_", 3:6)
# Build regex to match ANY platform prefix + ANY political suffix
pattern_pol <- paste0("dig03_[1-8](", paste0("_number_", 3:6, collapse="|"), ")$")
pol_vars1 <- names(data_USES)[grepl(pattern_pol, names(data_USES))]
length(pol_vars1)[1] 32
pol_vars1 [1] "dig03_1_number_3" "dig03_1_number_4" "dig03_1_number_5" "dig03_1_number_6"
[5] "dig03_2_number_3" "dig03_2_number_4" "dig03_2_number_5" "dig03_2_number_6"
[9] "dig03_3_number_3" "dig03_3_number_4" "dig03_3_number_5" "dig03_3_number_6"
[13] "dig03_4_number_3" "dig03_4_number_4" "dig03_4_number_5" "dig03_4_number_6"
[17] "dig03_5_number_3" "dig03_5_number_4" "dig03_5_number_5" "dig03_5_number_6"
[21] "dig03_6_number_3" "dig03_6_number_4" "dig03_6_number_5" "dig03_6_number_6"
[25] "dig03_7_number_3" "dig03_7_number_4" "dig03_7_number_5" "dig03_7_number_6"
[29] "dig03_8_number_3" "dig03_8_number_4" "dig03_8_number_5" "dig03_8_number_6"
Create new archive
# Create new archive
data_PCA4 <- data_PCA3 %>%
bind_cols(data_USES %>% dplyr::select(all_of(pol_vars1)))
dim(data_PCA4)[1] 1501 57
data_PCA4 <- data_PCA4 %>%
dplyr::rename(
YT_inf_soc = dig03_1_number_3,
YT_sup_soc = dig03_1_number_4,
YT_inf_pol = dig03_1_number_5,
YT_sup_pol = dig03_1_number_6,
WP_inf_soc = dig03_2_number_3,
WP_sup_soc = dig03_2_number_4,
WP_inf_pol = dig03_2_number_5,
WP_sup_pol = dig03_2_number_6,
TW_inf_soc = dig03_3_number_3,
TW_sup_soc = dig03_3_number_4,
TW_inf_pol = dig03_3_number_5,
TW_sup_pol = dig03_3_number_6,
IN_inf_soc = dig03_4_number_3,
IN_sup_soc = dig03_4_number_4,
IN_inf_pol = dig03_4_number_5,
IN_sup_pol = dig03_4_number_6,
TT_inf_soc = dig03_5_number_3,
TT_sup_soc = dig03_5_number_4,
TT_inf_pol = dig03_5_number_5,
TT_sup_pol = dig03_5_number_6,
FB_inf_soc = dig03_6_number_3,
FB_sup_soc = dig03_6_number_4,
FB_inf_pol = dig03_6_number_5,
FB_sup_pol = dig03_6_number_6,
LN_inf_soc = dig03_7_number_3,
LN_sup_soc = dig03_7_number_4,
LN_inf_pol = dig03_7_number_5,
LN_sup_pol = dig03_7_number_6,
TG_inf_soc = dig03_8_number_3,
TG_sup_soc = dig03_8_number_4,
TG_inf_pol = dig03_8_number_5,
TG_sup_pol = dig03_8_number_6
)
pol_vars <- names(data_PCA4)[
grepl("^[A-Za-z]{2}_(inf|sup)_(soc|pol)$", names(data_PCA4))
]
data_PCA4 <- data_PCA4 %>%
dplyr::select(
-inf_socially,
-inf_pol,
-sup_soc_causes,
-sup_pol_causes
)Kaiser-Meyer-Olkin
kmo_pol <- KMO(data_PCA4 %>% dplyr::select(YT_inf_soc:TG_sup_pol))
kmo_polKaiser-Meyer-Olkin factor adequacy
Call: KMO(r = data_PCA4 %>% dplyr::select(YT_inf_soc:TG_sup_pol))
Overall MSA = 0.75
MSA for each item =
YT_inf_soc YT_sup_soc YT_inf_pol YT_sup_pol WP_inf_soc WP_sup_soc WP_inf_pol
0.65 0.84 0.65 0.75 0.71 0.73 0.78
WP_sup_pol TW_inf_soc TW_sup_soc TW_inf_pol TW_sup_pol IN_inf_soc IN_sup_soc
0.81 0.69 0.75 0.57 0.75 0.71 0.76
IN_inf_pol IN_sup_pol TT_inf_soc TT_sup_soc TT_inf_pol TT_sup_pol FB_inf_soc
0.79 0.84 0.63 0.81 0.78 0.75 0.68
FB_sup_soc FB_inf_pol FB_sup_pol LN_inf_soc LN_sup_soc LN_inf_pol LN_sup_pol
0.85 0.74 0.74 0.67 0.52 0.73 0.69
TG_inf_soc TG_sup_soc TG_inf_pol TG_sup_pol
0.81 0.68 0.75 0.79
Interpretation:
>.80 → Excellent
>.70 → Good
>.60 → Mediocre
<.50 → PCA not recommended
Bartlett correlation test
bart_pol <- cortest.bartlett(cor(data_PCA4 %>% dplyr::select(YT_inf_soc:TG_sup_pol), use="pairwise.complete.obs"),
n = nrow(data_PCA4))
bart_pol$chisq
[1] 9439.823
$p.value
[1] 0
$df
[1] 496
Significant p-value (< .05) means correlations ≠ identity matrix → PCA appropriate.
Multicolinearity test VIF
# Compute VIF one variable at a time
vif_results <- sapply(pol_vars, function(v) {
others <- setdiff(pol_vars, v)
model <- lm(data_PCA4[[v]] ~ ., data = data_PCA4[, others])
mean(car::vif(model))
})
vif_resultsYT_inf_soc YT_sup_soc YT_inf_pol YT_sup_pol WP_inf_soc WP_sup_soc WP_inf_pol
1.646490 1.627475 1.646340 1.614439 1.645714 1.619690 1.629664
WP_sup_pol TW_inf_soc TW_sup_soc TW_inf_pol TW_sup_pol IN_inf_soc IN_sup_soc
1.596428 1.649620 1.631299 1.647899 1.629261 1.643823 1.611073
IN_inf_pol IN_sup_pol TT_inf_soc TT_sup_soc TT_inf_pol TT_sup_pol FB_inf_soc
1.636126 1.594183 1.625470 1.644923 1.636298 1.656901 1.651920
FB_sup_soc FB_inf_pol FB_sup_pol LN_inf_soc LN_sup_soc LN_inf_pol LN_sup_pol
1.639052 1.617919 1.631557 1.648991 1.643112 1.639013 1.626366
TG_inf_soc TG_sup_soc TG_inf_pol TG_sup_pol
1.635532 1.629128 1.637782 1.617550
Interpretation:
VIF < 5 → acceptable
VIF > 10 → strong multicollinearity
cor_mat <- cor(data_PCA4 %>% dplyr::select(YT_inf_soc:TG_sup_pol), use = "pairwise.complete.obs")
det(cor_mat)[1] 0.001761066
Interpretation:
Determinant > .00001 → OK
Near zero → severe redundancy
library(FactoMineR)
library(factoextra)
library(dplyr)
library(ggplot2)
# --- 1️⃣ Select political-use variables ---
pol_data <- data_PCA4 %>% dplyr::select(all_of(pol_vars))
# --- 2️⃣ Supplementary variables (adapt to your exact names) ---
supp_quali <- c("Age_cat", "Vote", "Sex", "Pol_int", "Left_Right", "Inf_disagr",
"Inf_check", "Inf_confirm")
supp_quanti <- c("Info_Online", "Partisan_mobil","Civic_Engagement","Protest_Action",
"Info_Offline","YouTube", "WhatsApp", "Twitter_X", "Instagram","TikTok",
"Facebook", "Linkedin", "Telegram")
# Keep only the variables that exist
supp_quali <- intersect(supp_quali, names(data_PCA4))
supp_quanti <- intersect(supp_quanti, names(data_PCA4))
# --- 3️⃣ Build PCA model with supplementary vars ---
pca_pol_fm <- PCA(
data_PCA4,
scale.unit = TRUE,
quanti.sup = match(supp_quanti, names(data_PCA4)),
quali.sup = match(supp_quali, names(data_PCA4)),
ind.sup = NULL,
graph = FALSE
)
# --- 3️⃣1 summary of the model ---
summary(pca_pol_fm)
Call:
PCA(X = data_PCA4, scale.unit = TRUE, ind.sup = NULL, quanti.sup = match(supp_quanti,
names(data_PCA4)), quali.sup = match(supp_quali, names(data_PCA4)),
graph = FALSE)
Eigenvalues
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
Variance 4.321 1.829 1.662 1.500 1.445 1.372 1.219
% of var. 13.503 5.715 5.195 4.688 4.515 4.288 3.810
Cumulative % of var. 13.503 19.219 24.413 29.101 33.617 37.905 41.715
Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
Variance 1.195 1.166 1.137 1.051 1.008 0.950 0.876
% of var. 3.735 3.643 3.552 3.285 3.150 2.969 2.736
Cumulative % of var. 45.450 49.092 52.645 55.930 59.080 62.049 64.785
Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
Variance 0.846 0.834 0.808 0.777 0.729 0.721 0.694
% of var. 2.645 2.606 2.526 2.428 2.278 2.252 2.169
Cumulative % of var. 67.430 70.036 72.562 74.990 77.268 79.520 81.689
Dim.22 Dim.23 Dim.24 Dim.25 Dim.26 Dim.27 Dim.28
Variance 0.668 0.648 0.612 0.595 0.546 0.540 0.522
% of var. 2.087 2.027 1.913 1.860 1.705 1.689 1.631
Cumulative % of var. 83.776 85.802 87.716 89.576 91.281 92.970 94.601
Dim.29 Dim.30 Dim.31 Dim.32
Variance 0.452 0.436 0.430 0.411
% of var. 1.413 1.361 1.342 1.283
Cumulative % of var. 96.014 97.375 98.717 100.000
Individuals (the 10 first)
Dist Dim.1 ctr cos2 Dim.2 ctr cos2
1 | 5.841 | 0.002 0.000 0.000 | 1.362 0.068 0.054 |
2 | 0.796 | -0.312 0.002 0.154 | -0.059 0.000 0.006 |
3 | 2.306 | -0.629 0.006 0.074 | 0.174 0.001 0.006 |
4 | 3.458 | -1.249 0.024 0.130 | 0.440 0.007 0.016 |
5 | 2.668 | -1.774 0.048 0.442 | -1.692 0.104 0.402 |
6 | 0.705 | -0.168 0.000 0.057 | -0.159 0.001 0.051 |
7 | 2.563 | -1.636 0.041 0.407 | -1.407 0.072 0.302 |
8 | 1.749 | -1.126 0.020 0.415 | -0.880 0.028 0.253 |
9 | 1.646 | -0.930 0.013 0.319 | -0.564 0.012 0.117 |
10 | 3.472 | -1.149 0.020 0.110 | 0.496 0.009 0.020 |
Dim.3 ctr cos2
1 0.399 0.006 0.005 |
2 -0.292 0.003 0.134 |
3 -0.068 0.000 0.001 |
4 -0.191 0.001 0.003 |
5 0.388 0.006 0.021 |
6 -0.097 0.000 0.019 |
7 0.385 0.006 0.023 |
8 -0.587 0.014 0.113 |
9 -0.633 0.016 0.148 |
10 -0.997 0.040 0.082 |
Variables (the 10 first)
Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
YT_inf_soc | 0.196 0.889 0.038 | 0.425 9.896 0.181 | 0.234 3.285
YT_sup_soc | 0.437 4.417 0.191 | -0.145 1.148 0.021 | 0.014 0.012
YT_inf_pol | 0.285 1.875 0.081 | 0.070 0.265 0.005 | -0.017 0.017
YT_sup_pol | 0.375 3.251 0.140 | -0.144 1.140 0.021 | -0.048 0.136
WP_inf_soc | 0.322 2.394 0.103 | -0.032 0.056 0.001 | 0.431 11.177
WP_sup_soc | 0.420 4.079 0.176 | -0.246 3.298 0.060 | 0.363 7.934
WP_inf_pol | 0.464 4.974 0.215 | -0.244 3.262 0.060 | 0.285 4.886
WP_sup_pol | 0.517 6.195 0.268 | -0.324 5.736 0.105 | 0.254 3.871
TW_inf_soc | 0.169 0.662 0.029 | 0.525 15.090 0.276 | -0.164 1.613
TW_sup_soc | 0.429 4.266 0.184 | 0.002 0.000 0.000 | -0.562 19.005
cos2
YT_inf_soc 0.055 |
YT_sup_soc 0.000 |
YT_inf_pol 0.000 |
YT_sup_pol 0.002 |
WP_inf_soc 0.186 |
WP_sup_soc 0.132 |
WP_inf_pol 0.081 |
WP_sup_pol 0.064 |
TW_inf_soc 0.027 |
TW_sup_soc 0.316 |
Supplementary continuous variables (the 10 first)
Dim.1 cos2 Dim.2 cos2 Dim.3 cos2
Info_Online | -0.021 0.000 | 0.004 0.000 | -0.012 0.000 |
Partisan_mobil | -0.022 0.000 | 0.032 0.001 | 0.021 0.000 |
Civic_Engagement | -0.007 0.000 | 0.006 0.000 | 0.007 0.000 |
Protest_Action | -0.008 0.000 | 0.020 0.000 | -0.024 0.001 |
Info_Offline | 0.007 0.000 | -0.040 0.002 | -0.019 0.000 |
YouTube | 0.093 0.009 | -0.017 0.000 | 0.025 0.001 |
WhatsApp | 0.004 0.000 | 0.061 0.004 | 0.026 0.001 |
Twitter_X | 0.192 0.037 | 0.069 0.005 | -0.089 0.008 |
Instagram | 0.072 0.005 | 0.004 0.000 | 0.001 0.000 |
TikTok | 0.126 0.016 | -0.063 0.004 | 0.051 0.003 |
Supplementary categories (the 10 first)
Dist Dim.1 cos2 v.test Dim.2 cos2 v.test
18_24 | 1.068 | 0.800 0.560 4.392 | -0.015 0.000 -0.126 |
25_34 | 0.538 | 0.337 0.393 2.943 | 0.011 0.000 0.143 |
35_44 | 0.489 | -0.416 0.722 -3.934 | -0.099 0.041 -1.442 |
45_54 | 0.452 | -0.341 0.569 -3.003 | 0.122 0.072 1.646 |
55_64 | 0.378 | -0.186 0.242 -1.408 | -0.047 0.015 -0.542 |
65_+ | 0.438 | 0.234 0.286 2.253 | 0.019 0.002 0.282 |
PP | 0.462 | 0.219 0.225 1.731 | 0.077 0.028 0.938 |
PSOE | 0.381 | 0.128 0.114 1.214 | -0.036 0.009 -0.523 |
VOX | 0.748 | 0.192 0.066 1.067 | -0.099 0.018 -0.851 |
Sumar | 0.497 | 0.184 0.138 1.203 | 0.137 0.076 1.375 |
Dim.3 cos2 v.test
18_24 -0.062 0.003 -0.552 |
25_34 -0.157 0.085 -2.207 |
35_44 0.020 0.002 0.303 |
45_54 -0.019 0.002 -0.267 |
55_64 0.026 0.005 0.323 |
65_+ 0.137 0.098 2.123 |
PP 0.030 0.004 0.388 |
PSOE 0.015 0.002 0.233 |
VOX -0.259 0.120 -2.325 |
Sumar -0.035 0.005 -0.366 |
# --- 3️⃣1.Descrition of the dimensions ---
PCA4_desc <- dimdesc(pca_pol_fm, axes = 1:3, proba = 0.05)
# --- Dimmension 1 description
PCA4_desc$Dim.1
Link between the variable and the continuous variables (R-square)
=================================================================================
correlation p.value
IN_sup_pol 0.56729168 1.421305e-128
WP_sup_pol 0.51738425 1.548161e-103
IN_sup_soc 0.47872001 8.075882e-87
IN_inf_pol 0.47138955 6.945551e-84
WP_inf_pol 0.46361321 7.544063e-81
TG_sup_pol 0.46062297 1.057473e-79
FB_sup_soc 0.46055769 1.119885e-79
TT_inf_pol 0.45227022 1.470557e-76
YT_sup_soc 0.43686054 5.512203e-71
TW_sup_soc 0.42933251 2.298504e-68
WP_sup_soc 0.41982511 3.775684e-65
TW_sup_pol 0.39997706 9.222744e-59
FB_sup_pol 0.39504426 3.064118e-57
TT_sup_pol 0.39125435 4.343137e-56
TT_sup_soc 0.37718013 6.080239e-52
YT_sup_pol 0.37479043 2.936857e-51
TG_sup_soc 0.36510220 1.522296e-48
FB_inf_pol 0.36458061 2.118462e-48
TG_inf_pol 0.34431834 4.996350e-43
IN_inf_soc 0.32913394 2.968721e-39
WP_inf_soc 0.32161857 1.832000e-37
LN_sup_pol 0.29276388 4.749031e-31
YT_inf_pol 0.28460727 2.296220e-29
TG_inf_soc 0.27884548 3.293184e-28
LN_inf_pol 0.25014194 7.574795e-23
FB_inf_soc 0.23677978 1.425912e-20
YT_inf_soc 0.19596975 1.852482e-14
Twitter_X 0.19223935 5.830049e-14
LN_inf_soc 0.18924135 1.440773e-13
TW_inf_soc 0.16913704 4.254227e-11
LN_sup_soc 0.15728687 8.965272e-10
TT_inf_soc 0.15430891 1.862156e-09
Telegram 0.15230428 3.022005e-09
TW_inf_pol 0.14881864 6.908848e-09
Linkedin 0.14183487 3.420739e-08
TikTok 0.12577288 1.017886e-06
YouTube 0.09302580 3.074982e-04
Instagram 0.07175641 5.414008e-03
Link between the variable and the categorical variable (1-way anova)
=============================================
R2 p.value
Pol_int 0.06656323 3.265459e-22
Inf_check 0.04979074 9.803092e-16
Inf_disagr 0.04828807 3.102599e-15
Inf_confirm 0.04279002 2.051190e-13
Age_cat 0.03348938 8.750931e-10
Left_Right 0.01460093 5.210061e-04
Vote 0.01347152 1.631765e-02
Link between variable and the categories of the categorical variables
================================================================
Estimate p.value
Pol_int=Very interested 1.16082831 5.302494e-16
Inf_disagr=Inf_disagr_Often 0.47026572 2.934705e-08
Inf_confirm=Inf_confirm_Very often 0.69070704 1.197453e-06
Inf_check=Inf_check_Often 0.42402669 4.863343e-06
Age_cat=18_24 0.72827859 1.061452e-05
Pol_int=Somewhat interested 0.08135235 1.453687e-04
Inf_disagr=Inf_disagr_Very often 0.62038766 2.608432e-04
Inf_check=Inf_check_Very often 0.58812668 7.055278e-04
Inf_confirm=Inf_confirm_Often 0.24188943 2.871815e-03
Age_cat=25_34 0.26581709 3.222132e-03
Inf_check=Inf_check_sometimes 0.11799108 1.301395e-02
Age_cat=65_+ 0.16271698 2.420900e-02
Left_Right=Ext_left 0.24409760 3.999033e-02
Inf_check=Inf_check_Rarely -0.35205214 1.419079e-02
Age_cat=45_54 -0.41253235 2.646805e-03
Vote=I prefer not to answer -0.41444545 6.798444e-04
Left_Right=DKDA -0.49332536 1.043461e-04
Pol_int=Not very interested -0.48422996 9.552307e-05
Age_cat=35_44 -0.48703263 8.065560e-05
Pol_int=Not at all interested -0.75795070 1.050200e-07
Inf_confirm=Inf_confirm_Never -0.72042425 3.079895e-11
Inf_disagr=Inf_disagr_Never -0.77904883 1.604261e-11
Inf_check=Inf_check_Never -0.77809231 1.581944e-12
# --- Dimmension 2 description
PCA4_desc$Dim.2
Link between the variable and the continuous variables (R-square)
=================================================================================
correlation p.value
TW_inf_soc 0.52533120 3.013204e-107
IN_inf_soc 0.51397748 5.632320e-102
TT_inf_soc 0.51138338 8.460458e-101
FB_inf_soc 0.44720779 1.072422e-74
YT_inf_soc 0.42543024 4.940180e-67
TW_inf_pol 0.29115927 1.028879e-30
TG_inf_soc 0.27513673 1.768825e-27
IN_inf_pol 0.16978873 3.574372e-11
LN_inf_soc 0.14954040 5.830752e-09
FB_inf_pol 0.12487626 1.215921e-06
TT_inf_pol 0.07630652 3.094667e-03
YT_inf_pol 0.06965336 6.942435e-03
Twitter_X 0.06874255 7.716980e-03
WhatsApp 0.06112753 1.786085e-02
TT_sup_soc -0.05432830 3.532369e-02
LN_inf_pol -0.05701603 2.718062e-02
TikTok -0.06293689 1.473880e-02
TG_sup_pol -0.09690325 1.698988e-04
LN_sup_pol -0.10221719 7.270062e-05
IN_sup_pol -0.10915139 2.255511e-05
TT_sup_pol -0.12161129 2.299149e-06
YT_sup_pol -0.14438684 1.923466e-08
YT_sup_soc -0.14491947 1.703509e-08
LN_sup_soc -0.15945875 5.214384e-10
TG_sup_soc -0.16571159 1.050631e-10
FB_sup_pol -0.17916430 2.706415e-12
WP_inf_pol -0.24423736 7.971796e-22
WP_sup_soc -0.24559852 4.659258e-22
WP_sup_pol -0.32388750 5.342410e-38
Link between the variable and the categorical variable (1-way anova)
=============================================
R2 p.value
Inf_confirm 0.009891079 0.00495527
Pol_int 0.007454192 0.01067013
Inf_disagr 0.007267081 0.02749500
Link between variable and the categories of the categorical variables
================================================================
Estimate p.value
Inf_confirm=Inf_confirm_Very often 0.2328764 0.010341577
Inf_disagr=Inf_disagr_Often 0.1474057 0.024067745
Left_Right=Left 0.1722936 0.026696221
Pol_int=Not at all interested -0.2094587 0.003736380
Inf_check=Inf_check_Never -0.1833972 0.003670116
Inf_confirm=Inf_confirm_Never -0.2140775 0.003550311
Inf_disagr=Inf_disagr_Never -0.1988853 0.003279192
# --- Dimmension 3 description
PCA4_desc$Dim.3
Link between the variable and the continuous variables (R-square)
=================================================================================
correlation p.value
WP_inf_soc 0.43103927 5.932087e-69
WP_sup_soc 0.36315765 5.202526e-48
FB_inf_soc 0.31549616 4.833556e-36
WP_inf_pol 0.28499452 1.915561e-29
WP_sup_pol 0.25366848 1.802608e-23
YT_inf_soc 0.23367520 4.602141e-20
LN_inf_pol 0.20661311 6.191772e-16
IN_inf_soc 0.16082364 3.695166e-10
FB_inf_pol 0.15706635 9.468457e-10
TT_inf_soc 0.14110557 4.025021e-08
LN_inf_soc 0.08020538 1.872149e-03
Facebook 0.07226962 5.090446e-03
FB_sup_soc 0.05833110 2.382445e-02
TT_sup_soc 0.05570614 3.092113e-02
LN_sup_soc 0.05408964 3.613820e-02
TikTok 0.05076242 4.926344e-02
Telegram -0.05217638 4.326437e-02
TG_sup_soc -0.06805041 8.356381e-03
TT_inf_pol -0.06816418 8.248131e-03
Twitter_X -0.08944174 5.218428e-04
LN_sup_pol -0.09640438 1.836009e-04
IN_sup_pol -0.09860465 1.300523e-04
IN_sup_soc -0.13570890 1.307527e-07
TG_inf_pol -0.15315911 2.460153e-09
FB_sup_pol -0.16121520 3.345716e-10
TW_inf_soc -0.16374089 1.752449e-10
TW_inf_pol -0.38628709 1.331497e-54
TW_sup_soc -0.56206125 9.540613e-126
TW_sup_pol -0.56697686 2.109940e-128
Link between the variable and the categorical variable (1-way anova)
=============================================
R2 p.value
Left_Right 0.007849355 0.03773854
Link between variable and the categories of the categorical variables
================================================================
Estimate p.value
Left_Right=Center 0.1554638 0.017904665
Age_cat=65_+ 0.1459762 0.033747076
Vote=JxCat-Junts -0.3675452 0.040695521
Inf_confirm=Inf_confirm_Very often -0.1836021 0.030420750
Age_cat=25_34 -0.1477261 0.027270602
Vote=VOX -0.2200736 0.020045464
Inf_disagr=Inf_disagr_Very often -0.2676913 0.006841006
# --- 4️⃣ PCA plots + supplementary variables ---
# Variable map
fviz_pca_var(pca_pol_fm,
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE)# Individuals with supplementary categories projected
fviz_pca_biplot(
pca_pol_fm,
label = "var",
habillage = "Vote",
repel = TRUE,
col.var = "firebrick"
)Clusters individuals based on PCA component scores.
library(FactoMineR)
library(factoextra)
hc <- HCPC(
pca_pol_fm,
nb.clust = -1, # lets HCPC choose optimum based on inertia
consol = TRUE,
graph = FALSE
)
# Cluster dendrogram
plot(hc, choice = "tree")# Cluster map
fviz_cluster(hc, repel = TRUE, geom = "point") +
ggtitle("Clusters Based on Political Social Media Uses")library(dplyr)
library(tidyr)
library(stringr)
# --- 1️⃣ Safety checks ---
if (!exists("hc")) stop("❌ 'hc' object not found — run HCPC first.")
if (!exists("data_PCA4")) stop("❌ 'data_PCA4' not found — load it before running.")
if (!exists("pol_vars")) stop("❌ 'pol_vars' not found — define the political-use variable names.")
# --- 2️⃣ Compute cluster means if not already done ---
if (!exists("means")) {
means <- hc$data.clust %>%
dplyr::select(clust, dplyr::all_of(pol_vars)) %>%
dplyr::group_by(clust) %>%
dplyr::summarise(across(everything(), mean, na.rm = TRUE), .groups = "drop")
}
# --- 3️⃣ Global means for comparison ---
if (!exists("global_means")) {
global_means <- colMeans(data_PCA4[, pol_vars], na.rm = TRUE)
}
# --- 4️⃣ Compute deviations from global means ---
dev <- means
for (v in pol_vars) {
dev[[paste0(v, "_dev")]] <- dev[[v]] - global_means[v]
}
# --- 5️⃣ Parse variable names into platform / purpose ---
dev_long <- dev %>%
dplyr::select(clust, ends_with("_dev")) %>%
tidyr::pivot_longer(
-clust,
names_to = "variable",
values_to = "deviation"
) %>%
tidyr::separate(
variable,
into = c("platform", "purpose", "extra"),
sep = "_",
remove = FALSE,
fill = "right"
)
# --- 6️⃣ Define label dictionaries ---
platform_labels <- c(
YT = "YouTube", WP = "WhatsApp", TW = "Twitter/X", IN = "Instagram",
TT = "TikTok", FB = "Facebook", LN = "LinkedIn", TG = "Telegram"
)
purpose_labels <- c(
inf_soc = "Inform socially",
sup_soc = "Support social causes",
inf_pol = "Inform politically",
sup_pol = "Support political causes"
)
# --- 3️⃣ Pivot deviations to long format ---
dev_long <- dev %>%
select(clust, ends_with("_dev")) %>%
pivot_longer(-clust, names_to = "variable", values_to = "deviation") %>%
mutate(
platform = str_extract(variable, "^[A-Z]{2}"), # e.g. YT, TW, FB
purpose = case_when(
str_detect(variable, "inf_soc") ~ "inf_soc",
str_detect(variable, "sup_soc") ~ "sup_soc",
str_detect(variable, "inf_pol") ~ "inf_pol",
str_detect(variable, "sup_pol") ~ "sup_pol",
TRUE ~ "other"
)
) %>%
filter(purpose %in% names(purpose_labels))
# --- 4️⃣ Compute mean deviation per cluster × platform × purpose ---
dev_summary <- dev_long %>%
group_by(clust, platform, purpose) %>%
summarise(mean_dev = mean(deviation, na.rm = TRUE), .groups = "drop")
# --- 5️⃣ New flexible automatic naming function ---
auto_name_weighted <- function(df, high_th = 0.15, low_th = -0.15) {
highs <- df %>%
filter(mean_dev > high_th) %>%
mutate(label = paste(platform_labels[platform], purpose_labels[purpose], sep = " – ")) %>%
pull(label)
lows <- df %>%
filter(mean_dev < low_th) %>%
mutate(label = paste(platform_labels[platform], purpose_labels[purpose], sep = " – ")) %>%
pull(label)
label <- c()
if (length(highs) > 0) label <- c(label, paste("High", paste(highs, collapse = " + ")))
if (length(lows) > 0) label <- c(label, paste("Low", paste(lows, collapse = " + ")))
if (length(label) == 0) return("Moderate Use")
paste(label, collapse = " | ")
}
# --- 6️⃣ Apply naming function cluster by cluster ---
cluster_names <- dev_summary %>%
group_by(clust) %>%
dplyr::summarise(name = auto_name_weighted(cur_data()), .groups = "drop")
cluster_names# A tibble: 5 × 2
clust name
<fct> <chr>
1 1 Moderate Use
2 2 High Facebook – Inform socially + Instagram – Inform socially + TikTok …
3 3 High Twitter/X – Inform politically + Twitter/X – Inform socially + Twi…
4 4 High Facebook – Inform politically + Facebook – Inform socially + Faceb…
5 5 High Facebook – Inform politically + Facebook – Support political cause…
library(ggplot2)
library(forcats)
# Reuse `dev_summary` from previous code
# Each cell = mean deviation of one platform–purpose combo in one cluster
# Format labels nicely
dev_heat <- dev_summary %>%
mutate(
platform = factor(platform,
levels = names(platform_labels),
labels = platform_labels),
purpose = factor(purpose,
levels = names(purpose_labels),
labels = purpose_labels),
clust = as.factor(clust)
)
# Plot heatmap
ggplot(dev_heat, aes(x = platform, y = purpose, fill = mean_dev)) +
geom_tile(color = "white", size = 0.3) +
geom_text(aes(label = sprintf("%.2f", mean_dev)), size = 3.2, color = "black") +
scale_fill_gradient2(
low = "#4575B4", mid = "white", high = "#D73027", midpoint = 0,
name = "Deviation\n(from mean)"
) +
facet_wrap(~ clust, ncol = 2) +
labs(
title = "Deviation Heatmap by Cluster, Platform, and Purpose",
subtitle = "Red = above average use | Blue = below average use",
x = "Platform",
y = "Purpose"
) +
theme_minimal(base_size = 10) +
theme(
strip.text = element_text(face = "bold", size = 10),
axis.text.x = element_text(angle = 90, hjust = 1),
panel.grid = element_blank()
)library(dplyr)
library(tidyr)
library(ggplot2)
library(rstatix)
library(effectsize)
library(gt)
library(forcats)
# --- 1️⃣ Rejoin cluster assignments to supplementary variables ---
clust_data <- hc$data.clust %>%
dplyr::mutate(ID = dplyr::row_number()) %>%
dplyr::rename(cluster = clust)
# Add ID to data_PCA4 if not already there
if (!"ID" %in% names(data_PCA4)) {
data_PCA4 <- data_PCA4 %>%
dplyr::mutate(ID = dplyr::row_number())
}
# Join supplementary variables from data_PCA4
clust_data <- clust_data %>%
dplyr::left_join(
data_PCA4 %>%
dplyr::select(
ID,
Age_cat, Sex, Vote, Left_Right, Pol_int,
Info_Online, Partisan_mobil, Civic_Engagement,
Protest_Action, Info_Offline,
YouTube, WhatsApp, Twitter_X, Instagram,
TikTok, Facebook, Linkedin, Telegram
),
by = "ID"
)
# --- 2️⃣ Handle duplicated columns (if any) ---
dup_vars <- names(clust_data)[stringr::str_detect(names(clust_data), "\\.x$|\\.y$")]
if (length(dup_vars) > 0) {
message("⚠️ Duplicated variable names found. Cleaning up...")
clust_data <- clust_data %>%
dplyr::select(-dplyr::matches("\\.x$")) %>% # drop .x
dplyr::rename_with(~ stringr::str_remove(.x, "\\.y$")) # clean .y
}
# Convert to factors where needed
clust_data <- clust_data %>%
dplyr::mutate(
cluster = as.factor(cluster),
Age_cat = as.factor(Age_cat),
Sex = as.factor(Sex),
Vote = as.factor(Vote),
Pol_int = as.factor(Pol_int),
Left_Right = as.factor(Left_Right)
)
# Define variable groups
sup_quali <- c("Age_cat", "Sex", "Vote", "Pol_int", "Left_Right")
sup_quanti <- c("Info_Online", "Partisan_mobil",
"Civic_Engagement", "Protest_Action", "Info_Offline",
"YouTube", "WhatsApp", "Twitter_X", "Instagram",
"TikTok", "Facebook", "Linkedin", "Telegram")
# --- 2️⃣ Quantitative supplementary vars: Welch ANOVA + Effect Size ---
quant_desc <- clust_data %>%
pivot_longer(cols = all_of(sup_quanti),
names_to = "Variable", values_to = "Value") %>%
group_by(Variable) %>%
summarise(
p_value = oneway.test(Value ~ cluster, var.equal = FALSE)$p.value,
eta_sq = eta_squared(aov(Value ~ cluster))[[1]],
.groups = "drop"
) %>%
mutate(signif = case_when(
p_value < 0.001 ~ "***",
p_value < 0.01 ~ "**",
p_value < 0.05 ~ "*",
TRUE ~ ""
))
# --- 3️⃣ Categorical supplementary vars: Chi-square tests ---
library(dplyr)
library(tidyr)
library(purrr)
clust_data <- clust_data %>%
dplyr::mutate(Age_cat = droplevels(Age_cat))
quali_desc <- map_dfr(sup_quali, function(v) {
tmp <- clust_data %>%
dplyr::select(cluster, !!sym(v)) %>%
dplyr::filter(!is.na(cluster), !is.na(!!sym(v)))
tbl <- table(tmp$cluster, tmp[[v]])
if (all(dim(tbl) > 1)) {
test <- suppressWarnings(chisq.test(tbl))
tibble(
Variable = v,
chi_sq = unname(test$statistic),
df = unname(test$parameter),
p_value = test$p.value
)
} else {
tibble(
Variable = v,
chi_sq = NA_real_,
df = NA_real_,
p_value = NA_real_
)
}
}) %>%
mutate(
signif = case_when(
p_value < 0.001 ~ "***",
p_value < 0.01 ~ "**",
p_value < 0.05 ~ "*",
TRUE ~ ""
)
)
# --- 4️⃣ Present results as tables ---
gt(quant_desc) %>%
tab_header(
title = "Cluster Description: Quantitative Supplementary Variables",
subtitle = "Welch ANOVA results with effect sizes"
) %>%
fmt_number(columns = where(is.numeric), decimals = 3)| Cluster Description: Quantitative Supplementary Variables | |||
|---|---|---|---|
| Welch ANOVA results with effect sizes | |||
| Variable | p_value | eta_sq | signif |
| Civic_Engagement | 0.946 | 0.000 | |
| 0.031 | 0.008 | * | |
| Info_Offline | 0.201 | 0.004 | |
| Info_Online | 0.642 | 0.002 | |
| 0.000 | 0.023 | *** | |
| 0.000 | 0.032 | *** | |
| Partisan_mobil | 0.000 | 0.003 | *** |
| Protest_Action | 0.273 | 0.004 | |
| Telegram | 0.000 | 0.048 | *** |
| TikTok | 0.000 | 0.028 | *** |
| Twitter_X | 0.000 | 0.110 | *** |
| 0.095 | 0.005 | ||
| YouTube | 0.001 | 0.012 | ** |
Description of clusters by supplementary variables
gt(quali_desc) %>%
tab_header(
title = "Cluster Description: Categorical Supplementary Variables",
subtitle = "Chi-square tests of independence"
) %>%
fmt_number(columns = where(is.numeric), decimals = 3)| Cluster Description: Categorical Supplementary Variables | ||||
|---|---|---|---|---|
| Chi-square tests of independence | ||||
| Variable | chi_sq | df | p_value | signif |
| Age_cat | 53.660 | 20.000 | 0.000 | *** |
| Sex | 1.990 | 4.000 | 0.738 | |
| Vote | 65.739 | 36.000 | 0.002 | ** |
| Pol_int | 115.582 | 12.000 | 0.000 | *** |
| Left_Right | 69.648 | 20.000 | 0.000 | *** |
Description of clusters by supplementary variables
# --- 5️⃣ Visualization: quantitative variable profiles ---
clust_data %>%
group_by(cluster) %>%
summarise(across(all_of(sup_quanti), mean, na.rm = TRUE)) %>%
pivot_longer(-cluster, names_to = "Variable", values_to = "Mean") %>%
ggplot(aes(x = cluster, y = Mean, fill = cluster)) +
geom_col(position = "dodge") +
facet_wrap(~Variable, scales = "free_y", ncol = 4) +
theme_minimal(base_size = 12) +
labs(title = "Cluster Mean Profiles: Quantitative Supplementary Variables",
x = "Cluster", y = "Mean") +
theme(legend.position = "none")# --- 6️⃣ Visualization: categorical variable distributions ---
clust_data %>%
pivot_longer(cols = all_of(sup_quali), names_to = "Variable", values_to = "Category") %>%
count(Variable, Category, cluster) %>%
group_by(Variable, cluster) %>%
mutate(percent = 100 * n / sum(n)) %>%
ggplot(aes(x = Category, y = percent, fill = cluster)) +
geom_col(position = "dodge") +
facet_wrap(~Variable, scales = "free_x") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y = "% within cluster", title = "Cluster Distributions: Categorical Supplementary Variables")v <- "Vote" # or any of the categorical variables
plot_data <- clust_data %>%
count(cluster, !!sym(v)) %>%
group_by(cluster) %>%
mutate(percent = 100 * n / sum(n)) %>%
ungroup()
ggplot(plot_data, aes(x = !!sym(v), y = percent, fill = cluster)) +
geom_col(position = "dodge") +
labs(
title = paste("Distribution of", v, "by Cluster"),
x = v, y = "% within cluster"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Theocharis, Yannis, Shelley Boulianne, Karolina Koc-Michalska, and Bruce Bimber. 2023. ‘Platform Affordances and Political Participation: How Social Media Reshape Political Engagement’. West European Politics 46(4): 788–811. doi:10.1080/01402382.2022.2087410.
15.1 Social media and left-right ideology
Show code
Show code
Show code
Show code