# load external packages and source helper functions
library(knitr)
library(kableExtra)
library(dplyr)
library(haven)
library(ggplot2)
library(ggcorrplot)
library(viridis)
library(tidyverse)
library(gridExtra)
library(gt)
library(lavaan)
library(semTools)
library(semPlot)
library(psych)
library(afcommon)
source("af_common_add_ons.R")
options(dplyr.summarise.inform = FALSE)
Load the tidy survey data file and select the required columns for the analysis
Filter for the Jewish population only.
# Read the tidy survey file as a plain data.frame and keep only the rows whose
# `sample` and `religion` columns both equal "Jewish".
df <- readRDS("Israel Survey/data/israel_survey.RDS") %>%
  as.data.frame() %>%
  filter(sample == "Jewish", religion == "Jewish")
Define the survey measures used for the extremism dimensions
# Column names of the survey items that feed each extremism dimension.
ideological_vars <- c("peace_1", "annex_1")

behavioral_vars <- c(
  paste0("poli_violence1_", 1:3), # poli_violence_4 does not exist in Wave 1
  "tolerance_19",                 # tolerance 26 does not exist in Wave 1
  paste0("activism__", 3:4)
)

social_vars <- paste0("tolerance_", c(1:5, 9:11, 13, 15, 16, 18, 20))

political_orientation_vars <- "politi_orient_1"

# Items that are only required to be answered outside the first wave.
wave_1_excluded <- c("poli_violence1_4", "tolerance_26", "Demo_state_3")

# Every item that must be answered by all respondents, in dimension order.
measures_vars <- c(
  ideological_vars,
  behavioral_vars,
  social_vars,
  political_orientation_vars
)
Remove all rows (respondents) that have an NA (not answered) in one of the survey measures (questions).
# Count respondents before cleaning so the share of dropped rows can be
# reported afterwards.
original_row_count <- nrow(df)

# Step 1: keep respondents who answered every item in `measures_vars`.
# Step 2: for the items listed in `wave_1_excluded`, a missing answer is
#         tolerated only when the respondent belongs to Wave "First".
# NOTE(review): the result is stored in `df_clean`; nothing in this chunk
# assigns it back to `df`, which is what the following chunks read - confirm
# that this happens elsewhere.
df_clean <- df %>%
  filter(if_all(all_of(measures_vars), ~ !is.na(.x))) %>%
  filter(if_all(all_of(wave_1_excluded), ~ Wave == "First" | !is.na(.x)))

cleaned_row_count <- nrow(df_clean)
rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100
Removed 341 rows out of 7780 (4.3830334%).
When assessing socio-political extremism dimensions, combining multiple related questions into a single measure can enhance the comprehensiveness and reliability of our assessment. Before merging these questions, we must verify they measure the same underlying construct through Confirmatory Factor Analysis (CFA). Given our multi-wave dataset structure, we employ grouped CFA, which first requires testing for measurement invariance. Since wave one contains a different set of measures compared to subsequent waves, we conduct this analysis in two stages - first analyzing the initial wave independently, then examining the remaining waves together.
The ideological dimension represents extremism on either the political left or the political right. It is based on the following survey measures (questions):
The scale of peace_1 is in the reverse direction (right-wing –> left-wing) of the other two measures (left-wing –> right-wing). We thus reverse it.
# Split respondents into the first wave and all later waves; the two subsets
# are modelled separately because they use different item sets.
df_1 <- filter(df, Wave == "First")
df_2 <- filter(df, Wave != "First")

# One-factor measurement models written as "pe_ideology =~ item1+item2...".
# The later-wave model additionally includes Demo_state_3.
ideology_vars_1 <- c("peace_1_r", "annex_1")
model_1 <- sprintf("pe_ideology =~ %s", paste(ideology_vars_1, collapse = "+"))

ideology_vars_2 <- c(ideology_vars_1, "Demo_state_3")
model_2 <- sprintf("pe_ideology =~ %s", paste(ideology_vars_2, collapse = "+"))
The ideological dimension (pe_ideology) is calculated by ‘folding’ the measure scales. When folding scales to measure extremism, we are essentially measuring distance from the midpoint (4 in a 1-7 scale), so values of 1 and 7 would both indicate high extremism (3 units from center), while 3 and 5 would indicate lower extremism (1 unit from center).
We perform CFA first and then fold the resulting factor score. This sequence validates that the measures truly capture the same underlying left-right construct. It creates a more reliable composite measure of ideology, preserving the linear relationships between items during validation. This approach maintains better measurement properties and is more theoretically sound, as the original theoretical construct (ideology) is first validated and only then transformed into the derived construct (extremism).
# Fit the ideology model on the first wave with the project helper af_cfa()
# (no grouping variable), then display the `cfa_tbl` element of its result.
result_1 <- af_cfa(
  df_1,
  var_list = ideology_vars_1,
  latent_var_name = "pe_ideology",
  group = NULL,
  model = model_1,
  cfa_type = NULL,
  group_id = "Wave 1",
  factors_list = NULL,
  clean_model = FALSE
)
af_gt_cfa_results_tbl(result_1$cfa_tbl)
Latent.var | Group | CFI | TLI | RMSEA | SRMR | Cronbach | Alpha |
---|---|---|---|---|---|---|---|
pe_ideology | Wave 1 | NA | NA | 0 | 3.763233e-11 | Poor reliability (0.53) | 0.5335424 |
CFI (Comparative Fit Index) values above 0.90 indicate good fit. | |||||||
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit. | |||||||
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit. | |||||||
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit. |
# Fit the ideology model on the later waves with the project helper af_cfa(),
# passing "Wave" as the grouping variable, then display its `cfa_tbl`.
# NOTE(review): unlike the wave-one call, `clean_model` is left at its default
# here - confirm that this difference is intended.
result_2 <- af_cfa(
  df_2,
  var_list = ideology_vars_2,
  latent_var_name = "pe_ideology",
  group = "Wave",
  model = model_2,
  cfa_type = NULL,
  group_id = "Waves 2-last",
  factors_list = NULL
)
af_gt_cfa_results_tbl(result_2$cfa_tbl)
Latent.var | Group | CFI | TLI | RMSEA | SRMR | Cronbach | Alpha |
---|---|---|---|---|---|---|---|
pe_ideology | Waves 2-last | 1 | 1 | 0 | 1.159868e-08 | Acceptable reliability (0.72) | 0.7230871 |
CFI (Comparative Fit Index) values above 0.90 indicate good fit. | |||||||
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit. | |||||||
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit. | |||||||
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit. |
Correlate latent var with distance function of manifests
# Sanity check: correlate the value af_mean() derives from the manifest items
# with the `pe_ideology` column returned by af_cfa(), separately per subset.

# First wave.
ideology_mean <- af_mean(df_1, var_list = ideology_vars_1)
ideology_predict <- result_1$new_df[["pe_ideology"]]
c1_mean <- cor(x = ideology_mean, y = ideology_predict)

# Later waves. These assignments overwrite the first-wave vectors above.
ideology_mean <- af_mean(df_2, var_list = ideology_vars_2)
ideology_predict <- result_2$new_df[["pe_ideology"]]
c2_mean <- cor(x = ideology_mean, y = ideology_predict)

# NOTE(review): only the later-wave vectors are plotted, because the wave-one
# vectors were already replaced; the violence section plots both subsets.
af_create_x_multi_y_plot(
  data = data.frame(ideology_mean, ideology_predict),
  y_var_names = c("ideology_mean", "ideology_predict"),
  smooth = TRUE,
  show_points = FALSE
)
Correlation between the latent variable (pe_ideology) and the distance function of the manifest variables
Combine wave 1 with wave 2…last wave results and then fold the ideology variable
# Stack the per-subset data frames returned by af_cfa() back into one table.
df <- rbind(result_1$new_df, result_2$new_df)

# Fold ideology to identify extremism.
# Original scale is 1-7 with the midpoint set to 4; the folded scale is 0-3,
# where 0 is the center and 3 is extreme in either the left or the right.
# The folded value is then mapped linearly from [0, 3] to [1, 7].
# NOTE(review): this assumes `pe_ideology` from af_cfa() is expressed on the
# 1-7 item scale (not a zero-centered factor score) - confirm.
df$pe_ideology <- (af_folded_scale(df$pe_ideology, center = 4) / 3) * 6 + 1

# Density of the folded measure, one facet per wave.
af_create_y_plot(
  df,
  y_var = "pe_ideology",
  group_var = "Wave",
  plot_types = c("density"),
  use_facet = TRUE
)
The theory of political violence distinguishes two types: violence against the citizens of the out-group and violence against the state and its institutions.
Eight different survey questions relate to violence: tolerance 19 and 26 relate to violence against citizens of the out-group; poli_violence 1, 2, 3, 4 and activism 3 and 4 relate to violence against the state and its institutions.
It is important to note that poli_violence 4 and tolerance 26 do not exist in the first wave
To identify the structure of the violence measure we first perform an exploratory factor analysis (EFA). We use the Principal Axis (PA) factoring method as it can be better for exploratory factor analysis, especially with non-normal data.
# Run a parallel analysis (principal-axis factoring) on the behavioral items
# to decide how many factors to extract; the scree plot is drawn as well.
fa_result <- psych::fa.parallel(
  df[, behavioral_vars],
  fm = "pa",
  plot = TRUE
)
#> Parallel analysis suggests that the number of factors = 3 and the number of components = 1
The analysis suggests that 3 factor(s) are required.
# Exploratory factor analysis (principal-axis factoring) on the behavioral
# items, extracting the number of factors suggested by the parallel analysis
# (`fa_result$nfact`), followed by a diagram of the loadings.
fit <- fa(
  df[, behavioral_vars],
  nfactors = fa_result$nfact,
  fm = "pa",
  missing = TRUE,
  use = "na.or.complete"
)
loads <- fit$loadings
psych::fa.diagram(loads)
Based on the indications above we can see that the survey measures load onto three factors. PA1 is the latent variable representing political violence against the government and politicians. PA3 is the latent variable representing political violence against state institutions. PA2 is the latent variable representing political violence against the out group.
We also check the use of only two factors (latent variables)
# Exploratory factor analysis (principal-axis factoring) on the behavioral
# items with the number of factors fixed at two, followed by a diagram of the
# loadings.
fit <- fa(
  df[, behavioral_vars],
  nfactors = 2,
  fm = "pa",
  missing = TRUE,
  use = "na.or.complete"
)
loads <- fit$loadings
psych::fa.diagram(loads)
The results fit the theory regarding the types of political violence. PA1 is the latent variable representing political violence against the state government and institutions. PA2 is the latent variable representing political violence against the out-group citizens.
Using separate CFAs and then combining results would lose the established equivalence in measurement structure that we’ve already confirmed with the metric invariance test for waves 2-last. This approach would be less theoretically justified and could introduce more researcher degrees of freedom in how we combine the results. We thus perform CFA for the first wave and then for waves 2 through the last wave.
# Split respondents into the first wave and all later waves.
df_1 <- filter(df, Wave == "First")
df_2 <- filter(df, Wave != "First")

# Model for first wave: poli_violence1_4 and tolerance_26 are not included.
gov_vars_1 <- paste0("poli_violence1_", 1:3)
inst_vars_1 <- paste0("activism__", 3:4)
out_vars_1 <- "tolerance_19"
behavioral_vars_1 <- c(gov_vars_1, inst_vars_1, out_vars_1)
model_1 <- sprintf("pe_violence =~ %s", paste(behavioral_vars_1, collapse = "+"))

# Model for the later waves: all eight violence items.
gov_vars_2 <- paste0("poli_violence1_", 1:4)
inst_vars_2 <- paste0("activism__", 3:4)
out_vars_2 <- paste0("tolerance_", c(19, 26))
behavioral_vars_2 <- c(gov_vars_2, inst_vars_2, out_vars_2)
model_2 <- sprintf("pe_violence =~ %s", paste(behavioral_vars_2, collapse = "+"))
# Fit the single-factor violence model on the first wave with the project
# helper af_cfa() (no grouping variable), then display its `cfa_tbl`.
result_1 <- af_cfa(
  df_1,
  var_list = behavioral_vars_1,
  latent_var_name = "pe_violence",
  group = NULL,
  model = model_1,
  cfa_type = NULL,
  group_id = "Wave 1",
  factors_list = NULL,
  clean_model = FALSE
)
af_gt_cfa_results_tbl(result_1$cfa_tbl)
Latent.var | Group | CFI | TLI | RMSEA | SRMR | Cronbach | Alpha |
---|---|---|---|---|---|---|---|
pe_violence | Wave 1 | 0.7652941 | 0.6088235 | 0.2591642 | 0.08429097 | Acceptable reliability (0.76) | 0.762801 |
CFI (Comparative Fit Index) values above 0.90 indicate good fit. | |||||||
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit. | |||||||
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit. | |||||||
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit. |
# Fit the single-factor violence model on the later waves with the project
# helper af_cfa(), then display its `cfa_tbl`.
# NOTE(review): `group` is NULL here, whereas the ideology model for the same
# waves passes group = "Wave" - confirm which one is intended.
result_2 <- af_cfa(
  df_2,
  var_list = behavioral_vars_2,
  latent_var_name = "pe_violence",
  group = NULL,
  model = model_2,
  cfa_type = NULL,
  group_id = "Waves 2-last",
  factors_list = NULL
)
af_gt_cfa_results_tbl(result_2$cfa_tbl)
Latent.var | Group | CFI | TLI | RMSEA | SRMR | Cronbach | Alpha |
---|---|---|---|---|---|---|---|
pe_violence | Waves 2-last | 0.6584238 | 0.5217934 | 0.2602597 | 0.1235573 | Acceptable reliability (0.78) | 0.7761107 |
CFI (Comparative Fit Index) values above 0.90 indicate good fit. | |||||||
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit. | |||||||
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit. | |||||||
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit. |
Correlate latent var with mean/distance function of manifests
# Sanity check: correlate the value af_mean() derives from the manifest items
# with the `pe_violence` column returned by af_cfa(), and plot both series.

# First wave.
violence_mean <- af_mean(df_1, behavioral_vars_1)
violence_predict <- result_1$new_df[["pe_violence"]]
c1_mean <- cor(x = violence_mean, y = violence_predict)
af_create_x_multi_y_plot(
  data = data.frame(violence_mean, violence_predict),
  y_var_names = c("violence_mean", "violence_predict"),
  smooth = TRUE,
  show_points = FALSE
)

# Later waves (reuses, and therefore overwrites, the same variable names).
violence_mean <- af_mean(df_2, behavioral_vars_2)
violence_predict <- result_2$new_df[["pe_violence"]]
c2_mean <- cor(x = violence_mean, y = violence_predict)
af_create_x_multi_y_plot(
  data = data.frame(violence_mean, violence_predict),
  y_var_names = c("violence_mean", "violence_predict"),
  smooth = TRUE,
  show_points = FALSE
)
Correlation between latent variable (pe_violence) to mean of manifests
Create overall political extremism measure by calculating the 3D distance (RMS) of the three PE dimensions.
# Combine the three extremism dimensions into one score with the project
# helper af_dist(), then show its density with one facet per wave.
# `pe_violence` and `pe_intolerance` must already be columns of `df` here.
df$pe_overall <- af_dist(
  df,
  c("pe_ideology", "pe_violence", "pe_intolerance")
)
af_create_y_plot(
  df,
  y_var = "pe_overall",
  group_var = "Wave",
  plot_types = c("density"),
  use_facet = TRUE
)
Self-reported political orientation, ranging from 1 to 7, where 1 represents the right and 7 represents the left.
We use politi_orient_1, the self-reported political orientation scale of 1-7 where 1 represents right-wing and 7 represents left-wing, to define three political categories as follows: right (1-3), center (4) and left (5-7).
# Classify self-reported orientation with the project helper af_rcl(), using
# 4 as both the lower and the upper bound of the center band, then plot the
# density of pe_ideology with one facet per resulting category.
df$pe_left_center_right <- af_rcl(
  df$politi_orient_1,
  c_low = 4,
  c_up = 4
)
af_create_y_plot(
  data = df,
  y_var = "pe_ideology",
  group_var = "pe_left_center_right",
  bins = 100,
  plot_types = c("density"),
  use_facet = TRUE
)
We create alternative classification as follows: right (1-2), center (3-5) and left (6-7)
# Alternative classification with a wider center band (bounds 3 and 5),
# followed by the same faceted density plot of pe_ideology.
df$pe_left_center_right_2 <- af_rcl(
  df$politi_orient_1,
  c_low = 3,
  c_up = 5
)
af_create_y_plot(
  data = df,
  y_var = "pe_ideology",
  group_var = "pe_left_center_right_2",
  bins = 100,
  plot_types = c("density"),
  use_facet = TRUE
)
The following chart provides information on the distribution of the political category variables.
Combine the Traditional group with the Religious group, and combine the National Ultra-Orthodox group with the Ultra-Orthodox group.
# Target level order of the collapsed religiosity factor.
pe_religiosity_order <- c("Secular", "Religious", "National Religious", "Ultra-Orthodox")

# Remove rows with NA values in religiosity
df <- df[!is.na(df$religiosity), ]

# Build pe_religiosity from religiosity, merging "Traditional" into
# "Religious" and "National Ultra-Orthodox" into "Ultra-Orthodox". Values
# that are not in `pe_religiosity_order` become NA in the factor.
df$pe_religiosity <- as.character(df$religiosity)
df$pe_religiosity[df$religiosity == "Traditional"] <- "Religious"
df$pe_religiosity[df$religiosity == "National Ultra-Orthodox"] <- "Ultra-Orthodox"
df$pe_religiosity <- factor(df$pe_religiosity, levels = pe_religiosity_order)

# NOTE(review): this plots the original `religiosity` column rather than the
# newly created `pe_religiosity` - confirm that this is intended.
af_create_y_plot(
  df,
  y_var = "religiosity",
  group_var = "Wave",
  use_facet = TRUE
)
Remove all columns which name ends with ’_P’ (Used for panel in SPSS)
Remove survey management variables
# Survey-platform bookkeeping columns that are not needed for the analysis.
# The original list named "IPAddress" twice; the duplicate is dropped here
# (the set of removed columns is unchanged).
rmv_list <- c(
  "StartDate", "EndDate", "Status", "IPAddress", "Progress",
  "Duration__in_seconds_", "UserLanguage", "Finished", "RecordedDate",
  "RecipientLastName", "RecipientFirstName", "RecipientEmail", "ExternalReference",
  "LocationLatitude", "LocationLongitude", "DistributionChannel", "consent"
)

# all_of() raises an error if any listed column is missing from `df`, so a
# renamed or already-removed column is reported instead of silently skipped.
df <- df %>% dplyr::select(-all_of(rmv_list))
Remove calculated / report variables
# Names of the calculated / report columns to drop from `df`.
# This reuses (and overwrites) the `rmv_list` variable.
# The trailing comment on the last entry keeps two names ("regionmap",
# "mean_supp") disabled, so those columns are not removed.
rmv_list <- c(
"finish", "i.user1", "i.user3",
"i.user9", "OUTPARTY", "inparty", "party",
"religroup", "religroupn", "left_right", "Gender_Dummy",
"center_left_right", "relig_group", "SETTLERS_NOT", "democracy_T",
"Civil_rights_All", "Social_D", "Threat_Per", "exclusion_1_T",
"tolerance_13_r", "Dehumanization", "Policy_least_liked", "Civil_rights_least_liked",
"tolerance_25_r", "indirect_exclu", "exclusion_2_T", "tolerance_12R",
"tolerance_17R", "tolerance_18R", "tolerance_19R", "tolerance_20R",
"halaca_t", "demo_state_r", "mean_anti_demo", "poli_violence_T",
"activismT", "RWA_T", "LWA_T", "SDO_2_R",
"SDO_3_R", "SDO_T", "DOGMA_2_R", "Dogma_T",
"traditional_meta_1", "normethnic_1",
"normleastethnic", "hitgab_supp_1", "hitgab_demo_1", "yesod_supp_1",
"yesod_demo_1", "ragil_supp_1", "ragil_demo_1", "svirut_supp_1",
"svirut_demo_1", "yoamash_supp_1", "yoamash_demo_1", "shoftim_supp_1",
"shoftim_demo_1", "reform_support", "feelings_opp_1", "feelings_opp_2",
"feelings_opp_3", "feelings_opp_4", "feelings_opp_5", "feelings_supp_1",
"feelings_supp_2", "feelings_supp_3", "feelings_supp_4", "feelings_supp_5",
"threat_1", "threat_2", "threat_3", "threat_4",
"threat_5", "palas_emotions_1", "palas_emotions_2", "palas_emotions_3",
"palas_emotions_4", "palas_emotions_5", "palas_emotions_6", "openess_pal_1",
"openess_pal_2", "openess_pal_3", "openess_pal_4", "violence_pal_1",
"violence_pal_2", "policy_pal_1", "policy_pal_2", "ethnic",
"ethnicg", "threat_4r", "phythreat", "symthreat",
"symthreat_arabs", "distance_palas", "violen_palast", "policy_pal_2_r",
"policy_palas_t", "filter_." # , "regionmap", "mean_supp"
)
# Drop the listed columns; all_of() errors if any of them is absent from `df`.
df <- df %>% dplyr::select(-all_of(rmv_list))
Remove all empty columns (NULL or NA)
Remove all rows from the dataset that have undefined values in any of the following variables: gender, age, education, religiosity, religion
# Drop respondents with a missing value in any mandatory demographic variable
# and compute how many rows that removed.
original_row_count <- nrow(df)
must_have_list <-
  c("gender", "age", "education", "religiosity", "religion")

# Remove rows with NA values in specified variables
df <- df[complete.cases(df[must_have_list]), ]

# Re-count after filtering. Previously `cleaned_row_count` still held the
# value from the earlier NA-cleaning step, so the reported number of removed
# rows did not reflect this filter.
cleaned_row_count <- nrow(df)
rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100
Removed 0 rows out of 7439 (0%).
Remove “Other” gender category
# Drop respondents whose gender is "Other" and compute how many rows that
# removed.
original_row_count <- nrow(df)
df <- df %>%
  filter(gender != "Other") %>%
  mutate(gender = droplevels(gender)) # Remove unused factor level

# Re-count after filtering. Previously `cleaned_row_count` was stale (set by
# an earlier step), which produced a negative "rows removed" figure.
cleaned_row_count <- nrow(df)
rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100
Removed -2 rows out of 7437 (-0.0268926%).
3.3 Social Dimension
All tolerance questions use a scale of 1-7 where 1 indicates low agreement and 7 indicates high agreement. Most tolerance questions actually reflect intolerance towards the out-groups. A few (1-5, 13, 25) reflect positive tolerance. In order to have all variables reflect the same direction, we reverse the positive tolerance variables and treat the social dimension as reflecting intolerance, where 1 indicates low intolerance and 7 indicates high intolerance.
Political intolerance has two sub-dimensions (components): Political Exclusion and Social Distancing. Political Exclusion refers to the denial of equal rights and the use of policies against members of out-groups. Social Distancing refers to personal intolerance towards members of the out-groups.
To identify the structure of the intolerance measure we first perform an exploratory factor analysis (EFA). We use the Principal Axis (PA) factoring method as it can be better for exploratory factor analysis, especially with non-normal data. We first check the use of only two latent variables.
The results fit the theory regarding the types of political intolerance. PA1 is the latent variable representing political exclusion. PA2 is the latent variable representing social distancing.
To solidify our findings we perform CFA analysis including each of the political intolerance latent variables as well as for a combined variable.
3.3.0.1 Results Visualization and Sanity
Correlation between latent variable (pe_intolerance) to mean of manifests: 0.9947517
3.3.0.2 Finalize Results