# load external packages and source helper functions

library(knitr)
library(kableExtra)

library(dplyr) 
library(haven)
library(ggplot2)
library(ggcorrplot)
library(viridis)
library(tidyverse)
library(gridExtra)
library(gt)
library(lavaan)
library(semTools)
library(semPlot)
library(psych)

library(afcommon)

# Load project-local helper definitions from a file in the working directory.
# NOTE(review): the af_* helpers used throughout this script presumably come
# from afcommon and/or this file — verify.
source("af_common_add_ons.R")

# Turn off dplyr's informational messages about summarise() grouping.
options(dplyr.summarise.inform = FALSE)

1 Prepare Dataset

Load the tidy survey data file and select the required columns for the analysis

Filter for the Jewish population only.

# Read the tidy survey file as a plain data.frame and keep only respondents
# whose sample AND religion are both recorded as "Jewish".
df <- readRDS("Israel Survey/data/israel_survey.RDS") %>%
  as.data.frame() %>%
  filter(sample == "Jewish", religion == "Jewish")

Define the survey measures used for the extremism dimensions

# Survey items required for each extremism dimension.
ideological_vars <- c("peace_1", "annex_1")

# poli_violence1_4 and tolerance_26 do not exist in Wave 1, so they are not
# listed here (see wave_1_excluded below).
behavioral_vars <- c(
  "poli_violence1_1", "poli_violence1_2", "poli_violence1_3",
  "tolerance_19",
  "activism__3", "activism__4"
)

# tolerance_4 is left out: it is NA for respondents selecting
# leastliked == "מתישבים ביהודה ושומרון" (settlers in Judea and Samaria).
social_vars <- paste0(
  "tolerance_",
  c(1, 2, 3, 5, 9, 10, 11, 13, 15, 16, 18, 20)
)

political_orientation_vars <- "politi_orient_1"

# Items that were not asked in Wave 1.
wave_1_excluded <- c("poli_violence1_4", "tolerance_26", "Demo_state_3")

# Every item that must be answered by all respondents.
measures_vars <- c(
  ideological_vars,
  behavioral_vars,
  social_vars,
  political_orientation_vars
)

Remove all rows (respondents) that have an NA (not answered) in one of the survey measures (questions).

# Drop respondents with a missing answer on any required survey measure and
# record how many rows were lost.
original_row_count <- nrow(df)

# Items asked in every wave must be answered by everyone.
# Only is.na() is tested: is.null() on a column vector is always FALSE, so the
# former `!is.null(.)` term never filtered anything.
df_clean <- df %>%
  filter(if_all(all_of(measures_vars), ~ !is.na(.)))

# Items introduced after Wave 1 are required only from later-wave respondents;
# Wave 1 rows pass regardless of these columns.
df_clean <- df_clean %>%
  filter(if_all(all_of(wave_1_excluded), ~ !is.na(.) | (Wave == "First")))
cleaned_row_count <- nrow(df_clean)

rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100

Removed 1 rows out of 7780 (0.0128535%).

# Abort if the NA cleanup discarded more than 20% of the respondents;
# otherwise continue with the cleaned data.
if (percentage_removed > 20) stop("Execution stopped: More than 20% of the rows were removed during cleanup.")

df <- df_clean

2 Political Extremism Measures

2.1 Cognitive Dimension

2.1.1 Measures

3.1.1 Measures

Rate your agreement with the following statements on a scale of 1-7, where: 7 = strongly agree and 1 = strongly disagree.

(1 = Right political orientation, 7 = Left political orientation)

  • peace_1: Negotiations for a permanent settlement – Israel must reach a two-state peace agreement with the Palestinians
  • annex_1: Annexation - Israel should annex the settlement areas in Judea and Samaria
  • Demo_state_3: I prefer the idea of a complete Land of Israel to the democratic nature of the state.

The scale of peace_1 is in the reverse direction (right-wing –> left-wing) to the other two measures (left-wing –> right-wing). We thus reverse it and use peace_1_r.

It is important to note that Demo_state_3 does not exist in the first wave.

# Reverse peace_1 on its 1-7 scale so it runs in the same direction as the
# other ideology items; the result is stored as peace_1_r.
df[["peace_1_r"]] <- af_reverse_scale(df[["peace_1"]], scale_min = 1, scale_max = 7)

2.1.2 Ideology variable

# Split the data by wave: Wave 1 gets a two-item model, later waves get a
# three-item model that adds Demo_state_3.
df_1 <- filter(df, Wave == "First")
df_2 <- filter(df, Wave != "First")

ideology_vars_1 <- c("peace_1_r", "annex_1")
ideology_vars_2 <- c(ideology_vars_1, "Demo_state_3")

# lavaan-style measurement model strings, e.g. "pe_ideology =~ peace_1_r+annex_1"
model_1 <- paste0("pe_ideology =~ ", paste(ideology_vars_1, collapse = "+"))
model_2 <- paste0("pe_ideology =~ ", paste(ideology_vars_2, collapse = "+"))

The Cognitive (Ideology) dimension (pe_ideology) is calculated by ‘folding’ the measure scales. When folding scales to measure extremism, we are essentially measuring distance from the midpoint (4 in a 1-7 scale), so values of 1 and 7 would both indicate high extremism (3 units from center), while 3 and 5 would indicate lower extremism (1 unit from center).

We perform CFA first and then fold the resulting factor score. This sequence validates that the measures truly capture the same underlying left-right construct. It creates a more reliable composite measure of ideology, preserving the linear relationships between items during validation. This approach maintains better measurement properties and is more theoretically sound, as the original theoretical construct (ideology) is first validated and only then transformed into the derived construct (extremism).

# Wave 1 ideology CFA: a single latent variable (pe_ideology) measured by the
# two Wave 1 items. The returned object's $cfa_tbl, $fit and $new_df elements
# are used further down in this script.
# NOTE(review): with only two indicators the fit indices are not informative
# (the rendered table reports CFI/TLI as NA) — confirm this model is intended.
result_1 <- af_cfa(df_1, var_list = ideology_vars_1, latent_var_name = "pe_ideology", 
                   group = NULL, model = model_1,
                   cfa_type = NULL, group_id = "Wave 1", factors_list = NULL, clean_model = FALSE)

# Render the fit/reliability summary table for the Wave 1 model.
af_gt_cfa_results_tbl(result_1$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_ideology Wave 1 NA NA 0 3.763233e-11 Poor reliability (0.53) 0.5335424
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Draw the Wave 1 ideology model diagram, then add its title to that plot.
af_create_sem_plot(result_1$fit)
title("Ideology SEM Model - Wave 1")

# Waves 2-last ideology CFA using the three-item model (adds Demo_state_3).
# NOTE(review): this call passes group = "Wave" and leaves clean_model at its
# default, whereas the Wave 1 call uses group = NULL and clean_model = FALSE —
# confirm the difference is intentional.
result_2 <- af_cfa(df_2, var_list = ideology_vars_2, latent_var_name = "pe_ideology", 
                   group = "Wave", model = model_2, 
                   cfa_type = NULL, group_id = "Waves 2-last", factors_list = NULL)

# Render the fit/reliability summary table for the waves 2-last model.
af_gt_cfa_results_tbl(result_2$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_ideology Waves 2-last 1 1 0 1.677028e-08 Acceptable reliability (0.74) 0.7413636
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Draw the waves 2-last ideology model diagram, then add its title.
af_create_sem_plot(result_2$fit)
title("Ideology SEM Model - Waves 2-last")

# Stack the Wave 1 and waves 2-last result data (each carries pe_ideology)
# back into a single data frame, then fold the ideology variable.
df <- rbind(result_1$new_df, result_2$new_df)

# Fold ideology to identify extremism
# Original scale is 1-7 with the midpoint set to 4
# New scale is 0-3 where 0 is center and 3 is extreme in either left or right
# NOTE(review): this assumes af_cfa() returns pe_ideology on the 1-7 item
# scale rather than as a standardized factor score — confirm, since
# center = 4 is only meaningful on the 1-7 scale.
df$pe_ideology <- af_folded_scale(df$pe_ideology, center = 4) 

# Linear transformation from [0,3] to [1,7]: 0 -> 1 and 3 -> 7
df$pe_ideology <- (df$pe_ideology / 3) * 6 + 1

# Density of the folded ideology score, one facet per wave.
af_create_y_plot(df, y_var = "pe_ideology", group_var = "Wave", plot_types = c("density"), 
                 use_facet = TRUE)

2.2 Behavioral Dimension

2.2.1 Measures

Rate your agreement with the following statements on a scale of 1-7, where: 7 = strongly agree and 1 = strongly disagree.

Violence against the Government and Institutions

  • poli_violence1_1: There are situations where there is no choice and even weapons must be used to prevent the government from implementing its policy.
  • poli_violence1_2: When a political disaster is looming and all means of protest have been exhausted to no avail, physically harming politicians may be forgivable.
  • poli_violence1_3: In the Israeli reality, violent struggle may sometimes be justified in order to achieve political gains (against the government).
  • poli_violence1_4: Sending threats and hate mail to public figures may sometimes be necessary to bring about an end to a dangerous policy.
  • activism__3: Damage to public property and equipment by public employees or security forces
  • activism__4: Use of physical force against public servants or security forces

Violence against the Outgroup

  • tolerance_19: Sometimes there is no choice but to use physical force against ______ to prevent them from advancing their interests.
  • tolerance_26: We must forcibly evict _________ from the common space.

It is important to note that poli_violence1_4 and tolerance_26 do not exist in the first wave.

2.2.2 Violence variable

Using separate CFAs and then combining results would lose the established equivalence in measurement structure that we’ve already confirmed with the metric invariance test for waves 2-last. This approach would be less theoretically justified and could introduce more researcher degrees of freedom in how we combine the results. We thus perform CFA for the first wave and then for waves 2 through the last wave.

# Indicator sets for the behavioral (violence) dimension.
# Wave 1 lacks poli_violence1_4 and tolerance_26, so the later-wave sets are
# the Wave 1 sets extended with those two items.
gov_vars_1 <- paste0("poli_violence1_", 1:3)
inst_vars_1 <- c("activism__3", "activism__4")
out_vars_1 <- "tolerance_19"
state_vars_1 <- c(gov_vars_1, inst_vars_1)
behavioral_vars_1 <- c(state_vars_1, out_vars_1)

gov_vars_2 <- c(gov_vars_1, "poli_violence1_4")
inst_vars_2 <- inst_vars_1
out_vars_2 <- c(out_vars_1, "tolerance_26")
state_vars_2 <- c(gov_vars_2, inst_vars_2)
behavioral_vars_2 <- c(state_vars_2, out_vars_2)

# Wave 1: one latent variable (pe_violence) over all Wave 1 violence items.
df_1 <- filter(df, Wave == "First")
model_1 <- paste0("pe_violence =~ ", paste(behavioral_vars_1, collapse = "+"))
result_1 <- af_cfa(
  df_1,
  var_list = behavioral_vars_1, latent_var_name = "pe_violence",
  group = NULL, model = model_1,
  cfa_type = NULL, group_id = "Wave 1", factors_list = NULL, clean_model = FALSE
)

af_gt_cfa_results_tbl(result_1$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_violence Wave 1 0.7652941 0.6088235 0.2591642 0.08429097 Acceptable reliability (0.76) 0.762801
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Diagram of the Wave 1 violence model, titled after drawing.
af_create_sem_plot(result_1$fit)
title("Violence SEM Model - Wave 1")

# Waves 2-last: same single-factor structure with the extended item set.
df_2 <- filter(df, Wave != "First")
model_2 <- paste0("pe_violence =~ ", paste(behavioral_vars_2, collapse = "+"))
result_2 <- af_cfa(
  df_2,
  var_list = behavioral_vars_2, latent_var_name = "pe_violence",
  group = NULL, model = model_2,
  cfa_type = NULL, group_id = "Waves 2-last", factors_list = NULL
)

af_gt_cfa_results_tbl(result_2$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_violence Waves 2-last 0.6641979 0.5298771 0.2574801 0.1223272 Acceptable reliability (0.78) 0.7765251
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Draw the waves 2-last violence model diagram, then add its title.
af_create_sem_plot(result_2$fit)
title("Violence SEM Model - Waves 2-last")

# Stack the Wave 1 and waves 2-last result data into one data frame.
# NOTE(review): pe_violence comes from two separately fitted models with
# different item sets, so the stacked scores may not share a common metric —
# confirm this is acceptable for cross-wave comparison.
df <- rbind(result_1$new_df, result_2$new_df)

# Density of pe_violence, one facet per wave.
af_create_y_plot(df, y_var = "pe_violence", group_var = "Wave", plot_types = c("density"), 
                 use_facet = TRUE)

2.2.3 Violence against the State

# Wave 1: latent "violence against the state" over the government and
# institution items available in Wave 1.
df_1 <- filter(df, Wave == "First")
model_1_state <- paste0("pe_violence_state =~ ", paste(state_vars_1, collapse = "+"))
result_1_state <- af_cfa(
  df_1,
  var_list = state_vars_1, latent_var_name = "pe_violence_state",
  group = NULL, model = model_1_state,
  cfa_type = NULL, group_id = "Wave 1", factors_list = NULL, clean_model = FALSE
)

af_gt_cfa_results_tbl(result_1_state$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_violence_state Wave 1 0.7703153 0.5406306 0.3382145 0.1236189 Good reliability (0.84) 0.8407675
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Diagram of the Wave 1 state-violence model, titled after drawing.
af_create_sem_plot(result_1_state$fit)
title("Violence against the State SEM Model - Wave 1")

# Waves 2-last: same structure with the extended government item set.
df_2 <- filter(df, Wave != "First")
model_2_state <- paste0("pe_violence_state =~ ", paste(state_vars_2, collapse = "+"))
result_2_state <- af_cfa(
  df_2,
  var_list = state_vars_2, latent_var_name = "pe_violence_state",
  group = NULL, model = model_2_state,
  cfa_type = NULL, group_id = "Waves 2-last", factors_list = NULL
)

af_gt_cfa_results_tbl(result_2_state$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_violence_state Waves 2-last 0.741151 0.5685851 0.3127391 0.1027756 Good reliability (0.85) 0.8467191
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Draw the waves 2-last state-violence model diagram, then add its title.
af_create_sem_plot(result_2_state$fit)
title("Violence against the State SEM Model - Waves 2-last")

# Stack the Wave 1 and waves 2-last result data into one data frame.
df <- rbind(result_1_state$new_df, result_2_state$new_df)

# Density of pe_violence_state, one facet per wave.
af_create_y_plot(df, y_var = "pe_violence_state", group_var = "Wave", plot_types = c("density"), 
                 use_facet = TRUE)

2.2.4 Violence against the Outgroup

# Wave 1 has a single outgroup-violence item, so no CFA is run for it: the
# item's value is copied directly into pe_violence_outgroup.
df_1 <- filter(df, Wave == "First")
result_1_outgroup_new_df <- mutate(df_1, pe_violence_outgroup = .data[[out_vars_1[[1]]]])

# Disabled Wave 1 CFA, kept for reference:
# model_1_outgroup <- paste("pe_violence_outgroup =~", paste(out_vars_1, collapse = "+"))
# result_1_outgroup <- af_cfa(df_1, var_list = out_vars_1, latent_var_name = "pe_violence_outgroup", 
#                    group = NULL, model = model_1_outgroup,
#                    cfa_type = NULL, group_id = "Wave 1", factors_list = NULL, clean_model = FALSE)
# 
# af_gt_cfa_results_tbl(result_1_outgroup$cfa_tbl)
# 
# af_create_sem_plot(result_1_outgroup$fit)
# title("Violence against the Outgroup SEM Model - Wave 1")

# Waves 2-last: CFA over the two outgroup-violence items.
df_2 <- filter(df, Wave != "First")
model_2_outgroup <- paste0("pe_violence_outgroup =~ ", paste(out_vars_2, collapse = "+"))
result_2_outgroup <- af_cfa(
  df_2,
  var_list = out_vars_2, latent_var_name = "pe_violence_outgroup",
  group = NULL, model = model_2_outgroup,
  cfa_type = NULL, group_id = "Waves 2-last", factors_list = NULL
)

af_gt_cfa_results_tbl(result_2_outgroup$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_violence_outgroup Waves 2-last NA NA 0 1.323442e-09 Acceptable reliability (0.76) 0.7624859
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Draw the waves 2-last outgroup-violence model diagram, then add its title.
af_create_sem_plot(result_2_outgroup$fit)
title("Violence against the Outgroup SEM Model - Waves 2-last")

# Stack Wave 1 and waves 2-last rows. For Wave 1, pe_violence_outgroup is the
# value of the single tolerance_19 item, while later waves carry the af_cfa()
# result — NOTE(review): confirm both are on a comparable scale.
df <- rbind(result_1_outgroup_new_df, result_2_outgroup$new_df) # Notice wave 1 data

# Density of pe_violence_outgroup, one facet per wave.
af_create_y_plot(df, y_var = "pe_violence_outgroup", group_var = "Wave", plot_types = c("density"), 
                 use_facet = TRUE)

2.3 Social Dimension

2.4 Measures

Rate your agreement with the following statements on a scale of 1-7, where: 7 = strongly agree and 1 = strongly disagree.

Social Distancing

  • tolerance_r_1: I agree that ______ will be my supervisor at work.
  • tolerance_r_2: I agree to host ______ in my home under social circumstances.
  • tolerance_r_3: I agree that my son or daughter will have a romantic relationship with ______
  • tolerance_r_5: I am willing to read opinion pieces by ______

Note: We do not use tolerance_4 (tolerance_r_4: I agree that ______ will live in my neighborhood.) since it was skipped for respondents who selected leastliked == “מתישבים ביהודה ושומרון”.

Political Exclusion

  • tolerance_9: ______ should be banned from serving as prime ministers in Israel
  • tolerance_10: ______ should not be allowed to serve as members of the Knesset
  • tolerance_11: ______ should not be allowed to teach in public schools
  • tolerance_r_13: ____ should be allowed to hold demonstrations
  • tolerance_15: Monitoring of the telephones of ______ should be allowed
  • tolerance_16: ______ should be banned from teaching at university
  • tolerance_18: ______ should be denied the right to express their worldview publicly
  • tolerance_20: ______ should be stripped of their citizenship

Tolerance variables 1-5, 13, 25 reflect positive tolerance. In order to have all variables reflect the same direction, we reverse the positive tolerance variables to reflect intolerance.

scale_min <- 1
scale_max <- 7

# Reverse the positively worded tolerance items so they express intolerance.
# Each tolerance_<k> is reversed into tolerance_r_<k>.
# tolerance_4 is intentionally left out (the item is not used).
for (tol_item in c(1, 2, 3, 5, 13, 25)) {
  df[[paste0("tolerance_r_", tol_item)]] <-
    af_reverse_scale(df[[paste0("tolerance_", tol_item)]], scale_min, scale_max)
}

Political intolerance has two sub-dimensions (components): Political Exclusion and Social Distancing. Political Exclusion refers to the denial of equal rights and the use of policies against members of out-groups. Social Distancing refers to personal intolerance towards members of the out-groups.

# Indicators of the two intolerance sub-dimensions.
social_distancing_vars <- paste0("tolerance_r_", c(1, 2, 3, 5)) # tolerance_r_4 is not used
political_exclusion_vars <- c(
  paste0("tolerance_", 9:11),
  "tolerance_r_13",
  paste0("tolerance_", c(15, 16, 18, 20))
)
intolerance_vars <- c(social_distancing_vars, political_exclusion_vars)

# Overall intolerance: one latent variable over all twelve items, all waves.
model <- paste0("pe_intolerance =~ ", paste(intolerance_vars, collapse = "+"))

result <- af_cfa(
  df,
  var_list = intolerance_vars, latent_var_name = "pe_intolerance",
  group = "Wave", model = model,
  cfa_type = NULL, group_id = "All Waves", factors_list = NULL, clean_model = FALSE
)

af_gt_cfa_results_tbl(result$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_intolerance All Waves 0.8149057 0.7737737 0.1627815 0.06606904 Excellent reliability (0.93) 0.9270915
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Draw the intolerance model diagram in a circular layout, then add its title.
af_create_sem_plot(result$fit, layout = "circle")
title("Intolerance SEM Model")

# Continue with the data frame returned by af_cfa() (it carries pe_intolerance).
df <- result$new_df

# Density of pe_intolerance, one facet per wave.
af_create_y_plot(df, y_var = "pe_intolerance", group_var = "Wave", plot_types = c("density"), 
                 use_facet = TRUE)

2.4.1 Social Distancing

# Social Distancing sub-dimension: one latent variable over the four
# reversed social-distance items, all waves.
model <- paste0("pe_intolerance_social =~ ", paste(social_distancing_vars, collapse = "+"))

result <- af_cfa(
  df,
  var_list = social_distancing_vars, latent_var_name = "pe_intolerance_social",
  group = "Wave", model = model,
  cfa_type = NULL, group_id = "All Waves", factors_list = NULL, clean_model = FALSE
)

af_gt_cfa_results_tbl(result$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_intolerance_social All Waves 0.9976146 0.9928438 0.04742819 0.00905084 Good reliability (0.86) 0.8562326
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Draw the Social Distancing model diagram, then add its title.
af_create_sem_plot(result$fit)
title("Social Distancing SEM Model")


# Continue with the data frame returned by af_cfa()
# (it carries pe_intolerance_social).
df <- result$new_df

# Density of pe_intolerance_social, one facet per wave.
af_create_y_plot(df, y_var = "pe_intolerance_social", group_var = "Wave", plot_types = c("density"), 
                 use_facet = TRUE)

2.4.2 Political Exclusion

# Political Exclusion sub-dimension: one latent variable over the eight
# political-exclusion items, all waves.
model <- paste0("pe_intolerance_political =~ ", paste(political_exclusion_vars, collapse = "+"))

result <- af_cfa(
  df,
  var_list = political_exclusion_vars, latent_var_name = "pe_intolerance_political",
  group = "Wave", model = model,
  cfa_type = NULL, group_id = "All Waves", factors_list = NULL, clean_model = FALSE
)

af_gt_cfa_results_tbl(result$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_intolerance_political All Waves 0.8878431 0.8429803 0.1670649 0.04715721 Excellent reliability (0.91) 0.9107889
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Draw the Political Exclusion model diagram, then add its title.
af_create_sem_plot(result$fit)
title("Political Exclusion SEM Model")


# Continue with the data frame returned by af_cfa()
# (it carries pe_intolerance_political).
df <- result$new_df

# Density of pe_intolerance_political, one facet per wave.
af_create_y_plot(df, y_var = "pe_intolerance_political", group_var = "Wave", plot_types = c("density"), 
                 use_facet = TRUE)

# Overall extremism score built from the three dimension scores.
# NOTE(review): af_dist() presumably computes a distance/norm across the listed
# columns — verify, and confirm the three inputs are on comparable scales
# (pe_ideology was rescaled to [1,7] above; the other two come from af_cfa()).
df$pe_overall <- af_dist(df, c("pe_ideology", "pe_violence", "pe_intolerance"))

2.5 Political Orientation

Self-reported political orientation, ranging from 1 to 7, where 1 represents the right and 7 represents the left.

We use politi_orient_1, the self-reported political orientation scale of 1-7 where 1 represents right-wing and 7 represents left-wing, to define three political categories as follows: right (1-3), center (4) and left (5-7).

# Three-way political category from the 1-7 self-placement item.
# Per the accompanying text, c_low = c_up = 4 makes only the value 4 "center",
# with 1-3 = right and 5-7 = left — NOTE(review): confirm against af_rcl().
df$pe_left_center_right<- af_rcl(df$politi_orient_1, c_low = 4, c_up = 4)

# Density of the folded ideology score, one facet per political category.
af_create_y_plot(data = df, y_var = "pe_ideology", group_var = "pe_left_center_right", bins = 100, 
                 plot_types = c("density"), use_facet = TRUE)

We create alternative classification as follows: right (1-2), center (3-5) and left (6-7)

# Alternative three-way category with a wider center band.
# Per the accompanying text, c_low = 3 and c_up = 5 give right = 1-2,
# center = 3-5, left = 6-7 — NOTE(review): confirm against af_rcl().
df$pe_left_center_right_2 <- af_rcl(df$politi_orient_1, c_low = 3, c_up = 5)

# Density of the folded ideology score, one facet per alternative category.
af_create_y_plot(data = df, y_var = "pe_ideology", group_var = "pe_left_center_right_2", bins = 100, 
                 plot_types = c("density"), use_facet = TRUE)

The following charts provide information on the distribution of the political category variables.

# Bar chart of the narrow-center classification (center = 4), grouped by wave.
af_create_y_plot(df, y_var = "pe_left_center_right", group_var = "Wave", plot_types = c("bar"))

# Bar chart of the wide-center classification (center = 3-5), grouped by wave.
af_create_y_plot(df, y_var = "pe_left_center_right_2", group_var = "Wave", plot_types = c("bar"))

2.6 Combined Religiosity Groups

Combine Traditional with the Religious group. Combine National Ultra-Orthodox with Ultra-Orthodox.

# Keep only respondents with a recorded religiosity.
df <- df[!is.na(df$religiosity), ]

# Level order of the collapsed religiosity factor.
pe_religiosity_order <- c("Secular", "Religious", "National Religious", "Ultra-Orthodox")

# Collapse categories: "National Ultra-Orthodox" is merged into
# "Ultra-Orthodox" and "Traditional" into "Religious"; all other values keep
# their original label.
merged_labels <- as.character(df$religiosity)
merged_labels[df$religiosity == "National Ultra-Orthodox"] <- "Ultra-Orthodox"
merged_labels[df$religiosity == "Traditional"] <- "Religious"
df$pe_religiosity <- factor(merged_labels, levels = pe_religiosity_order)

# Distribution of the original and of the collapsed variable, faceted by wave.
af_create_y_plot(df, y_var = "religiosity", group_var = "Wave", use_facet = TRUE)


af_create_y_plot(df, y_var = "pe_religiosity", group_var = "Wave", use_facet = TRUE)

3 Clean Final Dataset

Remove all columns whose names end with ’_P’ (used for the panel in SPSS)

# Drop every column whose name ends in "_P".
# drop = FALSE keeps the result a data frame even if only one column remains
# (without it, `[` would collapse a single surviving column to a vector).
df <- df[, !grepl("_P$", names(df)), drop = FALSE]

Remove survey management variables

# Survey-platform bookkeeping columns to drop.
# ("IPAddress" was previously listed twice; the duplicate entry is removed.)
rmv_list <- c(
  "StartDate", "EndDate", "Status", "IPAddress", "Progress",
  "Duration__in_seconds_", "UserLanguage", "Finished", "RecordedDate",
  "RecipientLastName", "RecipientFirstName", "RecipientEmail", "ExternalReference",
  "LocationLatitude", "LocationLongitude", "DistributionChannel", "consent"
)

# all_of() is strict: it raises an error if any listed column is absent from df.
df <- df %>% dplyr::select(-all_of(rmv_list))

Remove calculated / report variables

# Previously calculated / report columns to drop from the final dataset.
# This reuses (overwrites) the rmv_list name from the previous removal step.
rmv_list <- c(
  "finish", "i.user1", "i.user3",                     
  "i.user9", "OUTPARTY", "inparty", "party",                      
  "religroup", "religroupn", "left_right", "Gender_Dummy",                
  "center_left_right", "relig_group", "SETTLERS_NOT", "democracy_T",                 
  "Civil_rights_All", "Social_D", "Threat_Per", "exclusion_1_T",               
  "tolerance_13_r", "Dehumanization", "Policy_least_liked", "Civil_rights_least_liked",   
  "tolerance_25_r", "indirect_exclu", "exclusion_2_T", "tolerance_12R",              
  "tolerance_17R", "tolerance_18R", "tolerance_19R", "tolerance_20R",               
  "halaca_t", "demo_state_r", "mean_anti_demo", "poli_violence_T",            
  "activismT", "RWA_T", "LWA_T", "SDO_2_R",                     
  "SDO_3_R", "SDO_T", "DOGMA_2_R", "Dogma_T",
  "traditional_meta_1", "normethnic_1",                
  "normleastethnic", "hitgab_supp_1", "hitgab_demo_1", "yesod_supp_1",                
  "yesod_demo_1", "ragil_supp_1", "ragil_demo_1", "svirut_supp_1",               
  "svirut_demo_1", "yoamash_supp_1", "yoamash_demo_1", "shoftim_supp_1",              
  "shoftim_demo_1", "reform_support", "feelings_opp_1", "feelings_opp_2",              
  "feelings_opp_3", "feelings_opp_4", "feelings_opp_5", "feelings_supp_1",             
  "feelings_supp_2", "feelings_supp_3", "feelings_supp_4", "feelings_supp_5",             
  "threat_1", "threat_2", "threat_3", "threat_4", 
  "threat_5", "palas_emotions_1", "palas_emotions_2", "palas_emotions_3", 
  "palas_emotions_4", "palas_emotions_5", "palas_emotions_6", "openess_pal_1", 
  "openess_pal_2", "openess_pal_3", "openess_pal_4", "violence_pal_1", 
  "violence_pal_2", "policy_pal_1", "policy_pal_2", "ethnic", 
  "ethnicg", "threat_4r", "phythreat", "symthreat", 
  "symthreat_arabs", "distance_palas", "violen_palast", "policy_pal_2_r", 
  "policy_palas_t", "filter_." # "regionmap" and "mean_supp" are deliberately kept
)
 
# all_of() is strict: it raises an error if any listed column is absent from df.
df <- df %>% dplyr::select(-all_of(rmv_list))

Remove all empty columns (NULL or NA)

# Drop columns whose values are all NA.
# - vapply() always returns a logical vector (sapply() returns a list for a
#   zero-column data frame, which would break the negation).
# - is.null() on a column is always FALSE, so is.na() is the only test needed.
# - drop = FALSE keeps a data frame even if a single column survives.
df <- df[, !vapply(df, function(col) all(is.na(col)), logical(1)), drop = FALSE]

Remove all rows from the dataset that have undefined values in any of the following variables: gender, age, education, religiosity, religion

# Drop respondents missing any of the core demographic variables and record
# how many rows this step removed.
original_row_count <- nrow(df)

must_have_list <-
  c("gender", "age", "education", "religiosity", "religion")
# Remove rows with NA values in specified variables
df <- df[complete.cases(df[must_have_list]), ]

# Recount AFTER filtering. The count was previously left at its value from the
# earlier NA-cleanup step, so rows_removed did not reflect this filter.
cleaned_row_count <- nrow(df)
rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100

Removed 0 rows out of 7779 (0%).

Remove “Other” gender category

# Drop respondents whose gender is "Other" and record how many rows this step
# removed.
original_row_count <- nrow(df)

df <- df %>%
  filter(gender != "Other") %>%
  mutate(gender = droplevels(gender)) # Remove unused factor level

# Recount AFTER filtering. The count was previously left at its value from the
# earlier NA-cleanup step, which made rows_removed negative here.
cleaned_row_count <- nrow(df)
rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100

Removed -2 rows out of 7777 (-0.0257169%).

4 Save il_pe.RDS

Save in ‘Israel Survey/data/il_pe.RDS’.

# Write the final data frame to disk as an RDS file (path is relative to the
# working directory).
saveRDS(df, "Israel Survey/data/il_pe.RDS")