1 Required Libraries

# load external packages and source helper functions

library(knitr)
library(kableExtra)

library(dplyr) 
library(haven)
library(ggplot2)
library(ggcorrplot)
library(viridis)
library(tidyverse)
library(gridExtra)
library(gt)
library(lavaan)
library(semTools)
library(semPlot)
library(psych)

library(afcommon)

# Evaluate the project-local helper script in this session
# (presumably defines some of the af_* helpers used below -- TODO confirm).
source("af_common_add_ons.R")

# Turn off dplyr's informational message about summarise() grouping.
options(dplyr.summarise.inform = FALSE)

2 Prepare Dataset

Load the tidy survey data file and select the required columns for the analysis

Filter for the Jewish population only.

# Read the tidy survey file as a plain data.frame and keep only rows whose
# `sample` and `religion` are both "Jewish".
df <- readRDS("Israel Survey/data/israel_survey.RDS") %>%
  as.data.frame() %>%
  filter(sample == "Jewish", religion == "Jewish")

Define the survey measures used for the extremism dimensions

# Column names of the survey items feeding each extremism dimension.

# Ideological dimension items.
ideological_vars <- c("peace_1", "annex_1")

# Behavioral (violence) items that exist in every wave.
# poli_violence1_4 and tolerance_26 are absent from Wave 1 and are therefore
# listed in `wave_1_excluded` instead.
behavioral_vars <- c(
  paste0("poli_violence1_", 1:3),
  "tolerance_19",
  paste0("activism__", 3:4)
)

# Social (tolerance) items.
social_vars <- paste0(
  "tolerance_",
  c(1:5, 9, 10, 11, 13, 15, 16, 18, 20)
)

# Self-reported political orientation.
political_orientation_vars <- "politi_orient_1"

# Items that were not asked in Wave 1.
wave_1_excluded <- c("poli_violence1_4", "tolerance_26", "Demo_state_3")

# Every item that must be answered by a respondent in any wave.
measures_vars <- c(
  ideological_vars,
  behavioral_vars,
  social_vars,
  political_orientation_vars
)

Remove all rows (respondents) that have an NA (not answered) in one of the survey measures (questions).

# Drop respondents with a missing answer on any required survey measure and
# record how many rows were lost.
#
# The previous `!is.null(.)` test was removed: inside if_all() `.` is a whole
# column vector, for which is.null() is always FALSE, so the test never
# affected the result and only obscured the intent.
original_row_count <- nrow(df)

df_clean <- df %>%
  # Items asked in every wave must all be answered.
  filter(if_all(all_of(measures_vars), ~ !is.na(.))) %>%
  # Items not asked in Wave 1 must be answered unless the row is from Wave 1.
  filter(if_all(all_of(wave_1_excluded), ~ (!is.na(.)) | (Wave == "First")))

cleaned_row_count <- nrow(df_clean)

rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100

Removed 341 rows out of 7780 (4.3830334%).

# Safety check: abort the whole run when the NA cleanup discarded more than
# 20% of the respondents, since the remaining data would no longer be
# comparable with the original sample.
if (percentage_removed > 20) {
  stop("Execution stopped: More than 20% of the rows were removed during cleanup.")
}

# The cleanup is acceptable: continue with the cleaned data only.
df <- df_clean

3 Political Extremism Measures

When assessing socio-political extremism dimensions, combining multiple related questions into a single measure can enhance the comprehensiveness and reliability of our assessment. Before merging these questions, we must verify they measure the same underlying construct through Confirmatory Factor Analysis (CFA). Given our multi-wave dataset structure, we employ grouped CFA, which first requires testing for measurement invariance. Since wave one contains a different set of measures compared to subsequent waves, we conduct this analysis in two stages - first analyzing the initial wave independently, then examining the remaining waves together.

3.1 Ideological Dimension

The ideological dimension represents extremism of either the political left or the political right. It is based on the following survey measures (questions):

  1. peace_1: Israel should reach a two-state peace agreement with the Palestinians through negotiations for a permanent agreement, where 1 represents complete disagreement (right-wing position) and 7 represents full agreement (left-wing position).
  2. annex_1: Israel should annex the settlement areas in Judea and Samaria, where 1 represents complete disagreement (left-wing position) and 7 represents full agreement (right-wing position).
  3. Demo_state_3: I prefer the idea of a complete Land of Israel over a democratic character of the state, where 1 represents complete disagreement (left-wing position) and 7 represents full agreement (right-wing position). This measure is not available in Wave 1.

The scale of peace_1 is in the reverse direction (right-wing –> left-wing) of the other two measures (left-wing –> right-wing). We thus reverse it.

# Reverse peace_1 on its 1-7 scale and store the result as peace_1_r.
df[["peace_1_r"]] <- af_reverse_scale(
  df[["peace_1"]],
  scale_min = 1,
  scale_max = 7
)

3.1.1 Confirmatory Factor Analysis (CFA)

# Split the data: Wave 1 on its own, all later waves together.
df_1 <- filter(df, Wave == "First")
df_2 <- filter(df, Wave != "First")

# Wave 1 model: single latent factor measured by the two available items.
ideology_vars_1 <- c("peace_1_r", "annex_1")
model_1 <- paste0("pe_ideology =~ ", paste(ideology_vars_1, collapse = "+"))

# Later waves: the same items plus Demo_state_3.
ideology_vars_2 <- c(ideology_vars_1, "Demo_state_3")
model_2 <- paste0("pe_ideology =~ ", paste(ideology_vars_2, collapse = "+"))

The ideological dimension (pe_ideology) is calculated by ‘folding’ the measure scales. When folding scales to measure extremism, we are essentially measuring distance from the midpoint (4 in a 1-7 scale), so values of 1 and 7 would both indicate high extremism (3 units from center), while 3 and 5 would indicate lower extremism (1 unit from center).

We perform CFA first and then fold the resulting factor score. This sequence validates that the measures truly capture the same underlying left-right construct and creates a more reliable composite measure of ideology, preserving the linear relationships between items during validation. This approach maintains better measurement properties and is more theoretically sound, as the original theoretical construct (ideology) is first validated and only then transformed into the derived construct (extremism).

3.1.1.1 Perform CFA of wave 1

# Single-group CFA of the ideology items on Wave 1 respondents.
# NOTE(review): with only two indicators the rendered table reports CFI/TLI
# as NA, so model fit cannot really be judged for this wave.
result_1 <- af_cfa(
  df_1,
  var_list = ideology_vars_1,
  latent_var_name = "pe_ideology",
  group = NULL,
  model = model_1,
  cfa_type = NULL,
  group_id = "Wave 1",
  factors_list = NULL,
  clean_model = FALSE
)

# Show the fit-statistics table of the Wave 1 model.
af_gt_cfa_results_tbl(result_1$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_ideology Wave 1 NA NA 0 3.763233e-11 Poor reliability (0.53) 0.5335424
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Plot the fitted Wave 1 ideology model via the af_create_sem_plot helper,
# then add a title to that plot (title() must follow the plotting call).
af_create_sem_plot(result_1$fit)
title("Ideology SEM Model - Wave 1")

3.1.1.2 Perform CFA of waves 2 … last wave

# CFA of the ideology items on waves 2 and later, grouped by Wave.
result_2 <- af_cfa(
  df_2,
  var_list = ideology_vars_2,
  latent_var_name = "pe_ideology",
  group = "Wave",
  model = model_2,
  cfa_type = NULL,
  group_id = "Waves 2-last",
  factors_list = NULL
)

# Show the fit-statistics table of the waves 2-last model.
af_gt_cfa_results_tbl(result_2$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_ideology Waves 2-last 1 1 0 1.159868e-08 Acceptable reliability (0.72) 0.7230871
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Plot the fitted waves 2-last ideology model via the af_create_sem_plot
# helper, then add a title to that plot (title() must follow the plotting call).
af_create_sem_plot(result_2$fit)
title("Ideology SEM Model - Waves 2-last")

3.1.1.3 Results visualization and Sanity

Correlate the latent variable with the mean of its manifest variables.

# Sanity check: the CFA factor score should track the plain mean of the items.

# Wave 1: correlation between item mean and factor score.
c1_mean <- cor(
  af_mean(df_1, var_list = ideology_vars_1),
  result_1$new_df[["pe_ideology"]]
)

# Waves 2-last: keep both series, they are plotted below.
ideology_mean <- af_mean(df_2, var_list = ideology_vars_2)
ideology_predict <- result_2$new_df[["pe_ideology"]]
c2_mean <- cor(ideology_mean, ideology_predict)

# Plot item mean and factor score together (waves 2-last only).
af_create_x_multi_y_plot(
  data = data.frame(
    ideology_mean = ideology_mean,
    ideology_predict = ideology_predict
  ),
  y_var_names = c("ideology_mean", "ideology_predict"),
  smooth = TRUE,
  show_points = FALSE
)

Correlation between the latent variable (pe_ideology) and the mean of its manifest variables

  • Wave 1: 0.9952268
  • Wave 2…last wave: 0.9958264

3.1.1.4 Finalize (combine) results

Combine wave 1 with wave 2…last wave results and then fold the ideology variable

# Stack the Wave 1 and waves 2-last results back into one data set.
df <- rbind(result_1$new_df, result_2$new_df)

# Fold ideology around the scale midpoint to obtain extremism, then rescale.
# - The 1-7 scale has its midpoint at 4; folding yields 0 (center) to
#   3 (extreme left or right).
# - The folded value is mapped linearly from [0, 3] back to [1, 7].
# NOTE(review): this assumes the pe_ideology score returned by af_cfa is on
# the original 1-7 metric -- confirm, since centered factor scores would need
# center = 0.
df[["pe_ideology"]] <-
  (af_folded_scale(df[["pe_ideology"]], center = 4) / 3) * 6 + 1

# Density of the folded ideology measure, one facet per wave.
af_create_y_plot(
  df,
  y_var = "pe_ideology",
  group_var = "Wave",
  plot_types = "density",
  use_facet = TRUE
)

3.2 Behavioral Dimension

The theory of political violence distinguishes two types: violence against the citizens of the out-group and violence against the state and its institutions.

Eight different questions in the survey relate to violence: tolerance 19 and 26 relate to violence against citizens of the out-group; poli_violence 1, 2, 3, 4 and activism 3 and 4 relate to violence against the state and its institutions.

It is important to note that poli_violence 4 and tolerance 26 do not exist in the first wave

3.2.1 Exploratory Factor Analysis (EFA)

To identify the structure of the violence measure we first perform an exploratory factor analysis (EFA). We use the Principal Axis (PA) factoring method as it can be better for exploratory factor analysis, especially with non-normal data.

# Parallel analysis (principal-axis factoring) on the behavioral items to
# suggest the number of factors; plot = TRUE also draws the plot.
fa_result <- psych::fa.parallel(
  df[behavioral_vars],
  fm = "pa",
  plot = TRUE
)

#> Parallel analysis suggests that the number of factors =  3  and the number of components =  1

The analysis suggests that 3 factor(s) are required.

# Exploratory factor analysis with the number of factors suggested by the
# parallel analysis above, followed by a diagram of the loadings.
fit <- fa(
  df[behavioral_vars],
  nfactors = fa_result$nfact,
  fm = "pa",
  missing = TRUE,
  use = "na.or.complete"
)
loads <- fit$loadings
psych::fa.diagram(loads)

Based on the indications above we can see that the survey measures load onto three factors. PA1 is the latent variable representing political violence against the government and politicians. PA3 is the latent variable representing political violence against state institutions. PA2 is the latent variable representing political violence against the out group.

We also check the use of only two factors (latent variables)

# Exploratory factor analysis forced to two factors, followed by a diagram
# of the loadings.
fit <- fa(
  df[behavioral_vars],
  nfactors = 2,
  fm = "pa",
  missing = TRUE,
  use = "na.or.complete"
)
loads <- fit$loadings
psych::fa.diagram(loads)

The results fit the theory regarding the types of political violence. PA1 is the latent variable representing political violence against the state government and institutions. PA2 is the latent variable representing political violence against the out-group citizens.

3.2.2 Confirmatory Factor Analysis (CFA)

Using separate CFAs and then combining results would lose the established equivalence in measurement structure that we’ve already confirmed with the metric invariance test for waves 2-last. This approach would be less theoretically justified and could introduce more researcher degrees of freedom in how we combine the results. We thus perform CFA for the first wave and then for waves 2 through the last wave.

# Split the data again (df now carries pe_ideology): Wave 1 on its own,
# all later waves together.
df_1 <- filter(df, Wave == "First")
df_2 <- filter(df, Wave != "First")

# Wave 1 model: poli_violence1_4 and tolerance_26 were not asked in this wave.
gov_vars_1 <- paste0("poli_violence1_", 1:3)
inst_vars_1 <- paste0("activism__", 3:4)
out_vars_1 <- "tolerance_19"
behavioral_vars_1 <- c(gov_vars_1, inst_vars_1, out_vars_1)
model_1 <- paste0("pe_violence =~ ", paste(behavioral_vars_1, collapse = "+"))

# Model for waves 2-last: full set of eight violence items.
gov_vars_2 <- paste0("poli_violence1_", 1:4)
inst_vars_2 <- paste0("activism__", 3:4)
out_vars_2 <- paste0("tolerance_", c(19, 26))
behavioral_vars_2 <- c(gov_vars_2, inst_vars_2, out_vars_2)
model_2 <- paste0("pe_violence =~ ", paste(behavioral_vars_2, collapse = "+"))

3.2.2.1 Perform CFA of wave 1

# Single-group, single-factor CFA of the violence items on Wave 1.
result_1 <- af_cfa(
  df_1,
  var_list = behavioral_vars_1,
  latent_var_name = "pe_violence",
  group = NULL,
  model = model_1,
  cfa_type = NULL,
  group_id = "Wave 1",
  factors_list = NULL,
  clean_model = FALSE
)

# Show the fit-statistics table of the Wave 1 model.
af_gt_cfa_results_tbl(result_1$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_violence Wave 1 0.7652941 0.6088235 0.2591642 0.08429097 Acceptable reliability (0.76) 0.762801
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Plot the fitted Wave 1 violence model via the af_create_sem_plot helper,
# then add a title to that plot (title() must follow the plotting call).
af_create_sem_plot(result_1$fit)
title("Violence SEM Model - Wave 1")

3.2.2.2 Perform CFA of waves 2 … last wave

# Single-factor CFA of the violence items on waves 2 and later.
# NOTE(review): group = NULL here, whereas the ideology CFA for the same waves
# uses group = "Wave" -- confirm the ungrouped fit is intended.
result_2 <- af_cfa(
  df_2,
  var_list = behavioral_vars_2,
  latent_var_name = "pe_violence",
  group = NULL,
  model = model_2,
  cfa_type = NULL,
  group_id = "Waves 2-last",
  factors_list = NULL
)

# Show the fit-statistics table of the waves 2-last model.
af_gt_cfa_results_tbl(result_2$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_violence Waves 2-last 0.6584238 0.5217934 0.2602597 0.1235573 Acceptable reliability (0.78) 0.7761107
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Plot the fitted waves 2-last violence model via the af_create_sem_plot
# helper, then add a title to that plot (title() must follow the plotting call).
af_create_sem_plot(result_2$fit)
title("Violence SEM Model - Waves 2-last")

3.2.2.3 Results visualization and Sanity

Correlate latent var with mean/distance function of manifests

# Sanity check: the CFA factor score should track the plain mean of the items.

# --- Wave 1 ---
violence_mean <- af_mean(df_1, behavioral_vars_1)
violence_predict <- result_1$new_df[["pe_violence"]]
c1_mean <- cor(x = violence_mean, y = violence_predict)

af_create_x_multi_y_plot(
  data = data.frame(
    violence_mean = violence_mean,
    violence_predict = violence_predict
  ),
  y_var_names = c("violence_mean", "violence_predict"),
  smooth = TRUE,
  show_points = FALSE
)

# --- Waves 2-last (overwrites the two series above) ---
violence_mean <- af_mean(df_2, behavioral_vars_2)
violence_predict <- result_2$new_df[["pe_violence"]]
c2_mean <- cor(x = violence_mean, y = violence_predict)

af_create_x_multi_y_plot(
  data = data.frame(
    violence_mean = violence_mean,
    violence_predict = violence_predict
  ),
  y_var_names = c("violence_mean", "violence_predict"),
  smooth = TRUE,
  show_points = FALSE
)

Correlation between latent variable (pe_violence) to mean of manifests

  • Wave 1: 0.9395848
  • Wave 2…last wave: 0.8790465

3.2.2.4 Finalize (combine) results

Combine wave 1 with wave 2…last wave results

# Stack the Wave 1 and waves 2-last results back into one data set.
df <- rbind(result_1$new_df, result_2$new_df)

# Density of the violence measure, one facet per wave.
af_create_y_plot(
  df,
  y_var = "pe_violence",
  group_var = "Wave",
  plot_types = "density",
  use_facet = TRUE
)

3.3 Social Dimension

All tolerance questions use a scale of 1 - 7 where 1 indicates low agreement and 7 indicates high agreement. Most tolerance questions actually reflect intolerance towards the out groups. Few (1-5, 13, 25) reflect positive tolerance. In order to have all variables reflect the same direction, we reverse the positive tolerance variables and relate to the social dimension as reflecting intolerance where 1 indicates low intolerance and 7 indicates high intolerance.

# Bounds of the tolerance item scale.
scale_min <- 1
scale_max <- 7

# Reverse the positively worded tolerance items (1-5, 13, 25) and store each
# result as tolerance_r_<n>, so that every item reads as intolerance.
for (i in c(1:5, 13, 25)) {
  df[[paste0("tolerance_r_", i)]] <- af_reverse_scale(
    df[[paste0("tolerance_", i)]],
    scale_min,
    scale_max
  )
}

Political intolerance has two sub-dimensions (components): Political Exclusion and Social Distancing. Political Exclusion refers to the denial of equal rights and the use of policies against members of out-groups. Social Distancing refers to personal intolerance towards members of the out-groups.

# Social distancing: the five reversed tolerance items.
social_distancing_vars <- paste0("tolerance_r_", 1:5)

# Political exclusion: items 9-11, reversed item 13, items 15, 16, 18 and 20.
political_exclusion_vars <- c(
  paste0("tolerance_", 9:11),
  "tolerance_r_13",
  paste0("tolerance_", c(15, 16, 18, 20))
)

# All intolerance items, social distancing first.
intolerance_vars <- c(social_distancing_vars, political_exclusion_vars)

To identify the structure of the intolerance measure we first perform an exploratory factor analysis (EFA). We use the Principal Axis (PA) factoring method as it can be better for exploratory factor analysis, especially with non-normal data. We first check the use of only two latent variables.

# Exploratory factor analysis of the intolerance items with two factors,
# followed by a diagram of the loadings.
fit <- fa(
  df[intolerance_vars],
  nfactors = 2,
  fm = "pa",
  missing = TRUE,
  use = "na.or.complete"
)
loads <- fit$loadings
psych::fa.diagram(loads)

The results fit the theory regarding the types of political intolerance. PA1 is the latent variable representing political exclusion. PA2 is the latent variable representing social distancing.

To solidify our findings we perform CFA analysis including each of the political intolerance latent variables as well as for a combined variable.

# Single latent factor measured by all intolerance items.
model <- paste0("pe_intolerance =~ ", paste(intolerance_vars, collapse = "+"))

# CFA across all waves, grouped by Wave.
result <- af_cfa(
  df,
  var_list = intolerance_vars,
  latent_var_name = "pe_intolerance",
  group = "Wave",
  model = model,
  cfa_type = NULL,
  group_id = "All Waves",
  factors_list = NULL,
  clean_model = FALSE
)

# Show the fit-statistics table of the intolerance model.
af_gt_cfa_results_tbl(result$cfa_tbl)
Latent.var Group CFI TLI RMSEA SRMR Cronbach Alpha
pe_intolerance All Waves 0.7983266 0.7579919 0.1642267 0.06992985 Excellent reliability (0.93) 0.9326395
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Plot the fitted intolerance model via the af_create_sem_plot helper, then
# add a title to that plot (title() must follow the plotting call).
af_create_sem_plot(result$fit)
title("Intolerance SEM Model")

3.3.0.1 Results Visualization and Sanity

# Sanity check: the CFA factor score should track the plain mean of the items.
intolerance_mean <- af_mean(df, intolerance_vars)
intolerance_predict <- result$new_df[["pe_intolerance"]]

c_mean <- cor(x = intolerance_mean, y = intolerance_predict)

# Plot item mean and factor score together.
af_create_x_multi_y_plot(
  data = data.frame(
    intolerance_mean = intolerance_mean,
    intolerance_predict = intolerance_predict
  ),
  y_var_names = c("intolerance_mean", "intolerance_predict"),
  smooth = TRUE,
  show_points = FALSE
)

Correlation between latent variable (pe_intolerance) to mean of manifests: 0.9947517

3.3.0.2 Finalize Results

# Continue with the data set returned by the intolerance CFA
# (it carries the pe_intolerance column used below).
df <- result$new_df

# Density of the intolerance measure, one facet per wave.
af_create_y_plot(
  df,
  y_var = "pe_intolerance",
  group_var = "Wave",
  plot_types = "density",
  use_facet = TRUE
)

3.4 Overall Political Extremism

Create overall political extremism measure by calculating the 3D distance (RMS) of the three PE dimensions.

# Combine the three extremism dimensions into one overall measure using the
# af_dist helper.
df[["pe_overall"]] <- af_dist(
  df,
  c("pe_ideology", "pe_violence", "pe_intolerance")
)

# Density of the overall measure, one facet per wave.
af_create_y_plot(
  df,
  y_var = "pe_overall",
  group_var = "Wave",
  plot_types = "density",
  use_facet = TRUE
)

3.5 Political Orientation

Self-reported political orientation, ranging from 1 to 7, where 1 represents the right and 7 represents the left.

We use politi_orient_1, the self-reported political orientation scale of 1-7 where 1 represents right-wing and 7 represents left-wing, to define three political categories as follows: right (1-3), center (4) and left (5-7).

# Classify self-reported orientation with the center band fixed at 4.
df[["pe_left_center_right"]] <- af_rcl(
  df[["politi_orient_1"]],
  c_low = 4,
  c_up = 4
)

# Density of ideological extremism within each political category.
af_create_y_plot(
  data = df,
  y_var = "pe_ideology",
  group_var = "pe_left_center_right",
  bins = 100,
  plot_types = "density",
  use_facet = TRUE
)

We create alternative classification as follows: right (1-2), center (3-5) and left (6-7)

# Alternative classification with a wider center band (3 to 5).
df[["pe_left_center_right_2"]] <- af_rcl(
  df[["politi_orient_1"]],
  c_low = 3,
  c_up = 5
)

# Density of ideological extremism within each alternative category.
af_create_y_plot(
  data = df,
  y_var = "pe_ideology",
  group_var = "pe_left_center_right_2",
  bins = 100,
  plot_types = "density",
  use_facet = TRUE
)

The following charts show the distribution of the two political category variables.

# Bar chart of the first political classification, grouped by wave.
af_create_y_plot(
  df,
  y_var = "pe_left_center_right",
  group_var = "Wave",
  plot_types = "bar"
)

# Bar chart of the alternative political classification, grouped by wave.
af_create_y_plot(
  df,
  y_var = "pe_left_center_right_2",
  group_var = "Wave",
  plot_types = "bar"
)

3.6 Combined Religiosity Groups

Combine the Traditional group with the Religious group, and the National Ultra-Orthodox group with the Ultra-Orthodox group.

# Drop respondents without a religiosity value.
df <- df[!is.na(df$religiosity), ]

# Level order of the combined religiosity factor.
pe_religiosity_order <- c("Secular", "Religious", "National Religious", "Ultra-Orthodox")

# Build pe_religiosity: merge "National Ultra-Orthodox" into "Ultra-Orthodox"
# and "Traditional" into "Religious"; every other value is kept as is.
# Values outside pe_religiosity_order become NA in the factor.
df$pe_religiosity <- factor(
  dplyr::case_when(
    df$religiosity == "National Ultra-Orthodox" ~ "Ultra-Orthodox",
    df$religiosity == "Traditional" ~ "Religious",
    TRUE ~ as.character(df$religiosity)
  ),
  levels = pe_religiosity_order
)

# Original religiosity groups, one facet per wave.
af_create_y_plot(df, y_var = "religiosity", group_var = "Wave", use_facet = TRUE)

# Combined religiosity groups, one facet per wave.
af_create_y_plot(df, y_var = "pe_religiosity", group_var = "Wave", use_facet = TRUE)

4 Clean Final Dataset

Remove all columns whose names end with ’_P’ (used for the panel in SPSS).

# Keep only the columns whose name does not end in "_P".
df <- df[, !grepl("_P$", colnames(df))]

Remove survey management variables

# Survey-management columns to drop from the final data set.
# "IPAddress" was listed twice in this vector; the duplicate entry is removed
# (the set of dropped columns is unchanged).
rmv_list <- c(
  "StartDate", "EndDate", "Status", "IPAddress", "Progress",
  "Duration__in_seconds_", "UserLanguage", "Finished", "RecordedDate",
  "RecipientLastName", "RecipientFirstName", "RecipientEmail", "ExternalReference",
  "LocationLatitude", "LocationLongitude", "DistributionChannel", "consent"
)

# all_of() makes the step fail loudly if any listed column is missing.
df <- df %>% dplyr::select(-all_of(rmv_list))

Remove calculated / report variables

# Calculated / report columns to drop from the final data set.
# This reuses (overwrites) the `rmv_list` name.
rmv_list <- c(
  "finish", "i.user1", "i.user3",                     
  "i.user9", "OUTPARTY", "inparty", "party",                      
  "religroup", "religroupn", "left_right", "Gender_Dummy",                
  "center_left_right", "relig_group", "SETTLERS_NOT", "democracy_T",                 
  "Civil_rights_All", "Social_D", "Threat_Per", "exclusion_1_T",               
  "tolerance_13_r", "Dehumanization", "Policy_least_liked", "Civil_rights_least_liked",   
  "tolerance_25_r", "indirect_exclu", "exclusion_2_T", "tolerance_12R",              
  "tolerance_17R", "tolerance_18R", "tolerance_19R", "tolerance_20R",               
  "halaca_t", "demo_state_r", "mean_anti_demo", "poli_violence_T",            
  "activismT", "RWA_T", "LWA_T", "SDO_2_R",                     
  "SDO_3_R", "SDO_T", "DOGMA_2_R", "Dogma_T",
  "traditional_meta_1", "normethnic_1",                
  "normleastethnic", "hitgab_supp_1", "hitgab_demo_1", "yesod_supp_1",                
  "yesod_demo_1", "ragil_supp_1", "ragil_demo_1", "svirut_supp_1",               
  "svirut_demo_1", "yoamash_supp_1", "yoamash_demo_1", "shoftim_supp_1",              
  "shoftim_demo_1", "reform_support", "feelings_opp_1", "feelings_opp_2",              
  "feelings_opp_3", "feelings_opp_4", "feelings_opp_5", "feelings_supp_1",             
  "feelings_supp_2", "feelings_supp_3", "feelings_supp_4", "feelings_supp_5",             
  "threat_1", "threat_2", "threat_3", "threat_4", 
  "threat_5", "palas_emotions_1", "palas_emotions_2", "palas_emotions_3", 
  "palas_emotions_4", "palas_emotions_5", "palas_emotions_6", "openess_pal_1", 
  "openess_pal_2", "openess_pal_3", "openess_pal_4", "violence_pal_1", 
  "violence_pal_2", "policy_pal_1", "policy_pal_2", "ethnic", 
  "ethnicg", "threat_4r", "phythreat", "symthreat", 
  "symthreat_arabs", "distance_palas", "violen_palast", "policy_pal_2_r", 
  "policy_palas_t", "filter_." # not in the removal list: "regionmap", "mean_supp"
)
 
# Drop the listed columns; all_of() raises an error if any of them is missing.
df <- df %>% dplyr::select(-all_of(rmv_list))

Remove all empty columns (NULL or NA)

# Drop every column that contains only NA values.
# - vapply() guarantees a logical vector even for a zero-column data frame,
#   where sapply() would return a list and break the negation.
# - drop = FALSE keeps the result a data frame when a single column survives.
# - The former is.null(col) test was dropped: a column taken from a data
#   frame is never NULL, so the test had no effect.
df <- df[, !vapply(df, function(col) all(is.na(col)), logical(1)), drop = FALSE]

Remove all rows from the dataset that have undefined values in any of the following variables: gender, age, education, religiosity, religion

# Drop respondents with a missing value in any mandatory demographic variable
# and record how many rows were lost.
original_row_count <- nrow(df)

must_have_list <-
  c("gender", "age", "education", "religiosity", "religion")

# Keep only rows that are complete on the mandatory variables.
df <- df[complete.cases(df[must_have_list]), ]

# Fix: refresh the post-filter count. It previously kept the stale value from
# the earlier NA cleanup, so the reported number of removed rows was wrong.
cleaned_row_count <- nrow(df)

rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100

Removed 0 rows out of 7439 (0%).

Remove “Other” gender category

# Drop respondents whose gender is "Other" and record how many rows were lost.
original_row_count <- nrow(df)

df <- df %>%
  filter(gender != "Other") %>%
  mutate(gender = droplevels(gender)) # Remove the now-unused factor level

# Fix: refresh the post-filter count. It previously kept a stale value from an
# earlier step, which produced a negative "rows removed" figure in the report.
cleaned_row_count <- nrow(df)

rows_removed <- original_row_count - cleaned_row_count
percentage_removed <- rows_removed / original_row_count * 100

Removed -2 rows out of 7437 (-0.0268926%).

5 Save il_pe.RDS

Save in ‘Israel Survey/data/il_pe.RDS’.

# Persist the final political-extremism data set.
saveRDS(object = df, file = "Israel Survey/data/il_pe.RDS")