# Read the data from indices_table.txt
df <- as.data.frame(readRDS("Israel Survey/data/il_pe.RDS"))
wave_var <- "Wave"
wave_order <- c("First", "Second", "Third", "Fourth", "Fifth", "Sixth")
community_var <- "pe_left_center_right"
community_order <- c("left", "center", "right")
dimensions_order <- c("Overall", "Cognitive", "Behavioral", "Social")
# Calculate the Political Extremism Gauge Indices
indices_result <- af_gauge_indices(df, pop_var1 = wave_var, comm_var1 = community_var)
df <- indices_result$df
# Convert data to a more manageable format for analysis
df <- df %>%
mutate(Wave = factor(Wave, levels = wave_order)) %>%
mutate(!!sym(community_var) := factor(!!sym(community_var), levels = community_order)) %>%
mutate(event_occurred = factor(1, levels = c(0, 1))) %>% # used for pairwise regression
mutate( # used for cumulative event effect regression
post_event1 = ifelse(Wave %in% c("Second", "Third", "Fourth", "Fifth", "Sixth"), 1, 0),
post_event2 = ifelse(Wave %in% c("Third", "Fourth", "Fifth", "Sixth"), 1, 0),
post_event3 = ifelse(Wave %in% c("Fourth", "Fifth", "Sixth"), 1, 0),
post_event4 = ifelse(Wave %in% c("Fifth", "Sixth"), 1, 0),
post_event5 = ifelse(Wave == "Sixth", 1, 0)
) %>%
mutate(
immediate_event1 = ifelse(Wave == "Second", 1, 0), # Event 1 impact (First → Second)
immediate_event2 = ifelse(Wave == "Third", 1, 0), # Event 2 impact (Second → Third)
immediate_event3 = ifelse(Wave == "Fourth", 1, 0), # Event 3 impact (Third → Fourth)
immediate_event4 = ifelse(Wave == "Fifth", 1, 0), # Event 4 impact (Fourth → Fifth)
immediate_event5 = ifelse(Wave == "Sixth", 1, 0) # Event 5 impact (Fifth → Sixth)
)
# Print Event Table
event_table <- data.frame(
event_name = c("Inland Terror", "Bennet Gov. Fall", "Judicial Reform", "Gallant Dismissal", "Oct. 7th War"),
waves = c("1-2", "2-3", "3-4", "4-5", "5-6"),
type = c("Security", "Political", "Political", "Political", "Security"),
stringsAsFactors = FALSE
)
# gt(event_table) %>%
# tab_header(
# title = md("**Event Table**"),
# )
# add event_type and event_result info to df based on the event_table
df <- af_add_event_info(df, wave_var, wave_order, event_table, community_var)
# Create the wave list in the form
# list("Inland Terror"= c("First", "Second"), "Bennet Gov. Fall" = c("Second", "Third"), ...)
wave_list <- list()
for(i in 1:nrow(event_table)) {
wave_range <- as.numeric(unlist(strsplit(event_table$waves[i], "-")))
wave_list[[event_table$event_name[i]]] <- c(wave_order[wave_range[1]],
wave_order[wave_range[2]])
}
# Create demographics regression formula part
demographics <- c("gender", "age_group") # , "education"
d_fmla <- paste(demographics, collapse = "+")
# Set display names for regression results
display_names <- list(
"Wave" = "Wave",
"post_event1" = "Inland Terror",
"post_event2" = "Bennet Gov. Fall",
"post_event3" = "Judicial Reform",
"post_event4" = "Gallant Dismissal",
"post_event5" = "Oct. 7th War",
"immediate_event1" = "Inland Terror",
"immediate_event2" = "Bennet Gov. Fall",
"immediate_event3" = "Judicial Reform",
"immediate_event4" = "Gallant Dismissal",
"immediate_event5" = "Oct. 7th War",
"pe_left_center_right" = "Political Orientation",
"event_occurred" = "Event Occured",
"event_type" = "Event Type",
"gender" = "Gender",
"age_group" = "Age Group",
"education" = "Education"
)
immediate_names = c(
"immediate_event1" = "Inland Terror",
"immediate_event2" = "Bennet Gov. Fall",
"immediate_event3" = "Judicial Reform",
"immediate_event4" = "Gallant Dismissal",
"immediate_event5" = "Oct. 7th War"
)
coef_names <- c(
"event_occurred1" = "Event Occurred",
"pe_left_center_rightleft" = "Political Orientation[left]",
"pe_left_center_rightright" = "Political Orientation[right]",
"event_occurred1:pe_left_center_rightleft" = "Event Occurred : Political Orientation[left]",
"event_occurred1:pe_left_center_rightright" = "Event Occurred : Political Orientation[right]"
)
af_create_y_plot(df,"pe_ideology", plot_types = c("density"),
title = "Cognitive Dimension (Ideology)",
group_var = "Wave", use_facet = TRUE, facet_cols = 3)
af_create_y_plot(df,"pe_violence", plot_types = c("density"),
title = "Behavioral Dimension (Violence)",
group_var = "Wave", use_facet = TRUE, facet_cols = 3)
af_create_y_plot(df,"pe_intolerance", plot_types = c("density"),
title = "Social Dimension (Intolerance)",
group_var = "Wave", use_facet = TRUE, facet_cols = 3)
af_create_y_plot(df,"pe_overall", plot_types = c("density"),
title = "Overall",
group_var = "Wave", use_facet = TRUE, facet_cols = 3)
af_create_y_plot(df,"Wave", plot_types = c("bar"), group_var = "extremist_cel",
title = "Cogntive Extremists")
af_create_y_plot(df,"Wave", plot_types = c("bar"), group_var = "extremist_bel",
title = "Behavioral Extremists")
af_create_y_plot(df,"Wave", plot_types = c("bar"), group_var = "extremist_sel",
title = "Social Extremists")
af_create_y_plot(df,"Wave", plot_types = c("bar"), group_var = "extremist_oel",
title = "Overall Extremists")
af_create_y_plot(df,"Wave", plot_types = c("bar"), group_var = "pe_left_center_right",
title = "Political Orientation")
af_create_y_plot(df,"Wave", plot_types = c("bar"), group_var = "pe_religiosity",
title = "Religiosity")
This RMarkdown file provides some recommendations and comparison regarding the possibility to use either ANOVA or regression analyses for identifying the effect of an event occurred between to consecutive survey waves (Wave 1 and 2). We relate to two types of survey data: Panel Sample where the respondents sample is identical between the two waves and Random Independent Sample where each wave consists of an independently and randomly selected sample. We also address two response variable cases: continuous and binary.
For the case where the two waves form a panel (i.e., the same individuals participate in both waves), I found that the most recommended/used statistical approach to compare a continuous response variable across the two waves is a Fixed Effect (FE) model using the the PLM R package (plm(type = “within”), See below for references).
The core strength of the FE model is its ability to eliminate the bias caused by unobserved time-invariant individual characteristics that may be correlated with other predictors (like political_orientation). By focusing on within-individual changes over time, it provides consistent and unbiased estimates for time-varying variables (event_occurred).
The primary limitation is that it cannot estimate the direct coefficients for purely time-invariant variables (gender) themselves, as these are “differenced out” along with the fixed effects. However, it can estimate their interaction with time-varying variables.
For binary response variables ANOVA is not suitable. The main solution is to use Conditional Logit model (clogit, R package ‘survival’). the conditional logit model provides consistent estimates for time-varying predictors by conditioning on the sum of outcomes per individual, effectively removing the fixed effects. This ensures that the estimates for event_occurred and its interaction are unbiased.
The primary limitations are that it drops individuals who do not exhibit any change in the binary response variable across the two waves which can lead to a loss of data. Like the plm(type=“within”) solution for a continuous response variable, it cannot estimate coefficients for time-invariant variables.
For the case where each survey wave consists of an independently and randomly selected sample, the standard statistical approach to compare a continuous response variable across the two waves is a Linear Regression Model (lm) with the wave indicator as a primary predictor.
While ANOVA (aov) will yield an equivalent F-statistic for the overall test of mean differences, lm() provides the regression coefficients directly. When you code event_occurred (e.g., Wave 1 = 0, Wave 2 = 1), the coefficient associated with it directly represents the estimated average difference in the response variable between the two waves. This offers a more interpretative and precise estimate of the magnitude and direction of the change than an ANOVA F-test alone. Including other relevant covariates (even time-invariant ones like political_orientation, age_group, gender) is straightforward in this framework, allowing for the estimation of their unique effects on the response variable, conditional on event_occurred.
For the case where the response variable is Binary, the standard appraoch is to use Logistic Regression (glm(family = binomial(link = “logit”))). It allows for the direct inclusion of event_occurred as a dummy and its interaction with political_orientation, providing insights into how the odds of the binary outcome change from Wave 1 to Wave 2 and how this change is moderated by political_orientation.
The PLM package:
Croissant, Y., & Millo, G. (2008). Panel Data Econometrics in R: The plm Package. Journal of Statistical Software, 27(2), 1-43. DOI: 10.18637/jss.v027.i02, https://www.jstatsoft.org/article/view/v027i02
Its primary strength lies in its ability to estimate a variety of linear panel models, including fixed effects, random effects, and pooled OLS, while also offering robust inference options.
Key relevant advantages:
Examples of related academic research in political science that uses plm:
# Create Panel Dataset
df1 <- df$respondent_id[df$Wave == "Third"]
df2 <- df$respondent_id[df$Wave == "Fourth"]
panel_respondents <- intersect(df1, df2)
panel_df <- df %>%
filter(Wave %in% c("Third", "Fourth")) %>%
filter(respondent_id %in% panel_respondents) %>%
mutate(event_occurred = factor(case_when(
Wave == "Third" ~ 0,
Wave == "Fourth" ~ 1
), levels = c(0, 1)))
p_data <- plm::pdata.frame(panel_df, index = c("respondent_id", "event_occurred"))
plm::pdim(p_data)
Balanced Panel: n = 671, T = 2, N = 1342
We create an anova model and a plm model for each of the dimensions (cognitive, behavioral, and social) and for overall (RMS of the three dimensions)
# plm within models
mp1 <- plm(pe_ideology ~ event_occurred * pe_left_center_right + age_group, data = p_data, model = "within")
mp2 <- plm(pe_violence ~ event_occurred * pe_left_center_right + age_group, data = p_data, model = "within")
mp3 <- plm(pe_intolerance ~ event_occurred * pe_left_center_right + age_group, data = p_data, model = "within")
mp4 <- plm(pe_overall ~ event_occurred * pe_left_center_right + age_group, data = p_data, model = "within")
ma1 <- aov(pe_ideology ~ event_occurred * pe_left_center_right + age_group + Error(respondent_id/event_occurred),
data =panel_df)
ma2 <- aov(pe_violence ~ event_occurred * pe_left_center_right + age_group + Error(respondent_id/event_occurred),
data =panel_df)
ma3 <- aov(pe_intolerance ~ event_occurred * pe_left_center_right + age_group + Error(respondent_id/event_occurred),
data =panel_df)
ma4 <- aov(pe_overall ~ event_occurred * pe_left_center_right + age_group + Error(respondent_id/event_occurred),
data =panel_df)
Term | Interpretation |
---|---|
event_occurred |
Average within-subject change in pe_ideology across all
respondents |
event_occurred:pe_left_center_right |
Whether the change differs by political orientation |
model_list <- list("Cognitive" = ma1, "Behavioral" = ma2, "Social" = ma3, "Overall" = ma4)
af_anova_table(model_list, title = "Within Effects ANOVA Results", show_residuals = FALSE)
Cognitive | Behavioral | Social | Overall | |
---|---|---|---|---|
event_occurred |
0.411 (0.522) |
0.027 (0.869) |
1.204 (0.273) |
0.037 (0.847) |
pe_left_center_right |
0.285 (0.752) |
1.197 (0.303) |
0.404 (0.668) |
0.887 (0.412) |
age_group |
0.614 (0.542) |
0.354 (0.702) |
2.072 (0.127) |
1.979 (0.139) |
event_occurred:pe_left_center_right |
0.945 (0.389) |
4.952** (0.007) |
0.069 (0.934) |
0.939 (0.392) |
Significance codes: *** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.1
model_list <- list("Cognitive" = mp1, "Behavioral" = mp2, "Social" = mp3, "Overall" = mp4)
af_stargazer(models = model_list, title = "Within Effects PLM Results")
Dependent variable: | ||||
pe_ideology | pe_violence | pe_intolerance | pe_overall | |
Cognitive | Behavioral | Social | Overall | |
(1) | (2) | (3) | (4) | |
event_occurred1 | 0.256 | 0.114 | -0.113 | 0.099 |
(0.176) | (0.102) | (0.167) | (0.096) | |
pe_left_center_rightcenter | 0.347 | 0.036 | 0.062 | 0.129 |
(0.303) | (0.176) | (0.289) | (0.165) | |
pe_left_center_rightright | 0.214 | -0.027 | -0.101 | 0.017 |
(0.330) | (0.192) | (0.314) | (0.179) | |
age_group31-45 | 0.123 | 0.512 | 0.728 | 0.540 |
(1.006) | (0.585) | (0.957) | (0.547) | |
age_group46-60 | 1.036 | 0.560 | 2.183 | 1.342 |
(1.299) | (0.756) | (1.237) | (0.706) | |
event_occurred1:pe_left_center_rightcenter | -0.284 | 0.032 | 0.038 | -0.089 |
(0.211) | (0.122) | (0.200) | (0.114) | |
event_occurred1:pe_left_center_rightright | -0.233 | -0.192 | 0.063 | -0.135 |
(0.190) | (0.111) | (0.181) | (0.103) | |
Observations | 1,342 | 1,342 | 1,342 | 1,342 |
R2 | 0.006 | 0.019 | 0.009 | 0.011 |
Adjusted R2 | -1.007 | -0.981 | -1.001 | -0.997 |
p<0.05; p<0.01; p<0.001 |
We create an anova model and a OLS (lm) model for each of the dimensions (cognitive, behavioral, and social) and for overall (RMS of the three dimensions)
# lm models
mp1 <- lm(pe_ideology ~ event_occurred*pe_left_center_right+gender+age_group, data = independent_df)
mp2 <- lm(pe_violence ~ event_occurred*pe_left_center_right+gender+age_group, data = independent_df)
mp3 <- lm(pe_intolerance ~ event_occurred*pe_left_center_right+gender+age_group, data = independent_df)
mp4 <- lm(pe_overall ~ event_occurred*pe_left_center_right+gender+age_group, data = independent_df)
# ANOVA models
ma1 <- aov(pe_ideology ~ event_occurred*pe_left_center_right+gender+age_group, data = independent_df)
ma2 <- aov(pe_violence ~ event_occurred*pe_left_center_right+gender+age_group, data = independent_df)
ma3 <- aov(pe_intolerance ~ event_occurred*pe_left_center_right+gender+age_group, data = independent_df)
ma4 <- aov(pe_overall ~ event_occurred*pe_left_center_right+gender+age_group, data = independent_df)
model_list <- list("Cognitive" = ma1, "Behavioral" = ma2, "Social" = ma3, "Overall" = ma4)
af_anova_table(model_list, title = "Within Effects ANOVA Results", show_residuals = FALSE)
Cognitive | Behavioral | Social | Overall | |
---|---|---|---|---|
event_occurred |
2.958. (0.086) |
0.813 (0.367) |
0 (1) |
2.414 (0.12) |
pe_left_center_right |
65.977*** (0) |
1.711 (0.181) |
119.291*** (0) |
8.657*** (0) |
gender |
59.585*** (0) |
52.769*** (0) |
0.33 (0.566) |
56.419*** (0) |
age_group |
10.548*** (0) |
8.508*** (0) |
6.87*** (0) |
10.066*** (0) |
event_occurred:pe_left_center_right |
0.798 (0.45) |
4.072* (0.017) |
1.095 (0.335) |
3.682* (0.025) |
Significance codes: *** p < 0.001, ** p < 0.01, * p < 0.05, . p < 0.1
model_list <- list("Cognitive" = mp1, "Behavioral" = mp2, "Social" = mp3, "Overall" = mp4)
af_stargazer(models = model_list, title = "Within Effects PLM Results")
Dependent variable: | ||||
pe_ideology | pe_violence | pe_intolerance | pe_overall | |
Cognitive | Behavioral | Social | Overall | |
(1) | (2) | (3) | (4) | |
event_occurred1 | 0.295 | 0.326** | -0.005 | 0.227* |
(0.211) | (0.113) | (0.181) | (0.111) | |
pe_left_center_rightcenter | -1.026*** | 0.029 | 0.679*** | -0.196* |
(0.158) | (0.085) | (0.135) | (0.083) | |
pe_left_center_rightright | -1.156*** | -0.013 | 1.449*** | 0.028 |
(0.147) | (0.079) | (0.126) | (0.077) | |
genderFemale | -0.519*** | -0.269*** | 0.046 | -0.262*** |
(0.070) | (0.037) | (0.060) | (0.037) | |
age_group31-45 | -0.030 | -0.059 | 0.188* | 0.051 |
(0.095) | (0.051) | (0.081) | (0.050) | |
age_group46-60 | 0.200* | -0.205*** | 0.310*** | 0.155** |
(0.097) | (0.052) | (0.083) | (0.051) | |
age_group60+ | 0.525*** | -0.254*** | 0.386*** | 0.293*** |
(0.114) | (0.061) | (0.097) | (0.060) | |
event_occurred1:pe_left_center_rightcenter | -0.240 | -0.205 | 0.068 | -0.136 |
(0.246) | (0.132) | (0.211) | (0.129) | |
event_occurred1:pe_left_center_rightright | -0.289 | -0.332** | -0.128 | -0.287* |
(0.229) | (0.123) | (0.196) | (0.120) | |
Observations | 2,638 | 2,638 | 2,638 | 2,638 |
R2 | 0.080 | 0.033 | 0.091 | 0.041 |
Adjusted R2 | 0.077 | 0.030 | 0.087 | 0.038 |
p<0.05; p<0.01; p<0.001 |
We check whether there were transitions between the panel waves of variables that we expect to remain invariant
# Step 1: Keep unique values per respondent and wave
changes_df <- panel_df %>%
select(respondent_id, event_occurred, pe_left_center_right) %>%
distinct() %>%
pivot_wider(
names_from = event_occurred,
values_from = pe_left_center_right,
names_prefix = "wave_"
) %>%
filter(wave_0 != wave_1) # Only respondents who changed
# Step 2: Count transitions
transition_summary <- changes_df %>%
count(wave_0, wave_1, name = "n_transitions") %>%
arrange(desc(n_transitions))
# Display
gt(transition_summary)
wave_0 | wave_1 | n_transitions |
---|---|---|
center | right | 23 |
right | center | 21 |
center | left | 15 |
left | center | 5 |
right | left | 5 |
left | right | 3 |
# Step 1: Keep unique values per respondent and wave
changes_df <- panel_df %>%
select(respondent_id, event_occurred, gender) %>%
distinct() %>%
pivot_wider(
names_from = event_occurred,
values_from = gender,
names_prefix = "wave_"
) %>%
filter(wave_0 != wave_1) # Only respondents who changed
# Step 2: Count transitions
transition_summary <- changes_df %>%
count(wave_0, wave_1, name = "n_transitions") %>%
arrange(desc(n_transitions))
# Display
gt(transition_summary)
wave_0 | wave_1 | n_transitions |
---|
# Step 1: Keep unique values per respondent and wave
changes_df <- panel_df %>%
select(respondent_id, event_occurred, age_group) %>%
distinct() %>%
pivot_wider(
names_from = event_occurred,
values_from = age_group,
names_prefix = "wave_"
) %>%
filter(wave_0 != wave_1) # Only respondents who changed
# Step 2: Count transitions
transition_summary <- changes_df %>%
count(wave_0, wave_1, name = "n_transitions") %>%
arrange(desc(n_transitions))
# Display
gt(transition_summary)
wave_0 | wave_1 | n_transitions |
---|---|---|
31-45 | 46-60 | 3 |
18-30 | 31-45 | 2 |
# Step 1: Keep unique values per respondent and wave
changes_df <- panel_df %>%
select(respondent_id, event_occurred, age) %>%
distinct() %>%
pivot_wider(
names_from = event_occurred,
values_from = age,
names_prefix = "wave_"
) %>%
filter(wave_0 != wave_1) # Only respondents who changed
# Step 2: Count transitions
transition_summary <- changes_df %>%
count(wave_0, wave_1, name = "n_transitions") %>%
arrange(desc(n_transitions))
# Display
gt(transition_summary)
wave_0 | wave_1 | n_transitions |
---|---|---|
24 | 25 | 8 |
53 | 54 | 8 |
42 | 43 | 7 |
22 | 23 | 6 |
41 | 42 | 6 |
23 | 24 | 5 |
30 | 31 | 5 |
25 | 26 | 4 |
28 | 29 | 4 |
31 | 32 | 4 |
34 | 35 | 4 |
36 | 37 | 4 |
39 | 40 | 4 |
43 | 44 | 4 |
47 | 48 | 4 |
51 | 52 | 4 |
52 | 53 | 4 |
20 | 21 | 3 |
21 | 22 | 3 |
32 | 33 | 3 |
33 | 34 | 3 |
38 | 39 | 3 |
40 | 41 | 3 |
69 | 70 | 3 |
74 | 75 | 3 |
27 | 28 | 2 |
29 | 30 | 2 |
35 | 36 | 2 |
37 | 38 | 2 |
44 | 45 | 2 |
50 | 51 | 2 |
54 | 55 | 2 |
58 | 59 | 2 |
70 | 71 | 2 |
72 | 73 | 2 |
19 | 20 | 1 |
22 | 24 | 1 |
26 | 27 | 1 |
31 | 33 | 1 |
43 | 49 | 1 |
46 | 47 | 1 |
48 | 49 | 1 |
55 | 56 | 1 |
56 | 57 | 1 |
57 | 58 | 1 |
60 | 61 | 1 |
61 | 62 | 1 |
64 | 65 | 1 |
66 | 67 | 1 |
67 | 70 | 1 |
68 | 69 | 1 |
71 | 72 | 1 |
73 | 74 | 1 |
75 | 76 | 1 |
Hausman test compares within (fixed effects) and random effects plm models. The null hypothesis is that the random effects model is consistent and efficient (i.e., the random effects estimator is preferred). If the p-value is:
The tested models is: pe_ideology ~ event_occurred + event_occurred : pe_left_center_right
# Hausman test models
mpw1 <- plm(pe_ideology ~ event_occurred + event_occurred : pe_left_center_right, data = p_data, model = "within")
mpr1 <- plm(pe_ideology ~ event_occurred + event_occurred : pe_left_center_right, data = p_data, model = "random")
af_hausman_test(mpw1, mpr1)
Hypotheses:
Test Results:
38.2638
5
0
Decision at α = 0.05:
Recommendation: Fixed Effects (within)
Explanation: Reject H0: Use fixed effects model (p < 0.05)
Reason: The random effects assumption is violated - individual effects are correlated with regressors
Significance: *** (p < 0.001)