EXPLORATION
# Print a compact overview of the dataset: one line per column with its type
# and first few values
utils::str(ESSmastersubset)
## spc_tbl_ [512,745 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ name : chr [1:512745] "ESS1e06_7" "ESS1e06_7" "ESS1e06_7" "ESS1e06_7" ...
## $ essround: num [1:512745] 1 1 1 1 1 1 1 1 1 1 ...
## $ edition : num [1:512745] 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 6.7 ...
## $ proddate: chr [1:512745] "23.11.2023" "23.11.2023" "23.11.2023" "23.11.2023" ...
## $ idno : num [1:512745] 1 2 3 4 6 7 8 10 12 14 ...
## $ cntry : chr [1:512745] "AT" "AT" "AT" "AT" ...
## $ dweight : num [1:512745] 0.945 0.473 0.945 0.945 1.891 ...
## $ pspwght : num [1:512745] 0.941 0.47 1.392 1.382 1.438 ...
## $ pweight : num [1:512745] 0.271 0.271 0.271 0.271 0.271 ...
## $ anweight: num [1:512745] 0.255 0.128 0.378 0.375 0.39 ...
## $ prob : num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ stratum : num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ psu : num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ netuse : num [1:512745] 5 6 0 4 7 0 1 6 7 1 ...
## $ netusoft: num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ polintr : num [1:512745] 3 1 2 2 3 1 3 2 3 3 ...
## $ stfdem : num [1:512745] 8 5 5 7 7 0 8 9 4 88 ...
## $ stfgov : num [1:512745] 7 0 7 3 5 0 5 5 3 5 ...
## $ trstprl : num [1:512745] 9 0 6 8 6 0 6 9 5 0 ...
## $ vote : num [1:512745] 2 1 1 1 1 2 1 1 1 1 ...
## $ gndr : num [1:512745] 1 1 2 1 2 2 2 2 1 2 ...
## $ yrbrn : num [1:512745] 1949 1953 1940 1959 1962 ...
## $ age : num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ agea : num [1:512745] 54 50 63 44 41 63 75 41 47 52 ...
## $ edufld : num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ edulvla : num [1:512745] 3 3 2 5 3 2 3 5 3 2 ...
## $ edulvlb : num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ eisced : num [1:512745] 0 0 0 0 0 0 0 0 0 0 ...
## $ hinctnt : num [1:512745] 77 2 77 9 9 5 88 9 8 77 ...
## $ hinctnta: num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ isco08 : num [1:512745] NA NA NA NA NA NA NA NA NA NA ...
## $ iscoco : num [1:512745] 1314 7412 5120 2224 4130 ...
## - attr(*, "spec")=
## .. cols(
## .. name = col_character(),
## .. essround = col_double(),
## .. edition = col_double(),
## .. proddate = col_character(),
## .. idno = col_double(),
## .. cntry = col_character(),
## .. dweight = col_double(),
## .. pspwght = col_double(),
## .. pweight = col_double(),
## .. anweight = col_double(),
## .. prob = col_double(),
## .. stratum = col_double(),
## .. psu = col_double(),
## .. netuse = col_double(),
## .. netusoft = col_double(),
## .. polintr = col_double(),
## .. stfdem = col_double(),
## .. stfgov = col_double(),
## .. trstprl = col_double(),
## .. vote = col_double(),
## .. gndr = col_double(),
## .. yrbrn = col_double(),
## .. age = col_double(),
## .. agea = col_double(),
## .. edufld = col_double(),
## .. edulvla = col_double(),
## .. edulvlb = col_double(),
## .. eisced = col_double(),
## .. hinctnt = col_double(),
## .. hinctnta = col_double(),
## .. isco08 = col_double(),
## .. iscoco = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary (quantiles and NA counts for numeric columns,
# length/class for character columns)
summary(object = ESSmastersubset)
## name essround edition proddate
## Length:512745 Min. : 1.000 Min. :1.000 Length:512745
## Class :character 1st Qu.: 3.000 1st Qu.:2.600 Class :character
## Mode :character Median : 6.000 Median :3.200 Mode :character
## Mean : 5.864 Mean :3.442
## 3rd Qu.: 9.000 3rd Qu.:3.700
## Max. :11.000 Max. :6.700
##
## idno cntry dweight pspwght
## Min. :1.000e+00 Length:512745 Min. :0.0016 Min. :0.000
## 1st Qu.:1.863e+03 Class :character 1st Qu.:0.8600 1st Qu.:0.681
## Median :1.165e+04 Mode :character Median :1.0000 Median :0.920
## Mean :9.508e+08 Mean :1.0000 Mean :1.004
## 3rd Qu.:5.948e+04 3rd Qu.:1.0502 3rd Qu.:1.186
## Max. :5.101e+11 Max. :6.2070 Max. :6.855
## NA's :22190
## pweight anweight prob stratum
## Min. :0.02243 Min. : 0.00 Min. :0 Min. : 1
## 1st Qu.:0.23013 1st Qu.: 0.18 1st Qu.:0 1st Qu.: 357
## Median :0.40602 Median : 0.40 Median :0 Median : 868
## Mean :0.95684 Mean : 0.95 Mean :0 Mean :1235
## 3rd Qu.:1.84949 3rd Qu.: 1.25 3rd Qu.:0 3rd Qu.:2182
## Max. :5.44713 Max. :22.00 Max. :0 Max. :2914
## NA's :112727 NA's :381351 NA's :381351
## psu netuse netusoft polintr
## Min. : 1 Min. : 0.00 Min. :1 Min. :1.00
## 1st Qu.: 5748 1st Qu.: 0.00 1st Qu.:3 1st Qu.:2.00
## Median :10317 Median : 2.00 Median :5 Median :3.00
## Mean :11100 Mean : 3.56 Mean :4 Mean :2.64
## 3rd Qu.:15836 3rd Qu.: 7.00 3rd Qu.:5 3rd Qu.:3.00
## Max. :27238 Max. :99.00 Max. :9 Max. :9.00
## NA's :381351 NA's :276867 NA's :336964
## stfdem stfgov trstprl vote
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :1.000
## 1st Qu.: 3.000 1st Qu.: 2.000 1st Qu.: 3.000 1st Qu.:1.000
## Median : 5.000 Median : 5.000 Median : 5.000 Median :1.000
## Mean : 8.453 Mean : 6.905 Mean : 6.609 Mean :1.419
## 3rd Qu.: 7.000 3rd Qu.: 6.000 3rd Qu.: 7.000 3rd Qu.:2.000
## Max. :99.000 Max. :99.000 Max. :99.000 Max. :9.000
## NA's :2046
## gndr yrbrn age agea
## Min. :1.000 Min. :1885 Min. : 14.2 Min. : 13.00
## 1st Qu.:1.000 1st Qu.:1949 1st Qu.: 32.8 1st Qu.: 34.00
## Median :2.000 Median :1964 Median : 47.2 Median : 49.00
## Mean :1.551 Mean :2009 Mean : 54.4 Mean : 55.06
## 3rd Qu.:2.000 3rd Qu.:1979 3rd Qu.: 62.3 3rd Qu.: 64.00
## Max. :9.000 Max. :9999 Max. :999.0 Max. :999.00
## NA's :469745
## edufld edulvla edulvlb eisced
## Min. : 1.0 Min. : 1.0 Min. : 0.0 Min. : 0.000
## 1st Qu.: 1.0 1st Qu.: 2.0 1st Qu.: 229.0 1st Qu.: 2.000
## Median : 5.0 Median : 3.0 Median : 322.0 Median : 3.000
## Mean :15.3 Mean : 3.5 Mean : 454.4 Mean : 3.867
## 3rd Qu.: 9.0 3rd Qu.: 5.0 3rd Qu.: 520.0 3rd Qu.: 5.000
## Max. :99.0 Max. :99.0 Max. :9999.0 Max. :99.000
## NA's :367262 NA's :323097 NA's :189648
## hinctnt hinctnta isco08 iscoco
## Min. : 1.0 Min. : 1.00 Min. : 0 Min. : 100
## 1st Qu.: 5.0 1st Qu.: 3.00 1st Qu.: 3113 1st Qu.: 3310
## Median : 7.0 Median : 6.00 Median : 5221 Median : 5220
## Mean :23.1 Mean :20.21 Mean :12896 Mean :13491
## 3rd Qu.:10.0 3rd Qu.:10.00 3rd Qu.: 8182 3rd Qu.: 8323
## Max. :99.0 Max. :99.00 Max. :99999 Max. :99999
## NA's :394140 NA's :142352 NA's :242106 NA's :270639
# Show the first six rows of the dataset
head(ESSmastersubset, n = 6L)
## # A tibble: 6 × 32
## name essround edition proddate idno cntry dweight pspwght pweight anweight
## <chr> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ESS1e0… 1 6.7 23.11.2… 1 AT 0.945 0.941 0.271 0.255
## 2 ESS1e0… 1 6.7 23.11.2… 2 AT 0.473 0.470 0.271 0.128
## 3 ESS1e0… 1 6.7 23.11.2… 3 AT 0.945 1.39 0.271 0.378
## 4 ESS1e0… 1 6.7 23.11.2… 4 AT 0.945 1.38 0.271 0.375
## 5 ESS1e0… 1 6.7 23.11.2… 6 AT 1.89 1.44 0.271 0.390
## 6 ESS1e0… 1 6.7 23.11.2… 7 AT 0.945 1.39 0.271 0.378
## # ℹ 22 more variables: prob <dbl>, stratum <dbl>, psu <dbl>, netuse <dbl>,
## # netusoft <dbl>, polintr <dbl>, stfdem <dbl>, stfgov <dbl>, trstprl <dbl>,
## # vote <dbl>, gndr <dbl>, yrbrn <dbl>, age <dbl>, agea <dbl>, edufld <dbl>,
## # edulvla <dbl>, edulvlb <dbl>, eisced <dbl>, hinctnt <dbl>, hinctnta <dbl>,
## # isco08 <dbl>, iscoco <dbl>
DATA PREP
# Build 'combined_age': keep 'age' wherever it is present and fall back to
# 'agea' for rows where 'age' is NA
ESSmastersubset[["combined_age"]] <- with(
  ESSmastersubset,
  ifelse(is.na(age), agea, age)
)
# Inspect the distribution of the combined age variable
summary(ESSmastersubset[["combined_age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.00 34.00 49.00 55.08 64.00 999.00
# Clean 'combined_age': values outside 0-150 are treated as missing.
# Each source column is cleaned BEFORE the two are combined. If 'age' holds an
# out-of-range sentinel (the raw data has a maximum of 999), it must not take
# precedence over a usable 'agea' and then be discarded; only when both sources
# are missing/invalid does the result become NA.
ESSmastersubset <- ESSmastersubset %>%
  mutate(
    combined_age = coalesce(
      if_else(age >= 0 & age <= 150, age, NA_real_),
      if_else(agea >= 0 & agea <= 150, agea, NA_real_)
    )
  )
# Recode 'eisced' into the three-category factor 'education_level'.
# Only the codes 1-7 are listed as levels, so every other value (including the
# codes 0, 55, 77, 88 and 99) is turned into NA by factor() itself.
# Repeated labels merge several codes into one level:
# 1-2 -> "Low", 3-5 -> "Medium", 6-7 -> "High".
ESSmastersubset$education_level <- factor(
  ESSmastersubset$eisced,
  levels = 1:7,
  labels = c("Low", "Low", "Medium", "Medium", "Medium", "High", "High")
)
# Convert 'vote' to a labelled factor (overwrites the raw numeric column).
# Codes 1, 2 and 3 are the only levels; any other value (including the
# codes 7, 8 and 9) is not a level and therefore becomes NA.
ESSmastersubset$vote <- factor(
  ESSmastersubset$vote,
  levels = 1:3,
  labels = c("Yes", "No", "Not eligible")
)
# Derive the ordered-by-level factor 'political_interest' from 'polintr'.
# Codes 1-4 map to the four labels below (level order = code order); any other
# value (including the codes 7, 8 and 9) is not a level and becomes NA.
ESSmastersubset <- ESSmastersubset %>%
  mutate(
    political_interest = factor(
      polintr,
      levels = 1:4,
      labels = c(
        "Very interested",
        "Quite interested",
        "Hardly interested",
        "Not at all interested"
      )
    )
  )
# Verify the recode: counts per level, with an <NA> column if any are missing
table(ESSmastersubset[["political_interest"]], useNA = "ifany")
##
## Very interested Quite interested Hardly interested
## 55586 179499 178336
## Not at all interested <NA>
## 97660 1664
# Counts per 'vote' category (NA values are not tabulated)
table(ESSmastersubset[["vote"]])
##
## Yes No Not eligible
## 366552 103484 37453
# Share of each 'vote' category among the non-missing responses
table(ESSmastersubset[["vote"]]) %>%
  prop.table()
##
## Yes No Not eligible
## 0.72228561 0.20391378 0.07380061
# Counts per 'education_level' category (NA values are not tabulated)
table(ESSmastersubset[["education_level"]])
##
## Low Medium High
## 114751 223052 98070
# Share of each 'education_level' category among the non-missing values
table(ESSmastersubset[["education_level"]]) %>%
  prop.table()
##
## Low Medium High
## 0.2632671 0.5117362 0.2249967
# Counts per 'political_interest' category (NA values are not tabulated)
table(ESSmastersubset[["political_interest"]])
##
## Very interested Quite interested Hardly interested
## 55586 179499 178336
## Not at all interested
## 97660
# Share of each 'political_interest' category among the non-missing values
table(ESSmastersubset[["political_interest"]]) %>%
  prop.table()
##
## Very interested Quite interested Hardly interested
## 0.1087616 0.3512144 0.3489388
## Not at all interested
## 0.1910852
# Number of respondents per country code
table(ESSmastersubset[["cntry"]])
##
## AL AT BE BG CH CY CZ DE DK EE ES FI FR
## 1201 17579 17451 13240 18309 6065 20090 36845 12408 16856 19452 21095 19038
## GB GR HR HU IE IL IS IT LT LU LV ME MK
## 22663 12558 8098 18760 24250 16218 3975 10178 13017 3187 3921 2478 1429
## NL NO PL PT RO RS RU SE SI SK TR UA XK
## 20024 17402 17689 17881 2146 3548 12458 18216 14732 12734 4272 9987 1295
# Share of the sample contributed by each country code
table(ESSmastersubset[["cntry"]]) %>%
  prop.table()
##
## AL AT BE BG CH CY
## 0.002342295 0.034284098 0.034034462 0.025821802 0.035707808 0.011828492
## CZ DE DK EE ES FI
## 0.039181269 0.071858331 0.024199163 0.032874041 0.037936986 0.041141308
## FR GB GR HR HU IE
## 0.037129567 0.044199358 0.024491706 0.015793426 0.036587387 0.047294464
## IL IS IT LT LU LV
## 0.031629757 0.007752392 0.019850023 0.025386888 0.006215565 0.007647076
## ME MK NL NO PL PT
## 0.004832812 0.002786960 0.039052550 0.033938898 0.034498630 0.034873085
## RO RS RU SE SI SK
## 0.004185316 0.006919619 0.024296678 0.035526431 0.028731631 0.024834957
## TR UA XK
## 0.008331627 0.019477518 0.002525622
# Distribution of 'combined_age' after cleaning (includes the NA count)
summary(ESSmastersubset[["combined_age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 13.00 33.75 49.00 48.70 63.00 123.00 3443
DESCRIPTIVE STATS
# Create a vertical summary table (one row per statistic).
#
# All numbers are formatted with sprintf("%.2f") rather than round(): the
# 'Statistics' column is a character vector (the age entry is "mean (sd)"), and
# coercing rounded numerics to character drops trailing zeros (e.g. "22.5"
# next to "26.33"), which gives the table inconsistent decimals.

# Percentage of non-missing values of `x` equal to each entry of `levels`,
# returned as strings with exactly two decimals (same order as `levels`).
pct_of <- function(x, levels) {
  shares <- vapply(
    levels,
    function(lv) mean(x == lv, na.rm = TRUE),
    numeric(1)
  )
  sprintf("%.2f", 100 * shares)
}

summary_table <- tibble(
  Variable = c("Mean Age (SD)",
               "Voted Yes (%)", "Voted No (%)", "Not Eligible to Vote (%)",
               "Low Education (%)", "Medium Education (%)", "High Education (%)",
               "Very Interested in Politics (%)", "Quite Interested in Politics (%)",
               "Hardly Interested in Politics (%)", "Not Interested in Politics (%)"),
  Statistics = c(
    sprintf("%.2f (%.2f)",
            mean(ESSmastersubset$combined_age, na.rm = TRUE),
            sd(ESSmastersubset$combined_age, na.rm = TRUE)),
    pct_of(ESSmastersubset$vote, c("Yes", "No", "Not eligible")),
    pct_of(ESSmastersubset$education_level, c("Low", "Medium", "High")),
    pct_of(ESSmastersubset$political_interest,
           c("Very interested", "Quite interested",
             "Hardly interested", "Not at all interested"))
  )
)
# Turn the summary tibble into a flextable and format it step by step
ft <- flextable(summary_table)
ft <- set_caption(ft, "Table 1: Summary Statistics")
ft <- autofit(ft)      # let flextable pick the column widths
ft <- bold(ft, j = 1)  # first column (the variable names) in bold

# Start an empty Word document, insert the table, then a blank paragraph so
# the table is followed by some spacing
doc <- read_docx()
doc <- body_add_flextable(doc, value = ft)
doc <- body_add_par(doc, " ")

# Write the document to disk
print(doc, target = "summary_table_soc252.docx")
# Box plot of 'combined_age' for each 'education_level', one fill colour per level
age_distribution_plot <- ggplot(
  data = ESSmastersubset,
  mapping = aes(x = education_level, y = combined_age, fill = education_level)
) +
  geom_boxplot() +
  theme_minimal() +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    x = "Education Level",
    y = "Age",
    title = "Age Distribution by Education Level"
  )
# Write the plot to a PNG file (no size given, so ggsave picks the dimensions)
ggsave(filename = "age_distribution_plot.png", plot = age_distribution_plot)
## Saving 7 x 5 in image
## Warning: Removed 3443 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Bar Plot of Voting Behavior by Education Level
# Stacked bars scaled to proportions (position = "fill"): for each education
# level, the share of respondents in each 'vote' category.
#
# Rows with a missing education level or a missing vote are dropped first.
# Otherwise ggplot draws an extra "NA" bar on the x axis and an NA segment
# inside every bar, which distorts the displayed proportions.
# 'vote' is already a factor, so no as.factor() conversion is needed.
voting_plot_data <- subset(
  ESSmastersubset,
  !is.na(education_level) & !is.na(vote)
)
voting_behavior_plot <- ggplot(voting_plot_data,
                               aes(x = education_level, fill = vote)) +
  geom_bar(position = "fill") +
  scale_fill_brewer(palette = "Pastel1", name = "Voted") +
  labs(title = "Voting Behavior by Education Level",
       x = "Education Level",
       y = "Proportion") +
  theme_minimal()
# Save the plot
ggsave("voting_behavior_plot.png", plot = voting_behavior_plot, width = 7, height = 7)
# Overlaid, semi-transparent density curves of 'combined_age', one per
# 'political_interest' category
age_by_political_interest_plot <- ggplot(
  data = ESSmastersubset,
  mapping = aes(x = combined_age, fill = political_interest)
) +
  geom_density(alpha = 0.7) +
  theme_minimal() +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    x = "Age",
    y = "Density",
    title = "Density Plot of Age by Political Interest"
  )
# Write the plot to a PNG file (no size given, so ggsave picks the dimensions)
ggsave(
  filename = "age_by_political_interest_plot.png",
  plot = age_by_political_interest_plot
)
## Saving 7 x 5 in image
## Warning: Removed 3443 rows containing non-finite outside the scale range
## (`stat_density()`).
REGRESSION ANALYSIS
library(sjPlot)
# Model 1: Education Level on Voting Behavior
# Logistic regression of 'vote' on 'education_level'.
# NOTE: 'vote' is a factor whose first level is "Yes". For a factor response,
# binomial glm() treats the first level as "failure" and every other level as
# "success", so the odds ratios describe the odds of NOT answering "Yes"
# ("No" and "Not eligible" together).
model_1 <- glm(
  formula = vote ~ education_level,
  family = binomial(link = "logit"),
  data = ESSmastersubset
)
# Render the regression table (odds ratios plus standardized estimates,
# significance shown as stars) and also write it to a .doc file
tab_model(
  model_1,
  title = "Model 1: Education Level on Voting Behavior",
  dv.labels = "Voting Behavior",
  show.std = TRUE,
  p.style = "stars",
  file = "Model_1_Regression_Analysis.doc"
)
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
| Voting Behavior | ||||
|---|---|---|---|---|
| Predictors | Odds Ratios | std. Beta | CI | standardized CI |
| (Intercept) | 0.66 *** | 0.66 | 0.65 – 0.66 | 0.65 – 0.66 |
| education level [Medium] | 0.58 *** | 0.58 | 0.57 – 0.59 | 0.57 – 0.59 |
| education level [High] | 0.31 *** | 0.31 | 0.30 – 0.32 | 0.30 – 0.32 |
| Observations | 431562 | |||
| R2 Tjur | 0.023 | |||
|
||||
# Model 2: Education level plus age as a control variable
# NOTE: 'vote' is a factor whose first level is "Yes"; binomial glm() models
# the probability of any level other than the first, so the odds ratios
# describe the odds of NOT answering "Yes".
model_2 <- glm(
  formula = vote ~ education_level + combined_age,
  family = binomial(link = "logit"),
  data = ESSmastersubset
)
# Render the regression table (odds ratios plus standardized estimates,
# significance shown as stars) and also write it to a .doc file
tab_model(
  model_2,
  title = "Model 2: Education Level and Age on Voting Behavior",
  dv.labels = "Voting Behavior",
  show.std = TRUE,
  p.style = "stars",
  file = "Model_2_Regression_Analysis.doc"
)
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
| Voting Behavior | ||||
|---|---|---|---|---|
| Predictors | Odds Ratios | std. Beta | CI | standardized CI |
| (Intercept) | 4.99 *** | 0.73 | 4.87 – 5.12 | 0.72 – 0.74 |
| education level [Medium] | 0.45 *** | 0.45 | 0.44 – 0.46 | 0.44 – 0.46 |
| education level [High] | 0.23 *** | 0.23 | 0.22 – 0.23 | 0.22 – 0.23 |
| combined age | 0.96 *** | 0.48 | 0.96 – 0.96 | 0.48 – 0.48 |
| Observations | 429266 | |||
| R2 Tjur | 0.274 | |||
|
||||
# Model 3: Full main-effects model (education level, age, political interest)
# NOTE: 'vote' is a factor whose first level is "Yes"; binomial glm() models
# the probability of any level other than the first, so the odds ratios
# describe the odds of NOT answering "Yes".
model_3 <- glm(
  formula = vote ~ education_level + combined_age + political_interest,
  family = binomial(link = "logit"),
  data = ESSmastersubset
)
# Render the regression table (odds ratios plus standardized estimates,
# significance shown as stars) and also write it to a .doc file
tab_model(
  model_3,
  title = "Model 3: Education Level, Age, and Political Interest on Voting Behavior",
  dv.labels = "Voting Behavior",
  show.std = TRUE,
  p.style = "stars",
  file = "Model_3_Regression_Analysis.doc"
)
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
| Voting Behavior | ||||
|---|---|---|---|---|
| Predictors | Odds Ratios | std. Beta | CI | standardized CI |
| (Intercept) | 1.88 *** | 0.31 | 1.81 – 1.96 | 0.30 – 0.32 |
| education level [Medium] | 0.53 *** | 0.53 | 0.52 – 0.54 | 0.52 – 0.54 |
| education level [High] | 0.32 *** | 0.32 | 0.31 – 0.33 | 0.31 – 0.33 |
| combined age | 0.96 *** | 0.50 | 0.96 – 0.96 | 0.50 – 0.51 |
|
political interest [Quite interested] |
1.20 *** | 1.20 | 1.16 – 1.24 | 1.16 – 1.24 |
|
political interest [Hardly interested] |
2.24 *** | 2.24 | 2.17 – 2.30 | 2.17 – 2.30 |
|
political interest [Not at all interested] |
4.68 *** | 4.68 | 4.54 – 4.84 | 4.54 – 4.84 |
| Observations | 428086 | |||
| R2 Tjur | 0.284 | |||
|
||||
INTERACTIONS
# Model 4: Education level x age interaction (main effects included by `*`)
# NOTE: 'vote' is a factor whose first level is "Yes"; binomial glm() models
# the probability of any level other than the first, so fitted values and odds
# ratios refer to NOT answering "Yes".
model_4 <- glm(
  formula = vote ~ education_level * combined_age,
  family = binomial(link = "logit"),
  data = ESSmastersubset
)
# Render the regression table (odds ratios plus standardized estimates, with
# p-value columns) and also write it to a .doc file
tab_model(
  model_4,
  title = "Model 4: Interaction Between Education Level and Age on Voting Behavior",
  dv.labels = "Voting Behavior",
  show.std = TRUE,
  file = "Model_4_Interaction_Regression.doc"
)
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
| Voting Behavior | ||||||
|---|---|---|---|---|---|---|
| Predictors | Odds Ratios | std. Beta | CI | standardized CI | p | std. p |
| (Intercept) | 7.52 | 0.74 | 7.26 – 7.79 | 0.73 – 0.75 | <0.001 | <0.001 |
| education level [Medium] | 0.25 | 0.46 | 0.24 – 0.26 | 0.45 – 0.46 | <0.001 | <0.001 |
| education level [High] | 0.09 | 0.25 | 0.08 – 0.10 | 0.24 – 0.25 | <0.001 | <0.001 |
| combined age | 0.95 | 0.41 | 0.95 – 0.95 | 0.41 – 0.42 | <0.001 | <0.001 |
|
education level [Medium] × combined age |
1.01 | 1.26 | 1.01 – 1.01 | 1.24 – 1.28 | <0.001 | <0.001 |
|
education level [High] × combined age |
1.02 | 1.46 | 1.02 – 1.02 | 1.43 – 1.50 | <0.001 | <0.001 |
| Observations | 429266 | |||||
| R2 Tjur | 0.293 | |||||
# Model 5: Education level x political interest interaction
# (main effects included by `*`)
# NOTE: 'vote' is a factor whose first level is "Yes"; binomial glm() models
# the probability of any level other than the first, so fitted values and odds
# ratios refer to NOT answering "Yes".
model_5 <- glm(
  formula = vote ~ education_level * political_interest,
  family = binomial(link = "logit"),
  data = ESSmastersubset
)
# Render the regression table (odds ratios plus standardized estimates, with
# p-value columns) and also write it to a .doc file
tab_model(
  model_5,
  title = "Model 5: Interaction Between Education Level and Political Interest on Voting Behavior",
  dv.labels = "Voting Behavior",
  show.std = TRUE,
  file = "Model_5_Interaction_Regression.doc"
)
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
## Profiled confidence intervals may take longer time to compute.
## Use `ci_method="wald"` for faster computation of CIs.
| Voting Behavior | |||||
|---|---|---|---|---|---|
| Predictors | Odds Ratios | std. Beta | CI | standardized CI | p |
| (Intercept) | 0.28 | 0.28 | 0.26 – 0.30 | 0.26 – 0.30 | <0.001 |
| education level [Medium] | 0.51 | 0.51 | 0.48 – 0.55 | 0.48 – 0.55 | <0.001 |
| education level [High] | 0.38 | 0.38 | 0.35 – 0.41 | 0.35 – 0.41 | <0.001 |
|
political interest [Quite interested] |
1.29 | 1.29 | 1.21 – 1.37 | 1.21 – 1.37 | <0.001 |
|
political interest [Hardly interested] |
2.48 | 2.48 | 2.33 – 2.63 | 2.33 – 2.63 | <0.001 |
|
political interest [Not at all interested] |
4.00 | 4.00 | 3.77 – 4.26 | 3.77 – 4.26 | <0.001 |
|
education level [Medium] × political interest [Quite interested] |
1.11 | 1.11 | 1.03 – 1.20 | 1.03 – 1.20 | 0.008 |
|
education level [High] × political interest [Quite interested] |
1.00 | 1.00 | 0.92 – 1.09 | 0.92 – 1.09 | 0.976 |
|
education level [Medium] × political interest [Hardly interested] |
1.25 | 1.25 | 1.16 – 1.35 | 1.16 – 1.35 | <0.001 |
|
education level [High] × political interest [Hardly interested] |
1.09 | 1.09 | 1.00 – 1.18 | 1.00 – 1.18 | 0.053 |
|
education level [Medium] × political interest [Not at all interested] |
1.85 | 1.85 | 1.71 – 1.99 | 1.71 – 1.99 | <0.001 |
|
education level [High] × political interest [Not at all interested] |
1.77 | 1.77 | 1.61 – 1.93 | 1.61 – 1.93 | <0.001 |
| Observations | 430362 | ||||
| R2 Tjur | 0.098 | ||||
# Create interaction term
# Both inputs are factors, so arithmetic `*` is not defined for them (it only
# emits "'*' not meaningful for factors" and returns NA for every row).
# interaction() instead builds a factor with one level per combination of
# education level and political interest; a row is NA when either input is NA.
ESSmastersubset$education_interest_interaction <- interaction(
  ESSmastersubset$education_level,
  ESSmastersubset$political_interest,
  sep = " x "
)
VISUALISATIONS FOR INTERACTIONS
# Visualization for Model 4: Interaction between education level and age
# Prediction grid: 100 evenly spaced ages across the observed range for every
# education level. The grid sizes are derived from the data instead of being
# hard-coded, and 'education_level' is kept as a factor with the original
# level order so the legend reads Low / Medium / High rather than alphabetical.
edu_levels <- levels(ESSmastersubset$education_level)
age_grid <- seq(min(ESSmastersubset$combined_age, na.rm = TRUE),
                max(ESSmastersubset$combined_age, na.rm = TRUE),
                length.out = 100)
predicted_probs_age <- data.frame(
  education_level = factor(rep(edu_levels, each = length(age_grid)),
                           levels = edu_levels),
  combined_age = rep(age_grid, times = length(edu_levels))
)
# 'vote' is a factor with first level "Yes", and binomial glm() models the
# probability of any level other than the first. predict(type = "response")
# therefore returns P(vote != "Yes"); take the complement so the plotted value
# really is the probability of having voted, as the axis label states.
predicted_probs_age$pred <- 1 - predict(model_4,
                                        newdata = predicted_probs_age,
                                        type = "response")
interaction_plot_age <- ggplot(predicted_probs_age, aes(x = combined_age, y = pred, color = education_level)) +
  geom_line(linewidth = 1) +
  scale_color_brewer(palette = "Pastel1") +
  theme_minimal() +
  labs(
    title = "Interaction Effect of Education Level and Age on Voting Behavior",
    x = "Age",
    y = "Predicted Probability of Voting"
  )
# Save the interaction plot
ggsave("interaction_plot_age.png", plot = interaction_plot_age)
## Saving 7 x 5 in image
# Visualization for Model 5: Interaction between education level and political interest
# Prediction grid: every combination of education level and political
# interest. Sizes come from the factor levels instead of hard-coded 4 / 3, and
# both columns are factors with the original level order; as plain character
# columns ggplot would sort them alphabetically, so the x axis would run
# "Hardly" -> "Not at all" -> "Quite" -> "Very" and the lines would connect
# the categories in a meaningless order.
edu_levels <- levels(ESSmastersubset$education_level)
interest_levels <- levels(ESSmastersubset$political_interest)
predicted_probs_interest <- data.frame(
  education_level = factor(rep(edu_levels, each = length(interest_levels)),
                           levels = edu_levels),
  political_interest = factor(rep(interest_levels, times = length(edu_levels)),
                              levels = interest_levels)
)
# 'vote' is a factor with first level "Yes", and binomial glm() models the
# probability of any level other than the first. predict(type = "response")
# therefore returns P(vote != "Yes"); take the complement so the plotted value
# really is the probability of having voted, as the axis label states.
predicted_probs_interest$pred <- 1 - predict(model_5,
                                             newdata = predicted_probs_interest,
                                             type = "response")
interaction_plot_interest <- ggplot(predicted_probs_interest, aes(x = political_interest, y = pred, color = education_level)) +
  geom_line(linewidth = 1, aes(group = education_level)) +
  scale_color_brewer(palette = "Pastel1") +
  theme_minimal() +
  labs(
    title = "Interaction Effect of Education Level and Political Interest on Voting Behavior",
    x = "Political Interest",
    y = "Predicted Probability of Voting"
  )
# Save the interaction plot
ggsave("interaction_plot_interest.png", plot = interaction_plot_interest)
## Saving 7 x 5 in image