k_meryem_homework

# Loading the list of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales") # add any you need here

# Installing packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Now we load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## Warning: package 'infer' was built under R version 4.3.3

## Warning: package 'effects' was built under R version 4.3.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.3.3

## lattice theme set by effectsTheme()
## See ?effectsTheme for details.

## Warning: package 'survey' was built under R version 4.3.3

## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select

## Warning: package 'aod' was built under R version 4.3.3

## 
## Attaching package: 'aod'
## 
## The following object is masked from 'package:survival':
## 
##     rats

## Warning: package 'interactions' was built under R version 4.3.3

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## Attaching package: 'flextable'
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "interactions" "aod"          "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "kableExtra"   "interactions" "aod"          "MASS"         "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "flextable"    "kableExtra"   "interactions" "aod"          "MASS"        
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "scales"       "flextable"    "kableExtra"   "interactions" "aod"         
##  [6] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
## [11] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [16] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [21] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [26] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [31] "methods"      "base"

Task 1

Use data for Germany and the variable coding for model3 and model4 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

# Here we read the germany data fst
germany_data <- read_fst("germany_data.fst")

# Renaming data set to 'df' , so whenever 'df' is used it actually means 'germany_data'
df <- germany_data

# Preparing weighted data set
df$weight <- df$dweight * df$pweight
survey_design <- svydesign(ids = ~1, data = df, weights = ~weight)

df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )

# Recoding the varaible
df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    # Recoding generational cohorts based on the year of birth (yrbrn).
    # The year of birth is categorized into different generational cohorts.
    # Interwar (1900-1945), Baby Boomers (1946-1964), Gen X (1965-1979), Millennials (1980-1996).
    # The 'TRUE' line is a catch-all that keeps the original year of birth for those not in these ranges.
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
    # Factors are used in R to handle categorical variables.
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         5360         8583         5601         4652

# Inputting into the models
model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)

modelsummary(
  list(model3, model4),
  # you can add as many models as you have generated by putting them in this list. But you MUST refer to them to their stored name. So if you named your model say Regression1, you would have to put that instead of model1. By default, you can use the naming convention we rely upon of model1, model2, ...
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}", # what we are specifying here is what we want to show in our model summary table
                  "{estimate} ({std.error}){stars}"),
  statistic = NULL)

	(1)	(2)
(Intercept)	55.3 (0.3)***	56.4 (0.3)***
polIDModerate	4.6 (0.3)***	3.4 (0.4)***
polIDRight	7.7 (0.4)***	4.2 (0.7)***
religious	0.9 (0.0)***	0.6 (0.1)***
polIDModerate × religious		0.3 (0.1)***
polIDRight × religious		0.8 (0.1)***
Num.Obs.	23373	23373
R2	0.048	0.050
R2 Adj.	0.048	0.050
AIC	200594.1	200558.6
BIC	200634.4	200615.0
Log.Lik.	−100292.049	−100272.298
RMSE	17.27	17.24

COEFFICIENT of–> olIDModerate (4.6): This coefficient suggests that, comparing individuals with the same religiousness (i.e., what some term “holding constant”), the average outcome for individuals identifying as politically moderate are 4.6 units higher compared to the reference category (in our case, identifying ideologically as on the ‘left’). The “baseline group” refers to the category that is omitted, and is thus interpreted as part of our intercept.

COEFFICIENT of–> polIDRight (7.7): This coefficient indicates that, comparing individuals with the same religiousness, individuals identifying as politically right are expected to be, on average, 7.7 units higher than the reference category (i.e., identifying ideologically as on the ‘left’) on the authoritarian values scale.

COEFFICIENT of–> religious (0.9): For the ‘religious’ predictor, a coefficient of 1.6 means that with every one-unit increase in religiousness, there is an associated expected average increase of 0.9 units on the authoritarian values scale, assuming that we are comparing people with the same ideological categorization (i.e., holding ideological self-placement constant).

All 3 of the coefficients have triples asterisks, signifying the p value to be less than 0.001. This implies a less than 0.1% probability that the observed result is due to randomness within our sample. This, under frequentest assumptions, gives us evidence to reject the null hypothesis.

Task 2

Now generate the model4 interaction plot that we did in the tutorial, but again using the German data instead of the French. Interpret.

# Displaying the interaction plot
interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Religiousness",
     ylab="Authoritarian attitudes scale")

interaction_plot

## 
##  polID*religious effect
##           religious
## polID             0        2        5        8       10
##   Left     56.40706 57.56775 59.30879 61.04983 62.21053
##   Moderate 59.85063 61.66176 64.37844 67.09513 68.90626
##   Right    60.60237 63.35032 67.47224 71.59416 74.34211

The interaction plot for those identifying Politically with the left shows an upward trend. This implies that as Left-leaning individuals become more religious, they score more on the authoritarian scale on average.

The interaction plot for those identifying Politically with the right also shows an upward trend. This implies that as Right-leaning individuals become more religious, they score higher on the authoritarian scale on average.

The interaction plot for those identifying Politically as moderates shows an upward trend that is similar to those identifying with the right. This implies that as those with moderate political leaning become more religious, they score higher on the authoritarian scale on average. However, in comparison to the Right group, the effect of religiosity is less steep for moderates, as can be seen from the gradient of the two graphs.

The overall trends are quite similar for all 3 cohorts. The difference in Y-intercepts highlights that in the absence of religious identification (religious scale as 0) PolID right score highest on the authoritarian scale, followed by POLidmoderates and last POLID left

Task 3

Use data for the Netherlands and the variable coding for model5 and model6 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

# Reading the netherlands data fst
netherlands_data <- read_fst("netherlands_data.fst")

# Preparing weighted data set
netherlands_data$weight <- netherlands_data$dweight * netherlands_data$pweight
survey_design <- svydesign(ids = ~1, data = netherlands_data, weights = ~weight)

# Assigning the netherlands_data to 'df' so that it can be used instead
df <- netherlands_data %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )

df <- df %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))

# check
table(df$religion)

## 
##    No   Yes 
## 10913  6750

# Code to produce a table of left and right 
df <- df %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(df$ID)

## 
##  Left Right 
##  5449  7010

# Code to showcase generations
df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    # Recoding generational cohorts based on the year of birth (yrbrn).
    # The year of birth is categorized into different generational cohorts.
    # Interwar (1900-1945), Baby Boomers (1946-1964), Gen X (1965-1979), Millennials (1980-1996).
    # The 'TRUE' line is a catch-all that keeps the original year of birth for those not in these ranges.
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
    # Factors are used in R to handle categorical variables.
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3771         6147         4548         2730

model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)

# Displaying a summary table
modelsummary(
  list(model5, model6),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")

	(1)	(2)
religionYes	7.2 (0.3)***	5.7 (0.7)***
IDRight	4.2 (0.3)***	4.3 (0.3)***
genBaby Boomers	−5.2 (0.4)***	−5.9 (0.6)***
genGen X	−7.7 (0.4)***	−8.8 (0.6)***
genMillennials	−8.4 (0.5)***	−9.7 (0.6)***
religionYes × genBaby Boomers		1.1 (0.8)
religionYes × genGen X		2.3 (0.9)**
religionYes × genMillennials		2.9 (1.0)**
Num.Obs.	12116	12116
R2	0.124	0.125
R2 Adj.	0.124	0.125
AIC	101219.3	101213.5
BIC	101271.2	101287.5
Log.Lik.	−50602.672	−50596.757
RMSE	15.27	15.26

COEFFICIENT of–> religionYes (7.2): it suggests that, indivudals who identify with a religion opposed to those that don’t involves an increase of 7.2 in authitorian values.

COEFFICIENT of IDRight (4.2): Individuals that identify with the right-wing political spectrum show a 4.2 in the authoritarian values.

genBaby Boomers (-5.2): genBaby Boomers displays a decrease of 5.2 in authoritarian values

genGen X (-7.7): genGen X indicates a decrease of 7.7 in uthoritarian values.

genMillenials (-8.4): genMillenails showcases a decrase of 8.4 in uthoritarian values.

When it comes to religion, and identifying as a right-wing on the political spectrum, there seems to be a trend in younger generations not identifying with authoritarian ideology.

Task 4

Produce the model7 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

netherlands_data <- read_fst("netherlands_data.fst")

df <- netherlands_data

df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    # Recoding generational cohorts based on the year of birth (yrbrn).
    # The year of birth is categorized into different generational cohorts.
    # Interwar (1900-1945), Baby Boomers (1946-1964), Gen X (1965-1979), Millennials (1980-1996).
    # The 'TRUE' line is a catch-all that keeps the original year of birth for those not in these ranges.
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
    # Factors are used in R to handle categorical variables.
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3967         6359         4684         2803

df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )

model7 <- lm(auth ~ + cohort + polID + cohort*polID, data = df, weights = weight)
interact_plot(model7, pred = cohort, modx = polID, jnplot = TRUE)

Interpretation : Those on the left started off with lower authoritarian values than the other two political orientations, and decreased in authoritarian values over time, eventually reaching a low point in 2000. However, the right, despite having started off with the highest authoritarian values prior to 1940, also faced a decrease. This decrease was sharper than the other two’s, and eventually became slightly lower than the left’s in 2000. The moderates had authoritarian values higher than the left but lower than the right, which decreased over time, and eventually ended off higher than the other two’s. There does not appear to be an interaction effect here, as the lines are not parallel and do not seem to be dependent on any other variable.

Task 5

Produce the model8 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

netherlands_data <- read_fst("netherlands_data.fst")

df <- netherlands_data

df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    # Recoding generational cohorts based on the year of birth (yrbrn).
    # The year of birth is categorized into different generational cohorts.
    # Interwar (1900-1945), Baby Boomers (1946-1964), Gen X (1965-1979), Millennials (1980-1996).
    # The 'TRUE' line is a catch-all that keeps the original year of birth for those not in these ranges.
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
    # Factors are used in R to handle categorical variables.
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3967         6359         4684         2803

df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

df <- df %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(df$ID)

## 
##  Left Right 
##  5449  7010

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )

model8 <- lm(auth ~ religious + ID + religious*ID, data = df, weights = weight)
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)

The religiosity of those on the right as well as those on the left both have a positive relationship with authoritarian values, although the right has higher values for both variables. There appears to be an interaction effect, as both lines are parallel, and the values would differ from the current values shown if one changes up the religiosity or the authoritarian values.

k_meryem_homework_5

2024-03-26

Task 1

Task 2

Task 3

Task 4

Task 5