# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "kableExtra", "flextable", "equatiomatic") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'effects' was built under R version 4.3.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.2
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## Warning: package 'survey' was built under R version 4.3.2
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## Attaching package: 'flextable'
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "kableExtra"   "MASS"         "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "flextable"    "kableExtra"   "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "equatiomatic" "flextable"    "kableExtra"   "MASS"         "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"
ess <- read_fst("All-ESS-Data.fst")
  1. What’s your research question? How does trust in the police relate to feeling safe of walking alone in local area after dark based on gender in Iceland?

  2. What’s the main debate related to your question (add at least one citation)? According to the Global Peace Index, Iceland has consistently ranked as the safest country in the world for 14 consecutive years, as reported by World Population Review. The Global Peace Index (GPI) evaluates and ranks countries based on their safety and peacefulness. However, academic articles suggest that while Iceland maintains a low crime rate, there is an increasing concern among its population regarding crime. Although not explicitly mentioned, it can be inferred that the low crime rates potentially contribute to people’s trust in law enforcement agencies. By perceiving effective performance from the police due to reduced criminal activities, individuals may develop a sense of confidence and reliance on them. Nevertheless, with rising apprehensions about crimes such as drug-related offenses highlighted in these articles, people’s expectations towards law enforcement might also escalate accordingly. Therefore, if authorities consistently succeed in maintaining low crime rates and ensuring public safety, it will further enhance people’s trust in them; conversely, any failure could undermine this trust. Safest Countries in the World 2023 (worldpopulationreview.com)Links to an external site. https://heinonline.org/HOL/P?h=hein.journals/socprob48&i=98Links to an external site. Gunnlaugsson, H. (2001). Going public with social science: crime and criminal justice policy in iceland. Social Problems, 48(1), 88-92.

  3. What’s your main hypothesis (not your null or interaction)? In the given context, my primary hypothesis would be that individuals who exhibit higher levels of trust in the police are more likely to feel secure when walking alone in the area after dark, taking into account gender differences within Iceland.

  4. Why did you choose to explore your particular question, outcome, and country combo? I have chosen to investigate the specific correlation between a particular question, outcome, and country of trust in the police relate to feeling safe of walking alone in local area after dark based on gender in Iceland primarily due to my personal interest towards examining the underlying reasons behind Iceland’s consistent ranking as the safest country in the world for fourteen consecutive years. This interest was sparked by my perusal of information on World Population Review. Furthermore, after delving into Gunnlaugsson’s scholarly article, where he actively engages in the public discourse surrounding crime and drug issues within Icelandic society, I became even more intrigued.

table(ess$essround)
## 
##     1     2     3     4     5     6     7     8     9    10 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685

Create year variable from 1983 to 2018, based on every 5 year

ess$year <- NA
replacements <- c(1983, 1988, 1993, 1998, 2003, 2008, 2013, 2018, 2023)
for(i in 1:9){
  ess$year[ess$essround == i] <- replacements[i]
}
iceland_data <- ess %>%
  filter(cntry == "IS")

ice <- iceland_data

Variables: trstplc - Trust in the police, the independent variable https://ess.sikt.no/en/datafile/b2b0bf39-176b-4eca-8d26-3c05ea83d2cb/266?tab=1&elements=[%2275644eb7-7153-4e4f-b8a8-647c1093545f/4%22]

aesfdrk - Feeling of safety of walking alone in local area after dark, the dependent variable https://ess.sikt.no/en/datafile/b2b0bf39-176b-4eca-8d26-3c05ea83d2cb/266?tab=1&elements=[%2295133f07-a60c-462e-a90b-a055146a2533/2%22]

gndr - gender, as the second independent variable.

Variable Cleaning

iceland_data <- ess %>% 
  filter(cntry == "IS") %>% 
  mutate(
    trstplc = ifelse(trstplc %in% c(77, 88, 99), NA, trstplc), 
    aesfdrk = ifelse(aesfdrk %in% c(7, 8, 9), NA, aesfdrk), 
  )
table(iceland_data$trstplc)
## 
##    0    1    2    3    4    5    6    7    8    9   10 
##   29   24   45   62  107  223  287  632 1212  872  456
table(iceland_data$aesfdrk)
## 
##    1    2    3    4 
## 2336 1254  281   76
iceland_data <- iceland_data %>%
  mutate(
    # Recoding gender
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)
    ),
  )

table(iceland_data$gndr)
## 
## Female   Male 
##   2022   1938
iceland_data <- iceland_data %>%
  mutate(
    # Recoding age variable, setting 999 to NA
    age = ifelse(agea == 999, NA, agea)
  )

table(iceland_data$age)
## 
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 
## 31 53 67 57 55 48 56 52 55 59 52 46 60 53 52 65 68 68 54 53 51 64 59 83 57 51 
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 
## 74 56 77 65 70 69 73 71 58 72 68 84 63 82 75 70 64 71 69 62 62 69 52 63 68 63 
## 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 93 
## 49 64 52 45 54 47 46 44 32 32 39 35 20 25 20 16 17 22 10 14  8  7 10 15  2  1 
## 94 
##  1
datasummary_skim(iceland_data %>% dplyr::select(aesfdrk, trstplc, age))
Unique (#) Missing (%) Mean SD Min Median Max
aesfdrk 5 1 1.5 0.7 1.0 1.0 4.0
trstplc 12 1 7.6 1.9 0.0 8.0 10.0
age 80 0 47.9 18.6 15.0 48.0 94.0

Removing NAs:

unique(iceland_data$gndr)
## [1] NA       "Female" "Male"
iceland_data <- iceland_data %>% filter(!is.na(gndr))
iceland_data <- iceland_data %>% filter(!is.na(aesfdrk))
iceland_data <- iceland_data %>% filter(!is.na(trstplc))
iceland_data <- iceland_data %>% filter(!is.na(age))
unique(iceland_data$gndr)
## [1] "Female" "Male"
unique(iceland_data$aesfdrk)
## [1] 2 1 4 3
unique(iceland_data$trstplc)
##  [1]  5  4  8 10  7  2  9  6  1  3  0
unique(iceland_data$age)
##  [1] 35 26 23 66 20 48 44 36 29 47 19 58 28 70 67 40 57 21 61 33 50 32 24 31 81
## [26] 46 16 52 49 27 51 53 65 73 55 74 45 39 30 25 63 75 37 18 62 38 42 56 54 71
## [51] 41 15 68 34 84 17 43 69 64 22 79 60 90 86 87 80 78 82 88 59 72 76 77 85 83
## [76] 89 94 91

Check to see NAs removed or not:

table(iceland_data$gndr)
## 
## Female   Male 
##   2000   1918
table(iceland_data$aesfdrk)
## 
##    1    2    3    4 
## 2316 1246  280   76
table(iceland_data$trstplc)
## 
##    0    1    2    3    4    5    6    7    8    9   10 
##   27   24   45   62  105  223  286  627 1206  865  448
table(iceland_data$age)
## 
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 
## 31 53 67 56 55 48 55 52 53 57 52 45 60 51 52 65 67 66 53 53 51 63 57 83 57 51 
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 
## 73 56 77 64 70 66 73 70 58 72 68 83 63 80 75 69 63 70 68 61 62 69 52 62 67 63 
## 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 94 
## 49 64 52 45 54 47 45 43 32 31 38 33 20 25 19 16 16 19 10 13  8  6 10 13  2  1

Long form coding for Chi-Square Calculating and interpreting critical value for independence

tab_dat <- iceland_data

# Create a crosstable and save it into the object mytab
mytab <- table(tab_dat$aesfdrk, tab_dat$trstplc)

# Calculate row-wise sums of counts in table 
rsums <- rowSums(mytab)

# Calculate column-wise sums of counts in table
csums <- colSums(mytab)

# Get the total count of the table (N)
N <- sum(mytab)

# Generate table of expected proportions
ptab <- tcrossprod(rsums/N, csums/N)
cat("Table of Expected Proportions:\n")
## Table of Expected Proportions:
print(ptab)
##              [,1]         [,2]         [,3]         [,4]         [,5]
## [1,] 0.0040735538 0.0036209367 0.0067892563 0.0093540865 0.0158415981
## [2,] 0.0021915579 0.0019480514 0.0036525965 0.0050324662 0.0085227251
## [3,] 0.0004924849 0.0004377644 0.0008208082 0.0011308913 0.0019152191
## [4,] 0.0001336745 0.0001188218 0.0002227908 0.0003069562 0.0005198452
##             [,6]        [,7]        [,8]        [,9]       [,10]       [,11]
## [1,] 0.033644537 0.043149496 0.094596971 0.181952069 0.130504594 0.067590819
## [2,] 0.018100645 0.023214280 0.050892844 0.097889585 0.070211021 0.036363627
## [3,] 0.004067561 0.005216692 0.011436594 0.021997660 0.015777757 0.008171602
## [4,] 0.001104052 0.001415959 0.003104218 0.005970793 0.004282534 0.002218006
ftab <- N * ptab
cat("Table of Expected Frequencies:\n")
## Table of Expected Frequencies:
print(ftab)
##            [,1]       [,2]       [,3]      [,4]      [,5]       [,6]       [,7]
## [1,] 15.9601838 14.1868300 26.6003063 36.649311 62.067381 131.819296 169.059724
## [2,]  8.5865237  7.6324655 14.3108729 19.717203 33.392037  70.918326  90.953548
## [3,]  1.9295559  1.7151608  3.2159265  4.430832  7.503828  15.936702  20.438999
## [4,]  0.5237366  0.4655436  0.8728943  1.202654  2.036753   4.325676   5.547728
##           [,8]      [,9]     [,10]      [,11]
## [1,] 370.63093 712.88821 511.31700 264.820827
## [2,] 199.39816 383.53139 275.08678 142.472690
## [3,]  44.80858  86.18683  61.81725  32.016335
## [4,]  12.16233  23.39357  16.77897   8.690148
# Critical Value for Independence:
alpha <- 0.05 # significance level
c_val <- qchisq(alpha, df = 1, lower.tail = FALSE)
cat("Critical Value:", round(c_val, 3), "\n")
## Critical Value: 3.841

Interpreting the Critical Value: If the computed Chi-squared test statistic (obtained from the chisq.test function) exceeds 3.841, we can reject the null hypothesis at a significance level of 0.05. This implies a statistically significant association between trust in the police and feelings of safety while walking alone in local areas after dark within my dataset.

Calculating and interpreting Pearson’s chi-squared statistic:

# Pearson's chi-squared statistic:
test_stat <- sum((mytab - ftab)^2 / ftab)
cat("Pearson's X^2:", round(test_stat, 4), "\n")
## Pearson's X^2: 91.4996

Comparison to the critical value: We previously calculated a critical value of 3.841. Our observed X-squared value significantly exceeds this critical threshold. Interpretation: The disparity between our observed data and the expected outcomes under the assumption of independence is statistically significant, indicating a strong association between trust in the police and Feeling of safety when walking alone in local areas after dark. Consequently, it can be inferred that there exists substantial variation in trust levels towards the police among individuals with regards to their perceived safety while walking alone at night in local areas.

Calculating the p-value:

# p-value:
p_val <- pchisq(test_stat, df = 1, lower.tail = F)
cat("p-value:", round(p_val, 4), "\n")
## p-value: 0
# Using chisq.test for validation:
cat("Chi-squared test result:\n")
## Chi-squared test result:
print(chisq.test(mytab, correct = F))
## Warning in chisq.test(mytab, correct = F): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytab
## X-squared = 91.5, df = 30, p-value = 3.884e-08

The p-value, expressed in scientific notation as < 3.884e-08, signifies an extremely close proximity to zero and serves as another representation of its insignificance. The chi-squared test is employed to determine the independence or association between two categorical variables. In our analysis, the remarkably small p-value (effectively zero) leads us to reject the null hypothesis that posits independence between the variables. Consequently, compelling evidence emerges suggesting a significant association exists within mytab dataset.

iceland_data <- ess %>%
  filter(cntry == "IS") %>%
  mutate(
    trstplc = case_when(
      trstplc %in% c(0, 1, 2, 3, 4, 5) ~ "Distrust", 
      trstplc %in% c(6, 7, 8, 9, 10) ~ "Trust",
      trstplc %in% c(77, 88, 99) ~ NA_character_,  # Handle other specific cases where you want to set it as NA
      TRUE ~ as.character(trstplc)  # Keep other values as-is but ensure they are characters
    ),
  )
unique(iceland_data$trstplc)
## [1] NA         "Distrust" "Trust"
iceland_data <- iceland_data %>% filter(!is.na(trstplc))
table(iceland_data$trstplc)
## 
## Distrust    Trust 
##      490     3459

Hypothesis testing using infer: I used Example 1 for Tutorial 6

Step 1: Calculate the test statistic of your sample

test_stat <- iceland_data %>%
  specify(explanatory = trstplc, # for explanatory variable
          response = aesfdrk) %>% # for outcome of interest
  hypothesize(null = "independence") %>%
  calculate(stat = "t") 
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "Distrust" - "Trust", or divided in the order "Distrust" / "Trust" for
## ratio-based statistics. To specify this order yourself, supply `order =
## c("Distrust", "Trust")` to the calculate() function.
print(test_stat$stat)
##        t 
## 4.333925

Interpretation: The t-statistic quantifies the deviation between the observed sample mean and the population mean (or another sample mean in case of two-sample tests) in terms of standard error units. A higher absolute value of t (regardless of its sign) indicates stronger evidence against the null hypothesis. With a t-statistic value of 4.33, there is a considerable difference in Feeling of safety when walking alone in local areas after dark between individuals who trust and distrust the police. This suggests that there exists a disparity in the average perception of safety for these two groups - those who trust and those who distrust the police.

Step 2: Simulate the null distribution:

null_distribution <- iceland_data %>%
  specify(explanatory = trstplc,
          response = aesfdrk) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>% 
  calculate(stat = "t")
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "Distrust" - "Trust", or divided in the order "Distrust" / "Trust" for
## ratio-based statistics. To specify this order yourself, supply `order =
## c("Distrust", "Trust")` to the calculate() function.

Step 3: Calculate the p-value of your sample:

p_val <- null_distribution %>% 
  get_pvalue(obs_stat = test_stat, direction = "two-sided") 
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
p_val
## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0

Step 4: Visualize:

null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided")

null_distribution
## Response: aesfdrk (numeric)
## Explanatory: trstplc (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate   stat
##        <int>  <dbl>
##  1         1 -0.550
##  2         2 -0.533
##  3         3 -0.996
##  4         4  1.86 
##  5         5 -1.74 
##  6         6 -0.370
##  7         7 -0.792
##  8         8 -0.424
##  9         9  1.31 
## 10        10  0.507
## # ℹ 990 more rows

Visualizes the null distribution alongside the observed test statistic (represented by the red line), providing clear evidence, based on assumptions of frequentist hypothesis testing, to reject the null hypothesis. In case the observed statistic falls within the null distribution, we would observe shaded red regions in the null distribution that are equally or more extreme than our observed statistic.

Additionally, confidence intervals can be employed for visualization purposes.

conf_int <- null_distribution %>%
  get_confidence_interval(level = 0.95, type = "percentile")


null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided") +
  shade_confidence_interval(endpoints = conf_int)

null_distribution
## Response: aesfdrk (numeric)
## Explanatory: trstplc (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate   stat
##        <int>  <dbl>
##  1         1 -0.550
##  2         2 -0.533
##  3         3 -0.996
##  4         4  1.86 
##  5         5 -1.74 
##  6         6 -0.370
##  7         7 -0.792
##  8         8 -0.424
##  9         9  1.31 
## 10        10  0.507
## # ℹ 990 more rows

Survey weights code:

ess$weight <- ess$dweight * ess$pweight
survey_design <- svydesign(ids = ~1, data = ess, weights = ~weight)
iceland_data <- ess %>%
  filter(cntry == "IS") 
iceland_data$trstplc <- ifelse(iceland_data$trstplc > 10, NA, iceland_data$trstplc)

# Creating and rescaling the trust in police variable
iceland_data$trust <- scales::rescale(iceland_data$trstplt + iceland_data$trstplc + iceland_data$trstprt, na.rm = TRUE, to = c(0, 100))

iceland_data$aesfdrk <- scales::rescale(iceland_data$trust, na.rm = TRUE, to=c(100,0))
iceland_data <- iceland_data %>%
  mutate(
    # gender
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)
    ),
  )

model1 <- lm(aesfdrk ~ trstplc + gndr, data = iceland_data, weights = weight)
modelsummary(
  list(model1),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")
 (1)
trstplc -1.0 (0.1)***
gndrMale 1.6 (0.3)***
Num.Obs. 3936
R2 0.046
R2 Adj. 0.045
AIC 29051.4
BIC 29076.5
Log.Lik. -14521.707
RMSE 9.56
iceland_data <- iceland_data %>%
  mutate(
    # Recoding age variable, setting 999 to NA
    age = ifelse(agea == 999, NA, agea),
    # Recoding cohort variable, setting years before 1900 and after 2000 to NA
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    # Recoding generational cohorts based on year of birth
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # Converting cohort to a factor with labels
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
model2 <- lm(aesfdrk ~ trstplc + gen, data = iceland_data, weights = weight)
modelsummary(
  list(model1, model2),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                  "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")
 (1)   (2)
trstplc -1.0 (0.1)*** -1.0 (0.1)***
gndrMale 1.6 (0.3)***
genBaby Boomers 2.0 (0.5)***
genGen X 2.3 (0.5)***
genMillennials 1.1 (0.5)*
Num.Obs. 3936 3706
R2 0.046 0.054
R2 Adj. 0.045 0.053
AIC 29051.4 26768.4
BIC 29076.5 26805.7
Log.Lik. -14521.707 -13378.183
RMSE 9.56 8.78
 models <- list(
   "Model 3" = lm(aesfdrk ~ trstplc, data = iceland_data, weights = weight),
   "Model 4" = lm(aesfdrk ~ trstplc + gndr, data = iceland_data, weights = weight),
   "Model 5" = lm(aesfdrk ~ trstplc + gndr + gen*gndr, data = iceland_data, weights = weight))
   
modelplot(models, coef_omit = 'Interc')

modelsummary(models, fmt = 1,
  estimate  = c("{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_rename = c("Trust1" = "Trust"),
  title = 'Table 2. Regression models predicting trust (scale 0 to 100)')
Table 2. Regression models predicting trust (scale 0 to 100)
 Model 3  Model 4  Model 5
(Intercept) 98.7 (0.6)*** 97.7 (0.7)*** 95.5 (0.8)***
trstplc -1.1 (0.1)*** -1.0 (0.1)*** -1.0 (0.1)***
gndrMale 1.6 (0.3)*** 3.1 (0.8)***
genBaby Boomers 2.8 (0.7)***
genGen X 3.3 (0.7)***
genMillennials 1.8 (0.7)*
gndrMale:genBaby Boomers -1.7 (0.9)+
gndrMale:genGen X -2.0 (1.0)*
gndrMale:genMillennials -1.4 (1.0)
Num.Obs. 3949 3936 3696
R2 0.040 0.046 0.062
R2 Adj. 0.040 0.045 0.060
AIC 29242.2 29051.4 26668.0
BIC 29261.0 29076.5 26730.1
Log.Lik. -14618.088 -14521.707 -13323.998
RMSE 9.67 9.56 8.73

From Tutorial 9:

iceland_data <- ess %>%
  filter(cntry == "IS")

# create a duplicate
df <- iceland_data
df <- df %>%
  mutate(trust = trstplc,
         secure = impsafe,
         safety = ipstrgv,
         gender = gndr,
         rules = ipfrule) %>%
  mutate(across(c("trust", "secure", "safety", "gndr", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("trust", "secure", "safety", "gndr", "rules"), ~ 7 - .x ))

# Now calculate 'schwartzauth' after the NA recoding
df$safe <- scales::rescale(df$trust + 
                      df$secure + 
                      df$safety + 
                      df$gndr + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(safe))
ggplot(df, aes(x = safe)) +
  geom_histogram(bins = 30, fill = "pink", color = "black") +
  theme_minimal() +
  labs(title = "Histogram of Feeling Safe Scale",
       x = "Feeling Safe Scale",
       y = "Count")

df$year <- NA
replacements <- c(1983, 1988, 1993, 1998, 2003, 2008, 2013, 2018, 2023)
for(i in 1:9){
  df$year[df$essround == i] <- replacements[i]
}
safe_avg <- df %>%
  group_by(year) %>%
  summarize(safe_avg = mean(safe, na.rm = TRUE))

plot_zoom_safe <- ggplot(safe_avg, aes(x = year, y = safe_avg)) +
  geom_point(aes(size = safe_avg, color = safe_avg), alpha = 0.7) + 
  geom_line(aes(group = 1), color = "pink", linetype = "dashed") +  
  labs(title = "Feeling Safe Scale Average by Year (Iceland)",
       x = "Survey Year",
       y = "Feeling Safe Scale Average") +
  theme_minimal() +
  theme(legend.position = "none")

print(plot_zoom_safe)
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).

plot_full_scale_safe <- ggplot(safe_avg, aes(x = year, y = safe_avg)) +
  geom_point(aes(size = safe_avg, color = safe_avg), alpha = 0.7) + 
  geom_line(aes(group = 1), color = "pink", linetype = "dashed") +  
  labs(title = "Feeling Safe Scale Average by Year (Iceland)",
       x = "Survey Year",
       y = "Feeling Safe Scale Average") +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_y_continuous(limits = c(0, 100))

print(plot_full_scale_safe)
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).

ess$year <- NA
replacements <- c(1983, 1988, 1993, 1998, 2003, 2008, 2013, 2018, 2023)
for(i in 1:9){
  ess$year[ess$essround == i] <- replacements[i]
}

ess <- ess %>%
  mutate(trust = trstplc,
         secure = impsafe,
         safety = ipstrgv,
         gender = gndr,
         rules = ipfrule) %>%
  mutate(across(c("trust", "secure", "safety", "gndr", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("trust", "secure", "safety", "gndr", "rules"), ~ 7 - .x ))

# Now calculate 'schwartzauth' after the NA recoding
ess$safe <- scales::rescale(ess$trust + 
                             ess$secure + 
                             ess$safety + 
                             ess$gndr + 
                             ess$rules, to=c(0,100), na.rm=TRUE)
ess_cleaned <- ess %>%
  mutate(
    safe = safe,
    year = as.factor(year)  
  ) %>%
  filter(!is.na(safe))

# yearly averages for each country
yearly_safe_avg <- ess_cleaned %>%
  group_by(cntry, year) %>%
  summarize(avg_safe = mean(safe), .groups = 'drop')

# specific countries and overall ESS baseline
specific_countries_safe <- yearly_safe_avg %>%
  filter(cntry %in% c("IS", "CA", "KR", "DE", "SE"))

ess_baseline_safe <- ess_cleaned %>%
  group_by(year) %>%
  summarize(avg_safe = mean(safe), .groups = 'drop') %>%
  mutate(cntry = "ESS Baseline")

# specific countries with ESS baseline
combined_data_safe <- rbind(specific_countries_safe, ess_baseline_safe)

#Plot
ggplot(combined_data_safe, aes(x = year, y = avg_safe, group = cntry, color = cntry)) +
  geom_line() +
  scale_color_manual(values = c("IS" = "pink", "DE" = "orange", "SE" = "blue", "ESS Baseline" = "black")) +
  labs(title = "Average Feeling Safe Scale by Year for Selected Countries and ESS Baseline",
       x = "Year",
       y = "Feeling Safe Scale") +
  theme_minimal() +
  theme(legend.position = "right")

df <- df %>%
  mutate(
    Trust = case_when(
      trstplc %in% 0:3 ~ "No Trust",                    
      trstplc %in% 7:10 ~ "Complete Trust",                     
      trstplc %in% 4:6 ~ "Moderate",                  
      trstplc %in% c(77, 88, 99) ~ NA_character_      
    ),
    
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)
    ),
    gen = factor(gen, levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")),
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)
      ),
  )
df <- df %>% filter(!is.na(gen))
df <- df %>% filter(!is.na(gndr))
df <- df %>% filter(!is.na(Trust))

Data Summary Table:

d <- df %>%
  # Convert factors/characters to numeric
  mutate(
    Trust_num = as.numeric(factor(Trust, levels = c("No Trust", "Moderate", "Complete Trust"))),
    gen_num = as.numeric(gen),  
    gndr_num = as.numeric(gndr),
    
  )
table1 <- datasummary_skim(d  %>% dplyr::select(Trust_num, gen_num, gndr_num, safe), output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.
table1

Unique (#)

Missing (%)

Mean

SD

Min

Median

Max

Trust_num

3

0

2.2

0.7

1.0

2.0

3.0

gen_num

4

0

2.6

1.0

1.0

3.0

4.0

gndr_num

2

0

5.5

0.5

5.0

6.0

6.0

safe

25

0

88.6

3.5

76.6

88.8

100.0

table2 <- datasummary_skim(d  %>% dplyr::select(Trust, gen), type = "categorical", title = "Table 1. Categorical Predictors Summary", output = "flextable")

table2
Table 1. Categorical Predictors Summary

N

%

Trust

Complete Trust

382

36.9

Moderate

516

49.9

No Trust

136

13.2

gen

Interwar

154

14.9

Baby Boomers

328

31.7

Gen X

282

27.3

Millennials

270

26.1

test_stat <- df %>%
  specify(explanatory = Trust, # for explanatory variable
          response = safe) %>% # for outcome of interest
  hypothesize(null = "independence") %>%
  calculate(stat = "F") 
print(test_stat$stat)
## [1] 272.9981
null_dist <- df %>%
   specify(safe ~ Trust) %>%
   hypothesize(null = "independence") %>%
   generate(reps = 1000, type = "permute") %>%
   calculate(stat = "F")

null_dist
## Response: safe (numeric)
## Explanatory: Trust (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate   stat
##        <int>  <dbl>
##  1         1 0.861 
##  2         2 0.733 
##  3         3 0.894 
##  4         4 0.841 
##  5         5 0.997 
##  6         6 0.0306
##  7         7 0.595 
##  8         8 0.511 
##  9         9 1.83  
## 10        10 0.143 
## # ℹ 990 more rows
p_val <- null_dist %>% # 
  get_pvalue(obs_stat = test_stat, direction = "greater")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
p_val
## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0
conf_int <- null_dist%>%
  get_confidence_interval(level = 0.95, type = "percentile")


null_dist %>%
  visualize(data, bins = 10, method = "simulation", dens_color = "black") +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int)
## Warning in min(diff(unique_loc)): no non-missing arguments to min; returning
## Inf

test_stat <- 272.9981

conf_int <- null_dist %>%
  get_confidence_interval(level = 0.95, type = "percentile")

# Visualize the null distribution
null_dist %>%
  visualize(bins = 15, method = "simulation", dens_color = "black") +
  geom_vline(aes(xintercept = test_stat), color = "pink", linetype = "dashed", size = 1) +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int) +
  coord_cartesian(xlim = c(0, max(null_dist$stat) * 1.1)) + # Set manual x-axis limits
  labs(title = "Simulation-Based Null Distribution",
       x = "F statistic",
       y = "Count")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in min(diff(unique_loc)): no non-missing arguments to min; returning
## Inf

df$weight <- df$dweight * df$pweight
model1 <- lm(safe ~ Trust, data = df, weights = weight)
model2 <- lm(safe ~ Trust + gndr, data = df, weights = weight)
model3 <- lm(safe ~ Trust + gndr + Trust*gen, data = df, weights = weight)
modelsummary(
  list(model1, model2, model3),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")
 (1)   (2)   (3)
TrustModerate 3.3 (0.2)*** 3.3 (0.2)*** 3.2 (0.5)***
TrustNo Trust 5.9 (0.3)*** 5.7 (0.3)*** 7.2 (0.8)***
gndr6 0.7 (0.2)*** 0.8 (0.2)***
genBaby Boomers -1.1 (0.4)**
genGen X -1.1 (0.4)**
genMillennials -1.2 (0.4)**
TrustModerate × genBaby Boomers 0.4 (0.6)
TrustNo Trust × genBaby Boomers -0.9 (0.9)
TrustModerate × genGen X 0.0 (0.6)
TrustNo Trust × genGen X -1.1 (1.0)
TrustModerate × genMillennials 0.5 (0.6)
TrustNo Trust × genMillennials -2.3 (1.0)*
Num.Obs. 1034 1034 1034
R2 0.345 0.356 0.378
R2 Adj. 0.344 0.354 0.370
AIC 5084.4 5068.3 5051.1
BIC 5104.2 5093.0 5120.3
Log.Lik. -2538.212 -2529.133 -2511.555
RMSE 2.80 2.77 2.73
interaction_plot <- effect("Trust*gen", model3, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Generation",
     ylab="Feeling Safe scale")