# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "kableExtra", "flextable", "equatiomatic") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'effects' was built under R version 4.3.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.2
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## Warning: package 'survey' was built under R version 4.3.2
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: survival
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
## Attaching package: 'flextable'
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
## The following object is masked from 'package:purrr':
##
## compose
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "MASS" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "kableExtra" "MASS" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "flextable" "kableExtra" "MASS" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "equatiomatic" "flextable" "kableExtra" "MASS" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
ess <- read_fst("All-ESS-Data.fst")
What’s your research question? How does trust in the police relate to feeling safe of walking alone in local area after dark based on gender in Iceland?
What’s the main debate related to your question (add at least one citation)? According to the Global Peace Index, Iceland has consistently ranked as the safest country in the world for 14 consecutive years, as reported by World Population Review. The Global Peace Index (GPI) evaluates and ranks countries based on their safety and peacefulness. However, academic articles suggest that while Iceland maintains a low crime rate, there is an increasing concern among its population regarding crime. Although not explicitly mentioned, it can be inferred that the low crime rates potentially contribute to people’s trust in law enforcement agencies. By perceiving effective performance from the police due to reduced criminal activities, individuals may develop a sense of confidence and reliance on them. Nevertheless, with rising apprehensions about crimes such as drug-related offenses highlighted in these articles, people’s expectations towards law enforcement might also escalate accordingly. Therefore, if authorities consistently succeed in maintaining low crime rates and ensuring public safety, it will further enhance people’s trust in them; conversely, any failure could undermine this trust. Safest Countries in the World 2023 (worldpopulationreview.com)Links to an external site. https://heinonline.org/HOL/P?h=hein.journals/socprob48&i=98Links to an external site. Gunnlaugsson, H. (2001). Going public with social science: crime and criminal justice policy in iceland. Social Problems, 48(1), 88-92.
What’s your main hypothesis (not your null or interaction)? In the given context, my primary hypothesis would be that individuals who exhibit higher levels of trust in the police are more likely to feel secure when walking alone in the area after dark, taking into account gender differences within Iceland.
Why did you choose to explore your particular question, outcome, and country combo? I have chosen to investigate the specific correlation between a particular question, outcome, and country of trust in the police relate to feeling safe of walking alone in local area after dark based on gender in Iceland primarily due to my personal interest towards examining the underlying reasons behind Iceland’s consistent ranking as the safest country in the world for fourteen consecutive years. This interest was sparked by my perusal of information on World Population Review. Furthermore, after delving into Gunnlaugsson’s scholarly article, where he actively engages in the public discourse surrounding crime and drug issues within Icelandic society, I became even more intrigued.
table(ess$essround)
##
## 1 2 3 4 5 6 7 8 9 10
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
Create year variable from 1983 to 2018, based on every 5 year
ess$year <- NA
replacements <- c(1983, 1988, 1993, 1998, 2003, 2008, 2013, 2018, 2023)
for(i in 1:9){
ess$year[ess$essround == i] <- replacements[i]
}
iceland_data <- ess %>%
filter(cntry == "IS")
ice <- iceland_data
Variables: trstplc - Trust in the police, the independent variable https://ess.sikt.no/en/datafile/b2b0bf39-176b-4eca-8d26-3c05ea83d2cb/266?tab=1&elements=[%2275644eb7-7153-4e4f-b8a8-647c1093545f/4%22]
aesfdrk - Feeling of safety of walking alone in local area after dark, the dependent variable https://ess.sikt.no/en/datafile/b2b0bf39-176b-4eca-8d26-3c05ea83d2cb/266?tab=1&elements=[%2295133f07-a60c-462e-a90b-a055146a2533/2%22]
gndr - gender, as the second independent variable.
Variable Cleaning
iceland_data <- ess %>%
filter(cntry == "IS") %>%
mutate(
trstplc = ifelse(trstplc %in% c(77, 88, 99), NA, trstplc),
aesfdrk = ifelse(aesfdrk %in% c(7, 8, 9), NA, aesfdrk),
)
table(iceland_data$trstplc)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 29 24 45 62 107 223 287 632 1212 872 456
table(iceland_data$aesfdrk)
##
## 1 2 3 4
## 2336 1254 281 76
iceland_data <- iceland_data %>%
mutate(
# Recoding gender
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)
),
)
table(iceland_data$gndr)
##
## Female Male
## 2022 1938
iceland_data <- iceland_data %>%
mutate(
# Recoding age variable, setting 999 to NA
age = ifelse(agea == 999, NA, agea)
)
table(iceland_data$age)
##
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 31 53 67 57 55 48 56 52 55 59 52 46 60 53 52 65 68 68 54 53 51 64 59 83 57 51
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
## 74 56 77 65 70 69 73 71 58 72 68 84 63 82 75 70 64 71 69 62 62 69 52 63 68 63
## 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 93
## 49 64 52 45 54 47 46 44 32 32 39 35 20 25 20 16 17 22 10 14 8 7 10 15 2 1
## 94
## 1
datasummary_skim(iceland_data %>% dplyr::select(aesfdrk, trstplc, age))
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max | ||
|---|---|---|---|---|---|---|---|---|
| aesfdrk | 5 | 1 | 1.5 | 0.7 | 1.0 | 1.0 | 4.0 | |
| trstplc | 12 | 1 | 7.6 | 1.9 | 0.0 | 8.0 | 10.0 | |
| age | 80 | 0 | 47.9 | 18.6 | 15.0 | 48.0 | 94.0 |
Removing NAs:
unique(iceland_data$gndr)
## [1] NA "Female" "Male"
iceland_data <- iceland_data %>% filter(!is.na(gndr))
iceland_data <- iceland_data %>% filter(!is.na(aesfdrk))
iceland_data <- iceland_data %>% filter(!is.na(trstplc))
iceland_data <- iceland_data %>% filter(!is.na(age))
unique(iceland_data$gndr)
## [1] "Female" "Male"
unique(iceland_data$aesfdrk)
## [1] 2 1 4 3
unique(iceland_data$trstplc)
## [1] 5 4 8 10 7 2 9 6 1 3 0
unique(iceland_data$age)
## [1] 35 26 23 66 20 48 44 36 29 47 19 58 28 70 67 40 57 21 61 33 50 32 24 31 81
## [26] 46 16 52 49 27 51 53 65 73 55 74 45 39 30 25 63 75 37 18 62 38 42 56 54 71
## [51] 41 15 68 34 84 17 43 69 64 22 79 60 90 86 87 80 78 82 88 59 72 76 77 85 83
## [76] 89 94 91
Check to see NAs removed or not:
table(iceland_data$gndr)
##
## Female Male
## 2000 1918
table(iceland_data$aesfdrk)
##
## 1 2 3 4
## 2316 1246 280 76
table(iceland_data$trstplc)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 27 24 45 62 105 223 286 627 1206 865 448
table(iceland_data$age)
##
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 31 53 67 56 55 48 55 52 53 57 52 45 60 51 52 65 67 66 53 53 51 63 57 83 57 51
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
## 73 56 77 64 70 66 73 70 58 72 68 83 63 80 75 69 63 70 68 61 62 69 52 62 67 63
## 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 94
## 49 64 52 45 54 47 45 43 32 31 38 33 20 25 19 16 16 19 10 13 8 6 10 13 2 1
Long form coding for Chi-Square Calculating and interpreting critical value for independence
tab_dat <- iceland_data
# Create a crosstable and save it into the object mytab
mytab <- table(tab_dat$aesfdrk, tab_dat$trstplc)
# Calculate row-wise sums of counts in table
rsums <- rowSums(mytab)
# Calculate column-wise sums of counts in table
csums <- colSums(mytab)
# Get the total count of the table (N)
N <- sum(mytab)
# Generate table of expected proportions
ptab <- tcrossprod(rsums/N, csums/N)
cat("Table of Expected Proportions:\n")
## Table of Expected Proportions:
print(ptab)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.0040735538 0.0036209367 0.0067892563 0.0093540865 0.0158415981
## [2,] 0.0021915579 0.0019480514 0.0036525965 0.0050324662 0.0085227251
## [3,] 0.0004924849 0.0004377644 0.0008208082 0.0011308913 0.0019152191
## [4,] 0.0001336745 0.0001188218 0.0002227908 0.0003069562 0.0005198452
## [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 0.033644537 0.043149496 0.094596971 0.181952069 0.130504594 0.067590819
## [2,] 0.018100645 0.023214280 0.050892844 0.097889585 0.070211021 0.036363627
## [3,] 0.004067561 0.005216692 0.011436594 0.021997660 0.015777757 0.008171602
## [4,] 0.001104052 0.001415959 0.003104218 0.005970793 0.004282534 0.002218006
ftab <- N * ptab
cat("Table of Expected Frequencies:\n")
## Table of Expected Frequencies:
print(ftab)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 15.9601838 14.1868300 26.6003063 36.649311 62.067381 131.819296 169.059724
## [2,] 8.5865237 7.6324655 14.3108729 19.717203 33.392037 70.918326 90.953548
## [3,] 1.9295559 1.7151608 3.2159265 4.430832 7.503828 15.936702 20.438999
## [4,] 0.5237366 0.4655436 0.8728943 1.202654 2.036753 4.325676 5.547728
## [,8] [,9] [,10] [,11]
## [1,] 370.63093 712.88821 511.31700 264.820827
## [2,] 199.39816 383.53139 275.08678 142.472690
## [3,] 44.80858 86.18683 61.81725 32.016335
## [4,] 12.16233 23.39357 16.77897 8.690148
# Critical Value for Independence:
alpha <- 0.05 # significance level
c_val <- qchisq(alpha, df = 1, lower.tail = FALSE)
cat("Critical Value:", round(c_val, 3), "\n")
## Critical Value: 3.841
Interpreting the Critical Value: If the computed Chi-squared test statistic (obtained from the chisq.test function) exceeds 3.841, we can reject the null hypothesis at a significance level of 0.05. This implies a statistically significant association between trust in the police and feelings of safety while walking alone in local areas after dark within my dataset.
Calculating and interpreting Pearson’s chi-squared statistic:
# Pearson's chi-squared statistic:
test_stat <- sum((mytab - ftab)^2 / ftab)
cat("Pearson's X^2:", round(test_stat, 4), "\n")
## Pearson's X^2: 91.4996
Comparison to the critical value: We previously calculated a critical value of 3.841. Our observed X-squared value significantly exceeds this critical threshold. Interpretation: The disparity between our observed data and the expected outcomes under the assumption of independence is statistically significant, indicating a strong association between trust in the police and Feeling of safety when walking alone in local areas after dark. Consequently, it can be inferred that there exists substantial variation in trust levels towards the police among individuals with regards to their perceived safety while walking alone at night in local areas.
Calculating the p-value:
# p-value:
p_val <- pchisq(test_stat, df = 1, lower.tail = F)
cat("p-value:", round(p_val, 4), "\n")
## p-value: 0
# Using chisq.test for validation:
cat("Chi-squared test result:\n")
## Chi-squared test result:
print(chisq.test(mytab, correct = F))
## Warning in chisq.test(mytab, correct = F): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: mytab
## X-squared = 91.5, df = 30, p-value = 3.884e-08
The p-value, expressed in scientific notation as < 3.884e-08, signifies an extremely close proximity to zero and serves as another representation of its insignificance. The chi-squared test is employed to determine the independence or association between two categorical variables. In our analysis, the remarkably small p-value (effectively zero) leads us to reject the null hypothesis that posits independence between the variables. Consequently, compelling evidence emerges suggesting a significant association exists within mytab dataset.
iceland_data <- ess %>%
filter(cntry == "IS") %>%
mutate(
trstplc = case_when(
trstplc %in% c(0, 1, 2, 3, 4, 5) ~ "Distrust",
trstplc %in% c(6, 7, 8, 9, 10) ~ "Trust",
trstplc %in% c(77, 88, 99) ~ NA_character_, # Handle other specific cases where you want to set it as NA
TRUE ~ as.character(trstplc) # Keep other values as-is but ensure they are characters
),
)
unique(iceland_data$trstplc)
## [1] NA "Distrust" "Trust"
iceland_data <- iceland_data %>% filter(!is.na(trstplc))
table(iceland_data$trstplc)
##
## Distrust Trust
## 490 3459
Hypothesis testing using infer: I used Example 1 for Tutorial 6
Step 1: Calculate the test statistic of your sample
test_stat <- iceland_data %>%
specify(explanatory = trstplc, # for explanatory variable
response = aesfdrk) %>% # for outcome of interest
hypothesize(null = "independence") %>%
calculate(stat = "t")
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "Distrust" - "Trust", or divided in the order "Distrust" / "Trust" for
## ratio-based statistics. To specify this order yourself, supply `order =
## c("Distrust", "Trust")` to the calculate() function.
print(test_stat$stat)
## t
## 4.333925
Interpretation: The t-statistic quantifies the deviation between the observed sample mean and the population mean (or another sample mean in case of two-sample tests) in terms of standard error units. A higher absolute value of t (regardless of its sign) indicates stronger evidence against the null hypothesis. With a t-statistic value of 4.33, there is a considerable difference in Feeling of safety when walking alone in local areas after dark between individuals who trust and distrust the police. This suggests that there exists a disparity in the average perception of safety for these two groups - those who trust and those who distrust the police.
Step 2: Simulate the null distribution:
null_distribution <- iceland_data %>%
specify(explanatory = trstplc,
response = aesfdrk) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "t")
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "Distrust" - "Trust", or divided in the order "Distrust" / "Trust" for
## ratio-based statistics. To specify this order yourself, supply `order =
## c("Distrust", "Trust")` to the calculate() function.
Step 3: Calculate the p-value of your sample:
p_val <- null_distribution %>%
get_pvalue(obs_stat = test_stat, direction = "two-sided")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
Step 4: Visualize:
null_distribution %>%
visualize() +
shade_p_value(obs_stat = test_stat, direction = "two-sided")
null_distribution
## Response: aesfdrk (numeric)
## Explanatory: trstplc (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 -0.550
## 2 2 -0.533
## 3 3 -0.996
## 4 4 1.86
## 5 5 -1.74
## 6 6 -0.370
## 7 7 -0.792
## 8 8 -0.424
## 9 9 1.31
## 10 10 0.507
## # ℹ 990 more rows
Visualizes the null distribution alongside the observed test statistic (represented by the red line), providing clear evidence, based on assumptions of frequentist hypothesis testing, to reject the null hypothesis. In case the observed statistic falls within the null distribution, we would observe shaded red regions in the null distribution that are equally or more extreme than our observed statistic.
Additionally, confidence intervals can be employed for visualization purposes.
conf_int <- null_distribution %>%
get_confidence_interval(level = 0.95, type = "percentile")
null_distribution %>%
visualize() +
shade_p_value(obs_stat = test_stat, direction = "two-sided") +
shade_confidence_interval(endpoints = conf_int)
null_distribution
## Response: aesfdrk (numeric)
## Explanatory: trstplc (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 -0.550
## 2 2 -0.533
## 3 3 -0.996
## 4 4 1.86
## 5 5 -1.74
## 6 6 -0.370
## 7 7 -0.792
## 8 8 -0.424
## 9 9 1.31
## 10 10 0.507
## # ℹ 990 more rows
Survey weights code:
ess$weight <- ess$dweight * ess$pweight
survey_design <- svydesign(ids = ~1, data = ess, weights = ~weight)
iceland_data <- ess %>%
filter(cntry == "IS")
iceland_data$trstplc <- ifelse(iceland_data$trstplc > 10, NA, iceland_data$trstplc)
# Creating and rescaling the trust in police variable
iceland_data$trust <- scales::rescale(iceland_data$trstplt + iceland_data$trstplc + iceland_data$trstprt, na.rm = TRUE, to = c(0, 100))
iceland_data$aesfdrk <- scales::rescale(iceland_data$trust, na.rm = TRUE, to=c(100,0))
iceland_data <- iceland_data %>%
mutate(
# gender
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)
),
)
model1 <- lm(aesfdrk ~ trstplc + gndr, data = iceland_data, weights = weight)
modelsummary(
list(model1),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_omit = "Intercept")
| (1) | |
|---|---|
| trstplc | -1.0 (0.1)*** |
| gndrMale | 1.6 (0.3)*** |
| Num.Obs. | 3936 |
| R2 | 0.046 |
| R2 Adj. | 0.045 |
| AIC | 29051.4 |
| BIC | 29076.5 |
| Log.Lik. | -14521.707 |
| RMSE | 9.56 |
iceland_data <- iceland_data %>%
mutate(
# Recoding age variable, setting 999 to NA
age = ifelse(agea == 999, NA, agea),
# Recoding cohort variable, setting years before 1900 and after 2000 to NA
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
# Recoding generational cohorts based on year of birth
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(yrbrn)
),
# Converting cohort to a factor with labels
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
)
model2 <- lm(aesfdrk ~ trstplc + gen, data = iceland_data, weights = weight)
modelsummary(
list(model1, model2),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_omit = "Intercept")
| (1) | (2) | |
|---|---|---|
| trstplc | -1.0 (0.1)*** | -1.0 (0.1)*** |
| gndrMale | 1.6 (0.3)*** | |
| genBaby Boomers | 2.0 (0.5)*** | |
| genGen X | 2.3 (0.5)*** | |
| genMillennials | 1.1 (0.5)* | |
| Num.Obs. | 3936 | 3706 |
| R2 | 0.046 | 0.054 |
| R2 Adj. | 0.045 | 0.053 |
| AIC | 29051.4 | 26768.4 |
| BIC | 29076.5 | 26805.7 |
| Log.Lik. | -14521.707 | -13378.183 |
| RMSE | 9.56 | 8.78 |
models <- list(
"Model 3" = lm(aesfdrk ~ trstplc, data = iceland_data, weights = weight),
"Model 4" = lm(aesfdrk ~ trstplc + gndr, data = iceland_data, weights = weight),
"Model 5" = lm(aesfdrk ~ trstplc + gndr + gen*gndr, data = iceland_data, weights = weight))
modelplot(models, coef_omit = 'Interc')
modelsummary(models, fmt = 1,
estimate = c("{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_rename = c("Trust1" = "Trust"),
title = 'Table 2. Regression models predicting trust (scale 0 to 100)')
| Model 3 | Model 4 | Model 5 | |
|---|---|---|---|
| (Intercept) | 98.7 (0.6)*** | 97.7 (0.7)*** | 95.5 (0.8)*** |
| trstplc | -1.1 (0.1)*** | -1.0 (0.1)*** | -1.0 (0.1)*** |
| gndrMale | 1.6 (0.3)*** | 3.1 (0.8)*** | |
| genBaby Boomers | 2.8 (0.7)*** | ||
| genGen X | 3.3 (0.7)*** | ||
| genMillennials | 1.8 (0.7)* | ||
| gndrMale:genBaby Boomers | -1.7 (0.9)+ | ||
| gndrMale:genGen X | -2.0 (1.0)* | ||
| gndrMale:genMillennials | -1.4 (1.0) | ||
| Num.Obs. | 3949 | 3936 | 3696 |
| R2 | 0.040 | 0.046 | 0.062 |
| R2 Adj. | 0.040 | 0.045 | 0.060 |
| AIC | 29242.2 | 29051.4 | 26668.0 |
| BIC | 29261.0 | 29076.5 | 26730.1 |
| Log.Lik. | -14618.088 | -14521.707 | -13323.998 |
| RMSE | 9.67 | 9.56 | 8.73 |
From Tutorial 9:
iceland_data <- ess %>%
filter(cntry == "IS")
# create a duplicate
df <- iceland_data
df <- df %>%
mutate(trust = trstplc,
secure = impsafe,
safety = ipstrgv,
gender = gndr,
rules = ipfrule) %>%
mutate(across(c("trust", "secure", "safety", "gndr", "rules"),
~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
# Apply the reverse coding
mutate(across(c("trust", "secure", "safety", "gndr", "rules"), ~ 7 - .x ))
# Now calculate 'schwartzauth' after the NA recoding
df$safe <- scales::rescale(df$trust +
df$secure +
df$safety +
df$gndr +
df$rules, to=c(0,100), na.rm=TRUE)
df <- df %>% filter(!is.na(safe))
ggplot(df, aes(x = safe)) +
geom_histogram(bins = 30, fill = "pink", color = "black") +
theme_minimal() +
labs(title = "Histogram of Feeling Safe Scale",
x = "Feeling Safe Scale",
y = "Count")
df$year <- NA
replacements <- c(1983, 1988, 1993, 1998, 2003, 2008, 2013, 2018, 2023)
for(i in 1:9){
df$year[df$essround == i] <- replacements[i]
}
safe_avg <- df %>%
group_by(year) %>%
summarize(safe_avg = mean(safe, na.rm = TRUE))
plot_zoom_safe <- ggplot(safe_avg, aes(x = year, y = safe_avg)) +
geom_point(aes(size = safe_avg, color = safe_avg), alpha = 0.7) +
geom_line(aes(group = 1), color = "pink", linetype = "dashed") +
labs(title = "Feeling Safe Scale Average by Year (Iceland)",
x = "Survey Year",
y = "Feeling Safe Scale Average") +
theme_minimal() +
theme(legend.position = "none")
print(plot_zoom_safe)
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
plot_full_scale_safe <- ggplot(safe_avg, aes(x = year, y = safe_avg)) +
geom_point(aes(size = safe_avg, color = safe_avg), alpha = 0.7) +
geom_line(aes(group = 1), color = "pink", linetype = "dashed") +
labs(title = "Feeling Safe Scale Average by Year (Iceland)",
x = "Survey Year",
y = "Feeling Safe Scale Average") +
theme_minimal() +
theme(legend.position = "none") +
scale_y_continuous(limits = c(0, 100))
print(plot_full_scale_safe)
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
ess$year <- NA
replacements <- c(1983, 1988, 1993, 1998, 2003, 2008, 2013, 2018, 2023)
for(i in 1:9){
ess$year[ess$essround == i] <- replacements[i]
}
ess <- ess %>%
mutate(trust = trstplc,
secure = impsafe,
safety = ipstrgv,
gender = gndr,
rules = ipfrule) %>%
mutate(across(c("trust", "secure", "safety", "gndr", "rules"),
~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
# Apply the reverse coding
mutate(across(c("trust", "secure", "safety", "gndr", "rules"), ~ 7 - .x ))
# Now calculate 'schwartzauth' after the NA recoding
ess$safe <- scales::rescale(ess$trust +
ess$secure +
ess$safety +
ess$gndr +
ess$rules, to=c(0,100), na.rm=TRUE)
ess_cleaned <- ess %>%
mutate(
safe = safe,
year = as.factor(year)
) %>%
filter(!is.na(safe))
# yearly averages for each country
yearly_safe_avg <- ess_cleaned %>%
group_by(cntry, year) %>%
summarize(avg_safe = mean(safe), .groups = 'drop')
# specific countries and overall ESS baseline
specific_countries_safe <- yearly_safe_avg %>%
filter(cntry %in% c("IS", "CA", "KR", "DE", "SE"))
ess_baseline_safe <- ess_cleaned %>%
group_by(year) %>%
summarize(avg_safe = mean(safe), .groups = 'drop') %>%
mutate(cntry = "ESS Baseline")
# specific countries with ESS baseline
combined_data_safe <- rbind(specific_countries_safe, ess_baseline_safe)
#Plot
ggplot(combined_data_safe, aes(x = year, y = avg_safe, group = cntry, color = cntry)) +
geom_line() +
scale_color_manual(values = c("IS" = "pink", "DE" = "orange", "SE" = "blue", "ESS Baseline" = "black")) +
labs(title = "Average Feeling Safe Scale by Year for Selected Countries and ESS Baseline",
x = "Year",
y = "Feeling Safe Scale") +
theme_minimal() +
theme(legend.position = "right")
df <- df %>%
mutate(
Trust = case_when(
trstplc %in% 0:3 ~ "No Trust",
trstplc %in% 7:10 ~ "Complete Trust",
trstplc %in% 4:6 ~ "Moderate",
trstplc %in% c(77, 88, 99) ~ NA_character_
),
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(yrbrn)
),
gen = factor(gen, levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")),
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)
),
)
df <- df %>% filter(!is.na(gen))
df <- df %>% filter(!is.na(gndr))
df <- df %>% filter(!is.na(Trust))
Data Summary Table:
d <- df %>%
# Convert factors/characters to numeric
mutate(
Trust_num = as.numeric(factor(Trust, levels = c("No Trust", "Moderate", "Complete Trust"))),
gen_num = as.numeric(gen),
gndr_num = as.numeric(gndr),
)
table1 <- datasummary_skim(d %>% dplyr::select(Trust_num, gen_num, gndr_num, safe), output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
## "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
## ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
## compiled to PDF (via kableExtra) or HTML (via kableExtra or gt). Use
## `histogram=FALSE` to silence this warning.
table1
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
Trust_num | 3 | 0 | 2.2 | 0.7 | 1.0 | 2.0 | 3.0 |
gen_num | 4 | 0 | 2.6 | 1.0 | 1.0 | 3.0 | 4.0 |
gndr_num | 2 | 0 | 5.5 | 0.5 | 5.0 | 6.0 | 6.0 |
safe | 25 | 0 | 88.6 | 3.5 | 76.6 | 88.8 | 100.0 |
table2 <- datasummary_skim(d %>% dplyr::select(Trust, gen), type = "categorical", title = "Table 1. Categorical Predictors Summary", output = "flextable")
table2
|
| N | % |
|---|---|---|---|
Trust | Complete Trust | 382 | 36.9 |
Moderate | 516 | 49.9 | |
No Trust | 136 | 13.2 | |
gen | Interwar | 154 | 14.9 |
Baby Boomers | 328 | 31.7 | |
Gen X | 282 | 27.3 | |
Millennials | 270 | 26.1 |
test_stat <- df %>%
specify(explanatory = Trust, # for explanatory variable
response = safe) %>% # for outcome of interest
hypothesize(null = "independence") %>%
calculate(stat = "F")
print(test_stat$stat)
## [1] 272.9981
null_dist <- df %>%
specify(safe ~ Trust) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "F")
null_dist
## Response: safe (numeric)
## Explanatory: Trust (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 0.861
## 2 2 0.733
## 3 3 0.894
## 4 4 0.841
## 5 5 0.997
## 6 6 0.0306
## 7 7 0.595
## 8 8 0.511
## 9 9 1.83
## 10 10 0.143
## # ℹ 990 more rows
p_val <- null_dist %>% #
get_pvalue(obs_stat = test_stat, direction = "greater")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
conf_int <- null_dist%>%
get_confidence_interval(level = 0.95, type = "percentile")
null_dist %>%
visualize(data, bins = 10, method = "simulation", dens_color = "black") +
shade_p_value(obs_stat = test_stat, direction = "greater") +
shade_confidence_interval(endpoints = conf_int)
## Warning in min(diff(unique_loc)): no non-missing arguments to min; returning
## Inf
test_stat <- 272.9981
conf_int <- null_dist %>%
get_confidence_interval(level = 0.95, type = "percentile")
# Visualize the null distribution
null_dist %>%
visualize(bins = 15, method = "simulation", dens_color = "black") +
geom_vline(aes(xintercept = test_stat), color = "pink", linetype = "dashed", size = 1) +
shade_p_value(obs_stat = test_stat, direction = "greater") +
shade_confidence_interval(endpoints = conf_int) +
coord_cartesian(xlim = c(0, max(null_dist$stat) * 1.1)) + # Set manual x-axis limits
labs(title = "Simulation-Based Null Distribution",
x = "F statistic",
y = "Count")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in min(diff(unique_loc)): no non-missing arguments to min; returning
## Inf
df$weight <- df$dweight * df$pweight
model1 <- lm(safe ~ Trust, data = df, weights = weight)
model2 <- lm(safe ~ Trust + gndr, data = df, weights = weight)
model3 <- lm(safe ~ Trust + gndr + Trust*gen, data = df, weights = weight)
modelsummary(
list(model1, model2, model3),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_omit = "Intercept")
| (1) | (2) | (3) | |
|---|---|---|---|
| TrustModerate | 3.3 (0.2)*** | 3.3 (0.2)*** | 3.2 (0.5)*** |
| TrustNo Trust | 5.9 (0.3)*** | 5.7 (0.3)*** | 7.2 (0.8)*** |
| gndr6 | 0.7 (0.2)*** | 0.8 (0.2)*** | |
| genBaby Boomers | -1.1 (0.4)** | ||
| genGen X | -1.1 (0.4)** | ||
| genMillennials | -1.2 (0.4)** | ||
| TrustModerate × genBaby Boomers | 0.4 (0.6) | ||
| TrustNo Trust × genBaby Boomers | -0.9 (0.9) | ||
| TrustModerate × genGen X | 0.0 (0.6) | ||
| TrustNo Trust × genGen X | -1.1 (1.0) | ||
| TrustModerate × genMillennials | 0.5 (0.6) | ||
| TrustNo Trust × genMillennials | -2.3 (1.0)* | ||
| Num.Obs. | 1034 | 1034 | 1034 |
| R2 | 0.345 | 0.356 | 0.378 |
| R2 Adj. | 0.344 | 0.354 | 0.370 |
| AIC | 5084.4 | 5068.3 | 5051.1 |
| BIC | 5104.2 | 5093.0 | 5120.3 |
| Log.Lik. | -2538.212 | -2529.133 | -2511.555 |
| RMSE | 2.80 | 2.77 | 2.73 |
interaction_plot <- effect("Trust*gen", model3, na.rm=TRUE)
plot(interaction_plot,
main="Interaction effect",
xlab="Generation",
ylab="Feeling Safe scale")