packages <- c("tidyverse", "modelsummary", "forcats", "RColorBrewer",
"fst", "viridis", "knitr", "rmarkdown", "ggridges", "viridis", "questionr", "flextable", "infer")
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
##
##
## Attaching package: 'flextable'
##
##
## The following object is masked from 'package:purrr':
##
## compose
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## [[4]]
## [1] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "fst" "RColorBrewer" "modelsummary" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[6]]
## [1] "viridis" "viridisLite" "fst" "RColorBrewer" "modelsummary"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[7]]
## [1] "knitr" "viridis" "viridisLite" "fst" "RColorBrewer"
## [6] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[8]]
## [1] "rmarkdown" "knitr" "viridis" "viridisLite" "fst"
## [6] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[9]]
## [1] "ggridges" "rmarkdown" "knitr" "viridis" "viridisLite"
## [6] "fst" "RColorBrewer" "modelsummary" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "ggridges" "rmarkdown" "knitr" "viridis" "viridisLite"
## [6] "fst" "RColorBrewer" "modelsummary" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "questionr" "ggridges" "rmarkdown" "knitr" "viridis"
## [6] "viridisLite" "fst" "RColorBrewer" "modelsummary" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[12]]
## [1] "flextable" "questionr" "ggridges" "rmarkdown" "knitr"
## [6] "viridis" "viridisLite" "fst" "RColorBrewer" "modelsummary"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[13]]
## [1] "infer" "flextable" "questionr" "ggridges" "rmarkdown"
## [6] "knitr" "viridis" "viridisLite" "fst" "RColorBrewer"
## [11] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
setwd("C:/Users/Erika/Desktop/SOC_202_YAY"
)
library(fst)
ess <- read_fst("All-ESS-Data.fst")
france_data <- ess %>%
# Filter for France
filter(cntry == "FR") %>%
# Recoding clsprty and cleaning trstplt (will use later)
mutate(
clsprty = case_when(
clsprty == 1 ~ "Yes", # Recode 1 to "Yes"
clsprty == 2 ~ "No", # Recode 2 to "No"
clsprty %in% c(7, 8, 9) ~ NA_character_, # Handle other specific cases where you want to set it as NA
TRUE ~ as.character(clsprty) # Keep other values as-is but ensure they are characters
),
trstplt = ifelse(trstplt %in% c(77, 88, 99), NA, trstplt),
# Recoding education for rounds 1-4
edulvla = case_when(
essround < 5 & edulvla == 55 ~ NA_real_,
TRUE ~ edulvla
),
# Re-coding education for rounds 5 and later
edulvlb = case_when(
essround >= 5 & edulvlb == 5555 ~ NA_real_,
TRUE ~ edulvlb
),
# Create educ_level column
educ_level = case_when(
essround < 5 & edulvla == 5 ~ "BA",
essround >= 5 & edulvlb > 600 ~ "BA",
TRUE ~ "No BA"
),
# Recoding gender
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)
)
)
unique(france_data$gndr)
## [1] "Female" "Male"
unique(france_data$clsprty)
## [1] "Yes" "No" NA
france_data <- france_data %>% filter(!is.na(clsprty))
france_data <- france_data %>% filter(!is.na(trstplt))
france_data <- france_data %>% filter(!is.na(educ_level))
unique(france_data$clsprty)
## [1] "Yes" "No"
table(france_data$clsprty)
##
## No Yes
## 9098 9480
tab_dat <- france_data
mytab <- table(tab_dat$clsprty, tab_dat$educ_level)
rsums <- rowSums(mytab)
csums <- colSums(mytab)
N <- sum(mytab)
ptab <- tcrossprod(rsums/N, csums/N)
cat("Table of Expected Proportions:\n")
## Table of Expected Proportions:
print(ptab)
## [,1] [,2]
## [1,] 0.1095265 0.3801926
## [2,] 0.1141252 0.3961558
ftab <- N * ptab
cat("Table of Expected Frequencies:\n")
## Table of Expected Frequencies:
print(ftab)
## [,1] [,2]
## [1,] 2034.783 7063.217
## [2,] 2120.217 7359.783
alpha <- 0.05
c_val <- qchisq(alpha, df = 1, lower.tail = FALSE)
cat("Critical Value:", round(c_val, 3), "\n")
## Critical Value: 3.841
test_stat <- sum((mytab - ftab)^2 / ftab)
cat("Pearson's X^2:", round(test_stat, 4), "\n")
## Pearson's X^2: 159.6903
p_val <- pchisq(test_stat, df = 1, lower.tail = F)
cat("p-value:", round(p_val, 4), "\n")
## p-value: 0
cat("Chi-squared test result:\n")
## Chi-squared test result:
print(chisq.test(mytab, correct = F))
##
## Pearson's Chi-squared test
##
## data: mytab
## X-squared = 159.69, df = 1, p-value < 2.2e-16
test_stat <- france_data %>%
specify(explanatory = educ_level,
response = trstplt) %>%
hypothesize(null = "independence") %>%
calculate(stat = "t")
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "BA" - "No BA", or divided in the order "BA" / "No BA" for ratio-based
## statistics. To specify this order yourself, supply `order = c("BA", "No BA")`
## to the calculate() function.
print(test_stat$stat)
## t
## 14.10355
null_distribution <- france_data %>%
specify(explanatory = educ_level,
response = trstplt) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "t")
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "BA" - "No BA", or divided in the order "BA" / "No BA" for ratio-based
## statistics. To specify this order yourself, supply `order = c("BA", "No BA")`
## to the calculate() function.
p_val <- null_distribution %>%
get_pvalue(obs_stat = test_stat, direction = "two-sided")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
null_distribution %>%
visualize() +
shade_p_value(obs_stat = test_stat, direction = "two-sided")

null_distribution
## Response: trstplt (numeric)
## Explanatory: educ_level (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 -0.119
## 2 2 0.0192
## 3 3 0.407
## 4 4 0.509
## 5 5 1.62
## 6 6 1.02
## 7 7 -0.0215
## 8 8 -0.572
## 9 9 -1.43
## 10 10 1.45
## # ℹ 990 more rows
conf_int <- null_distribution %>%
get_confidence_interval(level = 0.95, type = "percentile")
null_distribution %>%
visualize() +
shade_p_value(obs_stat = test_stat, direction = "two-sided") +
shade_confidence_interval(endpoints = conf_int)

null_distribution
## Response: trstplt (numeric)
## Explanatory: educ_level (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 -0.119
## 2 2 0.0192
## 3 3 0.407
## 4 4 0.509
## 5 5 1.62
## 6 6 1.02
## 7 7 -0.0215
## 8 8 -0.572
## 9 9 -1.43
## 10 10 1.45
## # ℹ 990 more rows
edu_stat <- france_data %>%
specify(explanatory = gndr,
response = educ_level,
success = "BA") %>%
hypothesize(null = "independence") %>%
calculate(stat = "z")
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "Female" - "Male", or divided in the order "Female" / "Male" for
## ratio-based statistics. To specify this order yourself, supply `order =
## c("Female", "Male")` to the calculate() function.
print(edu_stat$stat)
## [1] -1.11198
null_edu <- france_data %>%
specify(explanatory = gndr,
response = educ_level,
success = "BA") %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "z")
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "Female" - "Male", or divided in the order "Female" / "Male" for
## ratio-based statistics. To specify this order yourself, supply `order =
## c("Female", "Male")` to the calculate() function.
p_val <- null_edu %>%
get_pvalue(obs_stat = edu_stat, direction = "two-sided")
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0.286
null_edu %>%
visualize() +
shade_p_value(obs_stat = edu_stat, direction = "two-sided")

null_edu
## Response: educ_level (factor)
## Explanatory: gndr (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 1
## stat
## <dbl>
## 1 1.43
## 2 -0.512
## 3 -1.29
## 4 0.229
## 5 -0.0532
## 6 -3.23
## 7 -0.583
## 8 -1.15
## 9 1.01
## 10 -0.265
## # ℹ 990 more rows
conf_int <- null_distribution %>%
get_confidence_interval(level = 0.95, type = "percentile")
null_edu %>%
visualize() +
shade_p_value(obs_stat = edu_stat, direction = "two-sided") +
shade_confidence_interval(endpoints = conf_int)

null_edu
## Response: educ_level (factor)
## Explanatory: gndr (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 1
## stat
## <dbl>
## 1 1.43
## 2 -0.512
## 3 -1.29
## 4 0.229
## 5 -0.0532
## 6 -3.23
## 7 -0.583
## 8 -1.15
## 9 1.01
## 10 -0.265
## # ℹ 990 more rows