Final - SOC222

# List of packages
packages <- c("tidyverse", "modelsummary", "flextable","infer","effects",
              "fst", "viridis", "knitr", "kableExtra", "rmarkdown", "ggridges", 
              "questionr","survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales")

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'flextable'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: viridisLite
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following objects are masked from 'package:flextable':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## 
## Attaching package: 'MASS'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## 
## Attaching package: 'aod'
## 
## 
## The following object is masked from 'package:survival':
## 
##     rats
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[3]]
##  [1] "flextable"    "modelsummary" "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "infer"        "flextable"    "modelsummary" "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "infer"        "flextable"    "modelsummary"
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "fst"          "effects"      "carData"      "infer"        "flextable"   
##  [6] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "viridis"      "viridisLite"  "fst"          "effects"      "carData"     
##  [6] "infer"        "flextable"    "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "knitr"        "viridis"      "viridisLite"  "fst"          "effects"     
##  [6] "carData"      "infer"        "flextable"    "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[9]]
##  [1] "kableExtra"   "knitr"        "viridis"      "viridisLite"  "fst"         
##  [6] "effects"      "carData"      "infer"        "flextable"    "modelsummary"
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[10]]
##  [1] "rmarkdown"    "kableExtra"   "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "effects"      "carData"      "infer"        "flextable"   
## [11] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "ggridges"     "rmarkdown"    "kableExtra"   "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "effects"      "carData"      "infer"       
## [11] "flextable"    "modelsummary" "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[12]]
##  [1] "questionr"    "ggridges"     "rmarkdown"    "kableExtra"   "knitr"       
##  [6] "viridis"      "viridisLite"  "fst"          "effects"      "carData"     
## [11] "infer"        "flextable"    "modelsummary" "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[13]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "questionr"   
##  [6] "ggridges"     "rmarkdown"    "kableExtra"   "knitr"        "viridis"     
## [11] "viridisLite"  "fst"          "effects"      "carData"      "infer"       
## [16] "flextable"    "modelsummary" "lubridate"    "forcats"      "stringr"     
## [21] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [26] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [31] "utils"        "datasets"     "methods"      "base"        
## 
## [[14]]
##  [1] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
##  [6] "questionr"    "ggridges"     "rmarkdown"    "kableExtra"   "knitr"       
## [11] "viridis"      "viridisLite"  "fst"          "effects"      "carData"     
## [16] "infer"        "flextable"    "modelsummary" "lubridate"    "forcats"     
## [21] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [26] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [31] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[15]]
##  [1] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
##  [6] "grid"         "questionr"    "ggridges"     "rmarkdown"    "kableExtra"  
## [11] "knitr"        "viridis"      "viridisLite"  "fst"          "effects"     
## [16] "carData"      "infer"        "flextable"    "modelsummary" "lubridate"   
## [21] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [26] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [31] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [36] "base"        
## 
## [[16]]
##  [1] "interactions" "aod"          "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "questionr"    "ggridges"     "rmarkdown"   
## [11] "kableExtra"   "knitr"        "viridis"      "viridisLite"  "fst"         
## [16] "effects"      "carData"      "infer"        "flextable"    "modelsummary"
## [21] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [26] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [31] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [36] "methods"      "base"        
## 
## [[17]]
##  [1] "interactions" "aod"          "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "questionr"    "ggridges"     "rmarkdown"   
## [11] "kableExtra"   "knitr"        "viridis"      "viridisLite"  "fst"         
## [16] "effects"      "carData"      "infer"        "flextable"    "modelsummary"
## [21] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [26] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [31] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [36] "methods"      "base"        
## 
## [[18]]
##  [1] "interactions" "aod"          "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "questionr"    "ggridges"     "rmarkdown"   
## [11] "kableExtra"   "knitr"        "viridis"      "viridisLite"  "fst"         
## [16] "effects"      "carData"      "infer"        "flextable"    "modelsummary"
## [21] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [26] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [31] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [36] "methods"      "base"        
## 
## [[19]]
##  [1] "scales"       "interactions" "aod"          "MASS"         "survey"      
##  [6] "survival"     "Matrix"       "grid"         "questionr"    "ggridges"    
## [11] "rmarkdown"    "kableExtra"   "knitr"        "viridis"      "viridisLite" 
## [16] "fst"          "effects"      "carData"      "infer"        "flextable"   
## [21] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [26] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [31] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [36] "datasets"     "methods"      "base"

Descriptive statistics

france_data <- read.fst("/Users/jessie/france_data.fst")

table(france_data$rlgblg)

## 
##    1    2    7    8 
## 9509 9434   53   42

table(france_data$yrbrn)

## 
## 1905 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 
##    1    2    1    2    5    2    8    9    5    8   12   16   41   40   67   56 
## 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 
##   64   70   90   96  115  126  144  180  163  185  169  169  206  201  195  179 
## 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 
##  199  211  238  235  243  246  307  368  341  348  351  311  358  337  352  284 
## 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 
##  355  343  309  323  323  338  339  347  317  333  337  337  340  345  346  358 
## 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 
##  321  317  291  315  287  303  311  280  311  278  298  217  245  211  229  179 
## 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 
##  222  206  140  174  153  137   99  125   84   89   97   78   79   59   38   45 
## 2004 2005 7777 8888 
##   41   26    6    1

table(france_data$imbgeco)

## 
##    0    1    2    3    4    5    6    7    8    9   10   77   88 
## 1393  654 1315 1665 1739 5108 2041 2157 1669  434  579   45  239

france_clean <- france_data %>%
  mutate(
    # Clean 'imbgeco' by setting 77 and 88 to NA
    imbgeco = ifelse(imbgeco %in% c(77, 88), NA, imbgeco)
  ) %>%
  # Remove rows with NAs in these columns
  filter( !is.na(imbgeco))%>%
  mutate(

    religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ),
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)
    ),
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")),
    # Convert 'imbgeco' into a numeric response variable if needed
    imbgeco = as.numeric(imbgeco)
  ) %>%
  # Remove NA values for a clean dataset
  drop_na(gen,religion)

table(france_clean$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3882         6241         4747         3265

table(france_clean$rlgblg)

## 
##    1    2 
## 9160 8975

table(france_clean$imbgeco)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 1370  633 1281 1621 1670 4971 1936 2067 1617  411  558

Table 1

# Generate the table summary with a flextable output
table_summary <- datasummary_skim(france_clean %>% dplyr::select(yrbrn, rlgblg, imbgeco),
                                  output = "flextable")

## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.

# Print the table summary
table_summary

	Unique (#)	Mean	SD	Min	Median	Max
yrbrn	89	1960.8	18.2	1905.0	1961.0	1996.0
rlgblg	2	1.5	0.5	1.0	1.0	2.0
imbgeco	11	4.8	2.5	0.0	5.0	10.0

Null distribution - main predictor = Religion

test_stat <- france_clean%>%
  specify(explanatory = religion, # change variable name for explanatory variable
          response = imbgeco) %>% # change variable name for outcome of interest
  hypothesize(null = "independence") %>%
  calculate(stat = "t") # replace in between quotation marks appropriate test statistic

print(test_stat$stat)

##        t 
## 3.677069

null_distribution <- france_clean %>%
  specify(explanatory = religion,
          response = imbgeco) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>% 
  calculate(stat = "t")

p_val <- null_distribution %>% 
  get_pvalue(obs_stat = test_stat, direction = "two-sided")

p_val

## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0

p<-null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided")

# labels to the plot
p <- p + labs(x = "Religion")

print(p)

null_distribution

## Response: imbgeco (numeric)
## Explanatory: religion (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate   stat
##        <int>  <dbl>
##  1         1  1.54 
##  2         2 -0.128
##  3         3 -0.310
##  4         4 -0.177
##  5         5  0.102
##  6         6  1.52 
##  7         7  1.21 
##  8         8  2.62 
##  9         9 -0.480
## 10        10  0.187
## # ℹ 990 more rows

Graph 1 - Simulation Based Null Distribution——Religion

conf_int <- null_distribution %>%
  get_confidence_interval(level = 0.95, type = "percentile")

p<-null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided") +
  shade_confidence_interval(endpoints = conf_int)
p <- p + labs(x = "Religion")

print(p)

null_distribution

## Response: imbgeco (numeric)
## Explanatory: religion (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate   stat
##        <int>  <dbl>
##  1         1  1.54 
##  2         2 -0.128
##  3         3 -0.310
##  4         4 -0.177
##  5         5  0.102
##  6         6  1.52 
##  7         7  1.21 
##  8         8  2.62 
##  9         9 -0.480
## 10        10  0.187
## # ℹ 990 more rows

Regression modelling:

# Regression model with 'religion' as the predictor
model_1 <- lm(imbgeco ~ religion, data = france_clean)
summary(model_1)

## 
## Call:
## lm(formula = imbgeco ~ religion, data = france_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9142 -1.7803  0.2197  2.0858  5.2197 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.91421    0.02586 190.029  < 2e-16 ***
## religionYes -0.13386    0.03639  -3.679 0.000235 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.45 on 18133 degrees of freedom
## Multiple R-squared:  0.0007458,  Adjusted R-squared:  0.0006906 
## F-statistic: 13.53 on 1 and 18133 DF,  p-value: 0.0002351

# Model 2: Two predictors (religion & gen) without interaction
model_2 <- lm(imbgeco ~ religion + gen, data = france_clean)
summary(model_2)

## 
## Call:
## lm(formula = imbgeco ~ religion + gen, data = france_clean)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.153 -1.330  0.206  1.847  5.680 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.320313   0.046921  92.076   <2e-16 ***
## religionYes     0.009738   0.037115   0.262    0.793    
## genBaby Boomers 0.463953   0.050167   9.248   <2e-16 ***
## genGen X        0.823069   0.053645  15.343   <2e-16 ***
## genMillennials  0.812340   0.058999  13.769   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.431 on 18130 degrees of freedom
## Multiple R-squared:  0.01627,    Adjusted R-squared:  0.01605 
## F-statistic: 74.96 on 4 and 18130 DF,  p-value: < 2.2e-16

# Model 3: Interaction model involving both predictors
model_3 <- lm(imbgeco ~ religion + gen + religion * gen, data = france_clean)
summary(model_3)

## 
## Call:
## lm(formula = imbgeco ~ religion + gen + religion * gen, data = france_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3202 -1.5035  0.1379  1.7315  5.7476 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  4.50346    0.07144  63.038  < 2e-16 ***
## religionYes                 -0.25108    0.08525  -2.945  0.00323 ** 
## genBaby Boomers              0.35865    0.08402   4.269 1.98e-05 ***
## genGen X                     0.55673    0.08511   6.542 6.25e-11 ***
## genMillennials               0.52296    0.08939   5.850 4.99e-09 ***
## religionYes:genBaby Boomers  0.11011    0.10514   1.047  0.29495    
## religionYes:genGen X         0.45936    0.11124   4.130 3.65e-05 ***
## religionYes:genMillennials   0.54489    0.12242   4.451 8.60e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.429 on 18127 degrees of freedom
## Multiple R-squared:  0.01809,    Adjusted R-squared:  0.01771 
## F-statistic:  47.7 on 7 and 18127 DF,  p-value: < 2.2e-16

Table 2 - Regression Model Summary

# Create the models list
models_list <- list(model_1, model_2, model_3)

# Generate the table summary with a title
modelsummary(
  models_list,
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)

	(1)	(2)	(3)
(Intercept)	4.9 (0.0)***	4.3 (0.0)***	4.5 (0.1)***
religionYes	−0.1 (0.0)***	0.0 (0.0)	−0.3 (0.1)**
genBaby Boomers		0.5 (0.1)***	0.4 (0.1)***
genGen X		0.8 (0.1)***	0.6 (0.1)***
genMillennials		0.8 (0.1)***	0.5 (0.1)***
religionYes × genBaby Boomers			0.1 (0.1)
religionYes × genGen X			0.5 (0.1)***
religionYes × genMillennials			0.5 (0.1)***
Num.Obs.	18135	18135	18135
R2	0.001	0.016	0.018
R2 Adj.	0.001	0.016	0.018
AIC	83968.8	83690.8	83663.3
BIC	83992.2	83737.7	83733.6
Log.Lik.	−41981.396	−41839.421	−41822.661
RMSE	2.45	2.43	2.43

Interaction effect

Graph 2 - Intereaction plot

interaction_plot <- effect("religion * gen", model_3, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Generations",
     ylab="Religion")

interaction_plot

## 
##  religion*gen effect
##         gen
## religion Interwar Baby Boomers    Gen X Millennials
##      No  4.503460     4.862115 5.060189    5.026419
##      Yes 4.252384     4.721154 5.268477    5.320229

##Consider specifically the trend line from Boomers to Millennials between religion = No vs. religion = Yes.

Graph 3 - cat_plot

cat_plot(model_3, pred = gen, modx = religion, jnplot = TRUE)

Null distribution - main predictor = Generations

# Assuming imbgeco can be categorized
france_clean$imbgeco <- factor(france_clean$imbgeco)
france_clean$gen <- factor(france_clean$gen)

# Conduct the chi-square test
test_stat <- chisq.test(table(france_clean$gen, france_clean$imbgeco))

# Print the test statistic
print(test_stat$statistic)

## X-squared 
##  344.5087

null_distribution <- france_clean %>%
  specify(explanatory = gen,
          response = imbgeco) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>% 
  calculate(stat = "chisq")

p_val <- null_distribution %>%
  get_pvalue(obs_stat = test_stat$statistic, direction = "two-sided") # use test_stat$statistic instead of test_stat

## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.

p_val

## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0

# Assuming 'test_stat' contains a component named 'statistic' that is numeric
p <- null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat$statistic, direction = "two-sided")

# Add labels to the plot
p <- p + labs(x = "Gen")

print(p)

## Warning: Chi-Square usually corresponds to right-tailed tests. Proceed with
## caution.

Graph 4 Simulation Based Null Distribution——Generations

  conf_int <- null_distribution %>%
  get_confidence_interval(level = 0.95, type = "percentile")

observed_statistic <- test_stat$statistic 

p <- null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = observed_statistic, direction = "two-sided") +
  shade_confidence_interval(endpoints = conf_int)

# Add labels to the plot
p <- p + labs(x = "Gen")

print(p)

## Warning: Chi-Square usually corresponds to right-tailed tests. Proceed with
## caution.

null_distribution

## Response: imbgeco (factor)
## Explanatory: gen (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate  stat
##        <int> <dbl>
##  1         1  38.3
##  2         2  35.0
##  3         3  30.5
##  4         4  33.5
##  5         5  29.4
##  6         6  38.7
##  7         7  33.7
##  8         8  30.2
##  9         9  28.7
## 10        10  21.1
## # ℹ 990 more rows

Final - SOC222

Milla Zhang & Jiayi Yu

Descriptive statistics

Table 1

Null distribution - main predictor = Religion

Graph 1 - Simulation Based Null Distribution——Religion

Regression modelling:

Table 2 - Regression Model Summary

Interaction effect

Graph 2 - Intereaction plot

Graph 3 - cat_plot

Null distribution - main predictor = Generations

Graph 4 Simulation Based Null Distribution——Generations