Downloading Data

# Packages required by this analysis; any that are missing are installed first.
packages <- c(
  "tidyverse", "fst", "modelsummary", "viridis", "kableExtra",
  "flextable", "officer", "sjPlot", "dplyr", "car"
)

# Keep only the packages that are not yet present in the local library.
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. Each library() call returns the list of attached
# packages, so the lapply() result is wrapped in invisible() to stop ten
# search-path listings from being auto-printed.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## Loading required package: viridisLite
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## Loading required package: carData
## 
## 
## Attaching package: 'car'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## 
## The following object is masked from 'package:purrr':
## 
##     some
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "fst"       "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "viridis"      "viridisLite"  "modelsummary" "fst"          "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[5]]
##  [1] "kableExtra"   "viridis"      "viridisLite"  "modelsummary" "fst"         
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "flextable"    "kableExtra"   "viridis"      "viridisLite"  "modelsummary"
##  [6] "fst"          "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "officer"      "flextable"    "kableExtra"   "viridis"      "viridisLite" 
##  [6] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "sjPlot"       "officer"      "flextable"    "kableExtra"   "viridis"     
##  [6] "viridisLite"  "modelsummary" "fst"          "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "sjPlot"       "officer"      "flextable"    "kableExtra"   "viridis"     
##  [6] "viridisLite"  "modelsummary" "fst"          "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "car"          "carData"      "sjPlot"       "officer"      "flextable"   
##  [6] "kableExtra"   "viridis"      "viridisLite"  "modelsummary" "fst"         
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"
library(dplyr)
library(ggplot2)
library(tidyverse)
library(tibble)
library(flextable)
library(modelsummary)
library(sjPlot)
library(car)
# load() restores the saved objects into the workspace and returns their names.
loaded_objects <- load("~/Downloads/gss2022.Rdata")

# Guard: `df` is also the name of a function in the stats package, so without
# this check a file lacking a `df` object would silently make `gss` a function.
if (!"df" %in% loaded_objects) {
  stop("Expected an object named `df` in gss2022.Rdata", call. = FALSE)
}
gss <- df

Data Cleaning and Recoding

# Frequency of each raw political-views level before recoding.
table(gss[["polviews"]])
## 
##             extremely liberal                       liberal 
##                          2081                          7623 
##              slightly liberal  moderate, middle of the road 
##                          7900                         23992 
##         slightly conservative                  conservative 
##                          9596                          9361 
##        extremely conservative                    don't know 
##                          2165                             0 
##                           iap            I don't have a job 
##                             0                             0 
##                   dk, na, iap                     no answer 
##                             0                             0 
##    not imputable_(2147483637)    not imputable_(2147483638) 
##                             0                             0 
##                       refused                skipped on web 
##                             0                             0 
##                    uncodeable not available in this release 
##                             0                             0 
##    not available in this year                  see codebook 
##                             0                             0
# Distinct values (including NA) that actually occur in the raw variable.
unique(gss[["polviews"]])
## [1] <NA>                         moderate, middle of the road
## [3] slightly conservative        conservative                
## [5] liberal                      extremely conservative      
## [7] slightly liberal             extremely liberal           
## 20 Levels: extremely liberal liberal ... see codebook
# Map each substantive raw level to its display label. Any level that is not
# listed here (and NA itself) looks up as NA and is dropped by the filter.
polviews_labels <- c(
  "extremely liberal"            = "Extremely Liberal",
  "liberal"                      = "Liberal",
  "slightly liberal"             = "Slightly Liberal",
  "moderate, middle of the road" = "Moderate",
  "slightly conservative"        = "Slightly Conservative",
  "conservative"                 = "Conservative",
  "extremely conservative"       = "Extremely Conservative"
)

gss <- gss %>%
  mutate(
    polviews_recoded = unname(polviews_labels[as.character(polviews)])
  ) %>%
  filter(!is.na(polviews_recoded))

# Verify the recode: one row per label with its frequency.
count(gss, polviews_recoded)
##         polviews_recoded     n
## 1           Conservative  9361
## 2 Extremely Conservative  2165
## 3      Extremely Liberal  2081
## 4                Liberal  7623
## 5               Moderate 23992
## 6  Slightly Conservative  9596
## 7       Slightly Liberal  7900
# Frequency of each raw sex-education attitude level before recoding.
table(gss[["sexeduc"]])
## 
##                         favor                        oppose 
##                         33833                          4625 
##   depends on age/grade (vol.)                    don't know 
##                             9                             0 
##                           iap            I don't have a job 
##                             0                             0 
##                   dk, na, iap                     no answer 
##                             0                             0 
##    not imputable_(2147483637)    not imputable_(2147483638) 
##                             0                             0 
##                       refused                skipped on web 
##                             0                             0 
##                    uncodeable not available in this release 
##                             0                             0 
##    not available in this year                  see codebook 
##                             0                             0
# Distinct values (including NA) that actually occur in the raw variable.
unique(gss[["sexeduc"]])
## [1] favor                       oppose                     
## [3] <NA>                        depends on age/grade (vol.)
## 16 Levels: favor oppose depends on age/grade (vol.) don't know ... see codebook
# Only "favor" and "oppose" are kept. Every other level (and NA) looks up as
# NA and is dropped by the filter.
sexeduc_labels <- c(
  "favor"  = "In Favour",
  "oppose" = "Oppose"
)

gss <- gss %>%
  mutate(
    sexeduc_recoded = unname(sexeduc_labels[as.character(sexeduc)])
  ) %>%
  filter(!is.na(sexeduc_recoded))

# Verify the recode: one row per label with its frequency.
count(gss, sexeduc_recoded)
##   sexeduc_recoded     n
## 1       In Favour 33833
## 2          Oppose  4625
# Frequency of each raw religious-intensity level before recoding.
table(gss[["reliten"]])
## 
##                        strong               not very strong 
##                         12117                         13172 
##        somewhat strong (vol.)                   no religion 
##                          2897                          4251 
##                    don't know                           iap 
##                             0                             0 
##            I don't have a job                   dk, na, iap 
##                             0                             0 
##                     no answer    not imputable_(2147483637) 
##                             0                             0 
##    not imputable_(2147483638)                       refused 
##                             0                             0 
##                skipped on web                    uncodeable 
##                             0                             0 
## not available in this release    not available in this year 
##                             0                             0 
##                  see codebook 
##                             0
# Distinct values (including NA) that actually occur in the raw variable.
unique(gss[["reliten"]])
## [1] strong                 not very strong        somewhat strong (vol.)
## [4] no religion            <NA>                  
## 17 Levels: strong not very strong somewhat strong (vol.) ... see codebook
# Map each substantive raw level to its display label. Any level that is not
# listed here (and NA itself) looks up as NA and is dropped by the filter.
reliten_labels <- c(
  "strong"                 = "Strong",
  "somewhat strong (vol.)" = "Somewhat Strong",
  "not very strong"        = "Not Very Strong",
  "no religion"            = "No Religious Affiliation"
)

gss <- gss %>%
  mutate(
    reliten_recoded = unname(reliten_labels[as.character(reliten)])
  ) %>%
  filter(!is.na(reliten_recoded))

# Verify the recode: one row per label with its frequency.
count(gss, reliten_recoded)
##            reliten_recoded     n
## 1 No Religious Affiliation  4251
## 2          Not Very Strong 13172
## 3          Somewhat Strong  2897
## 4                   Strong 12117

Descriptive Statistics

library(dplyr)
library(knitr)

# Make sure the three recoded columns are plain character vectors before they
# are compared against string labels below.
recoded_columns <- c("polviews_recoded", "reliten_recoded", "sexeduc_recoded")
gss[recoded_columns] <- lapply(gss[recoded_columns], as.character)

# Recode variables
gss <- gss %>%
  mutate(
    polviews_recoded_numeric = case_when(
      polviews_recoded == "Extremely Liberal" ~ 1,
      polviews_recoded == "Liberal" ~ 2,
      polviews_recoded == "Slightly Liberal" ~ 3,
      polviews_recoded == "Moderate" ~ 4,
      polviews_recoded == "Slightly Conservative" ~ 5,
      polviews_recoded == "Conservative" ~ 6,
      polviews_recoded == "Strongly conservative" ~ 7,
      TRUE ~ NA_real_
    ),
    reliten_recoded_numeric = case_when(
      reliten_recoded == "Strong" ~ 1,
      reliten_recoded == "Not Very Strong" ~ 2,
      reliten_recoded == "Somewhat Strong" ~ 3,
      reliten_recoded == "No Religious Affiliation" ~ 4,
      TRUE ~ NA_real_
    ),
    sexeduc_recoded_numeric = case_when(
      sexeduc_recoded == "In Favour" ~ 1,
      sexeduc_recoded == "Oppose" ~ 2,
      TRUE ~ NA_real_
    )
  )

# Presentation names for the summary table, given as new name = current name.
display_names <- c(
  "Political Views" = "polviews_recoded_numeric",
  "Levels of Religious Affiliation" = "reliten_recoded_numeric",
  "Attitude on Public School Sex Education" = "sexeduc_recoded_numeric"
)

# Copy of the data with the numeric columns relabelled; `gss` is unchanged.
gss_renamed <- rename(gss, all_of(display_names))

# Create the summary table with datasummary_skim
datasummary_skim(
  gss_renamed %>% select(
    "Political Views",
    "Levels of Religious Affiliation",
    "Attitude on Public School Sex Education"
  ),
  histogram = TRUE
)
tinytable_av0t7qj5oc0lwxw7vpzy
Unique Missing Pct. Mean SD Min Median Max Histogram
Political Views 7 3 4.0 1.3 1.0 4.0 6.0
Levels of Religious Affiliation 4 0 2.0 1.0 1.0 2.0 4.0
Attitude on Public School Sex Education 2 0 1.1 0.3 1.0 1.0 2.0