Loading Packages and Data
# Packages required by this analysis.
packages <- c(
  "tidyverse", "fst", "modelsummary", "viridis", "kableExtra",
  "flextable", "officer", "sjPlot", "dplyr", "car"
)
# Install only what is missing. requireNamespace() tests one package at a
# time; scanning installed.packages() reads metadata for every installed
# package and its help page advises against using it for this check.
new_packages <- packages[
  !vapply(packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(new_packages) > 0) install.packages(new_packages)
# Attach everything. invisible() stops lapply() from auto-printing the search
# path once per package.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
## Loading required package: carData
##
##
## Attaching package: 'car'
##
##
## The following object is masked from 'package:dplyr':
##
## recode
##
##
## The following object is masked from 'package:purrr':
##
## some
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "viridis" "viridisLite" "modelsummary" "fst" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[5]]
## [1] "kableExtra" "viridis" "viridisLite" "modelsummary" "fst"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "flextable" "kableExtra" "viridis" "viridisLite" "modelsummary"
## [6] "fst" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "officer" "flextable" "kableExtra" "viridis" "viridisLite"
## [6] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[8]]
## [1] "sjPlot" "officer" "flextable" "kableExtra" "viridis"
## [6] "viridisLite" "modelsummary" "fst" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[9]]
## [1] "sjPlot" "officer" "flextable" "kableExtra" "viridis"
## [6] "viridisLite" "modelsummary" "fst" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "car" "carData" "sjPlot" "officer" "flextable"
## [6] "kableExtra" "viridis" "viridisLite" "modelsummary" "fst"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
# Attach the packages used directly below (a no-op for any package that is
# already attached).
library(dplyr)
library(ggplot2)
library(tidyverse)
library(tibble)
library(flextable)
library(modelsummary)
library(sjPlot)
library(car)
# load() returns the names of the objects it restored. Verify that `df` is one
# of them: without this check, `gss <- df` could silently pick up stats::df
# (the F-distribution density function) and only fail later, far from the cause.
loaded_objects <- load("~/Downloads/gss2022.Rdata")
if (!"df" %in% loaded_objects) {
  stop("gss2022.Rdata does not contain an object named `df`", call. = FALSE)
}
gss <- df
Data Cleaning and Recoding
# Frequency of every polviews level before recoding.
table(gss[["polviews"]])
##
## extremely liberal liberal
## 2081 7623
## slightly liberal moderate, middle of the road
## 7900 23992
## slightly conservative conservative
## 9596 9361
## extremely conservative don't know
## 2165 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
# Distinct polviews values that actually occur (NA included).
unique(gss[["polviews"]])
## [1] <NA> moderate, middle of the road
## [3] slightly conservative conservative
## [5] liberal extremely conservative
## [7] slightly liberal extremely liberal
## 20 Levels: extremely liberal liberal ... see codebook
# Relabel the seven substantive polviews levels in title case; every other
# value (NA included) maps to NA.
gss <- gss %>%
  mutate(
    polviews_recoded = case_when(
      polviews == "extremely liberal" ~ "Extremely Liberal",
      polviews == "liberal" ~ "Liberal",
      polviews == "slightly liberal" ~ "Slightly Liberal",
      polviews == "moderate, middle of the road" ~ "Moderate",
      polviews == "slightly conservative" ~ "Slightly Conservative",
      polviews == "conservative" ~ "Conservative",
      polviews == "extremely conservative" ~ "Extremely Conservative",
      .default = NA_character_
    )
  )
# Drop respondents without a recoded political-views answer.
gss <- gss %>%
  filter(!is.na(polviews_recoded))
# Sanity check: row counts per recoded political-views label.
count(gss, polviews_recoded)
## polviews_recoded n
## 1 Conservative 9361
## 2 Extremely Conservative 2165
## 3 Extremely Liberal 2081
## 4 Liberal 7623
## 5 Moderate 23992
## 6 Slightly Conservative 9596
## 7 Slightly Liberal 7900
# Frequency of every sexeduc level before recoding.
table(gss[["sexeduc"]])
##
## favor oppose
## 33833 4625
## depends on age/grade (vol.) don't know
## 9 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
# Distinct sexeduc values that actually occur (NA included).
unique(gss[["sexeduc"]])
## [1] favor oppose
## [3] <NA> depends on age/grade (vol.)
## 16 Levels: favor oppose depends on age/grade (vol.) don't know ... see codebook
# Keep only the two clear positions on sex education; every other value
# (including "depends on age/grade (vol.)" and NA) maps to NA.
gss <- gss %>%
  mutate(
    sexeduc_recoded = case_when(
      sexeduc == "favor" ~ "In Favour",
      sexeduc == "oppose" ~ "Oppose",
      .default = NA_character_
    )
  )
# Drop respondents without a recoded sex-education answer.
gss <- gss %>%
  filter(!is.na(sexeduc_recoded))
# Sanity check: row counts per recoded sex-education label.
count(gss, sexeduc_recoded)
## sexeduc_recoded n
## 1 In Favour 33833
## 2 Oppose 4625
# Frequency of every reliten level before recoding.
table(gss[["reliten"]])
##
## strong not very strong
## 12117 13172
## somewhat strong (vol.) no religion
## 2897 4251
## don't know iap
## 0 0
## I don't have a job dk, na, iap
## 0 0
## no answer not imputable_(2147483637)
## 0 0
## not imputable_(2147483638) refused
## 0 0
## skipped on web uncodeable
## 0 0
## not available in this release not available in this year
## 0 0
## see codebook
## 0
# Distinct reliten values that actually occur (NA included).
unique(gss[["reliten"]])
## [1] strong not very strong somewhat strong (vol.)
## [4] no religion <NA>
## 17 Levels: strong not very strong somewhat strong (vol.) ... see codebook
# Relabel the four substantive reliten levels in title case; every other
# value (NA included) maps to NA.
gss <- gss %>%
  mutate(
    reliten_recoded = case_when(
      reliten == "strong" ~ "Strong",
      reliten == "not very strong" ~ "Not Very Strong",
      reliten == "somewhat strong (vol.)" ~ "Somewhat Strong",
      reliten == "no religion" ~ "No Religious Affiliation",
      .default = NA_character_
    )
  )
# Drop respondents without a recoded religious-intensity answer.
gss <- gss %>%
  filter(!is.na(reliten_recoded))
# Sanity check: row counts per recoded religious-intensity label.
count(gss, reliten_recoded)
## reliten_recoded n
## 1 No Religious Affiliation 4251
## 2 Not Very Strong 13172
## 3 Somewhat Strong 2897
## 4 Strong 12117
Descriptive Statistics
library(dplyr)
library(knitr)
# Make sure the three recoded columns are plain character vectors before they
# are compared against string labels.
gss <- gss %>%
  mutate(across(
    c(polviews_recoded, reliten_recoded, sexeduc_recoded),
    as.character
  ))
# Build numeric versions of the three recoded variables for the summary table.
# Each label string must match the label created by the earlier recoding step
# exactly (case included); an unmatched label silently becomes NA.
gss <- gss %>%
  mutate(
    # 1 = most liberal ... 7 = most conservative.
    polviews_recoded_numeric = case_when(
      polviews_recoded == "Extremely Liberal" ~ 1,
      polviews_recoded == "Liberal" ~ 2,
      polviews_recoded == "Slightly Liberal" ~ 3,
      polviews_recoded == "Moderate" ~ 4,
      polviews_recoded == "Slightly Conservative" ~ 5,
      polviews_recoded == "Conservative" ~ 6,
      # Fixed: this branch used to test for "Strongly conservative", a label
      # that is never created, so these respondents were coded NA and the
      # scale topped out at 6.
      polviews_recoded == "Extremely Conservative" ~ 7,
      TRUE ~ NA_real_
    ),
    # NOTE(review): these codes follow the factor level order of reliten
    # (strong, not very strong, somewhat strong, no religion), so the scale is
    # not monotone in intensity ("Somewhat Strong" = 3 sits above
    # "Not Very Strong" = 2). Confirm this is intended before reading the
    # mean/SD as an ordinal measure.
    reliten_recoded_numeric = case_when(
      reliten_recoded == "Strong" ~ 1,
      reliten_recoded == "Not Very Strong" ~ 2,
      reliten_recoded == "Somewhat Strong" ~ 3,
      reliten_recoded == "No Religious Affiliation" ~ 4,
      TRUE ~ NA_real_
    ),
    # 1 = in favour, 2 = oppose.
    sexeduc_recoded_numeric = case_when(
      sexeduc_recoded == "In Favour" ~ 1,
      sexeduc_recoded == "Oppose" ~ 2,
      TRUE ~ NA_real_
    )
  )
# Copy of gss whose numeric columns carry presentation-ready names
# (new name = old name) for use as row labels in the summary table.
gss_renamed <- gss %>%
  rename(all_of(c(
    "Political Views" = "polviews_recoded_numeric",
    "Levels of Religious Affiliation" = "reliten_recoded_numeric",
    "Attitude on Public School Sex Education" = "sexeduc_recoded_numeric"
  )))
# Descriptive-statistics table for the three numeric variables.
skim_input <- gss_renamed %>%
  select(all_of(c(
    "Political Views",
    "Levels of Religious Affiliation",
    "Attitude on Public School Sex Education"
  )))
datasummary_skim(skim_input, histogram = TRUE)
tinytable_av0t7qj5oc0lwxw7vpzy
| |
Unique |
Missing Pct. |
Mean |
SD |
Min |
Median |
Max |
Histogram |
| Political Views |
7 |
3 |
4.0 |
1.3 |
1.0 |
4.0 |
6.0 |
 |
| Levels of Religious Affiliation |
4 |
0 |
2.0 |
1.0 |
1.0 |
2.0 |
4.0 |
 |
| Attitude on Public School Sex Education |
2 |
0 |
1.1 |
0.3 |
1.0 |
1.0 |
2.0 |
 |