# Packages this analysis depends on.
packages <- c(
  "tidyverse", "fst", "modelsummary", "viridis",
  "kableExtra", "flextable", "officer"
)

# Install whichever of them are missing from the local library.
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package by name.
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "viridis" "viridisLite" "modelsummary" "fst" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[5]]
## [1] "kableExtra" "viridis" "viridisLite" "modelsummary" "fst"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "flextable" "kableExtra" "viridis" "viridisLite" "modelsummary"
## [6] "fst" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "officer" "flextable" "kableExtra" "viridis" "viridisLite"
## [6] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
# NOTE(review): this absolute path only exists on one machine; anyone else
# running the report must point it at the folder that holds gss2022.Rdata.
setwd("C:/Users/matej/OneDrive/Desktop/U of T/Summer 2024/SOC252/RMarkdowns")

# load() restores the saved objects into the workspace and returns only their
# names, so that return value is kept on its own rather than stored in `gss`.
loaded_objects <- load("gss2022.Rdata")

# Fail early if the file did not supply `df`; otherwise `df` would silently
# resolve to the stats::df() density function and break every later step.
stopifnot("df" %in% loaded_objects)
gss <- df
Here is what our variables look like before cleaning:
# Frequency of every polviews level, including the unused missing-value codes.
table(gss[["polviews"]])
##
## extremely liberal liberal
## 2081 7623
## slightly liberal moderate, middle of the road
## 7900 23992
## slightly conservative conservative
## 9596 9361
## extremely conservative don't know
## 2165 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
# Distinct polviews values that actually occur in the data.
unique(gss[["polviews"]])
## [1] <NA> moderate, middle of the road
## [3] slightly conservative conservative
## [5] liberal extremely conservative
## [7] slightly liberal extremely liberal
## 20 Levels: extremely liberal liberal ... see codebook
# Frequency of every attend level, including the unused missing-value codes.
table(gss[["attend"]])
##
## never less than once a year
## 13855 5825
## about once or twice a year several times a year
## 9415 8752
## about once a month 2-3 times a month
## 4831 6114
## nearly every week every week
## 4029 13659
## several times a week don't know
## 5210 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
# Distinct attend values that actually occur in the data.
unique(gss[["attend"]])
## [1] about once or twice a year every week
## [3] about once a month never
## [5] several times a year several times a week
## [7] 2-3 times a month less than once a year
## [9] nearly every week <NA>
## 22 Levels: never less than once a year ... see codebook
# Frequency of every sexeduc level, including the unused missing-value codes.
table(gss[["sexeduc"]])
##
## favor oppose
## 35639 5127
## depends on age/grade (vol.) don't know
## 9 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
# Distinct sexeduc values that actually occur in the data.
unique(gss[["sexeduc"]])
## [1] <NA> favor
## [3] oppose depends on age/grade (vol.)
## 16 Levels: favor oppose depends on age/grade (vol.) don't know ... see codebook
Let's clean these up.
# Keep only the substantive answers of each variable; anything else
# (including the survey's missing-value codes) is set to NA.
keep_valid <- function(x, valid) {
  case_when(x %in% valid ~ x, .default = NA_character_)
}

gss <- gss %>%
  mutate(
    polviews = keep_valid(
      polviews,
      c(
        "extremely liberal", "liberal", "slightly liberal",
        "moderate, middle of the road",
        "slightly conservative", "conservative", "extremely conservative"
      )
    ),
    attend = keep_valid(
      attend,
      c(
        "about once or twice a year", "every week", "about once a month",
        "never", "several times a year", "several times a week",
        "2-3 times a month", "less than once a year", "nearly every week"
      )
    ),
    # sexeduc must be dichotomous, so "depends on age/grade (vol.)" is
    # dropped along with the missing-value codes.
    sexeduc = keep_valid(sexeduc, c("favor", "oppose"))
  )
# Summarise only the three variables of interest as categorical frequencies.
gss_filtered <- dplyr::select(gss, polviews, attend, sexeduc)
categorical_summary <- datasummary_skim(gss_filtered, type = "categorical")
categorical_summary
| N | % | ||
|---|---|---|---|
| polviews | conservative | 9361 | 12.9 |
| extremely conservative | 2165 | 3.0 | |
| extremely liberal | 2081 | 2.9 | |
| liberal | 7623 | 10.5 | |
| moderate, middle of the road | 23992 | 33.1 | |
| slightly conservative | 9596 | 13.3 | |
| slightly liberal | 7900 | 10.9 | |
| NA | 9672 | 13.4 | |
| attend | 2-3 times a month | 6114 | 8.4 |
| about once a month | 4831 | 6.7 | |
| about once or twice a year | 9415 | 13.0 | |
| every week | 13659 | 18.9 | |
| less than once a year | 5825 | 8.0 | |
| nearly every week | 4029 | 5.6 | |
| never | 13855 | 19.1 | |
| several times a week | 5210 | 7.2 | |
| several times a year | 8752 | 12.1 | |
| NA | 700 | 1.0 | |
| sexeduc | favor | 35639 | 49.2 |
| oppose | 5127 | 7.1 | |
| NA | 31624 | 43.7 |
Let's add some finishing cosmetic touches and remove the NA values.
# Display labels keyed by the raw survey category, listed in the order the
# factor levels should appear. Levels and labels are taken from one named
# vector so they cannot drift apart. Previously "about once or twice a year"
# was recoded to "Abt 1-2/year" while the factor level was spelled
# "Abt 1-2 times/year", so every such response became NA *after* the NA filter
# had already run and the category showed a count of 0.
polviews_labels <- c(
  "extremely liberal" = "Extremely Liberal",
  "liberal" = "Liberal",
  "slightly liberal" = "Slightly Liberal",
  "moderate, middle of the road" = "Moderate",
  "slightly conservative" = "Slightly Conservative",
  "conservative" = "Conservative",
  "extremely conservative" = "Extremely Conservative"
)

# "Oppose" comes first so it is the reference level.
sexeduc_labels <- c(
  "oppose" = "Oppose",
  "favor" = "Favor"
)

attend_labels <- c(
  "never" = "Never",
  "less than once a year" = "Less than 1/year",
  "about once or twice a year" = "Abt 1-2 times/year",
  "several times a year" = "Several times/year",
  "about once a month" = "Abt 1/month",
  "2-3 times a month" = "2-3 times/month",
  "nearly every week" = "Nearly every week",
  "every week" = "Every week",
  "several times a week" = "Several times/week"
)

# Drop respondents missing any of the three variables, then convert each
# variable to an ordered-for-display factor with its presentation labels.
gss_cleaned <- gss %>%
  filter(!is.na(polviews), !is.na(attend), !is.na(sexeduc)) %>%
  mutate(
    polviews = factor(
      polviews,
      levels = names(polviews_labels),
      labels = unname(polviews_labels)
    ),
    sexeduc = factor(
      sexeduc,
      levels = names(sexeduc_labels),
      labels = unname(sexeduc_labels)
    ),
    attend = factor(
      attend,
      levels = names(attend_labels),
      labels = unname(attend_labels)
    )
  )
# Swap the short variable names for presentation-ready column titles.
gss_cleaned <- rename(
  gss_cleaned,
  `Political Views` = polviews,
  `Religiosity by Attendance` = attend,
  `Attitude on Public School Sex Education` = sexeduc
)
# Build the categorical summary of the three renamed variables as a flextable.
summary_columns <- dplyr::select(
  gss_cleaned,
  `Political Views`,
  `Religiosity by Attendance`,
  `Attitude on Public School Sex Education`
)
categorical_summary_flextable <- datasummary_skim(
  summary_columns,
  type = "categorical",
  output = "flextable"
)
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
## produced by the `tinytable` backend.
# Style the summary table step by step, in the same order as before:
# header labels, box theme, bold white-on-blue header, then all borders
# removed and only the inner vertical rules drawn back in.
column_rule <- fp_border(color = "black", width = 1)

styled_table <- set_header_labels(
  categorical_summary_flextable,
  Variable = "Variable", Value = "Value", Freq = "Frequency"
)
styled_table <- theme_box(styled_table)
styled_table <- bold(styled_table, part = "header")
styled_table <- bg(styled_table, part = "header", bg = "deepskyblue4")
styled_table <- color(styled_table, part = "header", color = "white")
styled_table <- border_remove(styled_table)
styled_table <- border_inner_v(styled_table, border = column_rule)
categorical_summary_flextable <- autofit(styled_table)

# Emit the table as HTML for the rendered document.
flextable::htmltools_value(categorical_summary_flextable)
|
| N | % |
|---|---|---|---|
Political Views | Extremely Liberal | 1320 | 3.5 |
Liberal | 4695 | 12.3 | |
Slightly Liberal | 4867 | 12.8 | |
Moderate | 14489 | 38.0 | |
Slightly Conservative | 5787 | 15.2 | |
Conservative | 5689 | 14.9 | |
Extremely Conservative | 1310 | 3.4 | |
Religiosity by Attendance | Never | 7789 | 20.4 |
Less than 1/year | 3118 | 8.2 | |
Abt 1-2 times/year | 0 | 0.0 | |
Several times/year | 4470 | 11.7 | |
Abt 1/month | 2579 | 6.8 | |
2-3 times/month | 3290 | 8.6 | |
Nearly every week | 2027 | 5.3 | |
Every week | 7167 | 18.8 | |
Several times/week | 2610 | 6.8 | |
Attitude on Public School Sex Education | Oppose | 4588 | 12.0 |
Favor | 33569 | 88.0 |
Above is a datasummary table showing our variables.