# List of packages
packages <- c("tidyverse", "fst", "modelsummary", "viridis", "kableExtra", "flextable", "officer") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "viridis" "viridisLite" "modelsummary" "fst" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[5]]
## [1] "kableExtra" "viridis" "viridisLite" "modelsummary" "fst"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "flextable" "kableExtra" "viridis" "viridisLite" "modelsummary"
## [6] "fst" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "officer" "flextable" "kableExtra" "viridis" "viridisLite"
## [6] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
load("gss2022.RData")
gss <- df
rm(df)
gss <- gss %>%
mutate(polviews
= factor(polviews, levels = c(
"slightly liberal",
"liberal",
"extremely liberal",
"moderate, middle of the road",
"slghtly conservative",
"conservative",
"extrmly conservative"
),
labels = c(
"Liberal",
"Liberal",
"Liberal",
"Moderate",
"Conservative",
"Conservative",
"Conservative"
), ordered = TRUE)) %>%
filter(!is.na(polviews), !is.na(sex))
gss <- gss %>%
mutate(
race = case_when(
race %in% c("white", "black", "other") ~ race,
TRUE ~ NA_character_
),
sex = case_when(
sex %in% c("male", "female") ~ sex,
TRUE ~ NA_character_
),
degree = case_when(
degree %in% c("less than high school", "high school", "junior college", "bachelor", "graduate") ~ degree,
TRUE ~ NA_character_
)
)
gss_cleaned <- gss %>%
filter(!is.na(race), !is.na(sex), !is.na(degree))
gss_filtered <- gss_cleaned %>%
dplyr::select(race, sex, degree)
categorical_summary <- datasummary_skim(gss_filtered, type = "categorical")
gss_cleaned <- gss_cleaned %>%
rename(
"Respondent Race" = race,
"Respondent Sex" = sex,
"Highest Degree" = degree
)
categorical_summary_flextable <- datasummary_skim(
gss_cleaned %>%
dplyr::select(`Respondent Race`, `Respondent Sex`, `Highest Degree`),
type = "categorical",
output = "flextable"
)
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
## produced by the `tinytable` backend.
categorical_summary_flextable <- categorical_summary_flextable %>%
set_header_labels(Variable = "Variable", Value = "Value", Freq = "Frequency") %>%
theme_box() %>%
bold(part = "header") %>%
bg(part = "header", bg = "#4CAF50") %>%
color(part = "header", color = "white") %>%
border_remove() %>%
border_inner_v(border = fp_border(color = "black", width = 1)) %>%
autofit()
categorical_summary_flextable
|
| N | % |
|---|---|---|---|
Respondent Race | black | 5925 | 15.0 |
other | 2453 | 6.2 | |
white | 31029 | 78.7 | |
Respondent Sex | female | 22207 | 56.4 |
male | 17200 | 43.6 | |
Highest Degree | graduate | 4481 | 11.4 |
high school | 25869 | 65.6 | |
less than high school | 9057 | 23.0 |
ggplot(gss, aes(x = polviews, fill = sex)) +
geom_bar(position = "dodge") +
scale_fill_brewer(palette = "Set1") +
labs(title = "Distribution of Political Views by Gender",
x = "Political Views",
y = "Count",
fill = "Gender")
gss_yearly <- gss %>%
group_by(year, attend) %>%
summarize(count = n(), .groups = 'drop') %>%
group_by(year) %>%
mutate(total = sum(count),
proportion = count / total) %>%
filter(!is.na(proportion))
ggplot(gss_yearly, aes(x = year, y = proportion, color = attend, group = attend)) +
geom_line(linewidth = 1.2) +
scale_color_brewer(palette = "Set3") +
labs(title = "Evolution of Religious Preferences Over Time",
x = "Year",
y = "Proportion",
color = "Religious attendance over time") +
theme_minimal() +
theme(legend.position = "bottom")
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_line()`).
gss_filtered <- gss %>%
mutate(age_group = case_when(
age >= 18 & age <= 29 ~ "18-29",
age >= 30 & age <= 44 ~ "30-44",
age >= 45 & age <= 59 ~ "45-59",
age >= 60 ~ "60+",
TRUE ~ NA_character_
)) %>%
filter(!is.na(age_group)) %>%
filter(!is.na(fejobaff))
ggplot(gss_filtered, aes(x = age_group, fill = fejobaff)) +
geom_bar(position = "fill") +
labs(
title = "Distribution of Preferential Hiring Across Age",
x = "Age Group",
y = "Proportion"
) +
theme_minimal()