# List of packages
packages <- c("tidyverse", "fst", "modelsummary", "viridis", "kableExtra", "flextable", "officer") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## 载入需要的程序包:viridisLite
##
##
## 载入程序包:'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## 载入程序包:'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "viridis" "viridisLite" "modelsummary" "fst" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[5]]
## [1] "kableExtra" "viridis" "viridisLite" "modelsummary" "fst"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "flextable" "kableExtra" "viridis" "viridisLite" "modelsummary"
## [6] "fst" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "officer" "flextable" "kableExtra" "viridis" "viridisLite"
## [6] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
gss <- load("gss2022.RData")
gss <- df
table(gss$polviews)
##
## extremely liberal liberal
## 2081 7623
## slightly liberal moderate, middle of the road
## 7900 23992
## slightly conservative conservative
## 9596 9361
## extremely conservative don't know
## 2165 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
unique(gss$polviews)
## [1] <NA> moderate, middle of the road
## [3] slightly conservative conservative
## [5] liberal extremely conservative
## [7] slightly liberal extremely liberal
## 20 Levels: extremely liberal liberal ... see codebook
gss <- gss %>%
mutate(
polviews = case_when(
polviews %in% c("liberal", "moderate", "conservative") ~ polviews,
TRUE ~ NA_character_
),
race = case_when(
race %in% c("white", "black", "other") ~ race,
TRUE ~ NA_character_
),
sex = case_when(
sex %in% c("male", "female") ~ sex,
TRUE ~ NA_character_
),
degree = case_when(
degree %in% c("less than high school", "high school", "junior college", "bachelor", "graduate") ~ degree,
TRUE ~ NA_character_
),
)
gss_filtered <- gss %>%
dplyr::select(polviews, race, sex, degree)
categorical_summary <- datasummary_skim(gss_filtered, type = "categorical")
categorical_summary
tinytable_t94fwob7vx8im4izs8in
| |
|
N |
% |
| polviews |
conservative |
9361 |
12.9 |
| |
liberal |
7623 |
10.5 |
| |
NA |
55406 |
76.5 |
| race |
black |
10215 |
14.1 |
| |
other |
4411 |
6.1 |
| |
white |
57657 |
79.6 |
| |
NA |
107 |
0.1 |
| sex |
female |
40301 |
55.7 |
| |
male |
31977 |
44.2 |
| |
NA |
112 |
0.2 |
| degree |
graduate |
5953 |
8.2 |
| |
high school |
36446 |
50.3 |
| |
less than high school |
14192 |
19.6 |
| |
NA |
15799 |
21.8 |
gss_cleaned <- gss %>%
filter(!is.na(polviews),
!is.na(race), !is.na(sex), !is.na(degree)) %>%
mutate(
polviews = recode(polviews, "conservative" = "Conservative", "liberal" = "Liberal", "moderate" = "Moderate"),
race = recode(race, "white" = "White", "black" = "Black", "other" = "Other"),
sex = recode(sex, "male" = "Male", "female" = "Female"),
degree = recode(degree, "less than high school" = "Less than High School", "high school" = "High School", "junior college" = "Junior College", "bachelor" = "Bachelor", "graduate" = "Graduate"),
)
gss_cleaned <- gss_cleaned %>%
rename(
"Political Views" = polviews,
"Respondent Race" = race,
"Respondent Sex" = sex,
"Highest Degree" = degree,
)
categorical_summary_relabelled <- datasummary_skim(
gss_cleaned %>%
dplyr::select(`Political Views`, `Respondent Race`, `Respondent Sex`, `Highest Degree`), # Select categorical variables
type = "categorical", # Specify the type of variables to summarize
output = "kableExtra" # Specify the output format
)
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
## produced by the `tinytable` backend.
categorical_summary_relabelled %>%
kableExtra::kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>% # Apply table styling options
kableExtra::row_spec(0, bold = TRUE, color = "white", background = "#4CAF50") %>% # Customize the header row
kableExtra::column_spec(1, bold = TRUE) %>% # Make the first column bold
kableExtra::add_header_above(c(" " = 1, "Summary Statistics for Categorical Variables" = 3)) # Add a header above the table
|
|
Summary Statistics for Categorical Variables
|
|
|
|
N
|
%
|
|
Political Views
|
Conservative
|
6945
|
55.2
|
|
|
Liberal
|
5627
|
44.8
|
|
Respondent Race
|
Black
|
1717
|
13.7
|
|
|
Other
|
772
|
6.1
|
|
|
White
|
10083
|
80.2
|
|
Respondent Sex
|
Female
|
6813
|
54.2
|
|
|
Male
|
5759
|
45.8
|
|
Highest Degree
|
Graduate
|
1943
|
15.5
|
|
|
High School
|
7940
|
63.2
|
|
|
Less than High School
|
2689
|
21.4
|
table(df$polviews)
##
## extremely liberal liberal
## 2081 7623
## slightly liberal moderate, middle of the road
## 7900 23992
## slightly conservative conservative
## 9596 9361
## extremely conservative don't know
## 2165 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
table(df$sex)
##
## male female
## 31977 40301
## don't know iap
## 0 0
## I don't have a job dk, na, iap
## 0 0
## no answer not imputable_(2147483637)
## 0 0
## not imputable_(2147483638) refused
## 0 0
## skipped on web uncodeable
## 0 0
## not available in this release not available in this year
## 0 0
## see codebook
## 0
polviews_summary <- gss %>%
count(polviews) %>% # Count the occurrences of each religious preference
mutate(pct = n / sum(n) * 100) # Calculate the percentage of each preference
ggplot(gss, aes(x = polviews)) +
geom_bar(fill = "lightblue", color = "black") + # Create a bar plot with light blue fill and black borders
labs(title = "Distribution of Political Views", x = "Religious Preference", y = "Count") + # Add title and axis labels
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis text for better readability

gss_sex <- gss %>%
group_by(sex, polviews) %>% # Group by year and religious preference
summarize(count = n(), .groups = 'drop') %>% # Calculate the count for each group
group_by(sex) %>% # Group by year
mutate(total = sum(count), # Calculate the total count per year
proportion = count / total) # Calculate the proportion of each religious preference per year
ggplot(gss_sex, aes(x = sex, y = proportion, color = polviews, group = polviews)) +
geom_line(size = 1.2) + # Create lines for each religious preference with increased line size
scale_color_brewer(palette = "Set3") + # Use a color palette for better differentiation
labs(title = "Evolution of Religious Preferences Over Time", # Add plot title
x = "Sex", # Label x-axis
y = "Proportion", # Label y-axis
color = "Political Views") + # Label the legend
theme_minimal() + # Apply a minimal theme to the plot
theme(legend.position = "bottom") # Position the legend at the bottom
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

gss_filtered <- gss %>%
filter(polviews %in% c("protestant", "catholic", "jewish", "none", "other"))
gss_yearly <- gss_filtered %>%
group_by(sex, polviews) %>%
summarize(count = n(), .groups = 'drop') %>%
group_by(sex) %>%
mutate(total = sum(count),
proportion = count / total)
ggplot(gss_yearly, aes(x = sex, y = proportion, color = polviews, group = polviews)) +
geom_line(size = 1.2) +
scale_color_brewer(palette = "Dark2") +
labs(title = "Evolution of Political Views Over Time",
x = "Sex",
y = "Proportion",
color = "Political Views") +
theme_minimal() +
theme(legend.position = "bottom")

gss_filtered_clean <- gss_filtered %>%
filter(!is.na(year) & !is.na(fejobaff) & !is.na(age))
gss_yearly <- gss_filtered_clean %>%
count(year, fejobaff, age) %>% # Count occurrences for each combination of year, partyid_recoded, and abany
group_by(year, fejobaff) %>% # Group by year and political identity
mutate(total = sum(n), # Calculate the total count per year and political identity
proportion = n / total) %>% # Calculate the proportion of each response within each year and political identity
filter(age == "Yes") # Filter to keep only 'Yes' responses
print(head(gss_yearly))
## # A tibble: 0 × 6
## # Groups: year, fejobaff [0]
## # ℹ 6 variables: year <int>, fejobaff <fct>, age <int>, n <int>, total <int>,
## # proportion <dbl>
ggplot(gss_yearly, aes(x = year, y = proportion, color = fejobaff)) +
geom_line(size = 1.2) + # Create line plot with increased line size
scale_color_brewer(palette = "Dark2", name = "Political Identification") + # Use Dark2 color palette for lines and set legend title
labs(title = "Preferential Hiring by Year and age",
subtitle = "General Social Survey, 1972-2022",
x = "Year",
y = "Preferential Hiring") + # Add title, subtitle, and axis labels
theme_minimal() + # Apply minimal theme
theme(legend.position = "bottom") # Position legend at the bottom
