# Packages this analysis depends on; add any you need here.
packages <- c(
  "tidyverse", "fst", "modelsummary", "viridis",
  "kableExtra", "flextable", "officer"
)

# Work out which packages are missing. `system.file()` returns "" when a
# package cannot be found, which avoids building the full
# `installed.packages()` matrix just to test for presence.
is_installed <- vapply(
  packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
new_packages <- packages[!is_installed]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. `invisible()` stops the list returned by `lapply()`
# (one search-path vector per package) from being printed.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
library(dplyr)
library(ggplot2)
library(tidyverse)
library(tibble)
library(flextable)
# Load the saved GSS workspace. `load()` restores objects under the names they
# were saved with and returns those names; this script expects a data frame
# called `df`.
loaded_objects <- load("~/Downloads/gss2022.Rdata")
# Fail fast if the file did not supply `df`. Without this check `df` would
# resolve to the F-distribution density function `stats::df`, and `gss` would
# silently become a function instead of a data frame.
stopifnot("df" %in% loaded_objects, is.data.frame(df))
gss <- df
# Inspect the raw political-views categories and their counts.
table(gss$polviews)
##
## extremely liberal liberal
## 2081 7623
## slightly liberal moderate, middle of the road
## 7900 23992
## slightly conservative conservative
## 9596 9361
## extremely conservative don't know
## 2165 0
## iap I don't have a job
## 0 0
## dk, na, iap no answer
## 0 0
## not imputable_(2147483637) not imputable_(2147483638)
## 0 0
## refused skipped on web
## 0 0
## uncodeable not available in this release
## 0 0
## not available in this year see codebook
## 0 0
# Collapse the seven substantive `polviews` categories into three groups and
# drop every respondent without one of those answers.
# Each category must be its own string: a single literal such as
# "extremely conservative, slightly conservative" matches no value in the
# data, which previously left only "conservative" in the Conservative group.
gss <- gss %>%
  mutate(Polviews = case_when(
    polviews %in% c("extremely liberal", "liberal", "slightly liberal") ~
      "Liberal",
    polviews %in% c("moderate, middle of the road") ~
      "Moderate",
    polviews %in% c(
      "extremely conservative", "slightly conservative", "conservative"
    ) ~ "Conservative",
    TRUE ~ NA_character_
  )) %>%
  filter(!is.na(Polviews))
# Check the recoding: each group should equal the sum of its raw categories
# (Conservative = 2165 + 9596 + 9361).
gss %>%
  count(Polviews)
## Polviews n
## 1 Conservative 21122
## 2 Liberal 17604
## 3 Moderate 23992
# Keep only the listed answer categories for race, sex and degree; any other
# value in those columns is replaced with NA.
gss <- mutate(
  gss,
  race = case_when(
    race %in% c("white", "black", "other") ~ race,
    .default = NA_character_
  ),
  sex = case_when(
    sex %in% c("male", "female") ~ sex,
    .default = NA_character_
  ),
  degree = case_when(
    degree %in% c(
      "less than high school", "high school", "junior college",
      "bachelor", "graduate"
    ) ~ degree,
    .default = NA_character_
  )
)
# Build the analysis table: drop rows missing sex, degree or race, give the
# categories title-case labels, then rename the columns to presentation names.
gss_cleaned <- gss %>%
  filter(!(is.na(sex) | is.na(degree) | is.na(race))) %>%
  mutate(
    race = recode(
      race,
      "white" = "White",
      "black" = "Black",
      "other" = "Other"
    ),
    sex = recode(
      sex,
      "male" = "Male",
      "female" = "Female"
    ),
    degree = recode(
      degree,
      "less than high school" = "Less than High School",
      "high school" = "High School",
      "junior college" = "Junior College",
      "bachelor" = "Bachelor",
      "graduate" = "Graduate"
    )
  ) %>%
  rename(
    `Respondent Race` = race,
    `Respondent Sex` = sex,
    `Highest Degree` = degree
  )
# Summarise the four categorical variables (counts and percentages) and return
# the result as a flextable object.
categorical_summary_flextable <- gss_cleaned %>%
  dplyr::select(
    `Polviews`,
    `Respondent Race`,
    `Respondent Sex`,
    `Highest Degree`
  ) %>%
  datasummary_skim(type = "categorical", output = "flextable")
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
## produced by the `tinytable` backend.
# Style the summary table: relabel the count column, colour and embolden the
# header, and keep only the inner vertical borders.
categorical_summary_flextable <- categorical_summary_flextable %>%
  # Header labels are matched against column keys. This table's keys are two
  # blank columns, `N` and `%`, so the count column is relabelled through `N`;
  # names that match no key have no effect.
  set_header_labels(N = "Frequency") %>%
  theme_box() %>%
  bold(part = "header") %>%
  # Six-digit form of the shorthand "#66C"; the three-digit shorthand may not
  # be accepted by every R version or output format.
  bg(part = "header", bg = "#6666CC") %>%
  color(part = "header", color = "white") %>%
  # Clear the box borders added by the theme, then draw inner verticals only.
  border_remove() %>%
  border_inner_v(border = fp_border(color = "black", width = 1)) %>%
  autofit()
print(categorical_summary_flextable)
## a flextable object.
## col_keys: ` `, ` `, `N`, `%`
## header has 1 row(s)
## body has 11 row(s)
## original dataset sample:
## N %
## 1 Polviews Conservative 6945 17.6
## 2 Liberal 12985 33.0
## 3 Moderate 19477 49.4
## 4 Respondent Race Black 5925 15.0
## 5 Other 2453 6.2
# Side-by-side bars of respondent counts for each political view, split by sex.
ggplot(
  data = count(gss_cleaned, Polviews, `Respondent Sex`),
  mapping = aes(x = Polviews, y = n, fill = `Respondent Sex`)
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(name = "Gender", palette = "Set1") +
  labs(
    x = "Political View",
    y = "Count",
    title = "Political Views by Gender",
    subtitle = "General Social Survey, 1972-2022"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
# Yearly distribution of religious attendance.
# NOTE(review): `gss` was already restricted to respondents with a coded
# `Polviews` earlier in the script, so these trends exclude everyone else --
# confirm that is intended.
gss_filtered <- gss %>%
  filter(!is.na(year) & !is.na(attend))

# Count respondents per year and attendance category, then convert the counts
# to within-year proportions. The result is ungrouped so later steps do not
# inherit the `year` grouping by accident.
gss_yearly <- gss_filtered %>%
  group_by(year, attend) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(year) %>%
  mutate(
    total = sum(count),
    proportion = count / total
  ) %>%
  ungroup()

# One line per attendance category. `linewidth` replaces the `size` aesthetic
# for lines, which is deprecated since ggplot2 3.4.0.
ggplot(
  gss_yearly,
  aes(x = year, y = proportion, color = attend, group = attend)
) +
  geom_line(linewidth = 1.2) +
  scale_color_brewer(palette = "Set3") +
  labs(
    title = "Evolution of Religious Attendance Over Time",
    x = "Year",
    y = "Proportion",
    color = "Religious Attendance"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
# Frequency of each value in the `age` column.
table(gss[["age"]])
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
## 190 647 686 810 818 948 920 1060 1026 1044 1103 1054 1134 1072 1126 1077
## 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## 1113 1048 1078 1045 1066 960 983 973 954 931 896 824 843 830 849 840
## 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
## 818 794 773 806 761 746 759 705 740 734 733 679 683 688 587 659
## 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
## 615 646 616 583 581 499 499 444 488 412 405 365 344 321 282 272
## 82 83 84 85 86 87 88 89
## 214 206 176 168 140 115 94 303
# Bin `age` into four bands. Ages are matched as strings ("18" ... "89"); any
# value outside those lists becomes NA and the row is dropped.
gss_filtered <- gss %>%
  mutate(
    Age = case_when(
      age %in% as.character(18:29) ~ "18-29",
      age %in% as.character(30:44) ~ "30-44",
      age %in% as.character(45:59) ~ "45-59",
      age %in% as.character(60:89) ~ "60+",
      .default = NA_character_
    )
  ) %>%
  filter(!is.na(Age))

# Number of respondents in each age band.
count(gss_filtered, Age)
## Age n
## 1 18-29 10306
## 2 30-44 15456
## 3 45-59 11822
## 4 60+ 12817
# Keep respondents who have both an age band and a `fejobaff` answer.
gss_filtered <- filter(
  gss_filtered,
  !(is.na(Age) | is.na(fejobaff))
)

# Stacked bars, scaled to full height, showing the share of each `fejobaff`
# answer within every age band.
ggplot(
  data = gss_filtered %>%
    count(Age, fejobaff) %>%
    group_by(Age) %>%
    mutate(proportion = n / sum(n)),
  mapping = aes(x = Age, y = proportion, fill = fejobaff)
) +
  geom_col(position = "fill") +
  scale_fill_brewer(name = "Preferential Hiring", palette = "Set2") +
  labs(
    x = "Age Groups",
    y = "Proportion",
    title = "Preferential Hiring by Age",
    subtitle = "General Social Survey, 1972-2022"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")