# Packages this analysis depends on (add any you need here)
packages <- c(
  "tidyverse", "fst", "modelsummary", "viridis",
  "kableExtra", "flextable", "officer"
)

# Install only the packages that are missing. system.file() returns "" for a
# package that is not installed, which avoids building the full
# installed.packages() matrix just to test for a handful of names.
is_installed <- vapply(
  packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
new_packages <- packages[!is_installed]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach the packages. lapply() returns one search-path vector per package;
# invisible() keeps that list from being printed.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## Loading required package: viridisLite
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "fst"       "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "viridis"      "viridisLite"  "modelsummary" "fst"          "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[5]]
##  [1] "kableExtra"   "viridis"      "viridisLite"  "modelsummary" "fst"         
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "flextable"    "kableExtra"   "viridis"      "viridisLite"  "modelsummary"
##  [6] "fst"          "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "officer"      "flextable"    "kableExtra"   "viridis"      "viridisLite" 
##  [6] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"
# Restore the objects saved in gss2022.RData into the current environment.
# NOTE(review): the data frame `df` used below is presumably created by this
# load() call — confirm the contents of the .RData file.
load("gss2022.RData")
# Keep the data under the name `gss` and remove the generic `df` binding.
gss <- df
rm(df)
# Collapse the seven-point political-views scale into three ordered groups
# (Liberal < Moderate < Conservative), then drop respondents with missing
# political views or sex. Any polviews value not listed in `levels` becomes NA
# and is therefore removed by the filter.
#
# NOTE(review): the original level list mixed full spellings ("slightly
# liberal", "moderate, middle of the road") with abbreviated ones ("slghtly
# conservative", "extrmly conservative"). Both spellings of each category are
# accepted here so that no group is silently turned into NA; levels that do
# not occur in the data have no effect. Confirm the actual labels with
# unique(gss$polviews).
gss <- gss %>%
  mutate(
    polviews = factor(
      polviews,
      levels = c(
        "slightly liberal",
        "liberal",
        "extremely liberal",
        "moderate, middle of the road",
        "moderate",
        "slghtly conservative",
        "slightly conservative",
        "conservative",
        "extrmly conservative",
        "extremely conservative"
      ),
      # Repeated labels merge several source values into a single level.
      labels = rep(
        c("Liberal", "Moderate", "Conservative"),
        times = c(3, 2, 5)
      ),
      ordered = TRUE
    )
  ) %>%
  filter(!is.na(polviews), !is.na(sex))
# Restrict race, sex and degree to their substantive categories; every other
# value becomes NA so it can be filtered out later.
#
# NOTE(review): the summary table rendered from this data lists only three
# degree categories (graduate, high school, less than high school), so the
# "junior college" and "bachelor" spellings apparently never match and those
# respondents were being dropped. The spellings "associate/junior college"
# and "bachelor's" are accepted as well — presumably these are the labels in
# the data; confirm with unique(gss$degree).
gss <- gss %>%
  mutate(
    race = case_when(
      race %in% c("white", "black", "other") ~ race,
      TRUE ~ NA_character_
    ),
    sex = case_when(
      sex %in% c("male", "female") ~ sex,
      TRUE ~ NA_character_
    ),
    degree = case_when(
      degree %in% c(
        "less than high school",
        "high school",
        "junior college",
        "associate/junior college",
        "bachelor",
        "bachelor's",
        "graduate"
      ) ~ degree,
      TRUE ~ NA_character_
    )
  )
# Keep only respondents with complete race, sex and degree information.
gss_cleaned <- filter(gss, !(is.na(race) | is.na(sex) | is.na(degree)))

# Restrict to the three demographic columns used in the summary table.
gss_filtered <- dplyr::select(gss_cleaned, all_of(c("race", "sex", "degree")))


# Summarise the categorical variables with modelsummary's skim helper.
categorical_summary <- datasummary_skim(gss_filtered, type = "categorical")
# Give the demographic columns human-readable names for presentation.
gss_cleaned <- rename(
  gss_cleaned,
  `Respondent Race` = race,
  `Respondent Sex` = sex,
  `Highest Degree` = degree
)

# Build the categorical summary again from the renamed columns, this time
# requesting a flextable object as output.
categorical_summary_flextable <- gss_cleaned %>%
  dplyr::select(
    all_of(c("Respondent Race", "Respondent Sex", "Highest Degree"))
  ) %>%
  datasummary_skim(type = "categorical", output = "flextable")
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
##   produced by the `tinytable` backend.
# Header text and base theme.
# NOTE(review): the rendered table header shows only "N" and "%", so the keys
# Variable / Value / Freq do not seem to match any column of this table —
# confirm against categorical_summary_flextable$col_keys.
categorical_summary_flextable <- categorical_summary_flextable %>%
  set_header_labels(Variable = "Variable", Value = "Value", Freq = "Frequency") %>%
  theme_box()

# Header appearance: bold white text on a green background.
categorical_summary_flextable <- categorical_summary_flextable %>%
  bold(part = "header") %>%
  bg(bg = "#4CAF50", part = "header") %>%
  color(color = "white", part = "header")

# Borders: clear everything, then draw only the inner vertical rules, and
# finally size the columns to their content.
categorical_summary_flextable <- categorical_summary_flextable %>%
  border_remove() %>%
  border_inner_v(border = fp_border(width = 1, color = "black")) %>%
  autofit()

# Print the finished table.
categorical_summary_flextable

N

%

Respondent Race

black

5925

15.0

other

2453

6.2

white

31029

78.7

Respondent Sex

female

22207

56.4

male

17200

43.6

Highest Degree

graduate

4481

11.4

high school

25869

65.6

less than high school

9057

23.0

# Dodged bar chart: number of respondents in each political-views group,
# with one bar per value of sex.
ggplot(data = gss, mapping = aes(x = polviews, fill = sex)) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    x = "Political Views",
    y = "Count",
    fill = "Gender",
    title = "Distribution of Political Views by Gender"
  )

# Yearly distribution of religious attendance: one row per (year, attend)
# combination with its respondent count, the year's total, and the share of
# that year's respondents.
#
# The former filter on `proportion` could never remove anything because
# `count` comes from n() and is never NA. Rows with a missing `attend` are
# dropped instead, after the proportions are computed, so `total` still
# includes respondents without an attendance answer exactly as before.
# NOTE(review): the plot of this table warned that geom_line() removed 32
# rows; presumably those were these missing-attendance rows — confirm.
gss_yearly <- gss %>%
  count(year, attend, name = "count") %>%
  group_by(year) %>%
  mutate(
    total = sum(count),
    proportion = count / total
  ) %>%
  # Return an ungrouped table so later verbs are not silently applied per year.
  ungroup() %>%
  filter(!is.na(attend))


# Line chart of the yearly proportions, one line per attendance category.
ggplot(
  data = gss_yearly,
  mapping = aes(x = year, y = proportion, group = attend, color = attend)
) +
  geom_line(linewidth = 1.2) +
  scale_color_brewer(palette = "Set3") +
  theme_minimal() +
  # Applied after theme_minimal() so the complete theme does not reset it.
  theme(legend.position = "bottom") +
  labs(
    x = "Year",
    y = "Proportion",
    color = "Religious attendance over time",
    title = "Evolution of Religious Preferences Over Time"
  )
## Warning: Removed 32 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Bin age into four groups and keep only respondents that fall into a group
# and have a non-missing fejobaff answer. Note that this reuses the name
# gss_filtered and so replaces any earlier object with that name.
gss_filtered <- gss %>%
  mutate(
    age_group = case_when(
      age >= 18 & age <= 29 ~ "18-29",
      age >= 30 & age <= 44 ~ "30-44",
      age >= 45 & age <= 59 ~ "45-59",
      age >= 60 ~ "60+",
      # Ages outside every range (including missing ages) get no group.
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!(is.na(age_group) | is.na(fejobaff)))



# Stacked bar chart scaled to proportions: for each age group, the share of
# respondents giving each fejobaff answer.
ggplot(data = gss_filtered, mapping = aes(x = age_group, fill = fejobaff)) +
  geom_bar(position = "fill") +
  theme_minimal() +
  labs(
    x = "Age Group",
    y = "Proportion",
    title = "Distribution of Preferential Hiring Across Age"
  )