# Packages required by this analysis.
packages <- c(
  "tidyverse", "fst", "modelsummary", "viridis",
  "kableExtra", "flextable", "officer"
)

# Install only the packages that are not yet present in the local library.
new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. lapply() returns the search path after each call;
# invisible() keeps that list from being printed.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## Loading required package: viridisLite
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "fst"       "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "viridis"      "viridisLite"  "modelsummary" "fst"          "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[5]]
##  [1] "kableExtra"   "viridis"      "viridisLite"  "modelsummary" "fst"         
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "flextable"    "kableExtra"   "viridis"      "viridisLite"  "modelsummary"
##  [6] "fst"          "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "officer"      "flextable"    "kableExtra"   "viridis"      "viridisLite" 
##  [6] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"
# load() restores the objects stored in the .RData file into the current
# environment and returns only their names, so its result is not assigned.
# The file is expected to provide a data frame called `df` (it is read on the
# next line); keep it under the name `gss` for the rest of the script.
load("gss2018_egp.RData")
gss <- df

Task 1

# Show the storage type and level labels of the political-views variable.
str(gss[["polviews"]])
##  Factor w/ 10 levels "extremely liberal",..: 4 NA 6 4 2 4 4 6 3 3 ...
# Collapse the seven substantive political-view answers into an ordered
# three-category scale stored in `egp`.
# `polviews` is a factor whose levels are text labels (e.g. "extremely
# liberal"), not numeric codes, so the mapping matches on those labels
# (including the abbreviated spellings used in the data). Matching on code
# strings such as "1".."7" would turn every value into NA.
# Any other level (DK, IAP, NA) and missing values become NA.
gss <- gss %>%
  mutate(
    egp = case_when(
      polviews %in% c("extremely liberal", "liberal") ~ "Liberal",
      polviews %in% c(
        "slightly liberal", "moderate", "slghtly conservative"
      ) ~ "Moderate",
      polviews %in% c("conservative", "extrmly conservative") ~ "Conservative",
      TRUE ~ NA_character_
    ),
    # Ordered from left to right on the political spectrum.
    egp = factor(
      egp,
      levels = c("Liberal", "Moderate", "Conservative"),
      ordered = TRUE
    )
  )
# Keep only the three demographic variables that go into the summary table.
gss_filtered <- dplyr::select(gss, race, sex, degree)

# Build the categorical summary (N and % per level) and display it.
categorical_summary <- gss_filtered %>%
  datasummary_skim(type = "categorical")
categorical_summary
tinytable_cbqlp0hlbkp7tqj84sam
N %
race white 52033 81.2
black 8480 13.2
other 3594 5.6
IAP 0 0.0
sex male 28337 44.2
female 35770 55.8
degree lt high school 13321 20.8
high school 32857 51.3
junior college 3645 5.7
bachelor 9423 14.7
graduate 4693 7.3
dk 0 0.0
iap 0 0.0
na 0 0.0
# Drop respondents with a missing race, sex or degree, then give the
# remaining levels presentation-ready labels.
gss_cleaned <- gss %>%
  filter(!is.na(race), !is.na(sex), !is.na(degree)) %>%
  mutate(
    race = recode(race, "white" = "White", "black" = "Black", "other" = "Other"),
    sex = recode(sex, "male" = "Male", "female" = "Female"),
    # The lowest category is stored as "lt high school" in the data; recode()
    # silently ignores names that match no level, so the key must be exact.
    degree = recode(
      degree,
      "lt high school" = "Less than High School",
      "high school" = "High School",
      "junior college" = "Junior College",
      "bachelor" = "Bachelor",
      "graduate" = "Graduate"
    )
  )

# Replace the short variable names with human-readable column headers for
# the summary table.
gss_cleaned <- dplyr::rename(
  gss_cleaned,
  `Political View` = polviews,
  `Respondent Race` = race,
  `Respondent Sex` = sex,
  `Highest Degree` = degree
)

Task 2

# Categorical summary of the relabelled variables, produced as a kableExtra
# object so that it can be styled afterwards.
categorical_summary_relabelled <- gss_cleaned %>%
  dplyr::select(
    `Political View`,
    `Respondent Race`,
    `Respondent Sex`,
    `Highest Degree`
  ) %>%
  datasummary_skim(type = "categorical", output = "kableExtra")
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
##   produced by the `tinytable` backend.
# Style the summary table: compact striped layout, green header row, bold
# first column, and a spanning title above the three right-hand columns.
categorical_summary_relabelled %>%
  kableExtra::kable_styling(
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  kableExtra::row_spec(0, bold = TRUE, color = "white", background = "#4CAF50") %>%
  kableExtra::column_spec(1, bold = TRUE) %>%
  kableExtra::add_header_above(
    c(" " = 1, "Summary Statistics for Categorical Variables" = 3)
  )
Summary Statistics for Categorical Variables
N %
Political View extremely liberal 1648 2.6
liberal 6388 10.0
slightly liberal 6909 10.8
moderate 21121 33.0
slghtly conservative 8607 13.5
conservative 8154 12.8
extrmly conservative 1802 2.8
DK 0 0.0
IAP 0 0.0
NA 0 0.0
Respondent Race White 51921 81.2
Black 8433 13.2
Other 3585 5.6
IAP 0 0.0
Respondent Sex Male 28261 44.2
Female 35678 55.8
Highest Degree lt high school 13321 20.8
High School 32857 51.4
Junior College 3645 5.7
Bachelor 9423 14.7
Graduate 4693 7.3
dk 0 0.0
iap 0 0.0
na 0 0.0

Task 3

library(ggplot2)
library(RColorBrewer)

# Side-by-side bar chart of political views, one bar per sex within each
# answer category.
gss %>%
  ggplot(mapping = aes(x = polviews, fill = sex)) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Distribution of Political Views by Gender",
    x = "Political Views",
    y = "Count"
  ) +
  theme_minimal() +
  # Centre the title above the panel.
  theme(plot.title = element_text(hjust = 0.5))

Task 4

# Share of each religious preference within every survey year.
gss_yearly <- gss %>%
  group_by(year, relig) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(year) %>%
  mutate(
    total = sum(count), # total respondents in that year
    proportion = count / total
  ) %>%
  # Drop the per-year grouping so later operations on gss_yearly are not
  # silently performed group-wise.
  ungroup()

# Line chart of the yearly proportion of each religious preference.
ggplot(gss_yearly, aes(x = year, y = proportion, color = relig, group = relig)) +
  # `linewidth` replaces the `size` aesthetic for lines (deprecated since
  # ggplot2 3.4.0).
  geom_line(linewidth = 1.2) +
  # The Brewer "Set3" palette offers at most 12 colours; with more religious
  # categories than that, the surplus series receive no colour and are
  # dropped from the plot. The viridis discrete scale has no such limit.
  scale_color_viridis_d() +
  labs(
    title = "Evolution of Religious Preferences Over Time",
    x = "Year",
    y = "Proportion",
    color = "Religious Preference"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set3 is 12
## Returning the palette you asked for with that many colors
## Warning: Removed 43 rows containing missing values or values outside the scale range
## (`geom_line()`).

Task 5

library(ggplot2)
library(dplyr)

# Keep respondents with both an age and an answer to `fejobaff`, then bin
# age into four groups. Intervals are right-closed: (17, 29], (29, 44],
# (44, 59] and (59, Inf].
gss_filtered <- gss %>%
  filter(!is.na(age), !is.na(fejobaff)) %>%
  mutate(
    age_group = cut(
      age,
      breaks = c(17, 29, 44, 59, Inf),
      labels = c("18-29", "30-44", "45-59", "60+"),
      right = TRUE
    )
  )

# Stacked bar chart of the `fejobaff` answers within each age group, with
# every bar scaled to the same height so that compositions can be compared.
ggplot(gss_filtered, aes(x = age_group, fill = fejobaff)) +
  geom_bar(position = "fill") +
  scale_fill_brewer(palette = "Set1") +
  # position = "fill" yields proportions between 0 and 1; show the ticks as
  # percentages so that they agree with the "Percentage" axis title.
  scale_y_continuous(labels = function(x) paste0(x * 100, "%")) +
  labs(
    title = "Distribution of Preferential Hiring Views by Age Group",
    x = "Age Group",
    y = "Percentage"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))