Task 1

# Use a fixed CRAN mirror so install.packages() does not prompt for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages this analysis depends on.
packages <- c(
  "tidyverse", "fst", "modelsummary", "viridis",
  "kableExtra", "flextable", "officer"
)

# Install only the packages that are missing. system.file() returns "" for a
# package that is not installed; the R documentation recommends this check
# over scanning the full installed.packages() matrix.
is_installed <- vapply(
  packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
new_packages <- packages[!is_installed]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package; invisible() hides the list that lapply() returns.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## 필요한 패키지를 로딩중입니다: viridisLite
## 
## 
## 다음의 패키지를 부착합니다: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## 다음의 패키지를 부착합니다: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose

Loading and inspecting the GSS data

# load() restores the saved objects under their original names and returns
# only a character vector of those names, so its result is kept separately
# instead of being assigned to `gss`.
loaded_names <- load("C:/Users/kevin/Downloads/gss2022.Rdata")

# The rest of the script expects the data frame to have been saved as `df`;
# fail early with a clear error if the file does not contain it.
stopifnot("df" %in% loaded_names)
gss <- df

# Print the structure of the data for a quick look at the variables.
str(gss)

Recode polviews into three categories: “Liberal”, “Moderate”, and “Conservative”.

# Collapse the listed polviews labels into three groups; any response that is
# not listed below becomes NA.
gss <- mutate(
  gss,
  polviews = case_when(
    polviews %in% c("extremely liberal", "liberal", "slightly liberal") ~
      "Liberal",
    polviews %in% "moderate" ~ "Moderate",
    polviews %in%
      c("extremely conservative", "conservative", "slightly conservative") ~
      "Conservative",
    .default = NA_character_
  )
)

Clean sex, degree, and race but retain the relevant categories.

# Keep only the listed categories of sex, degree, and race; every other
# response is set to NA.
# NOTE(review): assumes the labels below are spelled exactly as the values
# stored in gss -- confirm against the data.
gss <- mutate(
  gss,
  sex = case_when(
    sex %in% c("male", "female") ~ sex,
    .default = NA_character_
  ),
  degree = case_when(
    degree %in% c(
      "less than high school", "high school", "junior college",
      "bachelor", "graduate"
    ) ~ degree,
    .default = NA_character_
  ),
  race = case_when(
    race %in% c("white", "black", "other") ~ race,
    .default = NA_character_
  )
)

Task 2

Load the packages.

library(modelsummary)
library(flextable)

Generate a summary table for selected variables

# Restrict the summary to the four recoded variables.
selected_vars <- select(gss, polviews, sex, degree, race)

# Summarise the categorical variables and return the result as a flextable.
categorical_summary <- datasummary_skim(
  selected_vars,
  type = "categorical",
  output = "flextable"
)
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
##   produced by the `tinytable` backend.
# Style the summary table one step at a time.
categorical_summary <- set_header_labels(
  categorical_summary,
  Variable = "Variable", Value = "Value", Freq = "Frequency"
)
categorical_summary <- theme_box(categorical_summary)

# Header row: bold white text on a green background.
categorical_summary <- bold(categorical_summary, part = "header")
categorical_summary <- bg(categorical_summary, part = "header", bg = "#4CAF50")
categorical_summary <- color(
  categorical_summary,
  part = "header", color = "white"
)

# Clear the existing borders, then add inner vertical rules only.
categorical_summary <- border_remove(categorical_summary)
categorical_summary <- border_inner_v(
  categorical_summary,
  border = fp_border(color = "black", width = 1)
)
categorical_summary <- autofit(categorical_summary)

# Draw the finished table on the active graphics device.
plot(categorical_summary)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## 윈도우즈 폰트데이터베이스에서 찾을 수 없는 폰트페밀리입니다
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## 윈도우즈 폰트데이터베이스에서 찾을 수 없는 폰트페밀리입니다

Task 3

Load the package.

library(ggplot2)

Create a bar chart showing the distribution of political views by gender.

# Bar chart of political views by gender.
# Rows with a missing polviews or sex are dropped so that NA does not appear
# as its own bar or legend entry. polviews is given an explicit level order so
# the x-axis runs Liberal -> Moderate -> Conservative instead of
# alphabetically.
gss %>%
  filter(!is.na(polviews), !is.na(sex)) %>%
  mutate(
    polviews = factor(
      polviews,
      levels = c("Liberal", "Moderate", "Conservative")
    )
  ) %>%
  ggplot(aes(x = polviews, fill = sex)) +
  geom_bar(position = "dodge") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Distribution of Political Views by Gender",
    x = "Political Views",
    y = "Count",
    fill = "Gender"
  ) +
  theme_minimal()

Task 4

Load the package.

library(dplyr)

Select the year and attend variables

# Keep only the two columns needed for the attendance trend.
attendance_data <- select(gss, year, attend)

Create a line plot showing the proportion of each category of religious attendance over time.

# Share of each attendance category within every year.
# Missing attend values are dropped first so that NA is not counted as a
# category in the yearly totals. count() returns an ungrouped tally (so no
# `.groups` message is emitted), and the final ungroup() ensures the result
# does not stay grouped by year.
attendance_trend <- attendance_data %>%
  filter(!is.na(attend)) %>%
  count(year, attend, name = "count") %>%
  group_by(year) %>%
  mutate(total = sum(count), proportion = count / total) %>%
  ungroup()
# Line chart of attendance shares over time.
# A viridis scale is used because attend has more categories than the 8
# colours the Set2 palette provides; with Set2 the categories left without a
# colour were dropped by geom_line(). `linewidth` replaces `size`, which is
# deprecated for lines since ggplot2 3.4.0.
p <- ggplot(
  attendance_trend,
  aes(x = year, y = proportion, color = attend, group = attend)
) +
  geom_line(linewidth = 1.2) +
  scale_color_viridis_d() +
  labs(
    title = "Proportion of Religious Attendance Categories Over Time",
    x = "Year",
    y = "Proportion",
    color = "Attendance"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")
plot(p)

Task 5

Categorize age into four groups: “18-29”, “30-44”, “45-59”, and “60+”.

# Bin age into four groups.
# Half-open intervals are used so the bands meet exactly at 30, 45, and 60;
# with closed integer bounds, a non-integer age such as 29.5 would match no
# band and become NA. Ages below 18 and missing ages are set to NA.
gss <- gss %>%
  mutate(age_group = case_when(
    age >= 18 & age < 30 ~ "18-29",
    age >= 30 & age < 45 ~ "30-44",
    age >= 45 & age < 60 ~ "45-59",
    age >= 60 ~ "60+",
    .default = NA_character_
  ))

Create a stacked bar chart showing the distribution of the fejobaff response categories for each age group

# Stacked proportion bars of fejobaff responses within each age group.
# Rows with a missing age_group or fejobaff are dropped first; otherwise NA
# appears as its own bar and as a segment inside every bar, which shrinks the
# displayed share of each real response.
gss %>%
  filter(!is.na(age_group), !is.na(fejobaff)) %>%
  ggplot(aes(x = age_group, fill = fejobaff)) +
  geom_bar(position = "fill") +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Distribution of fejobaff Responses by Age Group",
    x = "Age Group",
    y = "Proportion",
    fill = "fejobaff Response"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")