packages <- c("tidyverse", "fst", "modelsummary", "viridis", "kableExtra", "flextable", "officer")
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "viridis" "viridisLite" "modelsummary" "fst" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[5]]
## [1] "kableExtra" "viridis" "viridisLite" "modelsummary" "fst"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "flextable" "kableExtra" "viridis" "viridisLite" "modelsummary"
## [6] "fst" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "officer" "flextable" "kableExtra" "viridis" "viridisLite"
## [6] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
gss <- load("gss2018_egp.RData")
gss <- df
Task 1
str(gss$polviews)
## Factor w/ 10 levels "extremely liberal",..: 4 NA 6 4 2 4 4 6 3 3 ...
gss <- gss %>%
mutate(egp = factor(polviews, levels = c(
"-100",
"-99",
"-98",
"-97",
"1",
"2",
"3",
"4",
"5",
"6",
"7"
), labels = c(
"Inapplicable ",
"No answer",
"Do not Know/Cannot Choose",
"Skipped on Web",
"Liberal",
"Liberal",
"Moderate",
"Moderate",
"Moderate",
"Conservative",
"Conservative"
), ordered = TRUE))
gss_filtered <- gss %>%
dplyr::select(race, sex, degree)
categorical_summary <- datasummary_skim(gss_filtered, type = "categorical")
categorical_summary
| N | % | ||
|---|---|---|---|
| race | white | 52033 | 81.2 |
| black | 8480 | 13.2 | |
| other | 3594 | 5.6 | |
| IAP | 0 | 0.0 | |
| sex | male | 28337 | 44.2 |
| female | 35770 | 55.8 | |
| degree | lt high school | 13321 | 20.8 |
| high school | 32857 | 51.3 | |
| junior college | 3645 | 5.7 | |
| bachelor | 9423 | 14.7 | |
| graduate | 4693 | 7.3 | |
| dk | 0 | 0.0 | |
| iap | 0 | 0.0 | |
| na | 0 | 0.0 |
gss_cleaned <- gss %>%
filter(!is.na(race), !is.na(sex), !is.na(degree)) %>%
mutate(
race = recode(race, "white" = "White", "black" = "Black", "other" = "Other"),
sex = recode(sex, "male" = "Male", "female" = "Female"),
degree = recode(degree, "less than high school" = "Less than High School", "high school" = "High School", "junior college" = "Junior College", "bachelor" = "Bachelor", "graduate" = "Graduate"),
)
gss_cleaned <- gss_cleaned %>%
rename(
"Political View" = polviews,
"Respondent Race" = race,
"Respondent Sex" = sex,
"Highest Degree" = degree,
)
Task 2
categorical_summary_relabelled <- datasummary_skim(
gss_cleaned %>%
dplyr::select(`Political View`,`Respondent Race`, `Respondent Sex`, `Highest Degree`),
type = "categorical",
output = "kableExtra" )
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
## produced by the `tinytable` backend.
categorical_summary_relabelled %>%
kableExtra::kable_styling(full_width = F, bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
kableExtra::row_spec(0, bold = TRUE, color = "white", background = "#4CAF50") %>%
kableExtra::column_spec(1, bold = TRUE) %>%
kableExtra::add_header_above(c(" " = 1, "Summary Statistics for Categorical Variables" = 3))
| N | % | ||
|---|---|---|---|
| Political View | extremely liberal | 1648 | 2.6 |
| liberal | 6388 | 10.0 | |
| slightly liberal | 6909 | 10.8 | |
| moderate | 21121 | 33.0 | |
| slghtly conservative | 8607 | 13.5 | |
| conservative | 8154 | 12.8 | |
| extrmly conservative | 1802 | 2.8 | |
| DK | 0 | 0.0 | |
| IAP | 0 | 0.0 | |
| NA | 0 | 0.0 | |
| Respondent Race | White | 51921 | 81.2 |
| Black | 8433 | 13.2 | |
| Other | 3585 | 5.6 | |
| IAP | 0 | 0.0 | |
| Respondent Sex | Male | 28261 | 44.2 |
| Female | 35678 | 55.8 | |
| Highest Degree | lt high school | 13321 | 20.8 |
| High School | 32857 | 51.4 | |
| Junior College | 3645 | 5.7 | |
| Bachelor | 9423 | 14.7 | |
| Graduate | 4693 | 7.3 | |
| dk | 0 | 0.0 | |
| iap | 0 | 0.0 | |
| na | 0 | 0.0 |
Task 3
library(ggplot2)
library(RColorBrewer)
ggplot(gss, aes(x = polviews, fill = sex)) +
geom_bar(position = "dodge") +
scale_fill_brewer(palette = "Set1") +
labs(title = "Distribution of Political Views by Gender",
x = "Political Views",
y = "Count") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
Task 4
gss_yearly <- gss %>%
group_by(year, relig) %>%
summarize(count = n(), .groups = 'drop') %>%
group_by(year) %>%
mutate(total = sum(count), # Calculate the total count per year
proportion = count / total)
ggplot(gss_yearly, aes(x = year, y = proportion, color = relig, group = relig)) +
geom_line(size = 1.2) +
scale_color_brewer(palette = "Set3") +
labs(title = "Evolution of Religious Preferences Over Time", # Add plot title
x = "Year",
y = "Proportion",
color = "Religious Preference") +
theme_minimal() +
theme(legend.position = "bottom")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set3 is 12
## Returning the palette you asked for with that many colors
## Warning: Removed 43 rows containing missing values or values outside the scale range
## (`geom_line()`).
Task 5
library(ggplot2)
library(dplyr)
gss_filtered <- gss %>%
filter(!is.na(age) & !is.na(fejobaff)) %>%
mutate(
age_group = cut(age,
breaks = c(17, 29, 44, 59, Inf),
labels = c("18-29", "30-44", "45-59", "60+"),
right = TRUE),
)
ggplot(gss_filtered, aes(x = age_group, fill = fejobaff)) +
geom_bar(position = "fill") +
scale_fill_brewer(palette = "Set1") +
labs(title = "Distribution of Preferential Hiring Views by Age Group",
x = "Age Group",
y = "Percentage") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))