# Load required packages
packages <- c("tidyverse", "srvyr", "srvyrexploR", "broom", "gt")
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "srvyr" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "srvyrexploR" "srvyr" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "broom" "srvyrexploR" "srvyr" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "gt" "broom" "srvyrexploR" "srvyr" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
First, examine the survey question for TrustPeople
:
Look up the exact question wording in the codebook
Identify all possible response categories
Then analyze the responses:
Check the data quality by calculating:
Total number of respondents
Number of valid responses
Number of missing responses
Response rate
Create a distribution table showing counts and percentages for each response category
Hint: Follow the same steps we used for TrustGovernment, but apply them to TrustPeople. Remember to examine the question context first
First, let’s examine the survey question: - Question text: “Generally speaking, how often can you trust other people?” - Response options: Always, Most of the time, About half the time, Some of the time, Never
##
## Always Most of the time About half the time Some of the time
## 48 3511 2020 1597
## Never
## 264
# Check data quality
trust_people_quality <- anes_2020 %>%
summarize(
total_respondents = n(),
valid_responses = sum(!is.na(TrustPeople)),
missing_responses = sum(is.na(TrustPeople)),
response_rate = round(100 * valid_responses / total_respondents, 1)
)
trust_people_quality
## # A tibble: 1 × 4
## total_respondents valid_responses missing_responses response_rate
## <int> <int> <int> <dbl>
## 1 7453 7440 13 99.8
# Calculate total valid responses
total_valid_trust <- sum(!is.na(anes_2020$TrustPeople))
total_valid_trust
## [1] 7440
# Create distribution with proper ordering
trust_people_dist <- anes_2020 %>%
filter(!is.na(TrustPeople)) %>%
mutate(TrustPeople = factor(TrustPeople,
levels = c("Always", "Most of the time", "About half the time",
"Some of the time", "Never"))) %>%
group_by(TrustPeople) %>%
summarize(
count = n(),
percentage = round(100 * count / total_valid_trust, 1)
) %>%
arrange(desc(count))
trust_people_dist
## # A tibble: 5 × 3
## TrustPeople count percentage
## <fct> <int> <dbl>
## 1 Most of the time 3511 47.2
## 2 About half the time 2020 27.2
## 3 Some of the time 1597 21.5
## 4 Never 264 3.5
## 5 Always 48 0.6
# Create professional table
trust_people_dist %>%
gt() %>%
cols_label(
TrustPeople = "Trust Level",
count = "Responses (N)",
percentage = "Percent (%)"
) %>%
fmt_number(
columns = count,
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = percentage,
decimals = 1
) %>%
tab_header(
title = md("**Distribution of Trust in Other People**"),
subtitle = "Generally speaking, how often can you trust other people?"
) %>%
tab_source_note(
source_note = sprintf(
"Note: Based on %d valid responses (%.1f%% response rate).",
trust_people_quality$valid_responses,
trust_people_quality$response_rate
)
) %>%
tab_options(
table.border.top.width = 2,
table.border.bottom.width = 2
)
Distribution of Trust in Other People | ||
Generally speaking, how often can you trust other people? | ||
Trust Level | Responses (N) | Percent (%) |
---|---|---|
Most of the time | 3,511 | 47.2 |
About half the time | 2,020 | 27.2 |
Some of the time | 1,597 | 21.5 |
Never | 264 | 3.5 |
Always | 48 | 0.6 |
Note: Based on 7440 valid responses (99.8% response rate). |
Start by understanding the variable:
Look up the exact question text for CampaignInterest
in the codebook
Note how the question was framed to respondents
Then create a summary analysis:
Calculate data quality metrics (response rates, missing data)
Create a formatted table showing:
Response counts
Percentages
Valid percentages (excluding missing data)
Hint: Think about how the question wording might affect response patterns
Question text: “Some people don’t pay much attention to political campaigns. How about you? Would you say that you have been very much interested, somewhat interested or not much interested in the political campaigns so far this year?”
##
## Very much interested Somewhat interested Not much interested
## 3940 2569 943
# Calculate data quality metrics
campaign_quality <- anes_2020 %>%
summarize(
total_respondents = n(),
valid_responses = sum(!is.na(CampaignInterest)),
missing_responses = sum(is.na(CampaignInterest)),
response_rate = round(100 * valid_responses / total_respondents, 1)
)
campaign_quality
## # A tibble: 1 × 4
## total_respondents valid_responses missing_responses response_rate
## <int> <int> <int> <dbl>
## 1 7453 7452 1 100
# Create distribution with proper ordering
campaign_dist <- anes_2020 %>%
filter(!is.na(CampaignInterest)) %>%
mutate(CampaignInterest = factor(CampaignInterest,
levels = c("Very much interested", "Somewhat interested",
"Not much interested"))) %>%
group_by(CampaignInterest) %>%
summarize(
count = n(),
percentage = round(100 * count / total_valid_campaign)
) %>%
arrange(desc(count)) %>%
# Add total row
bind_rows(
summarize(.,
CampaignInterest = "Total",
count = sum(count),
percentage = sum(percentage)
)
)
campaign_dist
## # A tibble: 4 × 3
## CampaignInterest count percentage
## <chr> <int> <dbl>
## 1 Very much interested 3940 53
## 2 Somewhat interested 2569 34
## 3 Not much interested 943 13
## 4 Total 7452 100
# Format table
campaign_dist %>%
gt() %>%
cols_label(
CampaignInterest = "Interest Level",
count = "Count (N)",
percentage = "Valid Percent (%)"
) %>%
fmt_number(
columns = count,
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = percentage,
decimals = 1
) %>%
tab_header(
title = md("**Campaign Interest Distribution**"),
subtitle = "Level of interest in political campaigns"
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
rows = CampaignInterest == "Total"
)
) %>%
tab_source_note(
source_note = sprintf(
"Note: Based on %d valid responses.",
campaign_quality$valid_responses,
campaign_quality$response_rate,
campaign_quality$missing_responses
)
) %>%
tab_options(
table.border.top.width = 2,
table.border.bottom.width = 2
)
Campaign Interest Distribution | ||
Level of interest in political campaigns | ||
Interest Level | Count (N) | Valid Percent (%) |
---|---|---|
Very much interested | 3,940 | 53.0 |
Somewhat interested | 2,569 | 34.0 |
Not much interested | 943 | 13.0 |
Total | 7,452 | 100.0 |
Note: Based on 7452 valid responses. |
Create a professional table that combines:
Race/ethnicity distribution using RaceEth
:
Counts and percentages for each category
Note any missing data
Education levels using Education
:
Grouped into meaningful categories
Percentages for each level
Format using gt with appropriate styling and clear labels.
Hint: This is different from our tutorial example as it combines two categorical demographic variables
##
## White Black Hispanic Asian, NH/PI
## 5420 650 662 248
## AI/AN Other/multiple race
## 155 237
##
## Less than HS High school Post HS Bachelor's Graduate
## 312 1160 2514 1877 1474
# Create demo stats
demo_stats <- anes_2020 %>%
summarize(
# Basic counts
n_total = n(),
n_race_valid = sum(!is.na(RaceEth)),
n_educ_valid = sum(!is.na(Education)),
# Race percentages
pct_white = round(100 * mean(RaceEth == "White", na.rm = TRUE), 1),
pct_black = round(100 * mean(RaceEth == "Black", na.rm = TRUE), 1),
pct_hispanic = round(100 * mean(RaceEth == "Hispanic", na.rm = TRUE), 1),
pct_asian = round(100 * mean(RaceEth == "Asian, NH/PI", na.rm = TRUE), 1),
pct_other = 100 - (
round(100 * mean(RaceEth == "White", na.rm = TRUE), 1) +
round(100 * mean(RaceEth == "Black", na.rm = TRUE), 1) +
round(100 * mean(RaceEth == "Hispanic", na.rm = TRUE), 1) +
round(100 * mean(RaceEth == "Asian, NH/PI", na.rm = TRUE), 1)
),
# Education percentages
pct_hs_or_less = round(100 * mean(Education %in%
c("Less than HS", "High school"),
na.rm = TRUE), 1),
pct_some_college = round(100 * mean(Education == "Post HS",
na.rm = TRUE), 1),
pct_bachelors_plus = 100 - (
round(100 * mean(Education %in% c("Less than HS", "High school"),
na.rm = TRUE), 1) +
round(100 * mean(Education == "Post HS", na.rm = TRUE), 1)
)
)
demo_stats
## # A tibble: 1 × 11
## n_total n_race_valid n_educ_valid pct_white pct_black pct_hispanic pct_asian
## <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 7453 7372 7337 73.5 8.8 9 3.4
## # ℹ 4 more variables: pct_other <dbl>, pct_hs_or_less <dbl>,
## # pct_some_college <dbl>, pct_bachelors_plus <dbl>
# Create formatted table structure
demo_table <- data.frame(
characteristic = c(
"Sample Size (N)",
"",
"Race/Ethnicity (%)",
" White",
" Black",
" Hispanic",
" Asian/Pacific Islander",
" Other/Multiple Race",
" Total",
"",
"Education (%)",
" High School or Less",
" Some College",
" Bachelor's or Higher",
" Total"
),
value = c(
demo_stats$n_total,
"",
"",
demo_stats$pct_white,
demo_stats$pct_black,
demo_stats$pct_hispanic,
demo_stats$pct_asian,
demo_stats$pct_other,
100.0,
"",
"",
demo_stats$pct_hs_or_less,
demo_stats$pct_some_college,
demo_stats$pct_bachelors_plus,
100.0
)
)
# Format table
demo_table %>%
gt() %>%
cols_label(
characteristic = "",
value = ""
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
rows = characteristic %in%
c("Sample Size (N)", "Race/Ethnicity (%)", "Education (%)")
)
) %>%
tab_header(
title = md("**Sample Demographics**")
) %>%
tab_source_note(
source_note = sprintf(
"Note: Based on %d respondents. Missing data: Race/Ethnicity (%d), Education (%d). Percentages based on valid responses.",
demo_stats$n_total,
demo_stats$n_total - demo_stats$n_race_valid,
demo_stats$n_total - demo_stats$n_educ_valid
)
) %>%
tab_options(
table.border.top.width = 2,
table.border.bottom.width = 2
)
Sample Demographics | |
Sample Size (N) | 7453 |
Race/Ethnicity (%) | |
White | 73.5 |
Black | 8.8 |
Hispanic | 9 |
Asian/Pacific Islander | 3.4 |
Other/Multiple Race | 5.3 |
Total | 100 |
Education (%) | |
High School or Less | 19.8 |
Some College | 34.3 |
Bachelor's or Higher | 45.9 |
Total | 100 |
Note: Based on 7453 respondents. Missing data: Race/Ethnicity (81), Education (116). Percentages based on valid responses. |
Create a descriptive analysis that examines:
Age statistics by party identification:
Mean age for each party ID group
Standard deviation within groups
Compare age distributions across different party affiliations
Present results in a professional table using gt
Hint: This combines continuous and categorical variables in a way different from our tutorial examples
##
## Strong democrat Not very strong democrat
## 1796 790
## Independent-democrat Independent
## 881 876
## Independent-republican Not very strong republican
## 782 758
## Strong republican
## 1545
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.00 37.00 53.00 51.83 66.00 80.00 294
# Calculate age statistics by party
age_party_stats <- anes_2020 %>%
# Remove missing values first
filter(!is.na(PartyID), !is.na(Age)) %>%
# Group by party and calculate statistics
group_by(PartyID) %>%
summarize(
n = n(),
mean_age = round(mean(Age), 1),
sd_age = round(sd(Age), 1),
median_age = round(median(Age), 1),
pct = round(100 * n() / sum(!is.na(anes_2020$PartyID) & !is.na(anes_2020$Age)), 1)
) %>%
# Add total row
bind_rows(
anes_2020 %>%
filter(!is.na(PartyID), !is.na(Age)) %>%
summarize(
PartyID = "Total",
n = n(),
mean_age = round(mean(Age), 1),
sd_age = round(sd(Age), 1),
median_age = round(median(Age), 1),
pct = 100.0
)
)
age_party_stats
## # A tibble: 8 × 6
## PartyID n mean_age sd_age median_age pct
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Strong democrat 1757 54.4 16.9 58 24.6
## 2 Not very strong democrat 768 46.1 16.7 44 10.7
## 3 Independent-democrat 856 47.7 17.3 45 12
## 4 Independent 812 48.2 17.1 46 11.4
## 5 Independent-republican 748 52.9 16.4 54 10.5
## 6 Not very strong republican 725 50.5 16.2 50 10.1
## 7 Strong republican 1482 56.3 16.5 59 20.7
## 8 Total 7148 51.8 17.1 53 100
# Create formatted table
age_party_stats %>%
gt() %>%
cols_label(
PartyID = "Party Identification",
n = "N",
pct = "% Sample",
mean_age = "Mean",
sd_age = "SD",
median_age = "Median"
) %>%
fmt_number(
columns = n,
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = c(mean_age, sd_age, median_age, pct),
decimals = 1
) %>%
tab_spanner(
label = "Sample Size",
columns = c(n, pct)
) %>%
tab_spanner(
label = "Age Distribution",
columns = c(mean_age, sd_age, median_age)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
rows = PartyID == "Total"
)
) %>%
tab_header(
title = md("**Age Distribution by Party Identification**"),
subtitle = "Mean age and dispersion measures by party affiliation"
) %>%
tab_source_note(
source_note = "Note: Analysis excludes missing age and party ID responses. SD = Standard Deviation."
) %>%
tab_options(
table.border.top.width = 2,
table.border.bottom.width = 2
)
Age Distribution by Party Identification | |||||
Mean age and dispersion measures by party affiliation | |||||
Party Identification |
Sample Size
|
Age Distribution
|
|||
---|---|---|---|---|---|
N | % Sample | Mean | SD | Median | |
Strong democrat | 1,757 | 24.6 | 54.4 | 16.9 | 58.0 |
Not very strong democrat | 768 | 10.7 | 46.1 | 16.7 | 44.0 |
Independent-democrat | 856 | 12.0 | 47.7 | 17.3 | 45.0 |
Independent | 812 | 11.4 | 48.2 | 17.1 | 46.0 |
Independent-republican | 748 | 10.5 | 52.9 | 16.4 | 54.0 |
Not very strong republican | 725 | 10.1 | 50.5 | 16.2 | 50.0 |
Strong republican | 1,482 | 20.7 | 56.3 | 16.5 | 59.0 |
Total | 7,148 | 100.0 | 51.8 | 17.1 | 53.0 |
Note: Analysis excludes missing age and party ID responses. SD = Standard Deviation. |
Create a summary table showing the relationship between education level and party identification:
Calculate the percent of each education level that identifies as Democrat (including independent-democrat), Republican (including independent-republican), or Independent
Create a professional formatted table that shows:
Education levels
Distribution across these three party groups
Include row totals to verify percentages sum to 100%
Use proper formatting for numbers and percentages
Hint: Remember to combine party categories first, then calculate percentages within education levels
##
## Strong democrat Not very strong democrat
## 1796 790
## Independent-democrat Independent
## 881 876
## Independent-republican Not very strong republican
## 782 758
## Strong republican
## 1545
##
## Less than HS High school Post HS Bachelor's Graduate
## 312 1160 2514 1877 1474
# Create party groups and calculate distribution
edu_party_dist <- anes_2020 %>%
# Remove missing values
filter(!is.na(PartyID), !is.na(Education)) %>%
# Create simplified party groups
mutate(
party_group = case_when(
PartyID %in% c("Strong democrat", "Not very strong democrat",
"Independent-democrat") ~ "Democrat",
PartyID %in% c("Strong republican", "Not very strong republican",
"Independent-republican") ~ "Republican",
PartyID == "Independent" ~ "Independent",
),
# Order education levels
Education = factor(Education,
levels = c("Less than HS", "High school", "Post HS",
"Bachelor's", "Graduate"))
) %>%
# Group and calculate percentages
group_by(Education, party_group) %>%
summarize(count = n(), .groups = "drop") %>%
group_by(Education) %>%
mutate(
total = sum(count),
pct = round(100 * count / total, 1)
) %>%
# Add total row for each education level
group_by(Education) %>%
arrange(Education, desc(count)) %>%
bind_rows(
group_by(., Education) %>%
summarize(
party_group = "Total",
count = sum(count),
total = first(total),
pct = 100.0,
.groups = "drop"
)
)
edu_party_dist
## # A tibble: 20 × 5
## # Groups: Education [5]
## Education party_group count total pct
## <fct> <chr> <int> <int> <dbl>
## 1 Less than HS Democrat 129 308 41.9
## 2 Less than HS Republican 119 308 38.6
## 3 Less than HS Independent 60 308 19.5
## 4 High school Republican 518 1158 44.7
## 5 High school Democrat 450 1158 38.9
## 6 High school Independent 190 1158 16.4
## 7 Post HS Republican 1155 2508 46.1
## 8 Post HS Democrat 1023 2508 40.8
## 9 Post HS Independent 330 2508 13.2
## 10 Bachelor's Democrat 944 1874 50.4
## 11 Bachelor's Republican 758 1874 40.4
## 12 Bachelor's Independent 172 1874 9.2
## 13 Graduate Democrat 872 1471 59.3
## 14 Graduate Republican 495 1471 33.7
## 15 Graduate Independent 104 1471 7.1
## 16 Less than HS Total 308 308 100
## 17 High school Total 1158 1158 100
## 18 Post HS Total 2508 2508 100
## 19 Bachelor's Total 1874 1874 100
## 20 Graduate Total 1471 1471 100
# Create formatted table
edu_party_dist %>%
gt() %>%
cols_label(
Education = "Education Level",
party_group = "Party Group",
count = "Count",
pct = "Percent (%)"
) %>%
fmt_number(
columns = count,
decimals = 0,
use_seps = TRUE
) %>%
fmt_number(
columns = pct,
decimals = 1
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
rows = party_group == "Total"
)
) %>%
tab_header(
title = md("**Party Identification by Education Level**"),
subtitle = "Distribution of party groups across educational attainment"
) %>%
tab_source_note(
source_note = paste0(
"Note: Democrats include Independent-Democrats, Republicans include ",
"Independent-Republicans. Percentages calculated within education levels."
)
) %>%
tab_options(
table.border.top.width = 2,
table.border.bottom.width = 2
)
Party Identification by Education Level | |||
Distribution of party groups across educational attainment | |||
Party Group | Count | total | Percent (%) |
---|---|---|---|
Less than HS | |||
Democrat | 129 | 308 | 41.9 |
Republican | 119 | 308 | 38.6 |
Independent | 60 | 308 | 19.5 |
Total | 308 | 308 | 100.0 |
High school | |||
Republican | 518 | 1158 | 44.7 |
Democrat | 450 | 1158 | 38.9 |
Independent | 190 | 1158 | 16.4 |
Total | 1,158 | 1158 | 100.0 |
Post HS | |||
Republican | 1,155 | 2508 | 46.1 |
Democrat | 1,023 | 2508 | 40.8 |
Independent | 330 | 2508 | 13.2 |
Total | 2,508 | 2508 | 100.0 |
Bachelor's | |||
Democrat | 944 | 1874 | 50.4 |
Republican | 758 | 1874 | 40.4 |
Independent | 172 | 1874 | 9.2 |
Total | 1,874 | 1874 | 100.0 |
Graduate | |||
Democrat | 872 | 1471 | 59.3 |
Republican | 495 | 1471 | 33.7 |
Independent | 104 | 1471 | 7.1 |
Total | 1,471 | 1471 | 100.0 |
Note: Democrats include Independent-Democrats, Republicans include Independent-Republicans. Percentages calculated within education levels. |
Create a table comparing trust levels in government:
Calculate percentage who trust “Most of the time” or “Always”
Calculate percentage who trust “Never” or “Some of the time”
Show this comparison in a formatted table
anes_2020 %>%
filter(!is.na(TrustGovernment)) %>%
summarize(
high_trust = mean(TrustGovernment %in%
c("Most of the time", "Always")) * 100,
low_trust = mean(TrustGovernment %in%
c("Never", "Some of the time")) * 100,
n_total = n()
) %>%
gt() %>%
fmt_number(
columns = c(high_trust, low_trust),
decimals = 1
) %>%
cols_label(
high_trust = "High Trust %",
low_trust = "Low Trust %"
) %>%
tab_header(title = "Trust Level Distribution") %>%
tab_source_note(sprintf("Based on %d valid responses", .$n_total))
Trust Level Distribution | ||
High Trust % | Low Trust % | n_total |
---|---|---|
14.8 | 54.1 | 7424 |
Create a table showing trust patterns across age groups:
For each group calculate:
anes_2020 %>%
filter(!is.na(Age), !is.na(TrustGovernment)) %>%
mutate(
age_group = case_when(
Age < 35 ~ "18-34",
Age < 65 ~ "35-64",
TRUE ~ "65+"
)
) %>%
group_by(age_group) %>%
summarize(
n = n(),
pct_trust = mean(TrustGovernment %in%
c("Most of the time", "Always")) * 100
) %>%
gt() %>%
fmt_number(
columns = pct_trust,
decimals = 1
) %>%
cols_label(
age_group = "Age Group",
n = "Count",
pct_trust = "% High Trust"
) %>%
tab_header(title = "Trust Levels by Age Group")
Trust Levels by Age Group | ||
Age Group | Count | % High Trust |
---|---|---|
18-34 | 1435 | 12.1 |
35-64 | 3648 | 13.6 |
65+ | 2058 | 19.0 |
Create a table showing trust patterns by education:
For each education level:
anes_2020 %>%
filter(!is.na(Education), !is.na(TrustGovernment)) %>%
mutate(
edu_level = case_when(
Education %in% c("Less than HS", "High school") ~ "High school or less",
Education == "Post HS" ~ "Some post-secondary",
TRUE ~ "Bachelor's or higher"
)
) %>%
group_by(edu_level) %>%
summarize(
n = n(),
pct_never = mean(TrustGovernment == "Never") * 100
) %>%
gt() %>%
fmt_number(
columns = pct_never,
decimals = 1
) %>%
cols_label(
edu_level = "Education Level",
n = "Count",
pct_never = "% Never Trust"
) %>%
tab_header(title = "Trust by Education Level")
Trust by Education Level | ||
Education Level | Count | % Never Trust |
---|---|---|
Bachelor's or higher | 3339 | 7.3 |
High school or less | 1466 | 12.4 |
Some post-secondary | 2507 | 10.7 |
Create a distribution table for TrustPeople:
Show response categories
anes_2020 %>%
filter(!is.na(TrustPeople)) %>%
group_by(TrustPeople) %>%
summarize(
count = n()
) %>%
mutate(
percent = count / sum(count) * 100
) %>%
gt() %>%
fmt_number(
columns = percent,
decimals = 1
) %>%
cols_label(
TrustPeople = "Response",
count = "Count",
percent = "Percent"
) %>%
tab_header(title = "Trust in People Distribution")
Trust in People Distribution | ||
Response | Count | Percent |
---|---|---|
Always | 48 | 0.6 |
Most of the time | 3511 | 47.2 |
About half the time | 2020 | 27.2 |
Some of the time | 1597 | 21.5 |
Never | 264 | 3.5 |
Create a simple summary table showing:
Total respondents
Mean age
Percent female
Percent with Bachelor’s or higher
anes_2020 %>%
summarize(
total_resp = n(),
mean_age = mean(Age, na.rm = TRUE),
pct_female = mean(Gender == "Female", na.rm = TRUE) * 100,
pct_college = mean(Education %in%
c("Bachelor's", "Graduate"), na.rm = TRUE) * 100
) %>%
gt() %>%
fmt_number(
columns = c(mean_age, pct_female, pct_college),
decimals = 1
) %>%
cols_label(
total_resp = "Total Respondents",
mean_age = "Mean Age",
pct_female = "% Female",
pct_college = "% Bachelor's+"
) %>%
tab_header(title = "Sample Demographics Summary")
Sample Demographics Summary | |||
Total Respondents | Mean Age | % Female | % Bachelor's+ |
---|---|---|---|
7453 | 51.8 | 54.4 | 45.0 |