# Load required packages
packages <- c("tidyverse", "srvyr", "srvyrexploR", "broom", "gt")

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "srvyr"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "srvyrexploR" "srvyr"       "lubridate"   "forcats"     "stringr"    
##  [6] "dplyr"       "purrr"       "readr"       "tidyr"       "tibble"     
## [11] "ggplot2"     "tidyverse"   "stats"       "graphics"    "grDevices"  
## [16] "utils"       "datasets"    "methods"     "base"       
## 
## [[4]]
##  [1] "broom"       "srvyrexploR" "srvyr"       "lubridate"   "forcats"    
##  [6] "stringr"     "dplyr"       "purrr"       "readr"       "tidyr"      
## [11] "tibble"      "ggplot2"     "tidyverse"   "stats"       "graphics"   
## [16] "grDevices"   "utils"       "datasets"    "methods"     "base"       
## 
## [[5]]
##  [1] "gt"          "broom"       "srvyrexploR" "srvyr"       "lubridate"  
##  [6] "forcats"     "stringr"     "dplyr"       "purrr"       "readr"      
## [11] "tidyr"       "tibble"      "ggplot2"     "tidyverse"   "stats"      
## [16] "graphics"    "grDevices"   "utils"       "datasets"    "methods"    
## [21] "base"

Practice Exercise Solutions

Exercise 1: Trust in People

First, examine the survey question for TrustPeople:

  1. Look up the exact question wording in the codebook

  2. Identify all possible response categories

Then analyze the responses:

  1. Check the data quality by calculating:

    • Total number of respondents

    • Number of valid responses

    • Number of missing responses

    • Response rate

  2. Create a distribution table showing counts and percentages for each response category

Hint: Follow the same steps we used for TrustGovernment, but apply them to TrustPeople. Remember to examine the question context first

First, let’s examine the survey question: - Question text: “Generally speaking, how often can you trust other people?” - Response options: Always, Most of the time, About half the time, Some of the time, Never

# First examine the raw distribution
table(anes_2020$TrustPeople)
## 
##              Always    Most of the time About half the time    Some of the time 
##                  48                3511                2020                1597 
##               Never 
##                 264
# Check data quality
trust_people_quality <- anes_2020 %>%
  summarize(
    total_respondents = n(),
    valid_responses = sum(!is.na(TrustPeople)),
    missing_responses = sum(is.na(TrustPeople)),
    response_rate = round(100 * valid_responses / total_respondents, 1)
  )

trust_people_quality 
## # A tibble: 1 × 4
##   total_respondents valid_responses missing_responses response_rate
##               <int>           <int>             <int>         <dbl>
## 1              7453            7440                13          99.8
# Calculate total valid responses
total_valid_trust <- sum(!is.na(anes_2020$TrustPeople))

total_valid_trust 
## [1] 7440
# Create distribution with proper ordering
trust_people_dist <- anes_2020 %>%
  filter(!is.na(TrustPeople)) %>%
  mutate(TrustPeople = factor(TrustPeople, 
    levels = c("Always", "Most of the time", "About half the time", 
               "Some of the time", "Never"))) %>%
  group_by(TrustPeople) %>%
  summarize(
    count = n(),
    percentage = round(100 * count / total_valid_trust, 1)
  ) %>%
  arrange(desc(count))

trust_people_dist
## # A tibble: 5 × 3
##   TrustPeople         count percentage
##   <fct>               <int>      <dbl>
## 1 Most of the time     3511       47.2
## 2 About half the time  2020       27.2
## 3 Some of the time     1597       21.5
## 4 Never                 264        3.5
## 5 Always                 48        0.6
# Create professional table
trust_people_dist %>%
  gt() %>%
  cols_label(
    TrustPeople = "Trust Level",
    count = "Responses (N)",
    percentage = "Percent (%)"
  ) %>%
  fmt_number(
    columns = count,
    decimals = 0,
    use_seps = TRUE
  ) %>%
  fmt_number(
    columns = percentage,
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Distribution of Trust in Other People**"),
    subtitle = "Generally speaking, how often can you trust other people?"
  ) %>%
  tab_source_note(
    source_note = sprintf(
      "Note: Based on %d valid responses (%.1f%% response rate).",
      trust_people_quality$valid_responses,
      trust_people_quality$response_rate
    )
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )
Distribution of Trust in Other People
Generally speaking, how often can you trust other people?
Trust Level Responses (N) Percent (%)
Most of the time 3,511 47.2
About half the time 2,020 27.2
Some of the time 1,597 21.5
Never 264 3.5
Always 48 0.6
Note: Based on 7440 valid responses (99.8% response rate).

Exercise 2: Campaign Interest Analysis

Start by understanding the variable:

  1. Look up the exact question text for CampaignInterest in the codebook

  2. Note how the question was framed to respondents

Then create a summary analysis:

  1. Calculate data quality metrics (response rates, missing data)

  2. Create a formatted table showing:

    • Response counts

    • Percentages

    • Valid percentages (excluding missing data)

Hint: Think about how the question wording might affect response patterns

Question text: “Some people don’t pay much attention to political campaigns. How about you? Would you say that you have been very much interested, somewhat interested or not much interested in the political campaigns so far this year?”

# First examine the variable
table(anes_2020$CampaignInterest)
## 
## Very much interested  Somewhat interested  Not much interested 
##                 3940                 2569                  943
# Calculate data quality metrics
campaign_quality <- anes_2020 %>%
  summarize(
    total_respondents = n(),
    valid_responses = sum(!is.na(CampaignInterest)),
    missing_responses = sum(is.na(CampaignInterest)),
    response_rate = round(100 * valid_responses / total_respondents, 1)
  )
campaign_quality
## # A tibble: 1 × 4
##   total_respondents valid_responses missing_responses response_rate
##               <int>           <int>             <int>         <dbl>
## 1              7453            7452                 1           100
# Store total valid responses
total_valid_campaign <- sum(!is.na(anes_2020$CampaignInterest))
# Create distribution with proper ordering
campaign_dist <- anes_2020 %>%
  filter(!is.na(CampaignInterest)) %>%
  mutate(CampaignInterest = factor(CampaignInterest,
    levels = c("Very much interested", "Somewhat interested", 
               "Not much interested"))) %>%
  group_by(CampaignInterest) %>%
  summarize(
    count = n(),
    percentage = round(100 * count / total_valid_campaign)
  ) %>%
  arrange(desc(count)) %>%
  # Add total row
  bind_rows(
    summarize(.,
      CampaignInterest = "Total",
      count = sum(count),
      percentage = sum(percentage)
    )
  )
campaign_dist
## # A tibble: 4 × 3
##   CampaignInterest     count percentage
##   <chr>                <int>      <dbl>
## 1 Very much interested  3940         53
## 2 Somewhat interested   2569         34
## 3 Not much interested    943         13
## 4 Total                 7452        100
# Format table
campaign_dist %>%
  gt() %>%
  cols_label(
    CampaignInterest = "Interest Level",
    count = "Count (N)",
    percentage = "Valid Percent (%)"
  ) %>%
  fmt_number(
    columns = count,
    decimals = 0,
    use_seps = TRUE
  ) %>%
  fmt_number(
    columns = percentage,
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Campaign Interest Distribution**"),
    subtitle = "Level of interest in political campaigns"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = CampaignInterest == "Total"
    )
  ) %>%
  tab_source_note(
    source_note = sprintf(
      "Note: Based on %d valid responses.",
      campaign_quality$valid_responses,
      campaign_quality$response_rate,
      campaign_quality$missing_responses
    )
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )
Campaign Interest Distribution
Level of interest in political campaigns
Interest Level Count (N) Valid Percent (%)
Very much interested 3,940 53.0
Somewhat interested 2,569 34.0
Not much interested 943 13.0
Total 7,452 100.0
Note: Based on 7452 valid responses.

Exercise 3: Race and Education Demographics Table

Create a professional table that combines:

  1. Race/ethnicity distribution using RaceEth:

    • Counts and percentages for each category

    • Note any missing data

  2. Education levels using Education:

    • Grouped into meaningful categories

    • Percentages for each level

Format using gt with appropriate styling and clear labels.

Hint: This is different from our tutorial example as it combines two categorical demographic variables

# First examine the variables
table(anes_2020$RaceEth)
## 
##               White               Black            Hispanic        Asian, NH/PI 
##                5420                 650                 662                 248 
##               AI/AN Other/multiple race 
##                 155                 237
table(anes_2020$Education)
## 
## Less than HS  High school      Post HS   Bachelor's     Graduate 
##          312         1160         2514         1877         1474
# Create demo stats
demo_stats <- anes_2020 %>%
  summarize(
    # Basic counts
    n_total = n(),
    n_race_valid = sum(!is.na(RaceEth)),
    n_educ_valid = sum(!is.na(Education)),
    
    # Race percentages
    pct_white = round(100 * mean(RaceEth == "White", na.rm = TRUE), 1),
    pct_black = round(100 * mean(RaceEth == "Black", na.rm = TRUE), 1),
    pct_hispanic = round(100 * mean(RaceEth == "Hispanic", na.rm = TRUE), 1),
    pct_asian = round(100 * mean(RaceEth == "Asian, NH/PI", na.rm = TRUE), 1),
    pct_other = 100 - (
      round(100 * mean(RaceEth == "White", na.rm = TRUE), 1) +
      round(100 * mean(RaceEth == "Black", na.rm = TRUE), 1) +
      round(100 * mean(RaceEth == "Hispanic", na.rm = TRUE), 1) +
      round(100 * mean(RaceEth == "Asian, NH/PI", na.rm = TRUE), 1)
    ),
    
    # Education percentages
    pct_hs_or_less = round(100 * mean(Education %in% 
                                     c("Less than HS", "High school"), 
                                   na.rm = TRUE), 1),
    pct_some_college = round(100 * mean(Education == "Post HS", 
                                     na.rm = TRUE), 1),
    pct_bachelors_plus = 100 - (
      round(100 * mean(Education %in% c("Less than HS", "High school"), 
                      na.rm = TRUE), 1) +
      round(100 * mean(Education == "Post HS", na.rm = TRUE), 1)
    )
  )
demo_stats
## # A tibble: 1 × 11
##   n_total n_race_valid n_educ_valid pct_white pct_black pct_hispanic pct_asian
##     <int>        <int>        <int>     <dbl>     <dbl>        <dbl>     <dbl>
## 1    7453         7372         7337      73.5       8.8            9       3.4
## # ℹ 4 more variables: pct_other <dbl>, pct_hs_or_less <dbl>,
## #   pct_some_college <dbl>, pct_bachelors_plus <dbl>
# Create formatted table structure
demo_table <- data.frame(
  characteristic = c(
    "Sample Size (N)",
    "",
    "Race/Ethnicity (%)",
    "    White",
    "    Black",
    "    Hispanic",
    "    Asian/Pacific Islander",
    "    Other/Multiple Race",
    "    Total",
    "",
    "Education (%)",
    "    High School or Less",
    "    Some College",
    "    Bachelor's or Higher",
    "    Total"
  ),
  value = c(
    demo_stats$n_total,
    "",
    "",
    demo_stats$pct_white,
    demo_stats$pct_black,
    demo_stats$pct_hispanic,
    demo_stats$pct_asian,
    demo_stats$pct_other,
    100.0,
    "",
    "",
    demo_stats$pct_hs_or_less,
    demo_stats$pct_some_college,
    demo_stats$pct_bachelors_plus,
    100.0
  )
)

# Format table
demo_table %>%
  gt() %>%
  cols_label(
    characteristic = "",
    value = ""
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = characteristic %in% 
        c("Sample Size (N)", "Race/Ethnicity (%)", "Education (%)")
    )
  ) %>%
  tab_header(
    title = md("**Sample Demographics**")
  ) %>%
  tab_source_note(
    source_note = sprintf(
      "Note: Based on %d respondents. Missing data: Race/Ethnicity (%d), Education (%d). Percentages based on valid responses.",
      demo_stats$n_total,
      demo_stats$n_total - demo_stats$n_race_valid,
      demo_stats$n_total - demo_stats$n_educ_valid
    )
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )
Sample Demographics
Sample Size (N) 7453
Race/Ethnicity (%)
White 73.5
Black 8.8
Hispanic 9
Asian/Pacific Islander 3.4
Other/Multiple Race 5.3
Total 100
Education (%)
High School or Less 19.8
Some College 34.3
Bachelor's or Higher 45.9
Total 100
Note: Based on 7453 respondents. Missing data: Race/Ethnicity (81), Education (116). Percentages based on valid responses.

Exercise 4: Age and Party ID

Create a descriptive analysis that examines:

  1. Age statistics by party identification:

    • Mean age for each party ID group

    • Standard deviation within groups

    • Compare age distributions across different party affiliations

  2. Present results in a professional table using gt

Hint: This combines continuous and categorical variables in a way different from our tutorial examples

# First examine the variables
table(anes_2020$PartyID)
## 
##            Strong democrat   Not very strong democrat 
##                       1796                        790 
##       Independent-democrat                Independent 
##                        881                        876 
##     Independent-republican Not very strong republican 
##                        782                        758 
##          Strong republican 
##                       1545
summary(anes_2020$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   18.00   37.00   53.00   51.83   66.00   80.00     294
# Calculate age statistics by party
age_party_stats <- anes_2020 %>%
  # Remove missing values first
  filter(!is.na(PartyID), !is.na(Age)) %>%
  # Group by party and calculate statistics
  group_by(PartyID) %>%
  summarize(
    n = n(),
    mean_age = round(mean(Age), 1),
    sd_age = round(sd(Age), 1),
    median_age = round(median(Age), 1),
    pct = round(100 * n() / sum(!is.na(anes_2020$PartyID) & !is.na(anes_2020$Age)), 1)
  ) %>%
  # Add total row
  bind_rows(
    anes_2020 %>%
      filter(!is.na(PartyID), !is.na(Age)) %>%
      summarize(
        PartyID = "Total",
        n = n(),
        mean_age = round(mean(Age), 1),
        sd_age = round(sd(Age), 1),
        median_age = round(median(Age), 1),
        pct = 100.0
      )
  )
age_party_stats
## # A tibble: 8 × 6
##   PartyID                        n mean_age sd_age median_age   pct
##   <chr>                      <int>    <dbl>  <dbl>      <dbl> <dbl>
## 1 Strong democrat             1757     54.4   16.9         58  24.6
## 2 Not very strong democrat     768     46.1   16.7         44  10.7
## 3 Independent-democrat         856     47.7   17.3         45  12  
## 4 Independent                  812     48.2   17.1         46  11.4
## 5 Independent-republican       748     52.9   16.4         54  10.5
## 6 Not very strong republican   725     50.5   16.2         50  10.1
## 7 Strong republican           1482     56.3   16.5         59  20.7
## 8 Total                       7148     51.8   17.1         53 100
# Create formatted table
age_party_stats %>%
  gt() %>%
  cols_label(
    PartyID = "Party Identification",
    n = "N",
    pct = "% Sample",
    mean_age = "Mean",
    sd_age = "SD",
    median_age = "Median"
  ) %>%
  fmt_number(
    columns = n,
    decimals = 0,
    use_seps = TRUE
  ) %>%
  fmt_number(
    columns = c(mean_age, sd_age, median_age, pct),
    decimals = 1
  ) %>%
  tab_spanner(
    label = "Sample Size",
    columns = c(n, pct)
  ) %>%
  tab_spanner(
    label = "Age Distribution",
    columns = c(mean_age, sd_age, median_age)
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = PartyID == "Total"
    )
  ) %>%
  tab_header(
    title = md("**Age Distribution by Party Identification**"),
    subtitle = "Mean age and dispersion measures by party affiliation"
  ) %>%
  tab_source_note(
    source_note = "Note: Analysis excludes missing age and party ID responses. SD = Standard Deviation."
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )
Age Distribution by Party Identification
Mean age and dispersion measures by party affiliation
Party Identification
Sample Size
Age Distribution
N % Sample Mean SD Median
Strong democrat 1,757 24.6 54.4 16.9 58.0
Not very strong democrat 768 10.7 46.1 16.7 44.0
Independent-democrat 856 12.0 47.7 17.3 45.0
Independent 812 11.4 48.2 17.1 46.0
Independent-republican 748 10.5 52.9 16.4 54.0
Not very strong republican 725 10.1 50.5 16.2 50.0
Strong republican 1,482 20.7 56.3 16.5 59.0
Total 7,148 100.0 51.8 17.1 53.0
Note: Analysis excludes missing age and party ID responses. SD = Standard Deviation.

Exercise 5: Education and Party

Create a summary table showing the relationship between education level and party identification:

  1. Calculate the percent of each education level that identifies as Democrat (including independent-democrat), Republican (including independent-republican), or Independent

  2. Create a professional formatted table that shows:

    • Education levels

    • Distribution across these three party groups

    • Include row totals to verify percentages sum to 100%

    • Use proper formatting for numbers and percentages

Hint: Remember to combine party categories first, then calculate percentages within education levels

# First examine the variables
table(anes_2020$PartyID)
## 
##            Strong democrat   Not very strong democrat 
##                       1796                        790 
##       Independent-democrat                Independent 
##                        881                        876 
##     Independent-republican Not very strong republican 
##                        782                        758 
##          Strong republican 
##                       1545
table(anes_2020$Education)
## 
## Less than HS  High school      Post HS   Bachelor's     Graduate 
##          312         1160         2514         1877         1474
# Create party groups and calculate distribution
edu_party_dist <- anes_2020 %>%
  # Remove missing values
  filter(!is.na(PartyID), !is.na(Education)) %>%
  # Create simplified party groups
  mutate(
    party_group = case_when(
      PartyID %in% c("Strong democrat", "Not very strong democrat", 
                     "Independent-democrat") ~ "Democrat",
      PartyID %in% c("Strong republican", "Not very strong republican", 
                     "Independent-republican") ~ "Republican",
      PartyID == "Independent" ~ "Independent",
    ),
    # Order education levels
    Education = factor(Education, 
      levels = c("Less than HS", "High school", "Post HS", 
                 "Bachelor's", "Graduate"))
  ) %>%
  # Group and calculate percentages
  group_by(Education, party_group) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(Education) %>%
  mutate(
    total = sum(count),
    pct = round(100 * count / total, 1)
  ) %>%
  # Add total row for each education level
  group_by(Education) %>%
  arrange(Education, desc(count)) %>%
  bind_rows(
    group_by(., Education) %>%
      summarize(
        party_group = "Total",
        count = sum(count),
        total = first(total),
        pct = 100.0,
        .groups = "drop"
      )
  )

edu_party_dist
## # A tibble: 20 × 5
## # Groups:   Education [5]
##    Education    party_group count total   pct
##    <fct>        <chr>       <int> <int> <dbl>
##  1 Less than HS Democrat      129   308  41.9
##  2 Less than HS Republican    119   308  38.6
##  3 Less than HS Independent    60   308  19.5
##  4 High school  Republican    518  1158  44.7
##  5 High school  Democrat      450  1158  38.9
##  6 High school  Independent   190  1158  16.4
##  7 Post HS      Republican   1155  2508  46.1
##  8 Post HS      Democrat     1023  2508  40.8
##  9 Post HS      Independent   330  2508  13.2
## 10 Bachelor's   Democrat      944  1874  50.4
## 11 Bachelor's   Republican    758  1874  40.4
## 12 Bachelor's   Independent   172  1874   9.2
## 13 Graduate     Democrat      872  1471  59.3
## 14 Graduate     Republican    495  1471  33.7
## 15 Graduate     Independent   104  1471   7.1
## 16 Less than HS Total         308   308 100  
## 17 High school  Total        1158  1158 100  
## 18 Post HS      Total        2508  2508 100  
## 19 Bachelor's   Total        1874  1874 100  
## 20 Graduate     Total        1471  1471 100
# Create formatted table
edu_party_dist %>%
  gt() %>%
  cols_label(
    Education = "Education Level",
    party_group = "Party Group",
    count = "Count",
    pct = "Percent (%)"
  ) %>%
  fmt_number(
    columns = count,
    decimals = 0,
    use_seps = TRUE
  ) %>%
  fmt_number(
    columns = pct,
    decimals = 1
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = party_group == "Total"
    )
  ) %>%
  tab_header(
    title = md("**Party Identification by Education Level**"),
    subtitle = "Distribution of party groups across educational attainment"
  ) %>%
  tab_source_note(
    source_note = paste0(
      "Note: Democrats include Independent-Democrats, Republicans include ",
      "Independent-Republicans. Percentages calculated within education levels."
    )
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )
Party Identification by Education Level
Distribution of party groups across educational attainment
Party Group Count total Percent (%)
Less than HS
Democrat 129 308 41.9
Republican 119 308 38.6
Independent 60 308 19.5
Total 308 308 100.0
High school
Republican 518 1158 44.7
Democrat 450 1158 38.9
Independent 190 1158 16.4
Total 1,158 1158 100.0
Post HS
Republican 1,155 2508 46.1
Democrat 1,023 2508 40.8
Independent 330 2508 13.2
Total 2,508 2508 100.0
Bachelor's
Democrat 944 1874 50.4
Republican 758 1874 40.4
Independent 172 1874 9.2
Total 1,874 1874 100.0
Graduate
Democrat 872 1471 59.3
Republican 495 1471 33.7
Independent 104 1471 7.1
Total 1,471 1471 100.0
Note: Democrats include Independent-Democrats, Republicans include Independent-Republicans. Percentages calculated within education levels.

Additional 5 exercises

Exercise 1: Trust Level Comparison

Create a table comparing trust levels in government:

  • Calculate percentage who trust “Most of the time” or “Always”

  • Calculate percentage who trust “Never” or “Some of the time”

  • Show this comparison in a formatted table

anes_2020 %>%
  filter(!is.na(TrustGovernment)) %>%
  summarize(
    high_trust = mean(TrustGovernment %in% 
                     c("Most of the time", "Always")) * 100,
    low_trust = mean(TrustGovernment %in% 
                    c("Never", "Some of the time")) * 100,
    n_total = n()
  ) %>%
  gt() %>%
  fmt_number(
    columns = c(high_trust, low_trust),
    decimals = 1
  ) %>%
  cols_label(
    high_trust = "High Trust %",
    low_trust = "Low Trust %"
  ) %>%
  tab_header(title = "Trust Level Distribution") %>%
  tab_source_note(sprintf("Based on %d valid responses", .$n_total))
Trust Level Distribution
High Trust % Low Trust % n_total
14.8 54.1 7424

Exercise 2: Age Group Analysis

Create a table showing trust patterns across age groups:

  • Create three age groups (18-34, 35-64, 65+)

For each group calculate:

  • Number of respondents
  • Mean trust level
  • Present in a formatted table
anes_2020 %>%
  filter(!is.na(Age), !is.na(TrustGovernment)) %>%
  mutate(
    age_group = case_when(
      Age < 35 ~ "18-34",
      Age < 65 ~ "35-64",
      TRUE ~ "65+"
    )
  ) %>%
  group_by(age_group) %>%
  summarize(
    n = n(),
    pct_trust = mean(TrustGovernment %in% 
                    c("Most of the time", "Always")) * 100
  ) %>%
  gt() %>%
  fmt_number(
    columns = pct_trust,
    decimals = 1
  ) %>%
  cols_label(
    age_group = "Age Group",
    n = "Count",
    pct_trust = "% High Trust"
  ) %>%
  tab_header(title = "Trust Levels by Age Group")
Trust Levels by Age Group
Age Group Count % High Trust
18-34 1435 12.1
35-64 3648 13.6
65+ 2058 19.0

Exercise 3: Education Level Analysis

Create a table showing trust patterns by education:

  • Group Education into 3 levels

For each education level:

  • Calculate percent who “Never” trust government
  • Show sample size
  • Present in a formatted table
anes_2020 %>%
  filter(!is.na(Education), !is.na(TrustGovernment)) %>%
  mutate(
    edu_level = case_when(
      Education %in% c("Less than HS", "High school") ~ "High school or less",
      Education == "Post HS" ~ "Some post-secondary",
      TRUE ~ "Bachelor's or higher"
    )
  ) %>%
  group_by(edu_level) %>%
  summarize(
    n = n(),
    pct_never = mean(TrustGovernment == "Never") * 100
  ) %>%
  gt() %>%
  fmt_number(
    columns = pct_never,
    decimals = 1
  ) %>%
  cols_label(
    edu_level = "Education Level",
    n = "Count",
    pct_never = "% Never Trust"
  ) %>%
  tab_header(title = "Trust by Education Level")
Trust by Education Level
Education Level Count % Never Trust
Bachelor's or higher 3339 7.3
High school or less 1466 12.4
Some post-secondary 2507 10.7

Exercise 4: Trust Response Distribution

Create a distribution table for TrustPeople:

Show response categories

  • Calculate percentages for each level
  • Include total sample size
anes_2020 %>%
  filter(!is.na(TrustPeople)) %>%
  group_by(TrustPeople) %>%
  summarize(
    count = n()
  ) %>%
  mutate(
    percent = count / sum(count) * 100
  ) %>%
  gt() %>%
  fmt_number(
    columns = percent,
    decimals = 1
  ) %>%
  cols_label(
    TrustPeople = "Response",
    count = "Count",
    percent = "Percent"
  ) %>%
  tab_header(title = "Trust in People Distribution")
Trust in People Distribution
Response Count Percent
Always 48 0.6
Most of the time 3511 47.2
About half the time 2020 27.2
Some of the time 1597 21.5
Never 264 3.5

Exercise 5: Demographics Overview

Create a simple summary table showing:

  • Total respondents

  • Mean age

  • Percent female

  • Percent with Bachelor’s or higher

anes_2020 %>%
  summarize(
    total_resp = n(),
    mean_age = mean(Age, na.rm = TRUE),
    pct_female = mean(Gender == "Female", na.rm = TRUE) * 100,
    pct_college = mean(Education %in% 
                      c("Bachelor's", "Graduate"), na.rm = TRUE) * 100
  ) %>%
  gt() %>%
  fmt_number(
    columns = c(mean_age, pct_female, pct_college),
    decimals = 1
  ) %>%
  cols_label(
    total_resp = "Total Respondents",
    mean_age = "Mean Age",
    pct_female = "% Female",
    pct_college = "% Bachelor's+"
  ) %>%
  tab_header(title = "Sample Demographics Summary")
Sample Demographics Summary
Total Respondents Mean Age % Female % Bachelor's+
7453 51.8 54.4 45.0