# Load required packages
packages <- c("tidyverse", "srvyr", "srvyrexploR", "broom", "gt")

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "srvyr"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "srvyrexploR" "srvyr"       "lubridate"   "forcats"     "stringr"    
##  [6] "dplyr"       "purrr"       "readr"       "tidyr"       "tibble"     
## [11] "ggplot2"     "tidyverse"   "stats"       "graphics"    "grDevices"  
## [16] "utils"       "datasets"    "methods"     "base"       
## 
## [[4]]
##  [1] "broom"       "srvyrexploR" "srvyr"       "lubridate"   "forcats"    
##  [6] "stringr"     "dplyr"       "purrr"       "readr"       "tidyr"      
## [11] "tibble"      "ggplot2"     "tidyverse"   "stats"       "graphics"   
## [16] "grDevices"   "utils"       "datasets"    "methods"     "base"       
## 
## [[5]]
##  [1] "gt"          "broom"       "srvyrexploR" "srvyr"       "lubridate"  
##  [6] "forcats"     "stringr"     "dplyr"       "purrr"       "readr"      
## [11] "tidyr"       "tibble"      "ggplot2"     "tidyverse"   "stats"      
## [16] "graphics"    "grDevices"   "utils"       "datasets"    "methods"    
## [21] "base"

Practice Exercise Solutions

Exercise 1: Trust in People

First, examine the survey question for TrustPeople:

Look up the exact question wording in the codebook
Identify all possible response categories

Then analyze the responses:

Check the data quality by calculating:
- Total number of respondents
- Number of valid responses
- Number of missing responses
- Response rate
Create a distribution table showing counts and percentages for each response category

Hint: Follow the same steps we used for TrustGovernment, but apply them to TrustPeople. Remember to examine the question context first

First, let’s examine the survey question: - Question text: “Generally speaking, how often can you trust other people?” - Response options: Always, Most of the time, About half the time, Some of the time, Never

# First examine the raw distribution
table(anes_2020$TrustPeople)

## 
##              Always    Most of the time About half the time    Some of the time 
##                  48                3511                2020                1597 
##               Never 
##                 264

# Check data quality
trust_people_quality <- anes_2020 %>%
  summarize(
    total_respondents = n(),
    valid_responses = sum(!is.na(TrustPeople)),
    missing_responses = sum(is.na(TrustPeople)),
    response_rate = round(100 * valid_responses / total_respondents, 1)
  )

trust_people_quality

## # A tibble: 1 × 4
##   total_respondents valid_responses missing_responses response_rate
##               <int>           <int>             <int>         <dbl>
## 1              7453            7440                13          99.8

# Calculate total valid responses
total_valid_trust <- sum(!is.na(anes_2020$TrustPeople))

total_valid_trust

## [1] 7440

# Create distribution with proper ordering
trust_people_dist <- anes_2020 %>%
  filter(!is.na(TrustPeople)) %>%
  mutate(TrustPeople = factor(TrustPeople, 
    levels = c("Always", "Most of the time", "About half the time", 
               "Some of the time", "Never"))) %>%
  group_by(TrustPeople) %>%
  summarize(
    count = n(),
    percentage = round(100 * count / total_valid_trust, 1)
  ) %>%
  arrange(desc(count))

trust_people_dist

## # A tibble: 5 × 3
##   TrustPeople         count percentage
##   <fct>               <int>      <dbl>
## 1 Most of the time     3511       47.2
## 2 About half the time  2020       27.2
## 3 Some of the time     1597       21.5
## 4 Never                 264        3.5
## 5 Always                 48        0.6

# Create professional table
trust_people_dist %>%
  gt() %>%
  cols_label(
    TrustPeople = "Trust Level",
    count = "Responses (N)",
    percentage = "Percent (%)"
  ) %>%
  fmt_number(
    columns = count,
    decimals = 0,
    use_seps = TRUE
  ) %>%
  fmt_number(
    columns = percentage,
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Distribution of Trust in Other People**"),
    subtitle = "Generally speaking, how often can you trust other people?"
  ) %>%
  tab_source_note(
    source_note = sprintf(
      "Note: Based on %d valid responses (%.1f%% response rate).",
      trust_people_quality$valid_responses,
      trust_people_quality$response_rate
    )
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )

Trust Level	Responses (N)	Percent (%)
Distribution of Trust in Other People
Generally speaking, how often can you trust other people?
Most of the time	3,511	47.2
About half the time	2,020	27.2
Some of the time	1,597	21.5
Never	264	3.5
Always	48	0.6
Note: Based on 7440 valid responses (99.8% response rate).

Exercise 2: Campaign Interest Analysis

Start by understanding the variable:

Look up the exact question text for CampaignInterest in the codebook
Note how the question was framed to respondents

Then create a summary analysis:

Calculate data quality metrics (response rates, missing data)
Create a formatted table showing:
- Response counts
- Percentages
- Valid percentages (excluding missing data)

Hint: Think about how the question wording might affect response patterns

Question text: “Some people don’t pay much attention to political campaigns. How about you? Would you say that you have been very much interested, somewhat interested or not much interested in the political campaigns so far this year?”

# First examine the variable
table(anes_2020$CampaignInterest)

## 
## Very much interested  Somewhat interested  Not much interested 
##                 3940                 2569                  943

# Calculate data quality metrics
campaign_quality <- anes_2020 %>%
  summarize(
    total_respondents = n(),
    valid_responses = sum(!is.na(CampaignInterest)),
    missing_responses = sum(is.na(CampaignInterest)),
    response_rate = round(100 * valid_responses / total_respondents, 1)
  )
campaign_quality

## # A tibble: 1 × 4
##   total_respondents valid_responses missing_responses response_rate
##               <int>           <int>             <int>         <dbl>
## 1              7453            7452                 1           100

# Store total valid responses
total_valid_campaign <- sum(!is.na(anes_2020$CampaignInterest))

# Create distribution with proper ordering
campaign_dist <- anes_2020 %>%
  filter(!is.na(CampaignInterest)) %>%
  mutate(CampaignInterest = factor(CampaignInterest,
    levels = c("Very much interested", "Somewhat interested", 
               "Not much interested"))) %>%
  group_by(CampaignInterest) %>%
  summarize(
    count = n(),
    percentage = round(100 * count / total_valid_campaign)
  ) %>%
  arrange(desc(count)) %>%
  # Add total row
  bind_rows(
    summarize(.,
      CampaignInterest = "Total",
      count = sum(count),
      percentage = sum(percentage)
    )
  )
campaign_dist

## # A tibble: 4 × 3
##   CampaignInterest     count percentage
##   <chr>                <int>      <dbl>
## 1 Very much interested  3940         53
## 2 Somewhat interested   2569         34
## 3 Not much interested    943         13
## 4 Total                 7452        100

# Format table
campaign_dist %>%
  gt() %>%
  cols_label(
    CampaignInterest = "Interest Level",
    count = "Count (N)",
    percentage = "Valid Percent (%)"
  ) %>%
  fmt_number(
    columns = count,
    decimals = 0,
    use_seps = TRUE
  ) %>%
  fmt_number(
    columns = percentage,
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Campaign Interest Distribution**"),
    subtitle = "Level of interest in political campaigns"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = CampaignInterest == "Total"
    )
  ) %>%
  tab_source_note(
    source_note = sprintf(
      "Note: Based on %d valid responses.",
      campaign_quality$valid_responses,
      campaign_quality$response_rate,
      campaign_quality$missing_responses
    )
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )

Interest Level	Count (N)	Valid Percent (%)
Campaign Interest Distribution
Level of interest in political campaigns
Very much interested	3,940	53.0
Somewhat interested	2,569	34.0
Not much interested	943	13.0
Total	7,452	100.0
Note: Based on 7452 valid responses.

Exercise 3: Race and Education Demographics Table

Create a professional table that combines:

Race/ethnicity distribution using RaceEth:
- Counts and percentages for each category
- Note any missing data
Education levels using Education:
- Grouped into meaningful categories
- Percentages for each level

Format using gt with appropriate styling and clear labels.

Hint: This is different from our tutorial example as it combines two categorical demographic variables

# First examine the variables
table(anes_2020$RaceEth)

## 
##               White               Black            Hispanic        Asian, NH/PI 
##                5420                 650                 662                 248 
##               AI/AN Other/multiple race 
##                 155                 237

table(anes_2020$Education)

## 
## Less than HS  High school      Post HS   Bachelor's     Graduate 
##          312         1160         2514         1877         1474

# Create demo stats
demo_stats <- anes_2020 %>%
  summarize(
    # Basic counts
    n_total = n(),
    n_race_valid = sum(!is.na(RaceEth)),
    n_educ_valid = sum(!is.na(Education)),
    
    # Race percentages
    pct_white = round(100 * mean(RaceEth == "White", na.rm = TRUE), 1),
    pct_black = round(100 * mean(RaceEth == "Black", na.rm = TRUE), 1),
    pct_hispanic = round(100 * mean(RaceEth == "Hispanic", na.rm = TRUE), 1),
    pct_asian = round(100 * mean(RaceEth == "Asian, NH/PI", na.rm = TRUE), 1),
    pct_other = 100 - (
      round(100 * mean(RaceEth == "White", na.rm = TRUE), 1) +
      round(100 * mean(RaceEth == "Black", na.rm = TRUE), 1) +
      round(100 * mean(RaceEth == "Hispanic", na.rm = TRUE), 1) +
      round(100 * mean(RaceEth == "Asian, NH/PI", na.rm = TRUE), 1)
    ),
    
    # Education percentages
    pct_hs_or_less = round(100 * mean(Education %in% 
                                     c("Less than HS", "High school"), 
                                   na.rm = TRUE), 1),
    pct_some_college = round(100 * mean(Education == "Post HS", 
                                     na.rm = TRUE), 1),
    pct_bachelors_plus = 100 - (
      round(100 * mean(Education %in% c("Less than HS", "High school"), 
                      na.rm = TRUE), 1) +
      round(100 * mean(Education == "Post HS", na.rm = TRUE), 1)
    )
  )
demo_stats

## # A tibble: 1 × 11
##   n_total n_race_valid n_educ_valid pct_white pct_black pct_hispanic pct_asian
##     <int>        <int>        <int>     <dbl>     <dbl>        <dbl>     <dbl>
## 1    7453         7372         7337      73.5       8.8            9       3.4
## # ℹ 4 more variables: pct_other <dbl>, pct_hs_or_less <dbl>,
## #   pct_some_college <dbl>, pct_bachelors_plus <dbl>

# Create formatted table structure
demo_table <- data.frame(
  characteristic = c(
    "Sample Size (N)",
    "",
    "Race/Ethnicity (%)",
    "    White",
    "    Black",
    "    Hispanic",
    "    Asian/Pacific Islander",
    "    Other/Multiple Race",
    "    Total",
    "",
    "Education (%)",
    "    High School or Less",
    "    Some College",
    "    Bachelor's or Higher",
    "    Total"
  ),
  value = c(
    demo_stats$n_total,
    "",
    "",
    demo_stats$pct_white,
    demo_stats$pct_black,
    demo_stats$pct_hispanic,
    demo_stats$pct_asian,
    demo_stats$pct_other,
    100.0,
    "",
    "",
    demo_stats$pct_hs_or_less,
    demo_stats$pct_some_college,
    demo_stats$pct_bachelors_plus,
    100.0
  )
)

# Format table
demo_table %>%
  gt() %>%
  cols_label(
    characteristic = "",
    value = ""
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = characteristic %in% 
        c("Sample Size (N)", "Race/Ethnicity (%)", "Education (%)")
    )
  ) %>%
  tab_header(
    title = md("**Sample Demographics**")
  ) %>%
  tab_source_note(
    source_note = sprintf(
      "Note: Based on %d respondents. Missing data: Race/Ethnicity (%d), Education (%d). Percentages based on valid responses.",
      demo_stats$n_total,
      demo_stats$n_total - demo_stats$n_race_valid,
      demo_stats$n_total - demo_stats$n_educ_valid
    )
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )


Sample Demographics
Sample Size (N)	7453

Race/Ethnicity (%)
White	73.5
Black	8.8
Hispanic	9
Asian/Pacific Islander	3.4
Other/Multiple Race	5.3
Total	100

Education (%)
High School or Less	19.8
Some College	34.3
Bachelor's or Higher	45.9
Total	100
Note: Based on 7453 respondents. Missing data: Race/Ethnicity (81), Education (116). Percentages based on valid responses.

Exercise 4: Age and Party ID

Create a descriptive analysis that examines:

Age statistics by party identification:
- Mean age for each party ID group
- Standard deviation within groups
- Compare age distributions across different party affiliations
Present results in a professional table using gt

Hint: This combines continuous and categorical variables in a way different from our tutorial examples

# First examine the variables
table(anes_2020$PartyID)

## 
##            Strong democrat   Not very strong democrat 
##                       1796                        790 
##       Independent-democrat                Independent 
##                        881                        876 
##     Independent-republican Not very strong republican 
##                        782                        758 
##          Strong republican 
##                       1545

summary(anes_2020$Age)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   18.00   37.00   53.00   51.83   66.00   80.00     294

# Calculate age statistics by party
age_party_stats <- anes_2020 %>%
  # Remove missing values first
  filter(!is.na(PartyID), !is.na(Age)) %>%
  # Group by party and calculate statistics
  group_by(PartyID) %>%
  summarize(
    n = n(),
    mean_age = round(mean(Age), 1),
    sd_age = round(sd(Age), 1),
    median_age = round(median(Age), 1),
    pct = round(100 * n() / sum(!is.na(anes_2020$PartyID) & !is.na(anes_2020$Age)), 1)
  ) %>%
  # Add total row
  bind_rows(
    anes_2020 %>%
      filter(!is.na(PartyID), !is.na(Age)) %>%
      summarize(
        PartyID = "Total",
        n = n(),
        mean_age = round(mean(Age), 1),
        sd_age = round(sd(Age), 1),
        median_age = round(median(Age), 1),
        pct = 100.0
      )
  )
age_party_stats

## # A tibble: 8 × 6
##   PartyID                        n mean_age sd_age median_age   pct
##   <chr>                      <int>    <dbl>  <dbl>      <dbl> <dbl>
## 1 Strong democrat             1757     54.4   16.9         58  24.6
## 2 Not very strong democrat     768     46.1   16.7         44  10.7
## 3 Independent-democrat         856     47.7   17.3         45  12  
## 4 Independent                  812     48.2   17.1         46  11.4
## 5 Independent-republican       748     52.9   16.4         54  10.5
## 6 Not very strong republican   725     50.5   16.2         50  10.1
## 7 Strong republican           1482     56.3   16.5         59  20.7
## 8 Total                       7148     51.8   17.1         53 100

# Create formatted table
age_party_stats %>%
  gt() %>%
  cols_label(
    PartyID = "Party Identification",
    n = "N",
    pct = "% Sample",
    mean_age = "Mean",
    sd_age = "SD",
    median_age = "Median"
  ) %>%
  fmt_number(
    columns = n,
    decimals = 0,
    use_seps = TRUE
  ) %>%
  fmt_number(
    columns = c(mean_age, sd_age, median_age, pct),
    decimals = 1
  ) %>%
  tab_spanner(
    label = "Sample Size",
    columns = c(n, pct)
  ) %>%
  tab_spanner(
    label = "Age Distribution",
    columns = c(mean_age, sd_age, median_age)
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = PartyID == "Total"
    )
  ) %>%
  tab_header(
    title = md("**Age Distribution by Party Identification**"),
    subtitle = "Mean age and dispersion measures by party affiliation"
  ) %>%
  tab_source_note(
    source_note = "Note: Analysis excludes missing age and party ID responses. SD = Standard Deviation."
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )

Party Identification	Sample Size		Age Distribution
Age Distribution by Party Identification
Mean age and dispersion measures by party affiliation
Party Identification	N	% Sample	Mean	SD	Median
Strong democrat	1,757	24.6	54.4	16.9	58.0
Not very strong democrat	768	10.7	46.1	16.7	44.0
Independent-democrat	856	12.0	47.7	17.3	45.0
Independent	812	11.4	48.2	17.1	46.0
Independent-republican	748	10.5	52.9	16.4	54.0
Not very strong republican	725	10.1	50.5	16.2	50.0
Strong republican	1,482	20.7	56.3	16.5	59.0
Total	7,148	100.0	51.8	17.1	53.0
Note: Analysis excludes missing age and party ID responses. SD = Standard Deviation.

Exercise 5: Education and Party

Create a summary table showing the relationship between education level and party identification:

Calculate the percent of each education level that identifies as Democrat (including independent-democrat), Republican (including independent-republican), or Independent
Create a professional formatted table that shows:
- Education levels
- Distribution across these three party groups
- Include row totals to verify percentages sum to 100%
- Use proper formatting for numbers and percentages

Hint: Remember to combine party categories first, then calculate percentages within education levels

# First examine the variables
table(anes_2020$PartyID)

## 
##            Strong democrat   Not very strong democrat 
##                       1796                        790 
##       Independent-democrat                Independent 
##                        881                        876 
##     Independent-republican Not very strong republican 
##                        782                        758 
##          Strong republican 
##                       1545

table(anes_2020$Education)

## 
## Less than HS  High school      Post HS   Bachelor's     Graduate 
##          312         1160         2514         1877         1474

# Create party groups and calculate distribution
edu_party_dist <- anes_2020 %>%
  # Remove missing values
  filter(!is.na(PartyID), !is.na(Education)) %>%
  # Create simplified party groups
  mutate(
    party_group = case_when(
      PartyID %in% c("Strong democrat", "Not very strong democrat", 
                     "Independent-democrat") ~ "Democrat",
      PartyID %in% c("Strong republican", "Not very strong republican", 
                     "Independent-republican") ~ "Republican",
      PartyID == "Independent" ~ "Independent",
    ),
    # Order education levels
    Education = factor(Education, 
      levels = c("Less than HS", "High school", "Post HS", 
                 "Bachelor's", "Graduate"))
  ) %>%
  # Group and calculate percentages
  group_by(Education, party_group) %>%
  summarize(count = n(), .groups = "drop") %>%
  group_by(Education) %>%
  mutate(
    total = sum(count),
    pct = round(100 * count / total, 1)
  ) %>%
  # Add total row for each education level
  group_by(Education) %>%
  arrange(Education, desc(count)) %>%
  bind_rows(
    group_by(., Education) %>%
      summarize(
        party_group = "Total",
        count = sum(count),
        total = first(total),
        pct = 100.0,
        .groups = "drop"
      )
  )

edu_party_dist

## # A tibble: 20 × 5
## # Groups:   Education [5]
##    Education    party_group count total   pct
##    <fct>        <chr>       <int> <int> <dbl>
##  1 Less than HS Democrat      129   308  41.9
##  2 Less than HS Republican    119   308  38.6
##  3 Less than HS Independent    60   308  19.5
##  4 High school  Republican    518  1158  44.7
##  5 High school  Democrat      450  1158  38.9
##  6 High school  Independent   190  1158  16.4
##  7 Post HS      Republican   1155  2508  46.1
##  8 Post HS      Democrat     1023  2508  40.8
##  9 Post HS      Independent   330  2508  13.2
## 10 Bachelor's   Democrat      944  1874  50.4
## 11 Bachelor's   Republican    758  1874  40.4
## 12 Bachelor's   Independent   172  1874   9.2
## 13 Graduate     Democrat      872  1471  59.3
## 14 Graduate     Republican    495  1471  33.7
## 15 Graduate     Independent   104  1471   7.1
## 16 Less than HS Total         308   308 100  
## 17 High school  Total        1158  1158 100  
## 18 Post HS      Total        2508  2508 100  
## 19 Bachelor's   Total        1874  1874 100  
## 20 Graduate     Total        1471  1471 100

# Create formatted table
edu_party_dist %>%
  gt() %>%
  cols_label(
    Education = "Education Level",
    party_group = "Party Group",
    count = "Count",
    pct = "Percent (%)"
  ) %>%
  fmt_number(
    columns = count,
    decimals = 0,
    use_seps = TRUE
  ) %>%
  fmt_number(
    columns = pct,
    decimals = 1
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = party_group == "Total"
    )
  ) %>%
  tab_header(
    title = md("**Party Identification by Education Level**"),
    subtitle = "Distribution of party groups across educational attainment"
  ) %>%
  tab_source_note(
    source_note = paste0(
      "Note: Democrats include Independent-Democrats, Republicans include ",
      "Independent-Republicans. Percentages calculated within education levels."
    )
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )

Party Group	Count	total	Percent (%)
Party Identification by Education Level
Distribution of party groups across educational attainment
Less than HS
Democrat	129	308	41.9
Republican	119	308	38.6
Independent	60	308	19.5
Total	308	308	100.0
High school
Republican	518	1158	44.7
Democrat	450	1158	38.9
Independent	190	1158	16.4
Total	1,158	1158	100.0
Post HS
Republican	1,155	2508	46.1
Democrat	1,023	2508	40.8
Independent	330	2508	13.2
Total	2,508	2508	100.0
Bachelor's
Democrat	944	1874	50.4
Republican	758	1874	40.4
Independent	172	1874	9.2
Total	1,874	1874	100.0
Graduate
Democrat	872	1471	59.3
Republican	495	1471	33.7
Independent	104	1471	7.1
Total	1,471	1471	100.0
Note: Democrats include Independent-Democrats, Republicans include Independent-Republicans. Percentages calculated within education levels.

Additional 5 exercises

Exercise 1: Trust Level Comparison

Create a table comparing trust levels in government:

Calculate percentage who trust “Most of the time” or “Always”
Calculate percentage who trust “Never” or “Some of the time”
Show this comparison in a formatted table

anes_2020 %>%
  filter(!is.na(TrustGovernment)) %>%
  summarize(
    high_trust = mean(TrustGovernment %in% 
                     c("Most of the time", "Always")) * 100,
    low_trust = mean(TrustGovernment %in% 
                    c("Never", "Some of the time")) * 100,
    n_total = n()
  ) %>%
  gt() %>%
  fmt_number(
    columns = c(high_trust, low_trust),
    decimals = 1
  ) %>%
  cols_label(
    high_trust = "High Trust %",
    low_trust = "Low Trust %"
  ) %>%
  tab_header(title = "Trust Level Distribution") %>%
  tab_source_note(sprintf("Based on %d valid responses", .$n_total))

High Trust %	Low Trust %	n_total
Trust Level Distribution
14.8	54.1	7424

Exercise 2: Age Group Analysis

Create a table showing trust patterns across age groups:

Create three age groups (18-34, 35-64, 65+)

For each group calculate:

Number of respondents
Mean trust level
Present in a formatted table

anes_2020 %>%
  filter(!is.na(Age), !is.na(TrustGovernment)) %>%
  mutate(
    age_group = case_when(
      Age < 35 ~ "18-34",
      Age < 65 ~ "35-64",
      TRUE ~ "65+"
    )
  ) %>%
  group_by(age_group) %>%
  summarize(
    n = n(),
    pct_trust = mean(TrustGovernment %in% 
                    c("Most of the time", "Always")) * 100
  ) %>%
  gt() %>%
  fmt_number(
    columns = pct_trust,
    decimals = 1
  ) %>%
  cols_label(
    age_group = "Age Group",
    n = "Count",
    pct_trust = "% High Trust"
  ) %>%
  tab_header(title = "Trust Levels by Age Group")

Age Group	Count	% High Trust
Trust Levels by Age Group
18-34	1435	12.1
35-64	3648	13.6
65+	2058	19.0

Exercise 3: Education Level Analysis

Create a table showing trust patterns by education:

Group Education into 3 levels

For each education level:

Calculate percent who “Never” trust government
Show sample size
Present in a formatted table

anes_2020 %>%
  filter(!is.na(Education), !is.na(TrustGovernment)) %>%
  mutate(
    edu_level = case_when(
      Education %in% c("Less than HS", "High school") ~ "High school or less",
      Education == "Post HS" ~ "Some post-secondary",
      TRUE ~ "Bachelor's or higher"
    )
  ) %>%
  group_by(edu_level) %>%
  summarize(
    n = n(),
    pct_never = mean(TrustGovernment == "Never") * 100
  ) %>%
  gt() %>%
  fmt_number(
    columns = pct_never,
    decimals = 1
  ) %>%
  cols_label(
    edu_level = "Education Level",
    n = "Count",
    pct_never = "% Never Trust"
  ) %>%
  tab_header(title = "Trust by Education Level")

Education Level	Count	% Never Trust
Trust by Education Level
Bachelor's or higher	3339	7.3
High school or less	1466	12.4
Some post-secondary	2507	10.7

Exercise 4: Trust Response Distribution

Create a distribution table for TrustPeople:

Show response categories

Calculate percentages for each level
Include total sample size

anes_2020 %>%
  filter(!is.na(TrustPeople)) %>%
  group_by(TrustPeople) %>%
  summarize(
    count = n()
  ) %>%
  mutate(
    percent = count / sum(count) * 100
  ) %>%
  gt() %>%
  fmt_number(
    columns = percent,
    decimals = 1
  ) %>%
  cols_label(
    TrustPeople = "Response",
    count = "Count",
    percent = "Percent"
  ) %>%
  tab_header(title = "Trust in People Distribution")

Response	Count	Percent
Trust in People Distribution
Always	48	0.6
Most of the time	3511	47.2
About half the time	2020	27.2
Some of the time	1597	21.5
Never	264	3.5

Exercise 5: Demographics Overview

Create a simple summary table showing:

Total respondents
Mean age
Percent female
Percent with Bachelor’s or higher

anes_2020 %>%
  summarize(
    total_resp = n(),
    mean_age = mean(Age, na.rm = TRUE),
    pct_female = mean(Gender == "Female", na.rm = TRUE) * 100,
    pct_college = mean(Education %in% 
                      c("Bachelor's", "Graduate"), na.rm = TRUE) * 100
  ) %>%
  gt() %>%
  fmt_number(
    columns = c(mean_age, pct_female, pct_college),
    decimals = 1
  ) %>%
  cols_label(
    total_resp = "Total Respondents",
    mean_age = "Mean Age",
    pct_female = "% Female",
    pct_college = "% Bachelor's+"
  ) %>%
  tab_header(title = "Sample Demographics Summary")

Total Respondents	Mean Age	% Female	% Bachelor's+
Sample Demographics Summary
7453	51.8	54.4	45.0

Tutorial 2: Practice Exercise Solutions

Sébastien Parker

SOC3320 - Winter 2025