Graph Clinic

First, we’ll download the data, helpfully saved as a static `rds`` file

library(tidyverse)
library(magrittr)
library(showtext)
library(ggtext)
library(httr)

font_add_google("Roboto")


t2 <- "https://github.com/thomasjwood/ps7160/raw/master/gpss/gpss_honesty_76_23.rds" %>% 
  url %>% 
  readRDS 

We need to turn some of the eccentric haven::labelled variables into nice old fashioned factor()s.

t2 %<>%
  modify_if(
    t2 %>% 
    map_lgl(
      \(i)
      i %>% 
        class %>% 
        str_detect("label") %>% 
        any
      ),
    as_factor
    )

These data are from the Gallup Poll Social Series, which has been conducted since 2000. Helpfully, where Gallup data pre-exist this particular program, Gallup stacks these data above the corresponding current survey Each month covers a separate topic (crime, the economy, values, etc). Sometimes the items are pretty well designed, others more idiosyncratic.

Today we’re interested in partisan differences in perceived vocational honesty, over time. Here are the occupations being measured

t2 %>% 
  select(
    accountants:veterinarians
    ) %>% 
  names
##  [1] "accountants"                  "ad_practitioners"            
##  [3] "auto_mechanics"               "bankers"                     
##  [5] "building_contractors"         "business_execs"              
##  [7] "car_salespeople"              "chiropractors"               
##  [9] "clergy"                       "college_teachers"            
## [11] "computer_industry_execs"      "computer_salespeople"        
## [13] "day_care_providers"           "democratic_officeholders"    
## [15] "dentists"                     "engineers"                   
## [17] "entertainment_industry_execs" "firefighters"                
## [19] "funeral_directors"            "grade_school_teachers"       
## [21] "grade_high_school_teachers"   "gun_salespeople"             
## [23] "high_school_teachers"         "hmo_managers"                
## [25] "home_repair_people"           "insurance_salespeople"       
## [27] "internet_journalists"         "jewelers"                    
## [29] "journalists"                  "judges"                      
## [31] "union_leaders"                "lawyers"                     
## [33] "lobbyists"                    "local_officeholders"         
## [35] "doctors"                      "members_congress"            
## [37] "military_members"             "military_officers"           
## [39] "newspaper_reporters"          "nurses"                      
## [41] "nursing_home_operators"       "pharmacists"                 
## [43] "police_officers"              "political_officeholders"     
## [45] "psychiatrists"                "pollsters"                   
## [47] "real_estate_agents"           "real_estate_developers"      
## [49] "republican_officeholders"     "senators"                    
## [51] "state_governors"              "state_officeholders"         
## [53] "stockbrokers"                 "store_salespeople"           
## [55] "telemarketers"                "television_reporters"        
## [57] "tv_talk_show_hosts"           "veterinarians"

Now, we have the initial complication that coverage of the items is spotty, temporally.

Let’s see where each item is included or omitted from the survey:

cov_jobs <- t2 %>% 
  select(
    yr, accountants:veterinarians
  ) %>% 
  pivot_longer(
    accountants:veterinarians, 
    names_to = "jobs" ,
    values_to = "honesty", 
    values_drop_na = T
  ) %>% 
  count(yr, jobs) %>% 
  complete(yr, jobs, fill = list(n = 0)) %>% 
  arrange(jobs, yr) %>% 
  group_by(jobs) %>% 
  mutate(
    cum_obs = n %>% cumsum,
    yr = yr %>% as.character %>% as.numeric
  ) 

cov_jobs %>% 
  ggplot(
    aes(
      yr, cum_obs, group = jobs
    )
  ) +
  geom_step() +
  geom_text(
    aes(
      label = jobs
    ),
    data = cov_jobs %>% 
      slice(n()),
    hjust = 0,
    size = 2.5, 
    check_overlap = T
  ) +
  scale_x_continuous(
    expand = expansion(add = c(1, 15)),
    breaks = seq(1980, 2020, 10)
  )

See, not the best looking graph, but quite informative, and only ~40 lines of code. Anyway, HMO manager drop off in the 2000-2002 period, Veterinarians appears seemingly once in 2004 and is never asked again, whereas doctors and lawyers have appeared every time since 1990. Since we’re just being speculative, let’s generate a list of jobs which have been surveyed 7 or more times

kj <- t2 %>% 
  select(
    unique_id, wtfctr, yr, partyr, accountants:veterinarians
  ) %>% 
  pivot_longer(
    accountants:veterinarians, 
    names_to = "jobs",
    values_to = "vals", 
    values_drop_na = T
  ) %>% 
  group_by(jobs) %>% 
  summarize(
    yc = yr %>% unique %>% length
  ) %>% 
  arrange(desc(yc)) %>% 
  filter( yc >= 7) %>% 
  use_series(jobs)

The respondents score occupations on a 5 part likert, which we’ll convert into a numerical score and then summarize into means.

t3 <- t2 %>% 
  select(
    unique_id, wtfctr, yr, partyr, any_of(kj)
  ) %>% 
  mutate(
    partyr = partyr %>% 
      str_extract(
        "Republican|Independent|Democrat"
      ) %>% 
      factor(
        c("Democrat",
          "Independent",
          "Republican")
      ),
    yr = yr %>% 
      as.numeric %>% 
      cut_interval(length = 4)
    ) %>%  
  pivot_longer(
    any_of(kj), 
    names_to = "jobs",
    values_to = "vals", 
    values_drop_na = T
  ) %>% 
  mutate(
    v2 = vals %>% 
      case_match(
        "Very high" ~ 5,
        "High" ~ 4,
        "Average" ~ 3, 
        "Low" ~ 2,
        "Very low" ~ 1,
        c("DK", "REF") ~ NA
        ) %>% 
      as.character %>% 
      as.numeric
    ) %>% 
  group_by(
    partyr, yr, jobs
  ) %>% 
  summarize(
    mu = v2 %>% weighted.mean(wtfctr, na.rm = T)
  ) %>% 
  ungroup %>% 
  mutate(
    jobs = jobs %>% 
      str_replace_all("_", " ") %>% 
      str_to_title %>% 
      fct_reorder(mu)
    ) %>% 
  filter(
    partyr %>% 
      is.na %>% 
      not
  ) %>% 
  mutate(
    mufil = mu %>% 
      cut_number(11)
  )

Which should look like

## # A tibble: 1,131 × 5
##    partyr   yr          jobs                    mu mufil      
##    <fct>    <fct>       <fct>                <dbl> <fct>      
##  1 Democrat [1976,1980] Ad Practitioners      2.53 (2.47,2.64]
##  2 Democrat [1976,1980] Building Contractors  3.02 (2.98,3.1] 
##  3 Democrat [1976,1980] Business Execs        2.94 (2.89,2.98]
##  4 Democrat [1976,1980] College Teachers      3.47 (3.41,3.57]
##  5 Democrat [1976,1980] Doctors               3.58 (3.57,3.74]
##  6 Democrat [1976,1980] Engineers             3.51 (3.41,3.57]
##  7 Democrat [1976,1980] Journalists           3.25 (3.1,3.26] 
##  8 Democrat [1976,1980] Lawyers               2.97 (2.89,2.98]
##  9 Democrat [1976,1980] Members Congress      2.74 (2.64,2.76]
## 10 Democrat [1976,1980] Psychiatrists         3.18 (3.1,3.26] 
## # ℹ 1,121 more rows

Ok, we have the data in a nice format – we’ll have years on the x-axis, we’ll have jobs in the rows, and we’ll use the mean level of trust as the fill of a tile.

t3 %>% 
  ggplot(
    aes(
      yr, jobs, fill = mufil
    )
  ) +
  geom_tile(
    alpha = .85,
    color = "grey30",
    linewidth = .2
  ) +
  facet_wrap(~partyr, nrow = 1)

Let’s clean up–first a nice set of x-axis labels, and some labels

xlab <- tibble(
  old = t3$yr %>% 
    levels,
  new = t3$yr %>% 
    levels %>% 
    map_chr(
      \(i)
      
      str_c(
        i %>% 
          str_extract("\\d{4}") %>% 
          str_sub(3),
        "-",
        i %>% 
          str_extract(",\\d{4}") %>% 
          str_sub(-2)
      )
    )
  )

xlab$new[[12]] <- "20-23"

t3 %>% 
  ggplot(
    aes(
      yr, jobs, fill = mufil
    )
  ) +
  geom_tile(
    alpha = .85,
    color = "grey30",
    linewidth = .2
  ) +
  facet_wrap(~partyr, nrow = 1) +
  scale_x_discrete(
    breaks = xlab$old,
    labels = xlab$new
  ) +
  labs(
    x = "",
    y =  "",
    title = "Perceived vocational honesty, by partisanship, 1976-2023",
    caption = "Data source: Gallup Social Series Honesty and Ethics, 1976-2024"
    )

Better color scale, and let’s replace the colorbar legend with a couple of colored labels

library(ggtext)

t3 %>% 
  ggplot(
    aes(
      yr, jobs, fill = mufil
    )
  ) +
  geom_tile(
    alpha = .85,
    color = "grey30",
    linewidth = .2
  ) +
  facet_wrap(~partyr, nrow = 1) +
  scale_fill_brewer(
    palette = "RdYlGn",
    type = "div"
    ) +
  scale_x_discrete(
    breaks = xlab$old,
    labels = xlab$new
  ) +
  labs(
    x = "",
    y =  "",
    title = "Perceived vocational honesty, by partisanship, 1976-2023",
    subtitle = "Honesty measured on 5pt scale, from <span style = 'color:#a50026'>**very low**</span> and <span style = 'color:#006837'>**very high**</span>.",
    caption = "Data source: Gallup Social Series Honesty and Ethics, 1976-2024."
    ) +
  theme(
    legend.position = "none",
    plot.subtitle = element_markdown(),
  )

Typefaces and themes!

t3 %>% 
  ggplot(
    aes(
      yr, jobs, fill = mufil
    )
  ) +
  geom_tile(
    alpha = .85,
    color = "grey30",
    linewidth = .2
  ) +
  facet_wrap(~partyr, nrow = 1) +
  scale_fill_brewer(
    palette = "RdYlGn",
    type = "div"
    ) +
  scale_x_discrete(
    breaks = xlab$old,
    labels = xlab$new
  ) +
  labs(
    x = "",
    y =  "",
    title = "Perceived vocational honesty, by partisanship, 1976-2023",
    subtitle = "Honesty measured on 5pt scale, from <span style = 'color:#a50026'>**very low**</span> and <span style = 'color:#006837'>**very high**</span>.",
    caption = "Data source: Gallup Social Series Honesty and Ethics, 1976-2024."
    ) +
  theme_minimal(base_family = "Roboto") +
  theme(
    panel.grid = element_blank(),
    strip.background = element_rect(
      fill = "grey95",
      color = "grey95"
    ),
    panel.background = element_rect(
      fill = "grey95",
      color = "grey95"
    ),
    axis.text.x = element_text(
      angle = 45*.8, hjust = 1
    ),
    strip.text.y = element_text(angle = 0),
    legend.position = "none",
    legend.margin = margin(-.3, unit = "cm"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_markdown(),
    plot.caption = element_text(face = "italic")
  )