First, we’ll download the data, helpfully saved as a static `rds`` file
library(tidyverse)
library(magrittr)
library(showtext)
library(ggtext)
library(httr)
font_add_google("Roboto")
t2 <- "https://github.com/thomasjwood/ps7160/raw/master/gpss/gpss_honesty_76_23.rds" %>%
url %>%
readRDS
We need to turn some of the eccentric haven::labelled
variables into nice old fashioned factor()
s.
t2 %<>%
modify_if(
t2 %>%
map_lgl(
\(i)
i %>%
class %>%
str_detect("label") %>%
any
),
as_factor
)
These data are from the Gallup Poll Social Series, which has been conducted since 2000. Helpfully, where Gallup data pre-exist this particular program, Gallup stacks these data above the corresponding current survey Each month covers a separate topic (crime, the economy, values, etc). Sometimes the items are pretty well designed, others more idiosyncratic.
Today we’re interested in partisan differences in perceived vocational honesty, over time. Here are the occupations being measured
t2 %>%
select(
accountants:veterinarians
) %>%
names
## [1] "accountants" "ad_practitioners"
## [3] "auto_mechanics" "bankers"
## [5] "building_contractors" "business_execs"
## [7] "car_salespeople" "chiropractors"
## [9] "clergy" "college_teachers"
## [11] "computer_industry_execs" "computer_salespeople"
## [13] "day_care_providers" "democratic_officeholders"
## [15] "dentists" "engineers"
## [17] "entertainment_industry_execs" "firefighters"
## [19] "funeral_directors" "grade_school_teachers"
## [21] "grade_high_school_teachers" "gun_salespeople"
## [23] "high_school_teachers" "hmo_managers"
## [25] "home_repair_people" "insurance_salespeople"
## [27] "internet_journalists" "jewelers"
## [29] "journalists" "judges"
## [31] "union_leaders" "lawyers"
## [33] "lobbyists" "local_officeholders"
## [35] "doctors" "members_congress"
## [37] "military_members" "military_officers"
## [39] "newspaper_reporters" "nurses"
## [41] "nursing_home_operators" "pharmacists"
## [43] "police_officers" "political_officeholders"
## [45] "psychiatrists" "pollsters"
## [47] "real_estate_agents" "real_estate_developers"
## [49] "republican_officeholders" "senators"
## [51] "state_governors" "state_officeholders"
## [53] "stockbrokers" "store_salespeople"
## [55] "telemarketers" "television_reporters"
## [57] "tv_talk_show_hosts" "veterinarians"
Now, we have the initial complication that coverage of the items is spotty, temporally.
Let’s see where each item is included or omitted from the survey:
cov_jobs <- t2 %>%
select(
yr, accountants:veterinarians
) %>%
pivot_longer(
accountants:veterinarians,
names_to = "jobs" ,
values_to = "honesty",
values_drop_na = T
) %>%
count(yr, jobs) %>%
complete(yr, jobs, fill = list(n = 0)) %>%
arrange(jobs, yr) %>%
group_by(jobs) %>%
mutate(
cum_obs = n %>% cumsum,
yr = yr %>% as.character %>% as.numeric
)
cov_jobs %>%
ggplot(
aes(
yr, cum_obs, group = jobs
)
) +
geom_step() +
geom_text(
aes(
label = jobs
),
data = cov_jobs %>%
slice(n()),
hjust = 0,
size = 2.5,
check_overlap = T
) +
scale_x_continuous(
expand = expansion(add = c(1, 15)),
breaks = seq(1980, 2020, 10)
)
See, not the best looking graph, but quite informative, and only ~40 lines of code. Anyway, HMO manager drop off in the 2000-2002 period, Veterinarians appears seemingly once in 2004 and is never asked again, whereas doctors and lawyers have appeared every time since 1990. Since we’re just being speculative, let’s generate a list of jobs which have been surveyed 7 or more times
kj <- t2 %>%
select(
unique_id, wtfctr, yr, partyr, accountants:veterinarians
) %>%
pivot_longer(
accountants:veterinarians,
names_to = "jobs",
values_to = "vals",
values_drop_na = T
) %>%
group_by(jobs) %>%
summarize(
yc = yr %>% unique %>% length
) %>%
arrange(desc(yc)) %>%
filter( yc >= 7) %>%
use_series(jobs)
The respondents score occupations on a 5 part likert, which we’ll convert into a numerical score and then summarize into means.
t3 <- t2 %>%
select(
unique_id, wtfctr, yr, partyr, any_of(kj)
) %>%
mutate(
partyr = partyr %>%
str_extract(
"Republican|Independent|Democrat"
) %>%
factor(
c("Democrat",
"Independent",
"Republican")
),
yr = yr %>%
as.numeric %>%
cut_interval(length = 4)
) %>%
pivot_longer(
any_of(kj),
names_to = "jobs",
values_to = "vals",
values_drop_na = T
) %>%
mutate(
v2 = vals %>%
case_match(
"Very high" ~ 5,
"High" ~ 4,
"Average" ~ 3,
"Low" ~ 2,
"Very low" ~ 1,
c("DK", "REF") ~ NA
) %>%
as.character %>%
as.numeric
) %>%
group_by(
partyr, yr, jobs
) %>%
summarize(
mu = v2 %>% weighted.mean(wtfctr, na.rm = T)
) %>%
ungroup %>%
mutate(
jobs = jobs %>%
str_replace_all("_", " ") %>%
str_to_title %>%
fct_reorder(mu)
) %>%
filter(
partyr %>%
is.na %>%
not
) %>%
mutate(
mufil = mu %>%
cut_number(11)
)
Which should look like
## # A tibble: 1,131 × 5
## partyr yr jobs mu mufil
## <fct> <fct> <fct> <dbl> <fct>
## 1 Democrat [1976,1980] Ad Practitioners 2.53 (2.47,2.64]
## 2 Democrat [1976,1980] Building Contractors 3.02 (2.98,3.1]
## 3 Democrat [1976,1980] Business Execs 2.94 (2.89,2.98]
## 4 Democrat [1976,1980] College Teachers 3.47 (3.41,3.57]
## 5 Democrat [1976,1980] Doctors 3.58 (3.57,3.74]
## 6 Democrat [1976,1980] Engineers 3.51 (3.41,3.57]
## 7 Democrat [1976,1980] Journalists 3.25 (3.1,3.26]
## 8 Democrat [1976,1980] Lawyers 2.97 (2.89,2.98]
## 9 Democrat [1976,1980] Members Congress 2.74 (2.64,2.76]
## 10 Democrat [1976,1980] Psychiatrists 3.18 (3.1,3.26]
## # ℹ 1,121 more rows
Ok, we have the data in a nice format – we’ll have years on the x-axis, we’ll have jobs in the rows, and we’ll use the mean level of trust as the fill of a tile.
t3 %>%
ggplot(
aes(
yr, jobs, fill = mufil
)
) +
geom_tile(
alpha = .85,
color = "grey30",
linewidth = .2
) +
facet_wrap(~partyr, nrow = 1)
Let’s clean up–first a nice set of x-axis labels, and some labels
xlab <- tibble(
old = t3$yr %>%
levels,
new = t3$yr %>%
levels %>%
map_chr(
\(i)
str_c(
i %>%
str_extract("\\d{4}") %>%
str_sub(3),
"-",
i %>%
str_extract(",\\d{4}") %>%
str_sub(-2)
)
)
)
xlab$new[[12]] <- "20-23"
t3 %>%
ggplot(
aes(
yr, jobs, fill = mufil
)
) +
geom_tile(
alpha = .85,
color = "grey30",
linewidth = .2
) +
facet_wrap(~partyr, nrow = 1) +
scale_x_discrete(
breaks = xlab$old,
labels = xlab$new
) +
labs(
x = "",
y = "",
title = "Perceived vocational honesty, by partisanship, 1976-2023",
caption = "Data source: Gallup Social Series Honesty and Ethics, 1976-2024"
)
Better color scale, and let’s replace the colorbar legend with a couple of colored labels
library(ggtext)
t3 %>%
ggplot(
aes(
yr, jobs, fill = mufil
)
) +
geom_tile(
alpha = .85,
color = "grey30",
linewidth = .2
) +
facet_wrap(~partyr, nrow = 1) +
scale_fill_brewer(
palette = "RdYlGn",
type = "div"
) +
scale_x_discrete(
breaks = xlab$old,
labels = xlab$new
) +
labs(
x = "",
y = "",
title = "Perceived vocational honesty, by partisanship, 1976-2023",
subtitle = "Honesty measured on 5pt scale, from <span style = 'color:#a50026'>**very low**</span> and <span style = 'color:#006837'>**very high**</span>.",
caption = "Data source: Gallup Social Series Honesty and Ethics, 1976-2024."
) +
theme(
legend.position = "none",
plot.subtitle = element_markdown(),
)
Typefaces and themes!
t3 %>%
ggplot(
aes(
yr, jobs, fill = mufil
)
) +
geom_tile(
alpha = .85,
color = "grey30",
linewidth = .2
) +
facet_wrap(~partyr, nrow = 1) +
scale_fill_brewer(
palette = "RdYlGn",
type = "div"
) +
scale_x_discrete(
breaks = xlab$old,
labels = xlab$new
) +
labs(
x = "",
y = "",
title = "Perceived vocational honesty, by partisanship, 1976-2023",
subtitle = "Honesty measured on 5pt scale, from <span style = 'color:#a50026'>**very low**</span> and <span style = 'color:#006837'>**very high**</span>.",
caption = "Data source: Gallup Social Series Honesty and Ethics, 1976-2024."
) +
theme_minimal(base_family = "Roboto") +
theme(
panel.grid = element_blank(),
strip.background = element_rect(
fill = "grey95",
color = "grey95"
),
panel.background = element_rect(
fill = "grey95",
color = "grey95"
),
axis.text.x = element_text(
angle = 45*.8, hjust = 1
),
strip.text.y = element_text(angle = 0),
legend.position = "none",
legend.margin = margin(-.3, unit = "cm"),
plot.title = element_text(face = "bold"),
plot.subtitle = element_markdown(),
plot.caption = element_text(face = "italic")
)