Basics
Data summary
# One row per survey project: distinct surveys, countries and rounds, plus the
# earliest and latest survey year found in metadata1.
summarise(
  group_by(metadata1, T_SURVEY_NAME),
  n_surveys   = n_distinct(T_SURVEY_ID),
  n_countries = n_distinct(T_SURVEY_COUNTRY),
  n_waves     = n_distinct(T_SURVEY_ROUND),
  min_year    = min(T_SURVEY_YEAR),
  max_year    = max(T_SURVEY_YEAR)
)
There are 45 countries altogether. Of those, 17 are not included in all projects:
# Countries that are missing from at least one project.
# Counts surveys per project and country, puts one column per project, and
# keeps the countries that have a gap (NA) in any project column.
metadata1 %>%
  count(T_SURVEY_NAME, T_SURVEY_COUNTRY) %>%
  pivot_wider(names_from = T_SURVEY_NAME, values_from = n) %>%
  # pivot_wider() keeps first-appearance order; sort to list countries
  # alphabetically as spread() did.
  arrange(T_SURVEY_COUNTRY) %>%
  # Checking every column avoids hard-coding the project names, so the filter
  # keeps working if a project is added or absent.
  filter(!complete.cases(.))
Excess bias
by project
Proportion of surveys with excess (> 1.96) absolute bias:
# Share of surveys per project whose absolute bias exceeds 1.96.
# Surveys without a bias value are dropped first, so the denominator is the
# number of surveys with a computed bias (as in the by-round and by-country
# summaries). Projects with no flagged survey are reported with a share of 0
# instead of disappearing from the table.
metadata1 %>%
  drop_na(Q_Abs_bias_internal_criteria) %>%
  group_by(T_SURVEY_NAME) %>%
  summarise(prop_sig = round(mean(Q_Abs_bias_internal_criteria > 1.96), 3))
2008 was a difficult year for surveys?
Numbers above the bars indicate the number of surveys for which bias could be calculated.
# Share of surveys with absolute bias > 1.96, per project and round.
# The summary is computed once and shared by both plots. Using summarise()
# rather than filtering on the flagged rows keeps rounds in which no survey
# exceeded the threshold: they show as a zero-height bar with their survey
# count instead of being dropped from the axis.
excess_bias_by_round <- metadata1 %>%
  drop_na(Q_Abs_bias_internal_criteria) %>%
  # Strip the leading project prefix (up to four capital letters) so that only
  # the round identifier remains on the x axis.
  mutate(round = gsub("^[A-Z]{1,4}", "", T_SURVEY_ROUND)) %>%
  group_by(T_SURVEY_NAME, round) %>%
  summarise(prop_sig = mean(Q_Abs_bias_internal_criteria > 1.96),
            nsurveys = n()) %>%
  ungroup()

# All projects except ISSP, one panel per project.
part1 <- excess_bias_by_round %>%
  filter(T_SURVEY_NAME != "ISSP") %>%
  ggplot(aes(x = round, y = prop_sig)) +
  geom_col(fill = "gray70") +
  # Survey count printed just above each bar.
  geom_text(aes(y = prop_sig + 0.03, label = nsurveys), col = "gray20") +
  theme_bw() +
  facet_wrap("T_SURVEY_NAME", scales = "free_x")

# ISSP on its own row; x labels are rotated.
part2 <- excess_bias_by_round %>%
  filter(T_SURVEY_NAME == "ISSP") %>%
  ggplot(aes(x = round, y = prop_sig)) +
  geom_col(fill = "gray70") +
  geom_text(aes(y = prop_sig + 0.035, label = nsurveys), col = "gray20") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap("T_SURVEY_NAME", scales = "free_x")

# Stack the two plots vertically (patchwork).
part1 / part2

by country
Surveys with absolute bias > 1.96 by country.
# Per-country table of surveys with absolute bias > 1.96, among surveys that
# have a bias value:
#   total_surveys / total_projects - surveys and distinct projects per country
#   bias_surveys / bias_projects   - the same, restricted to flagged surveys
#   prop_bias                      - bias_surveys / total_surveys, 3 decimals
# Countries without any flagged survey are kept with zero counts.
# A single summarise() replaces the earlier mutate/distinct/complete sequence
# and returns an ungrouped table, sorted by descending share.
metadata1 %>%
  drop_na(Q_Abs_bias_internal_criteria) %>%
  mutate(is_sig = Q_Abs_bias_internal_criteria > 1.96) %>%
  group_by(T_SURVEY_COUNTRY) %>%
  summarise(
    total_surveys = n(),
    bias_surveys = sum(is_sig),
    prop_bias = round(bias_surveys / total_surveys, 3),
    total_projects = n_distinct(T_SURVEY_NAME),
    # Subsetting by is_sig yields an empty vector (count 0) when no survey in
    # the country is flagged.
    bias_projects = n_distinct(T_SURVEY_NAME[is_sig])
  ) %>%
  arrange(desc(prop_bias))
Changes over time
Bias
Orange dots indicate surveys from the waves analyzed in Kohler 2007.
# Signed bias by survey year. Surveys from the rounds listed in K2007 are
# drawn in orange, all others in gray; the legend is hidden.
# The earlier S_SAMPLE_TYPE recoding was removed because this plot never
# uses that column.
metadata1 %>%
  ungroup() %>%
  mutate(
    K2007 = T_SURVEY_ROUND %in%
      c("EQLS1", "ESS1", "ESS2", "EVS1999", "ISSP2002")
  ) %>%
  ggplot(aes(x = T_SURVEY_YEAR, y = Q_Bias_internal_criteria, col = K2007)) +
  geom_point(size = 2, alpha = 0.5) +
  # Named values tie each colour to its flag regardless of which levels are
  # present in the data.
  scale_color_manual(values = c("FALSE" = "gray50", "TRUE" = "darkorange")) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("")

Surveys with absolute bias exceeding 5 are labelled.
# Signed bias by survey year. gghighlight() emphasises the points whose
# absolute bias is above 5; geom_text_repel() adds T_SURVEY_ID labels.
metadata1 %>%
  ggplot(aes(x = T_SURVEY_YEAR, y = Q_Bias_internal_criteria)) +
  geom_point(size = 2) +
  gghighlight(Q_Abs_bias_internal_criteria > 5) +
  geom_text_repel(aes(label = T_SURVEY_ID), size = 3.5) +
  labs(x = "") +
  theme_bw()

Absolute bias and response rates
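Restricted to surveys with mode "f2f"; excludes sample types: no information, insufficient, non-probability. Note the different Y axis ranges.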
# Absolute bias and response rate (S_RR1_CALC_VALUE) by survey year, one row
# of panels per project and one column per measure, each with a loess
# smoother. Only surveys with mode "f2f" are used, and the sample types
# "no info", "insuff" and "non-prob" are excluded.
metadata1 %>%
  filter(!S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob"),
         mode == "f2f") %>%
  # filter(Q_Abs_bias_internal_criteria < 4.5) %>%
  select(T_SURVEY_NAME, T_SURVEY_COUNTRY, T_SURVEY_ROUND, T_SURVEY_YEAR,
         Q_Abs_bias_internal_criteria, S_RR1_CALC_VALUE) %>%
  # Long format: one row per survey and measure, so the measure can be used
  # as a facetting variable.
  pivot_longer(c(S_RR1_CALC_VALUE, Q_Abs_bias_internal_criteria),
               names_to = "var", values_to = "value") %>%
  drop_na(value) %>%
  ggplot(aes(x = T_SURVEY_YEAR, y = value)) +
  geom_point(alpha = 0.1) +
  geom_smooth(method = "loess") +
  theme_bw() +
  # Free y scales because the two measures are on different ranges.
  facet_wrap(T_SURVEY_NAME ~ var, scales = "free_y", ncol = 2)

Absolute bias by sample type
# Boxplots of absolute bias by sample type, one panel per project.

# Left column: ESS and EQLS.
part1 <- metadata1 %>%
  filter(T_SURVEY_NAME %in% c("ESS", "EQLS")) %>%
  ggplot(aes(x = S_SAMPLE_TYPE, y = Q_Abs_bias_internal_criteria)) +
  geom_boxplot() +
  facet_wrap("T_SURVEY_NAME", ncol = 1) +
  labs(x = "") +
  theme_bw()

# Right column: ISSP and EVS; the y-axis title is blanked as well.
part2 <- metadata1 %>%
  filter(T_SURVEY_NAME %in% c("ISSP", "EVS")) %>%
  ggplot(aes(x = S_SAMPLE_TYPE, y = Q_Abs_bias_internal_criteria)) +
  geom_boxplot() +
  facet_wrap("T_SURVEY_NAME", ncol = 1) +
  labs(x = "", y = "") +
  theme_bw()

# Side by side, with the right column twice as wide as the left.
part1 + part2 + plot_layout(widths = c(1, 2))

Excluding absolute bias > 4.
# Same boxplots as before, with the y scale limited to 0-4.

# Left column: ESS and EQLS.
part1 <- metadata1 %>%
  filter(T_SURVEY_NAME %in% c("ESS", "EQLS")) %>%
  ggplot(aes(x = S_SAMPLE_TYPE, y = Q_Abs_bias_internal_criteria)) +
  geom_boxplot() +
  facet_wrap("T_SURVEY_NAME", ncol = 1) +
  scale_y_continuous(limits = c(0, 4)) +
  labs(x = "") +
  theme_bw()

# Right column: ISSP and EVS; the y-axis title is blanked as well.
part2 <- metadata1 %>%
  filter(T_SURVEY_NAME %in% c("ISSP", "EVS")) %>%
  ggplot(aes(x = S_SAMPLE_TYPE, y = Q_Abs_bias_internal_criteria)) +
  geom_boxplot() +
  facet_wrap("T_SURVEY_NAME", ncol = 1) +
  scale_y_continuous(limits = c(0, 4)) +
  labs(x = "", y = "") +
  theme_bw()

# Side by side, with the right column twice as wide as the left.
part1 + part2 + plot_layout(widths = c(1, 2))

Bias and response rates
Overall
Restricted to surveys with mode "f2f"; excludes sample types: no information, insufficient, non-probability.
# Absolute bias against response rate (S_RR1_CALC_VALUE), points coloured by
# project, with a single loess curve and no confidence band. Only surveys with
# mode "f2f" are used; sample types "no info", "insuff" and "non-prob" are
# excluded.
metadata1 %>%
  filter(
    !S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob"),
    mode == "f2f"
  ) %>%
  ggplot(aes(x = S_RR1_CALC_VALUE, y = Q_Abs_bias_internal_criteria)) +
  geom_point(aes(colour = T_SURVEY_NAME), size = 2, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_x_continuous(limits = c(0, 1)) +
  theme_bw()

Restricted to surveys with mode "f2f"; excludes RR >= 0.9 and sample types: no information, insufficient, non-probability.
# Absolute bias against response rate, as in the overall plot, but keeping
# only surveys whose response rate is below 0.9. The x axis still spans 0-1.
metadata1 %>%
  filter(
    !S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob"),
    mode == "f2f",
    S_RR1_CALC_VALUE < 0.9
  ) %>%
  ggplot(aes(x = S_RR1_CALC_VALUE, y = Q_Abs_bias_internal_criteria)) +
  geom_point(aes(colour = T_SURVEY_NAME), size = 2, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_x_continuous(limits = c(0, 1)) +
  theme_bw()

By sample type
Restricted to surveys with mode "f2f"; excludes RR >= 0.9 and sample types: no information, insufficient, non-probability.
# Absolute bias against response rate, one panel per sample type, each with
# its own loess curve. Uses surveys with mode "f2f" and a response rate below
# 0.9; sample types "no info", "insuff" and "non-prob" are excluded.
metadata1 %>%
  filter(
    !S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob"),
    mode == "f2f",
    S_RR1_CALC_VALUE < 0.9
  ) %>%
  ggplot(aes(x = S_RR1_CALC_VALUE, y = Q_Abs_bias_internal_criteria)) +
  geom_point(size = 2, alpha = 0.1) +
  geom_smooth(method = "loess", se = FALSE, size = 1) +
  facet_wrap("S_SAMPLE_TYPE") +
  theme_bw()

By sample type and project
Restricted to surveys with mode "f2f"; excludes RR >= 0.9 and sample types: no information, insufficient, non-probability.
# Absolute bias against response rate in a grid: projects in rows, sample
# types in columns, a loess curve per cell. Uses surveys with mode "f2f" and a
# response rate below 0.9; sample types "no info", "insuff" and "non-prob" are
# excluded.
metadata1 %>%
  filter(
    !S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob"),
    mode == "f2f",
    S_RR1_CALC_VALUE < 0.9
  ) %>%
  ggplot(aes(x = S_RR1_CALC_VALUE, y = Q_Abs_bias_internal_criteria)) +
  geom_point(size = 2, alpha = 0.1) +
  geom_smooth(method = "loess", se = FALSE) +
  facet_grid(T_SURVEY_NAME ~ S_SAMPLE_TYPE) +
  theme_bw()

