Basics

Data summary

# Per-project overview: distinct surveys, countries and rounds, together with
# the smallest and largest T_SURVEY_YEAR observed in each project.
metadata1 %>%
  group_by(T_SURVEY_NAME) %>%
  summarise(
    n_surveys   = n_distinct(T_SURVEY_ID),
    n_countries = n_distinct(T_SURVEY_COUNTRY),
    n_waves     = n_distinct(T_SURVEY_ROUND),
    min_year    = min(T_SURVEY_YEAR),
    max_year    = max(T_SURVEY_YEAR)
  )

There are 45 countries altogether. Of those, 17 are not included in all projects:

# Country-by-project table of survey counts, restricted to countries that are
# absent from at least one of the four projects.
metadata1 %>%
  count(T_SURVEY_NAME, T_SURVEY_COUNTRY) %>%
  # pivot_wider() replaces the superseded spread(); a country/project pair
  # with no surveys ends up as NA in the project's column.
  pivot_wider(names_from = T_SURVEY_NAME, values_from = n) %>%
  # Test each project column explicitly instead of relying on NA propagating
  # through a sum of the counts.
  filter(is.na(EQLS) | is.na(ESS) | is.na(EVS) | is.na(ISSP))

Excess bias

by project

Proportion of surveys with excess (> 1.96) absolute bias:

# Share of each project's surveys whose absolute bias exceeds 1.96.
# Surveys with a missing bias value form their own (NA) count and therefore
# stay in the denominator; only the TRUE share is reported.
metadata1 %>%
  count(T_SURVEY_NAME, is_sig = Q_Abs_bias_internal_criteria > 1.96) %>%
  group_by(T_SURVEY_NAME) %>%
  mutate(prop_sig = round(n / sum(n), digits = 3)) %>%
  filter(is_sig) %>%
  select(T_SURVEY_NAME, prop_sig)

Was 2008 a difficult year for surveys?

Numbers above the bars indicate the number of surveys for which bias could be calculated.

# Bar chart of the share of surveys with excess (> 1.96) absolute bias in each
# round, faceted by project.
#
# data:         survey-level metadata containing the columns
#               Q_Abs_bias_internal_criteria, T_SURVEY_NAME and T_SURVEY_ROUND.
# label_offset: vertical gap between the top of a bar and its label.
#
# Returns a ggplot object. Every round with at least one survey with a
# non-missing bias value is shown, including rounds in which no survey exceeds
# the threshold (bar of height 0), so the survey count label is never lost.
plot_excess_bias_by_round <- function(data, label_offset) {
  data %>%
    drop_na(Q_Abs_bias_internal_criteria) %>%
    mutate(is_sig = Q_Abs_bias_internal_criteria > 1.96,
           # strip the leading project prefix, e.g. "ESS1" -> "1"
           round = gsub("^[A-Z]{1,4}", "", T_SURVEY_ROUND)) %>%
    group_by(T_SURVEY_NAME, round) %>%
    # mean() of a logical is the proportion of TRUE values
    mutate(prop_sig = mean(is_sig),
           nsurveys = n()) %>%
    ungroup() %>%
    distinct(T_SURVEY_NAME, round, prop_sig, nsurveys) %>%
    ggplot(aes(x = round, y = prop_sig)) +
    geom_col(fill = "gray70") +
    geom_text(aes(y = prop_sig + label_offset, label = nsurveys),
              col = "gray20") +
    theme_bw() +
    facet_wrap("T_SURVEY_NAME", scales = "free_x")
}

# All projects except ISSP.
part1 <- metadata1 %>%
  filter(T_SURVEY_NAME != "ISSP") %>%
  plot_excess_bias_by_round(label_offset = 0.03)

# ISSP has many rounds, so its axis labels are rotated to stay readable.
part2 <- metadata1 %>%
  filter(T_SURVEY_NAME == "ISSP") %>%
  plot_excess_bias_by_round(label_offset = 0.035) +
  theme(axis.text.x = element_text(angle = 90))

# Stack the two panels vertically (patchwork).
part1 / part2

by country

Surveys with absolute bias > 1.96 by country.

# Per-country table of surveys with excess (> 1.96) absolute bias, sorted by
# the share of such surveys. Only surveys with a non-missing bias value count.
#
# Columns: total_surveys / bias_surveys (all vs. excess-bias surveys),
# prop_bias (their ratio, rounded to 3 digits) and
# total_projects / bias_projects (distinct projects overall vs. with at least
# one excess-bias survey). Countries without any excess-bias survey are kept
# with zero counts, and the result is returned ungrouped.
metadata1 %>%
  drop_na(Q_Abs_bias_internal_criteria) %>%
  mutate(is_sig = Q_Abs_bias_internal_criteria > 1.96) %>%
  group_by(T_SURVEY_COUNTRY) %>%
  summarise(total_surveys = n(),
            bias_surveys = sum(is_sig),
            prop_bias = round(bias_surveys / total_surveys, 3),
            total_projects = n_distinct(T_SURVEY_NAME),
            # subsetting by is_sig yields an empty vector (count 0) for
            # countries with no excess-bias survey
            bias_projects = n_distinct(T_SURVEY_NAME[is_sig])) %>%
  arrange(desc(prop_bias))

Changes over time

Bias

Orange dots indicate surveys from the waves analyzed in Kohler (2007).

# Bias by survey year, one point per survey; surveys from the rounds listed in
# K2007 are drawn in orange, all others in gray.
metadata1 %>%
  ungroup() %>%
  mutate(K2007 = T_SURVEY_ROUND %in%
           c("EQLS1", "ESS1", "ESS2", "EVS1999", "ISSP2002")) %>%
  ggplot(aes(x = T_SURVEY_YEAR, y = Q_Bias_internal_criteria, col = K2007)) +
  geom_point(size = 2, alpha = 0.5) +
  # Named values tie each colour to its flag regardless of which values occur.
  scale_color_manual(values = c("FALSE" = "gray50", "TRUE" = "darkorange")) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("")

Surveys with absolute bias exceeding 5 are labelled.

# Bias by survey year; gghighlight() keeps only surveys with absolute bias
# above 5 highlighted, and those are labelled with their survey ID.
ggplot(
  data = metadata1,
  mapping = aes(x = T_SURVEY_YEAR, y = Q_Bias_internal_criteria)
) +
  geom_point(size = 2) +
  gghighlight(Q_Abs_bias_internal_criteria > 5) +
  geom_text_repel(aes(label = T_SURVEY_ID), size = 3.5) +
  labs(x = "") +
  theme_bw()

Absolute bias and response rates

Note the different Y axis ranges.

# Absolute bias and response rate (S_RR1_CALC_VALUE) by survey year, one row
# of panels per project and one column per measure, each with a loess
# smoother. Rows with sample type "no info", "insuff" or "non-prob" and rows
# whose mode is not "f2f" are excluded.
metadata1 %>%
  filter(!S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob"),
         mode == "f2f") %>%
  # filter(Q_Abs_bias_internal_criteria < 4.5) %>%
  select(T_SURVEY_NAME, T_SURVEY_COUNTRY, T_SURVEY_ROUND, T_SURVEY_YEAR,
         Q_Abs_bias_internal_criteria, S_RR1_CALC_VALUE) %>%
  # pivot_longer() replaces the superseded gather(); values_drop_na = TRUE
  # takes over from the separate drop_na(value) step.
  pivot_longer(cols = c(S_RR1_CALC_VALUE, Q_Abs_bias_internal_criteria),
               names_to = "var",
               values_to = "value",
               values_drop_na = TRUE) %>%
  ggplot(aes(x = T_SURVEY_YEAR, y = value)) +
  geom_point(alpha = 0.1) +
  geom_smooth(method = "loess") +
  theme_bw() +
  # free y scales: the two measures live on different ranges
  facet_wrap(T_SURVEY_NAME ~ var, scales = "free_y", ncol = 2)

Absolute bias by sample type

# Box plots of absolute bias by sample type, one panel per project.
# Left column: ESS and EQLS.
part1 <- ggplot(
  data = filter(metadata1, T_SURVEY_NAME %in% c("ESS", "EQLS")),
  mapping = aes(x = S_SAMPLE_TYPE, y = Q_Abs_bias_internal_criteria)
) +
  geom_boxplot() +
  facet_wrap(~ T_SURVEY_NAME, ncol = 1) +
  labs(x = "") +
  theme_bw()

# Right column: ISSP and EVS, without a y-axis title.
part2 <- ggplot(
  data = filter(metadata1, T_SURVEY_NAME %in% c("ISSP", "EVS")),
  mapping = aes(x = S_SAMPLE_TYPE, y = Q_Abs_bias_internal_criteria)
) +
  geom_boxplot() +
  facet_wrap(~ T_SURVEY_NAME, ncol = 1) +
  labs(x = "", y = "") +
  theme_bw()

# Side by side, with the right column twice as wide as the left one.
part1 + part2 + plot_layout(widths = c(1, 2))

Excluding absolute bias > 4.

# Same box plots as above with the y scale limited to [0, 4]; scale limits
# discard values outside the range before the box plot statistics are computed.
# Left column: ESS and EQLS.
part1 <- ggplot(
  data = filter(metadata1, T_SURVEY_NAME %in% c("ESS", "EQLS")),
  mapping = aes(x = S_SAMPLE_TYPE, y = Q_Abs_bias_internal_criteria)
) +
  geom_boxplot() +
  facet_wrap(~ T_SURVEY_NAME, ncol = 1) +
  labs(x = "") +
  scale_y_continuous(limits = c(0, 4)) +
  theme_bw()

# Right column: ISSP and EVS, without a y-axis title.
part2 <- ggplot(
  data = filter(metadata1, T_SURVEY_NAME %in% c("ISSP", "EVS")),
  mapping = aes(x = S_SAMPLE_TYPE, y = Q_Abs_bias_internal_criteria)
) +
  geom_boxplot() +
  facet_wrap(~ T_SURVEY_NAME, ncol = 1) +
  labs(x = "", y = "") +
  scale_y_continuous(limits = c(0, 4)) +
  theme_bw()

# Side by side, with the right column twice as wide as the left one.
part1 + part2 + plot_layout(widths = c(1, 2))

Bias and response rates

Overall

Excludes sample types: no information, insufficient, non-probability.

# Absolute bias against response rate, points coloured by project, with a
# single loess curve across all projects. Keeps only rows with mode "f2f" and
# a sample type other than "no info", "insuff" or "non-prob".
metadata1 %>%
  filter(!S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob")) %>%
  filter(mode == "f2f") %>%
  ggplot(aes(x = S_RR1_CALC_VALUE, y = Q_Abs_bias_internal_criteria)) +
  geom_point(aes(colour = T_SURVEY_NAME), size = 2, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_x_continuous(limits = c(0, 1)) +
  theme_bw()

Excludes RR >= 0.9, and sample types: no information, insufficient, non-probability.

# Same scatter plot as the overall one, additionally restricted to surveys
# with a response rate below 0.9. The x axis still spans 0 to 1.
metadata1 %>%
  filter(!S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob")) %>%
  filter(mode == "f2f") %>%
  filter(S_RR1_CALC_VALUE < 0.9) %>%
  ggplot(aes(x = S_RR1_CALC_VALUE, y = Q_Abs_bias_internal_criteria)) +
  geom_point(aes(colour = T_SURVEY_NAME), size = 2, alpha = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_x_continuous(limits = c(0, 1)) +
  theme_bw()

By sample type

Excludes RR >= 0.9, and sample types: no information, insufficient, non-probability.

# Absolute bias against response rate with a loess curve, one panel per
# sample type. Keeps rows with mode "f2f", response rate below 0.9 and a
# sample type other than "no info", "insuff" or "non-prob".
metadata1 %>%
  filter(!S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob")) %>%
  filter(mode == "f2f") %>%
  filter(S_RR1_CALC_VALUE < 0.9) %>%
  ggplot(aes(x = S_RR1_CALC_VALUE, y = Q_Abs_bias_internal_criteria)) +
  geom_point(size = 2, alpha = 0.1) +
  geom_smooth(method = "loess", se = FALSE, size = 1) +
  facet_wrap(~ S_SAMPLE_TYPE) +
  theme_bw()

By sample type and project

Excludes RR >= 0.9, and sample types: no information, insufficient, non-probability.

# Absolute bias against response rate with a loess curve, in a grid with
# projects as rows and sample types as columns. Keeps rows with mode "f2f",
# response rate below 0.9 and a sample type other than "no info", "insuff"
# or "non-prob".
metadata1 %>%
  filter(!S_SAMPLE_TYPE %in% c("no info", "insuff", "non-prob")) %>%
  filter(mode == "f2f") %>%
  filter(S_RR1_CALC_VALUE < 0.9) %>%
  ggplot(aes(x = S_RR1_CALC_VALUE, y = Q_Abs_bias_internal_criteria)) +
  geom_point(size = 2, alpha = 0.1) +
  geom_smooth(method = "loess", se = FALSE) +
  facet_grid(T_SURVEY_NAME ~ S_SAMPLE_TYPE) +
  theme_bw()

