library(tidyverse) # for manipulating data
library(labelled) # for using labels
library(flextable) # for formatting output tables
library(readr) # for reading csv files
library(broom) # for dealing with annoying output chunks
library(scales) # for percentage labelling
library(countrycode) # for working with country names

SUPPLEMENTARY MATERIALS

As accompanying material to “Multi-project assessments of sample quality in cross-national surveys: The role of weights in applying external and internal measures of sample bias”, this document presents details of data transformations, statistical procedures, and exact results of analyses performed in the paper’s main body. Code chunks are hidden by default but can be accessed by clicking the relevant buttons. All calculations rely on the publicly available integrated .csv data file.

# Load the integrated, semicolon-delimited data file into `ESM2e02`.
# Every column is read as character unless it is listed below; the listed
# year, proportion, effect-size and bias columns are parsed as doubles.
# Cells containing "Not applicable" are read as NA.
# Each column appears exactly once in the specification.
ESM2e02 <- read_delim(
  "ESM2e02.csv",
  delim = ";",
  escape_double = FALSE,
  na = "Not applicable",
  trim_ws = TRUE,
  col_types = cols(
    .default = col_character(),
    T_SURVEY_YEAR = col_double(),
    Q_PROP_FEM_POPULATION_18_74 = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_EXTERNAL_18_74_psweights = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_EXTERNAL_18_74_dweights_ESS = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_EXTERNAL_18_74_no_weights = col_double(),
    Q_PROP_FEM_INTERNAL_KOHLER_psweights_strict = col_double(),
    Q_PROP_FEM_INTERNAL_KOHLER_no_weights_strict = col_double(),
    Q_SUBSAMPLE_SIZE_INTERNAL_KOHLER_strict = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_INTERNAL_KOHLER_psweights_strict = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_INTERNAL_KOHLER_no_weights_strict = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_psweights_strict = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_no_weights_strict = col_double(),
    Q_BIAS_INTERNAL_KOHLER_psweights_strict = col_double(),
    Q_BIAS_INTERNAL_KOHLER_no_weights_strict = col_double(),
    Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_dweights_ESS = col_double(),
    Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_no_weights = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_dweights_ESS = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_no_weights = col_double(),
    Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_psweights = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_psweights = col_double()
  )
)

DISTRIBUTION OF THE TYPES OF WEIGHTS AVAILABLE ACROSS THE FOUR PROJECTS

Raw counts and row percentages for the data presented in Figure 1 of the paper.

# Raw counts: one row per type of weight, one column per project.
# Weight types that never occur in a project are shown as 0.
ESM2e02 %>%
  count(T_SURVEY_NAME, T_Weighting_factor2) %>%
  rename("Type of weight" = T_Weighting_factor2) %>%
  pivot_wider(
    names_from = T_SURVEY_NAME,
    values_from = n,
    values_fill = 0
  ) %>%
  qflextable() %>%
  # NOTE(review): j = 2:5 assumes exactly four project columns after the
  # pivot - confirm against the data.
  align(j = 2:5, align = "center") %>%
  align_nottext_col(align = "center")
# Within-project percentages: for each project, the share of its rows that
# fall under each type of weight, formatted to one decimal place.
ESM2e02 %>%
  count(T_SURVEY_NAME, T_Weighting_factor2) %>%
  group_by(T_SURVEY_NAME) %>%
  mutate(Percent = percent(n / sum(n), accuracy = 0.1)) %>%
  ungroup() %>%
  select(T_SURVEY_NAME, T_Weighting_factor2, Percent) %>%
  pivot_wider(
    names_from = T_SURVEY_NAME,
    values_from = Percent,
    # Use the same one-decimal format that percent(accuracy = 0.1) produces
    # for the computed cells, so absent combinations read "0.0%", not "0%".
    values_fill = "0.0%"
  ) %>%
  rename("Type of weight" = T_Weighting_factor2) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align(j = 1, align = "left")


STRICT VS LENIENT APPROACH TO INTERNAL CRITERIA: SEE SECTION 4.3.

Pearson correlation coefficients between the biases resulting from internal criteria without weights under the strict approach and under the lenient approach. Calculations were performed for surveys within the two projects for which the data needed to implement the strict approach were available.

# Per project (ESS and EQLS only): Pearson correlation between the unweighted
# absolute internal bias and its "_strict" counterpart, computed on the rows
# where both values are present.
ESM2e02 %>%
  filter(T_SURVEY_NAME %in% c("ESS", "EQLS")) %>%
  group_by(T_SURVEY_NAME) %>%
  summarize(
    `Pearson correlation` = cor(
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights,
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights_strict,
      method = "pearson",
      use = "pairwise.complete.obs"
    ),
    .groups = "drop"
  ) %>%
  rename("Project name" = T_SURVEY_NAME) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align_nottext_col(align = "center") %>%
  colformat_num(digits = 2, na_str = "N/A")


DIFFERENCES IN THE VALUES OF BIAS ACCORDING TO INTERNAL AND EXTERNAL CRITERIA: NO WEIGHT VS DESIGN WEIGHT

Mean and median values for the data presented in Figure 2.

# Per ESS wave, restricted to ESS rows flagged "DEFF>1": mean and median of
# the change in absolute bias (design-weighted minus unweighted) for the
# external and the internal criterion.
ESM2e02 %>%
  filter(T_SURVEY_NAME == "ESS", T_DEFFp_ESS == "DEFF>1") %>%
  mutate(
    shift_external = Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_dweights_ESS -
      Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_no_weights,
    shift_internal = Q_ABS_BIAS_INTERNAL_KOHLER_dweights_ESS -
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  group_by(T_SURVEY_EDITION) %>%
  # No na.rm here: a missing difference within a wave makes that wave's
  # summary NA.
  summarise(
    "Mean difference \n external" = mean(shift_external),
    "Median difference \n external" = median(shift_external),
    "Mean difference \n internal" = mean(shift_internal),
    "Median difference \n internal" = median(shift_internal),
    .groups = "drop"
  ) %>%
  rename("ESS wave" = T_SURVEY_EDITION) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align_nottext_col(align = "center", header = TRUE) %>%
  colformat_num(j = 2:5, digits = 1, na_str = "N/A")


Mean and median differences as referenced in section 5.1.

# Across all ESS rows flagged "DEFF>1": mean and median change in absolute
# bias (design-weighted minus unweighted), reshaped so that each criterion is
# a row and the two statistics are columns.
ESM2e02 %>%
  filter(T_SURVEY_NAME == "ESS", T_DEFFp_ESS == "DEFF>1") %>%
  mutate(
    delta1 = Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_dweights_ESS -
      Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_no_weights,
    delta2 = Q_ABS_BIAS_INTERNAL_KOHLER_dweights_ESS -
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  # The "X" in each summary name is the separator that pivot_longer() splits
  # on below: "<statistic>X<criterion label>".
  summarise(
    meanXdifference_external = mean(delta1),
    medianXdifference_external = median(delta1),
    meanXdifference_internal = mean(delta2),
    medianXdifference_internal = median(delta2),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = everything(),
    names_to = c("fun", "Difference"),
    names_sep = "X",
    values_to = "values"
  ) %>%
  pivot_wider(names_from = fun, values_from = values) %>%
  qflextable() %>%
  theme_booktabs() %>%
  colformat_num(j = 2:3, digits = 1, na_str = "N/A")


WITHIN-PROJECT DIFFERENCES BETWEEN BIAS WITH AND WITHOUT POST-STRATIFICATION WEIGHTS

See Table 2 in the paper.

# Per project, restricted to rows whose weighting factor is
# "Total weights present in dataset": mean and median change in absolute bias
# (post-stratification-weighted minus unweighted) for the external and the
# internal criterion.
# NOTE(review): the filter column is spelled `T_Weighing_factor`, whereas the
# weight-type tables use `T_Weighting_factor2` - confirm that this column name
# exists in the data file.
ESM2e02 %>%
  filter(T_Weighing_factor == "Total weights present in dataset") %>%
  mutate(
    delta1 = Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_psweights -
      Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_no_weights,
    delta2 = Q_ABS_BIAS_INTERNAL_KOHLER_psweights -
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  group_by(T_SURVEY_NAME) %>%
  # Only the internal differences drop missing values; the external summaries
  # become NA if any external difference in the project is missing.
  summarise(
    mean_external = mean(delta1),
    median_external = median(delta1),
    mean_internal = mean(delta2, na.rm = TRUE),
    median_internal = median(delta2, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  rename(
    "Project name" = T_SURVEY_NAME,
    "Mean difference \n external" = mean_external,
    "Median difference \n external" = median_external,
    "Mean difference \n internal" = mean_internal,
    "Median difference \n internal" = median_internal
  ) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align_nottext_col(align = "center") %>%
  # Format all four numeric columns (external and internal), not just 2:3.
  colformat_num(j = 2:5, big.mark = ",", digits = 3, na_str = "N/A")


INTERNAL CRITERIA WITH NO WEIGHTS: THE OUTLIERS

Descriptive statistics of data referenced in section 6.

# Per project: quartiles, IQR and the upper outlier threshold
# (Q3 + 1.5 * IQR) of the unweighted absolute internal bias, ignoring
# missing values.
ESM2e02 %>%
  group_by(T_SURVEY_NAME) %>%
  summarise(
    Q1 = quantile(
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights,
      probs = 0.25, na.rm = TRUE
    ),
    # The median is the 0.5 quantile (the 0.25 quantile would repeat Q1).
    median = quantile(
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights,
      probs = 0.5, na.rm = TRUE
    ),
    Q3 = quantile(
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights,
      probs = 0.75, na.rm = TRUE
    ),
    IQR = IQR(Q_ABS_BIAS_INTERNAL_KOHLER_no_weights, na.rm = TRUE),
    # Reuses the Q3 and IQR summaries computed just above.
    Outlier_threshold = Q3 + 1.5 * IQR,
    .groups = "drop"
  ) %>%
  rename(
    "Project name" = T_SURVEY_NAME,
    "Outlier \n threshold" = Outlier_threshold
  ) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align_nottext_col(align = "center") %>%
  colformat_num(j = 2:3, digits = 2, na_str = "N/A") %>%
  colformat_num(j = 4:6, digits = 3, na_str = "N/A")


A list of the outliers presented in Figure 4.

# Rows whose unweighted absolute internal bias exceeds their own project's
# upper outlier threshold (Q3 + 1.5 * IQR, computed within project), listed
# with year, country code and the country name derived from that code.
ESM2e02 %>%
  group_by(T_SURVEY_NAME) %>%
  filter(
    Q_ABS_BIAS_INTERNAL_KOHLER_no_weights >
      quantile(
        Q_ABS_BIAS_INTERNAL_KOHLER_no_weights,
        probs = 0.75, na.rm = TRUE
      ) +
        1.5 * IQR(Q_ABS_BIAS_INTERNAL_KOHLER_no_weights, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Select and rename in a single step.
  select(
    "Project name" = T_SURVEY_NAME,
    "Year" = T_SURVEY_YEAR,
    "Country code" = T_COUNTRY,
    "Internal bias" = Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  # Country codes are treated as ISO 3166-1 alpha-2 ("iso2c").
  mutate(
    `Country name` = countrycode(
      `Country code`,
      origin = "iso2c",
      destination = "country.name"
    ),
    .after = `Country code`
  ) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align(j = 2:4, align = "center") %>%
  align_nottext_col(align = "center", header = TRUE) %>%
  colformat_num(digits = 3, big.mark = "", na_str = "N/A")
