library(tidyverse) # for manipulating data
library(labelled) # for using labels
library(flextable) # for formatting output tables
library(readr) # for reading csv files
library(broom) # for dealing with annoying output chunks
library(scales) # for percentage labelling
library(countrycode) # for working with country names
SUPPLEMENTARY MATERIALS
As accompanying material to “Multi-project assessments of sample quality in cross-national surveys: The role of weights in applying external and internal measures of sample bias”, this document presents details of data transformations, statistical procedures, and exact results of analyses performed in the paper’s main body. Code chunks are hidden by default but can be accessed by clicking the relevant buttons. All calculations rely on the publicly available integrated .csv data file.
# Load the integrated, semicolon-delimited data file used by every analysis
# below. Cells equal to "Not applicable" are read as NA. Columns that are not
# listed in `col_types` are kept as character; the survey year and the listed
# proportion / effect-size / bias measures are parsed as doubles.
# Each column is specified exactly once (the original listed the two
# `*_no_weights` absolute-bias columns twice).
ESM2e02 <- read_delim(
  "ESM2e02.csv",
  delim = ";",
  escape_double = FALSE,
  na = "Not applicable",
  trim_ws = TRUE,
  col_types = cols(
    .default = col_character(),
    T_SURVEY_YEAR = col_double(),
    Q_PROP_FEM_POPULATION_18_74 = col_double(),
    # External criterion: effect sizes
    Q_EFFECT_SIZE_PROP_FEMALE_EXTERNAL_18_74_psweights = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_EXTERNAL_18_74_dweights_ESS = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_EXTERNAL_18_74_no_weights = col_double(),
    # Internal criterion, "strict" variant
    Q_PROP_FEM_INTERNAL_KOHLER_psweights_strict = col_double(),
    Q_PROP_FEM_INTERNAL_KOHLER_no_weights_strict = col_double(),
    Q_SUBSAMPLE_SIZE_INTERNAL_KOHLER_strict = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_INTERNAL_KOHLER_psweights_strict = col_double(),
    Q_EFFECT_SIZE_PROP_FEMALE_INTERNAL_KOHLER_no_weights_strict = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_psweights_strict = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_no_weights_strict = col_double(),
    Q_BIAS_INTERNAL_KOHLER_psweights_strict = col_double(),
    Q_BIAS_INTERNAL_KOHLER_no_weights_strict = col_double(),
    # Absolute bias: design weights (ESS) vs no weights
    Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_dweights_ESS = col_double(),
    Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_no_weights = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_dweights_ESS = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_no_weights = col_double(),
    # Absolute bias: post-stratification weights
    Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_psweights = col_double(),
    Q_ABS_BIAS_INTERNAL_KOHLER_psweights = col_double()
  )
)
DISTRIBUTION OF THE TYPES OF WEIGHTS AVAILABLE ACROSS THE FOUR PROJECTS
Raw counts and row percentages of data represented in the paper in Figure 1.
# Cross-tabulate the number of surveys per weight type (rows) and project
# (columns); combinations that never occur are shown as 0.
ESM2e02 %>%
  count(T_SURVEY_NAME, T_Weighting_factor2) %>%
  rename("Type of weight" = T_Weighting_factor2) %>%
  pivot_wider(
    names_from = T_SURVEY_NAME,
    values_from = n,
    values_fill = 0
  ) %>%
  qflextable() %>%
  # Centre the four project columns, then every non-text column.
  align(j = 2:5, align = "center") %>%
  align_nottext_col(align = "center")
Type of weight | EB | EQLS | ESS | ISSP |
Post-stratification weights | 462 | 64 | 0 | 207 |
Post-stratification weights with design weights | 0 | 61 | 196 | 0 |
No weights | 0 | 0 | 3 | 132 |
# Same cross-tabulation as the count table, expressed as the share of each
# weight type within a project (formatted to one decimal place); combinations
# that never occur are shown as "0%".
ESM2e02 %>%
  count(T_SURVEY_NAME, T_Weighting_factor2) %>%
  group_by(T_SURVEY_NAME) %>%
  mutate(Percent = percent(n / sum(n), accuracy = 0.1)) %>%
  ungroup() %>%
  # Drop the raw count and label the weight-type column in one step.
  select(
    T_SURVEY_NAME,
    "Type of weight" = T_Weighting_factor2,
    Percent
  ) %>%
  pivot_wider(
    names_from = T_SURVEY_NAME,
    values_from = Percent,
    values_fill = "0%"
  ) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align(j = 1, align = "left")
Type of weight | EB | EQLS | ESS | ISSP |
Post-stratification weights | 100.0% | 51.2% | 0% | 61.1% |
Post-stratification weights with design weights | 0% | 48.8% | 98.5% | 0% |
No weights | 0% | 0% | 1.5% | 38.9% |
STRICT VS LENIENT APPROACH TO INTERNAL CRITERIA: SEE SECTION 4.3.
Pearson correlation coefficients between biases resulting from internal criteria without weights according to the strict approach vs the lenient approach. Calculations were performed for surveys within the two projects for which the data needed to implement the strict approach were available.
# Per-project Pearson correlation between the unweighted internal absolute
# bias and its "strict" counterpart, restricted to ESS and EQLS. Pairs with a
# missing value in either column are excluded from the correlation.
ESM2e02 %>%
  filter(T_SURVEY_NAME %in% c("ESS", "EQLS")) %>%
  group_by(T_SURVEY_NAME) %>%
  summarize(
    # Column header spelling fixed (was "Perason correlation").
    `Pearson correlation` = cor(
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights,
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights_strict,
      method = "pearson",
      use = "pairwise.complete.obs"
    ),
    .groups = "drop"
  ) %>%
  rename("Project name" = T_SURVEY_NAME) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align_nottext_col(align = "center") %>%
  colformat_num(digits = 2, na_str = "N/A")
Project name | Pearson correlation |
EQLS | 0.93 |
ESS | 0.96 |
DIFFERENCES IN THE VALUES OF BIAS ACCORDING TO INTERNAL AND EXTERNAL CRITERIA: NO WEIGHT VS DESIGN WEIGHT
Mean and median values for data represented in Figure 2.
# For ESS surveys flagged "DEFF>1", compute the change in absolute bias when
# design weights are applied (design-weighted minus unweighted) for the
# external and the internal criterion, then report the mean and median change
# per ESS wave.
ESM2e02 %>%
  filter(T_SURVEY_NAME == "ESS", T_DEFFp_ESS == "DEFF>1") %>%
  mutate(
    delta1 = Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_dweights_ESS -
      Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_no_weights,
    delta2 = Q_ABS_BIAS_INTERNAL_KOHLER_dweights_ESS -
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  group_by(T_SURVEY_EDITION) %>%
  summarise(
    meanXdifference_external = mean(delta1),
    medianXdifference_external = median(delta1),
    meanXdifference_internal = mean(delta2),
    medianXdifference_internal = median(delta2),
    .groups = "drop"
  ) %>%
  rename(
    "ESS wave" = T_SURVEY_EDITION,
    "Mean difference \n external" = meanXdifference_external,
    "Median difference \n external" = medianXdifference_external,
    "Mean difference \n internal" = meanXdifference_internal,
    "Median difference \n internal" = medianXdifference_internal
  ) %>%
  qflextable() %>%
  theme_booktabs() %>%
  # TRUE instead of T: T is an ordinary variable that can be reassigned.
  align_nottext_col(align = "center", header = TRUE) %>%
  colformat_num(j = 2:5, digits = 1, na_str = "N/A")
ESS wave | Mean difference external | Median difference external | Mean difference internal | Median difference internal |
ESS2002 | -0.17 | 0.15 | 0.04 | 0.05 |
ESS2004 | -0.02 | -0.08 | -0.14 | -0.07 |
ESS2006 | -0.75 | -0.70 | 0.07 | -0.03 |
ESS2008 | -0.41 | -0.07 | 0.11 | 0.01 |
ESS2010 | -0.13 | -0.08 | 0.11 | 0.00 |
ESS2012 | -0.27 | -0.06 | -0.20 | 0.00 |
ESS2014 | -0.30 | -0.14 | 0.09 | 0.08 |
ESS2016 | -0.25 | -0.22 | 0.07 | 0.04 |
Mean and median differences as referenced in section 5.1.
# Overall (all waves pooled) mean and median change in absolute bias when ESS
# design weights are applied, for surveys flagged "DEFF>1". Summary columns
# are named "<statistic>X<criterion>" so the name can be split on "X" and
# reshaped into one row per criterion with `mean` and `median` columns.
ESM2e02 %>%
  filter(T_SURVEY_NAME == "ESS", T_DEFFp_ESS == "DEFF>1") %>%
  mutate(
    ext_change = Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_dweights_ESS -
      Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_no_weights,
    int_change = Q_ABS_BIAS_INTERNAL_KOHLER_dweights_ESS -
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  summarise(
    meanXdifference_external = mean(ext_change),
    medianXdifference_external = median(ext_change),
    meanXdifference_internal = mean(int_change),
    medianXdifference_internal = median(int_change),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = 1:4,
    names_to = c("fun", "Difference"),
    names_sep = "X",
    values_to = "values"
  ) %>%
  pivot_wider(names_from = fun, values_from = values) %>%
  qflextable() %>%
  theme_booktabs() %>%
  colformat_num(j = 2:3, digits = 1, na_str = "N/A")
Difference | mean | median |
difference_external | -0.28 | -0.08 |
difference_internal | 0.02 | 0.00 |
WITHIN-PROJECT DIFFERENCES BETWEEN BIAS WITH AND WITHOUT POST-STRATIFICATION WEIGHTS
See Table 2 in the paper.
# For surveys whose T_Weighing_factor is "Total weights present in dataset",
# compute the change in absolute bias when post-stratification weights are
# applied (weighted minus unweighted) for the external and the internal
# criterion, then report the mean and median change per project.
ESM2e02 %>%
  filter(T_Weighing_factor == "Total weights present in dataset") %>%
  mutate(
    delta1 = Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_psweights -
      Q_ABS_BIAS_PROP_FEMALE_EXTERNAL_no_weights,
    delta2 = Q_ABS_BIAS_INTERNAL_KOHLER_psweights -
      Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  group_by(T_SURVEY_NAME) %>%
  summarise(
    mean_external = mean(delta1),
    median_external = median(delta1),
    # Missing internal differences are ignored (TRUE rather than the
    # reassignable shorthand T).
    mean_internal = mean(delta2, na.rm = TRUE),
    median_internal = median(delta2, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  rename(
    "Project name" = T_SURVEY_NAME,
    "Mean difference \n external" = mean_external,
    "Median difference \n external" = median_external,
    "Mean difference \n internal" = mean_internal,
    "Median difference \n internal" = median_internal
  ) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align_nottext_col(align = "center") %>%
  # NOTE(review): only columns 2:3 are targeted although the table has four
  # numeric columns (2:5) - confirm whether this is intended.
  colformat_num(j = 2:3, big.mark = ",", digits = 3, na_str = "N/A")
Project name | Mean difference external | Median difference external | Mean difference internal | Median difference internal |
EB | -2.02 | -1.58 | -0.70 | -0.47 |
EQLS | -3.49 | -3.58 | -0.33 | -0.26 |
ESS | -2.01 | -1.68 | -0.37 | -0.30 |
ISSP | -2.71 | -2.25 | -0.18 | -0.14 |
INTERNAL CRITERIA WITH NO WEIGHTS: THE OUTLIERS
Descriptive statistics of data referenced in section 6.
# Per-project descriptive statistics of the unweighted internal absolute bias:
# quartiles, interquartile range and the upper outlier threshold
# (Q3 + 1.5 * IQR). Missing values are ignored.
ESM2e02 %>%
  group_by(T_SURVEY_NAME) %>%
  summarise(
    Q1 = quantile(Q_ABS_BIAS_INTERNAL_KOHLER_no_weights, probs = 0.25, na.rm = TRUE),
    # Fixed: the median is the 0.5 quantile. The original used probs = 0.25,
    # which made this column a copy of Q1.
    median = quantile(Q_ABS_BIAS_INTERNAL_KOHLER_no_weights, probs = 0.5, na.rm = TRUE),
    Q3 = quantile(Q_ABS_BIAS_INTERNAL_KOHLER_no_weights, probs = 0.75, na.rm = TRUE),
    IQR = IQR(Q_ABS_BIAS_INTERNAL_KOHLER_no_weights, na.rm = TRUE),
    # Reuses the Q3 and IQR summaries computed just above.
    Outlier_threshold = Q3 + 1.5 * IQR,
    .groups = "drop"
  ) %>%
  rename(
    "Project name" = T_SURVEY_NAME,
    "Outlier \n threshold" = Outlier_threshold
  ) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align_nottext_col(align = "center") %>%
  colformat_num(j = 2:3, digits = 2, na_str = "N/A") %>%
  colformat_num(j = 4:6, digits = 3, na_str = "N/A")
Project name | Q1 | median | Q3 | IQR | Outlier threshold |
EB | 0.60 | 0.60 | 2.12 | 1.52 | 4.41 |
EQLS | 0.55 | 0.55 | 1.84 | 1.29 | 3.78 |
ESS | 0.49 | 0.49 | 1.69 | 1.20 | 3.49 |
ISSP | 0.51 | 0.51 | 1.98 | 1.47 | 4.18 |
A list of outliers as represented in Figure 4.
# List the surveys whose unweighted internal absolute bias exceeds their own
# project's upper outlier threshold (Q3 + 1.5 * IQR, computed within project
# with missing values ignored), and add the country name looked up from the
# two-letter country code.
ESM2e02 %>%
  group_by(T_SURVEY_NAME) %>%
  filter(
    Q_ABS_BIAS_INTERNAL_KOHLER_no_weights >
      quantile(Q_ABS_BIAS_INTERNAL_KOHLER_no_weights, probs = 0.75, na.rm = TRUE) +
        1.5 * IQR(Q_ABS_BIAS_INTERNAL_KOHLER_no_weights, na.rm = TRUE)
  ) %>%
  select(
    T_SURVEY_NAME,
    T_SURVEY_YEAR,
    T_COUNTRY,
    Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  rename(
    "Project name" = T_SURVEY_NAME,
    "Year" = T_SURVEY_YEAR,
    "Country code" = T_COUNTRY,
    "Internal bias" = Q_ABS_BIAS_INTERNAL_KOHLER_no_weights
  ) %>%
  ungroup() %>%
  mutate(
    `Country name` = countrycode(
      `Country code`,
      origin = "iso2c",
      destination = "country.name"
    )
  ) %>%
  relocate(`Project name`, `Year`, `Country code`, `Country name`, `Internal bias`) %>%
  qflextable() %>%
  theme_booktabs() %>%
  align(j = 2:4, align = "center") %>%
  # TRUE instead of T: T is an ordinary variable that can be reassigned.
  align_nottext_col(align = "center", header = TRUE) %>%
  colformat_num(digits = 3, big.mark = "", na_str = "N/A")
Project name | Year | Country code | Country name | Internal bias |
EB | 2004 | MT | Malta | 4.55 |
EB | 2005 | IT | Italy | 4.88 |
EB | 2014 | SE | Sweden | 7.01 |
EB | 2015 | SE | Sweden | 6.38 |
EB | 2016 | HR | Croatia | 4.66 |
EB | 2016 | SE | Sweden | 6.98 |
EQLS | 2003 | GB | United Kingdom | 4.58 |
EQLS | 2007 | IT | Italy | 3.99 |
EQLS | 2011 | CY | Cyprus | 4.01 |
EQLS | 2016 | HR | Croatia | 3.97 |
ESS | 2008 | SK | Slovakia | 11.16 |
ESS | 2010 | DK | Denmark | 3.56 |
ESS | 2010 | SK | Slovakia | 11.52 |
ESS | 2012 | CZ | Czechia | 3.52 |
ESS | 2012 | SK | Slovakia | 10.15 |
ESS | 2016 | NO | Norway | 3.68 |
ISSP | 2003 | FR | France | 6.88 |
ISSP | 2003 | NL | Netherlands | 6.15 |
ISSP | 2004 | FR | France | 6.07 |
ISSP | 2004 | NL | Netherlands | 6.15 |
ISSP | 2005 | FR | France | 5.32 |
ISSP | 2006 | FR | France | 8.34 |
ISSP | 2006 | NL | Netherlands | 6.33 |
ISSP | 2008 | HU | Hungary | 4.62 |
ISSP | 2008 | IT | Italy | 5.00 |
ISSP | 2008 | NL | Netherlands | 4.78 |
ISSP | 2009 | FR | France | 6.28 |
ISSP | 2009 | HU | Hungary | 4.64 |
ISSP | 2010 | FR | France | 9.66 |
ISSP | 2013 | FR | France | 4.69 |
ISSP | 2013 | NL | Netherlands | 6.87 |
ISSP | 2014 | NL | Netherlands | 6.87 |
ISSP | 2015 | FR | France | 4.75 |
ISSP | 2015 | GE | Georgia | 5.82 |
