options(digits = 2)
library(pacman)
p_load(
  kirkegaard,
  googlesheets4,
  rvest,
  stringi,
  polycor,
  rms,
  glmnet,
  writexl,
  renv,
  osfr,
  spatstat
)
theme_set(theme_bw())
#strip Wikipedia sources in brackets from strings
str_rm_sources = function(x) {
  str_replace_all(x, "\\[[^\\]]+\\]", "")
}
#length-zero vector or empty data frame to NA
l0_to_NA = function(x) {
  if (is.data.frame(x)) {
    if (nrow(x) == 0) return(NA)
  }
  if (length(x) == 0) return(NA)
  x
}
#infinite to NA, to avoid functions that give nonsense results when Inf values are present
Inf_to_NA = function(x) {
  x[is.infinite(x)] = NA
  x
}
#make psych::describe return a plain data frame (avoids its special print class)
describe = function(...) {
  y = psych::describe(...)
  class(y) = "data.frame"
  y
}
#convert a list column to a numeric vector, mapping NULLs to NA
list_to_numeric = function(x) {
  map_dbl(x, function(x) {
    if (is.null(x)) return(NA_real_)
    as.numeric(x)
  })
}
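A quick sanity check of these helpers (illustrative values, not from the original data):
str_rm_sources("Centre-right[1][2]") #"Centre-right"
Inf_to_NA(c(1, Inf, -Inf)) #1 NA NA
list_to_numeric(list(1, NULL, "2")) #1 NA 2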
#loess fit from simulation
#used to impute data when observed % is 0
loess_fit = read_rds("data/loess_fit.rds")
#reorder correlation matrix variables based on correlations (hierarchical clustering on 1 - r)
reorder_cormat <- function(cormat) {
  dd <- as.dist((1 - cormat)/2)
  hc <- hclust(dd)
  cormat[hc$order, hc$order]
}
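A minimal usage check (illustrative, not part of the original run); the exact ordering depends on the clustering:
iris[1:4] %>% wtd.cors() %>% reorder_cormat()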
#melt a symmetric matrix to long form without duplicate pairs
melt_to_long = function(x) {
  #index the lower half
  #without throwing away any potential NAs
  lower_idx = lower.tri(x) %>% which()
  #melt
  x_melt = x %>% reshape2::melt()
  #keep only the lower-triangle rows via the index
  x_melt = x_melt[lower_idx, ]
  x_melt
}
#test it
iris[1:4] %>% wtd.cors()
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.00 -0.12 0.87 0.82
## Sepal.Width -0.12 1.00 -0.43 -0.37
## Petal.Length 0.87 -0.43 1.00 0.96
## Petal.Width 0.82 -0.37 0.96 1.00
iris[1:4] %>% wtd.cors() %>% melt_to_long()
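This should return six rows, one for each unique pair below the diagonal (e.g. Sepal.Width vs. Sepal.Length with value -0.12).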
#labels
leftright_labels = c("Far-left", "Left", "Center-left", "Center", "Center-right", "Right", "Far-right")
#authenticate with Google
googlesheets4::gs4_auth("the.dfx@gmail.com")
#get sheets for journalist data
# voting = read_sheet("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0", sheet = "voting") %>% df_legalize_names()
voting = read_sheet("https://docs.google.com/spreadsheets/d/1o0f1XfA4wprMpuSeH0gUdPgdrfDQATD05qGrWlKFFVk/edit#gid=2060900662", sheet = "voting") %>% df_legalize_names()
## Auto-refreshing stale OAuth token.
## Reading from "*** Being Fixed*** EXTERNAL Journalist political preferences dataset"
## Range "'voting'"
voting$Journalist_pct_orig = voting$Journalist_pct
voting_orig = voting
# parties = read_sheet("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0", sheet = "parties") %>% df_legalize_names()
parties = read_sheet("https://docs.google.com/spreadsheets/d/1o0f1XfA4wprMpuSeH0gUdPgdrfDQATD05qGrWlKFFVk/edit#gid=355416179", sheet = "parties") %>% df_legalize_names()
## Reading from "*** Being Fixed*** EXTERNAL Journalist political preferences dataset"
## Range "'parties'"
parties_orig = parties
# samples = read_sheet("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0", sheet = "samples") %>% df_legalize_names()
samples = read_sheet("https://docs.google.com/spreadsheets/d/1o0f1XfA4wprMpuSeH0gUdPgdrfDQATD05qGrWlKFFVk/edit#gid=426865321", sheet = "samples") %>% df_legalize_names()
## Reading from "*** Being Fixed*** EXTERNAL Journalist political preferences dataset"
## Range "'samples'"
assert_that(!anyDuplicated(samples$ID))
## [1] TRUE
samples_orig = samples
#party ratings
party_ratings = read_sheet("https://docs.google.com/spreadsheets/d/1tHT_ZJRg6M2CtJQyLePGNasPPIR8_TrJ6tL5LzbdIBE/edit#gid=0", sheet = "parties") %>% df_legalize_names()
## Reading from "Parties rating collected"
## Range "'parties'"
party_ratings_meta = read_sheet("https://docs.google.com/spreadsheets/d/1tHT_ZJRg6M2CtJQyLePGNasPPIR8_TrJ6tL5LzbdIBE/edit#gid=0", sheet = "raters") %>% df_legalize_names()
## Reading from "Parties rating collected"
## Range "'raters'"
## New names:
## * `` -> ...5
#temporal data for plots
timeseries = read_sheet("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0", sheet = "longitudinal")
## Reading from "INTERNAL Journalist political preferences dataset"
## Range "'longitudinal'"
#switch to English names
#one-time-use code
if (F) {
  xx = left_join(voting, parties, by = c("Party_English", "Country"))
  #add row numbers so we can find the rows' locations after sorting
  xx$row = 1:nrow(xx)
  #view rows that are not "Other"
  xx %>% filter(Party != "Other") %>% View()
  #insert "Other"
  xx$Party_English = if_else(xx$Party == "Other", true = "Other", false = xx$Party_English)
  #export
  xx %>% write_clipboard()
}
To analyze the data, we need to recode them into relative rates (RR) and odds ratios (OR), as well as their logs.
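As a worked example with hypothetical numbers: suppose 40% of journalists and 20% of the general population vote for some party.
j = 40; g = 20
j - g #d = 20 percentage points
j / g #RR = 2
(j / (100 - j)) / (g / (100 - g)) #OR = (40/60)/(20/80) = 2.67
log10(j / g) #logRR = 0.30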
#samples data
#exclude Russian data
samples = samples %>% filter(Country != "Russia")
#fix types
samples$n = samples$n %>% list_to_numeric()
samples$n_no_party_data = samples$n_no_party_data %>% list_to_numeric()
#recode to sensible format
samples$n_party_data = samples$n - samples$n_no_party_data
#impute likely sample size
n_sumstats = samples$n %>% describe()
n_party_sumstats = samples$n_party_data %>% describe()
n_party_sumstats
n_party_frac_sumstats = (samples$n_party_data / samples$n) %>% describe()
n_party_frac_sumstats
#save orig
samples$n_party_data_orig = samples$n_party_data
#impute likely values
samples$n_party_data = case_when(
  #when we have data, use it
  !is.na(samples$n_party_data) ~ samples$n_party_data,
  #when we have the full sample size, use that and assume the median answer fraction
  !is.na(samples$n) ~ samples$n * n_party_frac_sumstats$median,
  #otherwise impute the median effective sample size
  is.na(samples$n) ~ n_party_sumstats$median
)
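For instance (hypothetical numbers): a sample with n = 200 but an unknown answer count gets 200 times the median answer fraction, while a sample missing n entirely falls back on the median effective sample size across all samples.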
#fix the ID column
#some IDs are given as ranges (Russian data)
voting$journalist_sample_ID = voting$Journalist_sample %>% list_to_numeric()
#remove excluded data
voting = voting %>% filter(is.na(skip))
#join with sample size data
voting = left_join(voting, samples %>% select(ID, n, n_party_data, Question_coded), by = c("journalist_sample_ID" = "ID"))
voting %<>% filter(!is.na(Journalist_pct))
#impute proportions for zeros
voting$Journalist_pct2 = voting$Journalist_pct_orig
voting$Journalist_pct = voting$Journalist_pct_orig
voting$Journalist_pct2[voting$Journalist_pct == 0] = predict(loess_fit, newdata = tibble(n = voting$n_party_data[voting$Journalist_pct == 0])) * 100
#renormalize to 100%
voting = voting %>% plyr::ddply(c("journalist_sample_ID", "Election_type"), function(d) {
  #skip if no zeros
  if (!any(d$Journalist_pct == 0)) return(d)
  #which were zero?
  which0 = (d$Journalist_pct == 0)
  #what was the original sum?
  #this is not always 100 because some votes go to "Other" parties
  orig_sum = sum(d$Journalist_pct)
  #investigate if not approximately 100
  if (abs(orig_sum - 100) > 2) browser()
  #as fractions
  d$Journalist_pct2 = d$Journalist_pct2 / orig_sum
  #scale the nonzero values down so the total is preserved
  d$Journalist_pct2[!which0] = d$Journalist_pct2[!which0] * (1 - sum(d$Journalist_pct2[which0]))
  #assert values ok
  # browser()
  # assert_that(sum(d$Journalist_pct2) == 1)
  #back to percent
  d$Journalist_pct2 = d$Journalist_pct2 * orig_sum
  d
})
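To illustrate the renormalization with hypothetical numbers: if a sample has parties at 60%, 40%, and 0%, and the zero is imputed to 1%, the two nonzero values are scaled by (1 - 0.01) to 59.4% and 39.6%, so the total stays at 100%.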
#switch to the zero-adjusted values
voting$Journalist_pct = voting$Journalist_pct2
#add unique ID
voting %<>% mutate(
  party_country = Party + " | " + Country
)
#merge within countries
#define function
aggr_voting_data = function(x) {
  #base is mostly the same
  y = x[1, ]
  #sample IDs
  y$journalist_sample_ID = str_c(x$journalist_sample_ID %>% unique() %>% na.omit(), collapse = ", ")
  y$General_sample = str_c(x$General_sample %>% unique() %>% na.omit(), collapse = ", ")
  #total sample size
  y$n_total = sum(x$n, na.rm = T)
  y$n_party_total = sum(x$n_party_data, na.rm = T)
  #remove cols
  y$Journalist_sample = NULL
  y$Year = NULL
  y$Election_type = NULL
  y$n = NULL
  y$n_party_data = NULL
  y$Question_coded = NULL
  y$comments = NULL
  y$skip = NULL
  #means for the numeric columns, weighted by the square root of the sample size
  #if no sample size, impute 1's
  x$n_party_data[is.na(x$n_party_data)] = 1
  #get means
  y$Journalist_pct = wtd_mean(x$Journalist_pct, w = x$n_party_data %>% sqrt())
  y$Journalist_pct_orig = wtd_mean(x$Journalist_pct_orig, w = x$n_party_data %>% sqrt())
  y$General_pct = wtd_mean(x$General_pct, w = x$n_party_data %>% sqrt())
  #RR and OR
  y %>% mutate(
    d = Journalist_pct - General_pct,
    RR = Journalist_pct / General_pct,
    OR = (Journalist_pct / (100 - Journalist_pct)) / (General_pct / (100 - General_pct)),
    logRR = log10(RR),
    logOR = log10(OR),
    #add versions based on the unadjusted data
    d_orig = Journalist_pct_orig - General_pct,
    RR_orig = Journalist_pct_orig / General_pct,
    OR_orig = (Journalist_pct_orig / (100 - Journalist_pct_orig)) / (General_pct / (100 - General_pct)),
    logRR_orig = log10(RR_orig) %>% Inf_to_NA(),
    logOR_orig = log10(OR_orig) %>% Inf_to_NA()
  )
}
#aggregate
voting_aggr = plyr::ddply(voting, c("Country", "Party"), .fun = aggr_voting_data)
voting_aggr_no_old = plyr::ddply(voting %>% filter(Year >= 2005), c("Country", "Party"), .fun = aggr_voting_data)
#assert no duplicated links
#duplicates would indicate a data error
assert_that(!any(duplicated(parties$Wikipedia_link)))
## [1] TRUE
#unique ID
parties %<>% dplyr::mutate(
  party_country = Party + " | " + Country,
  wp_page = NA,
  Block = Block %>% str_to_lower() %>% factor() %>% fct_relevel("center"),
  id = 1:n()
)
We need to scrape some party data from Wikipedia.
#loop over rows of parties and fetch data
for (p in seq_along_rows(parties)) {
  party = parties$Party[p]
  country = parties$Country[p]
  #get page
  #do we have it already?
  party_page_file = sprintf("%s - %s.html", country, party)
  data_location = "data/parties/"
  party_page_file2 = data_location + party_page_file
  #does the file exist?
  if (!file.exists(party_page_file2)) {
    message(sprintf("%.0f%% -- Downloading page for %s -- %s", 100*p/nrow(parties), party, country))
    #download it
    party_page = parties$Wikipedia_link[p] %>% read_html()
    #save it
    party_page %>% as.character() %>% write_lines(party_page_file2)
  } else {
    #message(sprintf("%.0f%% -- Loading page for %s -- %s", 100*p/nrow(parties), party, country))
    #read from disk
    party_page = read_html(party_page_file2)
  }
  #add to table
  parties$wp_page[p] = list(party_page)
}
#parse vcard data
parties_vcard_data = map_df(seq_along_rows(parties), function(p) {
  # browser()
  suppressWarnings({
    y = try_else({
      #get vcard
      vcard = parties$wp_page[[p]] %>% html_node(".vcard") %>%
        #replace linebreaks with commas,
        #otherwise the separators disappear when the table is parsed
        str_replace_all("<br>", ", ") %>%
        #back to html
        read_html()
    })
    if (is.null(y)) return(NULL)
    #parse vcard table
    vcard_table = html_table(vcard) %>% .[[1]]
    #check for errors
    if (!is.data.frame(vcard_table)) browser()
    if (ncol(vcard_table) != 2) return(NULL)
    #set names
    colnames(vcard_table) = c("var", "data")
    vcard_table$var %<>% str_replace_all("\\s", "_") #replace whitespace (incl. non-breaking) with underscores
    #premake cols
    wp_ideolgy = vcard_table[vcard_table$var == "Ideology", 2] %>% l0_to_NA()
    wp_pol_position = vcard_table[vcard_table$var == "Political_position", 2] %>% l0_to_NA()
    #not vectors
    # if (!is.vector(wp_ideolgy)) browser()
    # if (!is.vector(wp_pol_position)) browser()
    #combine party data
    y = tibble(
      id = p,
      wp_ideolgy = wp_ideolgy %>% unlist(),
      wp_pol_position = wp_pol_position %>% unlist()
    )
    #check for errors
    if (nrow(y) != 1) browser()
  })
  y
}) %>%
  #post cleaning
  #done here vectorized instead of one row at a time
  mutate(
    wp_ideolgy = wp_ideolgy %>% str_rm_sources(),
    wp_pol_position = wp_pol_position %>% str_rm_sources() %>% str_replace_all(",", "")
  )
#merge
parties = dplyr::left_join(parties, parties_vcard_data, by = "id")
#code dummy predictors
ideology_tags = list(
  "socialism" = "socialism",
  "green" = "(eco\\-)|green",
  "conservative" = "conservative|conservatism",
  "nationalism" = "nationalism",
  "EU_skeptic" = "euroscepticism",
  "EU_positive" = "pro\\-europeanism",
  "feminism" = "feminism",
  "libertarianism" = "libertarianism",
  "national_conservatism" = "national conservatism",
  "agrarianism" = "agrarianism",
  "christian" = "christian",
  "communism" = "communism|(anti\\-capitalism)|marxism|trotskyism",
  "centrism" = "centrism",
  "RW_populism" = "right[\\- ]wing populism",
  "LW_populism" = "left[\\- ]wing populism",
  "populism" = "populism",
  "social_democracy" = "social democracy",
  "democratic_socialism" = "democratic socialism",
  "social_liberalism" = "social liberalism",
  "liberalism" = "\\bliberalism\\b",
  "direct_democracy" = "direct democracy"
)
#make dummies
parties_wp_ideology_dummies = map(ideology_tags, function(tag) {
  stri_detect_regex(parties$wp_ideolgy, tag, case_insensitive = TRUE)
}) %>%
  set_names(names(ideology_tags)) %>%
  as.data.frame() %>%
  mutate(tag_num = rowSums(.),
         id = 1:nrow(.)) %>%
  select(id, tag_num, everything())
#merge
parties = dplyr::left_join(parties %>% select(-Country), parties_wp_ideology_dummies, by = "id")
#set block levels
parties$Block %<>% fct_relevel("left", "center", "right")
The results don’t make a lot of sense because the tags can have substitution effects: overlapping tags (e.g. populism and right-wing populism) compete for the same variance.
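For instance, a single Wikipedia ideology string can trigger several tags at once (a minimal check using the regexes above):
stri_detect_regex("Right-wing populism", c("right[\\- ]wing populism", "populism"), case_insensitive = TRUE) #TRUE TRUE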
#informative rater IDs by country, sex, age
party_ratings_meta %<>% mutate(
  informative_id = str_glue("{Country}_{Sex}_{Age}") %>% str_uniquify()
)
#rater groups
all_raters = party_ratings %>% select(Portuguese_guy:REB) %>% names()
#good raters: drop raters 1 and 9 (Portuguese_guy and LOU), who agree poorly with the rest
good_raters = all_raters[-c(1, 9)]
for (v in all_raters) {
  #convert list column to numeric
  party_ratings[[v]] = party_ratings[[v]] %>% list_to_numeric()
}
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
#rescale to -3 to 3 format (as Wikipedia)
party_ratings[, all_raters] = party_ratings[, all_raters] - 4
#averages
party_ratings$mean = party_ratings[all_raters] %>% rowMeans(na.rm = T)
party_ratings$mean_good = party_ratings[good_raters] %>% rowMeans(na.rm = T)
#describe
party_ratings %>% select(Portuguese_guy:mean_good) %>% describe() %>% as.matrix()
## vars n mean sd median trimmed mad min max range skew
## Portuguese_guy 1 196 -0.6286 1.4 -1.000 -0.7177 1.5 -3.0 3.0 6.0 0.5252
## Arjen 2 196 -0.2398 1.6 0.000 -0.2975 1.5 -3.0 3.0 6.0 0.1923
## Victoria 3 196 -0.1485 1.9 -0.500 -0.1842 2.2 -3.0 3.0 6.0 0.2112
## S 4 196 -0.0255 1.7 0.000 0.0000 1.5 -3.0 3.0 6.0 -0.0997
## PET 5 196 -0.3316 1.6 0.000 -0.3671 1.5 -3.0 3.0 6.0 0.1442
## RY 6 196 -0.5515 1.4 -0.500 -0.6646 1.8 -3.0 3.0 6.0 0.5451
## R 7 196 0.3673 1.5 1.000 0.4494 1.5 -3.0 3.0 6.0 -0.4614
## FIL 8 196 0.2148 1.6 0.000 0.2722 2.2 -3.0 3.0 6.0 -0.2509
## LOU 9 196 -0.1837 1.9 0.000 -0.2215 3.0 -3.0 3.0 6.0 0.1635
## AS 10 196 -0.2704 1.6 -0.500 -0.3101 2.2 -3.0 3.0 6.0 0.2143
## EM 11 196 -0.0026 1.6 0.000 -0.0095 3.0 -3.0 3.0 6.0 0.0122
## TH 12 196 -0.0791 1.6 -0.350 -0.0696 2.0 -3.0 3.0 6.0 0.0044
## LOU_IS 13 196 -0.1546 1.2 0.000 -0.1525 1.5 -3.0 3.0 6.0 0.0187
## AND 14 196 -0.1786 1.7 0.000 -0.1962 1.5 -3.0 3.0 6.0 0.0786
## JAC 15 196 0.1480 1.9 0.000 0.1835 3.0 -3.0 3.0 6.0 -0.0755
## GUS 16 196 0.4541 1.7 1.000 0.4873 1.5 -3.0 3.0 6.0 -0.1736
## C 17 196 0.0459 1.6 0.000 0.0949 2.2 -3.0 3.0 6.0 -0.1554
## FI 18 196 0.1306 1.5 0.000 0.1399 1.5 -3.0 3.0 6.0 -0.1061
## MATH 19 196 -0.3245 1.2 -0.500 -0.3703 1.0 -3.0 3.0 6.0 0.3887
## A 20 196 0.2959 1.4 0.000 0.3671 1.5 -3.0 3.0 6.0 -0.2614
## BENJ 21 196 -0.1276 1.8 0.000 -0.1519 1.5 -3.0 3.0 6.0 0.1105
## SO 22 196 -0.0561 1.8 0.000 -0.0696 1.5 -3.0 3.0 6.0 0.0728
## J 23 196 0.0255 1.5 0.000 0.0538 2.2 -3.0 3.0 6.0 -0.1018
## PAU 24 196 -0.1867 1.7 0.000 -0.1892 2.1 -3.0 3.0 6.0 0.0477
## REB 25 193 -0.3938 1.5 0.000 -0.4323 1.5 -3.0 3.0 6.0 0.1467
## mean 26 196 -0.0878 1.2 0.092 -0.0596 1.2 -2.9 2.4 5.3 -0.2572
## mean_good 27 196 -0.0601 1.3 0.111 -0.0245 1.4 -2.9 2.4 5.3 -0.2941
## kurtosis se
## Portuguese_guy -0.636 0.103
## Arjen -0.965 0.118
## Victoria -1.121 0.133
## S -0.988 0.119
## PET -0.831 0.113
## RY -0.527 0.103
## R 0.043 0.104
## FIL -1.054 0.113
## LOU -1.210 0.134
## AS -0.954 0.115
## EM -1.319 0.118
## TH -1.277 0.113
## LOU_IS 0.136 0.084
## AND -0.674 0.119
## JAC -1.140 0.135
## GUS -1.044 0.123
## C -0.912 0.117
## FI -0.846 0.111
## MATH 0.544 0.088
## A -0.512 0.099
## BENJ -1.126 0.128
## SO -0.941 0.129
## J -1.021 0.110
## PAU -1.029 0.125
## REB -0.850 0.108
## mean -0.846 0.087
## mean_good -0.872 0.091
#distribution
party_ratings$mean %>% GG_denhist() +
scale_x_continuous("Political position", breaks = -3:3, limits = c(-3, 3), labels = leftright_labels)
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/party_position_ratings_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#intercorrelations among raters, sorted from highest to lowest
party_ratings %>% select(!!all_raters) %>% wtd.cors() %>% melt_to_long() %>% arrange(-value)
#ICC
party_ratings %>% select(!!all_raters) %>% ICC()
## Call: ICC(x = .)
##
## Intraclass correlation coefficients
## type ICC F df1 df2 p lower bound upper bound
## Single_raters_absolute ICC1 0.54 30 195 4704 0 0.50 0.58
## Single_random_raters ICC2 0.54 32 195 4680 0 0.50 0.58
## Single_fixed_raters ICC3 0.55 32 195 4680 0 0.51 0.60
## Average_raters_absolute ICC1k 0.97 30 195 4704 0 0.96 0.97
## Average_random_raters ICC2k 0.97 32 195 4680 0 0.96 0.97
## Average_fixed_raters ICC3k 0.97 32 195 4680 0 0.96 0.97
##
## Number of subjects = 196 Number of Judges = 25
party_ratings %>% select(!!good_raters) %>% ICC()
## Call: ICC(x = .)
##
## Intraclass correlation coefficients
## type ICC F df1 df2 p lower bound upper bound
## Single_raters_absolute ICC1 0.61 36 195 4312 0 0.57 0.65
## Single_random_raters ICC2 0.61 39 195 4290 0 0.57 0.65
## Single_fixed_raters ICC3 0.62 39 195 4290 0 0.58 0.66
## Average_raters_absolute ICC1k 0.97 36 195 4312 0 0.97 0.98
## Average_random_raters ICC2k 0.97 39 195 4290 0 0.97 0.98
## Average_fixed_raters ICC3k 0.97 39 195 4290 0 0.97 0.98
##
## Number of subjects = 196 Number of Judges = 23
#alpha
party_ratings %>% select(!!all_raters) %>% alpha()
## Number of categories should be increased in order to count frequencies.
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.97 0.97 0.98 0.56 32 0.0031 -0.088 1.2 0.61
##
## lower alpha upper 95% confidence boundaries
## 0.96 0.97 0.97
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Portuguese_guy 0.97 0.97 0.98 0.60 36 0.0029 0.014 0.62
## Arjen 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## Victoria 0.97 0.97 0.98 0.55 30 0.0033 0.031 0.61
## S 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## PET 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## RY 0.97 0.97 0.98 0.56 31 0.0032 0.032 0.61
## R 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## FIL 0.97 0.97 0.98 0.55 30 0.0032 0.031 0.60
## LOU 0.97 0.97 0.98 0.58 34 0.0028 0.025 0.62
## AS 0.97 0.97 0.98 0.56 31 0.0032 0.032 0.61
## EM 0.97 0.97 0.98 0.56 30 0.0032 0.032 0.61
## TH 0.97 0.97 0.98 0.55 30 0.0033 0.031 0.60
## LOU_IS 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## AND 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## JAC 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## GUS 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## C 0.97 0.97 0.98 0.55 30 0.0032 0.031 0.61
## FI 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## MATH 0.97 0.97 0.98 0.56 30 0.0032 0.032 0.61
## A 0.97 0.97 0.98 0.56 31 0.0031 0.032 0.62
## BENJ 0.97 0.97 0.98 0.55 30 0.0033 0.030 0.60
## SO 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## J 0.97 0.97 0.98 0.55 30 0.0032 0.031 0.61
## PAU 0.97 0.97 0.97 0.55 30 0.0033 0.031 0.60
## REB 0.97 0.97 0.98 0.55 30 0.0033 0.030 0.60
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Portuguese_guy 196 0.19 0.20 0.15 0.15 -0.6286 1.4
## Arjen 196 0.81 0.81 0.80 0.79 -0.2398 1.6
## Victoria 196 0.83 0.83 0.83 0.81 -0.1485 1.9
## S 196 0.81 0.80 0.80 0.79 -0.0255 1.7
## PET 196 0.81 0.81 0.81 0.79 -0.3316 1.6
## RY 196 0.73 0.73 0.71 0.70 -0.5515 1.4
## R 196 0.80 0.81 0.80 0.78 0.3673 1.5
## FIL 196 0.83 0.83 0.82 0.81 0.2148 1.6
## LOU 196 0.41 0.41 0.37 0.36 -0.1837 1.9
## AS 196 0.73 0.73 0.72 0.70 -0.2704 1.6
## EM 196 0.77 0.77 0.76 0.75 -0.0026 1.6
## TH 196 0.83 0.83 0.82 0.81 -0.0791 1.6
## LOU_IS 196 0.80 0.80 0.80 0.78 -0.1546 1.2
## AND 196 0.80 0.80 0.79 0.78 -0.1786 1.7
## JAC 196 0.80 0.80 0.79 0.78 0.1480 1.9
## GUS 196 0.80 0.80 0.79 0.78 0.4541 1.7
## C 196 0.82 0.82 0.82 0.80 0.0459 1.6
## FI 196 0.79 0.79 0.78 0.76 0.1306 1.5
## MATH 196 0.75 0.76 0.75 0.74 -0.3245 1.2
## A 196 0.69 0.69 0.67 0.66 0.2959 1.4
## BENJ 196 0.85 0.85 0.85 0.83 -0.1276 1.8
## SO 196 0.82 0.81 0.81 0.79 -0.0561 1.8
## J 196 0.82 0.82 0.82 0.80 0.0255 1.5
## PAU 196 0.84 0.83 0.83 0.82 -0.1867 1.7
## REB 193 0.85 0.86 0.86 0.84 -0.3938 1.5
party_ratings %>% select(!!good_raters) %>% alpha()
## Number of categories should be increased in order to count frequencies.
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.97 0.97 0.98 0.63 38 0.0026 -0.06 1.3 0.63
##
## lower alpha upper 95% confidence boundaries
## 0.97 0.97 0.98
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Arjen 0.97 0.97 0.98 0.62 37 0.0027 0.0053 0.62
## Victoria 0.97 0.97 0.98 0.62 36 0.0028 0.0052 0.62
## S 0.97 0.97 0.98 0.63 37 0.0027 0.0057 0.62
## PET 0.97 0.97 0.98 0.62 37 0.0027 0.0057 0.63
## RY 0.97 0.97 0.98 0.63 38 0.0027 0.0052 0.63
## R 0.97 0.97 0.98 0.62 37 0.0027 0.0054 0.62
## FIL 0.97 0.97 0.98 0.62 36 0.0028 0.0058 0.62
## AS 0.97 0.97 0.98 0.63 38 0.0027 0.0055 0.63
## EM 0.97 0.97 0.98 0.63 37 0.0027 0.0056 0.63
## TH 0.97 0.97 0.98 0.62 36 0.0028 0.0057 0.62
## LOU_IS 0.97 0.97 0.98 0.62 37 0.0027 0.0054 0.62
## AND 0.97 0.97 0.98 0.63 37 0.0027 0.0056 0.63
## JAC 0.97 0.97 0.98 0.63 37 0.0027 0.0057 0.62
## GUS 0.97 0.97 0.98 0.63 37 0.0027 0.0058 0.63
## C 0.97 0.97 0.98 0.62 36 0.0028 0.0055 0.62
## FI 0.97 0.97 0.98 0.63 37 0.0027 0.0052 0.63
## MATH 0.97 0.97 0.98 0.63 37 0.0027 0.0057 0.63
## A 0.97 0.97 0.98 0.64 38 0.0027 0.0048 0.64
## BENJ 0.97 0.97 0.98 0.62 36 0.0028 0.0054 0.62
## SO 0.97 0.97 0.98 0.62 37 0.0027 0.0053 0.62
## J 0.97 0.97 0.98 0.62 37 0.0028 0.0052 0.62
## PAU 0.97 0.97 0.98 0.62 36 0.0028 0.0052 0.62
## REB 0.97 0.97 0.98 0.62 36 0.0028 0.0049 0.62
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Arjen 196 0.81 0.81 0.81 0.79 -0.2398 1.6
## Victoria 196 0.83 0.83 0.83 0.81 -0.1485 1.9
## S 196 0.81 0.80 0.80 0.78 -0.0255 1.7
## PET 196 0.81 0.81 0.80 0.79 -0.3316 1.6
## RY 196 0.72 0.72 0.71 0.70 -0.5515 1.4
## R 196 0.81 0.82 0.81 0.80 0.3673 1.5
## FIL 196 0.83 0.83 0.82 0.81 0.2148 1.6
## AS 196 0.74 0.74 0.72 0.71 -0.2704 1.6
## EM 196 0.77 0.77 0.76 0.75 -0.0026 1.6
## TH 196 0.83 0.83 0.82 0.81 -0.0791 1.6
## LOU_IS 196 0.81 0.81 0.81 0.79 -0.1546 1.2
## AND 196 0.80 0.80 0.79 0.78 -0.1786 1.7
## JAC 196 0.80 0.80 0.79 0.78 0.1480 1.9
## GUS 196 0.81 0.80 0.79 0.78 0.4541 1.7
## C 196 0.83 0.83 0.82 0.81 0.0459 1.6
## FI 196 0.79 0.79 0.78 0.77 0.1306 1.5
## MATH 196 0.76 0.77 0.75 0.74 -0.3245 1.2
## A 196 0.68 0.68 0.66 0.65 0.2959 1.4
## BENJ 196 0.86 0.86 0.85 0.84 -0.1276 1.8
## SO 196 0.82 0.81 0.81 0.80 -0.0561 1.8
## J 196 0.82 0.82 0.82 0.80 0.0255 1.5
## PAU 196 0.84 0.83 0.83 0.82 -0.1867 1.7
## REB 193 0.86 0.86 0.86 0.85 -0.3938 1.5
#factor analysis
(party_ratings_factor = fa(party_ratings %>% select(!!all_raters)))
## Factor Analysis using method = minres
## Call: fa(r = party_ratings %>% select(!!all_raters))
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## Portuguese_guy 0.14 0.021 0.98 1
## Arjen 0.80 0.647 0.35 1
## Victoria 0.83 0.688 0.31 1
## S 0.79 0.629 0.37 1
## PET 0.80 0.641 0.36 1
## RY 0.70 0.497 0.50 1
## R 0.81 0.653 0.35 1
## FIL 0.82 0.671 0.33 1
## LOU 0.36 0.129 0.87 1
## AS 0.72 0.514 0.49 1
## EM 0.75 0.567 0.43 1
## TH 0.82 0.674 0.33 1
## LOU_IS 0.80 0.646 0.35 1
## AND 0.79 0.622 0.38 1
## JAC 0.79 0.617 0.38 1
## GUS 0.79 0.628 0.37 1
## C 0.82 0.671 0.33 1
## FI 0.78 0.610 0.39 1
## MATH 0.75 0.565 0.44 1
## A 0.66 0.438 0.56 1
## BENJ 0.85 0.722 0.28 1
## SO 0.81 0.648 0.35 1
## J 0.82 0.670 0.33 1
## PAU 0.83 0.683 0.32 1
## REB 0.86 0.737 0.26 1
##
## MR1
## SS loadings 14.59
## Proportion Var 0.58
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 300 and the objective function was 24 with Chi Square of 4433
## The degrees of freedom for the model are 275 and the objective function was 4.3
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic number of observations is 196 with the empirical chi square 323 with prob < 0.024
## The total number of observations was 196 with Likelihood Chi Square = 798 with prob < 1.9e-52
##
## Tucker Lewis Index of factoring reliability = 0.86
## RMSEA index = 0.098 and the 90 % confidence intervals are 0.091 0.11
## BIC = -653
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.99
## Multiple R square of scores with factors 0.98
## Minimum correlation of possible factor scores 0.95
#good raters only
(party_ratings_factor_good = fa(party_ratings %>% select(!!good_raters)))
## Factor Analysis using method = minres
## Call: fa(r = party_ratings %>% select(!!good_raters))
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## Arjen 0.81 0.65 0.35 1
## Victoria 0.83 0.69 0.31 1
## S 0.79 0.63 0.37 1
## PET 0.80 0.64 0.36 1
## RY 0.70 0.49 0.51 1
## R 0.81 0.66 0.34 1
## FIL 0.82 0.67 0.33 1
## AS 0.72 0.52 0.48 1
## EM 0.75 0.57 0.43 1
## TH 0.82 0.67 0.33 1
## LOU_IS 0.81 0.65 0.35 1
## AND 0.79 0.62 0.38 1
## JAC 0.78 0.62 0.38 1
## GUS 0.79 0.63 0.37 1
## C 0.82 0.67 0.33 1
## FI 0.78 0.61 0.39 1
## MATH 0.75 0.57 0.43 1
## A 0.66 0.43 0.57 1
## BENJ 0.85 0.73 0.27 1
## SO 0.80 0.65 0.35 1
## J 0.82 0.67 0.33 1
## PAU 0.83 0.68 0.32 1
## REB 0.86 0.74 0.26 1
##
## MR1
## SS loadings 14.44
## Proportion Var 0.63
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 253 and the objective function was 23 with Chi Square of 4375
## The degrees of freedom for the model are 230 and the objective function was 4.1
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic number of observations is 196 with the empirical chi square 279 with prob < 0.015
## The total number of observations was 196 with Likelihood Chi Square = 754 with prob < 5.3e-57
##
## Tucker Lewis Index of factoring reliability = 0.86
## RMSEA index = 0.11 and the 90 % confidence intervals are 0.1 0.12
## BIC = -460
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.99
## Multiple R square of scores with factors 0.98
## Minimum correlation of possible factor scores 0.95
#long form
party_ratings_long = party_ratings %>%
  gather(key = rater, value = rating, !!all_raters)
#plot, ordered by the mean of the good raters
party_ratings_long %>%
  mutate(
    Party = fct_reorder(Party, mean_good)
  ) %>%
  ggplot(aes(Party, rating)) +
  geom_boxplot(outlier.shape = NA) +
  # geom_violin() +
  scale_y_continuous(breaks = -3:3) +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 2),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank()
  )
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
GG_save("figs/party_ratings.boxplot.png")
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
#copy the mean ratings to the main parties data frame for further use
#they were already rescaled to the Wikipedia -3 to 3 scale above
parties$mean_rating = NULL
#mutate
party_ratings %<>% mutate(
  party_country = Party + " | " + Country,
  #new name, same scale as the Wikipedia data
  mean_rating = mean
)
#join to main data
parties = left_join(parties, party_ratings %>% select(mean_rating, party_country), by = "party_country")
#simplify
#figure out what they are closest to
# stringdist::stringdist(a = parties$wp_pol_position %>% str_replace_all("\\\\n", ""),
# b = c("far-left", "far right", "left-wing", "right-wing", "centre", "centre-left", "centre-right", "centre to centre-left", "centre to centre-right", "left-wing to far-left", "right-wing to far-right"))
#clean it up a bit
parties$wp_pol_position_clean = parties$wp_pol_position %>%
str_replace_all("\\n", "") %>%
str_to_lower()
#code into numerical form: -3 to 3
#detect keywords and average their values when multiple are found
positions = c("far-left", "left-wing", "center-left", "center", "center-right", "right-wing", "far-right", "libertarianism", "syncretic")
positions_values = c(-3:3, 2, 0)
#recode as numeric
parties$wp_pol_position_numerical = parties$wp_pol_position_clean %>%
  map_dbl(function(.) {
    if (is.na(.)) return(NA)
    # browser()
    #split by " to "
    #we need to average the values if multiple positions are given
    split_x = str_split(., " to ") %>% .[[1]]
    #American spelling of "center"
    split_x = map_chr(split_x, ~str_replace_all(., "centre", "center"))
    #replace the keywords with their numerical values
    x_recoded = plyr::mapvalues(split_x, positions, positions_values, warn_missing = F)
    #as numeric; unmatched strings become NA (hence the warnings below)
    x_vals = as.numeric(x_recoded)
    #mean
    y = mean(x_vals, na.rm = T)
    #odd?
    # if (is.nan(y)) browser()
    y
  })
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
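For example, "centre-left to centre" splits into two parts that map to -1 and 0, giving -0.5; strings with no recognized keyword produce the coercion warnings above and end up as NaN, which the hard-coded values below fix.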
#hard-coded values
#used when the Wikipedia text is not in the standard format
parties[parties$party_country == 'Russian Democratic Party "Yabloko" | Russia', "wp_pol_position_numerical"] = 0
parties[parties$party_country == "Scottish National Party | United Kingdom", "wp_pol_position_numerical"] = -1
parties[parties$party_country == "Liberal Party of Canada | Canada", "wp_pol_position_numerical"] = -0.5
parties[parties$party_country == "League of Polish Families | Poland", "wp_pol_position_numerical"] = 1
parties[parties$party_country == "United Russia | Russia", "wp_pol_position_numerical"] = 0
parties[parties$party_country == "Evangelical People's Party of Switzerland | Switzerland", "wp_pol_position_numerical"] = 0
parties[parties$party_country == "Democrats 66 | Netherlands", "wp_pol_position_numerical"] = 0
#ensure no indefinite values
assert_that(!any(is.infinite(parties$wp_pol_position_numerical)))
## [1] TRUE
assert_that(!any(is.nan(parties$wp_pol_position_numerical)))
## [1] TRUE
#add party data to voting data
voting_augmented_all = left_join(
  voting_aggr,
  parties %>% select(-Party_native, -Party), by = c("party_country")
)
#alternative version without old data
voting_augmented_all_no_old = left_join(
  voting_aggr_no_old,
  parties %>% select(-Party_native, -Party), by = c("party_country")
)
#error on missing joins!
assert_that(nrow(voting_augmented_all) == nrow(voting_aggr))
## [1] TRUE
assert_that(nrow(voting_augmented_all_no_old) == nrow(voting_aggr_no_old))
## [1] TRUE
#stats
parties$wp_pol_position_numerical %>% describe()
#main dataset - apply preferred data filter
voting_augmented = voting_augmented_all %>%
  filter(General_pct >= 2,
         Country != "Russia",
         !is.na(RR)
  )
#without old data
voting_augmented_no_old = voting_augmented_all_no_old %>%
  filter(General_pct >= 2,
         Country != "Russia",
         !is.na(RR)
  )
#how many excluded?
voting_augmented_all %>% filter(General_pct < 2) %>% nrow()
## [1] 19
voting_augmented_all %>% nrow()
## [1] 151
voting_augmented_all %>% filter(General_pct < 2) %>% nrow() / voting_augmented_all %>% nrow()
## [1] 0.13
voting_augmented_all %>% filter(Country == "Russia") %>% nrow()
## [1] 0
voting_augmented_all %>% filter(Country == "Russia") %>% nrow() / voting_augmented_all %>% nrow()
## [1] 0
#distribution
parties %>%
  ggplot(aes(wp_pol_position_numerical)) +
  geom_histogram(stat = "count") +
  theme_bw() +
  scale_x_continuous("Political position",
                     labels = c("far-left", "left", "center-left", "center", "center-right", "right", "far-right"),
                     breaks = seq(-3, 3, 1))
## Warning: Ignoring unknown parameters: binwidth, bins, pad
## Warning: Removed 13 rows containing non-finite values (stat_count).
GG_save("figs/position_dist.png")
## Warning: Removed 13 rows containing non-finite values (stat_count).
#small parties included
voting_augmented_small = voting_augmented_all %>%
  filter(Country != "Russia",
         !is.na(RR)
  )
Calculate weights under various schemes, such as square-root sample size or country-fixed weights (i.e. each country gets the same total weight no matter how many parties it has).
#country fixed weights
#have to condition on missing data
#weights can result in Inf (when no party in a country has data), which we recode below
add_weights = function(d) {
  #assign 1 unit of weight distributed across the parties with data
  #per block
  #but all parties should have this...
  d$weight_b = 1 / sum(!is.na(d$Block))
  #per political position = pp
  d$weight_pp = 1 / sum(!is.na(d$wp_pol_position))
  #ideology = i
  d$weight_i = 1 / sum(!is.na(d$wp_ideolgy))
  #both = ppi
  d$weight_ppi = 1 / min(sum(!is.na(d$wp_pol_position)), sum(!is.na(d$wp_ideolgy)))
  #replace Inf with the smaller of the two component weights
  #Inf causes issues in step-wise regressions
  d$weight_ppi[is.infinite(d$weight_ppi)] = pmin(d$weight_pp[is.infinite(d$weight_ppi)], d$weight_i[is.infinite(d$weight_ppi)])
  #more Inf
  d$weight_pp[is.infinite(d$weight_pp)] = 0
  #sqrt n
  d$sqrt_n = sqrt(d$n_total)
  #constant weights
  d$ones = 1
  d
}
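For example, under the political-position weighting, each of 5 parties with position data in a country gets weight 1/5, so every country contributes the same total weight to the pooled analyses regardless of how many parties it fields.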
#add weights
voting_augmented = plyr::ddply(voting_augmented, c("Country"), .fun = add_weights)
voting_augmented_small = plyr::ddply(voting_augmented_small, c("Country"), .fun = add_weights)
voting_augmented_no_old = plyr::ddply(voting_augmented_no_old, c("Country"), .fun = add_weights)
#parties included, including only those with data and including pseudo-parties
voting_augmented %>% filter(!is.na(RR)) %>% nrow()
## [1] 132
#parties included, including only those with data and tags
voting_augmented %>% filter(!is.na(RR), !is.na(wp_ideolgy), !is.na(socialism)) %>% nrow()
## [1] 123
voting_augmented %>% filter(!is.na(RR), !is.na(wp_ideolgy), !is.na(socialism)) %>% .$Country %>% table2(include_NA = F) %>% print(n = 20)
## # A tibble: 17 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 France 17 13.8
## 2 Slovenia 9 7.32
## 3 Denmark 8 6.50
## 4 Finland 8 6.50
## 5 Norway 8 6.50
## 6 Poland 8 6.50
## 7 Sweden 8 6.50
## 8 Belgium 7 5.69
## 9 Ireland 7 5.69
## 10 Australia 6 4.88
## 11 Austria 6 4.88
## 12 Germany 6 4.88
## 13 Netherlands 6 4.88
## 14 Switzerland 6 4.88
## 15 United Kingdom 6 4.88
## 16 Canada 5 4.07
## 17 USA 2 1.63
#sample sizes
n_sumstats
n_party_sumstats
n_party_frac_sumstats
#samples by country
samples %>%
filter(Type == "Journalist") %>%
group_by(Country) %>%
dplyr::summarise(n = n()) %>%
pull(n) %>%
describe()
#party data
#coverage
parties$Wikipedia_link %>% is.na() %>% mean()
## [1] 0
parties$wp_pol_position_numerical %>% is.na() %>% mean()
## [1] 0.066
parties$wp_ideolgy %>% is.na() %>% mean()
## [1] 0.03
#countries
voting_orig %>%
filter(!is.na(Journalist_pct)) %>%
.$Country %>%
table2() %>%
print(n = Inf)
## # A tibble: 18 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 Sweden 35 15.4
## 2 France 28 12.3
## 3 Norway 20 8.77
## 4 Germany 19 8.33
## 5 Australia 18 7.89
## 6 Denmark 17 7.46
## 7 Belgium 16 7.02
## 8 Poland 12 5.26
## 9 Finland 9 3.95
## 10 Netherlands 9 3.95
## 11 Slovenia 9 3.95
## 12 Ireland 8 3.51
## 13 Switzerland 7 3.07
## 14 United Kingdom 7 3.07
## 15 Austria 6 2.63
## 16 Canada 6 2.63
## 17 USA 2 0.877
## 18 <NA> 0 0
#survey years
samples %>% filter(Type == "Journalist",
ID %in% voting$journalist_sample_ID,
Country != "Russia"
) %>%
pull(Year) %>%
str_match("\\d+") %>%
as.numeric() %>%
describe()
#Wikipedia vs. raters
parties %>%
#this is to randomize the order for the party names shown
#otherwise, parties from some countries are shown a lot more
sample_frac() %>%
GG_scatter("wp_pol_position_numerical", "mean_rating", case_names = "Party") +
scale_x_continuous("Wikipedia-derived position", breaks = -3:3, labels = leftright_labels) +
scale_y_continuous("Average position rating of online raters", breaks = -3:3, limits = c(-3, 3), labels = leftright_labels) +
geom_abline(intercept = 0, slope = 1, linetype = "dotted", alpha = .5)
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/party_position_ratings_compare.png")
## `geom_smooth()` using formula 'y ~ x'
#highest correlation to Wikipedia
#join data first
party_ratings = left_join(party_ratings, parties %>% select(party_country, wp_pol_position_numerical), by = "party_country")
#compute
party_ratings %>% select(!!all_raters, wp_pol_position_numerical) %>% wtd.cors() %>%
{
.[, ncol(.)]
} %>%
sort()
## Portuguese_guy LOU RY
## 0.096 0.228 0.546
## A EM AS
## 0.548 0.563 0.590
## S GUS JAC
## 0.601 0.618 0.636
## TH AND FIL
## 0.642 0.651 0.663
## PET SO MATH
## 0.676 0.687 0.689
## PAU R J
## 0.708 0.748 0.776
## C BENJ FI
## 0.787 0.788 0.818
## LOU_IS Victoria Arjen
## 0.834 0.837 0.848
## REB wp_pol_position_numerical
## 0.848 1.000
#Jensen's method
#using all raters
fa_Jensens_method(fa = party_ratings_factor, df = party_ratings, criterion = "wp_pol_position_numerical") +
scale_y_continuous("Correlation (Wikipedia position x rater)") +
scale_x_continuous("Rater's loading on general factor of ratings")
## Using Pearson correlations for the criterion-indicators relationships.
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/party_position_ratings_Jensen_method.png")
## `geom_smooth()` using formula 'y ~ x'
#good raters only
fa_Jensens_method(fa = party_ratings_factor_good, df = party_ratings, criterion = "wp_pol_position_numerical") +
scale_y_continuous("Correlation (Wikipedia position x rater)") +
scale_x_continuous("Rater's loading on general factor of ratings")
## Using Pearson correlations for the criterion-indicators relationships.
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/party_position_ratings_Jensen_method.png")
## `geom_smooth()` using formula 'y ~ x'
#heatmap
party_ratings %>%
  select(Portuguese_guy:mean, wp_pol_position_numerical) %>%
  rename(Wikipedia = wp_pol_position_numerical) %>%
  {
    #change to informative ids
    colnames(.)[1:length(all_raters)] = party_ratings_meta$informative_id
    #correlations
    cor_mat = wtd.cors(.[1:length(all_raters)])
    #reorder the raters, but not the last two variables
    raters_reordered = reorder_cormat(cor_mat)
    #reorder the dataset
    .[c(colnames(raters_reordered), "mean", "Wikipedia")]
  } %>%
  GG_heatmap(reorder_vars = F, font_size = 2)
GG_save("figs/party_position_ratings_heatmap.png")
#regression models
#to examine slope and intercept
ols(mean_rating ~ wp_pol_position_numerical, data = party_ratings)
## Frequencies of Missing Values Due to Each Variable
## mean_rating wp_pol_position_numerical
## 0 13
##
## Linear Regression Model
##
## ols(formula = mean_rating ~ wp_pol_position_numerical, data = party_ratings)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 183 LR chi2 245.94 R2 0.739
## sigma0.6223 d.f. 1 R2 adj 0.738
## d.f. 181 Pr(> chi2) 0.0000 g 1.195
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.815339 -0.444568 -0.006825 0.417689 1.802717
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1057 0.0460 -2.30 0.0228
## wp_pol_position_numerical 0.7285 0.0322 22.65 <0.0001
##
ols(wp_pol_position_numerical ~ mean_rating, data = party_ratings)
## Frequencies of Missing Values Due to Each Variable
## wp_pol_position_numerical mean_rating
## 13 0
##
## Linear Regression Model
##
## ols(formula = wp_pol_position_numerical ~ mean_rating, data = party_ratings)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 183 LR chi2 245.94 R2 0.739
## sigma0.7344 d.f. 1 R2 adj 0.738
## d.f. 181 Pr(> chi2) 0.0000 g 1.413
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.3643 -0.4373 0.0173 0.4596 2.3500
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1208 0.0544 2.22 0.0276
## mean_rating 1.0146 0.0448 22.65 <0.0001
##
Crude block-level analysis.
#simple block level
table2(parties$Block)
#different outcomes
#difference in %
ols(d ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
## d Block (weights)
## 0 9 0
##
## Linear Regression Model
##
## ols(formula = d ~ Block, data = voting_augmented, weights = weight_b)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 123 LR chi2 53.32 R2 0.352
## sigma4.1173 d.f. 2 R2 adj 0.341
## d.f. 120 Pr(> chi2) 0.0000 g 8.207
##
## Residuals
##
## Min 1Q Median 3Q Max
## -22.3144 -7.7357 -0.3331 5.5776 39.8593
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 8.3324 1.4579 5.72 <0.0001
## Block=center -2.0611 3.8433 -0.54 0.5928
## Block=right -16.4528 2.0813 -7.91 <0.0001
##
#relative rate
ols(RR ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
## RR Block (weights)
## 0 9 0
##
## Linear Regression Model
##
## ols(formula = RR ~ Block, data = voting_augmented, weights = weight_b)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 123 LR chi2 43.17 R2 0.296
## sigma0.4038 d.f. 2 R2 adj 0.284
## d.f. 120 Pr(> chi2) 0.0000 g 0.708
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.91125 -0.55961 -0.09107 0.33300 3.59501
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 2.0255 0.1430 14.17 <0.0001
## Block=center -0.1706 0.3769 -0.45 0.6515
## Block=right -1.4194 0.2041 -6.95 <0.0001
##
#odds ratio
ols(OR ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
## OR Block (weights)
## 0 9 0
##
## Linear Regression Model
##
## ols(formula = OR ~ Block, data = voting_augmented, weights = weight_b)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 123 LR chi2 35.21 R2 0.249
## sigma0.6418 d.f. 2 R2 adj 0.236
## d.f. 120 Pr(> chi2) 0.0000 g 0.996
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.4699 -0.7315 -0.2107 0.3540 9.5912
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 2.5752 0.2273 11.33 <0.0001
## Block=center -0.1962 0.5991 -0.33 0.7438
## Block=right -1.9979 0.3244 -6.16 <0.0001
##
#relative rate
voting_augmented %>%
GG_scatter("mean_rating", "RR", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (rater data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("Times representation among journalists\ncompared to elections") +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/mean_rating_RR.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
voting_augmented %>%
GG_scatter("mean_rating", "d", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (rater data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("%points representation among journalists\ncompared to elections") +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/mean_rating_d.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#logRR
voting_augmented %>%
GG_scatter("mean_rating", "logRR", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (rater data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("Times over-representation among general population (below 1)\nor journalists (above)",
breaks = seq(-1.5, 1.5, by = 0.5),
labels = format(c(1/10^c(-1.5, -1, -0.5), 1, 10^c(0.5, 1, 1.5)), nsmall = 1)
) +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/mean_rating_logRR.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#OR
cor.test(voting_augmented$OR, voting_augmented$mean_rating)
##
## Pearson's product-moment correlation
##
## data: voting_augmented$OR and voting_augmented$mean_rating
## t = -6, df = 122, p-value = 1e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.58 -0.30
## sample estimates:
## cor
## -0.45
#cor matrix
wtd.cors(voting_augmented %>% select(mean_rating, d, RR, OR, logRR, logOR), weight = voting_augmented$weight_pp)
## mean_rating d RR OR logRR logOR
## mean_rating 1.00 -0.50 -0.53 -0.47 -0.53 -0.54
## d -0.50 1.00 0.81 0.84 0.75 0.81
## RR -0.53 0.81 1.00 0.95 0.84 0.86
## OR -0.47 0.84 0.95 1.00 0.74 0.79
## logRR -0.53 0.75 0.84 0.74 1.00 0.99
## logOR -0.54 0.81 0.86 0.79 0.99 1.00
#d
#linear vs. nonlinear (spline) fits
linear_fit = ols(d ~ mean_rating, data = voting_augmented, weights = voting_augmented$weight_pp)
linear_fit
## Frequencies of Missing Values Due to Each Variable
## d mean_rating (weights)
## 0 8 0
##
## Linear Regression Model
##
## ols(formula = d ~ mean_rating, data = voting_augmented, weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 124 LR chi2 35.36 R2 0.248
## sigma4.0219 d.f. 1 R2 adj 0.242
## d.f. 122 Pr(> chi2) 0.0000 g 7.461
##
## Residuals
##
## Min 1Q Median 3Q Max
## -30.7782 -6.6455 -0.1508 4.3255 44.5075
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.3645 0.9977 0.37 0.7155
## mean_rating -5.2550 0.8351 -6.29 <0.0001
##
nonlinear_fit = ols(d ~ rcs(mean_rating), data = voting_augmented, weights = voting_augmented$weight_pp)
nonlinear_fit
## Frequencies of Missing Values Due to Each Variable
## d mean_rating (weights)
## 0 8 0
##
## Linear Regression Model
##
## ols(formula = d ~ rcs(mean_rating), data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 124 LR chi2 37.84 R2 0.263
## sigma4.0326 d.f. 4 R2 adj 0.238
## d.f. 119 Pr(> chi2) 0.0000 g 7.446
##
## Residuals
##
## Min 1Q Median 3Q Max
## -30.4381 -6.0077 -0.4037 4.7769 44.1405
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 11.9837 8.5754 1.40 0.1649
## mean_rating 1.8451 5.0637 0.36 0.7162
## mean_rating' -28.7166 24.1321 -1.19 0.2364
## mean_rating'' 95.3131 92.4260 1.03 0.3045
## mean_rating''' -141.9655 169.9775 -0.84 0.4053
##
#compare
lrtest(linear_fit,
nonlinear_fit
)
##
## Model 1: d ~ mean_rating
## Model 2: d ~ rcs(mean_rating)
##
## L.R. Chisq d.f. P
## 2.48 3.00 0.48
#logRR
#linear vs. nonlinear (spline) fits
linear_fit2 = ols(logRR ~ mean_rating, data = voting_augmented, weights = voting_augmented$weight_pp)
linear_fit2
## Frequencies of Missing Values Due to Each Variable
## logRR mean_rating (weights)
## 0 8 0
##
## Linear Regression Model
##
## ols(formula = logRR ~ mean_rating, data = voting_augmented, weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 124 LR chi2 41.19 R2 0.283
## sigma0.1600 d.f. 1 R2 adj 0.277
## d.f. 122 Pr(> chi2) 0.0000 g 0.324
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.35441 -0.19624 0.05326 0.27846 0.69519
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1060 0.0397 -2.67 0.0086
## mean_rating -0.2284 0.0332 -6.88 <0.0001
##
nonlinear_fit2 = ols(logRR ~ rcs(mean_rating), data = voting_augmented, weights = voting_augmented$weight_pp)
nonlinear_fit2
## Frequencies of Missing Values Due to Each Variable
## logRR mean_rating (weights)
## 0 8 0
##
## Linear Regression Model
##
## ols(formula = logRR ~ rcs(mean_rating), data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 124 LR chi2 46.15 R2 0.311
## sigma0.1588 d.f. 4 R2 adj 0.288
## d.f. 119 Pr(> chi2) 0.0000 g 0.325
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.33988 -0.21973 0.07345 0.30939 0.62677
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.2552 0.3378 0.76 0.4514
## mean_rating 0.0213 0.1994 0.11 0.9151
## mean_rating' -0.5259 0.9505 -0.55 0.5811
## mean_rating'' 0.7475 3.6404 0.21 0.8377
## mean_rating''' 0.5324 6.6950 0.08 0.9367
##
#compare
lrtest(linear_fit2,
nonlinear_fit2
)
##
## Model 1: logRR ~ mean_rating
## Model 2: logRR ~ rcs(mean_rating)
##
## L.R. Chisq d.f. P
## 4.97 3.00 0.17
#same correlations using the unadjusted values
wtd.cors(voting_augmented %>% select(mean_rating, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented$weight_pp)
## mean_rating d_orig RR_orig OR_orig logRR_orig logOR_orig
## mean_rating 1.00 -0.50 -0.52 -0.46 -0.56 -0.56
## d_orig -0.50 1.00 0.81 0.85 0.80 0.86
## RR_orig -0.52 0.81 1.00 0.95 0.86 0.88
## OR_orig -0.46 0.85 0.95 1.00 0.77 0.81
## logRR_orig -0.56 0.80 0.86 0.77 1.00 0.99
## logOR_orig -0.56 0.86 0.88 0.81 0.99 1.00
#sample size change
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented$logRR_orig %>% na.omit() %>% length()
## [1] 120
#with small parties
wtd.cors(voting_augmented_small %>% select(mean_rating, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented_small$weight_pp)
## mean_rating d_orig RR_orig OR_orig logRR_orig logOR_orig
## mean_rating 1.00 -0.47 -0.47 -0.43 -0.51 -0.52
## d_orig -0.47 1.00 0.79 0.83 0.78 0.85
## RR_orig -0.47 0.79 1.00 0.95 0.86 0.88
## OR_orig -0.43 0.83 0.95 1.00 0.76 0.80
## logRR_orig -0.51 0.78 0.86 0.76 1.00 0.99
## logOR_orig -0.52 0.85 0.88 0.80 0.99 1.00
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented_small$logRR %>% na.omit() %>% length()
## [1] 151
#without old data
wtd.cors(voting_augmented_no_old %>% select(mean_rating, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented_no_old$weight_pp)
## mean_rating d_orig RR_orig OR_orig logRR_orig logOR_orig
## mean_rating 1.00 -0.55 -0.56 -0.48 -0.58 -0.59
## d_orig -0.55 1.00 0.80 0.84 0.78 0.85
## RR_orig -0.56 0.80 1.00 0.94 0.85 0.87
## OR_orig -0.48 0.84 0.94 1.00 0.74 0.79
## logRR_orig -0.58 0.78 0.85 0.74 1.00 0.99
## logOR_orig -0.59 0.85 0.87 0.79 0.99 1.00
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented_no_old$logRR %>% na.omit() %>% length()
## [1] 100
Same as above, but using the Wikipedia position data.
#relative rate
voting_augmented %>%
GG_scatter("wp_pol_position_numerical", "RR", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (Wikipedia data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("Times representation among journalists\ncompared to elections") +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/wp_position_RR.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
voting_augmented %>%
GG_scatter("wp_pol_position_numerical", "d", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (Wikipedia data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("%points representation among journalists\ncompared to elections") +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/wp_position_d.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#logRR
voting_augmented %>%
GG_scatter("wp_pol_position_numerical", "logRR", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (Wikipedia data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("Times over-representation among general population (below 1)\nor journalists (above)",
breaks = seq(-1.5, 1.5, by = 0.5),
labels = format(c(1/10^c(-1.5, -1, -0.5), 1, 10^c(0.5, 1, 1.5)), nsmall = 1)
) +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/wp_position_logRR.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#OR
cor.test(voting_augmented$OR, voting_augmented$wp_pol_position_numerical)
##
## Pearson's product-moment correlation
##
## data: voting_augmented$OR and voting_augmented$wp_pol_position_numerical
## t = -5, df = 117, p-value = 4e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.55 -0.24
## sample estimates:
## cor
## -0.41
#cor matrix
wtd.cors(voting_augmented %>% select(wp_pol_position_numerical, d, RR, OR, logRR, logOR), weight = voting_augmented$weight_pp)
## wp_pol_position_numerical d RR OR logRR
## wp_pol_position_numerical 1.00 -0.47 -0.47 -0.40 -0.53
## d -0.47 1.00 0.81 0.84 0.75
## RR -0.47 0.81 1.00 0.95 0.84
## OR -0.40 0.84 0.95 1.00 0.74
## logRR -0.53 0.75 0.84 0.74 1.00
## logOR -0.53 0.81 0.86 0.79 0.99
## logOR
## wp_pol_position_numerical -0.53
## d 0.81
## RR 0.86
## OR 0.79
## logRR 0.99
## logOR 1.00
#d
#linear vs. nonlinear (spline) fits
linear_fit = ols(d ~ wp_pol_position_numerical, data = voting_augmented, weights = voting_augmented$weight_pp)
linear_fit
## Frequencies of Missing Values Due to Each Variable
## d wp_pol_position_numerical (weights)
## 0 13 0
##
## Linear Regression Model
##
## ols(formula = d ~ wp_pol_position_numerical, data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 119 LR chi2 29.58 R2 0.220
## sigma4.1380 d.f. 1 R2 adj 0.213
## d.f. 117 Pr(> chi2) 0.0000 g 6.951
##
## Residuals
##
## Min 1Q Median 3Q Max
## -28.9059 -5.9654 -0.9795 4.6571 43.1572
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.6590 1.0345 0.64 0.5254
## wp_pol_position_numerical -4.3756 0.7616 -5.75 <0.0001
##
nonlinear_fit = ols(d ~ rcs(wp_pol_position_numerical), data = voting_augmented, weights = voting_augmented$weight_pp)
nonlinear_fit
## Frequencies of Missing Values Due to Each Variable
## d wp_pol_position_numerical (weights)
## 0 13 0
##
## Linear Regression Model
##
## ols(formula = d ~ rcs(wp_pol_position_numerical), data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 119 LR chi2 37.32 R2 0.269
## sigma4.0579 d.f. 4 R2 adj 0.244
## d.f. 114 Pr(> chi2) 0.0000 g 7.369
##
## Residuals
##
## Min 1Q Median 3Q Max
## -29.792 -6.546 0.160 4.648 41.836
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 12.2200 6.9297 1.76 0.0805
## wp_pol_position_numerical 3.6683 4.1794 0.88 0.3819
## wp_pol_position_numerical' -23.0743 33.7173 -0.68 0.4951
## wp_pol_position_numerical'' 16.2526 107.6351 0.15 0.8802
## wp_pol_position_numerical''' 64.6132 141.4627 0.46 0.6487
##
#compare
lrtest(linear_fit,
nonlinear_fit
)
##
## Model 1: d ~ wp_pol_position_numerical
## Model 2: d ~ rcs(wp_pol_position_numerical)
##
## L.R. Chisq d.f. P
## 7.742 3.000 0.052
#logRR
#linear vs. nonlinear (spline) fits
linear_fit2 = ols(logRR ~ wp_pol_position_numerical, data = voting_augmented, weights = voting_augmented$weight_pp)
linear_fit2
## Frequencies of Missing Values Due to Each Variable
## logRR wp_pol_position_numerical (weights)
## 0 13 0
##
## Linear Regression Model
##
## ols(formula = logRR ~ wp_pol_position_numerical, data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 119 LR chi2 39.13 R2 0.280
## sigma0.1613 d.f. 1 R2 adj 0.274
## d.f. 117 Pr(> chi2) 0.0000 g 0.318
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.56003 -0.18898 0.02685 0.30652 0.79985
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0916 0.0403 -2.27 0.0248
## wp_pol_position_numerical -0.2003 0.0297 -6.75 <0.0001
##
nonlinear_fit2 = ols(logRR ~ rcs(wp_pol_position_numerical), data = voting_augmented, weights = voting_augmented$weight_pp)
nonlinear_fit2
## Frequencies of Missing Values Due to Each Variable
## logRR wp_pol_position_numerical (weights)
## 0 13 0
##
## Linear Regression Model
##
## ols(formula = logRR ~ rcs(wp_pol_position_numerical), data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 119 LR chi2 49.44 R2 0.340
## sigma0.1564 d.f. 4 R2 adj 0.317
## d.f. 114 Pr(> chi2) 0.0000 g 0.315
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.60876 -0.20279 0.07733 0.29667 0.79987
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.2732 0.2672 1.02 0.3086
## wp_pol_position_numerical 0.0744 0.1611 0.46 0.6454
## wp_pol_position_numerical' -0.8403 1.2999 -0.65 0.5193
## wp_pol_position_numerical'' 1.5532 4.1497 0.37 0.7089
## wp_pol_position_numerical''' -1.2583 5.4538 -0.23 0.8180
##
#compare
lrtest(linear_fit2,
nonlinear_fit2
)
##
## Model 1: logRR ~ wp_pol_position_numerical
## Model 2: logRR ~ rcs(wp_pol_position_numerical)
##
## L.R. Chisq d.f. P
## 10.307 3.000 0.016
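The spline terms add little for d (p = .052) but do improve the fit for logRR (p = .016). To eyeball the shape of the nonlinearity, one can plot the spline predictions. A minimal sketch, assuming an rms datadist is set up first (not run above):
#visualize the logRR spline fit (illustrative sketch)
ddist = datadist(voting_augmented)
options(datadist = "ddist")
ggplot(Predict(nonlinear_fit2, wp_pol_position_numerical)) #partial effect with 95% CI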
#without adjusted values
wtd.cors(voting_augmented %>% select(wp_pol_position_numerical, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented$weight_pp)
## wp_pol_position_numerical d_orig RR_orig OR_orig
## wp_pol_position_numerical 1.00 -0.47 -0.48 -0.40
## d_orig -0.47 1.00 0.81 0.85
## RR_orig -0.48 0.81 1.00 0.95
## OR_orig -0.40 0.85 0.95 1.00
## logRR_orig -0.55 0.80 0.86 0.77
## logOR_orig -0.55 0.86 0.88 0.81
## logRR_orig logOR_orig
## wp_pol_position_numerical -0.55 -0.55
## d_orig 0.80 0.86
## RR_orig 0.86 0.88
## OR_orig 0.77 0.81
## logRR_orig 1.00 0.99
## logOR_orig 0.99 1.00
#sample size change
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented$logRR_orig %>% na.omit() %>% length()
## [1] 120
#with small parties
wtd.cors(voting_augmented_small %>% select(wp_pol_position_numerical, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented_small$weight_pp)
## wp_pol_position_numerical d_orig RR_orig OR_orig
## wp_pol_position_numerical 1.00 -0.44 -0.43 -0.37
## d_orig -0.44 1.00 0.79 0.83
## RR_orig -0.43 0.79 1.00 0.95
## OR_orig -0.37 0.83 0.95 1.00
## logRR_orig -0.50 0.78 0.86 0.76
## logOR_orig -0.50 0.85 0.88 0.80
## logRR_orig logOR_orig
## wp_pol_position_numerical -0.50 -0.50
## d_orig 0.78 0.85
## RR_orig 0.86 0.88
## OR_orig 0.76 0.80
## logRR_orig 1.00 0.99
## logOR_orig 0.99 1.00
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented_small$logRR %>% na.omit() %>% length()
## [1] 151
#without old data
wtd.cors(voting_augmented_no_old %>% select(wp_pol_position_numerical, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented_no_old$weight_pp)
## wp_pol_position_numerical d_orig RR_orig OR_orig
## wp_pol_position_numerical 1.00 -0.50 -0.48 -0.40
## d_orig -0.50 1.00 0.80 0.84
## RR_orig -0.48 0.80 1.00 0.94
## OR_orig -0.40 0.84 0.94 1.00
## logRR_orig -0.57 0.78 0.85 0.74
## logOR_orig -0.58 0.85 0.87 0.79
## logRR_orig logOR_orig
## wp_pol_position_numerical -0.57 -0.58
## d_orig 0.78 0.85
## RR_orig 0.85 0.87
## OR_orig 0.74 0.79
## logRR_orig 1.00 0.99
## logOR_orig 0.99 1.00
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented_no_old$logRR %>% na.omit() %>% length()
## [1] 100
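A compact way to tabulate the non-missing logRR counts across the three sensitivity datasets, equivalent to the na.omit() calls above (a sketch):
#non-missing logRR by dataset
c(
main = sum(!is.na(voting_augmented$logRR)),
small_parties = sum(!is.na(voting_augmented_small$logRR)),
no_old = sum(!is.na(voting_augmented_no_old$logRR))
)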
#make a convenience function
bma_modeller = function(x, party_pos = "wp_pol_position_numerical", mcmc = "enumerate", seed = 1, exclude_vars = c(), ...) {
# browser()
#no missing data subset
d = voting_augmented %>%
select(
!!x,
!!party_pos,
!!names(parties_wp_ideology_dummies[-c(1:2)])
) %>%
miss_filter()
#remove excluded variables
for (v in exclude_vars) d[[v]] = NULL
#join the arguments
args = list(
X.data = d %>% select(!!x, !!everything()),
mcmc = mcmc
) %>% c(list(...))
#fit with seed
set.seed(seed)
fit = do.call(BMS::bms, args)
fit
}
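For readers unfamiliar with BMS, here is a minimal self-contained illustration of BMS::bms on built-in data; it is not part of the analysis, but shows the enumeration setting that bma_modeller relies on (the first column of X.data is treated as the outcome):
#toy BMS run on mtcars (illustrative only)
toy_fit = BMS::bms(
mtcars[c("mpg", "wt", "hp", "qsec")], #mpg is the outcome
mcmc = "enumerate", #exhaustive model enumeration, as in bma_modeller
user.int = FALSE #suppress interactive printing
)
coef(toy_fit) #posterior inclusion probabilities and means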
#fit BMAs
bma_d = cache_object({bma_modeller("d")}, filename = "cache/bma_d.rds")
## Cache found, reading object from disk
## Registered S3 method overwritten by 'BMS':
## method from
## quantile.density
bma_logRR = cache_object({bma_modeller("logRR")}, filename = "cache/bma_logRR.rds")
## Cache found, reading object from disk
bma_logOR = cache_object({bma_modeller("logOR")}, filename = "cache/bma_logOR.rds")
## Cache found, reading object from disk
#alternative position variable
bma_d_rating = cache_object({bma_modeller("d", party_pos = "mean_rating")}, filename = "cache/bma_d_rating.rds")
## Cache found, reading object from disk
bma_logRR_rating = cache_object({bma_modeller("logRR", party_pos = "mean_rating")}, filename = "cache/bma_logRR_rating.rds")
## Cache found, reading object from disk
bma_logOR_rating = cache_object({bma_modeller("logOR", party_pos = "mean_rating")}, filename = "cache/bma_logOR_rating.rds")
## Cache found, reading object from disk
The populism results are strange. What happens if we leave one of the populism variables out, namely the general populism dummy?
#which parties are populist?
parties_populist = parties %>% filter(LW_populism | RW_populism | populism)
parties_populist %>% select(party_country, populism, LW_populism, RW_populism, wp_pol_position_clean, wp_pol_position_numerical, mean_rating) %>% print(n=Inf)
## # A tibble: 35 x 7
## party_country populism LW_populism RW_populism wp_pol_position_clean
## <chr> <lgl> <lgl> <lgl> <chr>
## 1 Danish People's Pa… TRUE FALSE TRUE right-wing to far-right
## 2 Sweden Democrats |… TRUE FALSE TRUE right-wing to far-right
## 3 Republican Party |… TRUE FALSE TRUE <NA>
## 4 Democratic Party |… TRUE TRUE FALSE <NA>
## 5 The Left | Germany TRUE TRUE FALSE left-wing to far-left
## 6 Alternative for Ge… TRUE FALSE TRUE right-wing to far-right…
## 7 Party of Democrati… TRUE TRUE FALSE left-wing
## 8 ANO 2011 | Czech R… TRUE FALSE FALSE centre to centre-right
## 9 Dawn - National Co… TRUE FALSE TRUE right-wing to far-right
## 10 Nick Xenophon Team… TRUE FALSE FALSE centre
## 11 Katter's Australia… TRUE FALSE TRUE <NA>
## 12 Palmer United Part… TRUE FALSE TRUE right-wing
## 13 Reform Party of Ca… TRUE FALSE TRUE centre-right to right-w…
## 14 Progressive Party … TRUE FALSE FALSE centre to centre-right
## 15 Self-Defence of th… TRUE FALSE FALSE economic: left-wing soc…
## 16 Law and Justice | … TRUE FALSE TRUE right-wing
## 17 Real Politics Unio… TRUE FALSE TRUE right-wing
## 18 New Zealand First … TRUE FALSE FALSE centre
## 19 Liberal Democratic… TRUE FALSE TRUE right-wing to far-right
## 20 Flemish Interest |… TRUE FALSE TRUE right-wing to far-right
## 21 People's Party | B… TRUE FALSE TRUE right-wing to far-right
## 22 Libertarian, Direc… TRUE FALSE TRUE centre-right to right-w…
## 23 Austrian People's … TRUE FALSE TRUE centre-right to right-w…
## 24 Freedom Party of A… TRUE FALSE TRUE right-wing to far-right
## 25 Peter Pilz List | … TRUE TRUE FALSE centre-left to left-wing
## 26 Alliance for the F… TRUE FALSE TRUE centre-right to right-w…
## 27 Swiss People's Par… TRUE FALSE TRUE right-wing to far-right
## 28 National Front | F… TRUE FALSE TRUE right-wing to far-right
## 29 Debout la Républiq… TRUE FALSE TRUE right-wing to far-right
## 30 National Republica… TRUE FALSE TRUE far-right
## 31 UK Independence Pa… TRUE FALSE TRUE right-wing to far-right
## 32 Finns Party | Finl… TRUE FALSE TRUE right-wing to far-right
## 33 Socialist Party | … TRUE TRUE FALSE left-wing
## 34 Slovenian Democrat… TRUE FALSE TRUE right-wing
## 35 Slovenian National… TRUE FALSE TRUE right-wing to far-right
## # … with 2 more variables: wp_pol_position_numerical <dbl>, mean_rating <dbl>
#sum stats
#all
parties_populist %>% select(LW_populism, RW_populism, populism, wp_pol_position_numerical, mean_rating) %>% map_df(as.numeric) %>% describe()
#directional populism
parties_populist %>% filter(LW_populism) %>% select(RW_populism, populism, wp_pol_position_numerical, mean_rating) %>% map_df(as.numeric) %>% describe()
parties_populist %>% filter(RW_populism) %>% select(RW_populism, populism, wp_pol_position_numerical, mean_rating) %>% map_df(as.numeric) %>% describe()
#just populism
parties_populist %>% filter(!LW_populism, !RW_populism) %>% select(RW_populism, populism, wp_pol_position_numerical, mean_rating) %>% map_df(as.numeric) %>% describe()
#without populism
bma_d_noP = cache_object({bma_modeller("d", exclude_vars = "populism")}, filename = "cache/bma_d_noP.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## conservative 0.972 -9.6924 2.89 0.00000 4
## EU_positive 0.903 6.6803 2.96 1.00000 7
## green 0.575 4.3513 4.29 1.00000 3
## nationalism 0.461 -3.0995 3.77 0.00000 5
## direct_democracy 0.345 5.9158 9.13 1.00000 21
## LW_populism 0.111 -1.1615 3.90 0.00000 16
## feminism 0.107 0.6807 2.33 1.00000 8
## liberalism 0.103 -0.3677 1.32 0.00077 20
## national_conservatism 0.095 -0.5462 2.11 0.00572 10
## agrarianism 0.069 -0.2997 1.40 0.00000 11
## democratic_socialism 0.063 0.2510 1.29 0.99724 18
## wp_pol_position_numerical 0.055 -0.0702 0.48 0.14787 1
## socialism 0.054 0.1683 1.02 0.97923 2
## RW_populism 0.049 -0.1069 1.15 0.32089 15
## christian 0.044 -0.1133 0.81 0.00747 12
## social_liberalism 0.042 0.0957 0.93 0.72514 19
## libertarianism 0.041 -0.1781 1.34 0.00390 9
## EU_skeptic 0.040 -0.0398 0.72 0.41142 6
## centrism 0.040 -0.1645 1.29 0.00039 14
## social_democracy 0.033 0.0077 0.53 0.58333 17
## communism 0.030 0.0329 0.79 0.81373 13
##
## Mean no. regressors Draws Burnins Time
## "4.2318" "2097152" "0" "1.3602 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "118" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9916"
##
## Time difference of 1.4 mins
bma_logRR_noP = cache_object({bma_modeller("logRR", exclude_vars = "populism")}, filename = "cache/bma_logRR_noP.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## LW_populism 0.828 -0.6608 0.378 0.0e+00 16
## green 0.737 0.2434 0.177 1.0e+00 3
## nationalism 0.684 -0.2572 0.202 0.0e+00 5
## conservative 0.568 -0.1903 0.188 0.0e+00 4
## wp_pol_position_numerical 0.477 -0.0646 0.077 9.6e-06 1
## national_conservatism 0.385 -0.1686 0.243 0.0e+00 10
## libertarianism 0.277 -0.1280 0.234 0.0e+00 9
## agrarianism 0.218 -0.0571 0.124 0.0e+00 11
## social_liberalism 0.133 0.0229 0.070 9.9e-01 19
## EU_positive 0.116 0.0157 0.053 1.0e+00 7
## christian 0.104 -0.0169 0.061 4.6e-06 12
## centrism 0.080 -0.0211 0.092 4.0e-08 14
## democratic_socialism 0.071 0.0112 0.058 9.7e-01 18
## feminism 0.057 0.0082 0.049 1.0e+00 8
## liberalism 0.055 0.0039 0.028 8.9e-01 20
## social_democracy 0.052 -0.0035 0.032 1.5e-01 17
## socialism 0.051 0.0018 0.040 6.6e-01 2
## direct_democracy 0.049 0.0112 0.079 1.0e+00 21
## EU_skeptic 0.047 -0.0029 0.034 2.4e-01 6
## RW_populism 0.047 0.0024 0.047 6.4e-01 15
## communism 0.039 -0.0010 0.038 4.2e-01 13
##
## Mean no. regressors Draws Burnins Time
## "5.0726" "2097152" "0" "1.3561 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "118" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9916"
##
## Time difference of 1.4 mins
bma_logOR_noP = cache_object({bma_modeller("logOR", exclude_vars = "populism")}, filename = "cache/bma_logOR_noP.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## LW_populism 0.781 -0.65515 0.422 0.0e+00 16
## green 0.778 0.28929 0.193 1.0e+00 3
## nationalism 0.699 -0.29347 0.223 0.0e+00 5
## conservative 0.667 -0.25271 0.206 0.0e+00 4
## national_conservatism 0.386 -0.18609 0.267 0.0e+00 10
## wp_pol_position_numerical 0.373 -0.05213 0.077 8.7e-05 1
## libertarianism 0.270 -0.13292 0.248 0.0e+00 9
## agrarianism 0.231 -0.06757 0.141 0.0e+00 11
## EU_positive 0.216 0.04062 0.089 1.0e+00 7
## social_liberalism 0.118 0.02118 0.072 9.8e-01 19
## christian 0.091 -0.01521 0.061 1.9e-05 12
## democratic_socialism 0.086 0.01693 0.073 9.8e-01 18
## centrism 0.080 -0.02271 0.100 7.0e-08 14
## feminism 0.070 0.01308 0.064 1.0e+00 8
## direct_democracy 0.068 0.02361 0.118 1.0e+00 21
## socialism 0.056 0.00396 0.047 7.5e-01 2
## social_democracy 0.048 -0.00238 0.031 2.5e-01 17
## RW_populism 0.047 0.00180 0.050 6.1e-01 15
## EU_skeptic 0.047 -0.00269 0.036 2.8e-01 6
## liberalism 0.045 0.00105 0.024 6.3e-01 20
## communism 0.040 -0.00049 0.041 5.0e-01 13
##
## Mean no. regressors Draws Burnins Time
## "5.1970" "2097152" "0" "1.3540 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "118" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9916"
##
## Time difference of 1.4 mins
#with alternative rating source
bma_d_noP_rating = cache_object({bma_modeller("d", exclude_vars = "populism", party_pos = "mean_rating")}, filename = "cache/bma_d_noP_rating.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## conservative 0.853 -7.749 3.99 0.0e+00 4
## EU_positive 0.781 5.499 3.48 1.0e+00 7
## green 0.599 4.863 4.58 1.0e+00 3
## nationalism 0.369 -2.465 3.59 1.9e-05 5
## direct_democracy 0.296 5.164 8.91 1.0e+00 21
## mean_rating 0.162 -0.505 1.33 7.1e-03 1
## national_conservatism 0.127 -0.890 2.75 1.5e-03 10
## social_liberalism 0.096 0.521 1.99 1.0e+00 19
## RW_populism 0.096 -0.579 2.19 2.8e-02 15
## feminism 0.093 0.593 2.21 1.0e+00 8
## libertarianism 0.068 -0.445 2.07 1.4e-04 9
## agrarianism 0.066 -0.299 1.41 0.0e+00 11
## liberalism 0.061 -0.211 1.13 2.7e-02 20
## social_democracy 0.052 0.162 0.96 9.8e-01 17
## christian 0.049 -0.159 0.97 3.6e-03 12
## democratic_socialism 0.044 0.137 0.93 9.9e-01 18
## socialism 0.040 0.100 0.78 9.8e-01 2
## EU_skeptic 0.036 -0.041 0.67 3.5e-01 6
## communism 0.028 -0.018 0.78 5.1e-01 13
## LW_populism 0.028 0.017 0.92 6.7e-01 16
## centrism 0.027 -0.017 0.75 3.2e-01 14
##
## Mean no. regressors Draws Burnins Time
## "3.9730" "2097152" "0" "1.3589 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "123" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9919"
##
## Time difference of 1.4 mins
bma_logRR_noP_rating = cache_object({bma_modeller("logRR", exclude_vars = "populism", party_pos = "mean_rating")}, filename = "cache/bma_logRR_noP_rating.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## green 0.951 0.40925 0.148 1.0e+00 3
## national_conservatism 0.800 -0.44817 0.263 0.0e+00 10
## conservative 0.480 -0.12864 0.152 0.0e+00 4
## agrarianism 0.367 -0.11415 0.167 0.0e+00 11
## nationalism 0.315 -0.11015 0.181 0.0e+00 5
## social_liberalism 0.213 0.04392 0.095 1.0e+00 19
## LW_populism 0.167 -0.06955 0.177 0.0e+00 16
## mean_rating 0.110 -0.00940 0.033 1.5e-02 1
## libertarianism 0.107 -0.03445 0.117 0.0e+00 9
## christian 0.088 -0.01482 0.058 7.6e-07 12
## EU_positive 0.078 0.01012 0.043 9.9e-01 7
## feminism 0.063 0.01248 0.061 1.0e+00 8
## social_democracy 0.046 0.00446 0.030 9.7e-01 17
## liberalism 0.042 0.00200 0.023 7.9e-01 20
## democratic_socialism 0.038 0.00361 0.032 9.6e-01 18
## direct_democracy 0.032 0.00508 0.057 1.0e+00 21
## EU_skeptic 0.031 -0.00152 0.024 1.9e-01 6
## socialism 0.031 0.00092 0.024 7.3e-01 2
## communism 0.031 -0.00210 0.032 2.0e-01 13
## RW_populism 0.031 -0.00070 0.033 4.6e-01 15
## centrism 0.030 -0.00183 0.032 1.5e-01 14
##
## Mean no. regressors Draws Burnins Time
## "4.0503" "2097152" "0" "1.3364 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "123" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9919"
##
## Time difference of 1.3 mins
bma_logOR_noP_rating = cache_object({bma_modeller("logOR", exclude_vars = "populism", party_pos = "mean_rating")}, filename = "cache/bma_logOR_noP_rating.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## green 0.955 0.45464 0.162 1.00000 3
## national_conservatism 0.767 -0.46553 0.297 0.00000 10
## conservative 0.515 -0.15581 0.172 0.00000 4
## agrarianism 0.370 -0.12772 0.186 0.00000 11
## nationalism 0.339 -0.13232 0.205 0.00000 5
## social_liberalism 0.234 0.05551 0.113 1.00000 19
## EU_positive 0.130 0.02322 0.070 1.00000 7
## libertarianism 0.120 -0.04418 0.140 0.00000 9
## mean_rating 0.107 -0.00973 0.035 0.02051 1
## LW_populism 0.099 -0.03761 0.135 0.00000 16
## christian 0.080 -0.01422 0.059 0.00001 12
## feminism 0.076 0.01803 0.077 1.00000 8
## social_democracy 0.057 0.00730 0.041 0.98167 17
## direct_democracy 0.042 0.01190 0.085 1.00000 21
## liberalism 0.039 0.00041 0.025 0.66817 20
## democratic_socialism 0.037 0.00348 0.033 0.94990 18
## RW_populism 0.033 -0.00202 0.039 0.27965 15
## EU_skeptic 0.032 -0.00181 0.027 0.17752 6
## communism 0.032 -0.00244 0.036 0.18553 13
## socialism 0.031 0.00102 0.025 0.72384 2
## centrism 0.030 -0.00198 0.035 0.14405 14
##
## Mean no. regressors Draws Burnins Time
## "4.1236" "2097152" "0" "1.3297 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "123" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9919"
##
## Time difference of 1.3 mins
#plot the model coefs as a briefer summary
bma_d_noP$topmod$betas() %>%
t() %>%
set_colnames(bma_d_noP$X.data[, -1] %>% colnames()) %>%
as_tibble() %>%
mutate(
model = 1:n()
) %>%
pivot_longer(
cols = -model,
names_to = "predictor",
values_to = "beta"
) %>%
ggplot(aes(beta)) +
geom_histogram() +
facet_wrap("predictor")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/bma_d_noP_betas.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
One plot to rule them all.
#aggregate from model summaries
metas_coefs = bind_rows(
coef(bma_d_noP) %>% as.data.frame() %>% rownames_to_column() %>% mutate(metric = "d", position = "Wikipedia"),
coef(bma_logRR_noP) %>% as.data.frame() %>% rownames_to_column() %>% mutate(metric = "logRR", position = "Wikipedia"),
coef(bma_d_noP_rating) %>% as.data.frame() %>% rownames_to_column() %>% mutate(metric = "d", position = "Raters"),
coef(bma_logRR_noP_rating) %>% as.data.frame() %>% rownames_to_column() %>% mutate(metric = "logRR", position = "Raters")
) %>%
#rename position source
mutate(
rowname = rowname %>% mapvalues(c("wp_pol_position_numerical", "mean_rating"),
c("Left-right position (Wikipedia)", "Left-right position (Raters)"))
) %>%
df_legalize_names()
#fix some labels
nicer_tags = function(x) {
x %>%
str_clean() %>%
str_replace("LW", "Left-wing") %>%
str_replace("RW", "Right-wing") %>%
str_to_sentence() %>%
str_replace("Eu", "EU")
}
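A quick check of the label cleaner, assuming kirkegaard's str_clean() replaces underscores with spaces; the final Eu -> EU replace undoes the lowercasing from str_to_sentence():
nicer_tags(c("LW_populism", "EU_positive"))
## expected: "Left-wing populism" "EU positive"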
#plots
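#note: the "+" string concatenation inside aes() below presumably relies on a
#character method for "+" from the kirkegaard package; base R would need str_c(position, " ", metric)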
metas_coefs %>%
ggplot(aes(rowname %>% nicer_tags(), PIP, fill = position + " " + metric)) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous("Probability of inclusion in best models", labels = scales::percent) +
scale_x_discrete("Predictor", guide = guide_axis(n.dodge = 1)) +
scale_fill_discrete("Meta-analysis") +
coord_flip()
GG_save("figs/metas_PIP.png")
metas_coefs %>%
ggplot(aes(rowname %>% nicer_tags(), Post_Mean, fill = position)) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous("Posterior mean effect size") +
scale_x_discrete("Predictor", guide = guide_axis(n.dodge = 1)) +
scale_fill_discrete("Meta-analysis") +
coord_flip() +
facet_wrap("metric", scales = c("free_x"))
GG_save("figs/metas_post_mean.png")
Here we construct a measure of left-right bias and compute it by country. The most obvious measure is the weighted mean of party positions using journalists' votes as weights, minus the same weighted mean using the general population's votes as weights. If journalists vote like the general population, the difference should be close to 0.
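A toy illustration of the bias measure with made-up numbers (weighted.mean is the base-R analogue of the wtd_mean used below):
#hypothetical three-party system
pos = c(-2, 0, 2) #party positions, left (-) to right (+)
journo_pct = c(60, 30, 10) #journalist vote shares
general_pct = c(30, 40, 30) #general population vote shares
weighted.mean(pos, journo_pct) - weighted.mean(pos, general_pct)
#gives -1 - 0 = -1: journalists sit one point further left than the public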
#weighted means
country_bias = plyr::ddply(voting_augmented, "Country", function(d) {
#skip if no data
# if (all(is.na(d$wp_pol_position_numerical))) return(NULL)
tibble(
wmean_journalists_wikipedia = wtd_mean(d$wp_pol_position_numerical, d$Journalist_pct),
wmean_general_wikipedia = wtd_mean(d$wp_pol_position_numerical, d$General_pct),
bias_wikipedia = wmean_journalists_wikipedia - wmean_general_wikipedia,
#ratings
wmean_journalists_raters = wtd_mean(d$mean_rating, d$Journalist_pct),
wmean_general_raters = wtd_mean(d$mean_rating, d$General_pct),
bias_raters = wmean_journalists_raters - wmean_general_raters
)
})
#stats
country_bias[-1] %>% describe()
#cors
country_bias[-1] %>% wtd.cors()
## wmean_journalists_wikipedia wmean_general_wikipedia
## wmean_journalists_wikipedia 1.00 0.69
## wmean_general_wikipedia 0.69 1.00
## bias_wikipedia 0.43 -0.35
## wmean_journalists_raters 0.83 0.52
## wmean_general_raters 0.72 0.77
## bias_raters 0.47 -0.20
## bias_wikipedia wmean_journalists_raters
## wmean_journalists_wikipedia 0.430 0.83
## wmean_general_wikipedia -0.354 0.52
## bias_wikipedia 1.000 0.43
## wmean_journalists_raters 0.431 1.00
## wmean_general_raters -0.035 0.87
## bias_raters 0.853 0.59
## wmean_general_raters bias_raters
## wmean_journalists_wikipedia 0.715 0.47
## wmean_general_wikipedia 0.769 -0.20
## bias_wikipedia -0.035 0.85
## wmean_journalists_raters 0.871 0.59
## wmean_general_raters 1.000 0.12
## bias_raters 0.116 1.00
#plot Wikipedia
country_bias %>%
filter(!is.na(bias_wikipedia)) %>%
mutate(Country = fct_reorder(Country, bias_wikipedia)) %>%
ggplot(aes(Country, bias_wikipedia)) +
geom_bar(stat = "identity") +
theme_bw() +
coord_flip()
GG_save("figs/bias_wikipedia_by_country.png")
#write table
country_bias %>% arrange(bias_wikipedia)
#plot raters
country_bias %>%
filter(!is.na(bias_raters)) %>%
mutate(Country = fct_reorder(Country, bias_raters)) %>%
ggplot(aes(Country, bias_raters)) +
geom_bar(stat = "identity") +
theme_bw() +
coord_flip()
GG_save("figs/bias_raters_by_country.png")
#write table
country_bias %>% arrange(bias_raters)
#relationships
GG_scatter(country_bias, "bias_wikipedia", "bias_raters", case_names = "Country")
## `geom_smooth()` using formula 'y ~ x'
Finally, we turn to time series of journalists' political preferences for Sweden and the USA.
#add blocks
timeseries$block = "Party-level"
timeseries = timeseries %>% plyr::ddply(c("country", "year"), function(dd) {
#if USA, just return
if (dd$country[1] == "USA") return(dd)
#make blocks
#right-wing block (the centre-right "Alliance" parties)
.right = dd[1, ]
.right$party = "right-wing block"
.right$block = "Block-level"
.right$percent = dd %>% filter(party %in% c("Centre Party", "Moderate Party", "Christian Democrats", "Liberals")) %>% pull(percent) %>% sum(na.rm = T)
#make left
.left = .right
.left$party = "left-wing block"
.left$percent = 100 - .right$percent
rbind(dd %>% filter(!party %in% c("right-wing block", "left-wing block")),
.right,
.left
)
})
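As a sanity check, the two Swedish block shares should sum to 100 within each country-year (a sketch):
#block percentages should total 100
timeseries %>%
filter(block == "Block-level") %>%
group_by(country, year) %>%
summarise(total = sum(percent), .groups = "drop")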
#plot USA
timeseries %>%
filter(country == "USA") %>%
ggplot(aes(year, percent/100, color = party)) +
geom_path() +
theme_bw() +
scale_y_continuous("Percentage affiliation", labels = scales::percent) +
scale_color_discrete("Party")
GG_save("figs/USA_timeseries.png")
#Sweden, simple
timeseries %>%
filter(country == "Sweden") %>%
ggplot(aes(year, percent/100, color = party)) +
geom_path() +
theme_bw() +
scale_y_continuous("Percentage sympathy", labels = scales::percent, breaks = seq(0, 1, .1)) +
scale_color_discrete("Party/block") +
facet_wrap(~block, strip.position = "bottom")
## Warning: Removed 6 row(s) containing missing values (geom_path).
GG_save("figs/Sweden_timeseries.png")
## Warning: Removed 6 row(s) containing missing values (geom_path).
#Swedish parties only
timeseries %>%
filter(country == "Sweden", block == "Party-level") %>%
ggplot(aes(year, percent/100, color = party)) +
geom_path() +
theme_bw() +
scale_y_continuous("Percentage sympathy", labels = scales::percent, breaks = seq(0, 1, .1)) +
scale_color_discrete("Party")
## Warning: Removed 6 row(s) containing missing values (geom_path).
GG_save("figs/Sweden_timeseries2.png")
## Warning: Removed 6 row(s) containing missing values (geom_path).
write_sessioninfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] spatstat_1.64-1 rpart_4.1-15 nlme_3.1-152
## [4] spatstat.data_2.0-0 osfr_0.2.8 renv_0.13.0
## [7] writexl_1.3.1 glmnet_4.1-1 rms_6.1-1
## [10] SparseM_1.81 polycor_0.7-10 stringi_1.5.3
## [13] rvest_1.0.0 googlesheets4_0.3.0 kirkegaard_2021-02-08
## [16] metafor_2.4-0 Matrix_1.3-2 psych_2.0.12
## [19] magrittr_2.0.1 assertthat_0.2.1 weights_1.0.1
## [22] mice_3.13.0 gdata_2.18.0 Hmisc_4.5-0
## [25] Formula_1.2-4 survival_3.2-7 lattice_0.20-41
## [28] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5
## [31] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3
## [34] tibble_3.1.0 ggplot2_3.3.3 tidyverse_1.3.0
## [37] pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 BMS_0.3.4
## [4] plyr_1.8.6 selectr_0.4-2 splines_4.0.4
## [7] TH.data_1.0-10 digest_0.6.27 foreach_1.5.1
## [10] htmltools_0.5.1.1 fansi_0.4.2 checkmate_2.0.0
## [13] memoise_2.0.0 tensor_1.5 cluster_2.1.1
## [16] modelr_0.1.8 matrixStats_0.58.0 sandwich_3.0-0
## [19] askpass_1.1 jpeg_0.1-8.1 colorspace_2.0-0
## [22] haven_2.3.1 xfun_0.21 crayon_1.4.1
## [25] jsonlite_1.7.2 lme4_1.1-26 zoo_1.8-9
## [28] iterators_1.0.13 glue_1.4.2 polyclip_1.10-0
## [31] gtable_0.3.0 gargle_1.0.0 MatrixModels_0.5-0
## [34] shape_1.4.5 abind_1.4-5 scales_1.1.1
## [37] mvtnorm_1.1-1 DBI_1.1.1 Rcpp_1.0.6
## [40] htmlTable_2.1.0 tmvnsim_1.0-2 foreign_0.8-81
## [43] htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2
## [46] ellipsis_0.3.1 farver_2.1.0 pkgconfig_2.0.3
## [49] nnet_7.3-15 dbplyr_2.1.0 deldir_0.2-10
## [52] utf8_1.1.4 crul_1.1.0 tidyselect_1.1.0
## [55] labeling_0.4.2 rlang_0.4.10 reshape2_1.4.4
## [58] multilevel_2.6 munsell_0.5.0 cellranger_1.1.0
## [61] tools_4.0.4 cachem_1.0.4 cli_2.3.1
## [64] generics_0.1.0 broom_0.7.5 evaluate_0.14
## [67] fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2
## [70] knitr_1.31 fs_1.5.0 quantreg_5.85
## [73] xml2_1.3.2 psychometric_2.2 compiler_4.0.4
## [76] rstudioapi_0.13 curl_4.3 png_0.1-7
## [79] spatstat.utils_2.0-0 reprex_1.0.0.9000 statmod_1.4.35
## [82] highr_0.8 nloptr_1.2.2.2 vctrs_0.3.6
## [85] pillar_1.5.1 lifecycle_1.0.0 data.table_1.14.0
## [88] conquer_1.0.2 R6_2.5.0 latticeExtra_0.6-29
## [91] gridExtra_2.3 codetools_0.2-18 polspline_1.1.19
## [94] boot_1.3-27 MASS_7.3-53.1 gtools_3.8.2
## [97] openssl_1.4.3 withr_2.4.1 httpcode_0.3.0
## [100] mnormt_2.0.2 multcomp_1.4-16 mgcv_1.8-33
## [103] parallel_4.0.4 hms_1.0.0 grid_4.0.4
## [106] minqa_1.2.4 rmarkdown_2.7 googledrive_1.0.1
## [109] lubridate_1.7.10 base64enc_0.1-3
#export data for reuse
voting_augmented %>% write_rds("data/party_data.rds")
voting_augmented %>% write_xlsx("data/party_data.xlsx")
#versioning
if (F) {
renv::init()
}
#upload files
if (F) {
#upload files in project
#overwrite existing (versioning)
#osf repo
osf_auth(read_lines("~/.config/osf_token"))
osf_proj = osf_retrieve_node("https://osf.io/6uvnu/")
osf_upload(osf_proj, path = c("data", "figs", "notebook.Rmd", "notebook.html", "sessions_info.txt", "renv.lock"), conflicts = "overwrite", recurse = T)
}