options(digits = 2)
library(pacman)
p_load(
  kirkegaard,
  googlesheets4,
  rvest,
  stringi,
  polycor,
  rms,
  glmnet,
  writexl,
  renv,
  osfr,
  spatstat
)
theme_set(theme_bw())
#strip Wikipedia sources in brackets from strings
str_rm_sources = function(x) {
  str_replace_all(x, "\\[[^\\]]+\\]", "")
}
#length-zero vector or empty data frame to NA
l0_to_NA = function(x) {
  if (is.data.frame(x)) {
    if (nrow(x) == 0) return(NA)
  }
  if (length(x) == 0) return(NA)
  x
}
#infinite to NA, to avoid functions that give nonsense results when Inf values are present
Inf_to_NA = function(x) {
  x[is.infinite(x)] = NA
  x
}
#make psych::describe return a plain data frame (avoids its special print class)
describe = function(...) {
  y = psych::describe(...)
  class(y) = "data.frame"
  y
}
#convert a list column to a numeric vector, mapping NULLs to NA
list_to_numeric = function(x) {
  map_dbl(x, function(x) {
    if (is.null(x)) return(NA_real_)
    as.numeric(x)
  })
}
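A quick sanity check of these helpers (illustrative values, not from the original data):
str_rm_sources("Centre-right[1][2]") #"Centre-right"
Inf_to_NA(c(1, Inf, -Inf)) #1 NA NA
list_to_numeric(list(1, NULL, "2")) #1 NA 2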
#loess fit from simulation
#used to impute data when observed % is 0
loess_fit = read_rds("data/loess_fit.rds")
#reorder correlation matrix variables based on correlations (hierarchical clustering on 1 - r)
reorder_cormat <- function(cormat) {
  dd <- as.dist((1 - cormat)/2)
  hc <- hclust(dd)
  cormat[hc$order, hc$order]
}
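A minimal usage check (illustrative, not part of the original run); the exact ordering depends on the clustering:
iris[1:4] %>% wtd.cors() %>% reorder_cormat()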
#melt a symmetric matrix to long form without duplicate pairs
melt_to_long = function(x) {
  #index the lower half
  #without throwing away any potential NAs
  lower_idx = lower.tri(x) %>% which()
  #melt
  x_melt = x %>% reshape2::melt()
  #keep only the lower-triangle rows via the index
  x_melt = x_melt[lower_idx, ]
  x_melt
}
#test it
iris[1:4] %>% wtd.cors()
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.00 -0.12 0.87 0.82
## Sepal.Width -0.12 1.00 -0.43 -0.37
## Petal.Length 0.87 -0.43 1.00 0.96
## Petal.Width 0.82 -0.37 0.96 1.00
iris[1:4] %>% wtd.cors() %>% melt_to_long()
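This should return six rows, one for each unique pair below the diagonal (e.g. Sepal.Width vs. Sepal.Length with value -0.12).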
#labels
leftright_labels = c("Far-left", "Left", "Center-left", "Center", "Center-right", "Right", "Far-right")
#authenticate with Google
googlesheets4::gs4_auth("the.dfx@gmail.com")
#get sheets for journalist data
# voting = read_sheet("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0", sheet = "voting") %>% df_legalize_names()
voting = read_sheet("https://docs.google.com/spreadsheets/d/1o0f1XfA4wprMpuSeH0gUdPgdrfDQATD05qGrWlKFFVk/edit#gid=2060900662", sheet = "voting") %>% df_legalize_names()
## Auto-refreshing stale OAuth token.
## Reading from "*** Being Fixed*** EXTERNAL Journalist political preferences dataset"
## Range "'voting'"
voting$Journalist_pct_orig = voting$Journalist_pct
voting_orig = voting
# parties = read_sheet("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0", sheet = "parties") %>% df_legalize_names()
parties = read_sheet("https://docs.google.com/spreadsheets/d/1o0f1XfA4wprMpuSeH0gUdPgdrfDQATD05qGrWlKFFVk/edit#gid=355416179", sheet = "parties") %>% df_legalize_names()
## Reading from "*** Being Fixed*** EXTERNAL Journalist political preferences dataset"
## Range "'parties'"
parties_orig = parties
# samples = read_sheet("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0", sheet = "samples") %>% df_legalize_names()
samples = read_sheet("https://docs.google.com/spreadsheets/d/1o0f1XfA4wprMpuSeH0gUdPgdrfDQATD05qGrWlKFFVk/edit#gid=426865321", sheet = "samples") %>% df_legalize_names()
## Reading from "*** Being Fixed*** EXTERNAL Journalist political preferences dataset"
## Range "'samples'"
assert_that(!anyDuplicated(samples$ID))
## [1] TRUE
samples_orig = samples
#party ratings
party_ratings = read_sheet("https://docs.google.com/spreadsheets/d/1tHT_ZJRg6M2CtJQyLePGNasPPIR8_TrJ6tL5LzbdIBE/edit#gid=0", sheet = "parties") %>% df_legalize_names()
## Reading from "Parties rating collected"
## Range "'parties'"
party_ratings_meta = read_sheet("https://docs.google.com/spreadsheets/d/1tHT_ZJRg6M2CtJQyLePGNasPPIR8_TrJ6tL5LzbdIBE/edit#gid=0", sheet = "raters") %>% df_legalize_names()
## Reading from "Parties rating collected"
## Range "'raters'"
## New names:
## * `` -> ...5
#temporal data for plots
timeseries = read_sheet("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0", sheet = "longitudinal")
## Reading from "INTERNAL Journalist political preferences dataset"
## Range "'longitudinal'"
#switch to English names
#one-time-use code
if (F) {
  xx = left_join(voting, parties, by = c("Party_English", "Country"))
  #add row numbers so we can find the rows' locations after sorting
  xx$row = 1:nrow(xx)
  #view rows that are not "Other"
  xx %>% filter(Party != "Other") %>% View()
  #insert "Other"
  xx$Party_English = if_else(xx$Party == "Other", true = "Other", false = xx$Party_English)
  #export
  xx %>% write_clipboard()
}
To analyze the data, we need to recode them into relative rates (RR) and odds ratios (OR), as well as their logs.
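As a worked example with hypothetical numbers: suppose 40% of journalists and 20% of the general population vote for some party.
j = 40; g = 20
j - g #d = 20 percentage points
j / g #RR = 2
(j / (100 - j)) / (g / (100 - g)) #OR = (40/60)/(20/80) = 2.67
log10(j / g) #logRR = 0.30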
#samples data
#exclude Russian data
samples = samples %>% filter(Country != "Russia")
#fix types
samples$n = samples$n %>% list_to_numeric()
samples$n_no_party_data = samples$n_no_party_data %>% list_to_numeric()
#recode to sensible format
samples$n_party_data = samples$n - samples$n_no_party_data
#impute likely sample size
n_sumstats = samples$n %>% describe()
n_party_sumstats = samples$n_party_data %>% describe()
n_party_sumstats
n_party_frac_sumstats = (samples$n_party_data / samples$n) %>% describe()
n_party_frac_sumstats
#save orig
samples$n_party_data_orig = samples$n_party_data
#impute likely values
samples$n_party_data = case_when(
  #when we have data, use it
  !is.na(samples$n_party_data) ~ samples$n_party_data,
  #when we have the full sample size, use that and assume the median answer fraction
  !is.na(samples$n) ~ samples$n * n_party_frac_sumstats$median,
  #otherwise impute the median effective sample size
  is.na(samples$n) ~ n_party_sumstats$median
)
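For instance (hypothetical numbers): a sample with n = 200 but an unknown answer count gets 200 times the median answer fraction, while a sample missing n entirely falls back on the median effective sample size across all samples.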
#fix the ID column
#some IDs are given as ranges (Russian data)
voting$journalist_sample_ID = voting$Journalist_sample %>% list_to_numeric()
#remove excluded data
voting = voting %>% filter(is.na(skip))
#join with sample size data
voting = left_join(voting, samples %>% select(ID, n, n_party_data, Question_coded), by = c("journalist_sample_ID" = "ID"))
voting %<>% filter(!is.na(Journalist_pct))
#impute proportions for zeros
voting$Journalist_pct2 = voting$Journalist_pct_orig
voting$Journalist_pct = voting$Journalist_pct_orig
voting$Journalist_pct2[voting$Journalist_pct == 0] = predict(loess_fit, newdata = tibble(n = voting$n_party_data[voting$Journalist_pct == 0])) * 100
#renormalize to 100%
voting = voting %>% plyr::ddply(c("journalist_sample_ID", "Election_type"), function(d) {
  #skip if no zeros
  if (!any(d$Journalist_pct == 0)) return(d)
  #which were zero?
  which0 = (d$Journalist_pct == 0)
  #what was the original sum?
  #this is not always 100 because some votes go to "Other" parties
  orig_sum = sum(d$Journalist_pct)
  #investigate if not approximately 100
  if (abs(orig_sum - 100) > 2) browser()
  #as fractions
  d$Journalist_pct2 = d$Journalist_pct2 / orig_sum
  #scale the nonzero values down so the total is preserved
  d$Journalist_pct2[!which0] = d$Journalist_pct2[!which0] * (1 - sum(d$Journalist_pct2[which0]))
  #assert values ok
  # browser()
  # assert_that(sum(d$Journalist_pct2) == 1)
  #back to percent
  d$Journalist_pct2 = d$Journalist_pct2 * orig_sum
  d
})
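To illustrate the renormalization with hypothetical numbers: if a sample has parties at 60%, 40%, and 0%, and the zero is imputed to 1%, the two nonzero values are scaled by (1 - 0.01) to 59.4% and 39.6%, so the total stays at 100%.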
#switch to the zero-adjusted values
voting$Journalist_pct = voting$Journalist_pct2
#add unique ID
voting %<>% mutate(
  party_country = Party + " | " + Country
)
#merge within countries
#define function
aggr_voting_data = function(x) {
  #base is mostly the same
  y = x[1, ]
  #sample IDs
  y$journalist_sample_ID = str_c(x$journalist_sample_ID %>% unique() %>% na.omit(), collapse = ", ")
  y$General_sample = str_c(x$General_sample %>% unique() %>% na.omit(), collapse = ", ")
  #total sample size
  y$n_total = sum(x$n, na.rm = T)
  y$n_party_total = sum(x$n_party_data, na.rm = T)
  #remove cols
  y$Journalist_sample = NULL
  y$Year = NULL
  y$Election_type = NULL
  y$n = NULL
  y$n_party_data = NULL
  y$Question_coded = NULL
  y$comments = NULL
  y$skip = NULL
  #means for the numeric columns, weighted by the square root of the sample size
  #if no sample size, impute 1's
  x$n_party_data[is.na(x$n_party_data)] = 1
  #get means
  y$Journalist_pct = wtd_mean(x$Journalist_pct, w = x$n_party_data %>% sqrt())
  y$Journalist_pct_orig = wtd_mean(x$Journalist_pct_orig, w = x$n_party_data %>% sqrt())
  y$General_pct = wtd_mean(x$General_pct, w = x$n_party_data %>% sqrt())
  #RR and OR
  y %>% mutate(
    d = Journalist_pct - General_pct,
    RR = Journalist_pct / General_pct,
    OR = (Journalist_pct / (100 - Journalist_pct)) / (General_pct / (100 - General_pct)),
    logRR = log10(RR),
    logOR = log10(OR),
    #add versions based on the unadjusted data
    d_orig = Journalist_pct_orig - General_pct,
    RR_orig = Journalist_pct_orig / General_pct,
    OR_orig = (Journalist_pct_orig / (100 - Journalist_pct_orig)) / (General_pct / (100 - General_pct)),
    logRR_orig = log10(RR_orig) %>% Inf_to_NA(),
    logOR_orig = log10(OR_orig) %>% Inf_to_NA()
  )
}
#aggregate
voting_aggr = plyr::ddply(voting, c("Country", "Party"), .fun = aggr_voting_data)
voting_aggr_no_old = plyr::ddply(voting %>% filter(Year >= 2005), c("Country", "Party"), .fun = aggr_voting_data)
#assert no duplicated links
#duplicates would indicate a data error
assert_that(!any(duplicated(parties$Wikipedia_link)))
## [1] TRUE
#unique ID
parties %<>% dplyr::mutate(
  party_country = Party + " | " + Country,
  wp_page = NA,
  Block = Block %>% str_to_lower() %>% factor() %>% fct_relevel("center"),
  id = 1:n()
)
We need to scrape some party data from Wikipedia.
#loop over rows of parties and fetch data
for (p in seq_along_rows(parties)) {
  party = parties$Party[p]
  country = parties$Country[p]
  #get page
  #do we have it already?
  party_page_file = sprintf("%s - %s.html", country, party)
  data_location = "data/parties/"
  party_page_file2 = data_location + party_page_file
  #does the file exist?
  if (!file.exists(party_page_file2)) {
    message(sprintf("%.0f%% -- Downloading page for %s -- %s", 100*p/nrow(parties), party, country))
    #download it
    party_page = parties$Wikipedia_link[p] %>% read_html()
    #save it
    party_page %>% as.character() %>% write_lines(party_page_file2)
  } else {
    #message(sprintf("%.0f%% -- Loading page for %s -- %s", 100*p/nrow(parties), party, country))
    #read from disk
    party_page = read_html(party_page_file2)
  }
  #add to table
  parties$wp_page[p] = list(party_page)
}
#parse vcard data
parties_vcard_data = map_df(seq_along_rows(parties), function(p) {
  # browser()
  suppressWarnings({
    y = try_else({
      #get vcard
      vcard = parties$wp_page[[p]] %>% html_node(".vcard") %>%
        #replace linebreaks with commas,
        #otherwise the separators disappear when the table is parsed
        str_replace_all("<br>", ", ") %>%
        #back to html
        read_html()
    })
    if (is.null(y)) return(NULL)
    #parse vcard table
    vcard_table = html_table(vcard) %>% .[[1]]
    #check for errors
    if (!is.data.frame(vcard_table)) browser()
    if (ncol(vcard_table) != 2) return(NULL)
    #set names
    colnames(vcard_table) = c("var", "data")
    vcard_table$var %<>% str_replace_all("\\s", "_") #replace whitespace (incl. non-breaking) with underscores
    #premake cols
    wp_ideolgy = vcard_table[vcard_table$var == "Ideology", 2] %>% l0_to_NA()
    wp_pol_position = vcard_table[vcard_table$var == "Political_position", 2] %>% l0_to_NA()
    #not vectors
    # if (!is.vector(wp_ideolgy)) browser()
    # if (!is.vector(wp_pol_position)) browser()
    #combine party data
    y = tibble(
      id = p,
      wp_ideolgy = wp_ideolgy %>% unlist(),
      wp_pol_position = wp_pol_position %>% unlist()
    )
    #check for errors
    if (nrow(y) != 1) browser()
  })
  y
}) %>%
  #post cleaning
  #done here vectorized instead of one row at a time
  mutate(
    wp_ideolgy = wp_ideolgy %>% str_rm_sources(),
    wp_pol_position = wp_pol_position %>% str_rm_sources() %>% str_replace_all(",", "")
  )
#merge
parties = dplyr::left_join(parties, parties_vcard_data, by = "id")
#code dummy predictors
ideology_tags = list(
  "socialism" = "socialism",
  "green" = "(eco\\-)|green",
  "conservative" = "conservative|conservatism",
  "nationalism" = "nationalism",
  "EU_skeptic" = "euroscepticism",
  "EU_positive" = "pro\\-europeanism",
  "feminism" = "feminism",
  "libertarianism" = "libertarianism",
  "national_conservatism" = "national conservatism",
  "agrarianism" = "agrarianism",
  "christian" = "christian",
  "communism" = "communism|(anti\\-capitalism)|marxism|trotskyism",
  "centrism" = "centrism",
  "RW_populism" = "right[\\- ]wing populism",
  "LW_populism" = "left[\\- ]wing populism",
  "populism" = "populism",
  "social_democracy" = "social democracy",
  "democratic_socialism" = "democratic socialism",
  "social_liberalism" = "social liberalism",
  "liberalism" = "\\bliberalism\\b",
  "direct_democracy" = "direct democracy"
)
#make dummies
parties_wp_ideology_dummies = map(ideology_tags, function(tag) {
  stri_detect_regex(parties$wp_ideolgy, tag, case_insensitive = TRUE)
}) %>%
  set_names(names(ideology_tags)) %>%
  as.data.frame() %>%
  mutate(tag_num = rowSums(.),
         id = 1:nrow(.)) %>%
  select(id, tag_num, everything())
#merge
parties = dplyr::left_join(parties %>% select(-Country), parties_wp_ideology_dummies, by = "id")
#set block levels
parties$Block %<>% fct_relevel("left", "center", "right")
The results don’t make a lot of sense because the tags can have substitution effects: overlapping tags (e.g. populism and right-wing populism) compete for the same variance.
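For instance, a single Wikipedia ideology string can trigger several tags at once (a minimal check using the regexes above):
stri_detect_regex("Right-wing populism", c("right[\\- ]wing populism", "populism"), case_insensitive = TRUE) #TRUE TRUE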
#informative rater IDs by country, sex, age
party_ratings_meta %<>% mutate(
  informative_id = str_glue("{Country}_{Sex}_{Age}") %>% str_uniquify()
)
#rater groups
all_raters = party_ratings %>% select(Portuguese_guy:REB) %>% names()
#good raters: drop raters 1 and 9 (Portuguese_guy and LOU), who agree poorly with the rest
good_raters = all_raters[-c(1, 9)]
for (v in all_raters) {
  #convert list column to numeric
  party_ratings[[v]] = party_ratings[[v]] %>% list_to_numeric()
}
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
#rescale to -3 to 3 format (as Wikipedia)
party_ratings[, all_raters] = party_ratings[, all_raters] - 4
#averages
party_ratings$mean = party_ratings[all_raters] %>% rowMeans(na.rm = T)
party_ratings$mean_good = party_ratings[good_raters] %>% rowMeans(na.rm = T)
#describe
party_ratings %>% select(Portuguese_guy:mean_good) %>% describe() %>% as.matrix()
## vars n mean sd median trimmed mad min max range skew
## Portuguese_guy 1 196 -0.6286 1.4 -1.000 -0.7177 1.5 -3.0 3.0 6.0 0.5252
## Arjen 2 196 -0.2398 1.6 0.000 -0.2975 1.5 -3.0 3.0 6.0 0.1923
## Victoria 3 196 -0.1485 1.9 -0.500 -0.1842 2.2 -3.0 3.0 6.0 0.2112
## S 4 196 -0.0255 1.7 0.000 0.0000 1.5 -3.0 3.0 6.0 -0.0997
## PET 5 196 -0.3316 1.6 0.000 -0.3671 1.5 -3.0 3.0 6.0 0.1442
## RY 6 196 -0.5515 1.4 -0.500 -0.6646 1.8 -3.0 3.0 6.0 0.5451
## R 7 196 0.3673 1.5 1.000 0.4494 1.5 -3.0 3.0 6.0 -0.4614
## FIL 8 196 0.2148 1.6 0.000 0.2722 2.2 -3.0 3.0 6.0 -0.2509
## LOU 9 196 -0.1837 1.9 0.000 -0.2215 3.0 -3.0 3.0 6.0 0.1635
## AS 10 196 -0.2704 1.6 -0.500 -0.3101 2.2 -3.0 3.0 6.0 0.2143
## EM 11 196 -0.0026 1.6 0.000 -0.0095 3.0 -3.0 3.0 6.0 0.0122
## TH 12 196 -0.0791 1.6 -0.350 -0.0696 2.0 -3.0 3.0 6.0 0.0044
## LOU_IS 13 196 -0.1546 1.2 0.000 -0.1525 1.5 -3.0 3.0 6.0 0.0187
## AND 14 196 -0.1786 1.7 0.000 -0.1962 1.5 -3.0 3.0 6.0 0.0786
## JAC 15 196 0.1480 1.9 0.000 0.1835 3.0 -3.0 3.0 6.0 -0.0755
## GUS 16 196 0.4541 1.7 1.000 0.4873 1.5 -3.0 3.0 6.0 -0.1736
## C 17 196 0.0459 1.6 0.000 0.0949 2.2 -3.0 3.0 6.0 -0.1554
## FI 18 196 0.1306 1.5 0.000 0.1399 1.5 -3.0 3.0 6.0 -0.1061
## MATH 19 196 -0.3245 1.2 -0.500 -0.3703 1.0 -3.0 3.0 6.0 0.3887
## A 20 196 0.2959 1.4 0.000 0.3671 1.5 -3.0 3.0 6.0 -0.2614
## BENJ 21 196 -0.1276 1.8 0.000 -0.1519 1.5 -3.0 3.0 6.0 0.1105
## SO 22 196 -0.0561 1.8 0.000 -0.0696 1.5 -3.0 3.0 6.0 0.0728
## J 23 196 0.0255 1.5 0.000 0.0538 2.2 -3.0 3.0 6.0 -0.1018
## PAU 24 196 -0.1867 1.7 0.000 -0.1892 2.1 -3.0 3.0 6.0 0.0477
## REB 25 193 -0.3938 1.5 0.000 -0.4323 1.5 -3.0 3.0 6.0 0.1467
## mean 26 196 -0.0878 1.2 0.092 -0.0596 1.2 -2.9 2.4 5.3 -0.2572
## mean_good 27 196 -0.0601 1.3 0.111 -0.0245 1.4 -2.9 2.4 5.3 -0.2941
## kurtosis se
## Portuguese_guy -0.636 0.103
## Arjen -0.965 0.118
## Victoria -1.121 0.133
## S -0.988 0.119
## PET -0.831 0.113
## RY -0.527 0.103
## R 0.043 0.104
## FIL -1.054 0.113
## LOU -1.210 0.134
## AS -0.954 0.115
## EM -1.319 0.118
## TH -1.277 0.113
## LOU_IS 0.136 0.084
## AND -0.674 0.119
## JAC -1.140 0.135
## GUS -1.044 0.123
## C -0.912 0.117
## FI -0.846 0.111
## MATH 0.544 0.088
## A -0.512 0.099
## BENJ -1.126 0.128
## SO -0.941 0.129
## J -1.021 0.110
## PAU -1.029 0.125
## REB -0.850 0.108
## mean -0.846 0.087
## mean_good -0.872 0.091
#distribution
party_ratings$mean %>% GG_denhist() +
scale_x_continuous("Political position", breaks = -3:3, limits = c(-3, 3), labels = leftright_labels)
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/party_position_ratings_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#intercorrelations among raters, sorted from highest to lowest
party_ratings %>% select(!!all_raters) %>% wtd.cors() %>% melt_to_long() %>% arrange(-value)
#ICC
party_ratings %>% select(!!all_raters) %>% ICC()
## Call: ICC(x = .)
##
## Intraclass correlation coefficients
## type ICC F df1 df2 p lower bound upper bound
## Single_raters_absolute ICC1 0.54 30 195 4704 0 0.50 0.58
## Single_random_raters ICC2 0.54 32 195 4680 0 0.50 0.58
## Single_fixed_raters ICC3 0.55 32 195 4680 0 0.51 0.60
## Average_raters_absolute ICC1k 0.97 30 195 4704 0 0.96 0.97
## Average_random_raters ICC2k 0.97 32 195 4680 0 0.96 0.97
## Average_fixed_raters ICC3k 0.97 32 195 4680 0 0.96 0.97
##
## Number of subjects = 196 Number of Judges = 25
party_ratings %>% select(!!good_raters) %>% ICC()
## Call: ICC(x = .)
##
## Intraclass correlation coefficients
## type ICC F df1 df2 p lower bound upper bound
## Single_raters_absolute ICC1 0.61 36 195 4312 0 0.57 0.65
## Single_random_raters ICC2 0.61 39 195 4290 0 0.57 0.65
## Single_fixed_raters ICC3 0.62 39 195 4290 0 0.58 0.66
## Average_raters_absolute ICC1k 0.97 36 195 4312 0 0.97 0.98
## Average_random_raters ICC2k 0.97 39 195 4290 0 0.97 0.98
## Average_fixed_raters ICC3k 0.97 39 195 4290 0 0.97 0.98
##
## Number of subjects = 196 Number of Judges = 23
#alpha
party_ratings %>% select(!!all_raters) %>% alpha()
## Number of categories should be increased in order to count frequencies.
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.97 0.97 0.98 0.56 32 0.0031 -0.088 1.2 0.61
##
## lower alpha upper 95% confidence boundaries
## 0.96 0.97 0.97
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Portuguese_guy 0.97 0.97 0.98 0.60 36 0.0029 0.014 0.62
## Arjen 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## Victoria 0.97 0.97 0.98 0.55 30 0.0033 0.031 0.61
## S 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## PET 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## RY 0.97 0.97 0.98 0.56 31 0.0032 0.032 0.61
## R 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## FIL 0.97 0.97 0.98 0.55 30 0.0032 0.031 0.60
## LOU 0.97 0.97 0.98 0.58 34 0.0028 0.025 0.62
## AS 0.97 0.97 0.98 0.56 31 0.0032 0.032 0.61
## EM 0.97 0.97 0.98 0.56 30 0.0032 0.032 0.61
## TH 0.97 0.97 0.98 0.55 30 0.0033 0.031 0.60
## LOU_IS 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## AND 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## JAC 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## GUS 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## C 0.97 0.97 0.98 0.55 30 0.0032 0.031 0.61
## FI 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## MATH 0.97 0.97 0.98 0.56 30 0.0032 0.032 0.61
## A 0.97 0.97 0.98 0.56 31 0.0031 0.032 0.62
## BENJ 0.97 0.97 0.98 0.55 30 0.0033 0.030 0.60
## SO 0.97 0.97 0.98 0.56 30 0.0032 0.031 0.61
## J 0.97 0.97 0.98 0.55 30 0.0032 0.031 0.61
## PAU 0.97 0.97 0.97 0.55 30 0.0033 0.031 0.60
## REB 0.97 0.97 0.98 0.55 30 0.0033 0.030 0.60
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Portuguese_guy 196 0.19 0.20 0.15 0.15 -0.6286 1.4
## Arjen 196 0.81 0.81 0.80 0.79 -0.2398 1.6
## Victoria 196 0.83 0.83 0.83 0.81 -0.1485 1.9
## S 196 0.81 0.80 0.80 0.79 -0.0255 1.7
## PET 196 0.81 0.81 0.81 0.79 -0.3316 1.6
## RY 196 0.73 0.73 0.71 0.70 -0.5515 1.4
## R 196 0.80 0.81 0.80 0.78 0.3673 1.5
## FIL 196 0.83 0.83 0.82 0.81 0.2148 1.6
## LOU 196 0.41 0.41 0.37 0.36 -0.1837 1.9
## AS 196 0.73 0.73 0.72 0.70 -0.2704 1.6
## EM 196 0.77 0.77 0.76 0.75 -0.0026 1.6
## TH 196 0.83 0.83 0.82 0.81 -0.0791 1.6
## LOU_IS 196 0.80 0.80 0.80 0.78 -0.1546 1.2
## AND 196 0.80 0.80 0.79 0.78 -0.1786 1.7
## JAC 196 0.80 0.80 0.79 0.78 0.1480 1.9
## GUS 196 0.80 0.80 0.79 0.78 0.4541 1.7
## C 196 0.82 0.82 0.82 0.80 0.0459 1.6
## FI 196 0.79 0.79 0.78 0.76 0.1306 1.5
## MATH 196 0.75 0.76 0.75 0.74 -0.3245 1.2
## A 196 0.69 0.69 0.67 0.66 0.2959 1.4
## BENJ 196 0.85 0.85 0.85 0.83 -0.1276 1.8
## SO 196 0.82 0.81 0.81 0.79 -0.0561 1.8
## J 196 0.82 0.82 0.82 0.80 0.0255 1.5
## PAU 196 0.84 0.83 0.83 0.82 -0.1867 1.7
## REB 193 0.85 0.86 0.86 0.84 -0.3938 1.5
party_ratings %>% select(!!good_raters) %>% alpha()
## Number of categories should be increased in order to count frequencies.
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.97 0.97 0.98 0.63 38 0.0026 -0.06 1.3 0.63
##
## lower alpha upper 95% confidence boundaries
## 0.97 0.97 0.98
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Arjen 0.97 0.97 0.98 0.62 37 0.0027 0.0053 0.62
## Victoria 0.97 0.97 0.98 0.62 36 0.0028 0.0052 0.62
## S 0.97 0.97 0.98 0.63 37 0.0027 0.0057 0.62
## PET 0.97 0.97 0.98 0.62 37 0.0027 0.0057 0.63
## RY 0.97 0.97 0.98 0.63 38 0.0027 0.0052 0.63
## R 0.97 0.97 0.98 0.62 37 0.0027 0.0054 0.62
## FIL 0.97 0.97 0.98 0.62 36 0.0028 0.0058 0.62
## AS 0.97 0.97 0.98 0.63 38 0.0027 0.0055 0.63
## EM 0.97 0.97 0.98 0.63 37 0.0027 0.0056 0.63
## TH 0.97 0.97 0.98 0.62 36 0.0028 0.0057 0.62
## LOU_IS 0.97 0.97 0.98 0.62 37 0.0027 0.0054 0.62
## AND 0.97 0.97 0.98 0.63 37 0.0027 0.0056 0.63
## JAC 0.97 0.97 0.98 0.63 37 0.0027 0.0057 0.62
## GUS 0.97 0.97 0.98 0.63 37 0.0027 0.0058 0.63
## C 0.97 0.97 0.98 0.62 36 0.0028 0.0055 0.62
## FI 0.97 0.97 0.98 0.63 37 0.0027 0.0052 0.63
## MATH 0.97 0.97 0.98 0.63 37 0.0027 0.0057 0.63
## A 0.97 0.97 0.98 0.64 38 0.0027 0.0048 0.64
## BENJ 0.97 0.97 0.98 0.62 36 0.0028 0.0054 0.62
## SO 0.97 0.97 0.98 0.62 37 0.0027 0.0053 0.62
## J 0.97 0.97 0.98 0.62 37 0.0028 0.0052 0.62
## PAU 0.97 0.97 0.98 0.62 36 0.0028 0.0052 0.62
## REB 0.97 0.97 0.98 0.62 36 0.0028 0.0049 0.62
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Arjen 196 0.81 0.81 0.81 0.79 -0.2398 1.6
## Victoria 196 0.83 0.83 0.83 0.81 -0.1485 1.9
## S 196 0.81 0.80 0.80 0.78 -0.0255 1.7
## PET 196 0.81 0.81 0.80 0.79 -0.3316 1.6
## RY 196 0.72 0.72 0.71 0.70 -0.5515 1.4
## R 196 0.81 0.82 0.81 0.80 0.3673 1.5
## FIL 196 0.83 0.83 0.82 0.81 0.2148 1.6
## AS 196 0.74 0.74 0.72 0.71 -0.2704 1.6
## EM 196 0.77 0.77 0.76 0.75 -0.0026 1.6
## TH 196 0.83 0.83 0.82 0.81 -0.0791 1.6
## LOU_IS 196 0.81 0.81 0.81 0.79 -0.1546 1.2
## AND 196 0.80 0.80 0.79 0.78 -0.1786 1.7
## JAC 196 0.80 0.80 0.79 0.78 0.1480 1.9
## GUS 196 0.81 0.80 0.79 0.78 0.4541 1.7
## C 196 0.83 0.83 0.82 0.81 0.0459 1.6
## FI 196 0.79 0.79 0.78 0.77 0.1306 1.5
## MATH 196 0.76 0.77 0.75 0.74 -0.3245 1.2
## A 196 0.68 0.68 0.66 0.65 0.2959 1.4
## BENJ 196 0.86 0.86 0.85 0.84 -0.1276 1.8
## SO 196 0.82 0.81 0.81 0.80 -0.0561 1.8
## J 196 0.82 0.82 0.82 0.80 0.0255 1.5
## PAU 196 0.84 0.83 0.83 0.82 -0.1867 1.7
## REB 193 0.86 0.86 0.86 0.85 -0.3938 1.5
#factor analysis
(party_ratings_factor = fa(party_ratings %>% select(!!all_raters)))
## Factor Analysis using method = minres
## Call: fa(r = party_ratings %>% select(!!all_raters))
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## Portuguese_guy 0.14 0.021 0.98 1
## Arjen 0.80 0.647 0.35 1
## Victoria 0.83 0.688 0.31 1
## S 0.79 0.629 0.37 1
## PET 0.80 0.641 0.36 1
## RY 0.70 0.497 0.50 1
## R 0.81 0.653 0.35 1
## FIL 0.82 0.671 0.33 1
## LOU 0.36 0.129 0.87 1
## AS 0.72 0.514 0.49 1
## EM 0.75 0.567 0.43 1
## TH 0.82 0.674 0.33 1
## LOU_IS 0.80 0.646 0.35 1
## AND 0.79 0.622 0.38 1
## JAC 0.79 0.617 0.38 1
## GUS 0.79 0.628 0.37 1
## C 0.82 0.671 0.33 1
## FI 0.78 0.610 0.39 1
## MATH 0.75 0.565 0.44 1
## A 0.66 0.438 0.56 1
## BENJ 0.85 0.722 0.28 1
## SO 0.81 0.648 0.35 1
## J 0.82 0.670 0.33 1
## PAU 0.83 0.683 0.32 1
## REB 0.86 0.737 0.26 1
##
## MR1
## SS loadings 14.59
## Proportion Var 0.58
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 300 and the objective function was 24 with Chi Square of 4433
## The degrees of freedom for the model are 275 and the objective function was 4.3
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic number of observations is 196 with the empirical chi square 323 with prob < 0.024
## The total number of observations was 196 with Likelihood Chi Square = 798 with prob < 1.9e-52
##
## Tucker Lewis Index of factoring reliability = 0.86
## RMSEA index = 0.098 and the 90 % confidence intervals are 0.091 0.11
## BIC = -653
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.99
## Multiple R square of scores with factors 0.98
## Minimum correlation of possible factor scores 0.95
#good raters only
(party_ratings_factor_good = fa(party_ratings %>% select(!!good_raters)))
## Factor Analysis using method = minres
## Call: fa(r = party_ratings %>% select(!!good_raters))
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## Arjen 0.81 0.65 0.35 1
## Victoria 0.83 0.69 0.31 1
## S 0.79 0.63 0.37 1
## PET 0.80 0.64 0.36 1
## RY 0.70 0.49 0.51 1
## R 0.81 0.66 0.34 1
## FIL 0.82 0.67 0.33 1
## AS 0.72 0.52 0.48 1
## EM 0.75 0.57 0.43 1
## TH 0.82 0.67 0.33 1
## LOU_IS 0.81 0.65 0.35 1
## AND 0.79 0.62 0.38 1
## JAC 0.78 0.62 0.38 1
## GUS 0.79 0.63 0.37 1
## C 0.82 0.67 0.33 1
## FI 0.78 0.61 0.39 1
## MATH 0.75 0.57 0.43 1
## A 0.66 0.43 0.57 1
## BENJ 0.85 0.73 0.27 1
## SO 0.80 0.65 0.35 1
## J 0.82 0.67 0.33 1
## PAU 0.83 0.68 0.32 1
## REB 0.86 0.74 0.26 1
##
## MR1
## SS loadings 14.44
## Proportion Var 0.63
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 253 and the objective function was 23 with Chi Square of 4375
## The degrees of freedom for the model are 230 and the objective function was 4.1
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic number of observations is 196 with the empirical chi square 279 with prob < 0.015
## The total number of observations was 196 with Likelihood Chi Square = 754 with prob < 5.3e-57
##
## Tucker Lewis Index of factoring reliability = 0.86
## RMSEA index = 0.11 and the 90 % confidence intervals are 0.1 0.12
## BIC = -460
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.99
## Multiple R square of scores with factors 0.98
## Minimum correlation of possible factor scores 0.95
#long form
party_ratings_long = party_ratings %>%
  gather(key = rater, value = rating, !!all_raters)
#plot, ordered by the mean of the good raters
party_ratings_long %>%
  mutate(
    Party = fct_reorder(Party, mean_good)
  ) %>%
  ggplot(aes(Party, rating)) +
  geom_boxplot(outlier.shape = NA) +
  # geom_violin() +
  scale_y_continuous(breaks = -3:3) +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 2),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank()
  )
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
GG_save("figs/party_ratings.boxplot.png")
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
#copy the mean ratings to the main parties data frame for further use
#they were already rescaled to the Wikipedia -3 to 3 scale above
parties$mean_rating = NULL
#mutate
party_ratings %<>% mutate(
  party_country = Party + " | " + Country,
  #new name, same scale as the Wikipedia data
  mean_rating = mean
)
#join to main data
parties = left_join(parties, party_ratings %>% select(mean_rating, party_country), by = "party_country")
#simplify
#figure out what they are closest to
# stringdist::stringdist(a = parties$wp_pol_position %>% str_replace_all("\\\\n", ""),
# b = c("far-left", "far right", "left-wing", "right-wing", "centre", "centre-left", "centre-right", "centre to centre-left", "centre to centre-right", "left-wing to far-left", "right-wing to far-right"))
#clean it up a bit
parties$wp_pol_position_clean = parties$wp_pol_position %>%
str_replace_all("\\n", "") %>%
str_to_lower()
#code into numerical form: -3 to 3
#detect keywords and average their values when multiple are found
positions = c("far-left", "left-wing", "center-left", "center", "center-right", "right-wing", "far-right", "libertarianism", "syncretic")
positions_values = c(-3:3, 2, 0)
#recode as numeric
parties$wp_pol_position_numerical = parties$wp_pol_position_clean %>%
  map_dbl(function(.) {
    if (is.na(.)) return(NA)
    # browser()
    #split by " to "
    #we need to average the values if multiple positions are given
    split_x = str_split(., " to ") %>% .[[1]]
    #American spelling of "center"
    split_x = map_chr(split_x, ~str_replace_all(., "centre", "center"))
    #replace the keywords with their numerical values
    x_recoded = plyr::mapvalues(split_x, positions, positions_values, warn_missing = F)
    #as numeric; unmatched strings become NA (hence the warnings below)
    x_vals = as.numeric(x_recoded)
    #mean
    y = mean(x_vals, na.rm = T)
    #odd?
    # if (is.nan(y)) browser()
    y
  })
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
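For example, "centre-left to centre" splits into two parts that map to -1 and 0, giving -0.5; strings with no recognized keyword produce the coercion warnings above and end up as NaN, which the hard-coded values below fix.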
#hard-coded values
#used when the Wikipedia text is not in the standard format
parties[parties$party_country == 'Russian Democratic Party "Yabloko" | Russia', "wp_pol_position_numerical"] = 0
parties[parties$party_country == "Scottish National Party | United Kingdom", "wp_pol_position_numerical"] = -1
parties[parties$party_country == "Liberal Party of Canada | Canada", "wp_pol_position_numerical"] = -0.5
parties[parties$party_country == "League of Polish Families | Poland", "wp_pol_position_numerical"] = 1
parties[parties$party_country == "United Russia | Russia", "wp_pol_position_numerical"] = 0
parties[parties$party_country == "Evangelical People's Party of Switzerland | Switzerland", "wp_pol_position_numerical"] = 0
parties[parties$party_country == "Democrats 66 | Netherlands", "wp_pol_position_numerical"] = 0
#ensure no indefinite values
assert_that(!any(is.infinite(parties$wp_pol_position_numerical)))
## [1] TRUE
assert_that(!any(is.nan(parties$wp_pol_position_numerical)))
## [1] TRUE
#add party data to voting data
voting_augmented_all = left_join(
  voting_aggr,
  parties %>% select(-Party_native, -Party), by = c("party_country")
)
#alternative version without old data
voting_augmented_all_no_old = left_join(
  voting_aggr_no_old,
  parties %>% select(-Party_native, -Party), by = c("party_country")
)
#error on missing joins!
assert_that(nrow(voting_augmented_all) == nrow(voting_aggr))
## [1] TRUE
assert_that(nrow(voting_augmented_all_no_old) == nrow(voting_aggr_no_old))
## [1] TRUE
#stats
parties$wp_pol_position_numerical %>% describe()
#main dataset - apply preferred data filter
voting_augmented = voting_augmented_all %>%
  filter(General_pct >= 2,
         Country != "Russia",
         !is.na(RR)
  )
#without old data
voting_augmented_no_old = voting_augmented_all_no_old %>%
  filter(General_pct >= 2,
         Country != "Russia",
         !is.na(RR)
  )
#how many excluded?
voting_augmented_all %>% filter(General_pct < 2) %>% nrow()
## [1] 19
voting_augmented_all %>% nrow()
## [1] 151
voting_augmented_all %>% filter(General_pct < 2) %>% nrow() / voting_augmented_all %>% nrow()
## [1] 0.13
voting_augmented_all %>% filter(Country == "Russia") %>% nrow()
## [1] 0
voting_augmented_all %>% filter(Country == "Russia") %>% nrow() / voting_augmented_all %>% nrow()
## [1] 0
#distribution
parties %>%
  ggplot(aes(wp_pol_position_numerical)) +
  geom_histogram(stat = "count") +
  theme_bw() +
  scale_x_continuous("Political position",
                     labels = c("far-left", "left", "center-left", "center", "center-right", "right", "far-right"),
                     breaks = seq(-3, 3, 1))
## Warning: Ignoring unknown parameters: binwidth, bins, pad
## Warning: Removed 13 rows containing non-finite values (stat_count).
GG_save("figs/position_dist.png")
## Warning: Removed 13 rows containing non-finite values (stat_count).
#small parties included
voting_augmented_small = voting_augmented_all %>%
  filter(Country != "Russia",
         !is.na(RR)
  )
Calculate weights under various schemes, such as square-root sample size or country-fixed weights (i.e. each country gets the same total weight no matter how many parties it has).
#country fixed weights
#have to condition on missing data
#weights can result in Inf (when no party in a country has data), which we recode below
add_weights = function(d) {
  #assign 1 unit of weight distributed across the parties with data
  #per block
  #but all parties should have this...
  d$weight_b = 1 / sum(!is.na(d$Block))
  #per political position = pp
  d$weight_pp = 1 / sum(!is.na(d$wp_pol_position))
  #ideology = i
  d$weight_i = 1 / sum(!is.na(d$wp_ideolgy))
  #both = ppi
  d$weight_ppi = 1 / min(sum(!is.na(d$wp_pol_position)), sum(!is.na(d$wp_ideolgy)))
  #replace Inf with the smaller of the two component weights
  #Inf causes issues in step-wise regressions
  d$weight_ppi[is.infinite(d$weight_ppi)] = pmin(d$weight_pp[is.infinite(d$weight_ppi)], d$weight_i[is.infinite(d$weight_ppi)])
  #more Inf
  d$weight_pp[is.infinite(d$weight_pp)] = 0
  #sqrt n
  d$sqrt_n = sqrt(d$n_total)
  #constant weights
  d$ones = 1
  d
}
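For example, under the political-position weighting, each of 5 parties with position data in a country gets weight 1/5, so every country contributes the same total weight to the pooled analyses regardless of how many parties it fields.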
#add weights
voting_augmented = plyr::ddply(voting_augmented, c("Country"), .fun = add_weights)
voting_augmented_small = plyr::ddply(voting_augmented_small, c("Country"), .fun = add_weights)
voting_augmented_no_old = plyr::ddply(voting_augmented_no_old, c("Country"), .fun = add_weights)
#parties included, including only those with data and including pseudo-parties
voting_augmented %>% filter(!is.na(RR)) %>% nrow()
## [1] 132
#parties included, including only those with data and tags
voting_augmented %>% filter(!is.na(RR), !is.na(wp_ideolgy), !is.na(socialism)) %>% nrow()
## [1] 123
voting_augmented %>% filter(!is.na(RR), !is.na(wp_ideolgy), !is.na(socialism)) %>% .$Country %>% table2(include_NA = F) %>% print(n = 20)
## # A tibble: 17 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 France 17 13.8
## 2 Slovenia 9 7.32
## 3 Denmark 8 6.50
## 4 Finland 8 6.50
## 5 Norway 8 6.50
## 6 Poland 8 6.50
## 7 Sweden 8 6.50
## 8 Belgium 7 5.69
## 9 Ireland 7 5.69
## 10 Australia 6 4.88
## 11 Austria 6 4.88
## 12 Germany 6 4.88
## 13 Netherlands 6 4.88
## 14 Switzerland 6 4.88
## 15 United Kingdom 6 4.88
## 16 Canada 5 4.07
## 17 USA 2 1.63
#sample sizes
n_sumstats
n_party_sumstats
n_party_frac_sumstats
#samples by country
samples %>%
filter(Type == "Journalist") %>%
group_by(Country) %>%
dplyr::summarise(n = n()) %>%
pull(n) %>%
describe()
#party data
#coverage
parties$Wikipedia_link %>% is.na() %>% mean()
## [1] 0
parties$wp_pol_position_numerical %>% is.na() %>% mean()
## [1] 0.066
parties$wp_ideolgy %>% is.na() %>% mean()
## [1] 0.03
#countries
voting_orig %>%
filter(!is.na(Journalist_pct)) %>%
.$Country %>%
table2() %>%
print(n = Inf)
## # A tibble: 18 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 Sweden 35 15.4
## 2 France 28 12.3
## 3 Norway 20 8.77
## 4 Germany 19 8.33
## 5 Australia 18 7.89
## 6 Denmark 17 7.46
## 7 Belgium 16 7.02
## 8 Poland 12 5.26
## 9 Finland 9 3.95
## 10 Netherlands 9 3.95
## 11 Slovenia 9 3.95
## 12 Ireland 8 3.51
## 13 Switzerland 7 3.07
## 14 United Kingdom 7 3.07
## 15 Austria 6 2.63
## 16 Canada 6 2.63
## 17 USA 2 0.877
## 18 <NA> 0 0
#survey years
samples %>% filter(Type == "Journalist",
ID %in% voting$journalist_sample_ID,
Country != "Russia"
) %>%
pull(Year) %>%
str_match("\\d+") %>%
as.numeric() %>%
describe()
#Wikipedia vs. raters
parties %>%
#this is to randomize the order for the party names shown
#otherwise, parties from some countries are shown a lot more
sample_frac() %>%
GG_scatter("wp_pol_position_numerical", "mean_rating", case_names = "Party") +
scale_x_continuous("Wikipedia-derived position", breaks = -3:3, labels = leftright_labels) +
scale_y_continuous("Average position rating of online raters", breaks = -3:3, limits = c(-3, 3), labels = leftright_labels) +
geom_abline(intercept = 0, slope = 1, linetype = "dotted", alpha = .5)
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/party_position_ratings_compare.png")
## `geom_smooth()` using formula 'y ~ x'
#highest correlation to Wikipedia
#join data first
party_ratings = left_join(party_ratings, parties %>% select(party_country, wp_pol_position_numerical), by = "party_country")
#compute
party_ratings %>% select(!!all_raters, wp_pol_position_numerical) %>% wtd.cors() %>%
{
.[, ncol(.)]
} %>%
sort()
## Portuguese_guy LOU RY
## 0.096 0.228 0.546
## A EM AS
## 0.548 0.563 0.590
## S GUS JAC
## 0.601 0.618 0.636
## TH AND FIL
## 0.642 0.651 0.663
## PET SO MATH
## 0.676 0.687 0.689
## PAU R J
## 0.708 0.748 0.776
## C BENJ FI
## 0.787 0.788 0.818
## LOU_IS Victoria Arjen
## 0.834 0.837 0.848
## REB wp_pol_position_numerical
## 0.848 1.000
#Jensen's method
#using all raters
fa_Jensens_method(fa = party_ratings_factor, df = party_ratings, criterion = "wp_pol_position_numerical") +
scale_y_continuous("Correlation (Wikipedia position x rater)") +
scale_x_continuous("Rater's loading on general factor of ratings")
## Using Pearson correlations for the criterion-indicators relationships.
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/party_position_ratings_Jensen_method.png")
## `geom_smooth()` using formula 'y ~ x'
#good raters only
fa_Jensens_method(fa = party_ratings_factor_good, df = party_ratings, criterion = "wp_pol_position_numerical") +
scale_y_continuous("Correlation (Wikipedia position x rater)") +
scale_x_continuous("Rater's loading on general factor of ratings")
## Using Pearson correlations for the criterion-indicators relationships.
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/party_position_ratings_Jensen_method.png")
## `geom_smooth()` using formula 'y ~ x'
#heatmap
party_ratings %>%
  select(Portuguese_guy:mean, wp_pol_position_numerical) %>%
  rename(Wikipedia = wp_pol_position_numerical) %>%
  {
    #change to informative ids
    colnames(.)[1:length(all_raters)] = party_ratings_meta$informative_id
    #correlations
    cor_mat = wtd.cors(.[1:length(all_raters)])
    #reorder the raters, but not the last two variables
    raters_reordered = reorder_cormat(cor_mat)
    #reorder the dataset
    .[c(colnames(raters_reordered), "mean", "Wikipedia")]
  } %>%
  GG_heatmap(reorder_vars = F, font_size = 2)
GG_save("figs/party_position_ratings_heatmap.png")
#regression models
#to examine slope and intercept
ols(mean_rating ~ wp_pol_position_numerical, data = party_ratings)
## Frequencies of Missing Values Due to Each Variable
## mean_rating wp_pol_position_numerical
## 0 13
##
## Linear Regression Model
##
## ols(formula = mean_rating ~ wp_pol_position_numerical, data = party_ratings)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 183 LR chi2 245.94 R2 0.739
## sigma0.6223 d.f. 1 R2 adj 0.738
## d.f. 181 Pr(> chi2) 0.0000 g 1.195
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.815339 -0.444568 -0.006825 0.417689 1.802717
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1057 0.0460 -2.30 0.0228
## wp_pol_position_numerical 0.7285 0.0322 22.65 <0.0001
##
ols(wp_pol_position_numerical ~ mean_rating, data = party_ratings)
## Frequencies of Missing Values Due to Each Variable
## wp_pol_position_numerical mean_rating
## 13 0
##
## Linear Regression Model
##
## ols(formula = wp_pol_position_numerical ~ mean_rating, data = party_ratings)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 183 LR chi2 245.94 R2 0.739
## sigma0.7344 d.f. 1 R2 adj 0.738
## d.f. 181 Pr(> chi2) 0.0000 g 1.413
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.3643 -0.4373 0.0173 0.4596 2.3500
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1208 0.0544 2.22 0.0276
## mean_rating 1.0146 0.0448 22.65 <0.0001
##
Crude block-level analysis.
#simple block level
table2(parties$Block)
#different outcomes
#difference in %
ols(d ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
## d Block (weights)
## 0 9 0
##
## Linear Regression Model
##
## ols(formula = d ~ Block, data = voting_augmented, weights = weight_b)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 123 LR chi2 53.32 R2 0.352
## sigma4.1173 d.f. 2 R2 adj 0.341
## d.f. 120 Pr(> chi2) 0.0000 g 8.207
##
## Residuals
##
## Min 1Q Median 3Q Max
## -22.3144 -7.7357 -0.3331 5.5776 39.8593
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 8.3324 1.4579 5.72 <0.0001
## Block=center -2.0611 3.8433 -0.54 0.5928
## Block=right -16.4528 2.0813 -7.91 <0.0001
##
#relative rate
ols(RR ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
## RR Block (weights)
## 0 9 0
##
## Linear Regression Model
##
## ols(formula = RR ~ Block, data = voting_augmented, weights = weight_b)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 123 LR chi2 43.17 R2 0.296
## sigma0.4038 d.f. 2 R2 adj 0.284
## d.f. 120 Pr(> chi2) 0.0000 g 0.708
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.91125 -0.55961 -0.09107 0.33300 3.59501
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 2.0255 0.1430 14.17 <0.0001
## Block=center -0.1706 0.3769 -0.45 0.6515
## Block=right -1.4194 0.2041 -6.95 <0.0001
##
#odds ratio
ols(OR ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
## OR Block (weights)
## 0 9 0
##
## Linear Regression Model
##
## ols(formula = OR ~ Block, data = voting_augmented, weights = weight_b)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 123 LR chi2 35.21 R2 0.249
## sigma0.6418 d.f. 2 R2 adj 0.236
## d.f. 120 Pr(> chi2) 0.0000 g 0.996
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.4699 -0.7315 -0.2107 0.3540 9.5912
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 2.5752 0.2273 11.33 <0.0001
## Block=center -0.1962 0.5991 -0.33 0.7438
## Block=right -1.9979 0.3244 -6.16 <0.0001
##
#relative rate
voting_augmented %>%
GG_scatter("mean_rating", "RR", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (rater data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("Times representation among journalists\ncompared to elections") +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/mean_rating_RR.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
voting_augmented %>%
GG_scatter("mean_rating", "d", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (rater data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("%points representation among journalists\ncompared to elections") +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/mean_rating_d.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#logRR
voting_augmented %>%
GG_scatter("mean_rating", "logRR", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (rater data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("Times over-representation among general population (below 1)\nor journalists (above)",
breaks = seq(-1.5, 1.5, by = 0.5),
labels = format(c(1/10^c(-1.5, -1, -0.5), 1, 10^c(0.5, 1, 1.5)), nsmall = 1)
) +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/mean_rating_logRR.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#OR
cor.test(voting_augmented$OR, voting_augmented$mean_rating)
##
## Pearson's product-moment correlation
##
## data: voting_augmented$OR and voting_augmented$mean_rating
## t = -6, df = 122, p-value = 1e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.58 -0.30
## sample estimates:
## cor
## -0.45
#cor matrix
wtd.cors(voting_augmented %>% select(mean_rating, d, RR, OR, logRR, logOR), weight = voting_augmented$weight_pp)
## mean_rating d RR OR logRR logOR
## mean_rating 1.00 -0.50 -0.53 -0.47 -0.53 -0.54
## d -0.50 1.00 0.81 0.84 0.75 0.81
## RR -0.53 0.81 1.00 0.95 0.84 0.86
## OR -0.47 0.84 0.95 1.00 0.74 0.79
## logRR -0.53 0.75 0.84 0.74 1.00 0.99
## logOR -0.54 0.81 0.86 0.79 0.99 1.00
#d
#linear vs. nonlinear (spline) fits
linear_fit = ols(d ~ mean_rating, data = voting_augmented, weights = voting_augmented$weight_pp)
linear_fit
## Frequencies of Missing Values Due to Each Variable
## d mean_rating (weights)
## 0 8 0
##
## Linear Regression Model
##
## ols(formula = d ~ mean_rating, data = voting_augmented, weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 124 LR chi2 35.36 R2 0.248
## sigma4.0219 d.f. 1 R2 adj 0.242
## d.f. 122 Pr(> chi2) 0.0000 g 7.461
##
## Residuals
##
## Min 1Q Median 3Q Max
## -30.7782 -6.6455 -0.1508 4.3255 44.5075
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.3645 0.9977 0.37 0.7155
## mean_rating -5.2550 0.8351 -6.29 <0.0001
##
nonlinear_fit = ols(d ~ rcs(mean_rating), data = voting_augmented, weights = voting_augmented$weight_pp)
nonlinear_fit
## Frequencies of Missing Values Due to Each Variable
## d mean_rating (weights)
## 0 8 0
##
## Linear Regression Model
##
## ols(formula = d ~ rcs(mean_rating), data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 124 LR chi2 37.84 R2 0.263
## sigma4.0326 d.f. 4 R2 adj 0.238
## d.f. 119 Pr(> chi2) 0.0000 g 7.446
##
## Residuals
##
## Min 1Q Median 3Q Max
## -30.4381 -6.0077 -0.4037 4.7769 44.1405
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 11.9837 8.5754 1.40 0.1649
## mean_rating 1.8451 5.0637 0.36 0.7162
## mean_rating' -28.7166 24.1321 -1.19 0.2364
## mean_rating'' 95.3131 92.4260 1.03 0.3045
## mean_rating''' -141.9655 169.9775 -0.84 0.4053
##
#compare
lrtest(linear_fit,
nonlinear_fit
)
##
## Model 1: d ~ mean_rating
## Model 2: d ~ rcs(mean_rating)
##
## L.R. Chisq d.f. P
## 2.48 3.00 0.48
#logRR
#linear vs. nonlinear (spline) fits
linear_fit2 = ols(logRR ~ mean_rating, data = voting_augmented, weights = voting_augmented$weight_pp)
linear_fit2
## Frequencies of Missing Values Due to Each Variable
## logRR mean_rating (weights)
## 0 8 0
##
## Linear Regression Model
##
## ols(formula = logRR ~ mean_rating, data = voting_augmented, weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 124 LR chi2 41.19 R2 0.283
## sigma0.1600 d.f. 1 R2 adj 0.277
## d.f. 122 Pr(> chi2) 0.0000 g 0.324
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.35441 -0.19624 0.05326 0.27846 0.69519
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1060 0.0397 -2.67 0.0086
## mean_rating -0.2284 0.0332 -6.88 <0.0001
##
nonlinear_fit2 = ols(logRR ~ rcs(mean_rating), data = voting_augmented, weights = voting_augmented$weight_pp)
nonlinear_fit2
## Frequencies of Missing Values Due to Each Variable
## logRR mean_rating (weights)
## 0 8 0
##
## Linear Regression Model
##
## ols(formula = logRR ~ rcs(mean_rating), data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 124 LR chi2 46.15 R2 0.311
## sigma0.1588 d.f. 4 R2 adj 0.288
## d.f. 119 Pr(> chi2) 0.0000 g 0.325
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.33988 -0.21973 0.07345 0.30939 0.62677
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.2552 0.3378 0.76 0.4514
## mean_rating 0.0213 0.1994 0.11 0.9151
## mean_rating' -0.5259 0.9505 -0.55 0.5811
## mean_rating'' 0.7475 3.6404 0.21 0.8377
## mean_rating''' 0.5324 6.6950 0.08 0.9367
##
#compare
lrtest(linear_fit2,
nonlinear_fit2
)
##
## Model 1: logRR ~ mean_rating
## Model 2: logRR ~ rcs(mean_rating)
##
## L.R. Chisq d.f. P
## 4.97 3.00 0.17
#same correlations using the unadjusted values
wtd.cors(voting_augmented %>% select(mean_rating, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented$weight_pp)
## mean_rating d_orig RR_orig OR_orig logRR_orig logOR_orig
## mean_rating 1.00 -0.50 -0.52 -0.46 -0.56 -0.56
## d_orig -0.50 1.00 0.81 0.85 0.80 0.86
## RR_orig -0.52 0.81 1.00 0.95 0.86 0.88
## OR_orig -0.46 0.85 0.95 1.00 0.77 0.81
## logRR_orig -0.56 0.80 0.86 0.77 1.00 0.99
## logOR_orig -0.56 0.86 0.88 0.81 0.99 1.00
#sample size change
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented$logRR_orig %>% na.omit() %>% length()
## [1] 120
#with small parties
wtd.cors(voting_augmented_small %>% select(mean_rating, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented_small$weight_pp)
## mean_rating d_orig RR_orig OR_orig logRR_orig logOR_orig
## mean_rating 1.00 -0.47 -0.47 -0.43 -0.51 -0.52
## d_orig -0.47 1.00 0.79 0.83 0.78 0.85
## RR_orig -0.47 0.79 1.00 0.95 0.86 0.88
## OR_orig -0.43 0.83 0.95 1.00 0.76 0.80
## logRR_orig -0.51 0.78 0.86 0.76 1.00 0.99
## logOR_orig -0.52 0.85 0.88 0.80 0.99 1.00
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented_small$logRR %>% na.omit() %>% length()
## [1] 151
#without old data
wtd.cors(voting_augmented_no_old %>% select(mean_rating, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented_no_old$weight_pp)
## mean_rating d_orig RR_orig OR_orig logRR_orig logOR_orig
## mean_rating 1.00 -0.55 -0.56 -0.48 -0.58 -0.59
## d_orig -0.55 1.00 0.80 0.84 0.78 0.85
## RR_orig -0.56 0.80 1.00 0.94 0.85 0.87
## OR_orig -0.48 0.84 0.94 1.00 0.74 0.79
## logRR_orig -0.58 0.78 0.85 0.74 1.00 0.99
## logOR_orig -0.59 0.85 0.87 0.79 0.99 1.00
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented_no_old$logRR %>% na.omit() %>% length()
## [1] 100
Same as above, but using the Wikipedia position data.
#relative rate
voting_augmented %>%
GG_scatter("wp_pol_position_numerical", "RR", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (Wikipedia data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("Times representation among journalists\ncompared to elections") +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/wp_position_RR.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
voting_augmented %>%
GG_scatter("wp_pol_position_numerical", "d", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (Wikipedia data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("%points representation among journalists\ncompared to elections") +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/wp_position_d.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#logRR
voting_augmented %>%
GG_scatter("wp_pol_position_numerical", "logRR", weights = "weight_pp", weight_as_size = F) +
scale_x_continuous("Political position (Wikipedia data)",
limits = c(-3, 3),
breaks = -3:3,
labels = leftright_labels) +
scale_y_continuous("Times over-representation among general population (below 1)\nor journalists (above)",
breaks = seq(-1.5, 1.5, by = 0.5),
labels = format(c(1/10^c(-1.5, -1, -0.5), 1, 10^c(0.5, 1, 1.5)), nsmall = 1)
) +
geom_smooth(se = F)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/wp_position_logRR.png")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#OR
cor.test(voting_augmented$OR, voting_augmented$wp_pol_position_numerical)
##
## Pearson's product-moment correlation
##
## data: voting_augmented$OR and voting_augmented$wp_pol_position_numerical
## t = -5, df = 117, p-value = 4e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.55 -0.24
## sample estimates:
## cor
## -0.41
#cor matrix
wtd.cors(voting_augmented %>% select(wp_pol_position_numerical, d, RR, OR, logRR, logOR), weight = voting_augmented$weight_pp)
## wp_pol_position_numerical d RR OR logRR
## wp_pol_position_numerical 1.00 -0.47 -0.47 -0.40 -0.53
## d -0.47 1.00 0.81 0.84 0.75
## RR -0.47 0.81 1.00 0.95 0.84
## OR -0.40 0.84 0.95 1.00 0.74
## logRR -0.53 0.75 0.84 0.74 1.00
## logOR -0.53 0.81 0.86 0.79 0.99
## logOR
## wp_pol_position_numerical -0.53
## d 0.81
## RR 0.86
## OR 0.79
## logRR 0.99
## logOR 1.00
#d
#linear vs. nonlinear (spline) fits
linear_fit = ols(d ~ wp_pol_position_numerical, data = voting_augmented, weights = voting_augmented$weight_pp)
linear_fit
## Frequencies of Missing Values Due to Each Variable
## d wp_pol_position_numerical (weights)
## 0 13 0
##
## Linear Regression Model
##
## ols(formula = d ~ wp_pol_position_numerical, data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 119 LR chi2 29.58 R2 0.220
## sigma4.1380 d.f. 1 R2 adj 0.213
## d.f. 117 Pr(> chi2) 0.0000 g 6.951
##
## Residuals
##
## Min 1Q Median 3Q Max
## -28.9059 -5.9654 -0.9795 4.6571 43.1572
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.6590 1.0345 0.64 0.5254
## wp_pol_position_numerical -4.3756 0.7616 -5.75 <0.0001
##
nonlinear_fit = ols(d ~ rcs(wp_pol_position_numerical), data = voting_augmented, weights = voting_augmented$weight_pp)
nonlinear_fit
## Frequencies of Missing Values Due to Each Variable
## d wp_pol_position_numerical (weights)
## 0 13 0
##
## Linear Regression Model
##
## ols(formula = d ~ rcs(wp_pol_position_numerical), data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 119 LR chi2 37.32 R2 0.269
## sigma4.0579 d.f. 4 R2 adj 0.244
## d.f. 114 Pr(> chi2) 0.0000 g 7.369
##
## Residuals
##
## Min 1Q Median 3Q Max
## -29.792 -6.546 0.160 4.648 41.836
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 12.2200 6.9297 1.76 0.0805
## wp_pol_position_numerical 3.6683 4.1794 0.88 0.3819
## wp_pol_position_numerical' -23.0743 33.7173 -0.68 0.4951
## wp_pol_position_numerical'' 16.2526 107.6351 0.15 0.8802
## wp_pol_position_numerical''' 64.6132 141.4627 0.46 0.6487
##
#compare
lrtest(linear_fit,
nonlinear_fit
)
##
## Model 1: d ~ wp_pol_position_numerical
## Model 2: d ~ rcs(wp_pol_position_numerical)
##
## L.R. Chisq d.f. P
## 7.742 3.000 0.052
#logRR
#linear vs. nonlinear (spline) fits
linear_fit2 = ols(logRR ~ wp_pol_position_numerical, data = voting_augmented, weights = voting_augmented$weight_pp)
linear_fit2
## Frequencies of Missing Values Due to Each Variable
## logRR wp_pol_position_numerical (weights)
## 0 13 0
##
## Linear Regression Model
##
## ols(formula = logRR ~ wp_pol_position_numerical, data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 119 LR chi2 39.13 R2 0.280
## sigma0.1613 d.f. 1 R2 adj 0.274
## d.f. 117 Pr(> chi2) 0.0000 g 0.318
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.56003 -0.18898 0.02685 0.30652 0.79985
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0916 0.0403 -2.27 0.0248
## wp_pol_position_numerical -0.2003 0.0297 -6.75 <0.0001
##
nonlinear_fit2 = ols(logRR ~ rcs(wp_pol_position_numerical), data = voting_augmented, weights = voting_augmented$weight_pp)
nonlinear_fit2
## Frequencies of Missing Values Due to Each Variable
## logRR wp_pol_position_numerical (weights)
## 0 13 0
##
## Linear Regression Model
##
## ols(formula = logRR ~ rcs(wp_pol_position_numerical), data = voting_augmented,
## weights = voting_augmented$weight_pp)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 119 LR chi2 49.44 R2 0.340
## sigma0.1564 d.f. 4 R2 adj 0.317
## d.f. 114 Pr(> chi2) 0.0000 g 0.315
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.60876 -0.20279 0.07733 0.29667 0.79987
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.2732 0.2672 1.02 0.3086
## wp_pol_position_numerical 0.0744 0.1611 0.46 0.6454
## wp_pol_position_numerical' -0.8403 1.2999 -0.65 0.5193
## wp_pol_position_numerical'' 1.5532 4.1497 0.37 0.7089
## wp_pol_position_numerical''' -1.2583 5.4538 -0.23 0.8180
##
#compare
lrtest(linear_fit2,
nonlinear_fit2
)
##
## Model 1: logRR ~ wp_pol_position_numerical
## Model 2: logRR ~ rcs(wp_pol_position_numerical)
##
## L.R. Chisq d.f. P
## 10.307 3.000 0.016
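The spline terms add little for d (p = .052) but do improve the fit for logRR (p = .016). To eyeball the shape of the nonlinearity, one can plot the spline predictions. A minimal sketch, assuming an rms datadist is set up first (not run above):
#visualize the logRR spline fit (illustrative sketch)
ddist = datadist(voting_augmented)
options(datadist = "ddist")
ggplot(Predict(nonlinear_fit2, wp_pol_position_numerical)) #partial effect with 95% CI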
#without adjusted values
wtd.cors(voting_augmented %>% select(wp_pol_position_numerical, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented$weight_pp)
## wp_pol_position_numerical d_orig RR_orig OR_orig
## wp_pol_position_numerical 1.00 -0.47 -0.48 -0.40
## d_orig -0.47 1.00 0.81 0.85
## RR_orig -0.48 0.81 1.00 0.95
## OR_orig -0.40 0.85 0.95 1.00
## logRR_orig -0.55 0.80 0.86 0.77
## logOR_orig -0.55 0.86 0.88 0.81
## logRR_orig logOR_orig
## wp_pol_position_numerical -0.55 -0.55
## d_orig 0.80 0.86
## RR_orig 0.86 0.88
## OR_orig 0.77 0.81
## logRR_orig 1.00 0.99
## logOR_orig 0.99 1.00
#sample size change
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented$logRR_orig %>% na.omit() %>% length()
## [1] 120
#with small parties
wtd.cors(voting_augmented_small %>% select(wp_pol_position_numerical, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented_small$weight_pp)
## wp_pol_position_numerical d_orig RR_orig OR_orig
## wp_pol_position_numerical 1.00 -0.44 -0.43 -0.37
## d_orig -0.44 1.00 0.79 0.83
## RR_orig -0.43 0.79 1.00 0.95
## OR_orig -0.37 0.83 0.95 1.00
## logRR_orig -0.50 0.78 0.86 0.76
## logOR_orig -0.50 0.85 0.88 0.80
## logRR_orig logOR_orig
## wp_pol_position_numerical -0.50 -0.50
## d_orig 0.78 0.85
## RR_orig 0.86 0.88
## OR_orig 0.76 0.80
## logRR_orig 1.00 0.99
## logOR_orig 0.99 1.00
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented_small$logRR %>% na.omit() %>% length()
## [1] 151
#without old data
wtd.cors(voting_augmented_no_old %>% select(wp_pol_position_numerical, d_orig, RR_orig, OR_orig, logRR_orig, logOR_orig), weight = voting_augmented_no_old$weight_pp)
## wp_pol_position_numerical d_orig RR_orig OR_orig
## wp_pol_position_numerical 1.00 -0.50 -0.48 -0.40
## d_orig -0.50 1.00 0.80 0.84
## RR_orig -0.48 0.80 1.00 0.94
## OR_orig -0.40 0.84 0.94 1.00
## logRR_orig -0.57 0.78 0.85 0.74
## logOR_orig -0.58 0.85 0.87 0.79
## logRR_orig logOR_orig
## wp_pol_position_numerical -0.57 -0.58
## d_orig 0.78 0.85
## RR_orig 0.85 0.87
## OR_orig 0.74 0.79
## logRR_orig 1.00 0.99
## logOR_orig 0.99 1.00
voting_augmented$logRR %>% na.omit() %>% length()
## [1] 132
voting_augmented_no_old$logRR %>% na.omit() %>% length()
## [1] 100
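A compact way to tabulate the non-missing logRR counts across the three sensitivity datasets, equivalent to the na.omit() calls above (a sketch):
#non-missing logRR by dataset
c(
main = sum(!is.na(voting_augmented$logRR)),
small_parties = sum(!is.na(voting_augmented_small$logRR)),
no_old = sum(!is.na(voting_augmented_no_old$logRR))
)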
#make a convenience function
bma_modeller = function(x, party_pos = "wp_pol_position_numerical", mcmc = "enumerate", seed = 1, exclude_vars = c(), ...) {
# browser()
#no missing data subset
d = voting_augmented %>%
select(
!!x,
!!party_pos,
!!names(parties_wp_ideology_dummies[-c(1:2)])
) %>%
miss_filter()
#remove excluded variables
for (v in exclude_vars) d[[v]] = NULL
#join the arguments
args = list(
X.data = d %>% select(!!x, !!everything()),
mcmc = mcmc
) %>% c(list(...))
#fit with seed
set.seed(seed)
fit = do.call(BMS::bms, args)
fit
}
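For readers unfamiliar with BMS, here is a minimal self-contained illustration of BMS::bms on built-in data; it is not part of the analysis, but shows the enumeration setting that bma_modeller relies on (the first column of X.data is treated as the outcome):
#toy BMS run on mtcars (illustrative only)
toy_fit = BMS::bms(
mtcars[c("mpg", "wt", "hp", "qsec")], #mpg is the outcome
mcmc = "enumerate", #exhaustive model enumeration, as in bma_modeller
user.int = FALSE #suppress interactive printing
)
coef(toy_fit) #posterior inclusion probabilities and means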
#fit BMAs
bma_d = cache_object({bma_modeller("d")}, filename = "cache/bma_d.rds")
## Cache found, reading object from disk
## Registered S3 method overwritten by 'BMS':
## method from
## quantile.density
bma_logRR = cache_object({bma_modeller("logRR")}, filename = "cache/bma_logRR.rds")
## Cache found, reading object from disk
bma_logOR = cache_object({bma_modeller("logOR")}, filename = "cache/bma_logOR.rds")
## Cache found, reading object from disk
#alternative position variable
bma_d_rating = cache_object({bma_modeller("d", party_pos = "mean_rating")}, filename = "cache/bma_d_rating.rds")
## Cache found, reading object from disk
bma_logRR_rating = cache_object({bma_modeller("logRR", party_pos = "mean_rating")}, filename = "cache/bma_logRR_rating.rds")
## Cache found, reading object from disk
bma_logOR_rating = cache_object({bma_modeller("logOR", party_pos = "mean_rating")}, filename = "cache/bma_logOR_rating.rds")
## Cache found, reading object from disk
The populism results are strange. What happens if we leave one of the populism variables out, namely the general populism dummy?
#which parties are populist?
parties_populist = parties %>% filter(LW_populism | RW_populism | populism)
parties_populist %>% select(party_country, populism, LW_populism, RW_populism, wp_pol_position_clean, wp_pol_position_numerical, mean_rating) %>% print(n=Inf)
## # A tibble: 35 x 7
## party_country populism LW_populism RW_populism wp_pol_position_clean
## <chr> <lgl> <lgl> <lgl> <chr>
## 1 Danish People's Pa… TRUE FALSE TRUE right-wing to far-right
## 2 Sweden Democrats |… TRUE FALSE TRUE right-wing to far-right
## 3 Republican Party |… TRUE FALSE TRUE <NA>
## 4 Democratic Party |… TRUE TRUE FALSE <NA>
## 5 The Left | Germany TRUE TRUE FALSE left-wing to far-left
## 6 Alternative for Ge… TRUE FALSE TRUE right-wing to far-right…
## 7 Party of Democrati… TRUE TRUE FALSE left-wing
## 8 ANO 2011 | Czech R… TRUE FALSE FALSE centre to centre-right
## 9 Dawn - National Co… TRUE FALSE TRUE right-wing to far-right
## 10 Nick Xenophon Team… TRUE FALSE FALSE centre
## 11 Katter's Australia… TRUE FALSE TRUE <NA>
## 12 Palmer United Part… TRUE FALSE TRUE right-wing
## 13 Reform Party of Ca… TRUE FALSE TRUE centre-right to right-w…
## 14 Progressive Party … TRUE FALSE FALSE centre to centre-right
## 15 Self-Defence of th… TRUE FALSE FALSE economic: left-wing soc…
## 16 Law and Justice | … TRUE FALSE TRUE right-wing
## 17 Real Politics Unio… TRUE FALSE TRUE right-wing
## 18 New Zealand First … TRUE FALSE FALSE centre
## 19 Liberal Democratic… TRUE FALSE TRUE right-wing to far-right
## 20 Flemish Interest |… TRUE FALSE TRUE right-wing to far-right
## 21 People's Party | B… TRUE FALSE TRUE right-wing to far-right
## 22 Libertarian, Direc… TRUE FALSE TRUE centre-right to right-w…
## 23 Austrian People's … TRUE FALSE TRUE centre-right to right-w…
## 24 Freedom Party of A… TRUE FALSE TRUE right-wing to far-right
## 25 Peter Pilz List | … TRUE TRUE FALSE centre-left to left-wing
## 26 Alliance for the F… TRUE FALSE TRUE centre-right to right-w…
## 27 Swiss People's Par… TRUE FALSE TRUE right-wing to far-right
## 28 National Front | F… TRUE FALSE TRUE right-wing to far-right
## 29 Debout la Républiq… TRUE FALSE TRUE right-wing to far-right
## 30 National Republica… TRUE FALSE TRUE far-right
## 31 UK Independence Pa… TRUE FALSE TRUE right-wing to far-right
## 32 Finns Party | Finl… TRUE FALSE TRUE right-wing to far-right
## 33 Socialist Party | … TRUE TRUE FALSE left-wing
## 34 Slovenian Democrat… TRUE FALSE TRUE right-wing
## 35 Slovenian National… TRUE FALSE TRUE right-wing to far-right
## # … with 2 more variables: wp_pol_position_numerical <dbl>, mean_rating <dbl>
#sum stats
#all
parties_populist %>% select(LW_populism, RW_populism, populism, wp_pol_position_numerical, mean_rating) %>% map_df(as.numeric) %>% describe()
#directional populism
parties_populist %>% filter(LW_populism) %>% select(RW_populism, populism, wp_pol_position_numerical, mean_rating) %>% map_df(as.numeric) %>% describe()
parties_populist %>% filter(RW_populism) %>% select(RW_populism, populism, wp_pol_position_numerical, mean_rating) %>% map_df(as.numeric) %>% describe()
#just populism
parties_populist %>% filter(!LW_populism, !RW_populism) %>% select(RW_populism, populism, wp_pol_position_numerical, mean_rating) %>% map_df(as.numeric) %>% describe()
#without populism
bma_d_noP = cache_object({bma_modeller("d", exclude_vars = "populism")}, filename = "cache/bma_d_noP.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## conservative 0.972 -9.6924 2.89 0.00000 4
## EU_positive 0.903 6.6803 2.96 1.00000 7
## green 0.575 4.3513 4.29 1.00000 3
## nationalism 0.461 -3.0995 3.77 0.00000 5
## direct_democracy 0.345 5.9158 9.13 1.00000 21
## LW_populism 0.111 -1.1615 3.90 0.00000 16
## feminism 0.107 0.6807 2.33 1.00000 8
## liberalism 0.103 -0.3677 1.32 0.00077 20
## national_conservatism 0.095 -0.5462 2.11 0.00572 10
## agrarianism 0.069 -0.2997 1.40 0.00000 11
## democratic_socialism 0.063 0.2510 1.29 0.99724 18
## wp_pol_position_numerical 0.055 -0.0702 0.48 0.14787 1
## socialism 0.054 0.1683 1.02 0.97923 2
## RW_populism 0.049 -0.1069 1.15 0.32089 15
## christian 0.044 -0.1133 0.81 0.00747 12
## social_liberalism 0.042 0.0957 0.93 0.72514 19
## libertarianism 0.041 -0.1781 1.34 0.00390 9
## EU_skeptic 0.040 -0.0398 0.72 0.41142 6
## centrism 0.040 -0.1645 1.29 0.00039 14
## social_democracy 0.033 0.0077 0.53 0.58333 17
## communism 0.030 0.0329 0.79 0.81373 13
##
## Mean no. regressors Draws Burnins Time
## "4.2318" "2097152" "0" "1.3602 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "118" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9916"
##
## Time difference of 1.4 mins
bma_logRR_noP = cache_object({bma_modeller("logRR", exclude_vars = "populism")}, filename = "cache/bma_logRR_noP.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## LW_populism 0.828 -0.6608 0.378 0.0e+00 16
## green 0.737 0.2434 0.177 1.0e+00 3
## nationalism 0.684 -0.2572 0.202 0.0e+00 5
## conservative 0.568 -0.1903 0.188 0.0e+00 4
## wp_pol_position_numerical 0.477 -0.0646 0.077 9.6e-06 1
## national_conservatism 0.385 -0.1686 0.243 0.0e+00 10
## libertarianism 0.277 -0.1280 0.234 0.0e+00 9
## agrarianism 0.218 -0.0571 0.124 0.0e+00 11
## social_liberalism 0.133 0.0229 0.070 9.9e-01 19
## EU_positive 0.116 0.0157 0.053 1.0e+00 7
## christian 0.104 -0.0169 0.061 4.6e-06 12
## centrism 0.080 -0.0211 0.092 4.0e-08 14
## democratic_socialism 0.071 0.0112 0.058 9.7e-01 18
## feminism 0.057 0.0082 0.049 1.0e+00 8
## liberalism 0.055 0.0039 0.028 8.9e-01 20
## social_democracy 0.052 -0.0035 0.032 1.5e-01 17
## socialism 0.051 0.0018 0.040 6.6e-01 2
## direct_democracy 0.049 0.0112 0.079 1.0e+00 21
## EU_skeptic 0.047 -0.0029 0.034 2.4e-01 6
## RW_populism 0.047 0.0024 0.047 6.4e-01 15
## communism 0.039 -0.0010 0.038 4.2e-01 13
##
## Mean no. regressors Draws Burnins Time
## "5.0726" "2097152" "0" "1.3561 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "118" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9916"
##
## Time difference of 1.4 mins
bma_logOR_noP = cache_object({bma_modeller("logOR", exclude_vars = "populism")}, filename = "cache/bma_logOR_noP.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## LW_populism 0.781 -0.65515 0.422 0.0e+00 16
## green 0.778 0.28929 0.193 1.0e+00 3
## nationalism 0.699 -0.29347 0.223 0.0e+00 5
## conservative 0.667 -0.25271 0.206 0.0e+00 4
## national_conservatism 0.386 -0.18609 0.267 0.0e+00 10
## wp_pol_position_numerical 0.373 -0.05213 0.077 8.7e-05 1
## libertarianism 0.270 -0.13292 0.248 0.0e+00 9
## agrarianism 0.231 -0.06757 0.141 0.0e+00 11
## EU_positive 0.216 0.04062 0.089 1.0e+00 7
## social_liberalism 0.118 0.02118 0.072 9.8e-01 19
## christian 0.091 -0.01521 0.061 1.9e-05 12
## democratic_socialism 0.086 0.01693 0.073 9.8e-01 18
## centrism 0.080 -0.02271 0.100 7.0e-08 14
## feminism 0.070 0.01308 0.064 1.0e+00 8
## direct_democracy 0.068 0.02361 0.118 1.0e+00 21
## socialism 0.056 0.00396 0.047 7.5e-01 2
## social_democracy 0.048 -0.00238 0.031 2.5e-01 17
## RW_populism 0.047 0.00180 0.050 6.1e-01 15
## EU_skeptic 0.047 -0.00269 0.036 2.8e-01 6
## liberalism 0.045 0.00105 0.024 6.3e-01 20
## communism 0.040 -0.00049 0.041 5.0e-01 13
##
## Mean no. regressors Draws Burnins Time
## "5.1970" "2097152" "0" "1.3540 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "118" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9916"
##
## Time difference of 1.4 mins
#with alternative rating source
bma_d_noP_rating = cache_object({bma_modeller("d", exclude_vars = "populism", party_pos = "mean_rating")}, filename = "cache/bma_d_noP_rating.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## conservative 0.853 -7.749 3.99 0.0e+00 4
## EU_positive 0.781 5.499 3.48 1.0e+00 7
## green 0.599 4.863 4.58 1.0e+00 3
## nationalism 0.369 -2.465 3.59 1.9e-05 5
## direct_democracy 0.296 5.164 8.91 1.0e+00 21
## mean_rating 0.162 -0.505 1.33 7.1e-03 1
## national_conservatism 0.127 -0.890 2.75 1.5e-03 10
## social_liberalism 0.096 0.521 1.99 1.0e+00 19
## RW_populism 0.096 -0.579 2.19 2.8e-02 15
## feminism 0.093 0.593 2.21 1.0e+00 8
## libertarianism 0.068 -0.445 2.07 1.4e-04 9
## agrarianism 0.066 -0.299 1.41 0.0e+00 11
## liberalism 0.061 -0.211 1.13 2.7e-02 20
## social_democracy 0.052 0.162 0.96 9.8e-01 17
## christian 0.049 -0.159 0.97 3.6e-03 12
## democratic_socialism 0.044 0.137 0.93 9.9e-01 18
## socialism 0.040 0.100 0.78 9.8e-01 2
## EU_skeptic 0.036 -0.041 0.67 3.5e-01 6
## communism 0.028 -0.018 0.78 5.1e-01 13
## LW_populism 0.028 0.017 0.92 6.7e-01 16
## centrism 0.027 -0.017 0.75 3.2e-01 14
##
## Mean no. regressors Draws Burnins Time
## "3.9730" "2097152" "0" "1.3589 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "123" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9919"
##
## Time difference of 1.4 mins
bma_logRR_noP_rating = cache_object({bma_modeller("logRR", exclude_vars = "populism", party_pos = "mean_rating")}, filename = "cache/bma_logRR_noP_rating.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## green 0.951 0.40925 0.148 1.0e+00 3
## national_conservatism 0.800 -0.44817 0.263 0.0e+00 10
## conservative 0.480 -0.12864 0.152 0.0e+00 4
## agrarianism 0.367 -0.11415 0.167 0.0e+00 11
## nationalism 0.315 -0.11015 0.181 0.0e+00 5
## social_liberalism 0.213 0.04392 0.095 1.0e+00 19
## LW_populism 0.167 -0.06955 0.177 0.0e+00 16
## mean_rating 0.110 -0.00940 0.033 1.5e-02 1
## libertarianism 0.107 -0.03445 0.117 0.0e+00 9
## christian 0.088 -0.01482 0.058 7.6e-07 12
## EU_positive 0.078 0.01012 0.043 9.9e-01 7
## feminism 0.063 0.01248 0.061 1.0e+00 8
## social_democracy 0.046 0.00446 0.030 9.7e-01 17
## liberalism 0.042 0.00200 0.023 7.9e-01 20
## democratic_socialism 0.038 0.00361 0.032 9.6e-01 18
## direct_democracy 0.032 0.00508 0.057 1.0e+00 21
## EU_skeptic 0.031 -0.00152 0.024 1.9e-01 6
## socialism 0.031 0.00092 0.024 7.3e-01 2
## communism 0.031 -0.00210 0.032 2.0e-01 13
## RW_populism 0.031 -0.00070 0.033 4.6e-01 15
## centrism 0.030 -0.00183 0.032 1.5e-01 14
##
## Mean no. regressors Draws Burnins Time
## "4.0503" "2097152" "0" "1.3364 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "123" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9919"
##
## Time difference of 1.3 mins
bma_logOR_noP_rating = cache_object({bma_modeller("logOR", exclude_vars = "populism", party_pos = "mean_rating")}, filename = "cache/bma_logOR_noP_rating.rds")
## Cache not found, evaluating expression
## PIP Post Mean Post SD Cond.Pos.Sign Idx
## green 0.955 0.45464 0.162 1.00000 3
## national_conservatism 0.767 -0.46553 0.297 0.00000 10
## conservative 0.515 -0.15581 0.172 0.00000 4
## agrarianism 0.370 -0.12772 0.186 0.00000 11
## nationalism 0.339 -0.13232 0.205 0.00000 5
## social_liberalism 0.234 0.05551 0.113 1.00000 19
## EU_positive 0.130 0.02322 0.070 1.00000 7
## libertarianism 0.120 -0.04418 0.140 0.00000 9
## mean_rating 0.107 -0.00973 0.035 0.02051 1
## LW_populism 0.099 -0.03761 0.135 0.00000 16
## christian 0.080 -0.01422 0.059 0.00001 12
## feminism 0.076 0.01803 0.077 1.00000 8
## social_democracy 0.057 0.00730 0.041 0.98167 17
## direct_democracy 0.042 0.01190 0.085 1.00000 21
## liberalism 0.039 0.00041 0.025 0.66817 20
## democratic_socialism 0.037 0.00348 0.033 0.94990 18
## RW_populism 0.033 -0.00202 0.039 0.27965 15
## EU_skeptic 0.032 -0.00181 0.027 0.17752 6
## communism 0.032 -0.00244 0.036 0.18553 13
## socialism 0.031 0.00102 0.025 0.72384 2
## centrism 0.030 -0.00198 0.035 0.14405 14
##
## Mean no. regressors Draws Burnins Time
## "4.1236" "2097152" "0" "1.3297 mins"
## No. models visited Modelspace 2^K % visited % Topmodels
## "2097152" "2097152" "100" "0.024"
## Corr PMP No. Obs. Model Prior g-Prior
## "NA" "123" "random / 10.5" "UIP"
## Shrinkage-Stats
## "Av=0.9919"
##
## Time difference of 1.3 mins
#plot the model coefs as a briefer summary
bma_d_noP$topmod$betas() %>%
t() %>%
set_colnames(bma_d_noP$X.data[, -1] %>% colnames()) %>%
as_tibble() %>%
mutate(
model = 1:n()
) %>%
pivot_longer(
cols = -model,
names_to = "predictor",
values_to = "beta"
) %>%
ggplot(aes(beta)) +
geom_histogram() +
facet_wrap("predictor")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/bma_d_noP_betas.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
One plot to rule them all.
#aggregate from model summaries
metas_coefs = bind_rows(
coef(bma_d_noP) %>% as.data.frame() %>% rownames_to_column() %>% mutate(metric = "d", position = "Wikipedia"),
coef(bma_logRR_noP) %>% as.data.frame() %>% rownames_to_column() %>% mutate(metric = "logRR", position = "Wikipedia"),
coef(bma_d_noP_rating) %>% as.data.frame() %>% rownames_to_column() %>% mutate(metric = "d", position = "Raters"),
coef(bma_logRR_noP_rating) %>% as.data.frame() %>% rownames_to_column() %>% mutate(metric = "logRR", position = "Raters")
) %>%
#rename position source
mutate(
rowname = rowname %>% mapvalues(c("wp_pol_position_numerical", "mean_rating"),
c("Left-right position (Wikipedia)", "Left-right position (Raters)"))
) %>%
df_legalize_names()
#fix some labels
nicer_tags = function(x) {
x %>%
str_clean() %>%
str_replace("LW", "Left-wing") %>%
str_replace("RW", "Right-wing") %>%
str_to_sentence() %>%
str_replace("Eu", "EU")
}
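A quick check of the label cleaner, assuming kirkegaard's str_clean() replaces underscores with spaces; the final Eu -> EU replace undoes the lowercasing from str_to_sentence():
nicer_tags(c("LW_populism", "EU_positive"))
## expected: "Left-wing populism" "EU positive"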
#plots
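#note: the "+" string concatenation inside aes() below presumably relies on a
#character method for "+" from the kirkegaard package; base R would need str_c(position, " ", metric)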
metas_coefs %>%
ggplot(aes(rowname %>% nicer_tags(), PIP, fill = position + " " + metric)) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous("Probability of inclusion in best models", labels = scales::percent) +
scale_x_discrete("Predictor", guide = guide_axis(n.dodge = 1)) +
scale_fill_discrete("Meta-analysis") +
coord_flip()
GG_save("figs/metas_PIP.png")
metas_coefs %>%
ggplot(aes(rowname %>% nicer_tags(), Post_Mean, fill = position)) +
geom_bar(stat = "identity", position = "dodge") +
scale_y_continuous("Posterior mean effect size") +
scale_x_discrete("Predictor", guide = guide_axis(n.dodge = 1)) +
scale_fill_discrete("Meta-analysis") +
coord_flip() +
facet_wrap("metric", scales = c("free_x"))
GG_save("figs/metas_post_mean.png")
Here we construct a measure of left-right bias and compute it by country. The most obvious measure is the weighted mean of party positions using journalists' votes as weights, minus the same weighted mean using the general population's votes as weights. If journalists vote like the general population, the difference should be close to 0.
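A toy illustration of the bias measure with made-up numbers (weighted.mean is the base-R analogue of the wtd_mean used below):
#hypothetical three-party system
pos = c(-2, 0, 2) #party positions, left (-) to right (+)
journo_pct = c(60, 30, 10) #journalist vote shares
general_pct = c(30, 40, 30) #general population vote shares
weighted.mean(pos, journo_pct) - weighted.mean(pos, general_pct)
#gives -1 - 0 = -1: journalists sit one point further left than the public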
#weighted means
country_bias = plyr::ddply(voting_augmented, "Country", function(d) {
#skip if no data
# if (all(is.na(d$wp_pol_position_numerical))) return(NULL)
tibble(
wmean_journalists_wikipedia = wtd_mean(d$wp_pol_position_numerical, d$Journalist_pct),
wmean_general_wikipedia = wtd_mean(d$wp_pol_position_numerical, d$General_pct),
bias_wikipedia = wmean_journalists_wikipedia - wmean_general_wikipedia,
#ratings
wmean_journalists_raters = wtd_mean(d$mean_rating, d$Journalist_pct),
wmean_general_raters = wtd_mean(d$mean_rating, d$General_pct),
bias_raters = wmean_journalists_raters - wmean_general_raters
)
})
#stats
country_bias[-1] %>% describe()
#cors
country_bias[-1] %>% wtd.cors()
## wmean_journalists_wikipedia wmean_general_wikipedia
## wmean_journalists_wikipedia 1.00 0.69
## wmean_general_wikipedia 0.69 1.00
## bias_wikipedia 0.43 -0.35
## wmean_journalists_raters 0.83 0.52
## wmean_general_raters 0.72 0.77
## bias_raters 0.47 -0.20
## bias_wikipedia wmean_journalists_raters
## wmean_journalists_wikipedia 0.430 0.83
## wmean_general_wikipedia -0.354 0.52
## bias_wikipedia 1.000 0.43
## wmean_journalists_raters 0.431 1.00
## wmean_general_raters -0.035 0.87
## bias_raters 0.853 0.59
## wmean_general_raters bias_raters
## wmean_journalists_wikipedia 0.715 0.47
## wmean_general_wikipedia 0.769 -0.20
## bias_wikipedia -0.035 0.85
## wmean_journalists_raters 0.871 0.59
## wmean_general_raters 1.000 0.12
## bias_raters 0.116 1.00
#plot Wikipedia
country_bias %>%
filter(!is.na(bias_wikipedia)) %>%
mutate(Country = fct_reorder(Country, bias_wikipedia)) %>%
ggplot(aes(Country, bias_wikipedia)) +
geom_bar(stat = "identity") +
theme_bw() +
coord_flip()
GG_save("figs/bias_wikipedia_by_country.png")
#write table
country_bias %>% arrange(bias_wikipedia)
#plot raters
country_bias %>%
filter(!is.na(bias_raters)) %>%
mutate(Country = fct_reorder(Country, bias_raters)) %>%
ggplot(aes(Country, bias_raters)) +
geom_bar(stat = "identity") +
theme_bw() +
coord_flip()
GG_save("figs/bias_raters_by_country.png")
#write table
country_bias %>% arrange(bias_raters)
#relationships
GG_scatter(country_bias, "bias_wikipedia", "bias_raters", case_names = "Country")
## `geom_smooth()` using formula 'y ~ x'
Finally, we turn to time series of journalists' political preferences for Sweden and the USA.
#add blocks
timeseries$block = "Party-level"
timeseries = timeseries %>% plyr::ddply(c("country", "year"), function(dd) {
#if USA, just return
if (dd$country[1] == "USA") return(dd)
#make blocks
#right-wing block (the centre-right "Alliance" parties)
.right = dd[1, ]
.right$party = "right-wing block"
.right$block = "Block-level"
.right$percent = dd %>% filter(party %in% c("Centre Party", "Moderate Party", "Christian Democrats", "Liberals")) %>% pull(percent) %>% sum(na.rm = T)
#make left
.left = .right
.left$party = "left-wing block"
.left$percent = 100 - .right$percent
rbind(dd %>% filter(!party %in% c("right-wing block", "left-wing block")),
.right,
.left
)
})
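As a sanity check, the two Swedish block shares should sum to 100 within each country-year (a sketch):
#block percentages should total 100
timeseries %>%
filter(block == "Block-level") %>%
group_by(country, year) %>%
summarise(total = sum(percent), .groups = "drop")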
#plot USA
timeseries %>%
filter(country == "USA") %>%
ggplot(aes(year, percent/100, color = party)) +
geom_path() +
theme_bw() +
scale_y_continuous("Percentage affiliation", labels = scales::percent) +
scale_color_discrete("Party")
GG_save("figs/USA_timeseries.png")
#Sweden, simple
timeseries %>%
filter(country == "Sweden") %>%
ggplot(aes(year, percent/100, color = party)) +
geom_path() +
theme_bw() +
scale_y_continuous("Percentage sympathy", labels = scales::percent, breaks = seq(0, 1, .1)) +
scale_color_discrete("Party/block") +
facet_wrap(~block, strip.position = "bottom")
## Warning: Removed 6 row(s) containing missing values (geom_path).
GG_save("figs/Sweden_timeseries.png")
## Warning: Removed 6 row(s) containing missing values (geom_path).
#Swedish parties only
timeseries %>%
filter(country == "Sweden", block == "Party-level") %>%
ggplot(aes(year, percent/100, color = party)) +
geom_path() +
theme_bw() +
scale_y_continuous("Percentage sympathy", labels = scales::percent, breaks = seq(0, 1, .1)) +
scale_color_discrete("Party")
## Warning: Removed 6 row(s) containing missing values (geom_path).
GG_save("figs/Sweden_timeseries2.png")
## Warning: Removed 6 row(s) containing missing values (geom_path).
write_sessioninfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] spatstat_1.64-1 rpart_4.1-15 nlme_3.1-152
## [4] spatstat.data_2.0-0 osfr_0.2.8 renv_0.13.0
## [7] writexl_1.3.1 glmnet_4.1-1 rms_6.1-1
## [10] SparseM_1.81 polycor_0.7-10 stringi_1.5.3
## [13] rvest_1.0.0 googlesheets4_0.3.0 kirkegaard_2021-02-08
## [16] metafor_2.4-0 Matrix_1.3-2 psych_2.0.12
## [19] magrittr_2.0.1 assertthat_0.2.1 weights_1.0.1
## [22] mice_3.13.0 gdata_2.18.0 Hmisc_4.5-0
## [25] Formula_1.2-4 survival_3.2-7 lattice_0.20-41
## [28] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5
## [31] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3
## [34] tibble_3.1.0 ggplot2_3.3.3 tidyverse_1.3.0
## [37] pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 BMS_0.3.4
## [4] plyr_1.8.6 selectr_0.4-2 splines_4.0.4
## [7] TH.data_1.0-10 digest_0.6.27 foreach_1.5.1
## [10] htmltools_0.5.1.1 fansi_0.4.2 checkmate_2.0.0
## [13] memoise_2.0.0 tensor_1.5 cluster_2.1.1
## [16] modelr_0.1.8 matrixStats_0.58.0 sandwich_3.0-0
## [19] askpass_1.1 jpeg_0.1-8.1 colorspace_2.0-0
## [22] haven_2.3.1 xfun_0.21 crayon_1.4.1
## [25] jsonlite_1.7.2 lme4_1.1-26 zoo_1.8-9
## [28] iterators_1.0.13 glue_1.4.2 polyclip_1.10-0
## [31] gtable_0.3.0 gargle_1.0.0 MatrixModels_0.5-0
## [34] shape_1.4.5 abind_1.4-5 scales_1.1.1
## [37] mvtnorm_1.1-1 DBI_1.1.1 Rcpp_1.0.6
## [40] htmlTable_2.1.0 tmvnsim_1.0-2 foreign_0.8-81
## [43] htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2
## [46] ellipsis_0.3.1 farver_2.1.0 pkgconfig_2.0.3
## [49] nnet_7.3-15 dbplyr_2.1.0 deldir_0.2-10
## [52] utf8_1.1.4 crul_1.1.0 tidyselect_1.1.0
## [55] labeling_0.4.2 rlang_0.4.10 reshape2_1.4.4
## [58] multilevel_2.6 munsell_0.5.0 cellranger_1.1.0
## [61] tools_4.0.4 cachem_1.0.4 cli_2.3.1
## [64] generics_0.1.0 broom_0.7.5 evaluate_0.14
## [67] fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2
## [70] knitr_1.31 fs_1.5.0 quantreg_5.85
## [73] xml2_1.3.2 psychometric_2.2 compiler_4.0.4
## [76] rstudioapi_0.13 curl_4.3 png_0.1-7
## [79] spatstat.utils_2.0-0 reprex_1.0.0.9000 statmod_1.4.35
## [82] highr_0.8 nloptr_1.2.2.2 vctrs_0.3.6
## [85] pillar_1.5.1 lifecycle_1.0.0 data.table_1.14.0
## [88] conquer_1.0.2 R6_2.5.0 latticeExtra_0.6-29
## [91] gridExtra_2.3 codetools_0.2-18 polspline_1.1.19
## [94] boot_1.3-27 MASS_7.3-53.1 gtools_3.8.2
## [97] openssl_1.4.3 withr_2.4.1 httpcode_0.3.0
## [100] mnormt_2.0.2 multcomp_1.4-16 mgcv_1.8-33
## [103] parallel_4.0.4 hms_1.0.0 grid_4.0.4
## [106] minqa_1.2.4 rmarkdown_2.7 googledrive_1.0.1
## [109] lubridate_1.7.10 base64enc_0.1-3
#export data for reuse
voting_augmented %>% write_rds("data/party_data.rds")
voting_augmented %>% write_xlsx("data/party_data.xlsx")
#versioning
if (F) {
renv::init()
}
#upload files
if (F) {
#upload files in project
#overwrite existing (versioning)
#osf repo
osf_auth(read_lines("~/.config/osf_token"))
osf_proj = osf_retrieve_node("https://osf.io/6uvnu/")
osf_upload(osf_proj, path = c("data", "figs", "notebook.Rmd", "notebook.html", "sessions_info.txt", "renv.lock"), conflicts = "overwrite", recurse = T)
}