Init

options(digits = 2)
library(pacman)
p_load(kirkegaard, googlesheets, rvest, stringi, polycor, rms, glmnet)

Ad hoc functions

str_rm_sources = function(x) {
  str_replace_all(x, "\\[[^\\]]+\\]", "")
}

l0_to_NA = function(x) {
  if (length(x) == 0) return(NA)
  x
}

#ad hoc function to get best model from cv.glmnet
get_glmnet_coefs = function(x) {
  #get coefs at two criteria
  coefs_min <- coef(x, s = "lambda.min")
  coefs_1se <- coef(x, s = "lambda.1se")
  
  #get values
  d1 = data.frame(predictor = coefs_min@Dimnames[[1]][coefs_min@i + 1], beta_min = coefs_min@x)
  d2 = data.frame(predictor = coefs_1se@Dimnames[[1]][coefs_1se@i + 1], beta_1se = coefs_1se@x)
  
  #merge
  full_join(d1, d2, by = "predictor")
}

Data

gs_auth()

#get spreadsheet
gs_doc = gs_url("https://docs.google.com/spreadsheets/d/1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g/edit#gid=0")
## Sheet-identifying info appears to be a browser URL.
## googlesheets will attempt to extract sheet key from the URL.
## Putative key: 1X9g7Eh-EerrkjtabHazNu-a-niVC4t5dObok_aNUP0g
## Sheet successfully identified: "Journalist political preferences dataset"
#get sheets
voting = gs_read(gs_doc, "voting") %>% df_legalize_names()
## Accessing worksheet titled 'voting'.
## Parsed with column specification:
## cols(
##   Country = col_character(),
##   `Election type` = col_character(),
##   Year = col_integer(),
##   Party = col_character(),
##   `Journalist %` = col_double(),
##   `General %` = col_double(),
##   `Journalist sample` = col_character(),
##   `General sample` = col_integer(),
##   comments = col_character()
## )
parties = gs_read(gs_doc, "parties") %>% df_legalize_names()
## Accessing worksheet titled 'parties'.
## Parsed with column specification:
## cols(
##   Party = col_character(),
##   `Party English` = col_character(),
##   Country = col_character(),
##   Abbreviation = col_character(),
##   `Wikipedia link` = col_character(),
##   Block = col_character()
## )
samples = gs_read(gs_doc, "samples") %>% df_legalize_names()
## Accessing worksheet titled 'samples'.
## Parsed with column specification:
## cols(
##   ID = col_integer(),
##   Country = col_character(),
##   Type = col_character(),
##   Question = col_character(),
##   n = col_integer(),
##   n_party_data = col_double(),
##   Link = col_character(),
##   `Permanent link` = col_character(),
##   Reference = col_character(),
##   Year = col_character(),
##   Comments = col_character()
## )

Initial data recoding

To analyze the data, we need to recode them into relative ‘risk’/ratio and odds ratios. Odds ratios are superior because they take into account that it is impossible to get more than 100%.

#add RR and OR
voting %<>% mutate(
  RR = Journalist_pct/General_pct,
  OR = (Journalist_pct/(100 - Journalist_pct)) / (General_pct / (100 - General_pct) ),
  d = Journalist_pct - General_pct,
  party_country = Party + " | " + Country
)

#merge within countries
voting_aggr = plyr::ddply(voting, c("Country", "Party"), function(x) {
  #base is mostly the same
  y = x[1, ]
  
  #mean for the numericals
  y$Journalist_pct = wtd_mean(x$Journalist_pct)
  y$General_pct = wtd_mean(x$General_pct)
  
  #RR and OR
  y %>% mutate(
    RR = Journalist_pct/General_pct,
    OR = (Journalist_pct/(100 - Journalist_pct)) / (General_pct / (100 - General_pct) ),
      d = Journalist_pct - General_pct
  )
})

#assert no duplicated links
#means data error
assert_that(!any(duplicated(parties$Wikipedia_link)))
## [1] TRUE
#unique ID
#unique ID
parties %<>% mutate(
  party_country = Party + " | " + Country,
  wp_page = NA,
  Block = Block %>% str_to_lower() %>% factor() %>% fct_relevel("center"),
  id = 1:n()
)

Party data

We need to scrape some party data from Wikipedia.

#loop over rows of parties and fetch data
for (p in seq_along_rows(parties)) {
  party = parties$Party_English[p]
  country = parties$Country[p]
  
  #get page
  #do we have it already?
  party_page_file = sprintf("%s - %s.html", country, party)
  data_location = "data/parties/"
  party_page_file2 = data_location + party_page_file
  
  #does file exist?
  if (!file.exists(party_page_file2)) {
    message(sprintf("%.0f%% -- Downloading page for %s -- %s", 100*p/nrow(parties), party, country))
    #download it
    party_page = parties$Wikipedia_link[p] %>% read_html()
    
    #save it
    party_page %>% as.character() %>% write_lines(party_page_file2)
  } else {
    message(sprintf("%.0f%% -- Loading page for %s -- %s", 100*p/nrow(parties), party, country))

    #read from disk
    party_page = read_html(party_page_file2)
  }
  
  #add to table
  parties$wp_page[p] = list(party_page)
}
## 1% -- Loading page for Red-Green Alliance -- Denmark
## 1% -- Loading page for Socialist People's Party -- Denmark
## 2% -- Loading page for Social Democrats -- Denmark
## 2% -- Loading page for The Alternative -- Denmark
## 3% -- Loading page for Danish Social Liberal Party -- Denmark
## 3% -- Loading page for Venstre -- Denmark
## 4% -- Loading page for Liberal Alliance -- Denmark
## 4% -- Loading page for Conservative People's Party -- Denmark
## 5% -- Loading page for Danish People's Party -- Denmark
## 5% -- Loading page for Left Party -- Sweden
## 6% -- Loading page for Swedish Social Democratic Party -- Sweden
## 6% -- Loading page for Green Party -- Sweden
## 7% -- Loading page for Centre Party -- Sweden
## 7% -- Loading page for Liberals -- Sweden
## 8% -- Loading page for Moderate Party -- Sweden
## 8% -- Loading page for Christian Democrats -- Sweden
## 9% -- Loading page for Sweden Democrats -- Sweden
## 9% -- Loading page for Feminist Initiative -- Sweden
## 10% -- Loading page for Labour Party -- Norway
## 10% -- Loading page for Conservative Party -- Norway
## 11% -- Loading page for Progress Party    -- Norway
## 11% -- Loading page for Christian Democratic Party -- Norway
## 12% -- Loading page for Centre Party -- Norway
## 12% -- Loading page for Liberal Party     -- Norway
## 13% -- Loading page for Socialist Left Party -- Norway
## 13% -- Loading page for Green Party -- Norway
## 14% -- Loading page for Red Party -- Norway
## 14% -- Loading page for Republican Party -- USA
## 15% -- Loading page for Democratic Party -- USA
## 15% -- Loading page for Christian Democratic Union of Germany -- Germany
## 16% -- Loading page for Christian Social Union in Bavaria -- Germany
## 16% -- Loading page for Social Democratic Party of Germany -- Germany
## 17% -- Loading page for The Left -- Germany
## 18% -- Loading page for The Greens -- Germany
## 18% -- Loading page for Liberal Conservative Reformers -- Germany
## 19% -- Loading page for Free Democratic Party -- Germany
## 19% -- Loading page for Alternative for Germany -- Germany
## 20% -- Loading page for Party of Democratic Socialism -- Germany
## 20% -- Loading page for Czech Social Democratic Party -- Czech Republic
## 21% -- Loading page for ANO 2011 -- Czech Republic
## 21% -- Loading page for Communist Party of Bohemia and Moravia -- Czech Republic
## 22% -- Loading page for TOP09 -- Czech Republic
## 22% -- Loading page for Civic Democratic Party -- Czech Republic
## 23% -- Loading page for Dawn - National Coalition -- Czech Republic
## 23% -- Loading page for Christian and Democratic Union – Czechoslovak People's Party -- Czech Republic
## 24% -- Loading page for Green Party -- Czech Republic
## 24% -- Loading page for Liberal–National Coalition -- Australia
## 25% -- Loading page for Liberal Party of Australia -- Australia
## 25% -- Loading page for Liberal National Party of Queensland -- Australia
## 26% -- Loading page for National Party of Australia -- Australia
## 26% -- Loading page for Country Liberal Party (NT) -- Australia
## 27% -- Loading page for Australian Labor Party -- Australia
## 27% -- Loading page for Australian Greens -- Australia
## 28% -- Loading page for Nick Xenophon Team -- Australia
## 28% -- Loading page for Katter's Australian Party -- Australia
## 29% -- Loading page for Palmer United Party -- Australia
## 29% -- Loading page for Liberal Democratic Party -- Australia
## 30% -- Loading page for Australian Democrats -- Australia
## 30% -- Loading page for Conservative Party of Canada -- Canada
## 31% -- Loading page for Liberal Party of Canada -- Canada
## 31% -- Loading page for New Democratic Party -- Canada
## 32% -- Loading page for Bloc Québécois -- Canada
## 32% -- Loading page for Reform Party of Canada -- Canada
## 33% -- Loading page for Sjálfstæðisflokkurinn -- Iceland
## 34% -- Loading page for Viðreisn -- Iceland
## 34% -- Loading page for Björt framtíð -- Iceland
## 35% -- Loading page for Vinstrihreyfingin – grænt framboð -- Iceland
## 35% -- Loading page for Píratar -- Iceland
## 36% -- Loading page for Framsóknarflokkurinn, FSF) -- Iceland
## 36% -- Loading page for Samfylkingin-Jafnaðarmannaflokkur Íslands -- Iceland
## 37% -- Loading page for Social Democracy of Poland -- Poland
## 37% -- Loading page for Democratic Left Alliance -- Poland
## 38% -- Loading page for Democratic Party -- Poland
## 38% -- Loading page for Civic Platform -- Poland
## 39% -- Loading page for Polish People's Party -- Poland
## 39% -- Loading page for Self-Defence of the Republic of Poland -- Poland
## 40% -- Loading page for Law and Justice -- Poland
## 40% -- Loading page for League of Polish Families -- Poland
## 41% -- Loading page for Real Politics Union -- Poland
## 41% -- Loading page for New Zealand National Party -- New Zealand
## 42% -- Loading page for New Zealand Labour Party -- New Zealand
## 42% -- Loading page for Green Party of Aotearoa New Zealand -- New Zealand
## 43% -- Loading page for New Zealand First -- New Zealand
## 43% -- Loading page for Māori Party -- New Zealand
## 44% -- Loading page for ACT New Zealand -- New Zealand
## 44% -- Loading page for United Russia -- Russia
## 45% -- Loading page for Communist Party of the Russian Federation -- Russia
## 45% -- Loading page for Liberal Democratic Party of Russia -- Russia
## 46% -- Loading page for A Just Russia -- Russia
## 46% -- Loading page for Agrarian Party of Russia -- Russia
## 47% -- Loading page for Russian Democratic Party "Yabloko" -- Russia
## 47% -- Loading page for Civilian Power -- Russia
## 48% -- Loading page for Union of Rightist Forces -- Russia
## 48% -- Loading page for Patriots of Russia -- Russia
## 49% -- Loading page for Party of Social Justice -- Russia
## 49% -- Loading page for Democratic Party of Russia -- Russia
## 50% -- Loading page for New Flemish Alliance -- Belgium
## 51% -- Loading page for Socialist Party -- Belgium
## 51% -- Loading page for Christian Democratic and Flemish -- Belgium
## 52% -- Loading page for Open Flemish Liberals and Democrats -- Belgium
## 52% -- Loading page for Reformist Movement -- Belgium
## 53% -- Loading page for Socialist Party Differently -- Belgium
## 53% -- Loading page for Green -- Belgium
## 54% -- Loading page for Humanist Democratic Centre -- Belgium
## 54% -- Loading page for Workers' Party of Belgium -- Belgium
## 55% -- Loading page for Flemish Interest -- Belgium
## 55% -- Loading page for Ecolo -- Belgium
## 56% -- Loading page for DéFI -- Belgium
## 56% -- Loading page for People's Party -- Belgium
## 57% -- Loading page for Libertarian, Direct, Democratic -- Belgium
## 57% -- Loading page for Social Liberal Party -- Belgium
## 58% -- Loading page for Vivant -- Belgium
## 58% -- Loading page for The Greens -- Austria
## 59% -- Loading page for Austrian People's Party -- Austria
## 59% -- Loading page for Social Democratic Party of Austria -- Austria
## 60% -- Loading page for Freedom Party of Austria -- Austria
## 60% -- Loading page for NEOS -- Austria
## 61% -- Loading page for Peter Pilz List -- Austria
## 61% -- Loading page for My Vote Counts! -- Austria
## 62% -- Loading page for Alliance for the Future of Austria -- Austria
## 62% -- Loading page for Liberal Forum -- Austria
## 63% -- Loading page for Swiss People's Party -- Switzerland
## 63% -- Loading page for Social Democratic Party of Switzerland -- Switzerland
## 64% -- Loading page for FDP.The Liberals -- Switzerland
## 64% -- Loading page for Christian Democratic People's Party of Switzerland -- Switzerland
## 65% -- Loading page for Green Party of Switzerland -- Switzerland
## 65% -- Loading page for Green Liberal Party of Switzerland -- Switzerland
## 66% -- Loading page for Conservative Democratic Party of Switzerland -- Switzerland
## 66% -- Loading page for Evangelical People's Party of Switzerland -- Switzerland
## 67% -- Loading page for Socialist Party -- France
## 68% -- Loading page for UMP -- France
## 68% -- Loading page for National Front -- France
## 69% -- Loading page for Left Front -- France
## 69% -- Loading page for Democratic Movement -- France
## 70% -- Loading page for Europe Ecology - The Greens -- France
## 70% -- Loading page for Debout la République -- France
## 71% -- Loading page for New Anticapitalist Party -- France
## 71% -- Loading page for Lutte Ouvrière -- France
## 72% -- Loading page for Solidarity and progress -- France
## 72% -- Loading page for Rally for the Republic -- France
## 73% -- Loading page for Republican Pole -- France
## 73% -- Loading page for The Greens -- France
## 74% -- Loading page for Revolutionary Communist League -- France
## 74% -- Loading page for Hunting, Fishing, Nature and Traditions -- France
## 75% -- Loading page for Liberal Democracy -- France
## 75% -- Loading page for French Communist Party -- France
## 76% -- Loading page for National Republican Movement -- France
## 76% -- Loading page for Radical Party of the Left -- France
## 77% -- Loading page for Citizenship, Action, Participation for the 21st century -- France
## 77% -- Loading page for Forum of Social Republicans -- France
## 78% -- Loading page for Workers' Party -- France
## 78% -- Loading page for Conservative Party -- United Kingdom
## 79% -- Loading page for Labour Party -- United Kingdom
## 79% -- Loading page for Scottish National Party -- United Kingdom
## 80% -- Loading page for Liberal Democrats -- United Kingdom
## 80% -- Loading page for UK Independence Party -- United Kingdom
## 81% -- Loading page for Green Party of England and Wales -- United Kingdom
## 81% -- Loading page for Centre Party of Finland -- Finland
## 82% -- Loading page for Finns Party -- Finland
## 82% -- Loading page for National Coalition Party -- Finland
## 83% -- Loading page for Social Democratic Party of Finland -- Finland
## 84% -- Loading page for Green League -- Finland
## 84% -- Loading page for Left Alliance -- Finland
## 85% -- Loading page for Swedish People's Party of Finland -- Finland
## 85% -- Loading page for Christian Democrats -- Finland
## 86% -- Loading page for Åland Coalition -- Finland
## 86% -- Loading page for Labour Party -- Netherlands
## 87% -- Loading page for People's Party for Freedom and Democracy -- Netherlands
## 87% -- Loading page for Christian Democratic Appeal -- Netherlands
## 88% -- Loading page for Democrats 66 -- Netherlands
## 88% -- Loading page for GreenLeft -- Netherlands
## 89% -- Loading page for Socialist Party -- Netherlands
## 89% -- Loading page for Reformatory Political Federation -- Netherlands
## 90% -- Loading page for Reformed Political Party -- Netherlands
## 90% -- Loading page for Reformed Political League -- Netherlands
## 91% -- Loading page for Centre Democrats -- Netherlands
## 91% -- Loading page for General Elderly Alliance -- Netherlands
## 92% -- Loading page for Fianna Fáil -- Ireland
## 92% -- Loading page for Fine Gael -- Ireland
## 93% -- Loading page for Labour Party -- Ireland
## 93% -- Loading page for Progressive Democrats -- Ireland
## 94% -- Loading page for Democratic Left -- Ireland
## 94% -- Loading page for Green Party -- Ireland
## 95% -- Loading page for Sinn Féin -- Ireland
## 95% -- Loading page for Socialist Party -- Ireland
## 96% -- Loading page for Social Democrats -- Slovenia
## 96% -- Loading page for Slovenian Democratic Party -- Slovenia
## 97% -- Loading page for Zares – Social Liberals -- Slovenia
## 97% -- Loading page for Democratic Party of Pensioners of Slovenia -- Slovenia
## 98% -- Loading page for Slovenian National Party -- Slovenia
## 98% -- Loading page for Slovenian People's Party -- Slovenia
## 99% -- Loading page for Youth Party – European Greens -- Slovenia
## 99% -- Loading page for Liberal Democracy of Slovenia -- Slovenia
## 100% -- Loading page for New Slovenia – Christian Democrats -- Slovenia

Parse and extract party data

#parse vcard data
parties_vcard_data = map_df(seq_along_rows(parties), function(p) {
  # browser()

  y = try_else({
        #get vcard
    vcard = parties$wp_page[[p]] %>% html_node(".vcard") %>% 
      #replace linebreaks with spaces
      #otherwise they disappear in table...
      str_replace_all("<br>", ", ") %>% 
      #back to html
      read_html()
  })
  
  if (is.null(y)) return(NULL)
    
  #parse vcard table
  vcard_table = html_table(vcard) %>% .[[1]]
  
  #check for errors
  if (!is.data.frame(vcard_table)) browser()
  if (ncol(vcard_table) != 2) return(NULL)
  
  #set names
  colnames(vcard_table) = c("var", "data")
  vcard_table$var %<>% str_replace_all("\\s", "_") #replace magic whitespace...
  
  #extract party data
  y = data_frame(
    id = p,
    wp_ideolgy = vcard_table[vcard_table$var == "Ideology", 2] %>% l0_to_NA,
    wp_pol_position = vcard_table[vcard_table$var == "Political_position", 2] %>% l0_to_NA
  )
  
  #check for errors
  if (nrow(y) != 1) browser()
  
  y
  

}) %>% 
  #post cleaning
  #do it here for vectorized approach instead of one at a time
  mutate(
    wp_ideolgy = wp_ideolgy %>% str_rm_sources,
    wp_pol_position = wp_pol_position %>% str_rm_sources %>% str_replace_all(",", "")
  )

#merge
parties = dplyr::left_join(parties, parties_vcard_data, by = "id")

#code dummy predictors
ideology_tags = list(
  socialism = "socialism",
  green = "(eco\\-)|green",
  conservative = "conservative|conservatism",
  nationalism = "nationalism",
  euroscepticism = "euroscepticism",
  "pro_europeanism" = "pro\\-europeanism",
  feminism = "feminism",
  libertarianism = "libertarianism",
  "national_conservatism" = "national conservatism",
  agrarianism = "agrarianism",
  christian = "christian",
  communism = "communism|(anti\\-capitalism)|marxism",
  centrism = "centrism",
  "RW_populism" = "right-wing populism",
  populism = "populism",
  "social_democracy" = "social democracy",
  "democratic_socialism" = "democratic socialism",
  "social_liberalism" = "social liberalism",
  "liberalism" = "\\bliberalism\\b"
)

#make dummies
parties_wp_ideology_dummies = map(ideology_tags, function(tag) {
  stri_detect_regex(parties$wp_ideolgy, tag, case_insensitive=TRUE)}
  ) %>% 
  set_names(names(ideology_tags)) %>% 
  as.data.frame() %>% 
  mutate(tag_num = rowSums(.),
         id = 1:nrow(.)) %>% 
  select(id, tag_num, everything())

#merge
parties = dplyr::left_join(parties %>% select(-Party, -Country), parties_wp_ideology_dummies, by = "id")

Analyze party data

(not working right now, too little data)

Calculate political position

#table
parties$wp_pol_position %>% table2()
#simplify
#figure out what they are closest to
# stringdist::stringdist(a = parties$wp_pol_position %>% str_replace_all("\\\\n", ""),
#                        b = c("far-left", "far right", "left-wing", "right-wing", "centre", "centre-left", "centre-right", "centre to centre-left", "centre to centre-right", "left-wing to far-left", "right-wing to far-right"))

#clean it up a bit
parties$wp_pol_position_clean = parties$wp_pol_position %>% 
  str_replace_all("\\n", "") %>%
  str_to_lower()

#code into numerical form: -3 to 3
#detect keywords, average score based on keywords
positions = c("far-left", "left-wing", "center-left", "center", "center-right", "right-wing", "far-right", "libertarianism", "syncretic")
positions_values = c(-3:3, 2, 0)

#recode as numeric
parties$wp_pol_position_numerical = parties$wp_pol_position_clean %>% 
  map_dbl(function(.) {
    if (is.na(.)) return(NA)
    # browser()
    #split by " to "
    #we need to average values if multiple are found
    split_x = str_split(., " to ") %>% .[[1]]
    
    #american spelling of "center"
    split_x = map_chr(split_x, ~str_replace_all(., "centre", "center"))
    
    #replace the value with the numerical value
    x_recoded = plyr::mapvalues(split_x, positions, positions_values, warn_missing = F)
    
    #as numeric, gets rid of strings
    x_vals = as.numeric(x_recoded)
    
    #mean
    y = mean(x_vals, na.rm = T)
    
    #odd?
    # if (is.nan(y)) browser()
    
    y
  })
## Warning in .f(.x[[i]], ...): NAs introduced by coercion

## Warning in .f(.x[[i]], ...): NAs introduced by coercion

## Warning in .f(.x[[i]], ...): NAs introduced by coercion
#hard-coded values
#done when text is not perfectly standardized in idiosyncratic ways
parties[parties$Party_English == "French Communist Party", "wp_pol_position_numerical"] = -2
parties[parties$Party_English == "Scottish National Party", "wp_pol_position_numerical"] = -1
parties[parties$Party_English == "Agrarian Party of Russia", "wp_pol_position_numerical"] = -0.5
parties[parties$Party_English == 'Russian Democratic Party "Yabloko"', "wp_pol_position_numerical"] = 0


#add party data to voting data
voting_augmented_all = left_join(voting_aggr %>% select(-Election_type, -ends_with("_sample"), -comments), parties, by = c("party_country"))

#main dataset
voting_augmented = voting_augmented_all %>% filter(General_pct >= 2)

#European only
voting_augmented_EU = voting_augmented %>% filter(!Country %in% c("USA", "Australia", "Russia"))

Descriptive stats

#parties included, including only those with data and including pseudo-parties
voting_augmented %>% filter(!is.na(RR)) %>% nrow()
## [1] 135
#parties included, including only those with data and tags
voting_augmented %>% filter(!is.na(RR), !is.na(wp_ideolgy)) %>% nrow()
## [1] 122
voting_augmented %>% filter(!is.na(RR), !is.na(wp_ideolgy)) %>% .$Country %>% table2(include_NA = F) %>% print(n = 20)
## # A tibble: 18 x 3
##    Group          Count Percent
##    <chr>          <dbl>   <dbl>
##  1 France           16.   13.1 
##  2 Slovenia          9.    7.38
##  3 Denmark           8.    6.56
##  4 Finland           8.    6.56
##  5 Poland            8.    6.56
##  6 Sweden            8.    6.56
##  7 Belgium           7.    5.74
##  8 Ireland           7.    5.74
##  9 Norway            7.    5.74
## 10 Austria           6.    4.92
## 11 Germany           6.    4.92
## 12 Netherlands       6.    4.92
## 13 Switzerland       6.    4.92
## 14 United Kingdom    6.    4.92
## 15 Russia            5.    4.10
## 16 Canada            4.    3.28
## 17 Australia         3.    2.46
## 18 USA               2.    1.64

Weights

Calculate weights based on various metrics such as sample size or country-fixed weights (i.e. same weight to each country no matter how many parties).

#country fixed weights
#have to condition on missing data
#weights can result in Inf, which we recode to 1's
voting_augmented = plyr::ddply(voting_augmented, c("Country"), function(d) {
  
  #assign 1 weight distributed across parties with data
  #per block
  #but all parties should have this...
  d$weight_b = 1 / sum(!is.na(d$Block))
  #per political position
  d$weight_pp = 1 / sum(!is.na(d$wp_pol_position))
  #ideology 
  d$weight_i = 1 / sum(!is.na(d$wp_ideolgy))
  #both
  d$weight_ppi = 1 / min(sum(!is.na(d$wp_pol_position)), sum(!is.na(d$wp_ideolgy)))
  #replace Inf with the maximum
  #Inf causes issues in step-wise regressions
  d$weight_ppi[is.infinite(d$weight_ppi)] = pmin(d$weight_pp[is.infinite(d$weight_ppi)], d$weight_i[is.infinite(d$weight_ppi)])
  
  d
})

Simple tags

#simple tag approach
simple_tag = map_df(names(ideology_tags), function(tag) {
  #mean by tag
  data_frame(
    tag = tag,
    OR = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_mean(OR, weight_i),
    RR = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_mean(RR, weight_i),
    d = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_mean(d, weight_i)
  )
})

#print all
print(simple_tag, n = Inf)
## # A tibble: 19 x 4
##    tag                      OR    RR      d
##    <chr>                 <dbl> <dbl>  <dbl>
##  1 socialism             1.99  1.65    8.14
##  2 green                 3.43  2.85   11.5 
##  3 conservative          0.706 0.600  -6.98
##  4 nationalism           0.509 0.537  -9.18
##  5 euroscepticism        1.01  0.912  -4.22
##  6 pro_europeanism       3.05  2.35    7.68
##  7 feminism              2.50  2.29    7.54
##  8 libertarianism        0.403 0.506 -14.1 
##  9 national_conservatism 0.202 0.262 -16.0 
## 10 agrarianism           0.223 0.238  -8.47
## 11 christian             0.857 0.646  -5.41
## 12 communism             1.91  1.76    4.68
## 13 centrism              1.26  0.971  -3.38
## 14 RW_populism           0.274 0.332 -14.1 
## 15 populism              0.596 0.474  -7.15
## 16 social_democracy      1.82  1.46    5.44
## 17 democratic_socialism  1.93  1.55    8.61
## 18 social_liberalism     2.53  2.07    9.73
## 19 liberalism            1.42  1.23   -1.15
#plot
simple_tag %>% 
  #gather
  gather(metric, value, OR, RR, d) %>% 
  ggplot(aes(tag, value)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  facet_grid(. ~ metric, scales = "free") +
  theme_bw()

GG_save("figs/simple_tags.png")

#medians for robustness
simple_tag_median = map_df(names(ideology_tags), function(tag) {
  #mean by tag
  data_frame(
    tag = tag,
    OR = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_median(OR, weight_i),
    RR = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_median(RR, weight_i),
    d = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_median(d, weight_i)
  )
})

#print all
print(simple_tag_median, n = Inf)
## # A tibble: 19 x 4
##    tag                      OR    RR      d
##    <chr>                 <dbl> <dbl>  <dbl>
##  1 socialism             1.92  1.44    7.09
##  2 green                 2.90  2.71    8.39
##  3 conservative          0.379 0.542  -7.71
##  4 nationalism           0.326 0.486  -7.51
##  5 euroscepticism        0.347 0.358  -3.87
##  6 pro_europeanism       2.29  2.10    7.94
##  7 feminism              2.31  2.14    6.96
##  8 libertarianism        0.178 0.253 -21.7 
##  9 national_conservatism 0.108 0.134 -17.4 
## 10 agrarianism           0.181 0.230  -4.60
## 11 christian             0.404 0.473  -4.82
## 12 communism             1.38  1.31    4.85
## 13 centrism              0.502 0.681 -18.2 
## 14 RW_populism           0.215 0.314 -16.4 
## 15 populism              0.340 0.381  -8.84
## 16 social_democracy      1.39  1.24    4.74
## 17 democratic_socialism  1.88  1.43    6.97
## 18 social_liberalism     2.09  1.68    6.59
## 19 liberalism            0.876 0.888  -1.07
#plot
simple_tag_median %>% 
  #gather
  gather(metric, value, OR, RR, d) %>% 
  ggplot(aes(tag, value)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  facet_grid(. ~ metric, scales = "free") +
  theme_bw()

GG_save("figs/simple_tags_median.png")

#means without weights
#just for comparison
simple_tag_nowt = map_df(names(ideology_tags), function(tag) {
  #mean by tag
  data_frame(
    tag = tag,
    OR = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_mean(OR),
    RR = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_mean(RR),
    d = voting_augmented %>% filter(voting_augmented[[tag]]) %$% wtd_mean(d)
  )
})

#do methods agree?
wtd.cors(cbind(medians = simple_tag_median[-1], means = simple_tag[-1], means_no_weights = simple_tag_nowt[-1]))
##                     medians.OR medians.RR medians.d means.OR means.RR
## medians.OR                1.00       0.99      0.86     0.97     0.97
## medians.RR                0.99       1.00      0.82     0.98     0.98
## medians.d                 0.86       0.82      1.00     0.83     0.83
## means.OR                  0.97       0.98      0.83     1.00     0.99
## means.RR                  0.97       0.98      0.83     0.99     1.00
## means.d                   0.94       0.91      0.92     0.94     0.93
## means_no_weights.OR       0.97       0.97      0.83     1.00     0.99
## means_no_weights.RR       0.97       0.97      0.83     0.99     1.00
## means_no_weights.d        0.95       0.93      0.89     0.96     0.96
##                     means.d means_no_weights.OR means_no_weights.RR
## medians.OR             0.94                0.97                0.97
## medians.RR             0.91                0.97                0.97
## medians.d              0.92                0.83                0.83
## means.OR               0.94                1.00                0.99
## means.RR               0.93                0.99                1.00
## means.d                1.00                0.93                0.93
## means_no_weights.OR    0.93                1.00                0.99
## means_no_weights.RR    0.93                0.99                1.00
## means_no_weights.d     0.98                0.96                0.97
##                     means_no_weights.d
## medians.OR                        0.95
## medians.RR                        0.93
## medians.d                         0.89
## means.OR                          0.96
## means.RR                          0.96
## means.d                           0.98
## means_no_weights.OR               0.96
## means_no_weights.RR               0.97
## means_no_weights.d                1.00

Political position

Crude block level analysis.

#simple block level
table2(parties$Block)
#different outcomes
#difference in %
ols(d ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
##         d     Block (weights) 
##         1        11         0 
## 
## Linear Regression Model
##  
##  ols(formula = d ~ Block, data = voting_augmented, weights = weight_b)
##  
##  
##                 Model Likelihood     Discrimination    
##                    Ratio Test           Indexes        
##  Obs     124    LR chi2     54.14    R2       0.354    
##  sigma4.1299    d.f.            2    R2 adj   0.343    
##  d.f.    121    Pr(> chi2) 0.0000    g        8.116    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -27.4977  -7.0792   0.2798   6.4935  36.9662 
##  
##  
##              Coef     S.E.   t     Pr(>|t|)
##  Intercept     4.5638 3.5081  1.30 0.1958  
##  Block=left    2.6739 3.7795  0.71 0.4806  
##  Block=right -13.6610 3.8052 -3.59 0.0005  
## 
#relative ratio
ols(RR ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
##        RR     Block (weights) 
##         1        11         0 
## 
## Linear Regression Model
##  
##  ols(formula = RR ~ Block, data = voting_augmented, weights = weight_b)
##  
##  
##                 Model Likelihood     Discrimination    
##                    Ratio Test           Indexes        
##  Obs     124    LR chi2     41.38    R2       0.284    
##  sigma0.4064    d.f.            2    R2 adj   0.272    
##  d.f.    121    Pr(> chi2) 0.0000    g        0.682    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -1.9341 -0.5614 -0.1405  0.3692  3.3153 
##  
##  
##              Coef    S.E.   t     Pr(>|t|)
##  Intercept    1.6498 0.3452  4.78 <0.0001 
##  Block=left   0.2843 0.3719  0.76 0.4461  
##  Block=right -1.0883 0.3744 -2.91 0.0043  
## 
#odds ratio
ols(OR ~ Block, data = voting_augmented, weights = weight_b)
## Frequencies of Missing Values Due to Each Variable
##        OR     Block (weights) 
##         1        11         0 
## 
## Linear Regression Model
##  
##  ols(formula = OR ~ Block, data = voting_augmented, weights = weight_b)
##  
##  
##                 Model Likelihood     Discrimination    
##                    Ratio Test           Indexes        
##  Obs     124    LR chi2     40.96    R2       0.281    
##  sigma0.5455    d.f.            2    R2 adj   0.269    
##  d.f.    121    Pr(> chi2) 0.0000    g        0.899    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.3390 -0.6764 -0.1714  0.3880  4.8533 
##  
##  
##              Coef    S.E.   t     Pr(>|t|)
##  Intercept    2.1730 0.4634  4.69 <0.0001 
##  Block=left   0.1659 0.4992  0.33 0.7402  
##  Block=right -1.6450 0.5026 -3.27 0.0014  
## 

Numerical position.

#relative rate
voting_augmented %>% 
  GG_scatter("wp_pol_position_numerical", "RR", weights = "weight_pp", weight_as_size = F) +
  scale_x_continuous("Political position (Wikipedia data)",
                     limits = c(-3, 3),
                     breaks = -3:3,
                     labels = c("Far left", "Left", "Center-left", "Center", "Center-right", "Right", "Far right")) +
  scale_y_continuous("Times representation among journalists\ncompared to elections")

GG_save("figs/wp_position_RR.png")

voting_augmented %>% 
  GG_scatter("wp_pol_position_numerical", "d", weights = "weight_pp", weight_as_size = F) +
  scale_x_continuous("Political position (Wikipedia data)",
                     limits = c(-3, 3),
                     breaks = -3:3,
                     labels = c("Far left", "Left", "Center-left", "Center", "Center-right", "Right", "Far right")) +
  scale_y_continuous("%points representation among journalists\ncompared to elections")