Getting documents

Apsnypress has a special section dedicated to communications from Abkhazia’s Central Electoral Commission. Unfortunately, not all relevant information has been properly tagged, but it is a place to start.

# Download the Central Election Commission documents used as raw material.
# download.file() fails when the destination folder is missing, so make sure
# both target folders exist first (a no-op when they are already there).
dir.create("pdf", showWarnings = FALSE)
dir.create("html", showWarnings = FALSE)

# Source page:
# http://www.apsnypress.info/cik/tsentrizbirkom-opublikoval-kalendarnyy-plan-osnovnykh-meropriyatiy-po-podgotovke-i-provedeniyu-vybor/
# mode = "wb": a PDF is a binary file; the default text mode rewrites line
# endings on Windows and leaves the file unreadable.
download.file(
    url = "http://www.apsnypress.info/upload/KP_vyboryNSPRA_120317.pdf",
    destfile = file.path("pdf", "CalendarPreparationPlan.pdf"),
    mode = "wb"
)

# Source page:
# http://www.apsnypress.info/cik/resheniem-tsika-obrazovano-35-izbiratelnykh-okrugov-po-vyboram-deputatov-parlamenta-abkhazii/
# Lead of the article (translated): "The Central Election Commission has
# defined the boundaries of the electoral districts for the elections of
# deputies to the People's Assembly - Parliament of Abkhazia.
# Sukhum, 18 January. Apsnypress. List of electoral districts with their
# boundaries, number of voters and the locations of the district election
# commissions."
download.file(
    url = "http://www.apsnypress.info/news/resheniem-tsika-obrazovano-35-izbiratelnykh-okrugov-po-vyboram-deputatov-parlamenta-abkhazii/",
    destfile = file.path("html", "ListOfElectoralDistricts.html")
)

# Source page:
# http://www.apsnypress.info/cik/135-initsiativnykh-grupp-po-vydvizheniyu-kandidatov-v-deputaty-parlamenta-zaregistrirovano-v-tsik-/
# Lead of the article (translated): "Between 21 January and 8 February 2017
# the Central Election Commission of the Republic of Abkhazia registered 135
# initiative groups for the nomination of candidates for deputy and 4
# political parties: 'Ainar', 'Amtsakhara', 'People's Front for Justice and
# Development' and 'Forum for the National Unity of Abkhazia'."
download.file(
    url = "http://www.apsnypress.info/cik/135-initsiativnykh-grupp-po-vydvizheniyu-kandidatov-v-deputaty-parlamenta-zaregistrirovano-v-tsik-/",
    destfile = file.path("html", "InitiativeGroups135.html")
)

These include data on registered candidates, such as their profession and home address.

# Download the PDFs with data on registered candidates, one per registration
# date, plus the order of the candidates' appearances on state television.
# Every file is a PDF, so each call uses mode = "wb": the default text mode
# rewrites line endings on Windows and corrupts binary files.
# download.file() also fails when the destination folder is missing.
dir.create("pdf", showWarnings = FALSE)

# Source page:
# http://www.apsnypress.info/news/tsentrizbirkom-opublikoval-dannye-zaregistrirovannykh-kandidatov-v-deputaty-parlamenta/
download.file(
    url = "http://www.apsnypress.info/upload/dannieKandidarov_nspra_20017.pdf",
    destfile = file.path("pdf", "Registered-2017-02-10.pdf"),
    mode = "wb"
)

# Source page:
# http://www.apsnypress.info/cik/tsik-opublikoval-dannye-zaregistrirovannykh-13-fevralya-kandidatov-v-deputaty-parlamenta/
download.file(
    url = "http://apsnypress.info/upload/kandidati_13.2.17.pdf",
    destfile = file.path("pdf", "Registered-2017-02-13.pdf"),
    mode = "wb"
)

# Source page:
# http://www.apsnypress.info/cik/16-fevralya-tsentrizbirkom-zaregistriroval-12-kandidatov-v-deputaty-parlamenta-abkhazii-/
download.file(
    url = "http://apsnypress.info/upload/kandidati_16.2.17.pdf",
    destfile = file.path("pdf", "Registered-2017-02-16.pdf"),
    mode = "wb"
)

# Source page:
# http://www.apsnypress.info/cik/18-fevralya-tsik-zaregistriroval-devyat-kandidatov-v-deputaty-parlamenta-abkhazii-/
download.file(
    url = "http://apsnypress.info/upload/kandidati_18.2.17.pdf",
    destfile = file.path("pdf", "Registered-2017-02-18.pdf"),
    mode = "wb"
)

# Source page:
# http://www.apsnypress.info/cik/tsentrizbirkom-abkhazii-opublikoval-dannye-kandidatov-v-deputaty-parlamenta-zaregistrirovannykh-20-f/
download.file(
    url = "http://www.apsnypress.info/upload/kandidati_nspra_210217.pdf",
    destfile = file.path("pdf", "Registered-2017-02-21.pdf"),
    mode = "wb"
)

# Source page:
# http://www.apsnypress.info/cik/opredelena-ocherednost-vystupleniy-kandidatov-v-deputaty-parlamenta-na-gosudarstvennom-televidenii/
download.file(
    url = "http://www.apsnypress.info/upload/ocherednosty_vystupleniya_kandidatov_nspra_200217.pdf",
    destfile = file.path("pdf", "TVappearanceOrder.pdf"),
    mode = "wb"
)

Election results

# Download the first-round results: the news article (HTML) and the three PDF
# tables published with it.
# download.file() fails when the destination folder is missing, so make sure
# both target folders exist first (a no-op when they are already there).
dir.create("pdf", showWarnings = FALSE)
dir.create("html", showWarnings = FALSE)

# Source page:
# http://www.apsnypress.info/news/tsik-v-12-okrugakh-izbrany-deputaty-parlamenta-abkhazii/
download.file(
    url = "http://www.apsnypress.info/news/tsik-v-12-okrugakh-izbrany-deputaty-parlamenta-abkhazii/",
    destfile = file.path("html", "2017-03-13-FirstRoundResults.html")
)

# The PDFs below use mode = "wb": the default text mode rewrites line endings
# on Windows and corrupts binary files.

# First-round winners
download.file(
    url = "http://www.apsnypress.info/upload/izbrannie_deputatyNSPRA_120317.pdf",
    destfile = file.path("pdf", "2017-03-13-FirstRoundWinners.pdf"),
    mode = "wb"
)

# Districts going to a second round
download.file(
    url = "http://www.apsnypress.info/upload/2tur_nspra.pdf",
    destfile = file.path("pdf", "2017-03-13-FirstRoundToSecondRound.pdf"),
    mode = "wb"
)

# Districts where the election is repeated
download.file(
    url = "http://www.apsnypress.info/upload/povtornievibory_nspra_2017.pdf",
    destfile = file.path("pdf", "2017-03-13-FirstRoundRepeatElections.pdf"),
    mode = "wb"
)

Extract results

Repeat round

# Helper for interactive use: list the first-round PDFs.
# list.files(path = file.path("pdf"), pattern = "FirstRound", full.names = TRUE)

# Attempt an automatic extraction of the repeat-election table from the PDF.
# The result is replaced further down by values typed in by hand.
FirstRoundRepeatElectionsDF <- file.path("pdf", "2017-03-13-FirstRoundRepeatElections.pdf") %>%
    extract_tables(file = .) %>%
    as.data.frame(stringsAsFactors = FALSE)

names(FirstRoundRepeatElectionsDF) <- c(
    "District", "Name", "VotersInList", "AddedToList",
    "TotalVoters", "TotalVoting", "Votes", "ShareVotes"
)

# Manual data entry: one row per candidate; the full name is then split into
# surname, first name and patronymic.
FirstRoundRepeatElectionsDF <- separate(
    data = tribble(
        ~District, ~Name, ~VotersInList, ~AddedToList, ~TotalVoters, ~TotalVoting, ~Votes, ~ShareVotes,
        17, "Лакербая Леонид Иванович", 3962, 186, 4148, 2089, 1034, 0.495,
        17, "Чамагуа Леонид Михайлович", 3962, 186, 4148, 2089, 964, 0.4615
    ),
    col = Name,
    into = c("Surname", "Name", "Patronimic")
)

knitr::kable(x = FirstRoundRepeatElectionsDF)
District Surname Name Patronimic VotersInList AddedToList TotalVoters TotalVoting Votes ShareVotes
17 Лакербая Леонид Иванович 3962 186 4148 2089 1034 0.4950
17 Чамагуа Леонид Михайлович 3962 186 4148 2089 964 0.4615

Districts going to second round

# Build the table of candidates who go on to the second round from the PDF
# published by the electoral commission.
# Helper for interactive use: list the first-round PDFs.
#list.files(path = file.path("pdf"), pattern = "FirstRound", full.names = TRUE)

# Extract table from PDF
# extract_tables() returns a list of tables; the first three are stacked into
# a single data frame.
FirstRoundToSecondRound <- extract_tables(file = file.path("pdf", "2017-03-13-FirstRoundToSecondRound.pdf"))
FirstRoundToSecondRoundDF <- bind_rows(as.data.frame(FirstRoundToSecondRound[1], stringsAsFactors = FALSE), as.data.frame(FirstRoundToSecondRound[2], stringsAsFactors = FALSE), as.data.frame(FirstRoundToSecondRound[3], stringsAsFactors = FALSE))

# Set correct names of columns
colnames(FirstRoundToSecondRoundDF) <- c("District", "Name", "VotersInList", "AddedToList", "TotalVoters", "TotalVoting", "Votes", "ShareVotes")


# Polish table
# Drop the first seven rows (presumably the table header spread over several
# extracted rows - TODO confirm against the PDF).
FirstRoundToSecondRoundDF <- FirstRoundToSecondRoundDF[-(1:7),]

# District is filled in on one row per district only: convert to numeric and
# carry the last non-missing value forward with zoo::na.locf(). One extra
# element (the second-to-last raw value) is appended before the final
# as.numeric(); presumably this restores the length after na.locf() drops a
# leading NA - TODO confirm, as it also shifts the values up by one row.
FirstRoundToSecondRoundDF$District <- as.numeric(c(zoo::na.locf(object =  as.numeric(FirstRoundToSecondRoundDF$District)), FirstRoundToSecondRoundDF$District[length(FirstRoundToSecondRoundDF$District)-1]))

# Same treatment for the per-district counts, after removing the spaces inside
# the numbers. NOTE(review): the appended element is the raw string, with its
# spaces still in place, so it becomes NA if it contains a space.
FirstRoundToSecondRoundDF$VotersInList <- as.numeric(c(zoo::na.locf(object =  as.numeric(gsub(pattern = " ", replacement = "", x = FirstRoundToSecondRoundDF$VotersInList))), FirstRoundToSecondRoundDF$VotersInList[length(FirstRoundToSecondRoundDF$VotersInList)-1]))

FirstRoundToSecondRoundDF$AddedToList <- as.numeric(c(zoo::na.locf(object =  as.numeric(gsub(pattern = " ", replacement = "", x = FirstRoundToSecondRoundDF$AddedToList))), FirstRoundToSecondRoundDF$AddedToList[length(FirstRoundToSecondRoundDF$AddedToList)-1]))

FirstRoundToSecondRoundDF$TotalVoters <- as.numeric(c(zoo::na.locf(object =  as.numeric(gsub(pattern = " ", replacement = "", x = FirstRoundToSecondRoundDF$TotalVoters))), FirstRoundToSecondRoundDF$TotalVoters[length(FirstRoundToSecondRoundDF$TotalVoters)-1]))

FirstRoundToSecondRoundDF$TotalVoting <- as.numeric(c(zoo::na.locf(object =  as.numeric(gsub(pattern = " ", replacement = "", x = FirstRoundToSecondRoundDF$TotalVoting))), FirstRoundToSecondRoundDF$TotalVoting[length(FirstRoundToSecondRoundDF$TotalVoting)-1]))

# ShareVotes: strip the percent sign, switch the decimal comma to a dot and
# rescale from percent to a fraction.
FirstRoundToSecondRoundDF$ShareVotes <- gsub(pattern = "%", replacement = "", x = FirstRoundToSecondRoundDF$ShareVotes)
FirstRoundToSecondRoundDF$ShareVotes <- gsub(pattern = ",", replacement = ".", x = FirstRoundToSecondRoundDF$ShareVotes)
FirstRoundToSecondRoundDF$ShareVotes <- as.numeric(FirstRoundToSecondRoundDF$ShareVotes)/100
# Votes: remove the spaces inside the numbers before converting.
FirstRoundToSecondRoundDF$Votes <- as.numeric(gsub(pattern = " ", replacement = "", x = FirstRoundToSecondRoundDF$Votes))

# Keep only the rows that carry a candidate name, then split the full name
# into surname, first name and patronymic.
FirstRoundToSecondRoundDF <- FirstRoundToSecondRoundDF %>% filter(!Name=="") %>% separate(col = Name, into = c("Surname", "Name", "Patronimic"))
# Reset the row names after the rows removed above.
rownames(FirstRoundToSecondRoundDF) <- NULL

# Hand-entered vote counts and vote shares for three candidates, overwriting
# the extracted values. The two columns are addressed by name (they are the
# 9th and 10th columns of the table).
FirstRoundToSecondRoundDF[
    with(FirstRoundToSecondRoundDF, Surname == "Хутаба"),
    c("Votes", "ShareVotes")
] <- c(842, 0.4429)
FirstRoundToSecondRoundDF[
    with(FirstRoundToSecondRoundDF, Surname == "Цишба"),
    c("Votes", "ShareVotes")
] <- c(704, 0.2103)
# Two candidates share the surname Джинджолия, so the first name is checked too.
FirstRoundToSecondRoundDF[
    with(FirstRoundToSecondRoundDF, Surname == "Джинджолия" & Name == "Омар"),
    c("Votes", "ShareVotes")
] <- c(651, 0.4118)

knitr::kable(x = FirstRoundToSecondRoundDF)
District Surname Name Patronimic VotersInList AddedToList TotalVoters TotalVoting Votes ShareVotes
1 Кварчия Гиви Анатольевич 5173 279 5452 2790 1162 0.4165
1 Гунба Бадра Зурабович 5173 279 5452 2790 799 0.2864
3 Агрба Валерий Владимирович 4336 604 4940 2668 951 0.3564
3 Адзынба Шамиль Омарович 4336 604 4940 2668 748 0.2804
4 Рштуни Эрик Сагателович 5949 357 6306 3081 752 0.2441
4 Айба Батал Эдуардович 5949 357 6306 3081 622 0.2019
5 Ашуба Лаша Нугзарович 4229 261 4490 2485 976 0.3928
5 Цахнакия Тамаз Георгиевич 4229 261 4490 2485 473 0.1903
6 Лабахуа Алиас Михайлович 5197 321 5518 2842 924 0.3251
6 Лолуа Рауль Валериевич 5197 321 5518 2842 752 0.2646
7 Джинджолия Алхас Владимирович 5194 257 5451 2716 889 0.3273
7 Адлейба Астамур Борисович 5194 257 5451 2716 994 0.3660
8 Абухба Ахра Иванович 3906 236 4142 2272 497 0.2188
8 Агрба Гудиса Эдуардович 3906 236 4142 2272 426 0.1875
9 Хутаба Даут Витальевич 4579 77 4656 2028 842 0.4429
9 Смыр Беслан Алексеевич 4579 77 4656 2028 586 0.2890
12 Цишба Александр Владимирович 6998 294 7292 3348 704 0.2103
12 Кецба Астамур Шамилевич 6998 294 7292 3348 960 0.2867
13 Трапизонян Галуст Парнакович 2121 193 2314 2366 509 0.2151
13 Дащян Левон Вагаршакович 2121 193 2314 2366 775 0.3276
19 Гунба Дмитрий Георгиевич 3492 212 3704 2541 780 0.3070
19 Отырба Астамур Михайлович 3492 212 3704 2541 724 0.2849
20 Смыр Натали Викторовна 2651 72 2723 1700 729 0.4288
20 Авидзба Баталбей Шаурович 2651 72 2723 1700 458 0.2694
21 Авидзба Фазлибей Лизберович 2313 59 2372 1158 422 0.3644
21 Ардзинба Алмасхан Зурабович 2313 59 2372 1158 489 0.4223
23 Кварчия Валерий Ермейович 1738 132 1870 1201 368 0.3064
23 Ашуба Аида Шалодиевна 1738 132 1870 1201 304 0.2531
26 Габуния Георгий Борисович 3587 201 3587 2142 558 0.2605
26 Гуния Илья Чичикович 3587 201 3587 2142 910 0.4248
27 Зухба Юрий Зауриевич 1877 124 2001 1477 404 0.2735
27 Бебия Венори Яковлевич 1877 124 2001 1477 557 0.3771
28 Каджая Дато Олегович 1980 168 2148 1643 492 0.2995
28 Логуа Астамур Омарович 1980 168 2148 1643 386 0.2349
31 Бжания Аслан Георгиевич 2288 102 2390 1678 809 0.4821
31 Квициния Темур Паатович 2288 102 2390 1678 627 0.3737
32 Тарба Инал Романович 4131 394 4525 2657 1111 0.4181
32 Цвижба Отари Шотович 4131 394 4525 2657 780 0.2936
33 Аргун Кадыр Валериевич 4221 223 4444 2234 432 0.1934
33 Ардзинба Тайфун Наимович 4221 223 4444 2234 1002 0.4485
34 Джинджолия Омар Какович 2020 140 2160 1581 651 0.4118
34 Кварчия Кан Валерьевич 2020 140 2160 1581 628 0.3972
35 Адлейба Дмитрий Александрович 608 25 633 471 53 0.1125
35 Пертая Каха Зауриевич 608 25 633 471 206 0.4374

First round winners

# Helper for interactive use: list the first-round PDFs.
# list.files(path = file.path("pdf"), pattern = "FirstRound", full.names = TRUE)

# Read the table of first-round winners straight from the PDF.
FirstRoundWinnersDF <- data.frame(
    x = extract_tables(file = file.path("pdf", "2017-03-13-FirstRoundWinners.pdf")),
    stringsAsFactors = FALSE
)

# Name the columns, then keep only the rows where VotersInList is filled in.
names(FirstRoundWinnersDF) <- c(
    "District", "Name", "VotersInList", "AddedToList",
    "TotalVoters", "TotalVoting", "Votes", "ShareVotes"
)
hasVoterCount <- FirstRoundWinnersDF$VotersInList != ""
FirstRoundWinnersDF <- FirstRoundWinnersDF[hasVoterCount, ]
rownames(FirstRoundWinnersDF) <- NULL

# Count columns (1 and 3 to 7): remove the spaces inside the numbers and
# convert to numeric.
countColumns <- c("District", "VotersInList", "AddedToList", "TotalVoters", "TotalVoting", "Votes")
FirstRoundWinnersDF[countColumns] <- lapply(
    FirstRoundWinnersDF[countColumns],
    function(rawCount) as.numeric(x = gsub(pattern = " ", replacement = "", x = rawCount))
)

# Vote share: decimal comma to dot, drop the percent sign, rescale to a fraction.
shareText <- gsub(pattern = ",", replacement = ".", x = FirstRoundWinnersDF$ShareVotes)
shareText <- gsub(pattern = "%", replacement = "", x = shareText)
FirstRoundWinnersDF$ShareVotes <- as.numeric(x = shareText) / 100

# Split the full name into surname, first name and patronymic.
FirstRoundWinnersDF <- separate(
    data = FirstRoundWinnersDF,
    col = Name,
    into = c("Surname", "Name", "Patronimic")
)

knitr::kable(x = FirstRoundWinnersDF)
District Surname Name Patronimic VotersInList AddedToList TotalVoters TotalVoting Votes ShareVotes
2 Джапуа Алмас Северьянович 5312 294 5606 1946 1095 0.5627
10 Хагуш Юрий Львович 5117 204 5321 2882 1745 0.6055
11 Аршба Астамур Борисович 4577 226 4803 2340 1391 0.5944
14 Дбар Дмитрий Сергеевич 3151 137 3288 1949 1043 0.5351
15 Ардзинба Дмитрий Григорьевич 3462 273 3735 2549 1343 0.5269
16 Сангулия Михаил Петрович 3726 88 3814 1665 1094 0.6571
18 Анкваб Александр Золотинскович 3450 123 3573 1967 1028 0.5226
22 Галустян Левон Миружанович 3244 149 3393 1624 1012 0.6232
24 Миносян Ашот Ваграмович 3356 140 3496 2125 1281 0.6028
25 Харазия Саид Геннадиевич 4928 171 5099 2452 1277 0.5208
29 Тарба Астамур Адикович 2430 193 2623 1910 1075 0.5628
30 Табагуа Батал Иванович 2477 104 2581 1733 927 0.5349

All together

# Stack the three result tables into one, tagging every row with the table it
# came from (column "Type"), and sort by district.
votes <- arrange(
    bind_rows(
        list(
            FirstRoundWin = FirstRoundWinnersDF,
            SecondRound = FirstRoundToSecondRoundDF,
            RepeatVote = FirstRoundRepeatElectionsDF
        ),
        .id = "Type"
    ),
    District
)

# Strip stray whitespace from the name parts before building the key
# "Surname, Name".
votes[c("Name", "Surname")] <- lapply(votes[c("Name", "Surname")], trimws)
votes$FullName <- paste(votes$Surname, votes$Name, sep = ", ")
# View(votes)

Extract all candidates

# Paths of the PDFs with registered candidates (file names contain "Registered").
RegisteredPdf <- list.files(path = file.path("pdf"), pattern = "Registered", full.names = TRUE)

# First PDF: stack the first three extracted tables into one data frame.
reg1 <- extract_tables(file = RegisteredPdf[1])

reg1DF <- bind_rows(data.frame(reg1[1], stringsAsFactors = FALSE), data.frame(reg1[2], stringsAsFactors = FALSE), data.frame(reg1[3], stringsAsFactors = FALSE))
names(reg1DF) <- c("District", "Surname", "Name", "Patronimic", "DateOfBirth", "Employer", "Profession", "City", "Street", "House", "Apartment")
# Drop the first three rows and the two surplus columns (12 and 13).
reg1DF <- reg1DF[-(1:3),-(12:13)]

# Where the Employer cell of a row is empty, rebuild it from the rows directly
# above and below it (presumably a multi-line cell that the extraction spread
# over neighbouring rows - TODO confirm against the PDF).
# Only neighbours that exist are used, so the first and last rows no longer
# pick up a leading space or a literal "NA", and an empty table or an NA
# employer no longer stops the loop with an error.
employerBlank <- which(!is.na(reg1DF$Employer) & reg1DF$Employer == "")
for (i in employerBlank) {
    neighbours <- c(
        if (i > 1) reg1DF$Employer[i - 1],
        if (i < nrow(reg1DF)) reg1DF$Employer[i + 1]
    )
    reg1DF$Employer[i] <- paste(neighbours, collapse = " ")
}
# Keep only the rows that carry a district number.
reg1DF <- reg1DF[reg1DF$District!="",]
# some professions not correctly recorded

# Column layout shared by all the candidate-registration tables.
candidateColumns <- c(
    "District", "Surname", "Name", "Patronimic", "DateOfBirth", "Employer",
    "Profession", "City", "Street", "House", "Apartment"
)

# Second PDF: the candidates are spread over five tables; stack them, name the
# columns and drop the first row.
reg2 <- extract_tables(file = RegisteredPdf[2])
reg2DF <- bind_rows(
    lapply(
        1:5,
        function(tableIndex) data.frame(reg2[tableIndex], stringsAsFactors = FALSE)
    )
)
names(reg2DF) <- candidateColumns
reg2DF <- reg2DF[-1, ]

# Third PDF: first table only; name the columns and drop the first row.
reg3 <- extract_tables(file = RegisteredPdf[3])
reg3DF <- setNames(data.frame(reg3[1], stringsAsFactors = FALSE), candidateColumns)[-1, ]

# Fourth PDF: same treatment as the third.
reg4 <- extract_tables(file = RegisteredPdf[4])
reg4DF <- setNames(data.frame(reg4[1], stringsAsFactors = FALSE), candidateColumns)[-1, ]

# Fifth PDF: the first table has one extra leading column, so the first row
# and the first column are dropped before the columns are named.
reg5 <- extract_tables(file = RegisteredPdf[5])
reg5DF <- data.frame(reg5[1], stringsAsFactors = FALSE)[-1, -1]
names(reg5DF) <- candidateColumns

# Combine the five registration tables into a single list of candidates.
registered <- bind_rows(list(reg1DF, reg2DF, reg3DF, reg4DF, reg5DF))

# Type conversions: district number as integer, date of birth parsed as
# day-month-year.
registered[["District"]] <- as.integer(registered[["District"]])
registered[["DateOfBirth"]] <- lubridate::dmy(registered[["DateOfBirth"]])

# Strip stray whitespace from the three name parts.
nameParts <- c("Surname", "Name", "Patronimic")
registered[nameParts] <- lapply(registered[nameParts], trimws)

# Sort by district and add the key "Surname, Name".
registered <- arrange(registered, District)
registered$FullName <- paste(registered$Surname, registered$Name, sep = ", ")
# View(registered)
# duplicated(x = registered$FullName)

All candidates

# Candidates put forward in each district, typed in by hand from:
# http://www.apsnypress.info/cik/135-initsiativnykh-grupp-po-vydvizheniyu-kandidatov-v-deputaty-parlamenta-zaregistrirovano-v-tsik-/
#
# One tibble per district (District, DistrictName and Rayon are recycled over
# the candidates), stacked with bind_rows(). The result is printed, not stored.
# tibble() replaces the deprecated data_frame(); the output is the same.
bind_rows(
    tibble(
        District = "1",
        DistrictName = "Новый район",
        Rayon = "Sukhumi",
        Candidate = c("1. Кварчия Гиви Анатольевич", "2. Купалба Темыр Гурамович", "3. Гунба Бадра Зурабович", "4. Корсантия Гурам Виталиевич", "5. Авидзба Ахрик Русланович")
    ),
    tibble(
        District = "2",
        DistrictName = "Новый район",
        Rayon = "Sukhumi",
        Candidate = c("1. Габния Гиви Георгиевич", "2. Бганба Роланд Анатолиевич", "3.  Джапуа Алмас Северьянович от ПП «Аинар»")
    ),
    tibble(
        District = "3",
        DistrictName = "Старый поселок",
        Rayon = "Sukhumi",
        Candidate = c("1. Адзынба Шамиль Омарович", "2. Агрба Валерий Владимирович", "3. Харчилава Ираклий Игорьевич", "4. Анкваб Александр Золотинскович", "5. Колбая Дмитрий Александрович")
    ),
    tibble(
        District = "4",
        DistrictName = "Северный",
        Rayon = "Sukhumi",
        Candidate = c("1. Хагуш Саида Львовна", "2. Качарава Роман Сергеевич", "3. Айба Батал Эдуардович", "4. Рштуни Эрик Сагателович", "5. Чирикба Вячеслав Андреевич", "6. Агумава Эраст Рамзанович", "7. Цвинария Беслан Константинович")
    ),
    tibble(
        District = "5",
        DistrictName = "Синопский",
        Rayon = "Sukhumi",
        Candidate = c("1. Адлейба Георгий Лериевич", "2. Лагвилава Вадим Иванович", "3. Ашуба Лаша Нугзарович", "4. Цахнакия Тамаз Георгиевич", "5. Тужба Ираклий Вахтангович от ПП «Аинар»", "6. Васильев Владимир Владимирович")
    ),
    tibble(
        District = "6",
        DistrictName = "Центральный",
        Rayon = "Sukhumi",
        Candidate = c("1. Агрба Тенгиз Алмасханович", "2. Лабахуа Алиас Михайлович", "3. Миканба Георгий Валерианович", "4. Кварчия Абесалом Алексеевич", "5. Лолуа Рауль Валериевич", "6. Квициния Алхас Шалодиевич")
    ),
    tibble(
        District = "7",
        DistrictName = "Библиотека",
        Rayon = "Sukhumi",
        Candidate = c("1. Адлейба Астамур Борисович", "2. Джинджолия Алхас Владимирович", "3. Шамба Сергей Миронович", "4. Амалыиа Наира Концаловна")
    ),
    tibble(
        District = "8",
        DistrictName = "Маякский",
        Rayon = "Sukhumi",
        Candidate = c("1. Квициния Беслан Алексеевич", "2. Язычба Заур Гаидарович", "3. Айба Джемалик Владимирович", "4. Кишмария Астамур Ингиштерович", "5. Джопуа Тенгиз Шотович от ПП «Аинар»", "6. Папба Валерий Иванович")
    )
    # Unfinished stub for the next entry, left commented out:
    # ,
    # data_frame(District = "8",
    #            DistrictName = "Маякский",
    #            Rayon = "Gagra",
    #            Candidate
)

Find most “famous” candidates

# Load the saved news dataset; the code below reads its `contents` column
# through the object `dataset`.
load(file = file.path("..", "..", "deFactoNews", "apsnyPress", "Dataset", "2017-03-17 - deFactoNews - apsnyPress - dataset.RData"))

# One search pattern per registered candidate, in the order "Name Surname".
# The last letter of the first name is dropped and up to two lowercase Cyrillic
# letters are allowed after both the first name and the surname (presumably so
# that declined forms of the names are matched too - TODO confirm).
regexNames <- registered %>% transmute(FullName, RegexNames = paste0(stringr::str_trunc(string = trimws(Name), width = nchar(trimws(Name))-1, side = "right", ellipsis = ""), "[а-я]?[а-я]?", " ", trimws(Surname), "[а-я]?[а-я]?"))

# Total number of matches of each pattern over all the articles.
# vapply() builds the whole column in one go: it does not write element by
# element into a column that does not exist yet, and it copes with a
# candidate list of zero rows (where a 1:nrow() loop would still run).
regexNames$mentions <- vapply(
    X = regexNames$RegexNames,
    FUN = function(namePattern) {
        sum(str_count(string = dataset$contents, pattern = namePattern), na.rm = TRUE)
    },
    FUN.VALUE = numeric(1),
    USE.NAMES = FALSE
)

# Sort by number of mentions and freeze that order in the FullName factor, so
# that the charts list the candidates from least to most mentioned.
fame <- regexNames %>% mutate(mentions = as.integer(mentions)) %>% arrange(mentions) %>% mutate(FullName = forcats::fct_inorder(f = FullName, ordered = TRUE)) %>% select(-RegexNames)

fame %>% 
    filter(mentions>50) %>%  # keep only candidates mentioned more than 50 times
    filter(mentions<1000) %>%  # drop the outlier(s) with 1000 mentions or more
    ggplot(mapping = aes(x = FullName, y = mentions)) +
    geom_col() +
    coord_flip() +
    scale_x_discrete(name = "") + 
    scale_y_continuous(name = "") +
    theme_minimal() +
    labs(title = "Most famous candidates", subtitle = "Removing those mentioned more than 1000 times (Ankvab) and those mentioned less than 50 times")

# str_extract_all(string = dataset$contents[str_detect(string = dataset$contents, pattern = regexNames[1,1])], pattern = regexNames[1,1], simplify = TRUE)

Check how they did at the elections

# Join mentions with election results (full join on FullName), add the
# registration data (left join), and sort by number of mentions.
votesAndFame <- fame %>%
    merge.data.frame(y = votes, by = "FullName", all = TRUE) %>%
    merge.data.frame(y = registered, by = "FullName", all.x = TRUE) %>%
    arrange(mentions)

# Candidates missing from every results table get the Type "FirstRoundLose".
votesAndFame$Type[is.na(votesAndFame$Type)] <- "FirstRoundLose"

# One colour per outcome; the order of the names is also the order of the
# factor levels set below.
winLosePalette <- c(
    FirstRoundWin = "#4daf4a",
    SecondRound = "#377eb8",
    FirstRoundLose = "#e41a1c",
    RepeatVote = "#ff7f00"
)
winLoseFill <- ggplot2::scale_fill_manual(name = "Type", values = winLosePalette)
# Same colours without names, matched to the factor levels by position.
WinLoseColours <- unname(winLosePalette)

# Colour of each row, looked up by outcome (NA for any other value).
votesAndFame$colourByType <- unname(winLosePalette[as.character(votesAndFame$Type)])

votesAndFame$Type <- factor(x = votesAndFame$Type, levels = names(winLosePalette))

# Freeze the current row order (ascending mentions) in the FullName factor.
votesAndFame$FullName <- forcats::fct_inorder(f = votesAndFame$FullName, ordered = TRUE)

# Horizontal bar chart of mentions per candidate, filled by outcome.
ggplot(data = votesAndFame, mapping = aes(x = FullName, y = mentions, fill = Type)) +
    geom_col() +
    scale_fill_manual(values = WinLoseColours) +
    scale_x_discrete(name = "") +
    scale_y_continuous(name = "") +
    coord_flip() +
    theme_minimal()

Colouring labels

# Same bar chart, with every candidate's axis label drawn in the colour of
# their outcome. The colours are passed by position, one per row of
# votesAndFame.
ggplot(data = votesAndFame, mapping = aes(x = FullName, y = mentions, fill = Type)) +
    geom_col() +
    scale_fill_manual(values = WinLoseColours) +
    scale_x_discrete(name = "") +
    scale_y_continuous(name = "") +
    coord_flip() +
    theme_minimal() +
    theme(axis.text.y = element_text(colour = votesAndFame$colourByType))

Removing top5 for clarity

# Same chart restricted to candidates with fewer than 500 mentions, so that
# the most-mentioned names do not squash the rest of the bars.
# The label colours come from the filtered rows themselves rather than from
# "all rows but the last five", so labels and colours stay aligned however
# many rows the filter removes (filter() also drops rows with missing mentions).
votesAndFameBelow500 <- votesAndFame %>% filter(mentions < 500)

votesAndFameBelow500 %>%
    ggplot(mapping = aes(x = FullName, y = mentions, fill = Type)) +
    geom_col() +
    coord_flip() +
    scale_x_discrete(name = "") +
    scale_y_continuous(name = "") +
    theme_minimal() +
    scale_fill_manual(values = WinLoseColours) +
    theme(axis.text.y = element_text(colour = votesAndFameBelow500$colourByType))

By district

# Earlier attempt with all districts in a single faceted chart, kept for
# reference:
# votesAndFame %>%
# ggplot(mapping = aes(x = FullName, y = mentions, fill = Type)) +
#     geom_col() +
#     facet_wrap(~District.y,scales = "free_y") +
#     coord_flip() +
#     scale_x_discrete(name = "") +
#     scale_y_continuous(name = "") +
#     theme_minimal() +
#     scale_fill_brewer(type = "qual", palette = 6)

# One chart per district, numbered 1 to 35. District.y is the district column
# that comes from the registration data in the merge. Plots made inside a loop
# have to be printed explicitly.
for (districtNumber in seq_len(35)) {
    districtCandidates <- filter(votesAndFame, District.y == districtNumber)
    ggTemp <- ggplot(
        data = districtCandidates,
        mapping = aes(x = FullName, y = mentions, fill = Type)
    ) +
        geom_col() +
        winLoseFill +
        scale_x_discrete(name = "") +
        scale_y_continuous(name = "") +
        coord_flip() +
        theme_minimal() +
        labs(title = paste("District", districtNumber))
    print(ggTemp)
}