Getting documents
Apsnypress has a special section dedicated to communications from Abkhazia’s Central Electoral Commission. Unfortunately, not all relevant information has been properly tagged, but it is a place to start.
# Download the calendar plan (PDF), the list of electoral districts (HTML page)
# and the announcement about the 135 initiative groups (HTML page).
# Each download is preceded by the address of the page it was linked from.

# download.file() does not create missing folders, so make sure both exist.
dir.create("pdf", showWarnings = FALSE)
dir.create("html", showWarnings = FALSE)

# http://www.apsnypress.info/cik/tsentrizbirkom-opublikoval-kalendarnyy-plan-osnovnykh-meropriyatiy-po-podgotovke-i-provedeniyu-vybor/
# A PDF is binary: mode = "wb" stops Windows from rewriting line endings,
# which would corrupt the file.
download.file(
  url = "http://www.apsnypress.info/upload/KP_vyboryNSPRA_120317.pdf",
  destfile = file.path("pdf", "CalendarPreparationPlan.pdf"),
  mode = "wb"
)

# http://www.apsnypress.info/cik/resheniem-tsika-obrazovano-35-izbiratelnykh-okrugov-po-vyboram-deputatov-parlamenta-abkhazii/
# The Central Election Commission has defined the boundaries of the electoral districts for the elections of deputies to the People's Assembly – Parliament of Abkhazia.
# Sukhum. 18 January. Apsnypress. List of electoral districts with their boundaries, number of voters and locations of the district electoral commissions.
download.file(
  url = "http://www.apsnypress.info/news/resheniem-tsika-obrazovano-35-izbiratelnykh-okrugov-po-vyboram-deputatov-parlamenta-abkhazii/",
  destfile = file.path("html", "ListOfElectoralDistricts.html")
)

# http://www.apsnypress.info/cik/135-initsiativnykh-grupp-po-vydvizheniyu-kandidatov-v-deputaty-parlamenta-zaregistrirovano-v-tsik-/
# Between 21 January and 8 February 2017 the Central Election Commission of the Republic of Abkhazia registered 135 initiative groups for nominating candidates for deputy, and 4 political parties: «Ainar», «Amtsakhara», «People's Front for Justice and Development» and «Forum of National Unity of Abkhazia».
download.file(
  url = "http://www.apsnypress.info/cik/135-initsiativnykh-grupp-po-vydvizheniyu-kandidatov-v-deputaty-parlamenta-zaregistrirovano-v-tsik-/",
  destfile = file.path("html", "InitiativeGroups135.html")
)
These include data on registered candidates, such as their profession and home address.
# Download the PDFs with the data of registered candidates (one per registration
# date) and the PDF with the order of the candidates' TV appearances.
# Names are the local file names inside "pdf"; values are the remote addresses.
# The comment above each entry is the page the PDF was linked from.
candidatePdfs <- c(
  # http://www.apsnypress.info/news/tsentrizbirkom-opublikoval-dannye-zaregistrirovannykh-kandidatov-v-deputaty-parlamenta/
  "Registered-2017-02-10.pdf" = "http://www.apsnypress.info/upload/dannieKandidarov_nspra_20017.pdf",
  # http://www.apsnypress.info/cik/tsik-opublikoval-dannye-zaregistrirovannykh-13-fevralya-kandidatov-v-deputaty-parlamenta/
  "Registered-2017-02-13.pdf" = "http://apsnypress.info/upload/kandidati_13.2.17.pdf",
  # http://www.apsnypress.info/cik/16-fevralya-tsentrizbirkom-zaregistriroval-12-kandidatov-v-deputaty-parlamenta-abkhazii-/
  "Registered-2017-02-16.pdf" = "http://apsnypress.info/upload/kandidati_16.2.17.pdf",
  # http://www.apsnypress.info/cik/18-fevralya-tsik-zaregistriroval-devyat-kandidatov-v-deputaty-parlamenta-abkhazii-/
  "Registered-2017-02-18.pdf" = "http://apsnypress.info/upload/kandidati_18.2.17.pdf",
  # http://www.apsnypress.info/cik/tsentrizbirkom-abkhazii-opublikoval-dannye-kandidatov-v-deputaty-parlamenta-zaregistrirovannykh-20-f/
  "Registered-2017-02-21.pdf" = "http://www.apsnypress.info/upload/kandidati_nspra_210217.pdf",
  # http://www.apsnypress.info/cik/opredelena-ocherednost-vystupleniy-kandidatov-v-deputaty-parlamenta-na-gosudarstvennom-televidenii/
  "TVappearanceOrder.pdf" = "http://www.apsnypress.info/upload/ocherednosty_vystupleniya_kandidatov_nspra_200217.pdf"
)

# download.file() does not create missing folders, so make sure the target exists.
dir.create("pdf", showWarnings = FALSE)

for (pdfName in names(candidatePdfs)) {
  # PDFs are binary: mode = "wb" stops Windows from rewriting line endings,
  # which would corrupt the file.
  download.file(
    url = candidatePdfs[[pdfName]],
    destfile = file.path("pdf", pdfName),
    mode = "wb"
  )
}
Election results
# Download the first-round results page (HTML) and the three PDFs published
# with it: elected deputies, districts going to a second round, and districts
# where the election is repeated.

# download.file() does not create missing folders, so make sure both exist.
dir.create("pdf", showWarnings = FALSE)
dir.create("html", showWarnings = FALSE)

# http://www.apsnypress.info/news/tsik-v-12-okrugakh-izbrany-deputaty-parlamenta-abkhazii/
download.file(
  url = "http://www.apsnypress.info/news/tsik-v-12-okrugakh-izbrany-deputaty-parlamenta-abkhazii/",
  destfile = file.path("html", "2017-03-13-FirstRoundResults.html")
)

# PDFs are binary: mode = "wb" stops Windows from rewriting line endings,
# which would corrupt the files.

# FirstRound winners
download.file(
  url = "http://www.apsnypress.info/upload/izbrannie_deputatyNSPRA_120317.pdf",
  destfile = file.path("pdf", "2017-03-13-FirstRoundWinners.pdf"),
  mode = "wb"
)

# Second round
download.file(
  url = "http://www.apsnypress.info/upload/2tur_nspra.pdf",
  destfile = file.path("pdf", "2017-03-13-FirstRoundToSecondRound.pdf"),
  mode = "wb"
)

# Repeat election
download.file(
  url = "http://www.apsnypress.info/upload/povtornievibory_nspra_2017.pdf",
  destfile = file.path("pdf", "2017-03-13-FirstRoundRepeatElections.pdf"),
  mode = "wb"
)
Extract all candidates
# Read the candidate tables out of the five "Registered" PDFs and combine them
# into one data frame, `registered`, with typed columns and a FullName key.

# Column layout applied to every PDF (spelling kept: later code uses these names).
candidateColumns <- c("District", "Surname", "Name", "Patronimic", "DateOfBirth", "Employer", "Profession", "City", "Street", "House", "Apartment")

# Stack the tables at positions `pages` of an extract_tables() result into one data frame.
tablesToDF <- function(tables, pages) {
  bind_rows(lapply(pages, function(p) data.frame(tables[p], stringsAsFactors = FALSE)))
}

RegisteredPdf <- list.files(path = file.path("pdf"), pattern = "Registered", full.names = TRUE)
# Fail with a clear message instead of handing NA to extract_tables() further down.
if (length(RegisteredPdf) < 5) {
  stop("Expected five 'Registered' PDF files in the 'pdf' folder, found ", length(RegisteredPdf), call. = FALSE)
}

# First PDF: three tables, 13 extracted columns.
reg1 <- extract_tables(file = RegisteredPdf[1])
reg1DF <- tablesToDF(reg1, 1:3)
# Drop the first three rows (presumably header rows) and the two surplus
# trailing columns, then label the remaining eleven.
reg1DF <- reg1DF[-(1:3), -(12:13)]
names(reg1DF) <- candidateColumns
# Where the Employer cell is empty, rebuild it from the cells directly above and
# below. The loop is sequential on purpose: a rebuilt cell may feed the next row.
lastRow <- nrow(reg1DF)
for (i in seq_len(lastRow)) {
  # isTRUE() skips NA cells instead of stopping with an error
  if (isTRUE(reg1DF$Employer[i] == "")) {
    # A missing neighbour (first or last row) counts as empty text, so no
    # literal "NA" ends up in the merged value.
    above <- if (i > 1) reg1DF$Employer[i - 1] else ""
    below <- if (i < lastRow) reg1DF$Employer[i + 1] else ""
    reg1DF$Employer[i] <- paste(above, below)
  }
}
# Rows without a district number are dropped once their text has been merged.
reg1DF <- reg1DF[reg1DF$District != "", ]
# some professions not correctly recorded

# Second PDF: five tables; the first row is dropped.
reg2 <- extract_tables(file = RegisteredPdf[2])
reg2DF <- tablesToDF(reg2, 1:5)
names(reg2DF) <- candidateColumns
reg2DF <- reg2DF[-1, ]

# Third and fourth PDF: a single table each; the first row is dropped.
reg3 <- extract_tables(file = RegisteredPdf[3])
reg3DF <- tablesToDF(reg3, 1)
names(reg3DF) <- candidateColumns
reg3DF <- reg3DF[-1, ]

reg4 <- extract_tables(file = RegisteredPdf[4])
reg4DF <- tablesToDF(reg4, 1)
names(reg4DF) <- candidateColumns
reg4DF <- reg4DF[-1, ]

# Fifth PDF: a single table; the first row and the first column are dropped.
reg5 <- extract_tables(file = RegisteredPdf[5])
reg5DF <- tablesToDF(reg5, 1)
reg5DF <- reg5DF[-1, -1]
names(reg5DF) <- candidateColumns

# Combine, convert types and strip stray whitespace from the name parts.
registered <- bind_rows(reg1DF, reg2DF, reg3DF, reg4DF, reg5DF)
registered$District <- as.integer(registered$District)
registered$DateOfBirth <- lubridate::dmy(registered$DateOfBirth)
registered$Surname <- trimws(registered$Surname)
registered$Name <- trimws(registered$Name)
registered$Patronimic <- trimws(registered$Patronimic)
# FullName ("Surname, Name") is the key used to join with other tables later on.
registered <- registered %>%
  arrange(District) %>%
  mutate(FullName = paste(Surname, Name, sep = ", "))
#View(registered)
#duplicated(x = registered$FullName)
All candidates
# Hand-typed list of nominated candidates for districts 1-8, one row per candidate.
# Candidate strings keep the list number and any party suffix exactly as typed.
# The result is not assigned, so it is only printed.
# Uses tibble() in place of the deprecated data_frame().
# http://www.apsnypress.info/cik/135-initsiativnykh-grupp-po-vydvizheniyu-kandidatov-v-deputaty-parlamenta-zaregistrirovano-v-tsik-/
bind_rows(
  tibble(
    District = "1",
    DistrictName = "Новый район",
    Rayon = "Sukhumi",
    Candidate = c("1. Кварчия Гиви Анатольевич", "2. Купалба Темыр Гурамович", "3. Гунба Бадра Зурабович", "4. Корсантия Гурам Виталиевич", "5. Авидзба Ахрик Русланович")
  ),
  # NOTE(review): same DistrictName as district 1 — confirm against the source list.
  tibble(
    District = "2",
    DistrictName = "Новый район",
    Rayon = "Sukhumi",
    Candidate = c("1. Габния Гиви Георгиевич", "2. Бганба Роланд Анатолиевич", "3. Джапуа Алмас Северьянович от ПП «Аинар»")
  ),
  tibble(
    District = "3",
    DistrictName = "Старый поселок",
    Rayon = "Sukhumi",
    Candidate = c("1. Адзынба Шамиль Омарович", "2. Агрба Валерий Владимирович", "3. Харчилава Ираклий Игорьевич", "4. Анкваб Александр Золотинскович", "5. Колбая Дмитрий Александрович")
  ),
  tibble(
    District = "4",
    DistrictName = "Северный",
    Rayon = "Sukhumi",
    Candidate = c("1. Хагуш Саида Львовна", "2. Качарава Роман Сергеевич", "3. Айба Батал Эдуардович", "4. Рштуни Эрик Сагателович", "5. Чирикба Вячеслав Андреевич", "6. Агумава Эраст Рамзанович", "7. Цвинария Беслан Константинович")
  ),
  tibble(
    District = "5",
    DistrictName = "Синопский",
    Rayon = "Sukhumi",
    Candidate = c("1. Адлейба Георгий Лериевич", "2. Лагвилава Вадим Иванович", "3. Ашуба Лаша Нугзарович", "4. Цахнакия Тамаз Георгиевич", "5. Тужба Ираклий Вахтангович от ПП «Аинар»", "6. Васильев Владимир Владимирович")
  ),
  tibble(
    District = "6",
    DistrictName = "Центральный",
    Rayon = "Sukhumi",
    Candidate = c("1. Агрба Тенгиз Алмасханович", "2. Лабахуа Алиас Михайлович", "3. Миканба Георгий Валерианович", "4. Кварчия Абесалом Алексеевич", "5. Лолуа Рауль Валериевич", "6. Квициния Алхас Шалодиевич")
  ),
  tibble(
    District = "7",
    DistrictName = "Библиотека",
    Rayon = "Sukhumi",
    Candidate = c("1. Адлейба Астамур Борисович", "2. Джинджолия Алхас Владимирович", "3. Шамба Сергей Миронович", "4. Амалыиа Наира Концаловна")
  ),
  tibble(
    District = "8",
    DistrictName = "Маякский",
    Rayon = "Sukhumi",
    Candidate = c("1. Квициния Беслан Алексеевич", "2. Язычба Заур Гаидарович", "3. Айба Джемалик Владимирович", "4. Кишмария Астамур Ингиштерович", "5. Джопуа Тенгиз Шотович от ПП «Аинар»", "6. Папба Валерий Иванович")
  )
  # Unfinished stub for the next entry, left as the author wrote it:
  # ,
  # data_frame(District = "8",
  # DistrictName = "Маякский",
  # Rayon = "Gagra",
  # Candidate
)
Find most “famous” candidates
# Count how often each registered candidate is mentioned in the news corpus and
# plot the candidates with a mid-range number of mentions.

# Restores the objects saved in the .RData file.
# NOTE(review): assumes it provides `dataset` with a text column `contents` — confirm.
load(file = file.path("..", "..", "deFactoNews", "apsnyPress", "Dataset", "2017-03-17 - deFactoNews - apsnyPress - dataset.RData"))

# One pattern per candidate, "Name Surname": the last letter of the first name is
# dropped and up to two lower-case Cyrillic letters are allowed after both the
# shortened name and the surname.
regexNames <- registered %>%
  transmute(
    FullName,
    RegexNames = paste0(
      # str_sub(x, 1, -2) drops the final character of every element; str_trunc()
      # cannot be used here because it does not support a vector `width`.
      stringr::str_sub(trimws(Name), 1, -2),
      "[а-я]?[а-я]?", " ", trimws(Surname), "[а-я]?[а-я]?"
    )
  )

# Total number of matches of each pattern over all documents. vapply() builds the
# whole column at once and also copes with an empty candidate table.
regexNames$mentions <- vapply(
  regexNames$RegexNames,
  function(candidatePattern) {
    sum(str_count(string = dataset$contents, pattern = candidatePattern), na.rm = TRUE)
  },
  FUN.VALUE = integer(1),
  USE.NAMES = FALSE
)

# Sort by mentions and freeze that order in the factor levels so that plots keep it.
fame <- regexNames %>%
  mutate(mentions = as.integer(mentions)) %>%
  arrange(mentions) %>%
  mutate(FullName = forcats::fct_inorder(f = FullName, ordered = TRUE)) %>%
  select(-RegexNames)

fame %>%
  filter(mentions > 50) %>% # removing those mentioned 50 times or fewer
  filter(mentions < 1000) %>% # removing those mentioned 1000 times or more
  ggplot(mapping = aes(x = FullName, y = mentions)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "") +
  theme_minimal() +
  labs(title = "Most famous candidates", subtitle = "Removing those mentioned more than 1000 times (Ankvab) and those mentioned less than 50 times")

# str_extract_all(string = dataset$contents[str_detect(string = dataset$contents, pattern = regexNames[1,1])], pattern = regexNames[1,1], simplify = TRUE)
Check how they did at the elections
# Combine mention counts, election outcome and candidate details, then plot
# mentions per candidate with bars filled by outcome.
# NOTE(review): `votes` is not created in this part of the document; it is
# assumed to provide FullName and Type columns — confirm.
votesAndFame <- merge.data.frame(x = fame, y = votes, by = "FullName", all = TRUE)
votesAndFame <- merge.data.frame(x = votesAndFame, y = registered, by = "FullName", all.x = TRUE)
votesAndFame <- votesAndFame %>% arrange(mentions)

# Candidates with no outcome after the merge are labelled first-round losers.
votesAndFame$Type[is.na(votesAndFame$Type)] <- "FirstRoundLose"

# Single definition of the outcome colours, keyed by outcome name.
winLosePalette <- c(
  FirstRoundWin = "#4daf4a",
  SecondRound = "#377eb8",
  FirstRoundLose = "#e41a1c",
  RepeatVote = "#ff7f00"
)
# Fill scale with named values: colours follow the outcome name, not its position.
winLoseFill <- ggplot2::scale_fill_manual(name = "Type", values = winLosePalette)
# Unnamed copy in the same order, kept for code that still refers to it.
WinLoseColours <- unname(winLosePalette)

# Per-row colour, used elsewhere to colour axis labels; outcomes that are not in
# the palette get NA. as.character() makes the lookup go by name.
votesAndFame$colourByType <- unname(winLosePalette[as.character(votesAndFame$Type)])

votesAndFame$Type <- factor(x = votesAndFame$Type, levels = names(winLosePalette))
# Freeze the current row order (ascending mentions) as the plotting order.
votesAndFame$FullName <- forcats::fct_inorder(f = votesAndFame$FullName, ordered = TRUE)

votesAndFame %>%
  ggplot(mapping = aes(x = FullName, y = mentions, fill = Type)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "") +
  theme_minimal() +
  winLoseFill

Colouring labels
# Same bar chart of mentions per candidate, with each axis label drawn in the
# colour of that candidate's outcome.
votesAndFame %>%
  ggplot(mapping = aes(x = FullName, y = mentions, fill = Type)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "") +
  theme_minimal() +
  # Named fill scale: bar colours are matched to outcomes by name, so they stay
  # in step with the label colours even if an outcome is absent from the data.
  winLoseFill +
  # One colour per row; relies on the rows being in the same order as the
  # FullName levels. NOTE(review): a vector `colour` in element_text() may not
  # be officially supported by ggplot2 — confirm for the installed version.
  theme(axis.text.y = element_text(colour = votesAndFame$colourByType))

Removing top5 for clarity
# Bar chart restricted to candidates with fewer than 500 mentions, so that the
# most-mentioned candidates do not flatten the rest of the chart.
# The subset is kept in a variable so that the label colours come from exactly
# the rows being plotted, however many rows the filter removes.
votesAndFameUnder500 <- votesAndFame %>% filter(mentions < 500)

votesAndFameUnder500 %>%
  ggplot(mapping = aes(x = FullName, y = mentions, fill = Type)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "") +
  theme_minimal() +
  # Named fill scale: colours are matched to outcomes by name, so they stay
  # correct if the filter removes every candidate of one outcome.
  winLoseFill +
  # One colour per plotted row, in row order.
  theme(axis.text.y = element_text(colour = votesAndFameUnder500$colourByType))
