library(tidyverse)
library(ggforce)
library(jsonlite)
library(glue)
library(cowplot)
library(gender)
library(ggtext)
# Make system fonts available to the graphics devices through extrafont.
# font_import() scans every system font and may take a few minutes, so it is
# only run while extrafont's font table is still empty. It asks for
# confirmation in interactive sessions and runs unattended otherwise.
if (NROW(extrafont::fonttable()) == 0) {
  extrafont::font_import(prompt = interactive())
}
# Register the fonts after any import, so fonts imported in this run are
# included.
extrafont::loadfonts()
# Directory the script was started from; the CSV export path is built on it.
wdr <- getwd()

# Zero-row data frame with the column names and types of the scraped
# supporter records; the download loop appends each page's rows to it.
df_all <- data.frame(
  person_name = character(),
  person_surname = character(),
  message = character(),
  created_at = character(),
  page_i = integer(),
  person_id = integer(),
  stringsAsFactors = FALSE
)

# Endpoint serving the supporter commitments; pages are requested by
# appending a `page_nr` query parameter.
baseurl <- "https://dabei.sebastian-kurz.at/api/v1/pages/52/commitments"

# Page counter (re-assigned by the download loop).
i <- 1
# Download the supporter commitments page by page and append them to `df_all`.
#
# At most `max_pages` pages are requested. The loop stops at the first page
# that contains no surnames. A page that fails to download or parse is
# skipped with a warning naming the page, and the loop continues with the
# next one (best effort, as before).
max_pages <- 500L

# Pages are collected in a pre-allocated list and bound once after the loop,
# instead of re-copying the growing data frame on every iteration.
fetched_pages <- vector("list", max_pages)

for (i in seq_len(max_pages)) {
  message("Retrieving page ", i)

  # Result is a data frame (page with rows), NULL (empty page, end of data),
  # or a condition object (download or parsing failed).
  page_result <- tryCatch(
    {
      kurzdata <- jsonlite::fromJSON(
        paste0(baseurl, "?page_nr=", i),
        flatten = TRUE
      )
      people <- kurzdata$commitment_module_people

      if (length(people$person.surname) > 0) {
        data.frame(
          person_name = people$person.name,
          person_surname = people$person.surname,
          message = people$message,
          created_at = people$created_at,
          page_i = i,
          person_id = people$person_id,
          stringsAsFactors = FALSE
        )
      } else {
        NULL
      }
    },
    error = function(e) e
  )

  if (inherits(page_result, "error")) {
    warning(
      "Skipping page ", i, ": ", conditionMessage(page_result),
      call. = FALSE, immediate. = TRUE
    )
    next
  }

  if (is.null(page_result)) {
    message("End at page ", i)
    break
  }

  fetched_pages[[i]] <- page_result
}

# `df_all` goes first so that its column types lead the combination.
df_all <- bind_rows(
  c(list(df_all), Filter(Negate(is.null), fetched_pages))
)
# Academic titles and name suffixes that can show up in the first-name field.
# Each entry is a regular-expression fragment.
titles <- c("Mag", "MMag", "Prof", "Dr", "O\\.Univ\\.Prof", "Ing+\\W", "Dipl", "jun", "sen")

# Match a title only as a token of its own: it has to start at a word boundary
# and must not be followed by another letter. This keeps fragments inside real
# names intact (e.g. "Mag" in "Magdalena", "sen" in "Arsen").
title_pattern <- paste0(
  "\\b(?:", paste(titles, collapse = "|"), ")(?!\\p{L})"
)

# Infer each supporter's gender from the cleaned first name.
#
# `person_name2` is the first name with titles removed, punctuation turned
# into spaces and only the first remaining word kept ("Mag. Anna" -> "Anna",
# "Hans-Peter" -> "Hans"). The lookup uses this cleaned name rather than the
# raw field. Names without a match end up in the explicit level "missing".
# Factor levels are ordered from least to most frequent.
df_gender <- df_all %>%
  mutate(
    person_name2 = person_name %>%
      str_remove_all(title_pattern) %>%
      str_replace_all("\\W+", " ") %>%
      str_squish() %>%
      str_remove("\\s.*$")
  ) %>%
  # `gender::gender` is spelled out so the function cannot be confused with
  # the `gender` column created on the same line.
  mutate(gender = map(person_name2, gender::gender)) %>%
  unnest_wider(gender) %>%
  mutate(gender = as_factor(gender)) %>%
  mutate(gender = forcats::fct_explicit_na(gender, na_level = "missing")) %>%
  mutate(gender = forcats::fct_infreq(gender) %>% fct_rev())
# Save the gender-annotated supporter table as a semicolon-separated CSV in
# <working directory>/data. The folder is created when it does not exist yet,
# and the target is passed positionally because readr renamed the `path`
# argument to `file`.
out_dir <- file.path(wdr, "data")
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
write_csv2(df_gender, file.path(out_dir, "KurzSupporters.csv"))
# timeline ----------------------------------------------------------------
# Quick look: cumulative number of supporters per gender over time.
df_gender %>%
  mutate(created_day = lubridate::as_date(created_at)) %>%
  # Sign-ups per gender and day, sorted by gender and day.
  group_by(gender, created_day) %>%
  summarise(n_obs = n()) %>%
  # Running total within each gender.
  group_by(gender) %>%
  mutate(cum_n_obs = cumsum(n_obs)) %>%
  ungroup() %>%
  ggplot() +
  # Lines have no `fill` aesthetic; gender is mapped to the line colour.
  geom_line(aes(
    x = created_day,
    y = cum_n_obs,
    colour = gender
  ))

# Per-gender totals used to annotate the area chart, one row per gender,
# largest group first:
#   n      number of supporters
#   n_rel  share of all supporters
#   n_cum  running total of n
#   y_pos  midpoint between the previous and the current running total
#   x_pos  date of the latest `created_at` value
df_notes <- df_gender %>%
  group_by(gender) %>%
  summarise(n = n()) %>%
  mutate(n_rel = n / sum(n)) %>%
  arrange(desc(n)) %>%
  mutate(
    n_cum = cumsum(n),
    y_pos = n / 2 + lag(n_cum, default = 0),
    x_pos = lubridate::as_date(max(df_gender$created_at))
  )
# Final chart: stacked cumulative number of supporters per gender over time.
# The right-hand axis labels each gender with its total and share.

# Fill colour per gender; also used for the right-hand axis labels so the two
# cannot drift apart.
gender_colours <- c(male = "#00688B", female = "#00CD66", missing = "grey")

# First and last sign-up day. Computed once and with the same conversion as
# `created_day`, so padding and the x-axis limits agree.
day_range <- range(lubridate::as_date(df_gender$created_at))

df_gender %>%
  mutate(created_day = lubridate::as_date(created_at)) %>%
  group_by(gender, created_day) %>%
  summarise(n_obs = n()) %>%
  group_by(gender) %>%
  mutate(cum_n_obs = cumsum(n_obs)) %>%
  # Add the days without sign-ups up to the last day and carry the running
  # total forward, so every gender has a value on every day.
  padr::pad(., interval = "day", group = "gender", end_val = day_range[2]) %>%
  tidyr::fill(cum_n_obs, .direction = c("down")) %>%
  ggplot() +
  geom_area(
    aes(
      x = created_day,
      group = gender,
      y = cum_n_obs,
      fill = gender
    ),
    color = "transparent",
    key_glyph = circle_key_glyph(
      fill = fill,
      color = "transparent",
      linetype = 1,
      size = 0.01,
      padding = margin(1, 1, 1, 1)
    )
  ) +
  hrbrthemes::theme_ft_rc() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    legend.justification = "left",
    panel.grid.major.x = element_blank(),
    legend.box.spacing = unit(0, "cm"),
    legend.margin = margin(0, 0, 0, 0, "cm"),
    legend.box.margin = margin(0, 0, 0, 0, "cm"),
    panel.grid.minor = element_blank(),
    plot.title = element_markdown(lineheight = 1.1, color = "white"),
    # One colour per right-hand break, looked up by gender in the row order
    # of `df_notes` instead of being hard-coded by position.
    axis.text.y.right = element_text(
      color = unname(gender_colours[as.character(df_notes$gender)])
    ),
    plot.caption = element_markdown(),
    plot.caption.position = "plot",
    plot.subtitle = element_text(size = 11),
    plot.title.position = "plot",
    axis.title.y = element_text(hjust = 0.9),
    axis.title.x = element_blank()
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.1)),
    # Regular breaks plus one at the overall number of supporters.
    breaks = c(seq(0, 2000, 1000), sum(df_notes$n)),
    sec.axis = sec_axis(
      ~.,
      breaks = df_notes$n_cum,
      # `df_notes` already has one row per gender. Wrapping the columns in
      # unique() would shorten them when two genders have the same count and
      # recycle the wrong numbers into the labels.
      labels = paste0(
        df_notes$gender, ": ",
        df_notes$n,
        " (",
        scales::percent(df_notes$n_rel),
        ")"
      )
    )
  ) +
  scale_x_date(
    limits = day_range,
    breaks = c(as.Date("2019-08-01"), as.Date("2019-09-01")),
    expand = expansion(mult = c(0, 0)),
    labels = scales::date_format(format = "%b")
  ) +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "<span style='font-size:11pt; color:orange'>Austrian General Elections 2019</span> <br>Gender of supporters on sebastian-kurz.at",
    subtitle = "Data retrieved from <https://dabei.sebastian-kurz.at/wir-fuer-kurz> on 29 Sep 2019. \nGender infered by first name with R gender package.",
    caption = "Roland Schmidt | @zoowalk | <span style='color:white'>werk.statt.codes</span>",
    y = "total number"
  )
