library(tidyverse)
library(ggforce)
library(jsonlite)
library(glue)
library(cowplot)
library(gender)
library(ggtext)
# Font setup ---------------------------------------------------------------
# extrafont::font_import() scans all system fonts, which can take several
# minutes and, by default, asks for interactive confirmation. Run it only when
# the extrafont font table is still empty, and without the prompt so the
# script can be sourced non-interactively.
fonts_registered <- tryCatch(
  nrow(extrafont::fonttable()) > 0,
  error = function(e) FALSE
)
if (!fonts_registered) {
  extrafont::font_import(prompt = FALSE)
}
# Register the fonts with the graphics devices only after a possible import,
# so that freshly imported fonts are available in this session.
extrafont::loadfonts()

# Working directory at start-up; used as the root for output paths below.
wdr <- getwd()

# Scrape supporter commitments ---------------------------------------------

# Empty, typed template: guarantees the column set and column types of df_all
# even if no page can be retrieved.
df_all <- data.frame(
  person_name = character(),
  person_surname = character(),
  message = character(),
  created_at = character(),
  page_i = integer(),
  person_id = integer(),
  stringsAsFactors = FALSE
)

baseurl <- "https://dabei.sebastian-kurz.at/api/v1/pages/52/commitments"
max_pages <- 500L

# Download one result page and return it as a data frame.
# Returns NULL when the page contains no people (i.e. we are past the last
# page). Errors (network, JSON parsing, unexpected shape) are left to the
# caller.
fetch_page <- function(page_nr) {
  kurzdata <- jsonlite::fromJSON(
    paste0(baseurl, "?page_nr=", page_nr),
    flatten = TRUE
  )
  people <- kurzdata$commitment_module_people

  if (length(people$person.surname) == 0) {
    return(NULL)
  }

  data.frame(
    person_name = people$person.name,
    person_surname = people$person.surname,
    message = people$message,
    created_at = people$created_at,
    page_i = page_nr,
    person_id = people$person_id,
    stringsAsFactors = FALSE
  )
}

# Collect pages in a preallocated list and bind once at the end instead of
# growing df_all on every iteration.
pages <- vector("list", max_pages)

for (i in seq_len(max_pages)) {
  message("Retrieving page ", i)

  # Best effort: a failing page is reported and skipped, the loop continues.
  # FALSE is used as the failure marker because NULL already means "no more
  # data".
  page <- tryCatch(
    fetch_page(i),
    error = function(e) {
      warning(
        "Skipping page ", i, ": ", conditionMessage(e),
        call. = FALSE, immediate. = TRUE
      )
      FALSE
    }
  )

  if (identical(page, FALSE)) {
    next
  }

  if (is.null(page)) {
    message("End at page ", i)
    break
  }

  pages[[i]] <- page
}

# Drop the unused/skipped slots and append all retrieved pages to the template.
df_all <- bind_rows(c(list(df_all), purrr::compact(pages)))

# Gender inference ---------------------------------------------------------

# Academic/professional titles and name suffixes (regex fragments) that are
# stripped from the first-name field before the gender lookup.
# NOTE(review): the fragments are matched anywhere in the string and are
# case-sensitive, so e.g. "Dr" would also be removed from a name starting with
# "Dr..." - confirm whether word boundaries are wanted.
titles <- c("Mag", "MMag", "Prof", "Dr", "O\\.Univ\\.Prof", "Ing+\\W", "Dipl", "jun", "sen")
title_pattern <- paste(titles, collapse = "|")

df_gender <- df_all %>%
  mutate(
    # Cleaned first name: titles removed, punctuation turned into spaces,
    # whitespace normalised, then only the first token is kept
    # (e.g. "Dr. Hans Peter" -> "Hans"). Non-word characters are replaced by a
    # space rather than deleted so that word() can still split the name.
    person_name2 = person_name %>%
      str_remove_all(title_pattern) %>%
      str_replace_all("\\W+", " ") %>%
      str_squish() %>%
      word(1)
  ) %>%
  # Look up the *cleaned* name; each call returns a small data frame that is
  # spread into columns (including `gender`) by unnest_wider().
  mutate(gender = map(person_name2, gender::gender)) %>%
  unnest_wider(gender) %>%
  mutate(
    # Names without a match become the explicit level "missing"; levels are
    # then ordered from least to most frequent.
    gender = as_factor(gender) %>%
      forcats::fct_explicit_na(na_level = "missing") %>%
      forcats::fct_infreq() %>%
      fct_rev()
  )


# Persist the gender-annotated supporters as a semicolon-separated CSV under
# <working dir>/data. The directory is created if it does not exist yet, and
# the target file is passed positionally because the `path` argument name is
# deprecated in newer readr versions.
out_dir <- file.path(wdr, "data")
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
write_csv2(df_gender, file.path(out_dir, "KurzSupporters.csv"))



# timeline ----------------------------------------------------------------

# Exploratory line chart: cumulative number of supporters per day, one line
# per inferred gender.
df_gender %>%
  mutate(created_day = lubridate::as_date(created_at)) %>%
  count(gender, created_day, name = "n_obs") %>%
  group_by(gender) %>%
  mutate(cum_n_obs = cumsum(n_obs)) %>%
  ungroup() %>%
  ggplot() +
  # Lines have no `fill`; map gender to `colour` so the lines are actually
  # distinguished by gender.
  geom_line(aes(
    x = created_day,
    y = cum_n_obs,
    colour = gender
  ))

# Per-gender totals used to annotate the area chart:
#   n      - number of supporters with that gender
#   n_rel  - share of all supporters
#   n_cum  - running total, largest group first
#   y_pos  - vertical midpoint of the group's band in the running total
#   x_pos  - date of the most recent commitment
df_notes <- df_gender %>%
  count(gender) %>%
  arrange(desc(n)) %>%
  mutate(
    n_rel = n / sum(n),
    n_cum = cumsum(n),
    y_pos = n / 2 + lag(n_cum, default = 0),
    x_pos = lubridate::as_date(max(df_gender$created_at))
  )


# Stacked area chart ---------------------------------------------------------
# Cumulative number of supporters over time, stacked by inferred gender, with
# the final per-gender totals shown on a secondary y axis.

# First and last day with a commitment. Parsed with lubridate::as_date(), the
# same way as `created_day` below, so padding and axis limits agree.
day_range <- lubridate::as_date(range(df_gender$created_at))

df_gender %>%
  mutate(created_day = lubridate::as_date(created_at)) %>%
  count(gender, created_day, name = "n_obs") %>%
  group_by(gender) %>%
  mutate(cum_n_obs = cumsum(n_obs)) %>%
  # Insert rows for days without new supporters up to the last observed day,
  # then carry the running total forward into those rows.
  padr::pad(., interval = "day", group = "gender", end_val = day_range[2]) %>%
  tidyr::fill(cum_n_obs, .direction = "down") %>%
  ggplot() +
  geom_area(
    aes(
      x = created_day,
      group = gender,
      y = cum_n_obs,
      fill = gender
    ),
    color = "transparent",
    key_glyph = circle_key_glyph(
      fill = fill,
      color = "transparent",
      linetype = 1,
      size = 0.01,
      padding = margin(1, 1, 1, 1)
    )
  ) +
  hrbrthemes::theme_ft_rc() +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    legend.justification = "left",
    panel.grid.major.x = element_blank(),
    legend.box.spacing = unit(0, "cm"),
    legend.margin = margin(0, 0, 0, 0, "cm"),
    legend.box.margin = margin(0, 0, 0, 0, "cm"),
    panel.grid.minor = element_blank(),
    plot.title = element_markdown(lineheight = 1.1, color = "white"),
    # One colour per secondary-axis label; the order is hard-coded to match
    # the fill colours of male, female, missing.
    # NOTE(review): assumes df_notes is ordered male, female, missing - confirm.
    axis.text.y.right = element_text(color = c("#00688B", "#00CD66", "grey")),
    plot.caption = element_markdown(),
    plot.caption.position = "plot",
    plot.subtitle = element_text(size = 11),
    plot.title.position = "plot",
    axis.title.y = element_text(hjust = 0.9),
    axis.title.x = element_blank()
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.1)),
    # Regular breaks plus one break at the overall total.
    breaks = c(seq(0, 2000, 1000), sum(df_notes$n)),
    sec.axis = sec_axis(
      ~.,
      breaks = df_notes$n_cum,
      # Build the labels row by row from df_notes so they always line up with
      # the breaks, even when two genders happen to have the same count.
      labels = paste0(
        df_notes$gender, ": ",
        df_notes$n,
        " (",
        scales::percent(df_notes$n_rel),
        ")"
      )
    )
  ) +
  scale_x_date(
    limits = day_range,
    breaks = c(as.Date("2019-08-01"), as.Date("2019-09-01")),
    expand = expansion(mult = c(0, 0)),
    labels = scales::date_format(format = "%b")
  ) +
  scale_fill_manual(values = c(male = "#00688B", female = "#00CD66", missing = "grey")) +
  labs(
    title = "<span  style='font-size:11pt; color:orange'>Austrian General Elections 2019</span> <br>Gender of supporters on sebastian-kurz.at",
    subtitle = "Data retrieved from <https://dabei.sebastian-kurz.at/wir-fuer-kurz> on 29 Sep 2019. \nGender inferred by first name with R gender package.",
    caption = "Roland Schmidt | @zoowalk | <span style='color:white'>werk.statt.codes</span>",
    y = "total number"
  )