Load

library(googlesheets)
library(tidyverse)
library(janitor)
library(stringr)
library(lubridate)
library(forcats)
devtools::load_all()

Download google sheets

# googlesheets::gs_auth()
sheet_key <- gs_ls() %>% 
  filter(stringr::str_detect(sheet_title, 'XII|REGISTROS_')) %>% 
  arrange(sheet_title) %>% 
  with(sheet_key)

d_inscritos <- sheet_key %>% 
  last() %>% 
  gs_key() %>% 
  gs_read(verbose = FALSE) %>% 
  clean_names() %>% 
  set_names(abjutils::rm_accent(names(.)))

d_registros <- sheet_key %>% 
  first() %>% 
  gs_key() %>% 
  gs_read(verbose = FALSE) %>% 
  clean_names() %>% 
  set_names(abjutils::rm_accent(names(.)))

Tidy

tidy_registros <- d_registros %>% 
  mutate(cpf = str_replace_all(cpf, '[^0-9]', '')) %>% 
  select(cpf, situacao) %>% 
  mutate(situacao = tolower(situacao)) %>% 
  distinct(cpf, .keep_all = TRUE)
tidy_inscritos <- d_inscritos %>% 
  select(timestamp = indicacao_de_data_e_hora,
         cpf, endereco, cidade, estado, cep, 
         ocupacao = starts_with('ocupacao'),
         confirmacao = starts_with('confirm'), 
         sexo = starts_with('como_'),
         obs) %>% 
  filter(confirmacao != 'repetido', is.na(obs) | obs != 'DESISTIU') %>% 
  select(-obs) %>% 
  distinct(cpf, .keep_all = TRUE) %>% 
  mutate(estado = estado %>% toupper() %>% abjutils::rm_accent(),
         estado = if_else(str_detect(estado, 'SAO PAULO'), 'SP', estado)) %>% 
  unite(origem, endereco, cidade, estado, sep = ', ') %>% 
  mutate(timestamp = dmy_hms(timestamp)) %>% 
  mutate(cpf = str_replace_all(cpf, '[^0-9]', '')) %>% 
  mutate(confirmacao = if_else(confirmacao == 'sim', 
                               'confirmado', 'espera')) %>% 
  left_join(tidy_registros, 'cpf') %>% 
  mutate(situacao = if_else(is.na(situacao), 'não registrado', situacao)) %>% 
  mutate(sexo = if_else(sexo == 'Sr.', 'masculino', 'feminino'))
  
latlon <- suppressMessages(ggmap::geocode(tidy_inscritos$origem))

Inscrições no tempo

p <- tidy_inscritos %>% 
  arrange(timestamp) %>% 
  mutate(um = 1, inscricoes = cumsum(um)) %>% 
  ggplot(aes(x = timestamp, y = inscricoes)) +
  geom_step() +
  scale_x_datetime(breaks = scales::date_breaks('1 day'), 
                   labels = scales::date_format('%b %d')) +
  theme_bw(14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab(' ') +
  ylab('Volume de inscrições')
plotly::ggplotly(p)

Endereços dos inscritos

library(leaflet)
latlon %>% 
  bind_cols(tidy_inscritos) %>% 
  mutate(label = str_c(ocupacao, origem, sep = '<br/>')) %>% 
  filter(confirmacao == 'confirmado') %>% 
  leaflet() %>%
  setView(lng = -50, lat = -17, zoom = 4) %>% 
  addTiles() %>%
  addMarkers(lng = ~lon, lat = ~lat, popup = ~label,
             clusterOptions = markerClusterOptions())

Sexo

tidy_inscritos %>% 
  count(sexo) %>% 
  mutate(prop = n/sum(n)) %>% 
  add_totals_row() %>% 
  mutate(prop = scales::percent(prop)) %>% 
  knitr::kable()
sexo n prop
feminino 58 32.8%
masculino 119 67.2%
Total 177 100.0%

Ocupação

Proporção de inscritos por combinação de ocupações:

tidy_inscritos %>% 
  mutate(ocupacao = fct_lump(ocupacao, n = 12)) %>% 
  count(ocupacao, sort = TRUE) %>% 
  mutate(prop = n/sum(n)) %>%
  add_totals_row() %>% 
  mutate(prop = scales::percent(prop)) %>% 
  knitr::kable()
ocupacao n prop
Bacharel em Estatística 50 28.2%
Other 32 18.1%
Profissional com outra graduação 16 9.0%
Estudante do Bacharelado em Estatística 15 8.5%
Estudante de outras carreiras 12 6.8%
Pós-graduando de outras áreas 12 6.8%
Mestrando ou Doutorando em Estatística 8 4.5%
Docente 7 4.0%
Pós-graduando de outras áreas, Bacharel em Estatística 7 4.0%
Estudante de outras carreiras, Profissional com outra graduação 5 2.8%
Mestrando ou Doutorando em Estatística, Bacharel em Estatística 5 2.8%
Pós-graduando de outras áreas, Profissional com outra graduação 5 2.8%
Atualmente está desempregado 3 1.7%
Total 177 100.0%

Proporção de inscritos por ocupação (tem overlap, não soma 100%!).

tidy_inscritos %>% 
  separate(ocupacao, c('a', 'b', 'c', 'd'), sep = ', ',
           fill = 'right') %>% 
  gather(key_ocup, val_ocup, a, b, c, d, convert = TRUE) %>% 
  filter(!is.na(val_ocup)) %>% 
  mutate(ntot = n_distinct(cpf)) %>% 
  mutate(val_ocup = str_wrap(fct_lump(val_ocup, n = 8), 12)) %>%
  group_by(val_ocup) %>% 
  summarise(n = n_distinct(cpf), ntot = first(ntot)) %>% 
  mutate(prop = n / ntot) %>% 
  mutate(val_ocup = fct_reorder(val_ocup, prop, .desc = TRUE)) %>% 
  ggplot(aes(x = val_ocup, y = prop)) +
  geom_bar(stat = 'identity') +
  scale_y_continuous(labels = scales::percent, limits = c(0, .45)) +
  geom_text(aes(label = scales::percent(prop)), 
            position = position_dodge(.9), vjust = -.2) +
  theme_bw(14) +
  xlab('Ocupação') +
  ylab('Proporção de inscritos')

Inscritos vs registrados

Bacharéis em estatística vs situação no CONRE-3. Dos inscritos e bacharéis em estatística, 70% não estão registrados (ou não bateram número do CPF).

tidy_inscritos %>% 
  filter(str_detect(ocupacao, 'Bacharel |Estatístico')) %>% 
  count(situacao) %>% 
  mutate(prop = n/sum(n)) %>% 
  add_totals_row() %>% 
  mutate(prop = scales::percent(prop)) %>% 
  knitr::kable()
situacao n prop
não registrado 50 69.4%
regular 22 30.6%
Total 72 100.0%
LS0tCnRpdGxlOiAiQW7DoWxpc2UgZGVzY3JpdGl2YSBkb3MgaW5zY3JpdG9zIgphdXRob3I6ICJKdWxpbyBUcmVjZW50aSAtIENPTlJFLTMiCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKb3V0cHV0OiBodG1sX25vdGVib29rCnZpZ25ldHRlOiA+CiAgJVxWaWduZXR0ZUluZGV4RW50cnl7SnVsaW8gVHJlY2VudGl9CiAgJVxWaWduZXR0ZUVuZ2luZXtrbml0cjo6cm1hcmtkb3dufQogICVcVmlnbmV0dGVFbmNvZGluZ3tVVEYtOH0KLS0tCgojIyMgTG9hZAoKYGBge3IgbGVpdHVyYSwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0KbGlicmFyeShnb29nbGVzaGVldHMpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGphbml0b3IpCmxpYnJhcnkoc3RyaW5ncikKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkoZm9yY2F0cykKZGV2dG9vbHM6OmxvYWRfYWxsKCkKYGBgCgojIyMgRG93bmxvYWQgZ29vZ2xlIHNoZWV0cwoKYGBge3IsIHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0V9CiMgZ29vZ2xlc2hlZXRzOjpnc19hdXRoKCkKc2hlZXRfa2V5IDwtIGdzX2xzKCkgJT4lIAogIGZpbHRlcihzdHJpbmdyOjpzdHJfZGV0ZWN0KHNoZWV0X3RpdGxlLCAnWElJfFJFR0lTVFJPU18nKSkgJT4lIAogIGFycmFuZ2Uoc2hlZXRfdGl0bGUpICU+JSAKICB3aXRoKHNoZWV0X2tleSkKCmRfaW5zY3JpdG9zIDwtIHNoZWV0X2tleSAlPiUgCiAgbGFzdCgpICU+JSAKICBnc19rZXkoKSAlPiUgCiAgZ3NfcmVhZCh2ZXJib3NlID0gRkFMU0UpICU+JSAKICBjbGVhbl9uYW1lcygpICU+JSAKICBzZXRfbmFtZXMoYWJqdXRpbHM6OnJtX2FjY2VudChuYW1lcyguKSkpCgpkX3JlZ2lzdHJvcyA8LSBzaGVldF9rZXkgJT4lIAogIGZpcnN0KCkgJT4lIAogIGdzX2tleSgpICU+JSAKICBnc19yZWFkKHZlcmJvc2UgPSBGQUxTRSkgJT4lIAogIGNsZWFuX25hbWVzKCkgJT4lIAogIHNldF9uYW1lcyhhYmp1dGlsczo6cm1fYWNjZW50KG5hbWVzKC4pKSkKYGBgCgojIyMgVGlkeQoKYGBge3J9CnRpZHlfcmVnaXN0cm9zIDwtIGRfcmVnaXN0cm9zICU+JSAKICBtdXRhdGUoY3BmID0gc3RyX3JlcGxhY2VfYWxsKGNwZiwgJ1teMC05XScsICcnKSkgJT4lIAogIHNlbGVjdChjcGYsIHNpdHVhY2FvKSAlPiUgCiAgbXV0YXRlKHNpdHVhY2FvID0gdG9sb3dlcihzaXR1YWNhbykpICU+JSAKICBkaXN0aW5jdChjcGYsIC5rZWVwX2FsbCA9IFRSVUUpCgp0aWR5X2luc2NyaXRvcyA8LSBkX2luc2NyaXRvcyAlPiUgCiAgc2VsZWN0KHRpbWVzdGFtcCA9IGluZGljYWNhb19kZV9kYXRhX2VfaG9yYSwKICAgICAgICAgY3BmLCBlbmRlcmVjbywgY2lkYWRlLCBlc3RhZG8sIGNlcCwgCiAgICAgICAgIG9jdXBhY2FvID0gc3RhcnRzX3dpdGgoJ29jdXBhY2FvJyksCiAgICAgICAgIGNvbmZpcm1hY2FvID0gc3RhcnRzX3dpdGgoJ2NvbmZpcm0nKSwgCiAgICAgICAgIHNleG8gPSBzdGFydHNfd2l0aCgnY29tb18nKSwKICAgICAgICAgb2JzKSAlPiUgCiAgZmlsdGVyKGNvbmZpcm1hY2FvICE9ICdyZXBldGlkbycsIGlzLm5hKG9icykgfCBvYnMgIT0gJ0RFU0lTVElVJykgJT4lIAogIHNlbGVjdCgtb2JzKSAlPiUgCiAgZGlzdGluY3QoY3BmLCAua2VlcF9hbGwgPSBUUlVFKSAlPiUgCiAgbXV0YXRlKGVzdGFkbyA9IGVzdGFkbyAlPiUgdG91cHBlcigpICU+JSBhYmp1dGlsczo6cm1fYWNjZW50KCksCiAgICAgICAgIGVzdGFkbyA9IGlmX2Vsc2Uoc3RyX2RldGVjdChlc3RhZG8sICdTQU8gUEFVTE8nKSwgJ1NQJywgZXN0YWRvKSkgJT4lIAogIHVuaXRlKG9yaWdlbSwgZW5kZXJlY28sIGNpZGFkZSwgZXN0YWRvLCBzZXAgPSAnLCAnKSAlPiUgCiAgbXV0YXRlKHRpbWVzdGFtcCA9IGRteV9obXModGltZXN0YW1wKSkgJT4lIAogIG11dGF0ZShjcGYgPSBzdHJfcmVwbGFjZV9hbGwoY3BmLCAnW14wLTldJywgJycpKSAlPiUgCiAgbXV0YXRlKGNvbmZpcm1hY2FvID0gaWZfZWxzZShjb25maXJtYWNhbyA9PSAnc2ltJywgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnY29uZmlybWFkbycsICdlc3BlcmEnKSkgJT4lIAogIGxlZnRfam9pbih0aWR5X3JlZ2lzdHJvcywgJ2NwZicpICU+JSAKICBtdXRhdGUoc2l0dWFjYW8gPSBpZl9lbHNlKGlzLm5hKHNpdHVhY2FvKSwgJ27Do28gcmVnaXN0cmFkbycsIHNpdHVhY2FvKSkgJT4lIAogIG11dGF0ZShzZXhvID0gaWZfZWxzZShzZXhvID09ICdTci4nLCAnbWFzY3VsaW5vJywgJ2ZlbWluaW5vJykpCiAgCmxhdGxvbiA8LSBzdXBwcmVzc01lc3NhZ2VzKGdnbWFwOjpnZW9jb2RlKHRpZHlfaW5zY3JpdG9zJG9yaWdlbSkpCmBgYAoKIyMjIEluc2NyacOnw7VlcyBubyB0ZW1wbwoKYGBge3IsIGZpZy53aWR0aD0xMSwgZmlnLmhlaWdodCA9IDV9CnAgPC0gdGlkeV9pbnNjcml0b3MgJT4lIAogIGFycmFuZ2UodGltZXN0YW1wKSAlPiUgCiAgbXV0YXRlKHVtID0gMSwgaW5zY3JpY29lcyA9IGN1bXN1bSh1bSkpICU+JSAKICBnZ3Bsb3QoYWVzKHggPSB0aW1lc3RhbXAsIHkgPSBpbnNjcmljb2VzKSkgKwogIGdlb21fc3RlcCgpICsKICBzY2FsZV94X2RhdGV0aW1lKGJyZWFrcyA9IHNjYWxlczo6ZGF0ZV9icmVha3MoJzEgZGF5JyksIAogICAgICAgICAgICAgICAgICAgbGFiZWxzID0gc2NhbGVzOjpkYXRlX2Zvcm1hdCgnJWIgJWQnKSkgKwogIHRoZW1lX2J3KDE0KSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkgKwogIHhsYWIoJyAnKSArCiAgeWxhYignVm9sdW1lIGRlIGluc2NyacOnw7VlcycpCgpwbG90bHk6OmdncGxvdGx5KHApCmBgYAoKIyMjIEVuZGVyZcOnb3MgZG9zIGluc2NyaXRvcwoKYGBge3IsIGZpZy53aWR0aD0xMX0KbGlicmFyeShsZWFmbGV0KQpsYXRsb24gJT4lIAogIGJpbmRfY29scyh0aWR5X2luc2NyaXRvcykgJT4lIAogIG11dGF0ZShsYWJlbCA9IHN0cl9jKG9jdXBhY2FvLCBvcmlnZW0sIHNlcCA9ICc8YnIvPicpKSAlPiUgCiAgZmlsdGVyKGNvbmZpcm1hY2FvID09ICdjb25maXJtYWRvJykgJT4lIAogIGxlYWZsZXQoKSAlPiUKICBzZXRWaWV3KGxuZyA9IC01MCwgbGF0ID0gLTE3LCB6b29tID0gNCkgJT4lIAogIGFkZFRpbGVzKCkgJT4lCiAgYWRkTWFya2VycyhsbmcgPSB+bG9uLCBsYXQgPSB+bGF0LCBwb3B1cCA9IH5sYWJlbCwKICAgICAgICAgICAgIGNsdXN0ZXJPcHRpb25zID0gbWFya2VyQ2x1c3Rlck9wdGlvbnMoKSkKYGBgCgojIyMgU2V4bwoKYGBge3J9CnRpZHlfaW5zY3JpdG9zICU+JSAKICBjb3VudChzZXhvKSAlPiUgCiAgbXV0YXRlKHByb3AgPSBuL3N1bShuKSkgJT4lIAogIGFkZF90b3RhbHNfcm93KCkgJT4lIAogIG11dGF0ZShwcm9wID0gc2NhbGVzOjpwZXJjZW50KHByb3ApKSAlPiUgCiAga25pdHI6OmthYmxlKCkKYGBgCgojIyMgT2N1cGHDp8OjbwoKUHJvcG9yw6fDo28gZGUgaW5zY3JpdG9zIHBvciBjb21iaW5hw6fDo28gZGUgb2N1cGHDp8O1ZXM6CgpgYGB7cn0KdGlkeV9pbnNjcml0b3MgJT4lIAogIG11dGF0ZShvY3VwYWNhbyA9IGZjdF9sdW1wKG9jdXBhY2FvLCBuID0gMTIpKSAlPiUgCiAgY291bnQob2N1cGFjYW8sIHNvcnQgPSBUUlVFKSAlPiUgCiAgbXV0YXRlKHByb3AgPSBuL3N1bShuKSkgJT4lCiAgYWRkX3RvdGFsc19yb3coKSAlPiUgCiAgbXV0YXRlKHByb3AgPSBzY2FsZXM6OnBlcmNlbnQocHJvcCkpICU+JSAKICBrbml0cjo6a2FibGUoKQpgYGAKCgpQcm9wb3LDp8OjbyBkZSBpbnNjcml0b3MgcG9yIG9jdXBhw6fDo28gKHRlbSBvdmVybGFwLCBuw6NvIHNvbWEgMTAwJSEpLgoKYGBge3IsIGZpZy53aWR0aD0xMH0KdGlkeV9pbnNjcml0b3MgJT4lIAogIHNlcGFyYXRlKG9jdXBhY2FvLCBjKCdhJywgJ2InLCAnYycsICdkJyksIHNlcCA9ICcsICcsCiAgICAgICAgICAgZmlsbCA9ICdyaWdodCcpICU+JSAKICBnYXRoZXIoa2V5X29jdXAsIHZhbF9vY3VwLCBhLCBiLCBjLCBkLCBjb252ZXJ0ID0gVFJVRSkgJT4lIAogIGZpbHRlcighaXMubmEodmFsX29jdXApKSAlPiUgCiAgbXV0YXRlKG50b3QgPSBuX2Rpc3RpbmN0KGNwZikpICU+JSAKICBtdXRhdGUodmFsX29jdXAgPSBzdHJfd3JhcChmY3RfbHVtcCh2YWxfb2N1cCwgbiA9IDgpLCAxMikpICU+JQogIGdyb3VwX2J5KHZhbF9vY3VwKSAlPiUgCiAgc3VtbWFyaXNlKG4gPSBuX2Rpc3RpbmN0KGNwZiksIG50b3QgPSBmaXJzdChudG90KSkgJT4lIAogIG11dGF0ZShwcm9wID0gbiAvIG50b3QpICU+JSAKICBtdXRhdGUodmFsX29jdXAgPSBmY3RfcmVvcmRlcih2YWxfb2N1cCwgcHJvcCwgLmRlc2MgPSBUUlVFKSkgJT4lIAogIGdncGxvdChhZXMoeCA9IHZhbF9vY3VwLCB5ID0gcHJvcCkpICsKICBnZW9tX2JhcihzdGF0ID0gJ2lkZW50aXR5JykgKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBzY2FsZXM6OnBlcmNlbnQsIGxpbWl0cyA9IGMoMCwgLjQ1KSkgKwogIGdlb21fdGV4dChhZXMobGFiZWwgPSBzY2FsZXM6OnBlcmNlbnQocHJvcCkpLCAKICAgICAgICAgICAgcG9zaXRpb24gPSBwb3NpdGlvbl9kb2RnZSguOSksIHZqdXN0ID0gLS4yKSArCiAgdGhlbWVfYncoMTQpICsKICB4bGFiKCdPY3VwYcOnw6NvJykgKwogIHlsYWIoJ1Byb3BvcsOnw6NvIGRlIGluc2NyaXRvcycpCgpgYGAKCiMjIyBJbnNjcml0b3MgdnMgcmVnaXN0cmFkb3MKCkJhY2hhcsOpaXMgZW0gZXN0YXTDrXN0aWNhIHZzIHNpdHVhw6fDo28gbm8gQ09OUkUtMy4gRG9zIGluc2NyaXRvcyBlIGJhY2hhcsOpaXMgZW0gZXN0YXTDrXN0aWNhLCA3MCUgbsOjbyBlc3TDo28gcmVnaXN0cmFkb3MgKG91IG7Do28gYmF0ZXJhbSBuw7ptZXJvIGRvIENQRikuCgpgYGB7cn0KdGlkeV9pbnNjcml0b3MgJT4lIAogIGZpbHRlcihzdHJfZGV0ZWN0KG9jdXBhY2FvLCAnQmFjaGFyZWwgfEVzdGF0w61zdGljbycpKSAlPiUgCiAgY291bnQoc2l0dWFjYW8pICU+JSAKICBtdXRhdGUocHJvcCA9IG4vc3VtKG4pKSAlPiUgCiAgYWRkX3RvdGFsc19yb3coKSAlPiUgCiAgbXV0YXRlKHByb3AgPSBzY2FsZXM6OnBlcmNlbnQocHJvcCkpICU+JSAKICBrbml0cjo6a2FibGUoKQpgYGAKCgo=