library(tidyverse)
library(here)
library(tidyr)
library(broom)
library(boot)
dados = read_csv2(here("data/sentimento31.csv"),
  col_types = cols(
  id = col_double(),
  regiao = col_character(),
  estado = col_character(),
  sigla = col_character(),
  vitoria_ = col_character(),
  usa_twitter_ = col_character(),
  sent_twitter_ = col_double(),
  usa_instagram_ = col_character(),
  sent_instagram_ = col_double(),
  anos_ = col_number(),
  foi_diretor_ = col_character(),
  foi_coordenador_ = col_character(),
  foi_fg_ = col_character(),
  publicacoes_twitter_ = col_double(),
  seguidores_twitter_ = col_double(),
  seguindo_twitter_ = col_double(),
  publicacoes_instagram_ = col_double(),
  seguidores_instagram_ = col_double(),
  seguindo_instagram_ = col_double()
))
## i Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
dados = dados %>% 
  mutate(vitoria = (vitoria_ == "S") * 1)

glimpse(dados)
## Rows: 313
## Columns: 20
## $ id                     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, ~
## $ regiao                 <chr> "Centro-Oeste", "Centro-Oeste", "Centro-Oeste",~
## $ estado                 <chr> "Distrito Federal", "Distrito Federal", "Distri~
## $ sigla                  <chr> "IF", "IF", "IF", "IF", "IF", "IF", "IF", "IF",~
## $ vitoria_               <chr> "N", "S", "N", "N", "N", "S", "S", "N", "N", "N~
## $ usa_twitter_           <chr> "N", "S", "N", "N", "N", "S", "N", "N", "N", "N~
## $ sent_twitter_          <dbl> NA, 0.1756916, NA, NA, NA, 0.1222222, NA, NA, N~
## $ usa_instagram_         <chr> "N", "S", "N", "S", "N", "N", "S", "S", "S", "N~
## $ sent_instagram_        <dbl> NA, 0.17150728, NA, 0.00000000, NA, NA, 0.39009~
## $ anos_                  <dbl> 13.10, 11.57, 18.32, 10.25, 10.83, 37.66, 26.54~
## $ foi_diretor_           <chr> "S", "S", "S", "S", "N", "S", "S", "S", "N", "N~
## $ foi_coordenador_       <chr> "S", "N", "S", "S", "S", "N", "N", "N", "N", "N~
## $ foi_fg_                <chr> "N", "N", "S", "N", "N", "N", "N", "N", "N", "N~
## $ publicacoes_twitter_   <dbl> NA, 142, NA, NA, NA, 27, NA, NA, NA, NA, NA, NA~
## $ seguidores_twitter_    <dbl> NA, 55, NA, NA, NA, 167, NA, NA, NA, NA, NA, NA~
## $ seguindo_twitter_      <dbl> NA, 138, NA, NA, NA, 639, NA, NA, NA, NA, NA, N~
## $ publicacoes_instagram_ <dbl> NA, 349, NA, 27, NA, NA, 131, 332, 502, NA, 57,~
## $ seguidores_instagram_  <dbl> NA, 1509, NA, 204, NA, NA, 1420, 1453, 20200, N~
## $ seguindo_instagram_    <dbl> NA, 904, NA, 229, NA, NA, 724, 2366, 4757, NA, ~
## $ vitoria                <dbl> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0,~

Removendo os que não utilizam nem twitter nem Instagram

dados = dados %>% 
  filter(!(usa_twitter_ == "N" & usa_instagram_ == "N")) %>% 
  filter(!sent_twitter_ == 0) %>% 
  filter(!sent_instagram_ == 0)
  
glimpse(dados)
## Rows: 23
## Columns: 20
## $ id                     <dbl> 2, 15, 28, 41, 59, 66, 76, 81, 95, 103, 111, 11~
## $ regiao                 <chr> "Centro-Oeste", "Nordeste", "Nordeste", "Nordes~
## $ estado                 <chr> "Distrito Federal", "Alagoas", "Maranh\xe3o", "~
## $ sigla                  <chr> "IF", "IF", "IF", "IF", "IF", "IF", "IF", "IF",~
## $ vitoria_               <chr> "S", "S", "N", "N", "N", "N", "N", "S", "S", "S~
## $ usa_twitter_           <chr> "S", "S", "S", "S", "S", "S", "S", "S", "S", "S~
## $ sent_twitter_          <dbl> 0.17569160, 0.43636364, 0.05317443, 0.11137336,~
## $ usa_instagram_         <chr> "S", "S", "S", "S", "S", "S", "S", "S", "S", "S~
## $ sent_instagram_        <dbl> 0.17150728, 0.19174603, 0.20689689, 0.36987944,~
## $ anos_                  <dbl> 11.57, 26.89, 13.23, 16.79, 10.86, 18.33, 11.64~
## $ foi_diretor_           <chr> "S", "S", "N", "S", "N", "N", "S", "S", "S", "S~
## $ foi_coordenador_       <chr> "N", "N", "N", "N", "N", "S", "N", "N", "N", "N~
## $ foi_fg_                <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N~
## $ publicacoes_twitter_   <dbl> 142, 11, 67, 18, 132, 1, 65, 204, 1, 164, 375, ~
## $ seguidores_twitter_    <dbl> 55, 2, 26, 35, 99, 47, 9, 15, 3, 24, 293, 29, 4~
## $ seguindo_twitter_      <dbl> 138, 0, 63, 397, 310, 15, 199, 2, 0, 21, 616, 7~
## $ publicacoes_instagram_ <dbl> 349, 64, 231, 143, 43, 44, 62, 37, 845, 290, 58~
## $ seguidores_instagram_  <dbl> 1509, 1003, 1830, 1226, 918, 1166, 49, 638, 186~
## $ seguindo_instagram_    <dbl> 904, 583, 1997, 1705, 421, 2252, 104, 742, 73, ~
## $ vitoria                <dbl> 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0,~

22 candidatos utilizam o Twitter

dados %>% 
  count(usa_twitter_ == "S") 
## # A tibble: 2 x 2
##   `usa_twitter_ == "S"`     n
##   <lgl>                 <int>
## 1 FALSE                     1
## 2 TRUE                     22

23 candidatos utilizam o Instagram

dados %>% 
  count(usa_instagram_ == "S") 
## # A tibble: 1 x 2
##   `usa_instagram_ == "S"`     n
##   <lgl>                   <int>
## 1 TRUE                       23

O Sentimento do Twitter

dados %>% ggplot(mapping = aes(y=sent_twitter_, x="candidato")) +
  geom_point(alpha = 0.1) +
  geom_jitter(alpha = 0.1, width = .04)

O Sentimento do Twitter x vitoria_

dados %>% ggplot(mapping = aes(y=sent_twitter_, x=vitoria_)) +
  geom_point(alpha=.1) +
  geom_jitter(alpha = 0.1, width = .04)

O Sentimento do Instagram

dados %>% ggplot(mapping = aes(y=sent_instagram_, x="candidato")) +
  geom_point(alpha = 0.1) +
  geom_jitter(alpha = 0.1, width = .02)

O Sentimento do Twitter x vitoria_

dados %>% ggplot(mapping = aes(y=sent_instagram_, x=vitoria_)) +
  geom_point(alpha=.1) +
  geom_jitter(alpha = 0.1, width = .02)

dados %>% ggplot(mapping = aes(x= sent_twitter_, y = sent_instagram_)) +
  geom_point()

Existe uma correlação fraca entre o sentimento do twitter e o sentimento do instagram

cor(x= dados$sent_twitter_, y=dados$sent_instagram_)
## [1] 0.2447097

A correlação entre a vitoria e o sentimento do twitter não é relevante

cor(x= dados$vitoria, y=dados$sent_twitter_)
## [1] -0.001741561

Existe uma correlação média entre a vitoria e o sentimento do instagram

cor(x= dados$vitoria, y=dados$sent_instagram_)
## [1] -0.487012

Histogramas

dados %>%  ggplot(aes(x = sent_instagram_)) +
  facet_wrap(~sigla) + 
  geom_histogram(binwidth = .09, fill = "coral", color="black") +
  geom_rug()

Regressões

Regressao logísitca vitoria e sentimento do twitter e instagram para institutos e universidades

Existe uma relação inversa entre a vitoria e o sentimento do instagram com efeito relevante

logit <- glm(vitoria ~ sent_twitter_ + sent_instagram_, data = dados)
summary(logit)
## 
## Call:
## glm(formula = vitoria ~ sent_twitter_ + sent_instagram_, data = dados)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.68469  -0.41182  -0.00032   0.36939   0.78606  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       1.0724     0.2947   3.638  0.00164 **
## sent_twitter_     0.5536     0.8841   0.626  0.53825   
## sent_instagram_  -2.3814     0.9178  -2.595  0.01732 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.2146865)
## 
##     Null deviance: 5.7391  on 22  degrees of freedom
## Residual deviance: 4.2937  on 20  degrees of freedom
## AIC: 34.669
## 
## Number of Fisher Scoring iterations: 2

regressao vitoria com twitter e instagram, para universidade

sem efeito para somente universidades

dados_uf = dados %>%
  filter(sigla=="UF")

logit <- glm(vitoria ~ sent_twitter_ + sent_instagram_, data = dados_uf)
summary(logit)
## 
## Call:
## glm(formula = vitoria ~ sent_twitter_ + sent_instagram_, data = dados_uf)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6949  -0.5024   0.2583   0.4287   0.5476  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.9926     0.6030   1.646    0.138
## sent_twitter_    -0.3678     2.0873  -0.176    0.865
## sent_instagram_  -1.1942     2.1366  -0.559    0.592
## 
## (Dispersion parameter for gaussian family taken to be 0.3171569)
## 
##     Null deviance: 2.7273  on 10  degrees of freedom
## Residual deviance: 2.5373  on  8  degrees of freedom
## AIC: 23.082
## 
## Number of Fisher Scoring iterations: 2

regressao vitoria twitter e instagram, para instituto federal

Existe efeito do sentimento do instagram no twitter para os reitores do instituto federal

dados_if = dados %>%
  filter(sigla=="IF")

logit <- glm(vitoria ~ sent_twitter_+ sent_instagram_, data = dados_if)
summary(logit)
## 
## Call:
## glm(formula = vitoria ~ sent_twitter_ + sent_instagram_, data = dados_if)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.54114  -0.25454   0.03828   0.27565   0.45142  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       1.1356     0.3123   3.636  0.00543 **
## sent_twitter_     0.8342     0.8831   0.945  0.36948   
## sent_instagram_  -3.2134     0.9758  -3.293  0.00933 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1489052)
## 
##     Null deviance: 3.0000  on 11  degrees of freedom
## Residual deviance: 1.3401  on  9  degrees of freedom
## AIC: 15.749
## 
## Number of Fisher Scoring iterations: 2

Em média o sentimento demonstrado no IF é menor do que o sentimento da UF

dados %>% 
  select(sigla, sent_instagram_) %>% 
  group_by(sigla) %>% 
  summarise(media = mean(sent_instagram_), .groups = "drop") 
## # A tibble: 2 x 2
##   sigla media
##   <chr> <dbl>
## 1 IF    0.253
## 2 UF    0.31
theta <- function(d, i) {
    agrupado = d %>% 
        slice(i) %>% 
        group_by(sigla) %>% 
        summarise(media = mean(sent_instagram_), .groups = "drop")
    uf = agrupado %>% filter(sigla == "UF") %>% pull(media)
    ifs = agrupado %>% filter(sigla == "IF") %>% pull(media)
    uf - ifs
}

booted <- boot(data = dados, 
               statistic = theta, 
               R = 2000)

ci = tidy(booted, 
          conf.level = .95,
          conf.method = "bca",
          conf.int = TRUE)

glimpse(ci)
## Rows: 1
## Columns: 5
## $ statistic <dbl> 0.05688691
## $ bias      <dbl> 0.001887328
## $ std.error <dbl> 0.04414287
## $ conf.low  <dbl> -0.03383139
## $ conf.high <dbl> 0.1401579

Não é possível afirmar se existe uma diferença em média entre o sentimento do instagram das universidades e dos institutos federais, conforme análise do IC

ci %>% 
  ggplot(aes(
            ymin = conf.low,
            y = statistic,
            ymax = conf.high,
            x = "Média do Sentimento do Instagram"
        )) +
        geom_linerange() +
        geom_point(color = "coral", size = 2) +
        scale_y_continuous(limits = c(-1, 1)) +
        labs(x = "", y = "Diferença entre as médias do sentimento do instagram entre instituto e universidade") +
        coord_flip()