relatorios

# Load the survey table from sentimento.csv (resolved relative to the project
# root via here::here). The file uses a comma as decimal mark; every column
# type is declared explicitly so readr does not have to guess.
dados <- read_csv(
  here::here("sentimento.csv"),
  locale = locale(decimal_mark = ","),
  col_types = cols(
    id               = col_double(),
    regiao           = col_character(),
    estado           = col_character(),
    sigla            = col_character(),
    vitoria_         = col_character(),
    usa_twitter_     = col_character(),
    sent_twitter_    = col_double(),
    usa_instagram_   = col_character(),
    sent_instagram_  = col_double(),
    anos_            = col_number(),
    foi_diretor_     = col_character(),
    foi_coordenador_ = col_character(),
    foi_fg_          = col_character()
  )
)

# Derive numeric 0/1 indicators from the raw columns:
#   vitoria        - 1 when vitoria_ is "S", 0 otherwise
#   sent_twitter   - 1 when the Twitter score is >= 0 (a score of exactly 0
#                    is coded as 1), 0 when it is negative
#   sent_instagram - same rule applied to the Instagram score
# Missing inputs stay NA because if_else() propagates NA conditions.
dados <- dados %>%
  mutate(
    vitoria = if_else(vitoria_ == "S", 1, 0),
    sent_twitter = if_else(sent_twitter_ >= 0, 1, 0),
    sent_instagram = if_else(sent_instagram_ >= 0, 1, 0)
  )

# Analysis subsets. The "*_com_zero" tables keep every row with a recorded
# score (including scores of exactly 0); the tables without that suffix
# additionally drop the zero scores.

# Twitter only
dados_tw_com_zero <- dados %>%
  filter(!is.na(sent_twitter_))

dados_tw <- dados_tw_com_zero %>%
  filter(sent_twitter_ != 0)

# Instagram only
dados_insta_com_zero <- dados %>%
  filter(!is.na(sent_instagram_))

dados_insta <- dados_insta_com_zero %>%
  filter(sent_instagram_ != 0)

# Rows that have both a Twitter and an Instagram score
dados_tw_insta_com_zero <- dados %>%
  filter(!is.na(sent_instagram_), !is.na(sent_twitter_))

dados_tw_insta <- dados_tw_insta_com_zero %>%
  filter(sent_instagram_ != 0, sent_twitter_ != 0)

Perguntas: Existe diferença entre o sentimento da universidade e do instituto?

Existe diferença entre o sentimento da mídia social entre as regiões brasileiras?

Existe correlação entre o sentimento do Twitter e do Instagram? Sim: 0.24 entre os sentimentos das mídias, positiva e fraca.

Existe correlação entre a vitória e o sentimento? Sim: 0.1 para o Twitter e -0.2 para o Instagram com a vitória.

glimpse(dados)

## Rows: 313
## Columns: 16
## $ id               <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16~
## $ regiao           <chr> "Centro-Oeste", "Centro-Oeste", "Centro-Oeste", "Cent~
## $ estado           <chr> "Distrito Federal", "Distrito Federal", "Distrito Fed~
## $ sigla            <chr> "IF", "IF", "IF", "IF", "IF", "IF", "IF", "IF", "IF",~
## $ vitoria_         <chr> "N", "S", "N", "N", "N", "S", "S", "N", "N", "N", "S"~
## $ usa_twitter_     <chr> "N", "S", "N", "N", "N", "S", "N", "N", "N", "N", "N"~
## $ sent_twitter_    <dbl> NA, 0.1756916, NA, NA, NA, 0.1222222, NA, NA, NA, NA,~
## $ usa_instagram_   <chr> "N", "S", "N", "S", "N", "N", "S", "S", "S", "N", "S"~
## $ sent_instagram_  <dbl> NA, 0.17150728, NA, 0.00000000, NA, NA, 0.39009915, 0~
## $ anos_            <dbl> 13.10, 11.57, 18.32, 10.25, 10.83, 37.66, 26.54, 27.4~
## $ foi_diretor_     <chr> "S", "S", "S", "S", "N", "S", "S", "S", "N", "N", "S"~
## $ foi_coordenador_ <chr> "S", "N", "S", "S", "S", "N", "N", "N", "N", "N", "N"~
## $ foi_fg_          <chr> "N", "N", "S", "N", "N", "N", "N", "N", "N", "N", "N"~
## $ vitoria          <dbl> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,~
## $ sent_twitter     <dbl> NA, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, 1~
## $ sent_instagram   <dbl> NA, 1, NA, 1, NA, NA, 1, 1, 1, NA, 1, NA, 1, 1, 1, 1,~

nrow(dados)

## [1] 313

nrow(dados_tw)

## [1] 49

nrow(dados_tw_com_zero)

## [1] 81

nrow(dados_insta)

## [1] 77

nrow(dados_insta_com_zero)

## [1] 154

nrow(dados_tw_insta)

## [1] 23

nrow(dados_tw_insta_com_zero)

## [1] 61

EDA TWITTER

# Strip plot of the non-zero Twitter sentiment scores.
# Only the jittered layer is drawn: the original stacked geom_point() and
# geom_jitter() on the same data, so every observation appeared twice (once
# at its exact position and once jittered), overstating the sample size.
dados_tw %>%
  ggplot(aes(x = sent_twitter_, y = "")) +
  geom_jitter(alpha = .5, color = "red", width = .1, height = .1) +
  labs(
    x = "Sentimento do Twitter",
    y = "Observações"
  )

# Histogram of the non-zero Twitter sentiment scores, using 0.2-wide bins.
ggplot(dados_tw, aes(x = sent_twitter_)) +
  geom_histogram(binwidth = .2) +
  xlab("Sentimento do Twitter") +
  ylab("Frequencia")

# Same histogram as above, split into one panel per region (regiao).
ggplot(dados_tw, aes(x = sent_twitter_)) +
  geom_histogram(binwidth = .2) +
  facet_wrap(~ regiao) +
  xlab("Sentimento do Twitter") +
  ylab("Frequencia")

# Box plot summarising the distribution of the non-zero Twitter scores.
ggplot(dados_tw, aes(y = sent_twitter_)) +
  geom_boxplot() +
  xlab("boxplot") +
  ylab("Sentimento do Twitter")

range(dados_tw$sent_twitter_)

## [1] -0.29  0.67

mean(dados_tw$sent_twitter_)

## [1] 0.1741078

EDA INSTAGRAM

# Strip plot of the non-zero Instagram sentiment scores.
# Only the jittered layer is drawn: the original stacked geom_point() and
# geom_jitter() on the same data, so every observation appeared twice (once
# at its exact position and once jittered), overstating the sample size.
dados_insta %>%
  ggplot(aes(x = sent_instagram_, y = "")) +
  geom_jitter(alpha = .5, color = "red", width = .1, height = .1) +
  labs(
    x = "Sentimento do Instagram",
    y = "Observações"
  )

# Histogram of the non-zero Instagram sentiment scores, using 0.2-wide bins.
ggplot(dados_insta, aes(x = sent_instagram_)) +
  geom_histogram(binwidth = .2) +
  xlab("Sentimento do Instagram") +
  ylab("Frequencia")

# Same histogram as above, split into one panel per region (regiao).
ggplot(dados_insta, aes(x = sent_instagram_)) +
  geom_histogram(binwidth = .2) +
  facet_wrap(~ regiao) +
  xlab("Sentimento do Instagram") +
  ylab("Frequencia")

# Box plot summarising the distribution of the non-zero Instagram scores.
ggplot(dados_insta, aes(y = sent_instagram_)) +
  geom_boxplot() +
  xlab("boxplot") +
  ylab("Sentimento do Instagram")

range(dados_insta$sent_instagram_)

## [1] -0.04  1.00

mean(dados_insta$sent_instagram_)

## [1] 0.2734126

CORRELAÇÕES

CORRELAÇÃO VITORIA SENTIMENTO

Existe uma correlação fraca e positiva, de 0.11, entre vitória e sentimento do Twitter. Existe uma correlação fraca e negativa, de -0.20, entre vitória e sentimento do Instagram, de magnitude maior que a do Twitter.

cor(dados_tw$vitoria, dados_tw$sent_twitter_, method="pearson")

## [1] 0.1139849

# Win indicator (0/1) against the non-zero Twitter sentiment score.
# Only the jittered layer is kept; the original also drew an un-jittered
# geom_point() layer, which plotted each observation a second time.
dados_tw %>%
  ggplot(aes(x = vitoria, y = sent_twitter_)) +
  geom_jitter(alpha = .2, width = .1, height = .1, color = "red")

cor(dados_insta$vitoria, dados_insta$sent_instagram_, method="pearson")

## [1] -0.2007559

# Win indicator (0/1) against the non-zero Instagram sentiment score.
# Uses dados_insta, the same table as the Instagram correlation computed just
# above. The original piped dados_tw here, so the plot showed only Twitter
# users and dropped the rows without an Instagram score with a warning.
# The duplicate un-jittered geom_point() layer is removed as well.
dados_insta %>%
  ggplot(aes(x = vitoria, y = sent_instagram_)) +
  geom_jitter(alpha = .2, width = .1, height = .1, color = "red")

## Warning: Removed 14 rows containing missing values (geom_point).

## Warning: Removed 14 rows containing missing values (geom_point).

CORRELAÇÃO TWITTER E INSTAGRAM

A correlação entre o Twitter e o Instagram é de 0.24; quando incluo as medidas com sentimento zero, a correlação cai para 0.16. Existe uma correlação fraca (0.24) entre o sentimento do Twitter e o sentimento do Instagram.

cor(dados_tw_insta$sent_twitter_, dados_tw_insta$sent_instagram_, method="pearson")

## [1] 0.2447097

# Scatter plot of Twitter vs Instagram sentiment for rows with both scores
# present and non-zero.
ggplot(dados_tw_insta, aes(x = sent_twitter_, y = sent_instagram_)) +
  geom_point(alpha = .2)

cor(dados_tw_insta_com_zero$sent_twitter_, dados_tw_insta_com_zero$sent_instagram_, method="pearson")

## [1] 0.1634639

# Scatter plot of Twitter vs Instagram sentiment for rows with both scores
# present, zero scores included.
ggplot(dados_tw_insta_com_zero, aes(x = sent_twitter_, y = sent_instagram_)) +
  geom_point(alpha = .2)

IC DA CORRELAÇÃO DO TWITTER E DO INSTAGRAM

# Bootstrap statistic: Pearson correlation between the Twitter and Instagram
# sentiment scores, computed on the rows of `d` selected by `i`.
#   d - data frame with columns sent_twitter_ and sent_instagram_
#   i - row positions to keep (boot() supplies the resampled indices)
# Returns a single numeric value.
s <- function(d, i) {
  amostra <- slice(d, i)
  with(amostra, cor(sent_twitter_, sent_instagram_, method = "pearson"))
}

# theta_chapeu: the statistic on the original (un-resampled) sample.
# The index vector is built from the table actually passed in; the original
# used nrow(dados), which is larger than dados_tw_insta and only worked
# because slice() ignores out-of-range positions.
s(dados_tw_insta, seq_len(nrow(dados_tw_insta)))

## [1] 0.2447097

# Bootstrap the Twitter x Instagram correlation with 2000 resamples, then
# summarise it as a 95% "basic" bootstrap confidence interval.
# NOTE(review): no RNG seed is set in the visible code, so the interval will
# differ slightly between runs.
booted <- boot(dados_tw_insta, s, R = 2000)

ci_corr_tw_inst <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "basic"
)

ci_corr_tw_inst

## # A tibble: 1 x 5
##   statistic    bias std.error conf.low conf.high
##       <dbl>   <dbl>     <dbl>    <dbl>     <dbl>
## 1     0.245 -0.0189     0.230   -0.135     0.761

# Point estimate and bootstrap CI of the correlation, drawn as a horizontal
# range on the full [-1, 1] correlation scale.
ggplot(
  ci_corr_tw_inst,
  aes(x = "Correlação", y = statistic, ymin = conf.low, ymax = conf.high)
) +
  geom_linerange() +
  geom_point(color = "coral", size = 2) +
  scale_y_continuous(limits = c(-1, 1)) +
  xlab("") +
  ylab("Correlação de Pearson entre sentimento do Twitter x Instagram") +
  coord_flip()

SENTIMENTO AGRUPADO POR REGIÃO

# Twitter sentiment per region for the full table: a translucent jittered
# layer with the exact positions overlaid as solid points. Rows without a
# Twitter score are dropped by ggplot with a warning.
ggplot(dados, aes(x = sent_twitter_, y = "")) +
  facet_wrap(~ regiao) +
  geom_jitter(color = "red", alpha = .2, width = .1, height = .1) +
  geom_point(color = "red") +
  xlab("Sentimento Twitter")

## Warning: Removed 232 rows containing missing values (geom_point).

## Warning: Removed 232 rows containing missing values (geom_point).

# Twitter sentiment per region, restricted to rows that have both a Twitter
# and an Instagram score (zeros included).
ggplot(dados_tw_insta_com_zero, aes(x = sent_twitter_, y = "")) +
  facet_wrap(~ regiao) +
  geom_jitter(color = "red", alpha = .2, width = .1, height = .1) +
  geom_point(color = "red") +
  xlab("Sentimento Twitter")

# Instagram sentiment per region for the full table: a translucent jittered
# layer with the exact positions overlaid as solid points. Rows without an
# Instagram score are dropped by ggplot with a warning.
ggplot(dados, aes(x = sent_instagram_, y = "")) +
  facet_wrap(~ regiao) +
  geom_jitter(color = "red", alpha = .2, width = .1, height = .1) +
  geom_point(color = "red") +
  xlab("Sentimento Instagram")

## Warning: Removed 159 rows containing missing values (geom_point).

## Warning: Removed 159 rows containing missing values (geom_point).

# Instagram sentiment per region, restricted to rows that have both a Twitter
# and an Instagram score (zeros included).
ggplot(dados_tw_insta_com_zero, aes(x = sent_instagram_, y = "")) +
  facet_wrap(~ regiao) +
  geom_jitter(color = "red", alpha = .2, width = .1, height = .1) +
  geom_point(color = "red") +
  xlab("Sentimento Instagram")

IC DA MÉDIA DO SENTIMENTO DO TWITTER

# Bootstrap statistic: mean Twitter sentiment score over the rows of `d`
# selected by `i`. Replaces the previous definition of `s`.
#   d - data frame with a sent_twitter_ column
#   i - row positions to keep (boot() supplies the resampled indices)
# Returns a single numeric value.
s <- function(d, i) {
  amostra <- slice(d, i)
  mean(amostra$sent_twitter_)
}

# theta_chapeu: the statistic on the original (un-resampled) sample.
# The index vector is built from the table actually passed in; the original
# used nrow(dados), which is larger than dados_tw and only worked because
# slice() ignores out-of-range positions.
s(dados_tw, seq_len(nrow(dados_tw)))

## [1] 0.1741078

# Bootstrap the mean Twitter sentiment with 2000 resamples, then summarise it
# as a 95% "basic" bootstrap confidence interval.
# NOTE(review): no RNG seed is set in the visible code, so the interval will
# differ slightly between runs.
booted <- boot(dados_tw, s, R = 2000)

media_tw_boot <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "basic"
)

media_tw_boot

## # A tibble: 1 x 5
##   statistic     bias std.error conf.low conf.high
##       <dbl>    <dbl>     <dbl>    <dbl>     <dbl>
## 1     0.174 0.000381    0.0241    0.128     0.221

# Point estimate and bootstrap CI of the mean Twitter sentiment, drawn as a
# horizontal range on a [-1, 1] axis.
ggplot(
  media_tw_boot,
  aes(x = "Média", y = statistic, ymin = conf.low, ymax = conf.high)
) +
  geom_linerange() +
  geom_point(color = "coral", size = 2) +
  scale_y_continuous(limits = c(-1, 1)) +
  xlab("") +
  ylab("Média do Twitter") +
  coord_flip()

IC DA MÉDIA DO SENTIMENTO DO INSTAGRAM

# Bootstrap statistic: mean Instagram sentiment score over the rows of `d`
# selected by `i`. Replaces the previous definition of `s`.
#   d - data frame with a sent_instagram_ column
#   i - row positions to keep (boot() supplies the resampled indices)
# Returns a single numeric value.
s <- function(d, i) {
  amostra <- slice(d, i)
  mean(amostra$sent_instagram_)
}

# theta_chapeu: the statistic on the original (un-resampled) sample.
# The index vector is built from the table actually passed in; the original
# used nrow(dados), which is larger than dados_insta and only worked because
# slice() ignores out-of-range positions.
s(dados_insta, seq_len(nrow(dados_insta)))

## [1] 0.2734126

# Bootstrap the mean Instagram sentiment with 2000 resamples, then summarise
# it as a 95% "basic" bootstrap confidence interval.
# NOTE(review): no RNG seed is set in the visible code, so the interval will
# differ slightly between runs.
booted <- boot(dados_insta, s, R = 2000)

media_insta_boot <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "basic"
)

media_insta_boot

## # A tibble: 1 x 5
##   statistic     bias std.error conf.low conf.high
##       <dbl>    <dbl>     <dbl>    <dbl>     <dbl>
## 1     0.273 0.000637    0.0199    0.232     0.310

# Point estimate and bootstrap CI of the mean Instagram sentiment, drawn as a
# horizontal range on a [-1, 1] axis.
ggplot(
  media_insta_boot,
  aes(x = "Média", y = statistic, ymin = conf.low, ymax = conf.high)
) +
  geom_linerange() +
  geom_point(color = "coral", size = 2) +
  scale_y_continuous(limits = c(-1, 1)) +
  xlab("") +
  ylab("Média do Instagram") +
  coord_flip()

# Draw one horizontal interval per row of `d`.
#   d - data frame with columns metodo (label), low, mid and high
# Returns a ggplot object with the value axis fixed to [0, 0.5].
plot_ics <- function(d) {
  ggplot(d, aes(x = metodo, y = mid, ymin = low, ymax = high)) +
    geom_linerange() +
    geom_point(color = "coral", size = 3) +
    scale_y_continuous(limits = c(0, .5)) +
    xlab("") +
    ylab("Médias dos sentimentos") +
    coord_flip()
}

# Side-by-side comparison of the two bootstrap CIs for the mean sentiment.
# The bounds are read from the bootstrap summaries (media_tw_boot and
# media_insta_boot) instead of being retyped by hand: the hard-coded values
# had drifted from the computed ones (e.g. .12 vs 0.128, .30 vs 0.310) and
# would silently go stale whenever the bootstrap is re-run.
tibble(
  metodo = c("Média com IC do Sent Twitter", "Média com IC do Sent Instagram"),
  low = c(media_tw_boot$conf.low, media_insta_boot$conf.low),
  mid = c(media_tw_boot$statistic, media_insta_boot$statistic),
  high = c(media_tw_boot$conf.high, media_insta_boot$conf.high)
) %>%
  plot_ics()

REGRESSÃO LOGÍSTICA TWITTER

\[ \hat{vitoria} = B0 + B1 * sentimentoTwitter \]

# Win indicator against Twitter sentiment with a fitted logistic curve.
# Fixes relative to the original:
#   * y is the numeric 0/1 column `vitoria`; the character column `vitoria_`
#     made stat_smooth() fail with "y values must be 0 <= y <= 1", so no
#     curve was drawn.
#   * rows without a Twitter score are filtered out up front instead of being
#     dropped by every layer with a warning.
#   * the un-jittered geom_point() layer is removed, since it drew every
#     observation a second time.
dados %>%
  filter(!is.na(sent_twitter_)) %>%
  ggplot(aes(x = sent_twitter_, y = vitoria)) +
  geom_jitter(alpha = .5, width = 0.1, height = 0.1) +
  stat_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
  labs(x = "Sentimento do Twitter",
       y = "Vitória do candidato")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 232 rows containing non-finite values (stat_smooth).

## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1

## Warning: Removed 232 rows containing missing values (geom_point).

## Warning: Removed 232 rows containing missing values (geom_point).

# Win indicator against non-zero Twitter sentiment with a fitted logistic
# curve. y is the numeric 0/1 column `vitoria`; the character column
# `vitoria_` made stat_smooth() fail with "y values must be 0 <= y <= 1".
# The un-jittered geom_point() layer is removed, since it drew every
# observation a second time.
dados_tw %>%
  ggplot(aes(x = sent_twitter_, y = vitoria)) +
  geom_jitter(alpha = .5, width = 0.1, height = 0.1) +
  stat_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
  labs(x = "Sentimento do Twitter",
       y = "Vitória do candidato")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1

modelo_regressao_linear_twitter

# Linear probability model: win indicator on the raw Twitter score, fitted on
# the full table (lm() drops the rows where the score is missing).
ml_tw <- lm(vitoria ~ sent_twitter_, data = dados)
tidy(ml_tw)

## # A tibble: 2 x 5
##   term          estimate std.error statistic    p.value
##   <chr>            <dbl>     <dbl>     <dbl>      <dbl>
## 1 (Intercept)      0.303    0.0636     4.77  0.00000822
## 2 sent_twitter_    0.284    0.336      0.844 0.401

# Linear probability model: win indicator on the binary (score >= 0) Twitter
# indicator, fitted on the full table.
ml_tw_geral <- lm(vitoria ~ sent_twitter, data = dados)
tidy(ml_tw_geral)

## # A tibble: 2 x 5
##   term         estimate std.error statistic p.value
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)  2.71e-15     0.210  1.29e-14   1.00 
## 2 sent_twitter 3.55e- 1     0.217  1.64e+ 0   0.105

# Same model as ml_tw, restricted to rows with a non-zero Twitter score.
ml_tw_so_tw <- lm(vitoria ~ sent_twitter_, data = dados_tw)
tidy(ml_tw_so_tw)

## # A tibble: 2 x 5
##   term          estimate std.error statistic p.value
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)      0.291    0.0989     2.95  0.00500
## 2 sent_twitter_    0.320    0.407      0.787 0.435

# Same model as ml_tw_geral, restricted to rows with a non-zero Twitter score.
ml_tw_geral_so_tw <- lm(vitoria ~ sent_twitter, data = dados_tw)
tidy(ml_tw_geral_so_tw)

## # A tibble: 2 x 5
##   term          estimate std.error statistic p.value
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)  -8.25e-16     0.211 -3.91e-15  1.00  
## 2 sent_twitter  3.86e- 1     0.222  1.74e+ 0  0.0888

# Raw-score model on the rows with any recorded Twitter score (zeros kept).
# The rendered estimates match ml_tw, which fits the same rows once lm()
# drops the missing scores.
ml_tw_so_tw_com_zero <- lm(vitoria ~ sent_twitter_, data = dados_tw_com_zero)
tidy(ml_tw_so_tw_com_zero)

## # A tibble: 2 x 5
##   term          estimate std.error statistic    p.value
##   <chr>            <dbl>     <dbl>     <dbl>      <dbl>
## 1 (Intercept)      0.303    0.0636     4.77  0.00000822
## 2 sent_twitter_    0.284    0.336      0.844 0.401

# Binary-indicator model on the rows with any recorded Twitter score (zeros
# kept). The rendered estimates match ml_tw_geral.
ml_tw_geral_so_tw_com_zero <- lm(vitoria ~ sent_twitter, data = dados_tw_com_zero)
tidy(ml_tw_geral_so_tw_com_zero)

## # A tibble: 2 x 5
##   term         estimate std.error statistic p.value
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)  2.71e-15     0.210  1.29e-14   1.00 
## 2 sent_twitter 3.55e- 1     0.217  1.64e+ 0   0.105

summary(ml_tw)

## 
## Call:
## lm(formula = vitoria ~ sent_twitter_, data = dados)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4937 -0.3281 -0.3034  0.6426  0.6966 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.30342    0.06358   4.772 8.22e-06 ***
## sent_twitter_  0.28405    0.33637   0.844    0.401    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4752 on 79 degrees of freedom
##   (232 observations deleted due to missingness)
## Multiple R-squared:  0.008946,   Adjusted R-squared:  -0.003599 
## F-statistic: 0.7131 on 1 and 79 DF,  p-value: 0.401

summary(ml_tw_geral)

## 
## Call:
## lm(formula = vitoria ~ sent_twitter, data = dados)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3553 -0.3553 -0.3553  0.6447  0.6447 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.714e-15  2.099e-01   0.000    1.000
## sent_twitter 3.553e-01  2.167e-01   1.639    0.105
## 
## Residual standard error: 0.4694 on 79 degrees of freedom
##   (232 observations deleted due to missingness)
## Multiple R-squared:  0.03289,    Adjusted R-squared:  0.02065 
## F-statistic: 2.687 on 1 and 79 DF,  p-value: 0.1051

summary(ml_tw_so_tw)

## 
## Call:
## lm(formula = vitoria ~ sent_twitter_, data = dados_tw)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5056 -0.3472 -0.3082  0.6354  0.6800 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    0.29123    0.09886   2.946    0.005 **
## sent_twitter_  0.31997    0.40679   0.787    0.435   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4829 on 47 degrees of freedom
## Multiple R-squared:  0.01299,    Adjusted R-squared:  -0.008008 
## F-statistic: 0.6187 on 1 and 47 DF,  p-value: 0.4355

summary(ml_tw_geral_so_tw)

## 
## Call:
## lm(formula = vitoria ~ sent_twitter, data = dados_tw)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3864 -0.3864 -0.3864  0.6136  0.6136 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -8.247e-16  2.107e-01   0.000   1.0000  
## sent_twitter  3.864e-01  2.223e-01   1.738   0.0888 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4711 on 47 degrees of freedom
## Multiple R-squared:  0.06037,    Adjusted R-squared:  0.04038 
## F-statistic:  3.02 on 1 and 47 DF,  p-value: 0.08881

summary(ml_tw_so_tw_com_zero)

## 
## Call:
## lm(formula = vitoria ~ sent_twitter_, data = dados_tw_com_zero)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4937 -0.3281 -0.3034  0.6426  0.6966 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.30342    0.06358   4.772 8.22e-06 ***
## sent_twitter_  0.28405    0.33637   0.844    0.401    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4752 on 79 degrees of freedom
## Multiple R-squared:  0.008946,   Adjusted R-squared:  -0.003599 
## F-statistic: 0.7131 on 1 and 79 DF,  p-value: 0.401

summary(ml_tw_geral_so_tw_com_zero)

## 
## Call:
## lm(formula = vitoria ~ sent_twitter, data = dados_tw_com_zero)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3553 -0.3553 -0.3553  0.6447  0.6447 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.714e-15  2.099e-01   0.000    1.000
## sent_twitter 3.553e-01  2.167e-01   1.639    0.105
## 
## Residual standard error: 0.4694 on 79 degrees of freedom
## Multiple R-squared:  0.03289,    Adjusted R-squared:  0.02065 
## F-statistic: 2.687 on 1 and 79 DF,  p-value: 0.1051

modelo_regressao_logit_twitter

# Logistic regression: win indicator on the raw Twitter score, fitted on the
# full table (rows with a missing score are dropped by glm()).
mrl_logit_tw <- glm(
  vitoria ~ sent_twitter_,
  family = binomial(link = "logit"),
  data = dados
)
tidy(mrl_logit_tw)

## # A tibble: 2 x 5
##   term          estimate std.error statistic p.value
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)     -0.831     0.291    -2.85  0.00431
## 2 sent_twitter_    1.25      1.48      0.845 0.398

# Logistic regression: win indicator on the binary (score >= 0) Twitter
# indicator, fitted on the full table.
# NOTE(review): the rendered output shows estimates near +/-17 with standard
# errors near 1769, which looks like (quasi-)separation; interpret with care.
mrl_logit_tw_geral <- glm(
  vitoria ~ sent_twitter,
  family = binomial(link = "logit"),
  data = dados
)
tidy(mrl_logit_tw_geral)

## # A tibble: 2 x 5
##   term         estimate std.error statistic p.value
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)     -17.6     1769.  -0.00993   0.992
## 2 sent_twitter     17.0     1769.   0.00959   0.992

# Same model as mrl_logit_tw, restricted to rows with a non-zero Twitter score.
mrl_logit_tw_so_tw <- glm(
  vitoria ~ sent_twitter_,
  family = binomial(link = "logit"),
  data = dados_tw
)
tidy(mrl_logit_tw_so_tw)

## # A tibble: 2 x 5
##   term          estimate std.error statistic p.value
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)     -0.889     0.451    -1.97   0.0486
## 2 sent_twitter_    1.43      1.80      0.790  0.429

# Same model as mrl_logit_tw_geral, restricted to rows with a non-zero
# Twitter score.
# NOTE(review): the rendered output again shows standard errors near 1769,
# which looks like (quasi-)separation; interpret with care.
mrl_logit_tw_geral_so_tw <- glm(
  vitoria ~ sent_twitter,
  family = binomial(link = "logit"),
  data = dados_tw
)
tidy(mrl_logit_tw_geral_so_tw)

## # A tibble: 2 x 5
##   term         estimate std.error statistic p.value
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)     -17.6     1769.  -0.00993   0.992
## 2 sent_twitter     17.1     1769.   0.00967   0.992

# Raw-score logistic model on the rows with any recorded Twitter score (zeros
# kept). The rendered estimates match mrl_logit_tw.
mrl_logit_tw_so_tw_com_zero <- glm(
  vitoria ~ sent_twitter_,
  family = binomial(link = "logit"),
  data = dados_tw_com_zero
)
tidy(mrl_logit_tw_so_tw_com_zero)

## # A tibble: 2 x 5
##   term          estimate std.error statistic p.value
##   <chr>            <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)     -0.831     0.291    -2.85  0.00431
## 2 sent_twitter_    1.25      1.48      0.845 0.398

# Binary-indicator logistic model on the rows with any recorded Twitter score
# (zeros kept). The rendered estimates match mrl_logit_tw_geral.
mrl_logit_tw_geral_so_tw_com_zero <- glm(
  vitoria ~ sent_twitter,
  family = binomial(link = "logit"),
  data = dados_tw_com_zero
)
tidy(mrl_logit_tw_geral_so_tw_com_zero)

## # A tibble: 2 x 5
##   term         estimate std.error statistic p.value
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)     -17.6     1769.  -0.00993   0.992
## 2 sent_twitter     17.0     1769.   0.00959   0.992

summary(mrl_logit_tw)

## 
## Call:
## glm(formula = vitoria ~ sent_twitter_, family = binomial(link = "logit"), 
##     data = dados)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1809  -0.8898  -0.8504   1.4373   1.5444  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    -0.8310     0.2911  -2.855  0.00431 **
## sent_twitter_   1.2526     1.4827   0.845  0.39822   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 103.12  on 80  degrees of freedom
## Residual deviance: 102.40  on 79  degrees of freedom
##   (232 observations deleted due to missingness)
## AIC: 106.4
## 
## Number of Fisher Scoring iterations: 4

summary(mrl_logit_tw_geral)

## 
## Call:
## glm(formula = vitoria ~ sent_twitter, family = binomial(link = "logit"), 
##     data = dados)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9369  -0.9369  -0.9369   1.4387   1.4387  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -17.57    1769.26   -0.01    0.992
## sent_twitter    16.97    1769.26    0.01    0.992
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 103.115  on 80  degrees of freedom
## Residual deviance:  98.898  on 79  degrees of freedom
##   (232 observations deleted due to missingness)
## AIC: 102.9
## 
## Number of Fisher Scoring iterations: 16

summary(mrl_logit_tw_so_tw)

## 
## Call:
## glm(formula = vitoria ~ sent_twitter_, family = binomial(link = "logit"), 
##     data = dados_tw)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2054  -0.9204  -0.8567   1.4236   1.5128  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    -0.8891     0.4508  -1.972   0.0486 *
## sent_twitter_   1.4252     1.8031   0.790   0.4293  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 63.262  on 48  degrees of freedom
## Residual deviance: 62.624  on 47  degrees of freedom
## AIC: 66.624
## 
## Number of Fisher Scoring iterations: 4

summary(mrl_logit_tw_geral_so_tw)

## 
## Call:
## glm(formula = vitoria ~ sent_twitter, family = binomial(link = "logit"), 
##     data = dados_tw)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9883  -0.9883  -0.9883   1.3791   1.3791  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -17.57    1769.26   -0.01    0.992
## sent_twitter    17.10    1769.26    0.01    0.992
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 63.262  on 48  degrees of freedom
## Residual deviance: 58.704  on 47  degrees of freedom
## AIC: 62.704
## 
## Number of Fisher Scoring iterations: 16

summary(mrl_logit_tw_so_tw_com_zero)

## 
## Call:
## glm(formula = vitoria ~ sent_twitter_, family = binomial(link = "logit"), 
##     data = dados_tw_com_zero)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1809  -0.8898  -0.8504   1.4373   1.5444  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    -0.8310     0.2911  -2.855  0.00431 **
## sent_twitter_   1.2526     1.4827   0.845  0.39822   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 103.12  on 80  degrees of freedom
## Residual deviance: 102.40  on 79  degrees of freedom
## AIC: 106.4
## 
## Number of Fisher Scoring iterations: 4

summary(mrl_logit_tw_geral_so_tw_com_zero)

## 
## Call:
## glm(formula = vitoria ~ sent_twitter, family = binomial(link = "logit"), 
##     data = dados_tw_com_zero)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9369  -0.9369  -0.9369   1.4387   1.4387  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -17.57    1769.26   -0.01    0.992
## sent_twitter    16.97    1769.26    0.01    0.992
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 103.115  on 80  degrees of freedom
## Residual deviance:  98.898  on 79  degrees of freedom
## AIC: 102.9
## 
## Number of Fisher Scoring iterations: 16

REGRESSÃO LOGÍSTICA INSTAGRAM

\[ \hat{vitoria} = B0 + B1 * sentimentoInstagram \]

# Win indicator against Instagram sentiment with a fitted logistic curve.
# Fixes relative to the original:
#   * y is the numeric 0/1 column `vitoria`; the character column `vitoria_`
#     made stat_smooth() fail with "y values must be 0 <= y <= 1", so no
#     curve was drawn.
#   * rows without an Instagram score are filtered out up front instead of
#     being dropped by every layer with a warning.
#   * the un-jittered geom_point() layer is removed, since it drew every
#     observation a second time.
dados %>%
  filter(!is.na(sent_instagram_)) %>%
  ggplot(aes(x = sent_instagram_, y = vitoria)) +
  geom_jitter(alpha = .5, width = 0.1, height = 0.1) +
  stat_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
  labs(x = "Sentimento do Instagram",
       y = "Vitória do candidato")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 159 rows containing non-finite values (stat_smooth).

## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1

## Warning: Removed 159 rows containing missing values (geom_point).

## Warning: Removed 159 rows containing missing values (geom_point).

# Win indicator against non-zero Instagram sentiment with a fitted logistic
# curve. y is the numeric 0/1 column `vitoria`; the character column
# `vitoria_` made stat_smooth() fail with "y values must be 0 <= y <= 1".
# The un-jittered geom_point() layer is removed, since it drew every
# observation a second time.
dados_insta %>%
  ggplot(aes(x = sent_instagram_, y = vitoria)) +
  geom_jitter(alpha = .5, width = 0.1, height = 0.1) +
  stat_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
  labs(x = "Sentimento do Instagram",
       y = "Vitória do candidato")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1

modelo_regressao_linear_insta

# Linear probability model: win indicator on the raw Instagram score, fitted
# on the full table (lm() drops the rows where the score is missing).
ml_insta <- lm(vitoria ~ sent_instagram_, data = dados)
tidy(ml_insta)

## # A tibble: 2 x 5
##   term            estimate std.error statistic  p.value
##   <chr>              <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)        0.344    0.0487     7.06  5.54e-11
## 2 sent_instagram_    0.191    0.213      0.897 3.71e- 1

# Linear probability model: win indicator on the binary (score >= 0)
# Instagram indicator, fitted on the full table.
ml_insta_geral <- lm(vitoria ~ sent_instagram, data = dados)
tidy(ml_insta_geral)

## # A tibble: 2 x 5
##   term           estimate std.error statistic p.value
##   <chr>             <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)    1.56e-14     0.485  3.22e-14   1.00 
## 2 sent_instagram 3.73e- 1     0.487  7.66e- 1   0.445

# Linear model on the raw Instagram score, restricted to `dados_insta`
# (non-missing, non-zero scores).
# Slope is only marginally significant: p is about 0.08, i.e. significant at
# the 10% level but not at 5%.
ml_insta_so_insta <- lm(formula = vitoria ~ sent_instagram_, data = dados_insta)
ml_insta_so_insta %>% tidy()

## # A tibble: 2 x 5
##   term            estimate std.error statistic      p.value
##   <chr>              <dbl>     <dbl>     <dbl>        <dbl>
## 1 (Intercept)        0.639     0.106      6.04 0.0000000543
## 2 sent_instagram_   -0.580     0.327     -1.77 0.0800

# Linear model on the binary Instagram indicator `sent_instagram`, restricted
# to `dados_insta` (non-missing, non-zero scores).
ml_insta_geral_so_insta <- lm(
  formula = vitoria ~ sent_instagram,
  data = dados_insta
)
ml_insta_geral_so_insta %>% tidy()

## # A tibble: 2 x 5
##   term           estimate std.error statistic p.value
##   <chr>             <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)    2.33e-15     0.503  4.63e-15   1.00 
## 2 sent_instagram 4.87e- 1     0.506  9.61e- 1   0.339

# Linear model on the raw Instagram score using `dados_insta_com_zero`
# (non-missing scores, zeros kept).
# NOTE(review): the estimates match the fit on `dados`, presumably because
# lm() already drops the rows with a missing score there.
ml_insta_so_insta_com_zero <- lm(
  formula = vitoria ~ sent_instagram_,
  data = dados_insta_com_zero
)
ml_insta_so_insta_com_zero %>% tidy()

## # A tibble: 2 x 5
##   term            estimate std.error statistic  p.value
##   <chr>              <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)        0.344    0.0487     7.06  5.54e-11
## 2 sent_instagram_    0.191    0.213      0.897 3.71e- 1

# Linear model on the binary Instagram indicator `sent_instagram` using
# `dados_insta_com_zero` (non-missing scores, zeros kept).
ml_insta_geral_so_insta_com_zero <- lm(
  formula = vitoria ~ sent_instagram,
  data = dados_insta_com_zero
)
ml_insta_geral_so_insta_com_zero %>% tidy()

## # A tibble: 2 x 5
##   term           estimate std.error statistic p.value
##   <chr>             <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)    1.56e-14     0.485  3.22e-14   1.00 
## 2 sent_instagram 3.73e- 1     0.487  7.66e- 1   0.445

# Full summary (residuals, coefficients, R-squared, F-test) of the linear
# model on the raw Instagram score fitted on `dados`.
summary(ml_insta)

## 
## Call:
## lm(formula = vitoria ~ sent_instagram_, data = dados)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5064 -0.3512 -0.3440  0.6148  0.6560 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.34400    0.04872   7.060 5.54e-11 ***
## sent_instagram_  0.19112    0.21303   0.897    0.371    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4847 on 152 degrees of freedom
##   (159 observations deleted due to missingness)
## Multiple R-squared:  0.005267,   Adjusted R-squared:  -0.001277 
## F-statistic: 0.8048 on 1 and 152 DF,  p-value: 0.3711

# Full summary of the linear model on the binary Instagram indicator fitted
# on `dados`.
summary(ml_insta_geral)

## 
## Call:
## lm(formula = vitoria ~ sent_instagram, data = dados)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3725 -0.3725 -0.3725  0.6274  0.6274 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)    1.560e-14  4.851e-01   0.000    1.000
## sent_instagram 3.725e-01  4.867e-01   0.766    0.445
## 
## Residual standard error: 0.4851 on 152 degrees of freedom
##   (159 observations deleted due to missingness)
## Multiple R-squared:  0.003841,   Adjusted R-squared:  -0.002713 
## F-statistic: 0.586 on 1 and 152 DF,  p-value: 0.4451

# Full summary of the linear model on the raw Instagram score fitted on
# `dados_insta`.
summary(ml_insta_so_insta)

## 
## Call:
## lm(formula = vitoria ~ sent_instagram_, data = dados_insta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6624 -0.4883 -0.1749  0.4847  0.9412 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.6392     0.1058   6.043 5.43e-08 ***
## sent_instagram_  -0.5805     0.3271  -1.775     0.08 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4959 on 75 degrees of freedom
## Multiple R-squared:  0.0403, Adjusted R-squared:  0.02751 
## F-statistic:  3.15 on 1 and 75 DF,  p-value: 0.08

# Full summary of the linear model on the binary Instagram indicator fitted
# on `dados_insta`.
summary(ml_insta_geral_so_insta)

## 
## Call:
## lm(formula = vitoria ~ sent_instagram, data = dados_insta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4868 -0.4868 -0.4868  0.5132  0.5132 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)    2.328e-15  5.031e-01   0.000    1.000
## sent_instagram 4.868e-01  5.064e-01   0.961    0.339
## 
## Residual standard error: 0.5031 on 75 degrees of freedom
## Multiple R-squared:  0.01217,    Adjusted R-squared:  -0.001 
## F-statistic: 0.9241 on 1 and 75 DF,  p-value: 0.3395

# Full summary of the linear model on the raw Instagram score fitted on
# `dados_insta_com_zero`.
summary(ml_insta_so_insta_com_zero)

## 
## Call:
## lm(formula = vitoria ~ sent_instagram_, data = dados_insta_com_zero)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5064 -0.3512 -0.3440  0.6148  0.6560 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.34400    0.04872   7.060 5.54e-11 ***
## sent_instagram_  0.19112    0.21303   0.897    0.371    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4847 on 152 degrees of freedom
## Multiple R-squared:  0.005267,   Adjusted R-squared:  -0.001277 
## F-statistic: 0.8048 on 1 and 152 DF,  p-value: 0.3711

# Full summary of the linear model on the binary Instagram indicator fitted
# on `dados_insta_com_zero`.
summary(ml_insta_geral_so_insta_com_zero)

## 
## Call:
## lm(formula = vitoria ~ sent_instagram, data = dados_insta_com_zero)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3725 -0.3725 -0.3725  0.6274  0.6274 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)    1.560e-14  4.851e-01   0.000    1.000
## sent_instagram 3.725e-01  4.867e-01   0.766    0.445
## 
## Residual standard error: 0.4851 on 152 degrees of freedom
## Multiple R-squared:  0.003841,   Adjusted R-squared:  -0.002713 
## F-statistic: 0.586 on 1 and 152 DF,  p-value: 0.4451

modelo_regressao_logit_insta

# Logistic regression: victory (0/1) explained by the raw Instagram sentiment
# score, fitted on the full data set `dados`.
mrl_logit_insta <- glm(
  formula = vitoria ~ sent_instagram_,
  family = binomial(link = "logit"),
  data = dados
)
mrl_logit_insta %>% tidy()

## # A tibble: 2 x 5
##   term            estimate std.error statistic p.value
##   <chr>              <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)       -0.644     0.210    -3.06  0.00221
## 2 sent_instagram_    0.803     0.897     0.895 0.371

# Logistic regression: victory (0/1) explained by the binary Instagram
# indicator `sent_instagram` (1 when the raw score is >= 0, otherwise 0),
# fitted on `dados`.
# NOTE(review): the very large estimates and standard errors of this fit
# suggest (quasi-)complete separation on the indicator -- confirm before
# interpreting the coefficients.
mrl_logit_insta_geral <- glm(
  formula = vitoria ~ sent_instagram,
  family = binomial(link = "logit"),
  data = dados
)
mrl_logit_insta_geral %>% tidy()

## # A tibble: 2 x 5
##   term           estimate std.error statistic p.value
##   <chr>             <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)       -14.6      883.   -0.0165   0.987
## 2 sent_instagram     14.0      883.    0.0159   0.987

# Logistic regression on the raw Instagram score, restricted to `dados_insta`
# (non-missing, non-zero scores).
# Slope is only marginally significant: p is about 0.093, i.e. significant at
# the 10% level but not at 5%.
mrl_logit_insta_so_insta <- glm(
  formula = vitoria ~ sent_instagram_,
  family = binomial(link = "logit"),
  data = dados_insta
)
mrl_logit_insta_so_insta %>% tidy()

## # A tibble: 2 x 5
##   term            estimate std.error statistic p.value
##   <chr>              <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)        0.625     0.468      1.34  0.182 
## 2 sent_instagram_   -2.63      1.56      -1.68  0.0926

# Logistic regression on the binary Instagram indicator `sent_instagram`,
# restricted to `dados_insta` (non-missing, non-zero scores).
# NOTE(review): the very large estimates and standard errors of this fit
# suggest (quasi-)complete separation on the indicator -- confirm before
# interpreting the coefficients.
mrl_logit_insta_geral_so_insta <- glm(
  formula = vitoria ~ sent_instagram,
  family = binomial(link = "logit"),
  data = dados_insta
)
mrl_logit_insta_geral_so_insta %>% tidy()

## # A tibble: 2 x 5
##   term           estimate std.error statistic p.value
##   <chr>             <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)       -15.6     1455.   -0.0107   0.991
## 2 sent_instagram     15.5     1455.    0.0107   0.991

# Logistic regression on the raw Instagram score using `dados_insta_com_zero`
# (non-missing scores, zeros kept).
# NOTE(review): the estimates match the fit on `dados`, presumably because
# glm() already drops the rows with a missing score there.
mrl_logit_insta_so_insta_com_zero <- glm(
  formula = vitoria ~ sent_instagram_,
  family = binomial(link = "logit"),
  data = dados_insta_com_zero
)
mrl_logit_insta_so_insta_com_zero %>% tidy()

## # A tibble: 2 x 5
##   term            estimate std.error statistic p.value
##   <chr>              <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)       -0.644     0.210    -3.06  0.00221
## 2 sent_instagram_    0.803     0.897     0.895 0.371

# Logistic regression on the binary Instagram indicator `sent_instagram` using
# `dados_insta_com_zero` (non-missing scores, zeros kept).
# NOTE(review): the very large estimates and standard errors of this fit
# suggest (quasi-)complete separation on the indicator -- confirm before
# interpreting the coefficients.
mrl_logit_insta_geral_so_insta_com_zero <- glm(
  formula = vitoria ~ sent_instagram,
  family = binomial(link = "logit"),
  data = dados_insta_com_zero
)
mrl_logit_insta_geral_so_insta_com_zero %>% tidy()

## # A tibble: 2 x 5
##   term           estimate std.error statistic p.value
##   <chr>             <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)       -14.6      883.   -0.0165   0.987
## 2 sent_instagram     14.0      883.    0.0159   0.987

# Full summary (deviance residuals, coefficients, deviances, AIC) of the
# logistic model on the raw Instagram score fitted on `dados`.
summary(mrl_logit_insta)

## 
## Call:
## glm(formula = vitoria ~ sent_instagram_, family = binomial(link = "logit"), 
##     data = dados)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1938  -0.9302  -0.9189   1.3827   1.4601  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      -0.6438     0.2104  -3.060  0.00221 **
## sent_instagram_   0.8027     0.8969   0.895  0.37080   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 202.98  on 153  degrees of freedom
## Residual deviance: 202.18  on 152  degrees of freedom
##   (159 observations deleted due to missingness)
## AIC: 206.18
## 
## Number of Fisher Scoring iterations: 4

# Full summary of the logistic model on the binary Instagram indicator fitted
# on `dados`.
summary(mrl_logit_insta_geral)

## 
## Call:
## glm(formula = vitoria ~ sent_instagram, family = binomial(link = "logit"), 
##     data = dados)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9655  -0.9655  -0.9655   1.4053   1.4053  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)      -14.57     882.74  -0.017    0.987
## sent_instagram    14.04     882.74   0.016    0.987
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 202.98  on 153  degrees of freedom
## Residual deviance: 202.05  on 152  degrees of freedom
##   (159 observations deleted due to missingness)
## AIC: 206.05
## 
## Number of Fisher Scoring iterations: 13

# Full summary of the logistic model on the raw Instagram score fitted on
# `dados_insta`.
summary(mrl_logit_insta_so_insta)

## 
## Call:
## glm(formula = vitoria ~ sent_instagram_, family = binomial(link = "logit"), 
##     data = dados_insta)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.499  -1.153  -0.642   1.150   2.062  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)  
## (Intercept)       0.6254     0.4682   1.336   0.1816  
## sent_instagram_  -2.6251     1.5610  -1.682   0.0926 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 106.63  on 76  degrees of freedom
## Residual deviance: 103.32  on 75  degrees of freedom
## AIC: 107.32
## 
## Number of Fisher Scoring iterations: 4

# Full summary of the logistic model on the binary Instagram indicator fitted
# on `dados_insta`.
summary(mrl_logit_insta_geral_so_insta)

## 
## Call:
## glm(formula = vitoria ~ sent_instagram, family = binomial(link = "logit"), 
##     data = dados_insta)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.155  -1.155  -1.155   1.200   1.200  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)      -15.57    1455.40  -0.011    0.991
## sent_instagram    15.51    1455.40   0.011    0.991
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 106.63  on 76  degrees of freedom
## Residual deviance: 105.31  on 75  degrees of freedom
## AIC: 109.31
## 
## Number of Fisher Scoring iterations: 14

# Full summary of the logistic model on the raw Instagram score fitted on
# `dados_insta_com_zero`.
summary(mrl_logit_insta_so_insta_com_zero)

## 
## Call:
## glm(formula = vitoria ~ sent_instagram_, family = binomial(link = "logit"), 
##     data = dados_insta_com_zero)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1938  -0.9302  -0.9189   1.3827   1.4601  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      -0.6438     0.2104  -3.060  0.00221 **
## sent_instagram_   0.8027     0.8969   0.895  0.37080   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 202.98  on 153  degrees of freedom
## Residual deviance: 202.18  on 152  degrees of freedom
## AIC: 206.18
## 
## Number of Fisher Scoring iterations: 4

# Full summary of the logistic model on the binary Instagram indicator fitted
# on `dados_insta_com_zero`.
summary(mrl_logit_insta_geral_so_insta_com_zero)

## 
## Call:
## glm(formula = vitoria ~ sent_instagram, family = binomial(link = "logit"), 
##     data = dados_insta_com_zero)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9655  -0.9655  -0.9655   1.4053   1.4053  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)      -14.57     882.74  -0.017    0.987
## sent_instagram    14.04     882.74   0.016    0.987
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 202.98  on 153  degrees of freedom
## Residual deviance: 202.05  on 152  degrees of freedom
## AIC: 206.05
## 
## Number of Fisher Scoring iterations: 13