# Load the sentiment data set. Every column type is declared explicitly, and
# the locale tells readr that numbers in the file use "," as the decimal mark.
dados <- read_csv(
  file = here::here("sentimento.csv"),
  locale = locale(decimal_mark = ","),
  col_types = cols(
    id = col_double(),
    regiao = col_character(),
    estado = col_character(),
    sigla = col_character(),
    vitoria_ = col_character(),
    usa_twitter_ = col_character(),
    sent_twitter_ = col_double(),
    usa_instagram_ = col_character(),
    sent_instagram_ = col_double(),
    anos_ = col_number(),
    foi_diretor_ = col_character(),
    foi_coordenador_ = col_character(),
    foi_fg_ = col_character()
  )
)
# Derive 0/1 indicators from the raw columns:
#   vitoria        - 1 when vitoria_ is "S", 0 otherwise
#   sent_twitter   - 1 when the Twitter score is >= 0 (zero included), else 0
#   sent_instagram - same rule applied to the Instagram score
# A missing score stays NA in its indicator.
dados <- dados %>%
  mutate(
    vitoria = if_else(vitoria_ == "S", 1, 0),
    sent_twitter = if_else(sent_twitter_ >= 0, 1, 0),
    sent_instagram = if_else(sent_instagram_ >= 0, 1, 0)
  )
# Analysis subsets of `dados`.
# The "_com_zero" variants keep every row whose score is present, including
# scores exactly equal to 0; the plain variants additionally drop those zeros.

# Twitter only.
dados_tw_com_zero <- dados %>%
  filter(!is.na(sent_twitter_))
dados_tw <- dados_tw_com_zero %>%
  filter(sent_twitter_ != 0)

# Instagram only.
dados_insta_com_zero <- dados %>%
  filter(!is.na(sent_instagram_))
dados_insta <- dados_insta_com_zero %>%
  filter(sent_instagram_ != 0)

# Rows that have a score on both networks.
dados_tw_insta_com_zero <- dados %>%
  filter(!is.na(sent_instagram_), !is.na(sent_twitter_))
dados_tw_insta <- dados_tw_insta_com_zero %>%
  filter(sent_instagram_ != 0, sent_twitter_ != 0)
Perguntas
Existe diferença entre o sentimento da universidade e do instituto?
Existe diferença entre o sentimento da mídia social entre as regiões brasileiras?
Existe correlação entre o sentimento do Twitter e do Instagram? Resultado: 0.24 entre os sentimentos das mídias, positiva e fraca.
Existe correlação entre a vitória e o sentimento? Resultado: 0.1 para o Twitter e -0.2 para o Instagram com a vitória.
glimpse(dados)
## Rows: 313
## Columns: 16
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16~
## $ regiao <chr> "Centro-Oeste", "Centro-Oeste", "Centro-Oeste", "Cent~
## $ estado <chr> "Distrito Federal", "Distrito Federal", "Distrito Fed~
## $ sigla <chr> "IF", "IF", "IF", "IF", "IF", "IF", "IF", "IF", "IF",~
## $ vitoria_ <chr> "N", "S", "N", "N", "N", "S", "S", "N", "N", "N", "S"~
## $ usa_twitter_ <chr> "N", "S", "N", "N", "N", "S", "N", "N", "N", "N", "N"~
## $ sent_twitter_ <dbl> NA, 0.1756916, NA, NA, NA, 0.1222222, NA, NA, NA, NA,~
## $ usa_instagram_ <chr> "N", "S", "N", "S", "N", "N", "S", "S", "S", "N", "S"~
## $ sent_instagram_ <dbl> NA, 0.17150728, NA, 0.00000000, NA, NA, 0.39009915, 0~
## $ anos_ <dbl> 13.10, 11.57, 18.32, 10.25, 10.83, 37.66, 26.54, 27.4~
## $ foi_diretor_ <chr> "S", "S", "S", "S", "N", "S", "S", "S", "N", "N", "S"~
## $ foi_coordenador_ <chr> "S", "N", "S", "S", "S", "N", "N", "N", "N", "N", "N"~
## $ foi_fg_ <chr> "N", "N", "S", "N", "N", "N", "N", "N", "N", "N", "N"~
## $ vitoria <dbl> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,~
## $ sent_twitter <dbl> NA, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, 1~
## $ sent_instagram <dbl> NA, 1, NA, 1, NA, NA, 1, 1, 1, NA, 1, NA, 1, 1, 1, 1,~
nrow(dados)
## [1] 313
nrow(dados_tw)
## [1] 49
nrow(dados_tw_com_zero)
## [1] 81
nrow(dados_insta)
## [1] 77
nrow(dados_insta_com_zero)
## [1] 154
nrow(dados_tw_insta)
## [1] 23
nrow(dados_tw_insta_com_zero)
## [1] 61
EDA TWITTER
# Strip plot of the individual non-zero Twitter sentiment scores.
# geom_jitter() already draws one point per observation, so the additional
# geom_point() layer of the original (which plotted every observation a second
# time, at its un-jittered position) is removed.
dados_tw %>%
  ggplot(mapping = aes(x = sent_twitter_, y = "")) +
  geom_jitter(alpha = .5, color = "red", width = .1, height = .1) +
  labs(
    x = "Sentimento do Twitter",
    y = "Observações"
  )
# Distribution of the non-zero Twitter sentiment scores (dados_tw).

# Overall histogram, bins 0.2 wide.
ggplot(dados_tw, aes(x = sent_twitter_)) +
  geom_histogram(binwidth = .2) +
  xlab("Sentimento do Twitter") +
  ylab("Frequencia")

# Same histogram, one panel per region.
ggplot(dados_tw, aes(x = sent_twitter_)) +
  geom_histogram(binwidth = .2) +
  facet_wrap(~ regiao) +
  xlab("Sentimento do Twitter") +
  ylab("Frequencia")

# Box plot of the scores.
ggplot(dados_tw, aes(y = sent_twitter_)) +
  geom_boxplot() +
  xlab("boxplot") +
  ylab("Sentimento do Twitter")
range(dados_tw$sent_twitter_)
## [1] -0.29 0.67
mean(dados_tw$sent_twitter_)
## [1] 0.1741078
EDA INSTAGRAM
# Strip plot of the individual non-zero Instagram sentiment scores.
# geom_jitter() already draws one point per observation, so the additional
# geom_point() layer of the original (which plotted every observation a second
# time, at its un-jittered position) is removed.
dados_insta %>%
  ggplot(mapping = aes(x = sent_instagram_, y = "")) +
  geom_jitter(alpha = .5, color = "red", width = .1, height = .1) +
  labs(
    x = "Sentimento do Instagram",
    y = "Observações"
  )
# Distribution of the non-zero Instagram sentiment scores (dados_insta).

# Overall histogram, bins 0.2 wide.
ggplot(dados_insta, aes(x = sent_instagram_)) +
  geom_histogram(binwidth = .2) +
  xlab("Sentimento do Instagram") +
  ylab("Frequencia")

# Same histogram, one panel per region.
ggplot(dados_insta, aes(x = sent_instagram_)) +
  geom_histogram(binwidth = .2) +
  facet_wrap(~ regiao) +
  xlab("Sentimento do Instagram") +
  ylab("Frequencia")

# Box plot of the scores.
ggplot(dados_insta, aes(y = sent_instagram_)) +
  geom_boxplot() +
  xlab("boxplot") +
  ylab("Sentimento do Instagram")
range(dados_insta$sent_instagram_)
## [1] -0.04 1.00
mean(dados_insta$sent_instagram_)
## [1] 0.2734126
CORRELAÇÕES
CORRELAÇÃO VITORIA SENTIMENTO
Existe uma correlação fraca e positiva, de 0.11, entre a vitória e o sentimento do Twitter. Entre a vitória e o sentimento do Instagram a correlação também é fraca, porém maior em módulo que a do Twitter, e negativa: -0.20.
cor(dados_tw$vitoria, dados_tw$sent_twitter_, method="pearson")
## [1] 0.1139849
# Win indicator (0/1) against the Twitter sentiment score.
# Only the jittered layer is kept: the original stacked geom_point() and
# geom_jitter(), which drew every observation twice.
dados_tw %>%
  ggplot(aes(x = vitoria, y = sent_twitter_)) +
  geom_jitter(alpha = .2, width = .1, height = .1, color = "red")
cor(dados_insta$vitoria, dados_insta$sent_instagram_, method="pearson")
## [1] -0.2007559
# Win indicator (0/1) against the Instagram sentiment score.
# Uses dados_insta, the same subset as the cor() call for this pair. The
# original piped dados_tw, which is filtered on the Twitter score and still
# contains rows with a missing Instagram score, so the plot did not match the
# reported correlation.
# Only the jittered layer is kept; geom_point() + geom_jitter() drew every
# observation twice.
dados_insta %>%
  ggplot(aes(x = vitoria, y = sent_instagram_)) +
  geom_jitter(alpha = .2, width = .1, height = .1, color = "red")
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
CORRELAÇÃO TWITTER E INSTAGRAM
A correlação entre o sentimento do Twitter e o do Instagram é de 0.24; quando incluo as medidas com sentimento zero, a correlação cai para 0.16. Existe, portanto, uma correlação fraca (0.24) entre o sentimento do Twitter e o sentimento do Instagram.
cor(dados_tw_insta$sent_twitter_, dados_tw_insta$sent_instagram_, method="pearson")
## [1] 0.2447097
# Twitter score against Instagram score, rows with a non-zero score on both.
ggplot(dados_tw_insta, aes(x = sent_twitter_, y = sent_instagram_)) +
  geom_point(alpha = .2)
cor(dados_tw_insta_com_zero$sent_twitter_, dados_tw_insta_com_zero$sent_instagram_, method="pearson")
## [1] 0.1634639
# Twitter score against Instagram score, zero scores included.
ggplot(dados_tw_insta_com_zero, aes(x = sent_twitter_, y = sent_instagram_)) +
  geom_point(alpha = .2)
IC DA CORRELAÇÃO DO TWITTER E DO INSTAGRAM
# Bootstrap statistic for boot(): Pearson correlation between the Twitter and
# Instagram sentiment scores, computed on the rows of `d` selected by `i`.
#   d: data frame with columns sent_twitter_ and sent_instagram_
#   i: row indices to keep (boot() passes the resampled indices here)
# Returns a single numeric value.
s <- function(d, i) {
  d %>%
    slice(i) %>%
    summarise(
      corr_pearson = cor(sent_twitter_, sent_instagram_, method = "pearson")
    ) %>%
    pull(corr_pearson)
}

# Point estimate on the un-resampled data. The index now spans the rows of the
# data frame actually passed in; the original built it from nrow(dados), the
# full 313-row table, which is not the table being indexed.
s(dados_tw_insta, seq_len(nrow(dados_tw_insta))) # theta_chapeu
## [1] 0.2447097
# 2000 bootstrap resamples of the Twitter/Instagram correlation, summarised
# as a 95% "basic" bootstrap confidence interval.
booted <- boot(data = dados_tw_insta, statistic = s, R = 2000)
ci_corr_tw_inst <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "basic"
)
ci_corr_tw_inst
## # A tibble: 1 x 5
## statistic bias std.error conf.low conf.high
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.245 -0.0189 0.230 -0.135 0.761
# Point estimate and confidence interval of the correlation, drawn as a
# horizontal range on the full [-1, 1] correlation scale.
ggplot(
  ci_corr_tw_inst,
  aes(x = "Correlação", y = statistic, ymin = conf.low, ymax = conf.high)
) +
  geom_linerange() +
  geom_point(color = "coral", size = 2) +
  scale_y_continuous(limits = c(-1, 1)) +
  labs(x = "", y = "Correlação de Pearson entre sentimento do Twitter x Instagram") +
  coord_flip()
SENTIMENTO AGRUPADO POR REGIÃO
# Twitter sentiment scores, one panel per region, all rows of `dados`
# (rows without a Twitter score are dropped by ggplot with a warning).
# Only the jittered layer is kept: the original added geom_point() on top of
# geom_jitter(), so every observation appeared twice.
dados %>%
  ggplot(aes(x = sent_twitter_, y = "")) +
  facet_wrap(~ regiao) +
  geom_jitter(alpha = .2, width = .1, height = .1, color = "red") +
  labs(x = "Sentimento Twitter")
## Warning: Removed 232 rows containing missing values (geom_point).
## Warning: Removed 232 rows containing missing values (geom_point).
# In both plots below only the jittered layer is kept: the original added
# geom_point() on top of geom_jitter(), so every observation appeared twice.

# Twitter sentiment per region, restricted to rows with a score on both
# networks (zeros included).
dados_tw_insta_com_zero %>%
  ggplot(aes(x = sent_twitter_, y = "")) +
  facet_wrap(~ regiao) +
  geom_jitter(alpha = .2, width = .1, height = .1, color = "red") +
  labs(x = "Sentimento Twitter")

# Instagram sentiment per region, all rows of `dados` (rows without an
# Instagram score are dropped by ggplot with a warning).
dados %>%
  ggplot(aes(x = sent_instagram_, y = "")) +
  facet_wrap(~ regiao) +
  geom_jitter(alpha = .2, width = .1, height = .1, color = "red") +
  labs(x = "Sentimento Instagram")
## Warning: Removed 159 rows containing missing values (geom_point).
## Warning: Removed 159 rows containing missing values (geom_point).
# Instagram sentiment per region, restricted to rows with a score on both
# networks (zeros included).
# Only the jittered layer is kept: the original added geom_point() on top of
# geom_jitter(), so every observation appeared twice.
dados_tw_insta_com_zero %>%
  ggplot(aes(x = sent_instagram_, y = "")) +
  facet_wrap(~ regiao) +
  geom_jitter(alpha = .2, width = .1, height = .1, color = "red") +
  labs(x = "Sentimento Instagram")
IC DA MÉDIA DO SENTIMENTO DO TWITTER
# Bootstrap statistic for boot(): mean Twitter sentiment score over the rows
# of `d` selected by `i`.
#   d: data frame with a sent_twitter_ column
#   i: row indices to keep (boot() passes the resampled indices here)
# Returns a single numeric value.
s <- function(d, i) {
  d %>%
    slice(i) %>%
    summarise(media_twitter = mean(sent_twitter_)) %>%
    pull(media_twitter)
}

# Point estimate on the un-resampled data. The index now spans the rows of the
# data frame actually passed in; the original built it from nrow(dados), the
# full 313-row table, which is not the table being indexed.
s(dados_tw, seq_len(nrow(dados_tw))) # theta_chapeu
## [1] 0.1741078
# 2000 bootstrap resamples of the mean Twitter score, summarised as a 95%
# "basic" bootstrap confidence interval.
booted <- boot(data = dados_tw, statistic = s, R = 2000)
media_tw_boot <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "basic"
)
media_tw_boot
## # A tibble: 1 x 5
## statistic bias std.error conf.low conf.high
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.174 0.000381 0.0241 0.128 0.221
# Mean Twitter score with its bootstrap interval, drawn as a horizontal range
# on a fixed [-1, 1] axis.
ggplot(
  media_tw_boot,
  aes(x = "Média", y = statistic, ymin = conf.low, ymax = conf.high)
) +
  geom_linerange() +
  geom_point(color = "coral", size = 2) +
  scale_y_continuous(limits = c(-1, 1)) +
  labs(x = "", y = "Média do Twitter") +
  coord_flip()
IC DA MÉDIA DO SENTIMENTO DO INSTAGRAM
# Bootstrap statistic for boot(): mean Instagram sentiment score over the rows
# of `d` selected by `i`.
#   d: data frame with a sent_instagram_ column
#   i: row indices to keep (boot() passes the resampled indices here)
# Returns a single numeric value.
s <- function(d, i) {
  d %>%
    slice(i) %>%
    summarise(media_instagram = mean(sent_instagram_)) %>%
    pull(media_instagram)
}

# Point estimate on the un-resampled data. The index now spans the rows of the
# data frame actually passed in; the original built it from nrow(dados), the
# full 313-row table, which is not the table being indexed.
s(dados_insta, seq_len(nrow(dados_insta))) # theta_chapeu
## [1] 0.2734126
# 2000 bootstrap resamples of the mean Instagram score, summarised as a 95%
# "basic" bootstrap confidence interval.
booted <- boot(data = dados_insta, statistic = s, R = 2000)
media_insta_boot <- tidy(
  booted,
  conf.int = TRUE,
  conf.level = .95,
  conf.method = "basic"
)
media_insta_boot
## # A tibble: 1 x 5
## statistic bias std.error conf.low conf.high
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.273 0.000637 0.0199 0.232 0.310
# Mean Instagram score with its bootstrap interval, drawn as a horizontal
# range on a fixed [-1, 1] axis.
ggplot(
  media_insta_boot,
  aes(x = "Média", y = statistic, ymin = conf.low, ymax = conf.high)
) +
  geom_linerange() +
  geom_point(color = "coral", size = 2) +
  scale_y_continuous(limits = c(-1, 1)) +
  labs(x = "", y = "Média do Instagram") +
  coord_flip()
# Draw one horizontal interval per row of `d`.
#   d: data frame with columns metodo (label), low, mid and high
# Returns the ggplot object. The value axis is fixed to [0, 0.5].
plot_ics <- function(d) {
  d %>%
    ggplot(aes(
      ymin = low,
      y = mid,
      ymax = high,
      x = metodo
    )) +
    geom_linerange() +
    geom_point(color = "coral", size = 3) +
    scale_y_continuous(limits = c(0, .5)) +
    labs(x = "", y = "Médias dos sentimentos") +
    coord_flip()
}

# Compare the two bootstrap intervals. The bounds are read from the bootstrap
# summaries instead of being typed in by hand: the hand-typed figures did not
# match the computed ones (e.g. 0.128 entered as .12, 0.310 as .30) and would
# go stale whenever the bootstrap is re-run.
tribble(
  ~metodo, ~low, ~mid, ~high,
  "Média com IC do Sent Twitter",
  media_tw_boot$conf.low, media_tw_boot$statistic, media_tw_boot$conf.high,
  "Média com IC do Sent Instagram",
  media_insta_boot$conf.low, media_insta_boot$statistic, media_insta_boot$conf.high
) %>%
  plot_ics()
REGRESSÃO LOGISTICA TWITTER
\[ \hat{vitoria} = B0 + B1 * sentimentoTwitter \]
# Win indicator against the Twitter score with a fitted logistic curve, using
# all rows of `dados` (rows without a Twitter score are dropped by ggplot).
# y is the numeric 0/1 column `vitoria`: the original mapped the character
# column `vitoria_` ("S"/"N"), which made the binomial glm inside
# stat_smooth() fail with "y values must be 0 <= y <= 1", so no curve was drawn.
# The redundant geom_point() layer is dropped; geom_jitter() already draws
# every observation once.
dados %>%
  ggplot(aes(x = sent_twitter_, y = vitoria)) +
  stat_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
  geom_jitter(alpha = .5, width = 0.1, height = 0.1) +
  labs(
    x = "Sentimento do Twitter",
    y = "Vitória do candidato"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 232 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1
## Warning: Removed 232 rows containing missing values (geom_point).
## Warning: Removed 232 rows containing missing values (geom_point).
# Win indicator against the Twitter score with a fitted logistic curve,
# restricted to the non-zero Twitter scores (dados_tw).
# y is the numeric 0/1 column `vitoria`: the original mapped the character
# column `vitoria_` ("S"/"N"), which made the binomial glm inside
# stat_smooth() fail with "y values must be 0 <= y <= 1", so no curve was drawn.
# The redundant geom_point() layer is dropped; geom_jitter() already draws
# every observation once.
dados_tw %>%
  ggplot(aes(x = sent_twitter_, y = vitoria)) +
  stat_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
  geom_jitter(alpha = .5, width = 0.1, height = 0.1) +
  labs(
    x = "Sentimento do Twitter",
    y = "Vitória do candidato"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1
modelo_regressao_linear_twitter
# Linear model: win indicator on the raw Twitter score, all rows of `dados`
# (rows with a missing score are excluded by lm()).
ml_tw <- lm(formula = vitoria ~ sent_twitter_, data = dados)
tidy(ml_tw)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.303 0.0636 4.77 0.00000822
## 2 sent_twitter_ 0.284 0.336 0.844 0.401
# Linear model: win indicator on the binary Twitter indicator (score >= 0),
# all rows of `dados`.
ml_tw_geral <- lm(formula = vitoria ~ sent_twitter, data = dados)
tidy(ml_tw_geral)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 2.71e-15 0.210 1.29e-14 1.00
## 2 sent_twitter 3.55e- 1 0.217 1.64e+ 0 0.105
# Linear model: win indicator on the raw Twitter score, non-zero scores only.
ml_tw_so_tw <- lm(formula = vitoria ~ sent_twitter_, data = dados_tw)
tidy(ml_tw_so_tw)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.291 0.0989 2.95 0.00500
## 2 sent_twitter_ 0.320 0.407 0.787 0.435
# Linear model: win indicator on the binary Twitter indicator, non-zero
# scores only.
ml_tw_geral_so_tw <- lm(formula = vitoria ~ sent_twitter, data = dados_tw)
tidy(ml_tw_geral_so_tw)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -8.25e-16 0.211 -3.91e-15 1.00
## 2 sent_twitter 3.86e- 1 0.222 1.74e+ 0 0.0888
# Linear model: win indicator on the raw Twitter score, every row with a
# Twitter score (zeros kept). The reported estimates equal those of the fit
# on `dados`, where rows with a missing score are dropped.
ml_tw_so_tw_com_zero <- lm(
  formula = vitoria ~ sent_twitter_,
  data = dados_tw_com_zero
)
tidy(ml_tw_so_tw_com_zero)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.303 0.0636 4.77 0.00000822
## 2 sent_twitter_ 0.284 0.336 0.844 0.401
# Linear model: win indicator on the binary Twitter indicator, every row with
# a Twitter score (zeros kept).
ml_tw_geral_so_tw_com_zero <- lm(
  formula = vitoria ~ sent_twitter,
  data = dados_tw_com_zero
)
tidy(ml_tw_geral_so_tw_com_zero)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 2.71e-15 0.210 1.29e-14 1.00
## 2 sent_twitter 3.55e- 1 0.217 1.64e+ 0 0.105
summary(ml_tw)
##
## Call:
## lm(formula = vitoria ~ sent_twitter_, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4937 -0.3281 -0.3034 0.6426 0.6966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.30342 0.06358 4.772 8.22e-06 ***
## sent_twitter_ 0.28405 0.33637 0.844 0.401
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4752 on 79 degrees of freedom
## (232 observations deleted due to missingness)
## Multiple R-squared: 0.008946, Adjusted R-squared: -0.003599
## F-statistic: 0.7131 on 1 and 79 DF, p-value: 0.401
summary(ml_tw_geral)
##
## Call:
## lm(formula = vitoria ~ sent_twitter, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3553 -0.3553 -0.3553 0.6447 0.6447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.714e-15 2.099e-01 0.000 1.000
## sent_twitter 3.553e-01 2.167e-01 1.639 0.105
##
## Residual standard error: 0.4694 on 79 degrees of freedom
## (232 observations deleted due to missingness)
## Multiple R-squared: 0.03289, Adjusted R-squared: 0.02065
## F-statistic: 2.687 on 1 and 79 DF, p-value: 0.1051
summary(ml_tw_so_tw)
##
## Call:
## lm(formula = vitoria ~ sent_twitter_, data = dados_tw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5056 -0.3472 -0.3082 0.6354 0.6800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.29123 0.09886 2.946 0.005 **
## sent_twitter_ 0.31997 0.40679 0.787 0.435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4829 on 47 degrees of freedom
## Multiple R-squared: 0.01299, Adjusted R-squared: -0.008008
## F-statistic: 0.6187 on 1 and 47 DF, p-value: 0.4355
summary(ml_tw_geral_so_tw)
##
## Call:
## lm(formula = vitoria ~ sent_twitter, data = dados_tw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3864 -0.3864 -0.3864 0.6136 0.6136
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.247e-16 2.107e-01 0.000 1.0000
## sent_twitter 3.864e-01 2.223e-01 1.738 0.0888 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4711 on 47 degrees of freedom
## Multiple R-squared: 0.06037, Adjusted R-squared: 0.04038
## F-statistic: 3.02 on 1 and 47 DF, p-value: 0.08881
summary(ml_tw_so_tw_com_zero)
##
## Call:
## lm(formula = vitoria ~ sent_twitter_, data = dados_tw_com_zero)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4937 -0.3281 -0.3034 0.6426 0.6966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.30342 0.06358 4.772 8.22e-06 ***
## sent_twitter_ 0.28405 0.33637 0.844 0.401
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4752 on 79 degrees of freedom
## Multiple R-squared: 0.008946, Adjusted R-squared: -0.003599
## F-statistic: 0.7131 on 1 and 79 DF, p-value: 0.401
summary(ml_tw_geral_so_tw_com_zero)
##
## Call:
## lm(formula = vitoria ~ sent_twitter, data = dados_tw_com_zero)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3553 -0.3553 -0.3553 0.6447 0.6447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.714e-15 2.099e-01 0.000 1.000
## sent_twitter 3.553e-01 2.167e-01 1.639 0.105
##
## Residual standard error: 0.4694 on 79 degrees of freedom
## Multiple R-squared: 0.03289, Adjusted R-squared: 0.02065
## F-statistic: 2.687 on 1 and 79 DF, p-value: 0.1051
modelo_regressao_logit_twitter
# Logistic regression: win indicator on the raw Twitter score, all rows of
# `dados` (rows with a missing score are excluded by glm()).
mrl_logit_tw <- glm(
  formula = vitoria ~ sent_twitter_,
  family = binomial(link = "logit"),
  data = dados
)
tidy(mrl_logit_tw)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.831 0.291 -2.85 0.00431
## 2 sent_twitter_ 1.25 1.48 0.845 0.398
# Logistic regression: win indicator on the binary Twitter indicator
# (score >= 0), all rows of `dados`.
# NOTE(review): the reported estimates (about -17.6 and 17.0, both with a
# std. error near 1769) look like separation on the binary predictor - confirm.
mrl_logit_tw_geral <- glm(
  formula = vitoria ~ sent_twitter,
  family = binomial(link = "logit"),
  data = dados
)
tidy(mrl_logit_tw_geral)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -17.6 1769. -0.00993 0.992
## 2 sent_twitter 17.0 1769. 0.00959 0.992
# Logistic regression: win indicator on the raw Twitter score, non-zero
# scores only.
mrl_logit_tw_so_tw <- glm(
  formula = vitoria ~ sent_twitter_,
  family = binomial(link = "logit"),
  data = dados_tw
)
tidy(mrl_logit_tw_so_tw)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.889 0.451 -1.97 0.0486
## 2 sent_twitter_ 1.43 1.80 0.790 0.429
# Logistic regression: win indicator on the binary Twitter indicator,
# non-zero scores only.
mrl_logit_tw_geral_so_tw <- glm(
  formula = vitoria ~ sent_twitter,
  family = binomial(link = "logit"),
  data = dados_tw
)
tidy(mrl_logit_tw_geral_so_tw)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -17.6 1769. -0.00993 0.992
## 2 sent_twitter 17.1 1769. 0.00967 0.992
# Logistic regression: win indicator on the raw Twitter score, every row with
# a Twitter score (zeros kept).
mrl_logit_tw_so_tw_com_zero <- glm(
  formula = vitoria ~ sent_twitter_,
  family = binomial(link = "logit"),
  data = dados_tw_com_zero
)
tidy(mrl_logit_tw_so_tw_com_zero)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.831 0.291 -2.85 0.00431
## 2 sent_twitter_ 1.25 1.48 0.845 0.398
# Logistic regression: win indicator on the binary Twitter indicator, every
# row with a Twitter score (zeros kept).
mrl_logit_tw_geral_so_tw_com_zero <- glm(
  formula = vitoria ~ sent_twitter,
  family = binomial(link = "logit"),
  data = dados_tw_com_zero
)
tidy(mrl_logit_tw_geral_so_tw_com_zero)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -17.6 1769. -0.00993 0.992
## 2 sent_twitter 17.0 1769. 0.00959 0.992
summary(mrl_logit_tw)
##
## Call:
## glm(formula = vitoria ~ sent_twitter_, family = binomial(link = "logit"),
## data = dados)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1809 -0.8898 -0.8504 1.4373 1.5444
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8310 0.2911 -2.855 0.00431 **
## sent_twitter_ 1.2526 1.4827 0.845 0.39822
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 103.12 on 80 degrees of freedom
## Residual deviance: 102.40 on 79 degrees of freedom
## (232 observations deleted due to missingness)
## AIC: 106.4
##
## Number of Fisher Scoring iterations: 4
summary(mrl_logit_tw_geral)
##
## Call:
## glm(formula = vitoria ~ sent_twitter, family = binomial(link = "logit"),
## data = dados)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9369 -0.9369 -0.9369 1.4387 1.4387
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.57 1769.26 -0.01 0.992
## sent_twitter 16.97 1769.26 0.01 0.992
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 103.115 on 80 degrees of freedom
## Residual deviance: 98.898 on 79 degrees of freedom
## (232 observations deleted due to missingness)
## AIC: 102.9
##
## Number of Fisher Scoring iterations: 16
summary(mrl_logit_tw_so_tw)
##
## Call:
## glm(formula = vitoria ~ sent_twitter_, family = binomial(link = "logit"),
## data = dados_tw)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2054 -0.9204 -0.8567 1.4236 1.5128
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8891 0.4508 -1.972 0.0486 *
## sent_twitter_ 1.4252 1.8031 0.790 0.4293
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 63.262 on 48 degrees of freedom
## Residual deviance: 62.624 on 47 degrees of freedom
## AIC: 66.624
##
## Number of Fisher Scoring iterations: 4
summary(mrl_logit_tw_geral_so_tw)
##
## Call:
## glm(formula = vitoria ~ sent_twitter, family = binomial(link = "logit"),
## data = dados_tw)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9883 -0.9883 -0.9883 1.3791 1.3791
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.57 1769.26 -0.01 0.992
## sent_twitter 17.10 1769.26 0.01 0.992
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 63.262 on 48 degrees of freedom
## Residual deviance: 58.704 on 47 degrees of freedom
## AIC: 62.704
##
## Number of Fisher Scoring iterations: 16
summary(mrl_logit_tw_so_tw_com_zero)
##
## Call:
## glm(formula = vitoria ~ sent_twitter_, family = binomial(link = "logit"),
## data = dados_tw_com_zero)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1809 -0.8898 -0.8504 1.4373 1.5444
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8310 0.2911 -2.855 0.00431 **
## sent_twitter_ 1.2526 1.4827 0.845 0.39822
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 103.12 on 80 degrees of freedom
## Residual deviance: 102.40 on 79 degrees of freedom
## AIC: 106.4
##
## Number of Fisher Scoring iterations: 4
summary(mrl_logit_tw_geral_so_tw_com_zero)
##
## Call:
## glm(formula = vitoria ~ sent_twitter, family = binomial(link = "logit"),
## data = dados_tw_com_zero)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9369 -0.9369 -0.9369 1.4387 1.4387
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.57 1769.26 -0.01 0.992
## sent_twitter 16.97 1769.26 0.01 0.992
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 103.115 on 80 degrees of freedom
## Residual deviance: 98.898 on 79 degrees of freedom
## AIC: 102.9
##
## Number of Fisher Scoring iterations: 16
REGRESSÃO LOGISTICA INSTAGRAM
\[ \hat{vitoria} = B0 + B1 * sentimentoInstagram \]
# Win indicator against the Instagram score with a fitted logistic curve,
# using all rows of `dados` (rows without an Instagram score are dropped by
# ggplot).
# y is the numeric 0/1 column `vitoria`: the original mapped the character
# column `vitoria_` ("S"/"N"), which made the binomial glm inside
# stat_smooth() fail with "y values must be 0 <= y <= 1", so no curve was drawn.
# The redundant geom_point() layer is dropped; geom_jitter() already draws
# every observation once.
dados %>%
  ggplot(aes(x = sent_instagram_, y = vitoria)) +
  stat_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
  geom_jitter(alpha = .5, width = 0.1, height = 0.1) +
  labs(
    x = "Sentimento do Instagram",
    y = "Vitória do candidato"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 159 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1
## Warning: Removed 159 rows containing missing values (geom_point).
## Warning: Removed 159 rows containing missing values (geom_point).
# Win indicator against the Instagram score with a fitted logistic curve,
# restricted to the non-zero Instagram scores (dados_insta).
# y is the numeric 0/1 column `vitoria`: the original mapped the character
# column `vitoria_` ("S"/"N"), which made the binomial glm inside
# stat_smooth() fail with "y values must be 0 <= y <= 1", so no curve was drawn.
# The redundant geom_point() layer is dropped; geom_jitter() already draws
# every observation once.
dados_insta %>%
  ggplot(aes(x = sent_instagram_, y = vitoria)) +
  stat_smooth(method = "glm", method.args = list(family = "binomial"), se = FALSE) +
  geom_jitter(alpha = .5, width = 0.1, height = 0.1) +
  labs(
    x = "Sentimento do Instagram",
    y = "Vitória do candidato"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Computation failed in `stat_smooth()`:
## y values must be 0 <= y <= 1
modelo_regressao_linear_insta
# Linear model: win indicator on the raw Instagram score, all rows of `dados`
# (rows with a missing score are excluded by lm()).
ml_insta <- lm(formula = vitoria ~ sent_instagram_, data = dados)
tidy(ml_insta)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.344 0.0487 7.06 5.54e-11
## 2 sent_instagram_ 0.191 0.213 0.897 3.71e- 1
# Linear model: win indicator on the binary Instagram indicator (score >= 0),
# all rows of `dados`.
ml_insta_geral <- lm(formula = vitoria ~ sent_instagram, data = dados)
tidy(ml_insta_geral)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.56e-14 0.485 3.22e-14 1.00
## 2 sent_instagram 3.73e- 1 0.487 7.66e- 1 0.445
# Linear model: win indicator on the raw Instagram score, non-zero scores only.
# The author flagged this fit as significant; the reported p-value for the
# slope is 0.08, which clears the 10% level but not the 5% level.
ml_insta_so_insta <- lm(formula = vitoria ~ sent_instagram_, data = dados_insta)
tidy(ml_insta_so_insta)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.639 0.106 6.04 0.0000000543
## 2 sent_instagram_ -0.580 0.327 -1.77 0.0800
# Linear model: win indicator on the binary Instagram indicator, non-zero
# scores only.
ml_insta_geral_so_insta <- lm(
  formula = vitoria ~ sent_instagram,
  data = dados_insta
)
tidy(ml_insta_geral_so_insta)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 2.33e-15 0.503 4.63e-15 1.00
## 2 sent_instagram 4.87e- 1 0.506 9.61e- 1 0.339
# Linear model: win indicator on the raw Instagram score, every row with an
# Instagram score (zeros kept). The reported estimates equal those of the fit
# on `dados`, where rows with a missing score are dropped.
ml_insta_so_insta_com_zero <- lm(
  formula = vitoria ~ sent_instagram_,
  data = dados_insta_com_zero
)
tidy(ml_insta_so_insta_com_zero)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.344 0.0487 7.06 5.54e-11
## 2 sent_instagram_ 0.191 0.213 0.897 3.71e- 1
# Linear model: win indicator on the binary Instagram indicator, every row
# with an Instagram score (zeros kept).
ml_insta_geral_so_insta_com_zero <- lm(
  formula = vitoria ~ sent_instagram,
  data = dados_insta_com_zero
)
tidy(ml_insta_geral_so_insta_com_zero)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.56e-14 0.485 3.22e-14 1.00
## 2 sent_instagram 3.73e- 1 0.487 7.66e- 1 0.445
summary(ml_insta)
##
## Call:
## lm(formula = vitoria ~ sent_instagram_, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5064 -0.3512 -0.3440 0.6148 0.6560
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.34400 0.04872 7.060 5.54e-11 ***
## sent_instagram_ 0.19112 0.21303 0.897 0.371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4847 on 152 degrees of freedom
## (159 observations deleted due to missingness)
## Multiple R-squared: 0.005267, Adjusted R-squared: -0.001277
## F-statistic: 0.8048 on 1 and 152 DF, p-value: 0.3711
summary(ml_insta_geral)
##
## Call:
## lm(formula = vitoria ~ sent_instagram, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3725 -0.3725 -0.3725 0.6274 0.6274
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.560e-14 4.851e-01 0.000 1.000
## sent_instagram 3.725e-01 4.867e-01 0.766 0.445
##
## Residual standard error: 0.4851 on 152 degrees of freedom
## (159 observations deleted due to missingness)
## Multiple R-squared: 0.003841, Adjusted R-squared: -0.002713
## F-statistic: 0.586 on 1 and 152 DF, p-value: 0.4451
summary(ml_insta_so_insta)
##
## Call:
## lm(formula = vitoria ~ sent_instagram_, data = dados_insta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6624 -0.4883 -0.1749 0.4847 0.9412
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6392 0.1058 6.043 5.43e-08 ***
## sent_instagram_ -0.5805 0.3271 -1.775 0.08 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4959 on 75 degrees of freedom
## Multiple R-squared: 0.0403, Adjusted R-squared: 0.02751
## F-statistic: 3.15 on 1 and 75 DF, p-value: 0.08
summary(ml_insta_geral_so_insta)
##
## Call:
## lm(formula = vitoria ~ sent_instagram, data = dados_insta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4868 -0.4868 -0.4868 0.5132 0.5132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.328e-15 5.031e-01 0.000 1.000
## sent_instagram 4.868e-01 5.064e-01 0.961 0.339
##
## Residual standard error: 0.5031 on 75 degrees of freedom
## Multiple R-squared: 0.01217, Adjusted R-squared: -0.001
## F-statistic: 0.9241 on 1 and 75 DF, p-value: 0.3395
summary(ml_insta_so_insta_com_zero)
##
## Call:
## lm(formula = vitoria ~ sent_instagram_, data = dados_insta_com_zero)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5064 -0.3512 -0.3440 0.6148 0.6560
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.34400 0.04872 7.060 5.54e-11 ***
## sent_instagram_ 0.19112 0.21303 0.897 0.371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4847 on 152 degrees of freedom
## Multiple R-squared: 0.005267, Adjusted R-squared: -0.001277
## F-statistic: 0.8048 on 1 and 152 DF, p-value: 0.3711
summary(ml_insta_geral_so_insta_com_zero)
##
## Call:
## lm(formula = vitoria ~ sent_instagram, data = dados_insta_com_zero)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3725 -0.3725 -0.3725 0.6274 0.6274
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.560e-14 4.851e-01 0.000 1.000
## sent_instagram 3.725e-01 4.867e-01 0.766 0.445
##
## Residual standard error: 0.4851 on 152 degrees of freedom
## Multiple R-squared: 0.003841, Adjusted R-squared: -0.002713
## F-statistic: 0.586 on 1 and 152 DF, p-value: 0.4451
modelo_regressao_logit_insta
# Logistic regression: win indicator on the raw Instagram score, all rows of
# `dados` (rows with a missing score are excluded by glm()).
mrl_logit_insta <- glm(
  formula = vitoria ~ sent_instagram_,
  family = binomial(link = "logit"),
  data = dados
)
tidy(mrl_logit_insta)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.644 0.210 -3.06 0.00221
## 2 sent_instagram_ 0.803 0.897 0.895 0.371
# Logistic regression: win indicator on the binary Instagram indicator
# (score >= 0), all rows of `dados`.
# NOTE(review): the reported estimates (about -14.6 and 14.0, both with a
# std. error near 883) look like separation on the binary predictor - confirm.
mrl_logit_insta_geral <- glm(
  formula = vitoria ~ sent_instagram,
  family = binomial(link = "logit"),
  data = dados
)
tidy(mrl_logit_insta_geral)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -14.6 883. -0.0165 0.987
## 2 sent_instagram 14.0 883. 0.0159 0.987
# Logistic regression: win indicator on the raw Instagram score, non-zero
# scores only.
# The author flagged this fit as significant; the reported p-value for the
# slope is 0.0926, which clears the 10% level but not the 5% level.
mrl_logit_insta_so_insta <- glm(
  formula = vitoria ~ sent_instagram_,
  family = binomial(link = "logit"),
  data = dados_insta
)
tidy(mrl_logit_insta_so_insta)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.625 0.468 1.34 0.182
## 2 sent_instagram_ -2.63 1.56 -1.68 0.0926
# Logistic regression: win indicator on the binary Instagram indicator,
# non-zero scores only.
mrl_logit_insta_geral_so_insta <- glm(
  formula = vitoria ~ sent_instagram,
  family = binomial(link = "logit"),
  data = dados_insta
)
tidy(mrl_logit_insta_geral_so_insta)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -15.6 1455. -0.0107 0.991
## 2 sent_instagram 15.5 1455. 0.0107 0.991
# Logistic regression: win indicator on the raw Instagram score, every row
# with an Instagram score (zeros kept).
mrl_logit_insta_so_insta_com_zero <- glm(
  formula = vitoria ~ sent_instagram_,
  family = binomial(link = "logit"),
  data = dados_insta_com_zero
)
tidy(mrl_logit_insta_so_insta_com_zero)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.644 0.210 -3.06 0.00221
## 2 sent_instagram_ 0.803 0.897 0.895 0.371
# Logistic regression: win indicator on the binary Instagram indicator, every
# row with an Instagram score (zeros kept).
mrl_logit_insta_geral_so_insta_com_zero <- glm(
  formula = vitoria ~ sent_instagram,
  family = binomial(link = "logit"),
  data = dados_insta_com_zero
)
tidy(mrl_logit_insta_geral_so_insta_com_zero)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -14.6 883. -0.0165 0.987
## 2 sent_instagram 14.0 883. 0.0159 0.987
summary(mrl_logit_insta)
##
## Call:
## glm(formula = vitoria ~ sent_instagram_, family = binomial(link = "logit"),
## data = dados)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1938 -0.9302 -0.9189 1.3827 1.4601
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6438 0.2104 -3.060 0.00221 **
## sent_instagram_ 0.8027 0.8969 0.895 0.37080
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 202.98 on 153 degrees of freedom
## Residual deviance: 202.18 on 152 degrees of freedom
## (159 observations deleted due to missingness)
## AIC: 206.18
##
## Number of Fisher Scoring iterations: 4
summary(mrl_logit_insta_geral)
##
## Call:
## glm(formula = vitoria ~ sent_instagram, family = binomial(link = "logit"),
## data = dados)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9655 -0.9655 -0.9655 1.4053 1.4053
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -14.57 882.74 -0.017 0.987
## sent_instagram 14.04 882.74 0.016 0.987
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 202.98 on 153 degrees of freedom
## Residual deviance: 202.05 on 152 degrees of freedom
## (159 observations deleted due to missingness)
## AIC: 206.05
##
## Number of Fisher Scoring iterations: 13
summary(mrl_logit_insta_so_insta)
##
## Call:
## glm(formula = vitoria ~ sent_instagram_, family = binomial(link = "logit"),
## data = dados_insta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.499 -1.153 -0.642 1.150 2.062
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6254 0.4682 1.336 0.1816
## sent_instagram_ -2.6251 1.5610 -1.682 0.0926 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 106.63 on 76 degrees of freedom
## Residual deviance: 103.32 on 75 degrees of freedom
## AIC: 107.32
##
## Number of Fisher Scoring iterations: 4
summary(mrl_logit_insta_geral_so_insta)
##
## Call:
## glm(formula = vitoria ~ sent_instagram, family = binomial(link = "logit"),
## data = dados_insta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.155 -1.155 -1.155 1.200 1.200
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -15.57 1455.40 -0.011 0.991
## sent_instagram 15.51 1455.40 0.011 0.991
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 106.63 on 76 degrees of freedom
## Residual deviance: 105.31 on 75 degrees of freedom
## AIC: 109.31
##
## Number of Fisher Scoring iterations: 14
summary(mrl_logit_insta_so_insta_com_zero)
##
## Call:
## glm(formula = vitoria ~ sent_instagram_, family = binomial(link = "logit"),
## data = dados_insta_com_zero)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1938 -0.9302 -0.9189 1.3827 1.4601
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6438 0.2104 -3.060 0.00221 **
## sent_instagram_ 0.8027 0.8969 0.895 0.37080
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 202.98 on 153 degrees of freedom
## Residual deviance: 202.18 on 152 degrees of freedom
## AIC: 206.18
##
## Number of Fisher Scoring iterations: 4
summary(mrl_logit_insta_geral_so_insta_com_zero)
##
## Call:
## glm(formula = vitoria ~ sent_instagram, family = binomial(link = "logit"),
## data = dados_insta_com_zero)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9655 -0.9655 -0.9655 1.4053 1.4053
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -14.57 882.74 -0.017 0.987
## sent_instagram 14.04 882.74 0.016 0.987
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 202.98 on 153 degrees of freedom
## Residual deviance: 202.05 on 152 degrees of freedom
## AIC: 206.05
##
## Number of Fisher Scoring iterations: 13