library(tidyverse)
library(here)
library(tidyr)
library(broom)
library(boot)
# Load the candidate sentiment dataset, declaring the type of every column
# explicitly. read_csv2() treats ',' as the decimal mark and '.' as the
# grouping mark.
dados <- read_csv2(
  here("data/sentimento31.csv"),
  col_types = cols(
    id = col_double(),
    regiao = col_character(),
    estado = col_character(),
    sigla = col_character(),
    vitoria_ = col_character(),
    usa_twitter_ = col_character(),
    sent_twitter_ = col_double(),
    usa_instagram_ = col_character(),
    sent_instagram_ = col_double(),
    anos_ = col_number(),
    foi_diretor_ = col_character(),
    foi_coordenador_ = col_character(),
    foi_fg_ = col_character(),
    publicacoes_twitter_ = col_double(),
    seguidores_twitter_ = col_double(),
    seguindo_twitter_ = col_double(),
    publicacoes_instagram_ = col_double(),
    seguidores_instagram_ = col_double(),
    seguindo_instagram_ = col_double()
  )
)
## i Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
# Encode the outcome as a numeric 0/1 indicator: 1 when vitoria_ is "S",
# 0 otherwise (NA stays NA).
dados <- mutate(dados, vitoria = as.numeric(vitoria_ == "S"))
glimpse(dados)
## Rows: 313
## Columns: 20
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, ~
## $ regiao <chr> "Centro-Oeste", "Centro-Oeste", "Centro-Oeste",~
## $ estado <chr> "Distrito Federal", "Distrito Federal", "Distri~
## $ sigla <chr> "IF", "IF", "IF", "IF", "IF", "IF", "IF", "IF",~
## $ vitoria_ <chr> "N", "S", "N", "N", "N", "S", "S", "N", "N", "N~
## $ usa_twitter_ <chr> "N", "S", "N", "N", "N", "S", "N", "N", "N", "N~
## $ sent_twitter_ <dbl> NA, 0.1756916, NA, NA, NA, 0.1222222, NA, NA, N~
## $ usa_instagram_ <chr> "N", "S", "N", "S", "N", "N", "S", "S", "S", "N~
## $ sent_instagram_ <dbl> NA, 0.17150728, NA, 0.00000000, NA, NA, 0.39009~
## $ anos_ <dbl> 13.10, 11.57, 18.32, 10.25, 10.83, 37.66, 26.54~
## $ foi_diretor_ <chr> "S", "S", "S", "S", "N", "S", "S", "S", "N", "N~
## $ foi_coordenador_ <chr> "S", "N", "S", "S", "S", "N", "N", "N", "N", "N~
## $ foi_fg_ <chr> "N", "N", "S", "N", "N", "N", "N", "N", "N", "N~
## $ publicacoes_twitter_ <dbl> NA, 142, NA, NA, NA, 27, NA, NA, NA, NA, NA, NA~
## $ seguidores_twitter_ <dbl> NA, 55, NA, NA, NA, 167, NA, NA, NA, NA, NA, NA~
## $ seguindo_twitter_ <dbl> NA, 138, NA, NA, NA, 639, NA, NA, NA, NA, NA, N~
## $ publicacoes_instagram_ <dbl> NA, 349, NA, 27, NA, NA, 131, 332, 502, NA, 57,~
## $ seguidores_instagram_ <dbl> NA, 1509, NA, 204, NA, NA, 1420, 1453, 20200, N~
## $ seguindo_instagram_ <dbl> NA, 904, NA, 229, NA, NA, 724, 2366, 4757, NA, ~
## $ vitoria <dbl> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0,~
Removendo os que não utilizam nem Twitter nem Instagram e os que têm sentimento ausente ou igual a zero em alguma das duas redes
# Keep rows that are not "N" on both networks and whose Twitter and Instagram
# sentiment scores are both different from zero.
# filter() also drops rows where a condition evaluates to NA, so rows with a
# missing sentiment score on either network are removed here as well.
dados <- dados %>%
  filter(
    usa_twitter_ != "N" | usa_instagram_ != "N",
    sent_twitter_ != 0,
    sent_instagram_ != 0
  )
glimpse(dados)
## Rows: 23
## Columns: 20
## $ id <dbl> 2, 15, 28, 41, 59, 66, 76, 81, 95, 103, 111, 11~
## $ regiao <chr> "Centro-Oeste", "Nordeste", "Nordeste", "Nordes~
## $ estado <chr> "Distrito Federal", "Alagoas", "Maranh\xe3o", "~
## $ sigla <chr> "IF", "IF", "IF", "IF", "IF", "IF", "IF", "IF",~
## $ vitoria_ <chr> "S", "S", "N", "N", "N", "N", "N", "S", "S", "S~
## $ usa_twitter_ <chr> "S", "S", "S", "S", "S", "S", "S", "S", "S", "S~
## $ sent_twitter_ <dbl> 0.17569160, 0.43636364, 0.05317443, 0.11137336,~
## $ usa_instagram_ <chr> "S", "S", "S", "S", "S", "S", "S", "S", "S", "S~
## $ sent_instagram_ <dbl> 0.17150728, 0.19174603, 0.20689689, 0.36987944,~
## $ anos_ <dbl> 11.57, 26.89, 13.23, 16.79, 10.86, 18.33, 11.64~
## $ foi_diretor_ <chr> "S", "S", "N", "S", "N", "N", "S", "S", "S", "S~
## $ foi_coordenador_ <chr> "N", "N", "N", "N", "N", "S", "N", "N", "N", "N~
## $ foi_fg_ <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N~
## $ publicacoes_twitter_ <dbl> 142, 11, 67, 18, 132, 1, 65, 204, 1, 164, 375, ~
## $ seguidores_twitter_ <dbl> 55, 2, 26, 35, 99, 47, 9, 15, 3, 24, 293, 29, 4~
## $ seguindo_twitter_ <dbl> 138, 0, 63, 397, 310, 15, 199, 2, 0, 21, 616, 7~
## $ publicacoes_instagram_ <dbl> 349, 64, 231, 143, 43, 44, 62, 37, 845, 290, 58~
## $ seguidores_instagram_ <dbl> 1509, 1003, 1830, 1226, 918, 1166, 49, 638, 186~
## $ seguindo_instagram_ <dbl> 904, 583, 1997, 1705, 421, 2252, 104, 742, 73, ~
## $ vitoria <dbl> 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0,~
22 candidatos utilizam o Twitter
# Tally the remaining rows by whether usa_twitter_ equals "S".
count(dados, usa_twitter_ == "S")
## # A tibble: 2 x 2
## `usa_twitter_ == "S"` n
## <lgl> <int>
## 1 FALSE 1
## 2 TRUE 22
23 candidatos utilizam o Instagram
# Tally the remaining rows by whether usa_instagram_ equals "S".
count(dados, usa_instagram_ == "S")
## # A tibble: 1 x 2
## `usa_instagram_ == "S"` n
## <lgl> <int>
## 1 TRUE 23
O Sentimento do Twitter
# Strip plot of the Twitter sentiment of every candidate on a single axis.
# geom_jitter() already draws the points (it is geom_point() with jittered
# positions), so the extra geom_point() layer, which plotted every candidate
# a second time, was removed.
dados %>%
  ggplot(mapping = aes(y = sent_twitter_, x = "candidato")) +
  geom_jitter(alpha = 0.1, width = .04)
O Sentimento do Twitter x vitoria_
# Twitter sentiment split by the election outcome column vitoria_.
# Only geom_jitter() is kept: it is geom_point() with jittered positions, so
# stacking both layers drew every candidate twice.
dados %>%
  ggplot(mapping = aes(y = sent_twitter_, x = vitoria_)) +
  geom_jitter(alpha = 0.1, width = .04)
O Sentimento do Instagram
# Strip plot of the Instagram sentiment of every candidate on a single axis.
# geom_jitter() already draws the points (it is geom_point() with jittered
# positions), so the extra geom_point() layer, which plotted every candidate
# a second time, was removed.
dados %>%
  ggplot(mapping = aes(y = sent_instagram_, x = "candidato")) +
  geom_jitter(alpha = 0.1, width = .02)
O Sentimento do Instagram x vitoria_
# Instagram sentiment split by the election outcome column vitoria_.
# Only geom_jitter() is kept: it is geom_point() with jittered positions, so
# stacking both layers drew every candidate twice.
dados %>%
  ggplot(mapping = aes(y = sent_instagram_, x = vitoria_)) +
  geom_jitter(alpha = 0.1, width = .02)
# Scatter plot of Instagram sentiment (y) against Twitter sentiment (x).
ggplot(dados, aes(x = sent_twitter_, y = sent_instagram_)) +
  geom_point()
Existe uma correlação fraca entre o sentimento do twitter e o sentimento do instagram
# Correlation (cor() default method) between the two sentiment scores.
cor(dados[["sent_twitter_"]], dados[["sent_instagram_"]])
## [1] 0.2447097
A correlação entre a vitoria e o sentimento do twitter não é relevante
# Correlation between the 0/1 outcome and the Twitter sentiment score.
cor(dados[["vitoria"]], dados[["sent_twitter_"]])
## [1] -0.001741561
Existe uma correlação negativa média entre a vitoria e o sentimento do instagram
# Correlation between the 0/1 outcome and the Instagram sentiment score.
cor(dados[["vitoria"]], dados[["sent_instagram_"]])
## [1] -0.487012
Histogramas
# Histogram of Instagram sentiment (bin width 0.09) with a rug of the raw
# values, drawn in one panel per sigla.
ggplot(dados, aes(x = sent_instagram_)) +
  geom_histogram(binwidth = .09, fill = "coral", color = "black") +
  geom_rug() +
  facet_wrap(~sigla)
Regressões
Regressão logística: vitoria e sentimento do twitter e instagram para institutos e universidades
Existe uma relação inversa entre a vitoria e o sentimento do instagram com efeito relevante
# Logistic regression of the 0/1 outcome `vitoria` on both sentiment scores.
# glm() defaults to family = gaussian (an ordinary linear model), so the
# binomial family with a logit link is requested explicitly to make the fit
# the logistic regression this section describes.
# NOTE(review): the summary rendered after this call was produced with the
# gaussian default and has to be regenerated.
logit <- glm(
  vitoria ~ sent_twitter_ + sent_instagram_,
  family = binomial(link = "logit"),
  data = dados
)
summary(logit)
##
## Call:
## glm(formula = vitoria ~ sent_twitter_ + sent_instagram_, data = dados)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.68469 -0.41182 -0.00032 0.36939 0.78606
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0724 0.2947 3.638 0.00164 **
## sent_twitter_ 0.5536 0.8841 0.626 0.53825
## sent_instagram_ -2.3814 0.9178 -2.595 0.01732 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.2146865)
##
## Null deviance: 5.7391 on 22 degrees of freedom
## Residual deviance: 4.2937 on 20 degrees of freedom
## AIC: 34.669
##
## Number of Fisher Scoring iterations: 2
regressao vitoria com twitter e instagram, para universidade
sem efeito para somente universidades
# Same model restricted to the rows whose sigla is "UF".
dados_uf <- filter(dados, sigla == "UF")

# glm() defaults to family = gaussian (an ordinary linear model); the
# binomial family with a logit link is requested explicitly so that the 0/1
# outcome is modelled with a logistic regression.
# NOTE(review): the summary rendered after this call was produced with the
# gaussian default and has to be regenerated.
logit <- glm(
  vitoria ~ sent_twitter_ + sent_instagram_,
  family = binomial(link = "logit"),
  data = dados_uf
)
summary(logit)
##
## Call:
## glm(formula = vitoria ~ sent_twitter_ + sent_instagram_, data = dados_uf)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6949 -0.5024 0.2583 0.4287 0.5476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9926 0.6030 1.646 0.138
## sent_twitter_ -0.3678 2.0873 -0.176 0.865
## sent_instagram_ -1.1942 2.1366 -0.559 0.592
##
## (Dispersion parameter for gaussian family taken to be 0.3171569)
##
## Null deviance: 2.7273 on 10 degrees of freedom
## Residual deviance: 2.5373 on 8 degrees of freedom
## AIC: 23.082
##
## Number of Fisher Scoring iterations: 2
regressao vitoria twitter e instagram, para instituto federal
Existe efeito do sentimento do instagram na vitoria para os reitores do instituto federal
# Same model restricted to the rows whose sigla is "IF".
dados_if <- filter(dados, sigla == "IF")

# glm() defaults to family = gaussian (an ordinary linear model); the
# binomial family with a logit link is requested explicitly so that the 0/1
# outcome is modelled with a logistic regression.
# NOTE(review): the summary rendered after this call was produced with the
# gaussian default and has to be regenerated.
logit <- glm(
  vitoria ~ sent_twitter_ + sent_instagram_,
  family = binomial(link = "logit"),
  data = dados_if
)
summary(logit)
##
## Call:
## glm(formula = vitoria ~ sent_twitter_ + sent_instagram_, data = dados_if)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.54114 -0.25454 0.03828 0.27565 0.45142
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1356 0.3123 3.636 0.00543 **
## sent_twitter_ 0.8342 0.8831 0.945 0.36948
## sent_instagram_ -3.2134 0.9758 -3.293 0.00933 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1489052)
##
## Null deviance: 3.0000 on 11 degrees of freedom
## Residual deviance: 1.3401 on 9 degrees of freedom
## AIC: 15.749
##
## Number of Fisher Scoring iterations: 2
Em média o sentimento demonstrado no IF é menor do que o sentimento da UF
# Mean Instagram sentiment for each sigla.
summarise(
  group_by(dados, sigla),
  media = mean(sent_instagram_),
  .groups = "drop"
)
## # A tibble: 2 x 2
## sigla media
## <chr> <dbl>
## 1 IF 0.253
## 2 UF 0.31
# Bootstrap statistic in the form boot() calls it.
#
# d: data frame with the columns `sigla` and `sent_instagram_`.
# i: row indices selecting the resample.
#
# Returns the mean of `sent_instagram_` among the resampled "UF" rows minus
# the mean among the resampled "IF" rows.
theta <- function(d, i) {
  medias <- summarise(
    group_by(slice(d, i), sigla),
    media = mean(sent_instagram_),
    .groups = "drop"
  )
  # %in% never yields NA, so a missing sigla cannot leak into the subset.
  media_uf <- medias$media[medias$sigla %in% "UF"]
  media_if <- medias$media[medias$sigla %in% "IF"]
  media_uf - media_if
}
# Bootstrap the statistic computed by theta() with 2000 resamples, then
# extract the estimate together with a 95% interval using the "bca" method.
# NOTE(review): no set.seed() call is visible before this step, so the
# resampled values presumably change from run to run.
booted <- boot(dados, theta, R = 2000)
ci <- tidy(booted, conf.int = TRUE, conf.level = .95, conf.method = "bca")
glimpse(ci)
## Rows: 1
## Columns: 5
## $ statistic <dbl> 0.05688691
## $ bias <dbl> 0.001887328
## $ std.error <dbl> 0.04414287
## $ conf.low <dbl> -0.03383139
## $ conf.high <dbl> 0.1401579
Não é possível afirmar se existe uma diferença em média entre o sentimento do instagram das universidades e dos institutos federais, conforme análise do IC
# Draw the bootstrap estimate as a point with its confidence interval as a
# range; coord_flip() lays the interval out horizontally and the value axis
# is fixed to [-1, 1].
ggplot(
  ci,
  aes(
    x = "Média do Sentimento do Instagram",
    y = statistic,
    ymin = conf.low,
    ymax = conf.high
  )
) +
  geom_linerange() +
  geom_point(color = "coral", size = 2) +
  labs(x = "", y = "Diferença entre as médias do sentimento do instagram entre instituto e universidade") +
  scale_y_continuous(limits = c(-1, 1)) +
  coord_flip()