









---
title: "R Notebook"
output: html_notebook
---

```{r setup, include=FALSE}
# Global chunk defaults: hide code, results, messages and warnings unless a
# chunk overrides these options in its own header.
knitr::opts_chunk$set(include = FALSE,echo=FALSE,message=FALSE,warning=FALSE)
```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}
# Twitter handle of the account whose timeline will be downloaded and analysed.

usuario<-"@colosioriojas"

# Tip from the original author: pressing Ctrl+Alt+R runs the whole notebook.
```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# Make sure every package the notebook needs is installed, then attach the
# ones that are used unqualified.

# Packages attached with library().
packages <- c("tidytext", "stringi", "tidyverse", "rtweet", "lubridate", "scales", "syuzhet")

# "tm" is only called as tm::stopwords() further down, so it must be
# installed but does not need to be attached.
required_packages <- c(packages, "tm")

# Install whatever is missing before loading anything.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

invisible(lapply(packages, library, character.only = TRUE))

```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}
# Download the timeline of `usuario` (requesting up to n = 3200 tweets).
# Note from the original author: the first query opens a dialog window with
# Twitter, and firewalls may interfere with it.


datos <- get_timeline(usuario,n=3200)

```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}
# Export the raw timeline to "datos_<usuario>.csv".
a <- sprintf("datos_%s.csv", usuario)

write_as_csv(datos, file_name = a)
```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# Clean the sentiment lexicon before scoring.

# Spanish NRC lexicon as returned by get_sentiment_dictionary().
dictionario <- get_sentiment_dictionary("nrc", language = "spanish")

x <- dictionario %>%
  filter(
    # Remove "epidemia" from the anger and anticipation categories only.
    !(sentiment == "anger" & word %in% c("epidemia")),
    !(sentiment == "anticipation" & word %in% c("epidemia")),
    # Remove these words from every category (the list contains harmless
    # repeats; presumably they are domain terms that should not carry
    # sentiment here).
    !(word %in% c(
      "gobierno", "compromiso", "informe", "maría", "grupo", "jornada",
      "tarde", "alto", "fondo", "marías", "madre", "vivienda", "presidente",
      "general", "gobernador", "guerrero", "justicia", "hacer", "director",
      "días", "así", "guardia", "maría", "hidalgo", "gabinete", "candidato",
      "electo", "oficial", "niñas", "voto", "palabras", "aeropuerto",
      "consejo", "defensa", "consejo", "empresa", "voto", "secretaría",
      "extranjero", "congreso", "hospital", "fondo", "arte", "madre",
      "premio", "grande", "periódico", "desfile", "general", "director",
      "centro", "guardia", "puerto", "defensa",
      "gabinete", "marina", "encuesta", "presupuesto", "electo", "embajador",
      "recursos", "ley", "congreso", "comercio", "serie", "firma",
      "presidenta", "sistema", "instituto", "banco"
    ))
  )

```


```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# Score every tweet against each category of the cleaned lexicon `x` and
# derive an overall polarity per tweet.

# Category names in the sorted order reported by count(). This order becomes
# the column order of the score table, which later chunks rely on when they
# select the range `anger:trust`.
categorias <- x %>% count(sentiment) %>% pull(sentiment)

# Score all tweets using only the lexicon entries of one category.
# Returns a numeric vector with one value per tweet, or NULL (with a warning)
# when scoring fails, so the remaining categories can still be computed.
puntuar_categoria <- function(categoria) {
  tryCatch(
    get_sentiment(
      datos$text,
      method = "custom",
      lexicon = filter(x, sentiment == categoria)
    ),
    error = function(e) {
      warning("ERROR (", categoria, "): ", conditionMessage(e), call. = FALSE)
      NULL
    }
  )
}

# One column per category. Binding the vectors column-wise keeps row i tied to
# tweet i, without the stack / pivot / NA-sorting round trip.
y <- categorias %>%
  set_names() %>%
  map(puntuar_categoria) %>%
  compact() %>%
  as_tibble()

# Polarity is the positive score minus the negative score; its sign gives the
# tweet-level label.
sentimientos_usuario <- y %>%
  mutate(
    polaridad = positive - negative,
    sentimiento = case_when(
      polaridad > 0 ~ "positivo",
      polaridad < 0 ~ "negativo",
      polaridad == 0 ~ "neutral"
    )
  )

```


```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Bar chart with the percentage of tweets in each polarity class.

titulo <- paste("Análisis de sentimientos de", usuario, "en Twitter")

sentimientos_usuario %>%
  count(sentimiento) %>%
  mutate(prop = n / sum(n) * 100) %>%
  ggplot(aes(x = sentimiento, y = prop, fill = sentimiento)) +
  geom_col() +
  scale_fill_brewer(palette = "RdYlBu") +
  labs(title = titulo, x = "Tipos de polaridad", y = "Porcentaje de Tweets") +
  theme_light() +
  theme(legend.position = "none")

```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}
# Attach the per-tweet sentiment scores to the raw tweets and export the
# combined table to "sentimientos_<usuario>.csv".

nombre2 <- paste0("sentimientos_", usuario, ".csv")

# Both tables are in tweet order, so a running row index is the join key.
# row_number() also behaves correctly on empty input, unlike 1:nrow().
sentimientos_usuario <- sentimientos_usuario %>% mutate(id = row_number())

# Join explicitly on `id` so an unrelated column that happens to share a name
# can never silently become part of the key.
datos_full <- datos %>%
  mutate(id = row_number()) %>%
  full_join(sentimientos_usuario, by = "id")

write_as_csv(datos_full, nombre2)

```

```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Line chart: percentage of tweets in each polarity class, per calendar month.

titulo <- paste("Análisis de sentimientos de", usuario, "en Twitter a lo largo del tiempo")

datos_full %>%
  mutate(
    created_at = ymd_hms(created_at),
    month = floor_date(created_at, "month")
  ) %>%
  count(month, sentimiento) %>%
  # Percentages are computed within each month, so they sum to 100 per month.
  group_by(month) %>%
  mutate(prop = n / sum(n) * 100) %>%
  ungroup() %>%
  ggplot(aes(x = month, y = prop, colour = sentimiento)) +
  geom_line() +
  # The chart shows every polarity class, so the axis is not labelled as
  # negative tweets only.
  labs(title = titulo, x = "Mes", y = "Porcentaje de Tweets") +
  theme_light()

```


```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Bar chart: percentage of all tweets that contain at least one word of each
# emotion (the positive / negative columns are left out).
titulo <- paste("Análisis de emociones de", usuario, "en Twitter")

datos_full %>%
  # Flag tweets scoring >= 1, count them, and express the count as a
  # percentage of all tweets.
  summarize(
    across(anger:trust, ~ sum(ifelse(.x >= 1, 1, 0)) / nrow(datos_full) * 100)
  ) %>%
  pivot_longer(cols = everything(), names_to = "emotion", values_to = "values") %>%
  filter(!emotion %in% c("positive", "negative")) %>%
  ggplot(aes(x = emotion, y = values, fill = emotion)) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  scale_fill_brewer(palette = "RdYlBu") +
  labs(title = titulo, x = "Emociones", y = "Porcentaje de Tweets") +
  theme_light() +
  theme(legend.position = "bottom", legend.title = element_blank())
```

```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Line chart: monthly percentage of tweets that contain each emotion.
titulo <- paste("Análisis de emociones de", usuario, "a lo largo del tiempo")

datos_full %>%
  mutate(month = floor_date(ymd_hms(created_at), "month")) %>%
  group_by(month) %>%
  # Per month: number of tweets scoring >= 1 on each emotion, plus the total
  # number of tweets in that month.
  summarize(across(anger:trust, ~ sum(ifelse(.x >= 1, 1, 0))), n = n()) %>%
  mutate(across(anger:trust, ~ .x / n * 100)) %>%
  pivot_longer(anger:trust, names_to = "emotion", values_to = "values") %>%
  filter(!emotion %in% c("positive", "negative")) %>%
  ggplot(aes(x = month, y = values, color = emotion)) +
  geom_line() +
  labs(title = titulo, x = "Mes", y = "Porcentaje de Tweets") +
  theme_light()

```

```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Bar chart: number of tweets that mention each political party.
#
# Acronyms are matched as whole words (\\b) so that, for example, "PAN" does
# not match inside "PANDEMIA". Every pattern is written on a single line: a
# line break inside the string would become part of the regular expression and
# stop the long party name from matching.
datos_full %>%
  # Strip links and HTML-escaped ampersands before matching.
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  mutate(
    PAN = ifelse(str_detect(text, "\\bPAN\\b|[Pp]artido [Aa]cci[óo]n [Nn]acional"), 1, 0),
    PRI = ifelse(str_detect(text, "\\bPRI\\b|[Pp]artido [Rr]evolucionario [Ii]nstitucional"), 1, 0),
    PRD = ifelse(str_detect(text, "\\bPRD\\b|[Pp]artido [dD]e [Ll]a [Rr]evoluci[óoO]n [Dd]emocr[aáA]tica"), 1, 0),
    Morena = ifelse(str_detect(text, "[Mm]orena"), 1, 0),
    PT = ifelse(str_detect(text, "\\bPT\\b|[Pp]artido [dD]el [Tt]rabajo"), 1, 0),
    MC = ifelse(str_detect(text, "\\bMC\\b|[Mm]ovimiento [Cc]iudadano"), 1, 0),
    PV = ifelse(str_detect(text, "[Pp]artido [Vv]erde"), 1, 0)
  ) %>%
  summarize(across(PAN:PV, ~ sum(.x))) %>%
  pivot_longer(cols = everything(), names_to = "partido", values_to = "values") %>%
  ggplot(aes(x = partido, y = values, fill = partido)) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  scale_fill_brewer(palette = "RdYlBu") +
  ggtitle("Número de Menciones por Partido Político") +
  ylab("Número de Tweets") +
  xlab("Partidos Políticos") +
  theme_light() +
  theme(legend.position = "none", legend.title = element_blank())


```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# Export the tweets together with one 0/1 indicator column per political party.
#
# Acronyms are matched as whole words (\\b) so that, for example, "PAN" does
# not match inside "PANDEMIA". Every pattern is written on a single line: a
# line break inside the string would become part of the regular expression and
# stop the long party name from matching.
base2 <- datos_full %>%
  # Strip links and HTML-escaped ampersands before matching.
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  mutate(
    PAN = ifelse(str_detect(text, "\\bPAN\\b|[Pp]artido [Aa]cci[óo]n [Nn]acional"), 1, 0),
    PRI = ifelse(str_detect(text, "\\bPRI\\b|[Pp]artido [Rr]evolucionario [Ii]nstitucional"), 1, 0),
    PRD = ifelse(str_detect(text, "\\bPRD\\b|[Pp]artido [dD]e [Ll]a [Rr]evoluci[óoO]n [Dd]emocr[aáA]tica"), 1, 0),
    Morena = ifelse(str_detect(text, "[Mm]orena"), 1, 0),
    PT = ifelse(str_detect(text, "\\bPT\\b|[Pp]artido [dD]el [Tt]rabajo"), 1, 0),
    MC = ifelse(str_detect(text, "\\bMC\\b|[Mm]ovimiento [Cc]iudadano"), 1, 0),
    PV = ifelse(str_detect(text, "[Pp]artido [Vv]erde"), 1, 0)
  )

a <- paste0("datos_partidos", usuario, ".csv")

write_as_csv(base2, a)


```


```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# Polarity breakdown of the tweets that mention each political party.
#
# Acronyms are matched as whole words (\\b) so that, for example, "PAN" does
# not match inside "PANDEMIA". Every pattern is written on a single line: a
# line break inside the string would become part of the regular expression and
# stop the long party name from matching.

# Tweets that mention at least one party, with one 0/1 column per party.
partidos <- datos_full %>%
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  mutate(
    PAN = ifelse(str_detect(text, "\\bPAN\\b|[Pp]artido [Aa]cci[óo]n [Nn]acional"), 1, 0),
    PRI = ifelse(str_detect(text, "\\bPRI\\b|[Pp]artido [Rr]evolucionario [Ii]nstitucional"), 1, 0),
    PRD = ifelse(str_detect(text, "\\bPRD\\b|[Pp]artido [dD]e [Ll]a [Rr]evoluci[óoO]n [Dd]emocr[aáA]tica"), 1, 0),
    Morena = ifelse(str_detect(text, "[Mm]orena"), 1, 0),
    PT = ifelse(str_detect(text, "\\bPT\\b|[Pp]artido [dD]el [Tt]rabajo"), 1, 0),
    MC = ifelse(str_detect(text, "\\bMC\\b|[Mm]ovimiento [Cc]iudadano"), 1, 0),
    PV = ifelse(str_detect(text, "[Pp]artido [Vv]erde"), 1, 0)
  ) %>%
  filter(if_any(PAN:PV, ~ .x == 1))

# For each party: number and percentage of its tweets in each polarity class.
# A single grouped count replaces the per-party loop, so nothing is grown with
# rbind() and an empty `partidos` simply yields an empty table that still has
# the columns the plot expects.
data2 <- partidos %>%
  pivot_longer(PAN:PV, names_to = "partido", values_to = "values") %>%
  filter(values == 1) %>%
  count(partido, sentimiento) %>%
  group_by(partido) %>%
  mutate(prop = n / sum(n) * 100) %>%
  ungroup() %>%
  select(sentimiento, n, prop, partido) %>%
  as.data.frame()

```


```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Dodged bar chart: polarity percentages (`prop`) for each party in `data2`.
ggplot(data2, aes(x = partido, y = prop, fill = sentimiento)) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  scale_fill_brewer(palette = "RdYlBu") +
  labs(
    title = "Análisis de sentimientos por Partido Político",
    x = "Partidos Políticos",
    y = "Porcentaje de Tweets"
  ) +
  theme_light() +
  theme(legend.position = "bottom", legend.title = element_blank())
```

```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Horizontal bar chart: number of tweets that mention each keyword of interest.
datos_full %>%
  # Strip links and HTML-escaped ampersands before matching.
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  # One 0/1 indicator per keyword (a logical converted to numeric).
  mutate(
    AMLO = as.numeric(str_detect(text, "AMLO")),
    Bienestar = as.numeric(str_detect(text, "[Bb]ienestar")),
    CUATRO_T = as.numeric(str_detect(text, "#4T")),
    Morena = as.numeric(str_detect(text, "[Mm]orena")),
    Felipe_Calderon = as.numeric(str_detect(text, "[Ff]elipe [Cc]alder[oó]n")),
    Samuel_Garcia = as.numeric(str_detect(text, "[Ss]amuel [Gg]arc[ií]a")),
    Tren_Maya = as.numeric(str_detect(text, "[Tt]ren [Mm]aya")),
    Dos_Bocas = as.numeric(str_detect(text, "[Rr]efiner[íi]a [Dd]os [Bb]ocas|[Dd]os [Bb]ocas")),
    Ayotzinapa = as.numeric(str_detect(text, "[Aa]yotzinapa"))
  ) %>%
  summarize(across(AMLO:Ayotzinapa, ~ sum(.x))) %>%
  pivot_longer(cols = everything(), names_to = "emotion", values_to = "values") %>%
  # Bars are ordered by their count.
  ggplot(aes(x = fct_reorder(emotion, values), y = values, fill = emotion)) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  scale_fill_brewer(palette = "RdYlBu") +
  labs(title = "Número de menciones por palabra clave", x = "", y = "Número de Tweets") +
  theme_light() +
  theme(legend.position = "none", legend.title = element_blank()) +
  coord_flip()
```


```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# Export the tweets together with one 0/1 indicator column per keyword.

palabras_claves <- datos_full %>%
  # Strip links and HTML-escaped ampersands before matching.
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  mutate(
    AMLO = as.numeric(str_detect(text, "AMLO")),
    Bienestar = as.numeric(str_detect(text, "[Bb]ienestar")),
    CUATRO_T = as.numeric(str_detect(text, "#4T")),
    Morena = as.numeric(str_detect(text, "[Mm]orena")),
    Felipe_Calderon = as.numeric(str_detect(text, "[Ff]elipe [Cc]alder[oó]n")),
    Samuel_Garcia = as.numeric(str_detect(text, "[Ss]amuel [Gg]arc[ií]a")),
    Tren_Maya = as.numeric(str_detect(text, "[Tt]ren [Mm]aya")),
    Dos_Bocas = as.numeric(str_detect(text, "[Rr]efiner[íi]a [Dd]os [Bb]ocas|[Dd]os [Bb]ocas")),
    Ayotzinapa = as.numeric(str_detect(text, "[Aa]yotzinapa"))
  )

a <- sprintf("datos_palabras_claves%s.csv", usuario)

write_as_csv(palabras_claves, file_name = a)


```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# Tweets that mention at least one keyword, with one 0/1 column per keyword.
key_words <- datos_full %>%
  # Strip links and HTML-escaped ampersands before matching.
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  mutate(
    AMLO = as.numeric(str_detect(text, "AMLO")),
    Bienestar = as.numeric(str_detect(text, "[Bb]ienestar")),
    CUATRO_T = as.numeric(str_detect(text, "#4T")),
    Morena = as.numeric(str_detect(text, "[Mm]orena")),
    Felipe_Calderon = as.numeric(str_detect(text, "[Ff]elipe [Cc]alder[oó]n")),
    Samuel_Garcia = as.numeric(str_detect(text, "[Ss]amuel [Gg]arc[ií]a")),
    Tren_Maya = as.numeric(str_detect(text, "[Tt]ren [Mm]aya")),
    Dos_Bocas = as.numeric(str_detect(text, "[Rr]efiner[íi]a [Dd]os [Bb]ocas|[Dd]os [Bb]ocas")),
    Ayotzinapa = as.numeric(str_detect(text, "[Aa]yotzinapa"))
  ) %>%
  filter(if_any(AMLO:Ayotzinapa, ~ .x == 1))

# Keyword column names, in the sorted order produced by count().
terminos <- key_words %>%
  select(AMLO:Ayotzinapa) %>%
  pivot_longer(cols = everything(), names_to = "pp", values_to = "values") %>%
  count(pp) %>%
  select(pp) %>%
  as_vector()

```



```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}
# For each keyword: number and percentage of the tweets mentioning it that
# fall in each polarity class.
#
# A single grouped count replaces the per-keyword loop, so nothing is grown
# with rbind(), errors are not silently swallowed, and an empty `key_words`
# yields an empty table that still has the columns the plot expects.
data3 <- key_words %>%
  pivot_longer(AMLO:Ayotzinapa, names_to = "termino", values_to = "values") %>%
  filter(values == 1) %>%
  count(termino, sentimiento) %>%
  group_by(termino) %>%
  mutate(prop = n / sum(n) * 100) %>%
  ungroup() %>%
  select(sentimiento, n, prop, termino) %>%
  as.data.frame()
```


```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}
# Dodged bar chart: polarity percentages (`prop`) for each keyword in `data3`.
ggplot(data3, aes(x = termino, y = prop, fill = sentimiento)) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  scale_fill_brewer(palette = "RdYlBu") +
  labs(
    title = "Análisis de sentimientos por términos de interés",
    x = "Términos de Interés",
    y = "Porcentaje de Tweets"
  ) +
  theme_light() +
  theme(legend.position = "bottom", legend.title = element_blank())
```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# For each keyword: percentage of the tweets mentioning it that contain each
# emotion (a tweet counts for an emotion when its score is >= 1).
#
# A single grouped summary replaces the per-keyword loop. Keywords with no
# mentions do not appear at all (previously they gave NaN rows that the plot
# had to filter out), and no global named `n` is created.
data5 <- palabras_claves %>%
  pivot_longer(AMLO:Ayotzinapa, names_to = "partido", values_to = "mencion") %>%
  filter(mencion == 1) %>%
  # positive / negative are polarity scores, not emotions.
  select(-positive, -negative) %>%
  group_by(partido) %>%
  summarize(
    across(anger:trust, ~ sum(ifelse(.x >= 1, 1, 0))),
    total = n()  # tweets mentioning this keyword: denominator of `prop`
  ) %>%
  pivot_longer(anger:trust, names_to = "emotion", values_to = "values") %>%
  mutate(prop = values / total * 100) %>%
  select(emotion, values, prop, partido) %>%
  as.data.frame()
```

```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Dodged bar chart: emotion percentages for each keyword in `data5`.
# Rows without a keyword, and rows whose percentage is missing or zero, are
# dropped before plotting (a missing `prop` fails the `prop != 0` test too).
data5 %>%
  filter(!is.na(partido), prop != 0) %>%
  ggplot(aes(x = partido, y = prop, fill = emotion)) +
  geom_col(position = position_dodge2(width = 0.9, preserve = "single")) +
  scale_fill_brewer(palette = "RdYlBu") +
  labs(
    title = "Análisis de emociones por términos de interés",
    x = "Términos de Interés",
    y = "Porcentaje de Tweets"
  ) +
  theme_light() +
  theme(legend.position = "bottom", legend.title = element_blank())
```




```{r,include = TRUE,echo=FALSE,message=FALSE,warning=FALSE}

# Tokenise the tweets, drop Spanish stop words and plot the 20 most frequent
# words.

# tm is only used for its Spanish stop-word list; install it if it is missing,
# following the notebook's own auto-install convention.
if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}

# Spanish stop words tagged with the "custom" lexicon. Built directly with
# tibble(): data_frame() is deprecated, and binding the English `stop_words`
# only to filter them out again had no effect.
custom_stop_words <- tibble(
  word = tm::stopwords("spanish"),
  lexicon = "custom"
)

# One row per word occurrence.
tweet_words <- datos_full %>%
  # Skip tweets whose text starts with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Strip links and HTML-escaped ampersands before tokenising.
  mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(word, text) %>%
  filter(
    !word %in% custom_stop_words$word,
    str_detect(word, "[a-z]")
  )

tweet_words %>%
  count(word, sort = TRUE) %>%
  # Drop single-character tokens and a few leftover fragments.
  mutate(letras = nchar(word)) %>%
  filter(letras > 1) %>%
  filter(!word %in% c("t.co", "http", "est", "as", "todas", "tambi")) %>%
  slice_head(n = 20) %>%
  ggplot(aes(n, fct_reorder(word, n))) +
  geom_col() +
  labs(x = "frecuencia", y = "palabras", fill = "") +
  ggtitle("Las 20 palabras con la mayor frecuencia") +
  theme_light()


```

```{r,include = FALSE,echo=FALSE,message=FALSE,warning=FALSE}

# Export the word-frequency table to "frecuencia_palabras<usuario>.csv".
# `tweet_words` (built in the previous chunk) already holds the tokenised,
# stop-word-filtered words produced by this same pipeline, so only the count
# is needed here.
frecuencia_palabras <- tweet_words %>%
  count(word, sort = TRUE)

a <- sprintf("frecuencia_palabras%s.csv", usuario)

write_as_csv(frecuencia_palabras, file_name = a)
```




















