Análisis de Datos

Librerías

# Load the 'rjson' library for handling JSON data in R
library(rjson)

# Load the 'httr' library for making HTTP requests and handling web data
library(httr)

# Load the 'jsonlite' library for working with JSON data in a more user-friendly way
library(jsonlite)

## 
## Attaching package: 'jsonlite'

## The following objects are masked from 'package:rjson':
## 
##     fromJSON, toJSON

# Load the 'rvest' library for web scraping and extracting information from HTML
library(rvest)

# Load the 'tm' library for text mining tasks, such as document-term matrix creation
library(tm)

## Loading required package: NLP

## 
## Attaching package: 'NLP'

## The following object is masked from 'package:httr':
## 
##     content

# Load the 'udpipe' library for tokenization and part-of-speech tagging of text
library(udpipe)

# Load the 'slam' library for sparse matrix operations, useful in text analysis
library(slam)

# Load the 'wordcloud' library for creating word clouds
library(wordcloud)

## Loading required package: RColorBrewer

# Load the 'RColorBrewer' library for color palettes in data visualization
library(RColorBrewer)

# Load the 'topicmodels' library for topic modeling analysis
library(topicmodels)

# Load the 'dplyr' library for data manipulation tasks
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the 'ggplot2' library for creating data visualizations with the Grammar of Graphics
library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'package:NLP':
## 
##     annotate

# Load the 'sentimentr' library for sentiment analysis
library(sentimentr)

# Load the 'tidytext' library for tidy text analysis using the principles of tidy data
library(tidytext)

# Load the 'textdata' library for working with textual data
library(textdata)

## 
## Attaching package: 'textdata'

## The following object is masked from 'package:httr':
## 
##     cache_info

# Load the 'syuzhet' library for extracting sentiment intensity from text
library(syuzhet)

## 
## Attaching package: 'syuzhet'

## The following object is masked from 'package:sentimentr':
## 
##     get_sentences

# Load the 'lubridate' library for working with dates and times
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

# Load the 'stopwords' library for managing stop words in text analysis
library(stopwords)

## 
## Attaching package: 'stopwords'

## The following object is masked from 'package:tm':
## 
##     stopwords

# Load the 'tidyr' library for data tidying tasks
library(tidyr)

Cargar datos por fuente

# Cargar archivo de json Youtube
# Read the content of the JSON file for YouTube data
json_data <- readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia Final/Youtube.json")

## Warning in readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5
## SEMESTRE/CDTD/Evidencia Final/Youtube.json"): incomplete final line found on
## '/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia
## Final/Youtube.json'

# Convert the JSON content into a list using jsonlite
json_list <- jsonlite::fromJSON(json_data)
# Convert the list to a data frame for easier manipulation
youtubeDf <- as.data.frame(json_list)


# Cargar archivo Json Articulos
# Read the content of the JSON file for news articles
json_data <- readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia Final/Noticias.json")

## Warning in readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5
## SEMESTRE/CDTD/Evidencia Final/Noticias.json"): incomplete final line found on
## '/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia
## Final/Noticias.json'

# Convert the JSON content into a list using jsonlite
json_list <- jsonlite::fromJSON(json_data)
# Convert the list to a data frame for easier manipulation
noticiasDf <- as.data.frame(json_list)


# Cargar archivo Json Gobierno
# Read the content of the JSON file for government data
json_data <- readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia Final/Gobierno.json")

## Warning in readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5
## SEMESTRE/CDTD/Evidencia Final/Gobierno.json"): incomplete final line found on
## '/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia
## Final/Gobierno.json'

# Convert the JSON content into a list using jsonlite
json_list <- jsonlite::fromJSON(json_data)
# Convert the list to a data frame for easier manipulation
gobiernoDf <- as.data.frame(json_list)

Eliminara los NA

# Quitar filas que tienen "" o son NA
youtubeDf <- subset(youtubeDf, Content != "" & !is.na(Content))
noticiasDf <- subset(noticiasDf, Content != "" & !is.na(Content))

Filtro por año

# Función para separarlo por año

filter_by_year <- function(df, year) {
  # Lo convierte a DateType
  df$PublishedAt <- as.Date(df$PublishedAt, format = "%Y-%m-%d %H:%M:%S")
  
  # Extraemos el año solamente
  df$Year <- format(df$PublishedAt, "%Y")
  
  # Lo filtramos solamente con el año dado
  df_filtered <- df[df$Year == as.character(year), ]
  
  # Regresamos el dataframe filtrado
  return(df_filtered)
}

Cada df de fuente, filtrar por año

# Este código cuenta con parámetros dentro de la función para identificar lo que se está buscando de cada dataframe de información

  # Youtube dataframes
  youtube2023 <- filter_by_year(youtubeDf, 2023)
  youtube2022 <- filter_by_year(youtubeDf, 2022)
  youtube2021 <- filter_by_year(youtubeDf, 2021)
  youtube2020 <- filter_by_year(youtubeDf, 2020)
  youtube2019 <- filter_by_year(youtubeDf, 2019)
  
  # Noticias dataframes
  noticias2023 <- filter_by_year(noticiasDf, 2023)
  noticias2022 <- filter_by_year(noticiasDf, 2022)
  noticias2021 <- filter_by_year(noticiasDf, 2021)
  noticias2020 <- filter_by_year(noticiasDf, 2020)
  noticias2019 <- filter_by_year(noticiasDf, 2019)
  
  # Gobierno dataframes
  gobierno2023 <- filter_by_year(gobiernoDf, 2023)
  gobierno2022 <- filter_by_year(gobiernoDf, 2022)
  gobierno2021 <- filter_by_year(gobiernoDf, 2021)
  gobierno2020 <- filter_by_year(gobiernoDf, 2020)
  gobierno2019 <- filter_by_year(gobiernoDf, 2019)

Poner un límite a la información

  # Define the row limit para que estén relativamente proporcionales entre sí
  ROW_LIMIT <- 1000
  
  youtube2019 <- youtube2019[1:min(nrow(youtube2019), ROW_LIMIT), ]
  gobierno2019 <- gobierno2019[1:min(nrow(gobierno2019), ROW_LIMIT), ]
  noticias2019 <- noticias2019[1:min(nrow(noticias2019), ROW_LIMIT), ]
  
  youtube2020 <- youtube2020[1:min(nrow(youtube2020), ROW_LIMIT), ]
  gobierno2020 <- gobierno2020[1:min(nrow(gobierno2020), ROW_LIMIT), ]
  noticias2020 <- noticias2020[1:min(nrow(noticias2020), ROW_LIMIT), ]
  
  youtube2021 <- youtube2021[1:min(nrow(youtube2021), ROW_LIMIT), ]
  gobierno2021 <- gobierno2021[1:min(nrow(gobierno2021), ROW_LIMIT), ]
  noticias2021 <- noticias2021[1:min(nrow(noticias2021), ROW_LIMIT), ]
  
  youtube2022 <- youtube2022[1:min(nrow(youtube2022), ROW_LIMIT), ]
  gobierno2022 <- gobierno2022[1:min(nrow(gobierno2022), ROW_LIMIT), ]
  noticias2022 <- noticias2022[1:min(nrow(noticias2022), ROW_LIMIT), ]
  
  youtube2023 <- youtube2023[1:min(nrow(youtube2023), ROW_LIMIT), ]
  gobierno2023 <- gobierno2023[1:min(nrow(gobierno2023), ROW_LIMIT), ]
  noticias2023 <- noticias2023[1:min(nrow(noticias2023), ROW_LIMIT), ]

Análisis general por año

# unir los data frames en un mismo data frame por año
merged2023 <- rbind(noticias2023, youtube2023, gobierno2023)
merged2022 <- rbind(noticias2022, youtube2022, gobierno2022)
merged2021 <- rbind(noticias2021, youtube2021, gobierno2021)
merged2020 <- rbind(noticias2020, youtube2020, gobierno2020)
merged2019 <- rbind(noticias2019, youtube2019, gobierno2019)

Eliminar palabras

# Define a vector 'palabras_eliminar' containing common words to be removed
# These words are often considered noise or irrelevant for analysis
# Examples include specific terms, common words, URLs, and symbols
# The goal is to filter out these words from text data for more meaningful analysis
palabras_eliminar <- c("esquizofrenia", "persona", "personas", "méxico", "tel", "com", "org", "video", "saludos", "saludo", "canal", "mas", "mano", "videos", "â", "mâ", "må", "å", "años", "alb")

Función de limpieza de texto

# función de limpieza de texto para que en el parámetro poder meter el texto de los dataframes y tener el texto limpio de español

limpiar_texto <- function(texto, palabras_eliminar = NULL){
    
    # Cambiar texto a minusculas
    texto <- tolower(texto) 
    
    # Elimina las stopwords en español
    texto <- removeWords(texto, stopwords("es"))
    
    # Elimina palabras especificadas por el usuario
    if (!is.null(palabras_eliminar)) {
      texto <- removeWords(texto, palabras_eliminar)
    }
    
    # Convertir caracteres acentuados a su versión sin acentos
    texto <- iconv(texto, "UTF-8", "ASCII//TRANSLIT")
    
    # Eliminar palabras cortas:
    texto <- gsub("\\b\\w{1,3}\\b", "", texto)
    
    # Eliminar caracteres no ASCII
    texto <- iconv(texto, "latin1", "ASCII", sub="")
    
    # Elimina URLs
    texto <- gsub("http\\S+|www\\.\\S+","",texto)
    
    # Elimina puntuacion y simbolos
    texto <- gsub("<.*?>","",texto)
    
    # Reemplazar puntuacion con espacios
    texto <- gsub("[[:punct:]]"," ",texto) 
    
    # Elimina los digitos
    texto <- gsub("\\d+","",texto)
    
    # Reemplazar doble espacio con un solo espacio
    texto <- gsub("\\s+"," ",texto)
    
    # La siguiente cadena elimina el nombre de los participantes de la conferencia antes de que estos hablen, es decir elimina los " presidente : ", que no sirven de mucho para el analisis.
    texto <- gsub("^\\W+:", "", texto)
    
    return(texto)
  }

Aplicar función de limpieza de texto a cada df

# función de limpieza de texto para que en el parámetro poder meter el texto de los dataframes y tener el texto limpio de español de cada conjunto de información

youtube2019$Content <- sapply(youtube2019$Content, limpiar_texto)
gobierno2019$Content <- sapply(gobierno2019$Content, limpiar_texto)
noticias2019$Content <- sapply(noticias2019$Content, limpiar_texto)
merged2019$Content <- sapply(merged2019$Content, limpiar_texto)

youtube2020$Content <- sapply(youtube2020$Content, limpiar_texto)
gobierno2020$Content <- sapply(gobierno2020$Content, limpiar_texto)
noticias2020$Content <- sapply(noticias2020$Content, limpiar_texto)
merged2020$Content <- sapply(merged2020$Content, limpiar_texto)

youtube2021$Content <- sapply(youtube2021$Content, limpiar_texto)
gobierno2021$Content <- sapply(gobierno2021$Content, limpiar_texto)
noticias2021$Content <- sapply(noticias2021$Content, limpiar_texto)
merged2021$Content <- sapply(merged2021$Content, limpiar_texto)

youtube2022$Content <- sapply(youtube2022$Content, limpiar_texto)
gobierno2022$Content <- sapply(gobierno2022$Content, limpiar_texto)
noticias2022$Content <- sapply(noticias2022$Content, limpiar_texto)
merged2022$Content <- sapply(merged2022$Content, limpiar_texto)

youtube2023$Content <- sapply(youtube2023$Content, limpiar_texto)
gobierno2023$Content <- sapply(gobierno2023$Content, limpiar_texto)
noticias2023$Content <- sapply(noticias2023$Content, limpiar_texto)
merged2023$Content <- sapply(merged2023$Content, limpiar_texto)

youtubeDf$Content <- sapply(youtubeDf$Content, limpiar_texto, palabras_eliminar)
gobiernoDf$Content <- sapply(gobiernoDf$Content, limpiar_texto, palabras_eliminar)
noticiasDf$Content <- sapply(noticiasDf$Content, limpiar_texto, palabras_eliminar)

Función crear corpus

# Function to create a corpus para el proceso de frecuencia de palabras

crear_corpus <- function(df, palabras_eliminar) {
  # Create a corpus from the Content column
  corpus <- Corpus(VectorSource(df$Content))
  
  # Convert to lowercase
  corpus <- tm_map(corpus, content_transformer(tolower))
  
  # Remove punctuation
  corpus <- tm_map(corpus, removePunctuation)
  
  # Remove specific words
  corpus <- tm_map(corpus, removeWords, palabras_eliminar)
  
  # Remove empty documents
  corpus <- tm_filter(corpus, function(x) length(unlist(strsplit(as.character(x), " "))) > 0)
    
  return(corpus)
}

Corpus

# Gobierno
gob23.corpus <- crear_corpus(gobierno2023, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

gob22.corpus <- crear_corpus(gobierno2022, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

gob21.corpus <- crear_corpus(gobierno2021, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

gob20.corpus <- crear_corpus(gobierno2020, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

gob19.corpus <- crear_corpus(gobierno2019, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

# Youtube
yt23.corpus <- crear_corpus(youtube2023, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

yt22.corpus <- crear_corpus(youtube2022, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

yt21.corpus <- crear_corpus(youtube2021, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

yt20.corpus <- crear_corpus(youtube2020, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

yt19.corpus <- crear_corpus(youtube2019, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

# Noticias
news23.corpus <- crear_corpus(noticias2023, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

news22.corpus <- crear_corpus(noticias2022, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

news21.corpus <- crear_corpus(noticias2021, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

news20.corpus <- crear_corpus(noticias2020, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

news19.corpus <- crear_corpus(noticias2019, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

# Merged Corpus
merged23.corpus <- crear_corpus(merged2023, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

merged22.corpus <- crear_corpus(merged2022, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

merged21.corpus <- crear_corpus(merged2021, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

merged20.corpus <- crear_corpus(merged2020, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

merged19.corpus <- crear_corpus(merged2019, palabras_eliminar)

## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents

## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents

## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

Función nube de palabras

# Function to create a word cloud from a given corpus
crear_wordcloud <- function(corpus) {
  # Create the Document-Term Matrix
  dtm <- DocumentTermMatrix(corpus)
  
  # Calculate term frequency
  term_frequency <- colSums(as.matrix(dtm))
  term_frequency_sorted <- sort(term_frequency, decreasing = TRUE)
  
  # Identify and list the 25 most frequent terms in the dataset
  top_25_terms <- subset(term_frequency_sorted, term_frequency_sorted >= 50)
  top_25_terms <- head(top_25_terms, 25)
  
  print(top_25_terms)
  
  # Select the 150 most frequent words
  top_150_terms <- head(term_frequency_sorted, 150)
  
  # Define the color palette
  color_palette <- "Dark2"
  
  # Create the word cloud
  wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
          max.words=150, random.order=FALSE, rot.per=0.35, 
          colors=brewer.pal(8, color_palette))
}

# display.brewer.all()

Creación de nube de palabras

# Generate word cloud for each year
crear_wordcloud(merged23.corpus)

##    gracias enfermedad       dios      puede       vida      salud     mental 
##        216        199        192        150        146        128        126 
##       solo  trastorno    intomas    consumo       bien     riesgo    estudio 
##        124        119        115         98         98         94         92 
##     pueden    cerebro     muchas      tener   cannabis      mayor      mundo 
##         91         87         84         81         80         75         75 
##       hijo      cosas      diagn    siempre 
##         75         74         73         72

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : importante could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : enfermedades could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : madre could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ostico could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : despu could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : casa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : atenci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : realidad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayudar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trabajo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pensamientos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : depresi could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paranoide could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ansiedad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicina could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mente could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : genes could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : siento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : luego could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : esquizofr could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : puedo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : felicito could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hombres could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mujeres could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicamentos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : igual could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguien could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : misma could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : universidad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entender could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : padre could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : afecta could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alucinaciones could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bendiciones could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : investigaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : informaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : adem could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : quiere could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pensar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psicosis could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : sustancias could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : jesucristo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : manera could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : espec could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mejorar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatra could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bendiga could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : testimonio could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : resultados could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : factores could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hospital could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : momento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pasa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : papel could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : explica could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : siente could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ificos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : saber could not be fit on page. It will not be plotted.

crear_wordcloud(merged22.corpus)

##  enfermedad      mental       salud        vida       puede        solo 
##         390         352         264         246         196         153 
##    asociaci tratamiento     intomas        hace   trastorno  familiares 
##         140         130         125         121         117         102 
##       seres       cosas       hacer     energia    cannabis      psiqui 
##         102          97          97          93          89          88 
##        cada    mentales      pueden        bien       poder      atenci 
##          87          86          86          85          84          82 
##        dios 
##          82

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : vibraciones could not be fit on page. It will not be plotted.

crear_wordcloud(merged21.corpus)

##  enfermedad       puede      mental       cosas       casos       salud 
##         212         163         155         127         125         118 
##        vida     depresi        solo      pueden        dios       voces 
##         118         116         106          99          98          95 
##        cada        hace       gente       mejor     gracias   trastorno 
##          90          89          87          86          85          79 
## tratamiento        bien       veces       mayor       mismo       decir 
##          76          74          74          73          73          72 
##    suicidio 
##          72

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : enfermedades could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pasado could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : verdad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : investigaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : momento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : conocimiento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : manera could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : poder could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : realidad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : homicidios could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : presentado could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : embargo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entonces could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicina could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : registro could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicamentos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ministerio could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : universidad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : padecimiento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : horas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : espiritual could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : incluso could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : miedo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguna could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : registran could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diagn could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : despu could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problema could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tambi could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : igual could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nadie could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : real could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : centros could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : instituto could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : primer could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nunca could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paciente could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : polic could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : espa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paso could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : importante could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ejemplo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : luego could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pasa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : amiga could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : recomiendo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : social could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trabajo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayudar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : factores could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : parece could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiares could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : recibe could not be fit on page. It will not be plotted.

crear_wordcloud(merged20.corpus)

##   enfermedad discapacidad      gracias        puede        mente        cosas 
##          154          154          140          129          126          120 
##         solo         tipo         vida         hace     programa       muchas 
##          118          117          116          111          110           97 
##        tener         bien       mental    trastorno        hacer        gente 
##           96           90           88           84           82           82 
##         dios        bueno      familia      intomas       manera     entonces 
##           78           77           75           73           72           72 
##        vamos 
##           72

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familia could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : manera could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entonces could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : siempre could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : voces could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bienestar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : importante could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pacientes could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mentales could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : informaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tiempo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trabajo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : chica could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tratamiento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguien could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ejemplo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mismo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cuenta could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mundo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pueden could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mejor could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cerebro could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : todas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : verdad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alucinaciones could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : enfermedades could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : momento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : exico could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : poder could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : adem could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : miedo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : adelante could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : identidad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ostico could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hecho could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : programas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : aunque could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : compa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : luisa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguna could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diagn could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diferentes could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : veces could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : puedo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatra could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : experiencia could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : vivir could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : demonios could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : atenci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pensamientos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicamentos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buenas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hijo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayudar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buena could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pensar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : apoyo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : general could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pareja could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : social could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : esquizofr could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : quiero could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : investigaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trastornos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : igual could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buen could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : podr could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : luego could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paciente could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : podemos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : interesante could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : despu could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : lard could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayuda could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : secretar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : espa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paranoide could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiares could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tipos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : toda could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : conocer could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diferente could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entender could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : condiciones could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pregunta could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : gustar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : acompa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatras could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : adultos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : favor could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : malas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problema could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : estudios could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : llegar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problemas could not be fit on page. It will not be plotted.

crear_wordcloud(merged19.corpus)

##       salud      mental        vida       puede  enfermedad   trastorno 
##         121         120         118         116         102          98 
##        solo    mentales tratamiento       gente       cosas        creo 
##          86          75          73          62          61          60 
##       mayor  trastornos        caso     gracias        bien    ansiedad 
##          58          55          55          55          55          54 
##       hacer       tener       parte       mejor       mismo   pacientes 
##          54          54          53          52          52          52 
##     depresi 
##          52

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : enfermedades could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : esquizo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : estudio could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : informaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : intomas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hospital could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trabajo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tiempo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cada could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ahora could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : madre could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mundo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ostico could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : aunque could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diagn could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : momento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pues could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguien could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : poblaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : despu could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cualquier could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : forma could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ologo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : casos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : voces could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cookies could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : realidad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : miedo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : contenido could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : personalidad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : todas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : grupo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatra could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bueno could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : doctor could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : primer could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : investigaci could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatr could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buen could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : acuerdo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiares could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : general could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hecho could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hermano could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : atrico could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nadie could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : poder could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : puedes could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pasa could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : verdad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alucinaciones could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : debe could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mundial could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : siento could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : punto could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mayores could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguna could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : carlos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : misma could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : incluso could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : sentido could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : sociedad could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : natural could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entonces could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : centro could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mucha could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problema could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nombre could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buena could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nuevo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : realmente could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mente could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : embargo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nunca could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : menos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayuda could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ideas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problemas could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : gusta could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : derechos could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : apoyo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bipolar could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : riesgo could not be fit on page. It will not be plotted.

## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : universidad could not be fit on page. It will not be plotted.

# La interpretación de cada una será vista en el reporte de investigación

Función Barplot

# Function to create a bar plot from a given corpus
crear_barplot <- function(corpus, main_title = "Términos más frecuentes", color_palette = "Dark2", threshold = 50, top_terms = 25) {
  # Create the Document-Term Matrix
  dtm <- DocumentTermMatrix(corpus)
  
  # Calculate term frequency
  term_frequency <- colSums(as.matrix(dtm))
  term_frequency_sorted <- sort(term_frequency, decreasing = TRUE)
  
  # Identify and list the top terms based on the specified threshold
  top_terms_vector <- subset(term_frequency_sorted, term_frequency_sorted >= threshold)
  top_terms_vector <- head(top_terms_vector, top_terms)
  
  # Create a bar plot
  barplot(top_terms_vector, main = main_title, col = brewer.pal(8, color_palette),
          las = 2, cex.names = 0.8)
}

Barplot por año

# Create bar plots for the years 2023 to 2019
crear_barplot(merged23.corpus, main_title = "Términos más frecuentes en el 2023", color_palette = "Dark2", threshold = 50, top_terms = 25)

crear_barplot(merged22.corpus, main_title = "Términos más frecuentes en el 2022", color_palette = "Dark2", threshold = 50, top_terms = 25)

crear_barplot(merged21.corpus, main_title = "Términos más frecuentes en el 2021", color_palette = "Dark2", threshold = 50, top_terms = 25)

crear_barplot(merged20.corpus, main_title = "Términos más frecuentes en el 2020", color_palette = "Dark2", threshold = 50, top_terms = 25)

crear_barplot(merged19.corpus, main_title = "Términos más frecuentes en el 2019", color_palette = "Dark2", threshold = 50, top_terms = 25)

# La interpretación de cada una será vista en el reporte de investigación

Función análisis de sentimientos

# Function for sentiment analysis

# Convert the 'PublishedAt' column to a datetime format using lubridate
sentiment_analysis <- function(df, source) {
  df$PublishedAt = lubridate::ymd_hms(df$PublishedAt)

# Tokenization: Split the 'Content' column into individual words
tidy_df <- df %>%
  unnest_tokens("word", Content)

# Stopword removal: Exclude common Spanish stopwords from the tokenized data
spanish_stop_words <- tm::stopwords("spanish")
spanish_stop_words <- data.frame(word = spanish_stop_words, stringsAsFactors = FALSE)
tidy_df <- tidy_df %>%
  anti_join(spanish_stop_words, by = "word")

# Sentiment analysis: Assign sentiment scores using the Bing lexicon
bing_word_sentiments <- get_sentiments("bing")
df_sentiment <- tidy_df %>%
  inner_join(bing_word_sentiments) %>%
  count(PublishedAt, sentiment) %>%
  spread(sentiment, n, fill = 0)

# Plot sentiment over time

# Create a line plot showing positive and negative sentiment over time
ggplot(df_sentiment, aes(x = PublishedAt)) +
  geom_line(aes(y = positive), color = "blue") +
  geom_line(aes(y = -negative), color = "red") +
  labs(x = "PublishedAt", y = "Sentiment",
       title = source,
       color = "Sentiment") +
  scale_color_manual(values = c("Positive" = "blue", "Negative" = "red")) +
  theme_minimal()
}

Análisis de sentimientos

# Use the function on your DataFrame
sentiment_analysis(gobiernoDf, "Gobierno")

## Joining with `by = join_by(word)`

sentiment_analysis(youtubeDf, "Youtube")

## Joining with `by = join_by(word)`

sentiment_analysis(noticiasDf, "Noticias")

## Joining with `by = join_by(word)`

# La interpretación de cada una será vista en el reporte de investigación

Función para análisis de sentimientos 2

# Function for sentiment analysis and word frequency
sentiment_analysis_2 <- function(df) {
# Convert the 'PublishedAt' column to a datetime format using lubridate
df$PublishedAt = lubridate::ymd_hms(df$PublishedAt)

# Tokenization: Split the 'Content' column into individual words
tidy_df <- df %>%
  unnest_tokens("word", Content)

# Stopword removal: Exclude common Spanish stopwords from the tokenized data
spanish_stop_words <- tm::stopwords("spanish")
spanish_stop_words <- data.frame(word = spanish_stop_words, stringsAsFactors = FALSE)
tidy_df <- tidy_df %>%
  anti_join(spanish_stop_words, by = "word")

# Sentiment analysis: Assign sentiment scores using the NRC lexicon
nrc_word_sentiments <- get_sentiments("nrc")
tidy_sentiment <- tidy_df %>%
  inner_join(nrc_word_sentiments)

# Get the total counts of each sentiment
total_sentiments <- tidy_sentiment %>%
  group_by(sentiment) %>%
  summarise(count = n())

# Get the most common words for each sentiment
common_words <- tidy_sentiment %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10, n)

# Return a list containing total sentiment counts and most common words for each sentiment
return(list(total_sentiments = total_sentiments, common_words = common_words))
}

Función Recuentos totales de sentimientos

recuento_sentimientos <- function(df, titulo) {
  # Obtener los resultados del análisis de sentimientos
  resultados <- sentiment_analysis_2(df)
  
  # Definir los colores para cada emoción
  colores_emociones <- c("joy" = "#CCCC00",  # Un tono más oscuro de amarillo
                       "sadness" = "#0000CC",  # Un tono más oscuro de azul
                       "anger" = "#CC0000",  # Un tono más oscuro de rojo
                       "fear" = "#000000",  # Negro es el color más oscuro posible
                       "surprise" = "#CC7A00",  # Un tono más oscuro de naranja
                       "trust" = "#009900",  # Un tono más oscuro de verde
                       "disgust" = "#660099",  # Un tono más oscuro de morado
                       "anticipation" = "#FF1493",  # Un tono más oscuro de rosa
                       "positive" = "#1E90FF",  # Un tono más oscuro de azul claro
                       "negative" = "#8B0000")  # Un tono más oscuro de rojo oscuro

  
  # Crear el gráfico de barras
  ggplot(resultados$total_sentiments, aes(x = sentiment, y = count, fill = sentiment)) +
    geom_bar(stat = "identity") +
    scale_fill_manual(values = colores_emociones) +
    theme_minimal() +
    labs(x = "Sentimiento", y = "Recuento", title = titulo)
}

Recuentos totales de sentimientos

# Youtube
recuento_sentimientos(youtubeDf, titulo = "Análisis de sentimientos de YouTube")

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 27 of `x` matches multiple rows in `y`.
## ℹ Row 4859 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# Gobierno
recuento_sentimientos(gobiernoDf, titulo = "Análisis de sentimientos de Gobierno")

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 13 of `x` matches multiple rows in `y`.
## ℹ Row 6879 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# Noticias
recuento_sentimientos(noticiasDf, titulo = "Análisis de sentimientos de Noticias")

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 4020 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2019, titulo = "Análisis de sentimientos 2019")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 4 of `x` matches multiple rows in `y`.
## ℹ Row 7310 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2020, titulo = "Análisis de sentimientos 2020")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 10 of `x` matches multiple rows in `y`.
## ℹ Row 10125 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2021, titulo = "Análisis de sentimientos 2021")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 30 of `x` matches multiple rows in `y`.
## ℹ Row 8032 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2022, titulo = "Análisis de sentimientos 2022")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 102 of `x` matches multiple rows in `y`.
## ℹ Row 8032 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2023, titulo = "Análisis de sentimientos 2023")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 4020 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# Comparativa
recuento_sentimientos(youtube2019, "Youtube 2019")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 106 of `x` matches multiple rows in `y`.
## ℹ Row 11774 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(youtube2023, "Youtube 2023")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 28 of `x` matches multiple rows in `y`.
## ℹ Row 4859 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(noticias2019, "Noticias 2019")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 4 of `x` matches multiple rows in `y`.
## ℹ Row 7310 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(noticias2023, "Noticias 2023")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 4020 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(gobierno2019, "Gobierno 2019")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 36 of `x` matches multiple rows in `y`.
## ℹ Row 8032 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(gobierno2023, "Gobierno 2023")

## Warning: All formats failed to parse. No formats found.

## Joining with `by = join_by(word)`

## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 3 of `x` matches multiple rows in `y`.
## ℹ Row 3625 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# La interpretación de cada una será vista en el reporte de investigación

Función de tematización

 # Function for topic modeling visualization
tematizacion <- function(df, title) {
# Preprocess the text
# Convert the 'Content' column to lowercase
transcription <- tolower(df$Content)
# Remove punctuation from the text
transcription <- removePunctuation(transcription)
# Remove common Spanish stopwords from the text
transcription <- removeWords(transcription, stopwords("spanish"))


# Convert the preprocessed text into a Corpus
corpus <- Corpus(VectorSource(transcription))
# Convert the Corpus into a DocumentTermMatrix
dtm <- DocumentTermMatrix(corpus)
# Remove empty rows from the DocumentTermMatrix
dtm <- dtm[row_sums(dtm) > 0, ]
# Generate a topic model using Latent Dirichlet Allocation (LDA) with 5 topics
lda <- LDA(dtm, k = 5)


# Get the most important terms for each topic
# Extract terms with highest beta values for each topic
terms <- tidy(lda, matrix = "beta") %>%
  group_by(topic) %>%
  slice_max(order_by = beta, n = 15) %>%
  ungroup() %>%
  slice_max(order_by = beta, n = 15)

# Convert 'topic' into a factor
terms$topic <- as.factor(terms$topic)

# Generate the plot
# Create a grouped bar plot to visualize important terms for each topic
ggplot(terms, aes(x = term, y = beta, fill = topic)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~topic, ncol = 3) +
  scale_fill_manual(values = c("#DAA520", "#9B30FF", "#FF4500", "#4C9900", "#00CED1")) +  # Adjust colors as needed
  ggtitle(title)  # Add title to the plot
}

Tematización

tematizacion(youtubeDf, "Tematización Youtube")

tematizacion(gobiernoDf, "Tematización Gobierno")

tematizacion(noticiasDf, "Tematización Noticias")

# La interpretación de cada una será vista en el reporte de investigación

Análisis de Datos

Alondra Ixchel Huerta Treviño A01571130

2023-12-03

Librerías

Cargar datos por fuente

Eliminara los NA

Filtro por año

Cada df de fuente, filtrar por año

Poner un límite a la información

Análisis general por año

Eliminar palabras

Función de limpieza de texto

Aplicar función de limpieza de texto a cada df

Función crear corpus

Corpus

Función nube de palabras

Creación de nube de palabras

Función Barplot

Barplot por año

Función análisis de sentimientos

Análisis de sentimientos

Función para análisis de sentimientos 2

Función Recuentos totales de sentimientos

Recuentos totales de sentimientos

Función de tematización

Tematización