Librerías

# Load the 'rjson' library for handling JSON data in R
library(rjson)

# Load the 'httr' library for making HTTP requests and handling web data
library(httr)

# Load the 'jsonlite' library for working with JSON data in a more user-friendly way
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
## 
##     fromJSON, toJSON
# Load the 'rvest' library for web scraping and extracting information from HTML
library(rvest)

# Load the 'tm' library for text mining tasks, such as document-term matrix creation
library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:httr':
## 
##     content
# Load the 'udpipe' library for tokenization and part-of-speech tagging of text
library(udpipe)

# Load the 'slam' library for sparse matrix operations, useful in text analysis
library(slam)

# Load the 'wordcloud' library for creating word clouds
library(wordcloud)
## Loading required package: RColorBrewer
# Load the 'RColorBrewer' library for color palettes in data visualization
library(RColorBrewer)

# Load the 'topicmodels' library for topic modeling analysis
library(topicmodels)

# Load the 'dplyr' library for data manipulation tasks
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the 'ggplot2' library for creating data visualizations with the Grammar of Graphics
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
# Load the 'sentimentr' library for sentiment analysis
library(sentimentr)

# Load the 'tidytext' library for tidy text analysis using the principles of tidy data
library(tidytext)

# Load the 'textdata' library for working with textual data
library(textdata)
## 
## Attaching package: 'textdata'
## The following object is masked from 'package:httr':
## 
##     cache_info
# Load the 'syuzhet' library for extracting sentiment intensity from text
library(syuzhet)
## 
## Attaching package: 'syuzhet'
## The following object is masked from 'package:sentimentr':
## 
##     get_sentences
# Load the 'lubridate' library for working with dates and times
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Load the 'stopwords' library for managing stop words in text analysis
library(stopwords)
## 
## Attaching package: 'stopwords'
## The following object is masked from 'package:tm':
## 
##     stopwords
# Load the 'tidyr' library for data tidying tasks
library(tidyr)

Cargar datos por fuente

# Cargar archivo de json Youtube
# Read the content of the JSON file for YouTube data
json_data <- readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia Final/Youtube.json")
## Warning in readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5
## SEMESTRE/CDTD/Evidencia Final/Youtube.json"): incomplete final line found on
## '/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia
## Final/Youtube.json'
# Convert the JSON content into a list using jsonlite
json_list <- jsonlite::fromJSON(json_data)
# Convert the list to a data frame for easier manipulation
youtubeDf <- as.data.frame(json_list)


# Cargar archivo Json Articulos
# Read the content of the JSON file for news articles
json_data <- readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia Final/Noticias.json")
## Warning in readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5
## SEMESTRE/CDTD/Evidencia Final/Noticias.json"): incomplete final line found on
## '/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia
## Final/Noticias.json'
# Convert the JSON content into a list using jsonlite
json_list <- jsonlite::fromJSON(json_data)
# Convert the list to a data frame for easier manipulation
noticiasDf <- as.data.frame(json_list)


# Cargar archivo Json Gobierno
# Read the content of the JSON file for government data
json_data <- readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia Final/Gobierno.json")
## Warning in readLines("/Users/alondraixchelhuertatrevino/Documents/TEC/5
## SEMESTRE/CDTD/Evidencia Final/Gobierno.json"): incomplete final line found on
## '/Users/alondraixchelhuertatrevino/Documents/TEC/5 SEMESTRE/CDTD/Evidencia
## Final/Gobierno.json'
# Convert the JSON content into a list using jsonlite
json_list <- jsonlite::fromJSON(json_data)
# Convert the list to a data frame for easier manipulation
gobiernoDf <- as.data.frame(json_list)

Eliminara los NA

# Quitar filas que tienen "" o son NA
youtubeDf <- subset(youtubeDf, Content != "" & !is.na(Content))
noticiasDf <- subset(noticiasDf, Content != "" & !is.na(Content))

Filtro por año

# Función para separarlo por año

filter_by_year <- function(df, year) {
  # Lo convierte a DateType
  df$PublishedAt <- as.Date(df$PublishedAt, format = "%Y-%m-%d %H:%M:%S")
  
  # Extraemos el año solamente
  df$Year <- format(df$PublishedAt, "%Y")
  
  # Lo filtramos solamente con el año dado
  df_filtered <- df[df$Year == as.character(year), ]
  
  # Regresamos el dataframe filtrado
  return(df_filtered)
}

Cada df de fuente, filtrar por año

# Este código cuenta con parámetros dentro de la función para identificar lo que se está buscando de cada dataframe de información

  # Youtube dataframes
  youtube2023 <- filter_by_year(youtubeDf, 2023)
  youtube2022 <- filter_by_year(youtubeDf, 2022)
  youtube2021 <- filter_by_year(youtubeDf, 2021)
  youtube2020 <- filter_by_year(youtubeDf, 2020)
  youtube2019 <- filter_by_year(youtubeDf, 2019)
  
  # Noticias dataframes
  noticias2023 <- filter_by_year(noticiasDf, 2023)
  noticias2022 <- filter_by_year(noticiasDf, 2022)
  noticias2021 <- filter_by_year(noticiasDf, 2021)
  noticias2020 <- filter_by_year(noticiasDf, 2020)
  noticias2019 <- filter_by_year(noticiasDf, 2019)
  
  # Gobierno dataframes
  gobierno2023 <- filter_by_year(gobiernoDf, 2023)
  gobierno2022 <- filter_by_year(gobiernoDf, 2022)
  gobierno2021 <- filter_by_year(gobiernoDf, 2021)
  gobierno2020 <- filter_by_year(gobiernoDf, 2020)
  gobierno2019 <- filter_by_year(gobiernoDf, 2019)

Poner un límite a la información

  # Define the row limit para que estén relativamente proporcionales entre sí
  ROW_LIMIT <- 1000
  
  youtube2019 <- youtube2019[1:min(nrow(youtube2019), ROW_LIMIT), ]
  gobierno2019 <- gobierno2019[1:min(nrow(gobierno2019), ROW_LIMIT), ]
  noticias2019 <- noticias2019[1:min(nrow(noticias2019), ROW_LIMIT), ]
  
  youtube2020 <- youtube2020[1:min(nrow(youtube2020), ROW_LIMIT), ]
  gobierno2020 <- gobierno2020[1:min(nrow(gobierno2020), ROW_LIMIT), ]
  noticias2020 <- noticias2020[1:min(nrow(noticias2020), ROW_LIMIT), ]
  
  youtube2021 <- youtube2021[1:min(nrow(youtube2021), ROW_LIMIT), ]
  gobierno2021 <- gobierno2021[1:min(nrow(gobierno2021), ROW_LIMIT), ]
  noticias2021 <- noticias2021[1:min(nrow(noticias2021), ROW_LIMIT), ]
  
  youtube2022 <- youtube2022[1:min(nrow(youtube2022), ROW_LIMIT), ]
  gobierno2022 <- gobierno2022[1:min(nrow(gobierno2022), ROW_LIMIT), ]
  noticias2022 <- noticias2022[1:min(nrow(noticias2022), ROW_LIMIT), ]
  
  youtube2023 <- youtube2023[1:min(nrow(youtube2023), ROW_LIMIT), ]
  gobierno2023 <- gobierno2023[1:min(nrow(gobierno2023), ROW_LIMIT), ]
  noticias2023 <- noticias2023[1:min(nrow(noticias2023), ROW_LIMIT), ]

Análisis general por año

# unir los data frames en un mismo data frame por año
merged2023 <- rbind(noticias2023, youtube2023, gobierno2023)
merged2022 <- rbind(noticias2022, youtube2022, gobierno2022)
merged2021 <- rbind(noticias2021, youtube2021, gobierno2021)
merged2020 <- rbind(noticias2020, youtube2020, gobierno2020)
merged2019 <- rbind(noticias2019, youtube2019, gobierno2019)

Eliminar palabras

# Define a vector 'palabras_eliminar' containing common words to be removed
# These words are often considered noise or irrelevant for analysis
# Examples include specific terms, common words, URLs, and symbols
# The goal is to filter out these words from text data for more meaningful analysis
palabras_eliminar <- c("esquizofrenia", "persona", "personas", "méxico", "tel", "com", "org", "video", "saludos", "saludo", "canal", "mas", "mano", "videos", "â", "mâ", "må", "å", "años", "alb")

Función de limpieza de texto

# función de limpieza de texto para que en el parámetro poder meter el texto de los dataframes y tener el texto limpio de español

limpiar_texto <- function(texto, palabras_eliminar = NULL){
    
    # Cambiar texto a minusculas
    texto <- tolower(texto) 
    
    # Elimina las stopwords en español
    texto <- removeWords(texto, stopwords("es"))
    
    # Elimina palabras especificadas por el usuario
    if (!is.null(palabras_eliminar)) {
      texto <- removeWords(texto, palabras_eliminar)
    }
    
    # Convertir caracteres acentuados a su versión sin acentos
    texto <- iconv(texto, "UTF-8", "ASCII//TRANSLIT")
    
    # Eliminar palabras cortas:
    texto <- gsub("\\b\\w{1,3}\\b", "", texto)
    
    # Eliminar caracteres no ASCII
    texto <- iconv(texto, "latin1", "ASCII", sub="")
    
    # Elimina URLs
    texto <- gsub("http\\S+|www\\.\\S+","",texto)
    
    # Elimina puntuacion y simbolos
    texto <- gsub("<.*?>","",texto)
    
    # Reemplazar puntuacion con espacios
    texto <- gsub("[[:punct:]]"," ",texto) 
    
    # Elimina los digitos
    texto <- gsub("\\d+","",texto)
    
    # Reemplazar doble espacio con un solo espacio
    texto <- gsub("\\s+"," ",texto)
    
    # La siguiente cadena elimina el nombre de los participantes de la conferencia antes de que estos hablen, es decir elimina los " presidente : ", que no sirven de mucho para el analisis.
    texto <- gsub("^\\W+:", "", texto)
    
    return(texto)
  }

Aplicar función de limpieza de texto a cada df

# función de limpieza de texto para que en el parámetro poder meter el texto de los dataframes y tener el texto limpio de español de cada conjunto de información

youtube2019$Content <- sapply(youtube2019$Content, limpiar_texto)
gobierno2019$Content <- sapply(gobierno2019$Content, limpiar_texto)
noticias2019$Content <- sapply(noticias2019$Content, limpiar_texto)
merged2019$Content <- sapply(merged2019$Content, limpiar_texto)

youtube2020$Content <- sapply(youtube2020$Content, limpiar_texto)
gobierno2020$Content <- sapply(gobierno2020$Content, limpiar_texto)
noticias2020$Content <- sapply(noticias2020$Content, limpiar_texto)
merged2020$Content <- sapply(merged2020$Content, limpiar_texto)

youtube2021$Content <- sapply(youtube2021$Content, limpiar_texto)
gobierno2021$Content <- sapply(gobierno2021$Content, limpiar_texto)
noticias2021$Content <- sapply(noticias2021$Content, limpiar_texto)
merged2021$Content <- sapply(merged2021$Content, limpiar_texto)

youtube2022$Content <- sapply(youtube2022$Content, limpiar_texto)
gobierno2022$Content <- sapply(gobierno2022$Content, limpiar_texto)
noticias2022$Content <- sapply(noticias2022$Content, limpiar_texto)
merged2022$Content <- sapply(merged2022$Content, limpiar_texto)

youtube2023$Content <- sapply(youtube2023$Content, limpiar_texto)
gobierno2023$Content <- sapply(gobierno2023$Content, limpiar_texto)
noticias2023$Content <- sapply(noticias2023$Content, limpiar_texto)
merged2023$Content <- sapply(merged2023$Content, limpiar_texto)

youtubeDf$Content <- sapply(youtubeDf$Content, limpiar_texto, palabras_eliminar)
gobiernoDf$Content <- sapply(gobiernoDf$Content, limpiar_texto, palabras_eliminar)
noticiasDf$Content <- sapply(noticiasDf$Content, limpiar_texto, palabras_eliminar)

Función crear corpus

# Function to create a corpus para el proceso de frecuencia de palabras

crear_corpus <- function(df, palabras_eliminar) {
  # Create a corpus from the Content column
  corpus <- Corpus(VectorSource(df$Content))
  
  # Convert to lowercase
  corpus <- tm_map(corpus, content_transformer(tolower))
  
  # Remove punctuation
  corpus <- tm_map(corpus, removePunctuation)
  
  # Remove specific words
  corpus <- tm_map(corpus, removeWords, palabras_eliminar)
  
  # Remove empty documents
  corpus <- tm_filter(corpus, function(x) length(unlist(strsplit(as.character(x), " "))) > 0)
    
  return(corpus)
}

Corpus

# Gobierno
gob23.corpus <- crear_corpus(gobierno2023, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
gob22.corpus <- crear_corpus(gobierno2022, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
gob21.corpus <- crear_corpus(gobierno2021, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
gob20.corpus <- crear_corpus(gobierno2020, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
gob19.corpus <- crear_corpus(gobierno2019, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
# Youtube
yt23.corpus <- crear_corpus(youtube2023, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
yt22.corpus <- crear_corpus(youtube2022, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
yt21.corpus <- crear_corpus(youtube2021, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
yt20.corpus <- crear_corpus(youtube2020, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
yt19.corpus <- crear_corpus(youtube2019, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
# Noticias
news23.corpus <- crear_corpus(noticias2023, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
news22.corpus <- crear_corpus(noticias2022, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
news21.corpus <- crear_corpus(noticias2021, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
news20.corpus <- crear_corpus(noticias2020, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
news19.corpus <- crear_corpus(noticias2019, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
# Merged Corpus
merged23.corpus <- crear_corpus(merged2023, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
merged22.corpus <- crear_corpus(merged2022, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
merged21.corpus <- crear_corpus(merged2021, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
merged20.corpus <- crear_corpus(merged2020, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents
merged19.corpus <- crear_corpus(merged2019, palabras_eliminar)
## Warning in tm_map.SimpleCorpus(corpus, content_transformer(tolower)):
## transformation drops documents
## Warning in tm_map.SimpleCorpus(corpus, removePunctuation): transformation drops
## documents
## Warning in tm_map.SimpleCorpus(corpus, removeWords, palabras_eliminar):
## transformation drops documents

Función nube de palabras

# Function to create a word cloud from a given corpus
crear_wordcloud <- function(corpus) {
  # Create the Document-Term Matrix
  dtm <- DocumentTermMatrix(corpus)
  
  # Calculate term frequency
  term_frequency <- colSums(as.matrix(dtm))
  term_frequency_sorted <- sort(term_frequency, decreasing = TRUE)
  
  # Identify and list the 25 most frequent terms in the dataset
  top_25_terms <- subset(term_frequency_sorted, term_frequency_sorted >= 50)
  top_25_terms <- head(top_25_terms, 25)
  
  print(top_25_terms)
  
  # Select the 150 most frequent words
  top_150_terms <- head(term_frequency_sorted, 150)
  
  # Define the color palette
  color_palette <- "Dark2"
  
  # Create the word cloud
  wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
          max.words=150, random.order=FALSE, rot.per=0.35, 
          colors=brewer.pal(8, color_palette))
}

# display.brewer.all()

Creación de nube de palabras

# Generate word cloud for each year
crear_wordcloud(merged23.corpus)
##    gracias enfermedad       dios      puede       vida      salud     mental 
##        216        199        192        150        146        128        126 
##       solo  trastorno    intomas    consumo       bien     riesgo    estudio 
##        124        119        115         98         98         94         92 
##     pueden    cerebro     muchas      tener   cannabis      mayor      mundo 
##         91         87         84         81         80         75         75 
##       hijo      cosas      diagn    siempre 
##         75         74         73         72
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : importante could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : enfermedades could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : madre could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ostico could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : despu could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : casa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : atenci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : realidad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayudar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trabajo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pensamientos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : depresi could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paranoide could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ansiedad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicina could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mente could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : genes could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : siento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : luego could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : esquizofr could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : puedo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : felicito could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hombres could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mujeres could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicamentos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : igual could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguien could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : misma could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : universidad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entender could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : padre could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : afecta could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alucinaciones could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bendiciones could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : investigaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : informaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : adem could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : quiere could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pensar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psicosis could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : sustancias could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : jesucristo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : manera could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : espec could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mejorar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatra could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bendiga could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : testimonio could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : resultados could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : factores could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hospital could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : momento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pasa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : papel could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : explica could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : siente could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ificos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : saber could not be fit on page. It will not be plotted.

crear_wordcloud(merged22.corpus)
##  enfermedad      mental       salud        vida       puede        solo 
##         390         352         264         246         196         153 
##    asociaci tratamiento     intomas        hace   trastorno  familiares 
##         140         130         125         121         117         102 
##       seres       cosas       hacer     energia    cannabis      psiqui 
##         102          97          97          93          89          88 
##        cada    mentales      pueden        bien       poder      atenci 
##          87          86          86          85          84          82 
##        dios 
##          82
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : vibraciones could not be fit on page. It will not be plotted.

crear_wordcloud(merged21.corpus)
##  enfermedad       puede      mental       cosas       casos       salud 
##         212         163         155         127         125         118 
##        vida     depresi        solo      pueden        dios       voces 
##         118         116         106          99          98          95 
##        cada        hace       gente       mejor     gracias   trastorno 
##          90          89          87          86          85          79 
## tratamiento        bien       veces       mayor       mismo       decir 
##          76          74          74          73          73          72 
##    suicidio 
##          72
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : enfermedades could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pasado could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : verdad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : investigaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : momento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : conocimiento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : manera could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : poder could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : realidad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : homicidios could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : presentado could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : embargo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entonces could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicina could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : registro could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicamentos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ministerio could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : universidad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : padecimiento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : horas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : espiritual could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : incluso could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : miedo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguna could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : registran could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diagn could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : despu could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problema could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tambi could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : igual could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nadie could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : real could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : centros could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : instituto could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : primer could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nunca could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paciente could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : polic could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : espa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paso could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : importante could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ejemplo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : luego could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pasa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : amiga could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : recomiendo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : social could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trabajo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayudar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : factores could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : parece could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiares could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : recibe could not be fit on page. It will not be plotted.

crear_wordcloud(merged20.corpus)
##   enfermedad discapacidad      gracias        puede        mente        cosas 
##          154          154          140          129          126          120 
##         solo         tipo         vida         hace     programa       muchas 
##          118          117          116          111          110           97 
##        tener         bien       mental    trastorno        hacer        gente 
##           96           90           88           84           82           82 
##         dios        bueno      familia      intomas       manera     entonces 
##           78           77           75           73           72           72 
##        vamos 
##           72
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familia could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : manera could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entonces could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : siempre could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : voces could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bienestar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : importante could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pacientes could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mentales could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : informaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tiempo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trabajo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : chica could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tratamiento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguien could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ejemplo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mismo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cuenta could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mundo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pueden could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mejor could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cerebro could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : todas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : verdad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alucinaciones could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : enfermedades could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : momento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : exico could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : poder could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : adem could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : miedo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : adelante could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : identidad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ostico could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hecho could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : programas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : aunque could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : compa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : luisa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguna could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diagn could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diferentes could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : veces could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : puedo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatra could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : experiencia could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : vivir could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : demonios could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : atenci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pensamientos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : medicamentos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buenas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hijo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayudar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buena could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pensar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : apoyo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : general could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pareja could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : social could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : esquizofr could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : quiero could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : investigaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trastornos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : igual could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buen could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : podr could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : luego could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paciente could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : podemos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : interesante could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : despu could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : lard could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayuda could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : secretar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : espa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : paranoide could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiares could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tipos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : toda could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : conocer could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diferente could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entender could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : condiciones could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pregunta could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : gustar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : acompa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatras could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : adultos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : favor could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : malas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problema could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : estudios could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : llegar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problemas could not be fit on page. It will not be plotted.

crear_wordcloud(merged19.corpus)
##       salud      mental        vida       puede  enfermedad   trastorno 
##         121         120         118         116         102          98 
##        solo    mentales tratamiento       gente       cosas        creo 
##          86          75          73          62          61          60 
##       mayor  trastornos        caso     gracias        bien    ansiedad 
##          58          55          55          55          55          54 
##       hacer       tener       parte       mejor       mismo   pacientes 
##          54          54          53          52          52          52 
##     depresi 
##          52
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : enfermedades could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : esquizo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : estudio could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : informaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : intomas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hospital could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : trabajo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : tiempo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cada could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ahora could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : madre could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mundo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ostico could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : aunque could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : diagn could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : momento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pues could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguien could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : poblaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : despu could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cualquier could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : forma could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ologo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : casos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : voces could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : cookies could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : realidad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : miedo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : contenido could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : personalidad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : todas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : grupo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatra could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bueno could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : doctor could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : primer could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : investigaci could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : psiquiatr could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buen could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : acuerdo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiares could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : general could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hecho could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : hermano could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : atrico could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nadie could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : poder could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : puedes could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : pasa could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : verdad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alucinaciones could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : debe could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mundial could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : siento could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : punto could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mayores could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : alguna could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : carlos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : misma could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : incluso could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : sentido could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : sociedad could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : natural could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : entonces could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : centro could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mucha could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problema could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nombre could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : buena could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nuevo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : realmente could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : mente could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : embargo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : nunca could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : familiar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : menos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ayuda could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : ideas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : problemas could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : gusta could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : derechos could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : apoyo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : bipolar could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : riesgo could not be fit on page. It will not be plotted.
## Warning in wordcloud(names(top_150_terms), freq = top_150_terms, min.freq = 6,
## : universidad could not be fit on page. It will not be plotted.

# La interpretación de cada una será vista en el reporte de investigación

Función Barplot

# Function to create a bar plot from a given corpus
crear_barplot <- function(corpus, main_title = "Términos más frecuentes", color_palette = "Dark2", threshold = 50, top_terms = 25) {
  # Create the Document-Term Matrix
  dtm <- DocumentTermMatrix(corpus)
  
  # Calculate term frequency
  term_frequency <- colSums(as.matrix(dtm))
  term_frequency_sorted <- sort(term_frequency, decreasing = TRUE)
  
  # Identify and list the top terms based on the specified threshold
  top_terms_vector <- subset(term_frequency_sorted, term_frequency_sorted >= threshold)
  top_terms_vector <- head(top_terms_vector, top_terms)
  
  # Create a bar plot
  barplot(top_terms_vector, main = main_title, col = brewer.pal(8, color_palette),
          las = 2, cex.names = 0.8)
}

Barplot por año

# Create bar plots for the years 2023 to 2019
crear_barplot(merged23.corpus, main_title = "Términos más frecuentes en el 2023", color_palette = "Dark2", threshold = 50, top_terms = 25)

crear_barplot(merged22.corpus, main_title = "Términos más frecuentes en el 2022", color_palette = "Dark2", threshold = 50, top_terms = 25)

crear_barplot(merged21.corpus, main_title = "Términos más frecuentes en el 2021", color_palette = "Dark2", threshold = 50, top_terms = 25)

crear_barplot(merged20.corpus, main_title = "Términos más frecuentes en el 2020", color_palette = "Dark2", threshold = 50, top_terms = 25)

crear_barplot(merged19.corpus, main_title = "Términos más frecuentes en el 2019", color_palette = "Dark2", threshold = 50, top_terms = 25)

# La interpretación de cada una será vista en el reporte de investigación

Función análisis de sentimientos

# Function for sentiment analysis

# Convert the 'PublishedAt' column to a datetime format using lubridate
sentiment_analysis <- function(df, source) {
  df$PublishedAt = lubridate::ymd_hms(df$PublishedAt)

# Tokenization: Split the 'Content' column into individual words
tidy_df <- df %>%
  unnest_tokens("word", Content)

# Stopword removal: Exclude common Spanish stopwords from the tokenized data
spanish_stop_words <- tm::stopwords("spanish")
spanish_stop_words <- data.frame(word = spanish_stop_words, stringsAsFactors = FALSE)
tidy_df <- tidy_df %>%
  anti_join(spanish_stop_words, by = "word")

# Sentiment analysis: Assign sentiment scores using the Bing lexicon
bing_word_sentiments <- get_sentiments("bing")
df_sentiment <- tidy_df %>%
  inner_join(bing_word_sentiments) %>%
  count(PublishedAt, sentiment) %>%
  spread(sentiment, n, fill = 0)

# Plot sentiment over time

# Create a line plot showing positive and negative sentiment over time
ggplot(df_sentiment, aes(x = PublishedAt)) +
  geom_line(aes(y = positive), color = "blue") +
  geom_line(aes(y = -negative), color = "red") +
  labs(x = "PublishedAt", y = "Sentiment",
       title = source,
       color = "Sentiment") +
  scale_color_manual(values = c("Positive" = "blue", "Negative" = "red")) +
  theme_minimal()
}

Análisis de sentimientos

# Use the function on your DataFrame
sentiment_analysis(gobiernoDf, "Gobierno")
## Joining with `by = join_by(word)`

sentiment_analysis(youtubeDf, "Youtube")
## Joining with `by = join_by(word)`

sentiment_analysis(noticiasDf, "Noticias")
## Joining with `by = join_by(word)`

# La interpretación de cada una será vista en el reporte de investigación

Función para análisis de sentimientos 2

# Function for sentiment analysis and word frequency
sentiment_analysis_2 <- function(df) {
# Convert the 'PublishedAt' column to a datetime format using lubridate
df$PublishedAt = lubridate::ymd_hms(df$PublishedAt)

# Tokenization: Split the 'Content' column into individual words
tidy_df <- df %>%
  unnest_tokens("word", Content)

# Stopword removal: Exclude common Spanish stopwords from the tokenized data
spanish_stop_words <- tm::stopwords("spanish")
spanish_stop_words <- data.frame(word = spanish_stop_words, stringsAsFactors = FALSE)
tidy_df <- tidy_df %>%
  anti_join(spanish_stop_words, by = "word")

# Sentiment analysis: Assign sentiment scores using the NRC lexicon
nrc_word_sentiments <- get_sentiments("nrc")
tidy_sentiment <- tidy_df %>%
  inner_join(nrc_word_sentiments)

# Get the total counts of each sentiment
total_sentiments <- tidy_sentiment %>%
  group_by(sentiment) %>%
  summarise(count = n())

# Get the most common words for each sentiment
common_words <- tidy_sentiment %>%
  count(word, sentiment, sort = TRUE) %>%
  group_by(sentiment) %>%
  top_n(10, n)

# Return a list containing total sentiment counts and most common words for each sentiment
return(list(total_sentiments = total_sentiments, common_words = common_words))
}

Función Recuentos totales de sentimientos

recuento_sentimientos <- function(df, titulo) {
  # Obtener los resultados del análisis de sentimientos
  resultados <- sentiment_analysis_2(df)
  
  # Definir los colores para cada emoción
  colores_emociones <- c("joy" = "#CCCC00",  # Un tono más oscuro de amarillo
                       "sadness" = "#0000CC",  # Un tono más oscuro de azul
                       "anger" = "#CC0000",  # Un tono más oscuro de rojo
                       "fear" = "#000000",  # Negro es el color más oscuro posible
                       "surprise" = "#CC7A00",  # Un tono más oscuro de naranja
                       "trust" = "#009900",  # Un tono más oscuro de verde
                       "disgust" = "#660099",  # Un tono más oscuro de morado
                       "anticipation" = "#FF1493",  # Un tono más oscuro de rosa
                       "positive" = "#1E90FF",  # Un tono más oscuro de azul claro
                       "negative" = "#8B0000")  # Un tono más oscuro de rojo oscuro

  
  # Crear el gráfico de barras
  ggplot(resultados$total_sentiments, aes(x = sentiment, y = count, fill = sentiment)) +
    geom_bar(stat = "identity") +
    scale_fill_manual(values = colores_emociones) +
    theme_minimal() +
    labs(x = "Sentimiento", y = "Recuento", title = titulo)
}

Recuentos totales de sentimientos

# Youtube
recuento_sentimientos(youtubeDf, titulo = "Análisis de sentimientos de YouTube")
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 27 of `x` matches multiple rows in `y`.
## ℹ Row 4859 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# Gobierno
recuento_sentimientos(gobiernoDf, titulo = "Análisis de sentimientos de Gobierno")
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 13 of `x` matches multiple rows in `y`.
## ℹ Row 6879 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# Noticias
recuento_sentimientos(noticiasDf, titulo = "Análisis de sentimientos de Noticias")
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 4020 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2019, titulo = "Análisis de sentimientos 2019")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 4 of `x` matches multiple rows in `y`.
## ℹ Row 7310 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2020, titulo = "Análisis de sentimientos 2020")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 10 of `x` matches multiple rows in `y`.
## ℹ Row 10125 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2021, titulo = "Análisis de sentimientos 2021")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 30 of `x` matches multiple rows in `y`.
## ℹ Row 8032 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2022, titulo = "Análisis de sentimientos 2022")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 102 of `x` matches multiple rows in `y`.
## ℹ Row 8032 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(merged2023, titulo = "Análisis de sentimientos 2023")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 4020 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# Comparativa
recuento_sentimientos(youtube2019, "Youtube 2019")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 106 of `x` matches multiple rows in `y`.
## ℹ Row 11774 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(youtube2023, "Youtube 2023")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 28 of `x` matches multiple rows in `y`.
## ℹ Row 4859 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(noticias2019, "Noticias 2019")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 4 of `x` matches multiple rows in `y`.
## ℹ Row 7310 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(noticias2023, "Noticias 2023")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 4020 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(gobierno2019, "Gobierno 2019")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 36 of `x` matches multiple rows in `y`.
## ℹ Row 8032 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

recuento_sentimientos(gobierno2023, "Gobierno 2023")
## Warning: All formats failed to parse. No formats found.
## Joining with `by = join_by(word)`
## Warning in inner_join(., nrc_word_sentiments): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 3 of `x` matches multiple rows in `y`.
## ℹ Row 3625 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# La interpretación de cada una será vista en el reporte de investigación

Función de tematización

 # Function for topic modeling visualization
tematizacion <- function(df, title) {
# Preprocess the text
# Convert the 'Content' column to lowercase
transcription <- tolower(df$Content)
# Remove punctuation from the text
transcription <- removePunctuation(transcription)
# Remove common Spanish stopwords from the text
transcription <- removeWords(transcription, stopwords("spanish"))


# Convert the preprocessed text into a Corpus
corpus <- Corpus(VectorSource(transcription))
# Convert the Corpus into a DocumentTermMatrix
dtm <- DocumentTermMatrix(corpus)
# Remove empty rows from the DocumentTermMatrix
dtm <- dtm[row_sums(dtm) > 0, ]
# Generate a topic model using Latent Dirichlet Allocation (LDA) with 5 topics
lda <- LDA(dtm, k = 5)


# Get the most important terms for each topic
# Extract terms with highest beta values for each topic
terms <- tidy(lda, matrix = "beta") %>%
  group_by(topic) %>%
  slice_max(order_by = beta, n = 15) %>%
  ungroup() %>%
  slice_max(order_by = beta, n = 15)

# Convert 'topic' into a factor
terms$topic <- as.factor(terms$topic)

# Generate the plot
# Create a grouped bar plot to visualize important terms for each topic
ggplot(terms, aes(x = term, y = beta, fill = topic)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~topic, ncol = 3) +
  scale_fill_manual(values = c("#DAA520", "#9B30FF", "#FF4500", "#4C9900", "#00CED1")) +  # Adjust colors as needed
  ggtitle(title)  # Add title to the plot
}

Tematización

tematizacion(youtubeDf, "Tematización Youtube")

tematizacion(gobiernoDf, "Tematización Gobierno")

tematizacion(noticiasDf, "Tematización Noticias")

# La interpretación de cada una será vista en el reporte de investigación