# Instalar si no están presentes
if (!require("tidyverse")) install.packages("tidyverse")
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.4.4 v purrr 1.0.1
## v tibble 3.2.1 v dplyr 1.1.3
## v tidyr 1.3.0 v stringr 1.5.0
## v readr 2.1.4 v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
if (!require("tidytext")) install.packages("tidytext")
## Loading required package: tidytext
## Warning: package 'tidytext' was built under R version 4.1.3
if (!require("wordcloud")) install.packages("wordcloud")
## Loading required package: wordcloud
## Warning: package 'wordcloud' was built under R version 4.1.3
## Loading required package: RColorBrewer
## Warning: package 'RColorBrewer' was built under R version 4.1.3
if (!require("tm")) install.packages("tm")
## Loading required package: tm
## Warning: package 'tm' was built under R version 4.1.3
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
if (!require("textdata")) install.packages("textdata")
## Loading required package: textdata
## Warning: package 'textdata' was built under R version 4.1.3
if (!require("readr")) install.packages("readr")
# Cargar librerías
library(tidyverse)
library(tidytext)
library(wordcloud)
library(tm)
library(textdata)
library(readr)
El dataset se encuentra en: https://drive.google.com/file/d/1PJcyHdI1eUmINPmJtHngxvTncJ9Qmmxb/view?usp=sharing
# Read the labelled comments (columns: id, comentario, sentimiento) from the
# CSV file; the path is relative to the current working directory.
comentarios <- "../datasets/comentarios_sentimiento.csv" %>%
  read_csv()
## Rows: 150 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): comentario, sentimiento
## dbl (1): id
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows to sanity-check the import.
comentarios %>%
  head()
## # A tibble: 6 x 3
## id comentario sentimiento
## <dbl> <chr> <chr>
## 1 1 Estuvo bien, nada especial. Neutro
## 2 2 El servicio fue aceptable. Neutro
## 3 3 Estuvo bien, nada especial. Neutro
## 4 4 Muy decepcionado con el servicio. Negativo
## 5 5 Nunca volveré a comprar esto. Negativo
## 6 6 Estuvo bien, nada especial. Neutro
Convertir a minúsculas
Eliminar puntuación, números y stopwords
Tokenizar las palabras
library(dplyr)
library(tidytext)
library(stringr)
# Drop rows that have no comment text and lower-case the remaining text.
comentarios <- comentarios %>%
  filter(!is.na(comentario)) %>%
  mutate(comentario = tolower(comentario))

# Tokenize: one row per word, stored in the `palabra` column.
comentarios_tokens <- comentarios %>%
  unnest_tokens(palabra, comentario)

# Remove stop words and purely numeric tokens.
# The comments are written in Spanish, while tidytext's `stop_words` table only
# contains English words, so joining against it would leave Spanish function
# words ("el", "con", "fue", ...) in the data. Use the Spanish list from tm.
palabras_vacias <- tm::stopwords("spanish")

comentarios_limpios <- comentarios_tokens %>%
  filter(
    !palabra %in% palabras_vacias,
    !str_detect(palabra, "^[0-9]+$")
  )
# Count how often each word occurs within each sentiment class,
# most frequent first.
frecuencias <- comentarios_limpios %>%
  count(sentimiento, palabra, sort = TRUE)

# Bar chart of the 10 most frequent words per sentiment, one facet per class.
# Words tied at the cut-off are all kept (slice_max() default, same as top_n()).
frecuencias %>%
  group_by(sentimiento) %>%
  slice_max(order_by = n, n = 10) %>%
  ungroup() %>%
  # A plain reorder() ranks words across all facets at once, so a word shared by
  # several sentiments ends up out of order inside a facet. reorder_within()
  # ranks within each sentiment; scale_x_reordered() strips the internal suffix
  # it appends to the axis labels.
  mutate(palabra = reorder_within(palabra, n, sentimiento)) %>%
  ggplot(aes(x = palabra, y = n, fill = sentimiento)) +
  geom_col(show.legend = FALSE) +
  scale_x_reordered() +
  coord_flip() +
  facet_wrap(~ sentimiento, scales = "free") +
  labs(
    title = "Palabras más frecuentes por sentimiento",
    x = "Palabra", y = "Frecuencia"
  )