library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.5.2
library(viridisLite)
library(janeaustenr)
## Warning: package 'janeaustenr' was built under R version 4.5.2
library(stopwords)
## Warning: package 'stopwords' was built under R version 4.5.2
library(gridExtra)
##
## Adjuntando el paquete: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(gt)
# Raw corpus as returned by janeaustenr::austen_books().
austen_texts <- austen_books()

# Normalise every line of text: strip punctuation, lower-case, strip digits,
# then discard lines that still contain "jane", "austen" or "chapter".
# NOTE(review): the pattern is unanchored, so ANY line containing one of these
# substrings is dropped in full (not only title/author/chapter headings) --
# confirm that this is the intended behaviour.
austen_limpio <- austen_texts %>%
  mutate(
    text = gsub("[[:punct:]]", "", text),
    text = tolower(text),
    text = gsub("[[:digit:]]", "", text)
  ) %>%
  filter(!grepl("jane|austen|chapter", text))
# Split the cleaned text into one-word-per-row tokens and remove stop words.
# The join key is spelled out (`by = "word"`) so the anti-join does not rely
# on an implicit natural join and no longer emits the "Joining with" message.
austen_tokens <- austen_limpio %>%
  unnest_tokens(word, text, token = "words") %>%
  anti_join(get_stopwords(source = "stopwords-iso"), by = "word")
# Sanity check: TRUE if any token is missing.
anyNA(austen_tokens$word)
## [1] FALSE
# Word frequencies across the whole corpus, most frequent first.
count(austen_tokens, word, sort = TRUE)
## # A tibble: 18,106 × 2
## word n
## <chr> <int>
## 1 time 1289
## 2 fanny 849
## 3 lady 814
## 4 sir 791
## 5 emma 737
## 6 day 710
## 7 sister 695
## 8 house 661
## 9 elizabeth 654
## 10 elinor 616
## # ℹ 18,096 more rows
# Count positive and negative tokens per book using the Bing lexicon.
# - `by = "word"` makes the join key explicit instead of relying on a natural
#   join.
# - `relationship = "many-to-many"` declares the fan-out that dplyr detected
#   (tokens repeat in the corpus, and at least one lexicon word matches more
#   than one lexicon row), so the join no longer warns; the joined rows are
#   unchanged.
# - `count()` replaces group_by() + summarise(n = n()) + ungroup(): same
#   columns and rows, ungrouped result, no grouping message.
# Rows are ordered by descending `book`.
austen_sent <- austen_tokens %>%
  inner_join(
    get_sentiments("bing"),
    by = "word",
    relationship = "many-to-many"
  ) %>%
  count(book, sentiment) %>%
  arrange(desc(book))
# Display the per-book sentiment counts as a gt table with title and subtitle.
tab_header(
  gt(austen_sent),
  title = "Análisis de sentimiento",
  subtitle = "Cantidad de palabras positivas y negativas, agrupadas por libro"
)
## Análisis de sentimiento
## Cantidad de palabras positivas y negativas, agrupadas por libro
##
## | book                | sentiment |    n |
## |:--------------------|:----------|-----:|
## | Persuasion          | negative  | 2027 |
## | Persuasion          | positive  | 2461 |
## | Northanger Abbey    | negative  | 2232 |
## | Northanger Abbey    | positive  | 2368 |
## | Emma                | negative  | 3847 |
## | Emma                | positive  | 4787 |
## | Mansfield Park      | negative  | 4293 |
## | Mansfield Park      | positive  | 4773 |
## | Pride & Prejudice   | negative  | 3211 |
## | Pride & Prejudice   | positive  | 3700 |
## | Sense & Sensibility | negative  | 3314 |
## | Sense & Sensibility | positive  | 3739 |
# Side-by-side bars of positive vs. negative word counts for each book.
ggplot(austen_sent, aes(x = book, y = n, fill = sentiment)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Análisis de sentimiento de los libros de Jane Austen",
    subtitle = "Cantidad de palabras positivas y negativas, agrupadas por libro",
    x = "Libro",
    y = "Cantidad",
    fill = NULL
  )
