library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.5.2
library(viridisLite)
library(janeaustenr)
## Warning: package 'janeaustenr' was built under R version 4.5.2
library(stopwords)
## Warning: package 'stopwords' was built under R version 4.5.2
library(gridExtra)
## 
## Adjuntando el paquete: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(gt)

austen_texts <-  austen_books()
austen_limpio <- austen_books() %>% 
  mutate(text = gsub("[[:punct:]]", "", text)) %>% 
  mutate(text = tolower(text)) %>%
  mutate(text = gsub("[[:digit:]]", "", text)) %>% 
  filter(!grepl("jane|austen|chapter", text))
austen_tokens <- austen_limpio %>% 
  unnest_tokens(word, text, token = "words") %>% 
  anti_join(get_stopwords(source = "stopwords-iso"))
## Joining with `by = join_by(word)`
any(is.na(austen_tokens$word))
## [1] FALSE
austen_tokens %>% 
  count(word, sort= TRUE)
## # A tibble: 18,106 × 2
##    word          n
##    <chr>     <int>
##  1 time       1289
##  2 fanny       849
##  3 lady        814
##  4 sir         791
##  5 emma        737
##  6 day         710
##  7 sister      695
##  8 house       661
##  9 elizabeth   654
## 10 elinor      616
## # ℹ 18,096 more rows
austen_sent <- austen_tokens %>% 
  inner_join(get_sentiments("bing")) %>% 
  group_by(book,sentiment) %>% 
  summarise(n= n()) %>% 
  ungroup() %>% 
  arrange(desc(book))
## Joining with `by = join_by(word)`
## Warning in inner_join(., get_sentiments("bing")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 121757 of `x` matches multiple rows in `y`.
## ℹ Row 5051 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
## `summarise()` has grouped output by 'book'. You can override using the
## `.groups` argument.
austen_sent %>% 
  gt() %>%
  tab_header(
    title = "Análisis de sentimiento",
    subtitle = "Cantidad de palabras positivas y negativas, agrupadas por libro")
Análisis de sentimiento
Cantidad de palabras positivas y negativas, agrupadas por libro
book sentiment n
Persuasion negative 2027
Persuasion positive 2461
Northanger Abbey negative 2232
Northanger Abbey positive 2368
Emma negative 3847
Emma positive 4787
Mansfield Park negative 4293
Mansfield Park positive 4773
Pride & Prejudice negative 3211
Pride & Prejudice positive 3700
Sense & Sensibility negative 3314
Sense & Sensibility positive 3739
austen_sent %>% 
  ggplot(aes(x= book, y= n, fill= sentiment))+
  geom_col(position = "dodge")+
  labs(title = "Análisis de sentimiento de los libros de Jane Austen", 
       subtitle = "Cantidad de palabras positivas y negativas, agrupadas por libro",
       x= "Libro", y = "Cantidad",
       fill= NULL)+
  theme_minimal()