# Paquetes ---------------------------------------------------- 

library(pacman) # Package Manager
p_load(foreign, colourpicker, plotrix, PerformanceAnalytics,
       ggplot2, gganimate, png, plotly, forcats,
       RColorBrewer, maps, mapdata, lubridate, scales,
       esquisse, cowplot, ggpubr, patchwork, ggthemes, tvthemes,
       gghighlight, gifski, av, ggpie, lessR, tidyverse,
       tidytext, tm, wordcloud, wordcloud2, waterfalls,
       treemapify, waffle, dplyr, readr, stringr, data.table, hexbin,
       ggdensity, tidyquant, pdftools, widyr,
       leaflet, magrittr, stopwords, readxl, htmlwidgets, dplyr)
# Session-wide number formatting: scipen = 999 penalises scientific notation
# so fixed notation is preferred, and digits = 3 limits printed significant
# digits.
options(scipen = 999, digits = 3)

# 1 Lectura, limpieza y procesamiento de Datos ----

# Data loading ----
# pdf_text() returns a character vector for the PDF, which is cleaned below.
angeles_y_demonios <- pdf_text("Ángeles y demonios - Dan Brown.pdf")

# Question 1 -------------------------
# Remove words, symbols and numbers that are of no use for the analysis ----
# Titles, front/back matter headings and main character names. They are
# removed one after another, in this order, before tokenisation.
frases_a_eliminar <- c(
  "Ángeles y Demonios",
  "Vittoria", # character names
  "Kohler",
  "Olivetti",
  "Dan Brown",
  "FIN",
  "Los hechos",
  "Nota del autor",
  "Prólogo",
  "Para Blythe",
  "Langdon"
)
for (frase in frases_a_eliminar) {
  angeles_y_demonios <- str_remove_all(angeles_y_demonios, frase)
}

angeles_y_demonios <- angeles_y_demonios |>
  str_remove_all("-") |>                    # dashes
  str_remove_all("[:punct:]") |>            # punctuation
  str_remove_all("[:digit:]") |>            # digits
  str_replace_all("[:blank:]{2,}", " ")     # collapse runs of blanks into one space

# Round-trip through a .txt file: scan() with sep = "\n" re-reads the text
# split into one element per line.
write_lines(angeles_y_demonios, "Ángeles y Demonios.txt")
angeles_y_demonios <- scan("Ángeles y Demonios.txt",
                           encoding = "UTF-8", what = "char", skip = 0,
                           sep = "\n")

# One row per token; removeNumbers() strips any digits left inside a token.
angeles_y_demonios <- tibble(angeles_y_demonios) |>
  unnest_tokens(Token, angeles_y_demonios) |>
  mutate(Token = removeNumbers(Token))

# Stop-word table ----
# Three sources are combined: the NLTK Spanish list, the custom spreadsheet
# and a manual list. The original script built the NLTK vector and then
# overwrote it, so the NLTK stop words were never actually applied.
stopwords_es_nltk <- tibble(
  Token  = stopwords::stopwords(language = "es", source = "nltk"),
  Fuente = "nltk"
)
stopwords_es_1 <- read_excel("CustomStopWords.xlsx")
names(stopwords_es_1) <- c("Token", "Fuente")

# The empty string covers tokens left empty by removeNumbers().
stopwords_es_2 <- tibble(Token = c(""), Fuente = "Mis StopWords")

stopwords_es <- rbind(stopwords_es_nltk, stopwords_es_1, stopwords_es_2)
stopwords_es <- stopwords_es[!duplicated(stopwords_es$Token), ]

# Drop the stop words; the join key is stated explicitly instead of relying on
# the implicit common-column match.
angeles_y_demonios <- angeles_y_demonios |>
  anti_join(stopwords_es, by = "Token")

# Fold "camarlengo" into "papa" so both are counted as one term.
angeles_y_demonios$Token <- str_replace(angeles_y_demonios$Token, "camarlengo", "papa")

# Frequency of every remaining token, most frequent first.
angeles_y_demonios_frecuencias <- angeles_y_demonios |>
  count(Token, sort = TRUE)

# Sentiments ----
# Lexicon file, de-duplicated. The join below uses its `palabra` column.
sentimientos <- read.delim("sentimientos_2.txt") |>
  as_tibble() |>   # as.tibble() is deprecated
  distinct()

# Keep only tokens that appear in the sentiment lexicon.
angeles_y_demonios_sentimientos <- angeles_y_demonios |>
  inner_join(sentimientos, by = c("Token" = "palabra"))

# 2 Gráfico de Frecuencias (Top 10) ----

# Ten-colour qualitative palette, one colour per bar.
# NOTE: `colors` is also referenced by the sentiment chart section of this
# script, so the name is kept.
colors <- brewer.pal(10, "Set3")

# Exactly the ten most frequent tokens. with_ties = FALSE guarantees at most
# ten rows, so the ten-colour manual fill scale can never run out of values
# (top_n() keeps ties and may return more).
top_10_palabras <- angeles_y_demonios_frecuencias |>
  slice_max(n, n = 10, with_ties = FALSE)

# Upper y limit: 800 as before, raised only when a bar (plus room for its
# label) would otherwise fall outside the limit and be dropped by ylim().
limite_y <- max(800, top_10_palabras$n * 1.1)

# Column chart of the top ten tokens, ordered by frequency, with the count
# printed above each bar.
grafico_frec <- top_10_palabras |>
  ggplot() +
  aes(x = fct_reorder(Token, n), y = n, fill = Token) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), vjust = -0.5) +
  labs(title = "Top 10 palabras - Ángeles y Demonios de Dan Brown",
       x = "Top 10 Palabras", y = "Frecuencia") +
  ylim(0, limite_y) +
  scale_fill_manual(values = colors) +
  theme(axis.ticks = element_blank(),
        panel.grid = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(family = "Comic Sans MS", size = 16, face = "bold"),
        axis.title = element_text(family = "Arial", size = 12),
        axis.text = element_text(family = "Verdana", size = 10))
grafico_frec

# 2.1 Comentario:

# 3 Wordcloud ----

# Word cloud of token frequencies.
# The seed is fixed before drawing so the word layout is reproducible.
set.seed(123)
par(bg = "white")
with(
  angeles_y_demonios_frecuencias,
  wordcloud(
    words = Token,
    freq = n,
    scale = c(5, 0.1),       # largest / smallest word size
    min.freq = 1,
    max.words = 400,
    random.order = FALSE,    # most frequent words are placed first
    rot.per = 0.3,           # share of words drawn rotated
    colors = c("#bce1ab", "#85ae72", "#573e54", "#361542", "#170132")
  )
)

# 3.1 Comentario:

# 4 Gráfica de Sentimientos ----

# Palette for the sentiment bars. The original defined `color1` but then read
# `colors` from the frequency-chart section; the plot now uses its own
# palette so this section does not depend on that one.
color1 <- brewer.pal(10, "Set3")

# Horizontal bar chart: number of tokens matched to each sentiment, ordered by
# count, with the count printed just past the end of each bar.
graf_sent <- angeles_y_demonios_sentimientos |>
  count(sentimiento) |>
  ggplot() +
  aes(x = fct_reorder(sentimiento, n), y = n, fill = sentimiento) +
  geom_col(show.legend = FALSE) +             # bars first, labels on top
  geom_text(aes(label = n), hjust = -0.25) +
  coord_flip() +
  labs(title = "Gráfico de los sentimientos del libro Ángeles y Demonios - Dan Brown",
       x = "Sentimientos", y = "Frecuencia") +
  theme(panel.grid = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(family = "Comic Sans MS", size = 16, face = "bold"),
        axis.title = element_text(family = "Arial", size = 12),
        axis.text = element_text(family = "Verdana", size = 10),
        axis.ticks = element_blank()) +
  scale_fill_manual(values = color1)
graf_sent

# 4.1 Comentario: