# Paquetes ----------------------------------------------------
library(pacman) # Package Manager
p_load(foreign, colourpicker, plotrix, PerformanceAnalytics,
ggplot2, gganimate, png, plotly, forcats,
RColorBrewer, maps, mapdata, lubridate, scales,
esquisse, cowplot, ggpubr, patchwork, ggthemes, tvthemes,
gghighlight, gifski, av, ggpie, lessR, tidyverse,
tidytext, tm, wordcloud, wordcloud2, waterfalls,
treemapify, waffle, dplyr, readr, stringr, data.table, hexbin,
ggdensity, tidyquant, pdftools, widyr,
leaflet, magrittr, stopwords, readxl, htmlwidgets, dplyr)
# Global display settings: effectively disable scientific notation and
# print numbers with three significant digits.
options(
  scipen = 999,
  digits = 3
)
# Lectura, limpieza y procesamiento de datos ----
# Data loading ----
# Read the book from the PDF into a character vector.
angeles_y_demonios <- pdf_text("Ángeles y demonios - Dan Brown.pdf")

# Question 1 ----
# Remove words, punctuation and digits that are of no use for the analysis:
# the title, the author, front/back-matter headings and the main character
# names. Patterns are applied one after another, in this order.
# "FIN" is anchored with word boundaries so that only the standalone end
# marker is removed, not the same letters inside longer upper-case words
# (e.g. "FINAL").
unwanted_patterns <- c(
  "Ángeles y Demonios",
  "Vittoria",
  "Kohler",
  "Olivetti",
  "Dan Brown",
  "\\bFIN\\b",
  "Los hechos",
  "Nota del autor",
  "Prólogo",
  "Para Blythe",
  "Langdon"
)
for (pattern in unwanted_patterns) {
  angeles_y_demonios <- str_remove_all(angeles_y_demonios, pattern)
}

# [:punct:] already covers hyphens, so no separate "-" pass is needed.
angeles_y_demonios <- angeles_y_demonios |>
  str_remove_all("[:punct:]") |>            # drop punctuation marks
  str_remove_all("[:digit:]") |>            # drop digits
  str_replace_all("[:blank:]{2,}", " ")     # collapse runs of spaces/tabs

# Export the cleaned text as a .txt file.
write_lines(angeles_y_demonios, "Ángeles y Demonios.txt")

# Tokenise into one word per row (column `Token`). The tokens are taken
# straight from the in-memory text: re-reading the exported file line by
# line would yield the same words. Digits were already stripped above, so
# no further number removal is required on the tokens.
angeles_y_demonios <- tibble(text = angeles_y_demonios) |>
  unnest_tokens(Token, text)
# Stop-word removal ----
library(stopwords)

# NLTK Spanish stop words, shaped like the custom list (Token, Fuente) so
# they can be combined with it. Previously this list was built and then
# overwritten without ever being used.
stopwords_nltk <- tibble(
  Token = stopwords::stopwords(language = "es", source = "nltk"),
  Fuente = "nltk"
)

# Custom stop words from the spreadsheet; its two columns are renamed to
# Token / Fuente. NOTE(review): assumes the sheet has exactly two columns.
stopwords_es_1 <- read_excel("CustomStopWords.xlsx")
names(stopwords_es_1) <- c("Token", "Fuente")

# Extra entry so that empty tokens are discarded as well.
stopwords_es_2 <- tibble(Token = "", Fuente = "Mis StopWords")

# Custom entries come first so that, for duplicated words, their `Fuente`
# is the one kept when duplicates are dropped.
stopwords_es <- rbind(stopwords_es_1, stopwords_es_2, stopwords_nltk)
stopwords_es <- stopwords_es[!duplicated(stopwords_es$Token), ]

# Drop the stop words (explicit key avoids the implicit-join message) and
# count "camarlengo" together with "papa".
angeles_y_demonios <- angeles_y_demonios |>
  anti_join(stopwords_es, by = "Token") |>
  mutate(Token = str_replace(Token, "camarlengo", "papa"))

# Word frequencies, most frequent first (columns: Token, n).
angeles_y_demonios_frecuencias <- angeles_y_demonios |>
  count(Token, sort = TRUE)
# Sentiments ----
# Load the sentiment lexicon and drop exact duplicate rows. The join below
# relies on the lexicon having a `palabra` column.
# as_tibble() replaces the deprecated as.tibble().
sentimientos <- read.delim("sentimientos_2.txt") |>
  as_tibble() |>
  distinct()

# Keep only the tokens that appear in the lexicon, attaching the lexicon
# columns to each matching token.
angeles_y_demonios_sentimientos <- angeles_y_demonios |>
  inner_join(sentimientos, by = c("Token" = "palabra"))
# Gráfico de frecuencias (Top 10) ----
# Bar chart of the ten most frequent words.
# Palette with one colour per bar.
colors <- brewer.pal(10, "Set3")

grafico_frec <- angeles_y_demonios_frecuencias |>
  # Exactly ten rows even when counts are tied; more than ten rows would
  # exceed the ten colours supplied to the manual fill scale.
  slice_max(n, n = 10, with_ties = FALSE) |>
  ggplot(aes(x = fct_reorder(Token, n), y = n, fill = Token)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), vjust = -0.5) +
  labs(
    title = "Top 10 palabras - Ángeles y Demonios de Dan Brown",
    x = "Top 10 Palabras",
    y = "Frecuencia"
  ) +
  # Start the axis at zero and leave headroom for the labels above the
  # bars; a fixed upper limit would drop any bar taller than that limit.
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.1))
  ) +
  scale_fill_manual(values = colors) +
  theme(
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(family = "Comic Sans MS", size = 16, face = "bold"),
    axis.title = element_text(family = "Arial", size = 12),
    axis.text = element_text(family = "Verdana", size = 10)
  )
grafico_frec

# Comentario:
# Wordcloud ----
# Word cloud of the token frequencies, drawn on a white background.
# The random seed is fixed right before drawing.
set.seed(123)
par(bg = "white")
wordcloud(
  angeles_y_demonios_frecuencias[["Token"]],
  angeles_y_demonios_frecuencias[["n"]],
  scale = c(5, 0.1),
  min.freq = 1,
  max.words = 400,
  random.order = FALSE,
  rot.per = 0.3,
  colors = c("#bce1ab", "#85ae72", "#573e54", "#361542", "#170132")
)

# Comentario:
# Gráfica de sentimientos ----
# Horizontal bar chart with the number of tokens per sentiment.
# Palette defined here and used by this chart's fill scale, so the chart
# does not depend on a palette object created in another section.
color1 <- brewer.pal(10, "Set3")

graf_sent <- angeles_y_demonios_sentimientos |>
  count(sentimiento) |>
  ggplot(aes(x = fct_reorder(sentimiento, n), y = n, fill = sentimiento)) +
  # Bars first, then the count labels on top of them.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), hjust = -0.25) +
  coord_flip() +
  labs(
    title = "Gráfico de los sentimientos del libro Ángeles y Demonios - Dan Brown",
    x = "Sentimientos",
    y = "Frecuencia"
  ) +
  scale_fill_manual(values = color1) +
  theme(
    panel.grid = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(family = "Comic Sans MS", size = 16, face = "bold"),
    axis.title = element_text(family = "Arial", size = 12),
    axis.text = element_text(family = "Verdana", size = 10),
    axis.ticks = element_blank()
  )
graf_sent

# Comentario: