The assignment consists of comparing two film scripts: Aladdin (1992) and Aladdin (2019). The first is the animated version, so the task is to analyze how much the new script has been modernized and whether it remains faithful to its predecessor.
library(tm)
## Loading required package: NLP
library(pdftools)
## Using poppler version 23.04.0
library(SnowballC)
library(NLP)
library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
library(syuzhet)
Sys.setenv(LANG = "en")
# Let's start by importing the text to analyze
tex <- readLines("Aladdin1992.txt", warn = FALSE)
text <- iconv(tex, "WINDOWS-1252", "UTF-8")
# We need to convert the text to a corpus
docs <- Corpus(VectorSource(text))
# inspect(docs)
# General text cleaning
# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(docs, content_transformer(tolower)):
## transformation drops documents
# Remove numbers
docs <- tm_map(docs, removeNumbers)
## Warning in tm_map.SimpleCorpus(docs, removeNumbers): transformation drops
## documents
# Remove english common stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
## Warning in tm_map.SimpleCorpus(docs, removeWords, stopwords("english")):
## transformation drops documents
# Remove your own stop words
# specify your stopwords as a character vector
docs <- tm_map(docs, removeWords, c("blabla1", "blabla2"))
## Warning in tm_map.SimpleCorpus(docs, removeWords, c("blabla1", "blabla2")):
## transformation drops documents
# Remove punctuation
docs <- tm_map(docs, removePunctuation)
## Warning in tm_map.SimpleCorpus(docs, removePunctuation): transformation drops
## documents
# Eliminate extra white spaces
docs <- tm_map(docs, stripWhitespace)
## Warning in tm_map.SimpleCorpus(docs, stripWhitespace): transformation drops
## documents
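The same cleaning steps are repeated verbatim for the 2019 script further down; here is a minimal sketch of a helper that would bundle them and avoid the copy-paste (the function name clean_corpus and its extra_stopwords argument are not part of the original code):

# Hypothetical helper that applies the cleaning pipeline above to any corpus
clean_corpus <- function(corpus, extra_stopwords = character(0)) {
  corpus <- tm_map(corpus, content_transformer(tolower))
  corpus <- tm_map(corpus, removeNumbers)
  corpus <- tm_map(corpus, removeWords, stopwords("english"))
  if (length(extra_stopwords) > 0) {
    corpus <- tm_map(corpus, removeWords, extra_stopwords)
  }
  corpus <- tm_map(corpus, removePunctuation)
  tm_map(corpus, stripWhitespace)
}
# Usage sketch: docs <- clean_corpus(Corpus(VectorSource(text)), c("blabla1", "blabla2"))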
# Term-document matrix: a table containing the frequency of each word.
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m),decreasing=TRUE)
d <- data.frame(word = names(v),freq=v)
head(d, 100)
## word freq
## aladdin aladdin 382
## jafar jafar 200
## jasmine jasmine 194
## abu abu 168
## genie genie 166
## sultan sultan 111
## iago iago 93
## carpet carpet 89
## back back 72
## prince prince 63
## looks looks 60
## lamp lamp 57
## like like 52
## one one 48
## princess princess 46
## head head 44
## just just 44
## see see 43
## turns turns 42
## pulls pulls 41
## ali ali 40
## look look 38
## now now 38
## get get 35
## got got 35
## sees sees 35
## right right 32
## will will 32
## man man 32
## away away 32
## can can 31
## never never 30
## wish wish 30
## cave cave 29
## guards guards 29
## make make 29
## cut cut 28
## guard guard 28
## grabs grabs 28
## well well 28
## boy boy 27
## rajah rajah 27
## hand hand 26
## around around 26
## come come 25
## good good 25
## comes comes 25
## way way 25
## yes yes 24
## begins begins 24
## aladdins aladdins 24
## palace palace 24
## old old 24
## hey hey 23
## little little 22
## two two 22
## new new 22
## top top 21
## want want 21
## face face 20
## know know 20
## hands hands 20
## street street 20
## time time 20
## gotta gotta 20
## think think 19
## flies flies 19
## goes goes 19
## genies genies 19
## world world 19
## falls falls 18
## normal normal 18
## jumps jumps 18
## free free 18
## mouth mouth 17
## gonna gonna 17
## take take 17
## takes takes 17
## walks walks 17
## staff staff 17
## say say 17
## whole whole 17
## finally finally 16
## jafars jafars 16
## three three 16
## tell tell 16
## door door 16
## appears appears 16
## pile pile 15
## turban turban 15
## stop stop 14
## another another 14
## friend friend 14
## find find 14
## eyes eyes 14
## rat rat 14
## runs runs 14
## love love 14
## sorry sorry 14
## floor floor 14
findFreqTerms(dtm, lowfreq = 20)
## [1] "aladdin" "come" "cut" "face" "like" "hey"
## [7] "back" "right" "carpet" "good" "guard" "little"
## [13] "look" "yes" "will" "pulls" "never" "one"
## [19] "begins" "can" "see" "lamp" "man" "hand"
## [25] "iago" "jafar" "comes" "get" "grabs" "two"
## [31] "head" "cave" "now" "away" "know" "turns"
## [37] "just" "got" "hands" "street" "looks" "around"
## [43] "top" "way" "aladdins" "abu" "time" "guards"
## [49] "gotta" "make" "wish" "sees" "prince" "palace"
## [55] "princess" "boy" "sultan" "jasmine" "rajah" "want"
## [61] "well" "old" "genie" "ali" "new"
Sys.setenv(LANG = "en")
# Now import the second text to analyze (the 2019 script)
tex2 <- readLines("Aladdin_2019.txt", warn = FALSE)
text2 <- iconv(tex2, "WINDOWS-1252", "UTF-8")
# We need to convert the text to a corpus
docs2 <- Corpus(VectorSource(text2))
# inspect(docs2)
# General text cleaning
# Convert the text to lower case
docs2 <- tm_map(docs2, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(docs2, content_transformer(tolower)):
## transformation drops documents
# Remove numbers
docs2 <- tm_map(docs2, removeNumbers)
## Warning in tm_map.SimpleCorpus(docs2, removeNumbers): transformation drops
## documents
# Remove english common stopwords
docs2 <- tm_map(docs2, removeWords, stopwords("english"))
## Warning in tm_map.SimpleCorpus(docs2, removeWords, stopwords("english")):
## transformation drops documents
# Remove your own stop words
# specify your stopwords as a character vector
docs2 <- tm_map(docs2, removeWords, c("blabla1", "blabla2"))
## Warning in tm_map.SimpleCorpus(docs2, removeWords, c("blabla1", "blabla2")):
## transformation drops documents
# Remove punctuation
docs2 <- tm_map(docs2, removePunctuation)
## Warning in tm_map.SimpleCorpus(docs2, removePunctuation): transformation drops
## documents
# Eliminate extra white spaces
docs2 <- tm_map(docs2, stripWhitespace)
## Warning in tm_map.SimpleCorpus(docs2, stripWhitespace): transformation drops
## documents
# Term-document matrix: a table containing the frequency of each word.
dtm2 <- TermDocumentMatrix(docs2)
m2 <- as.matrix(dtm2)
v2 <- sort(rowSums(m2),decreasing=TRUE)
d2 <- data.frame(word = names(v2),freq=v2)
head(d2, 100)
## word freq
## aladdin aladdin 382
## jafar jafar 200
## jasmine jasmine 194
## abu abu 168
## genie genie 166
## sultan sultan 111
## iago iago 93
## carpet carpet 89
## back back 72
## prince prince 63
## looks looks 60
## lamp lamp 57
## like like 52
## one one 48
## princess princess 46
## head head 44
## just just 44
## see see 43
## turns turns 42
## pulls pulls 41
## ali ali 40
## look look 38
## now now 38
## get get 35
## got got 35
## sees sees 35
## right right 32
## will will 32
## man man 32
## away away 32
## can can 31
## never never 30
## wish wish 30
## cave cave 29
## guards guards 29
## make make 29
## cut cut 28
## guard guard 28
## grabs grabs 28
## well well 28
## boy boy 27
## rajah rajah 27
## hand hand 26
## around around 26
## come come 25
## good good 25
## comes comes 25
## way way 25
## yes yes 24
## begins begins 24
## aladdins aladdins 24
## palace palace 24
## old old 24
## hey hey 23
## little little 22
## two two 22
## new new 22
## top top 21
## want want 21
## face face 20
## know know 20
## hands hands 20
## street street 20
## time time 20
## gotta gotta 20
## think think 19
## flies flies 19
## goes goes 19
## genies genies 19
## world world 19
## falls falls 18
## normal normal 18
## jumps jumps 18
## free free 18
## mouth mouth 17
## gonna gonna 17
## take take 17
## takes takes 17
## walks walks 17
## staff staff 17
## say say 17
## whole whole 17
## finally finally 16
## jafars jafars 16
## three three 16
## tell tell 16
## door door 16
## appears appears 16
## pile pile 15
## turban turban 15
## stop stop 14
## another another 14
## friend friend 14
## find find 14
## eyes eyes 14
## rat rat 14
## runs runs 14
## love love 14
## sorry sorry 14
## floor floor 14
findFreqTerms(dtm2, lowfreq = 20)
## [1] "aladdin" "come" "cut" "face" "like" "hey"
## [7] "back" "right" "carpet" "good" "guard" "little"
## [13] "look" "yes" "will" "pulls" "never" "one"
## [19] "begins" "can" "see" "lamp" "man" "hand"
## [25] "iago" "jafar" "comes" "get" "grabs" "two"
## [31] "head" "cave" "now" "away" "know" "turns"
## [37] "just" "got" "hands" "street" "looks" "around"
## [43] "top" "way" "aladdins" "abu" "time" "guards"
## [49] "gotta" "make" "wish" "sees" "prince" "palace"
## [55] "princess" "boy" "sultan" "jasmine" "rajah" "want"
## [61] "well" "old" "genie" "ali" "new"
barplot(d[1:10,]$freq, las = 2, names.arg = d[1:10,]$word,
        col = "orange", main = "Most frequent words - Aladdin 1992",
        ylab = "Word frequency")
head(d)
## word freq
## aladdin aladdin 382
## jafar jafar 200
## jasmine jasmine 194
## abu abu 168
## genie genie 166
## sultan sultan 111
barplot(d2[1:10,]$freq, las = 2, names.arg = d2[1:10,]$word,
        col = "brown", main = "Most frequent words - Aladdin 2019",
        ylab = "Word frequency")
head(d2)
## word freq
## aladdin aladdin 382
## jafar jafar 200
## jasmine jasmine 194
## abu abu 168
## genie genie 166
## sultan sultan 111
set.seed(1234)
wordcloud(words = d$word, freq = d$freq, min.freq = 15,
          max.words = Inf, random.order = TRUE, rot.per = 0.5,
          colors = brewer.pal(8, "Dark2"))
wordcloud(words = d2$word, freq = d2$freq, min.freq = 15,
          max.words = Inf, random.order = TRUE, rot.per = 0.5,
          colors = brewer.pal(8, "Dark2"))
findAssocs(dtm, terms = "aladdins", corlimit = 0.2)
## $aladdins
## shoulder stopped trips bulge name tools debris diploma
## 0.23 0.20 0.20 0.20 0.20 0.20 0.20 0.20
## slaps arabic list hopin widely
## 0.20 0.20 0.20 0.20 0.20
findAssocs(dtm2, terms = "aladdins", corlimit = 0.2)
## $aladdins
## shoulder stopped trips bulge name tools debris diploma
## 0.23 0.20 0.20 0.20 0.20 0.20 0.20 0.20
## slaps arabic list hopin widely
## 0.20 0.20 0.20 0.20 0.20
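As a complement to the two separate word clouds above, here is a hedged sketch of how the two frequency tables could be contrasted in a single plot with comparison.cloud from the wordcloud package (the 80-word cap and the column labels are arbitrary choices, not part of the original analysis):

# Combine both frequency tables into one matrix: rows = terms, columns = scripts
all_terms <- union(d$word, d2$word)
freq_matrix <- cbind(
  `Aladdin 1992` = d$freq[match(all_terms, d$word)],
  `Aladdin 2019` = d2$freq[match(all_terms, d2$word)]
)
rownames(freq_matrix) <- all_terms
freq_matrix[is.na(freq_matrix)] <- 0
# Words are sized by how much their frequency deviates between the two scripts
comparison.cloud(freq_matrix, max.words = 80, title.size = 1.5)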
Not all of the text in the files is taken into account: descriptive sections, scene directions, and technical specifications are left out of the analysis.
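If one wanted to make that exclusion explicit in code, a rough sketch follows; the all-caps / fully parenthesised heuristic is only an assumption about how these scripts are formatted, and is_direction, dialogue_only, and docs_dialogue are hypothetical names:

# Hypothetical heuristic: treat all-caps lines and fully parenthesised lines
# as scene directions or technical notes and drop them before building the corpus
raw_lines <- iconv(tex, "WINDOWS-1252", "UTF-8")
is_direction <- grepl("^[^a-z]*$", raw_lines) | grepl("^\\(.*\\)$", trimws(raw_lines))
dialogue_only <- raw_lines[!is_direction & nzchar(trimws(raw_lines))]
docs_dialogue <- Corpus(VectorSource(dialogue_only))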
The association analysis for “aladdins” in both Aladdin scripts shows notable consistency, with terms such as “shoulder”, “stopped”, and “arabic” closely associated in each. This similarity may suggest that certain aspects of the character, as well as certain cultural elements, remain constant between the 1992 and 2019 versions.
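That reading could be checked more directly by comparing the frequent-term sets of the two term-document matrices; a small sketch using the dtm and dtm2 objects built above (the 20-occurrence threshold simply mirrors the findFreqTerms calls earlier):

# Terms appearing at least 20 times in each script
freq_terms_1992 <- findFreqTerms(dtm, lowfreq = 20)
freq_terms_2019 <- findFreqTerms(dtm2, lowfreq = 20)
# Vocabulary shared by both versions, and terms unique to each one
intersect(freq_terms_1992, freq_terms_2019)
setdiff(freq_terms_2019, freq_terms_1992)  # frequent only in the 2019 script
setdiff(freq_terms_1992, freq_terms_2019)  # frequent only in the 1992 script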
Find a text-analysis function we have not seen in class, use it, and describe what you achieved (there are many possible functions here; anything not covered in class is fine).
aladdin_1992 <- readLines("Aladdin1992.txt", warn = FALSE)
aladdin_1992 <- iconv(aladdin_1992, "WINDOWS-1252", "UTF-8")
aladdin_2019 <- readLines("Aladdin_2019.txt", warn = FALSE)
aladdin_2019 <- iconv(aladdin_2019, "WINDOWS-1252", "UTF-8")
# Create corpora
corpus_1992 <- Corpus(VectorSource(aladdin_1992))
corpus_2019 <- Corpus(VectorSource(aladdin_2019))
# Sentiment analysis for Aladdin 1992
sentiments_1992 <- get_sentiment(as.character(corpus_1992), method = "syuzhet")
# Sentiment analysis for Aladdin 2019
sentiments_2019 <- get_sentiment(as.character(corpus_2019), method = "syuzhet")
# Compute the mean sentiment to get an overall sense of the tone of each Aladdin script
mean_sentiment_1992 <- mean(sentiments_1992)
mean_sentiment_2019 <- mean(sentiments_2019)
# Show the results
cat("Mean sentiment for Aladdin 1992:", mean_sentiment_1992, "\n")
## Mean sentiment for Aladdin 1992: -8.983333
cat("Mean sentiment for Aladdin 2019:", mean_sentiment_2019)
## Mean sentiment for Aladdin 2019: 12.31667
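Beyond the overall mean, the shape of the tone across each script could also be compared; here is a hedged sketch using get_percentage_values from syuzhet to put both sentiment vectors on a common scale (the 20-bin resolution and the colors are arbitrary choices, and the sketch assumes each sentiment vector has more elements than bins):

# Rescale both sentiment vectors to the same number of bins so the curves line up
traj_1992 <- get_percentage_values(sentiments_1992, bins = 20)
traj_2019 <- get_percentage_values(sentiments_2019, bins = 20)
plot(traj_1992, type = "l", col = "orange", lwd = 2,
     xlab = "Narrative time (bins)", ylab = "Mean sentiment per bin",
     ylim = range(c(traj_1992, traj_2019)),
     main = "Sentiment trajectory - Aladdin 1992 vs 2019")
lines(traj_2019, col = "brown", lwd = 2)
legend("topleft", legend = c("1992", "2019"), col = c("orange", "brown"), lwd = 2)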
A more positive tone in 2019: the positive mean for the 2019 script suggests a more optimistic approach than the 1992 version, possibly intended to make the story more inspiring, hopeful, or cheerful.
Adaptation to today's audience: the shift in tone reflects changing audience preferences for stories that convey positive messages of hope and resilience.
Impact of popular culture and modern sensibilities: the 2019 version may be influenced by current sensibilities around inclusion and optimism in the face of adversity.
The differences between the two scripts undoubtedly represent an evolution in storytelling; one can infer that optimism is on the rise in film retellings of classic stories, changing their narrative, characters, tone, and so on.