library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the two raw text inputs as character vectors, one element per line.

# King James Bible, read straight from the remote text file.
url <- "https://www.o-bible.com/download/kjv.txt"
bible <- readLines(con = url)

# Local lyrics file.
# NOTE(review): absolute, machine-specific path -- this line fails on any
# other machine; confirm where the file should live.
path <- "C:/Users/chosun/Downloads/hiphop.txt"
hiphop <- readLines(con = path)

# Peek at the first six lines of the Bible text.
head(bible, n = 6L)
## [1] "Holy Bible, Authorized (King James) Version, Textfile 930105."                                                                                       
## [2] "Ge1:1 In the beginning God created the heaven and the earth."                                                                                        
## [3] "Ge1:2 And the earth was without form, and void; and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters."
## [4] "Ge1:3 And God said, Let there be light: and there was light."                                                                                        
## [5] "Ge1:4 And God saw the light, that it was good: and God divided the light from the darkness."                                                         
## [6] "Ge1:5 And God called the light Day, and the darkness he called Night. And the evening and the morning were the first day."
# Keep only the lines whose verse reference begins with "Ge" (the Genesis
# verses, e.g. "Ge1:1 In the beginning ..."), then show the first six.
genesis <- bible[grepl("^Ge", bible)]
head(genesis, n = 6L)
## [1] "Ge1:1 In the beginning God created the heaven and the earth."                                                                                        
## [2] "Ge1:2 And the earth was without form, and void; and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters."
## [3] "Ge1:3 And God said, Let there be light: and there was light."                                                                                        
## [4] "Ge1:4 And God saw the light, that it was good: and God divided the light from the darkness."                                                         
## [5] "Ge1:5 And God called the light Day, and the darkness he called Night. And the evening and the morning were the first day."                           
## [6] "Ge1:6 And God said, Let there be a firmament in the midst of the waters, and let it divide the waters from the waters."
# Word-frequency table for Genesis using base R only.
#
# Every line starts with a verse reference such as "Ge1:1". It is stripped
# first: otherwise removing punctuation collapses each reference into a bogus
# token ("ge11", "ge12", ...) that ends up in `words` and `word_freq`.
verse_text <- sub("^Ge[0-9]+:[0-9]+\\s+", "", genesis)

# Normalise: one long lower-case string with all punctuation deleted.
genesis_text <- paste(verse_text, collapse = " ")
genesis_text <- tolower(genesis_text)
genesis_text <- gsub("[[:punct:]]", "", genesis_text)

# Tokenise on runs of whitespace and drop any empty tokens.
words <- unlist(strsplit(genesis_text, "\\s+"))
words <- words[nzchar(words)]

# Count each distinct word and order from most to least frequent.
word_freq <- table(words)
sorted_word_freq <- sort(word_freq, decreasing = TRUE)

# Show the 20 most frequent words. The index is capped at the table length so
# a short input is not padded with NA entries (as `[1:20]` would do).
top_n <- min(20L, length(sorted_word_freq))
print(sorted_word_freq[seq_len(top_n)])
## words
##  and  the   of  his   he   to   in unto that    i said  him   my    a  for  was 
## 3678 2458 1365  653  652  612  600  598  521  484  478  402  343  341  326  317 
##   it with   me thou 
##  306  293  292  284
library(tm)
## Warning: 패키지 'tm'는 R 버전 4.3.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: NLP
# Stop-word-filtered term frequencies for Genesis using tm.
#
# The corpus holds one document per verse. Building it with one document per
# *word* makes the document-term matrix (number of tokens) x (number of terms),
# and as.matrix() then has to allocate all of it densely just to take column
# sums. Column sums over verses give the same per-term totals.
#
# The leading verse reference ("Ge1:1 ") is stripped so it never becomes a
# token.
verse_text <- sub("^Ge[0-9]+:[0-9]+\\s+", "", genesis)

# VCorpus is used because tm_map() on the SimpleCorpus built by Corpus()
# emitted a "transformation drops documents" warning at every step.
corpus <- VCorpus(VectorSource(verse_text))

# Cleaning steps: lower-case, then delete punctuation, digits and English
# stop words.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removeWords, stopwords("en"))

# Rows of the document-term matrix are verses, columns are terms.
dtm <- DocumentTermMatrix(corpus)

# NOTE(review): the name `matrix` shadows base::matrix; it is kept only
# because code outside this view may refer to it.
matrix <- as.matrix(dtm)

# Total occurrences of each term across all verses, most frequent first.
word_freq <- colSums(matrix)
sorted_word_freq <- sort(word_freq, decreasing = TRUE)

# Show the 20 most frequent terms; head() does not pad with NA when fewer
# than 20 terms exist (as `[1:20]` would do).
print(head(sorted_word_freq, 20L))
##    unto    said    thou     thy    thee   shall     god    lord    will    land 
##     598     478     284     279     268     259     230     206     195     187 
##    came  father   jacob    sons     son    upon  joseph   earth abraham  behold 
##     176     169     166     158     148     141     138     121     121     118