# The aim of this work is to identify the most frequently used words in the Bible. I have chosen to do this in R, using RStudio.
# I have worked with the King James Bible version, which can be found as a .csv file on https://www.kaggle.com/datasets
## Loading required package: NLP
# Our purpose, as described above, is to identify the most used words in the Bible, so we only have to work with the 'text' column.
# Let us save the 'text' column in a new corpus called Bible_Corpus.
# NOTE(review): neither `Bible` nor the tm functions are defined or attached in
# the visible part of this script -- confirm that the CSV has been read into
# `Bible` and that library(tm) has been called before this line runs.
Bible_Corpus <- Corpus(VectorSource(Bible$text))
# Clean the corpus. Every step is applied to the result of the previous step,
# so the transformations accumulate in Corpus_clean instead of restarting from
# the raw Bible_Corpus.
# Warnings are switched off while the transformations run; the previous warning
# level is restored once the cleaning is finished.
defaultW <- getOption("warn")
options(warn = -1)

# The Bible text mixes lowercase and uppercase letters, so first convert every
# word to lowercase. tolower() is a base R function rather than a tm
# transformation, so it is wrapped in content_transformer().
Corpus_clean <- tm_map(Bible_Corpus, content_transformer(tolower))

# The Bible text should not contain numbers, but strip any digits in case the
# data set has stray ones.
Corpus_clean <- tm_map(Corpus_clean, removeNumbers)

# Remove the words that carry no information, such as 'to', 'for', 'and', 'or'.
# This runs after lowercasing so that capitalised occurrences are matched too.
Corpus_clean <- tm_map(Corpus_clean, removeWords, stopwords())

# Remove all punctuation from the text.
Corpus_clean <- tm_map(Corpus_clean, removePunctuation)

# Remove the unnecessary whitespace left behind by the removals above.
Corpus_clean <- tm_map(Corpus_clean, stripWhitespace)

# Flattened copy of the cleaned corpus.
# NOTE(review): Corpus_vector is not used anywhere in the visible script.
Corpus_vector <- unlist(Corpus_clean)

options(warn = defaultW)
# Now let's plot the results.
## Loading required package: RColorBrewer
# Draw the word cloud from Corpus_clean, the corpus produced by the cleaning
# steps earlier in this script.
# min.freq = 40 leaves out words that occur fewer than 40 times;
# random.order = FALSE places words in decreasing order of frequency.
# Warnings are switched off for the plot and the previous level is restored
# afterwards.
defaultW <- getOption("warn")
options(warn = -1)
wordcloud(Corpus_clean, min.freq = 40, random.order = FALSE)
options(warn = defaultW)