Step 1: Read in the positive and negative word files

# Download the positive-word lexicon, reading one entry per line.
pos_url <- "https://cjacks04.github.io/687/Datasets/positive-words.txt"
Pos <- scan(file = pos_url, what = character(0), sep = "\n")
Read 2006 items
# Download the negative-word lexicon, reading one entry per line.
neg_url <- "https://cjacks04.github.io/687/Datasets/negative-words.txt"
Neg <- scan(file = neg_url, what = character(0), sep = "\n")
Read 4783 items

Step 2: Read in and process the MLK speech

# Location of the speech transcript; scan() returns one character
# element per line of the file.
Speech <- "http://www.coreybjackson.com/687/Datasets/MLKspeech.txt"
MLK <- scan(file = Speech, what = character(0), sep = "\n")
Read 29 items
library(tm)
Loading required package: NLP
# Wrap the speech lines in a tm corpus and fold the text to lower case.
words.vec <- VectorSource(MLK)
to_lower <- content_transformer(tolower)
words.corpus <- tm_map(Corpus(words.vec), to_lower)
transformation drops documents
# Strip punctuation from the corpus text.
words.corpus <- tm_map(words.corpus, removePunctuation)
transformation drops documents
# Strip digits from the corpus text.
words.corpus <- tm_map(words.corpus, removeNumbers)
transformation drops documents
# Drop the words in tm's "english" stop-word list from the corpus text.
words.corpus <- tm_map(words.corpus, removeWords, stopwords("english"))
transformation drops documents
# Build a term-document matrix from the cleaned corpus and collapse it
# to a single occurrence count per term (summing across documents).
tdm <- TermDocumentMatrix(words.corpus)
m <- as.matrix(tdm)
wordcounts <- rowSums(m)
# Order the counts from most to least frequent. The sorted result was
# previously stored in `words` and immediately overwritten, so the sort
# had no effect; sorting `wordcounts` itself keeps counts and names aligned.
wordcounts <- sort(wordcounts, decreasing = TRUE)
# The distinct terms, in the same order as `wordcounts`.
words <- names(wordcounts)

Step 3: Determine how many positive words were in the speech

# Total number of word occurrences counted in Step 2.
totalWords <- sum(wordcounts)
# Look up each distinct speech word in the positive lexicon; match()
# yields 0 (the nomatch value) for words that are not in the list.
matchedP <- match(words, Pos, nomatch = 0)
# Sum the occurrence counts of the words that were found in the lexicon.
pTotal <- sum(wordcounts[matchedP != 0])
# Share of the counted words that are positive.
pTotal / totalWords
[1] 0.1129608

Step 4: Determine how many negative words were in the speech

# Look up each distinct speech word from Step 2 in the negative lexicon;
# match() yields 0 (the nomatch value) for words that are not in the list.
matchedN <- match(words, Neg, nomatch = 0)
# Sum the occurrence counts of the words that were found in the lexicon.
is_negative <- matchedN != 0
ntotal <- sum(wordcounts[is_negative])
print(ntotal)
# Share of the counted words that are negative.
ntotal / totalWords

Step 5: Redo the ‘positive’ and ‘negative’ calculations for each 25% of the speech

