Step 1: Read in the positive and negative word files

# Load the positive-word lexicon: each line of the remote file becomes one
# character element of `Pos`.
Pos <- scan(
  file = "https://cjacks04.github.io/687/Datasets/positive-words.txt",
  what = character(0),
  sep = "\n"
)
Read 2006 items
# Load the negative-word lexicon: each line of the remote file becomes one
# character element of `Neg`.
Neg <- scan(
  file = "https://cjacks04.github.io/687/Datasets/negative-words.txt",
  what = character(0),
  sep = "\n"
)
Read 4783 items

Step 2: Read in and process the MLK speech

# Location of the speech text; scan() reads it as character data, one line of
# the file per element of `MLK`.
Speech <- "http://www.coreybjackson.com/687/Datasets/MLKspeech.txt"
MLK <- scan(file = Speech, what = character(0), sep = "\n")
Read 29 items
library(tm)
Loading required package: NLP
# Build a tm corpus from the speech and normalise its text before counting.
# Each element of `MLK` is wrapped as a source entry and turned into a corpus.
words.vec <-VectorSource(MLK)
words.corpus <-Corpus(words.vec)
# NOTE(review): the "transformation drops documents" lines below appear to be
# console messages captured after each tm_map() call, not R statements --
# confirm against the tm documentation before relying on document metadata.
# Lower-case everything; tolower is a base function, so it is wrapped with
# content_transformer() before being handed to tm_map().
words.corpus <-tm_map(words.corpus, content_transformer(tolower))
transformation drops documents
# Strip punctuation characters.
words.corpus <- tm_map(words.corpus, removePunctuation)
transformation drops documents
# Strip digits.
words.corpus <- tm_map(words.corpus, removeNumbers)
transformation drops documents
# Remove the words in tm's English stop-word list.
words.corpus <- tm_map(words.corpus, removeWords, stopwords("english"))
transformation drops documents
# Build the term-document matrix for the cleaned corpus and total each term's
# occurrences across all documents (rowSums() adds up every row of the matrix).
tdm <- TermDocumentMatrix(words.corpus)
m <- as.matrix(tdm)
wordcounts <- rowSums(m)
# `words` holds the term for each entry of `wordcounts`, in the same order, so
# positions found by matching on `words` can index `wordcounts` directly.
# (The earlier sort() result was assigned to `words` and overwritten on the
# next line, so it had no effect and is omitted.)
words <- names(wordcounts)

Step 3: Determine how many positive words were in the speech

# Look each distinct speech word up in the positive lexicon. match() returns
# the position in `Pos`, or 0 (nomatch = 0) when the word is not listed.
matchedP <- match(words, Pos, nomatch = 0)
# Total occurrences of the positive words: `wordcounts` is index-aligned with
# `words`, so the matched positions select the corresponding counts.
pTotal <- sum(wordcounts[which(matchedP != 0)])
# Total number of counted words in the speech.
totalWords <- sum(wordcounts)
# Proportion of counted words that are positive.
pTotal / totalWords
[1] 0.1129608

Step 4: Determine how many negative words were in the speech

# Look each distinct speech word up in the negative lexicon. match() returns
# the position in `Neg`, or 0 (nomatch = 0) when the word is not listed.
matchedN <- match(x = words, table = Neg, nomatch = 0)
# Total occurrences of the negative words: keep the counts whose word matched.
ntotal <- sum(wordcounts[matchedN != 0])
print(ntotal)
# Proportion of counted words that are negative.
ntotal / totalWords

Step 5: Redo the ‘positive’ and ‘negative’ calculations for each 25% of the speech

LS0tDQp0aXRsZTogIkxhYiAxMDogVGV4dCBNaW5pbmciDQphdXRob3I6IA0KLSBUZXNzbHluIEtuYXBwDQotIERlcnJpY2sgRXNwYWRhcw0KZGF0ZTogImByIFN5cy50aW1lKClgIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMjIFN0ZXAJMToJUmVhZAlpbgl0aGUJcG9zaXRpdmUJYW5kCW5lZ2F0aXZlCXdvcmQJZmlsZXMNCmBgYHtyfQ0KDQojIENyZWF0ZSB0d28gdmVjdG9ycyBvZiB3b3Jkcywgb25lIGZvciB0aGUgcG9zaXRpdmUgd29yZHMgYW5kIG9uZSBmb3IgdGhlIG5lZ2F0aXZlIHdvcmRzLiBUaGUgcG9zaXRpdmUgd29yZHMgY2FuIGJlIGZvdW5kIGhlcmU6ICJodHRwczovL2NqYWNrczA0LmdpdGh1Yi5pby82ODcvRGF0YXNldHMvcG9zaXRpdmUtd29yZHMudHh0IiBhbmQgbmVnYXRpdmUgd29yZHMgaGVyZTogImh0dHBzOi8vY2phY2tzMDQuZ2l0aHViLmlvLzY4Ny9EYXRhc2V0cy9uZWdhdGl2ZS13b3Jkcy50eHQiLiBZb3Ugc2hvdWxkIHVzZSB0aGUgc2NhbigpIGZ1bmN0aW9uIHdoaWNoIHJlYWRzIGRhdGEgaW50byBhIHZlY3RvciBvciBsaXN0IGZyb20gdGhlIGNvbnNvbGUgb3IgZmlsZS4gWW91J2xsIG5lZWQgdGhyZWUgYXJndW1lbnRzOiAoMSkgdGhlIGZpbGUgbmFtZS9wYXRoLCAoMikgdGhlIHNlY29uZCBhcmd1bWVudCBpcyBjaGFyYWN0ZXIoMCkgd2hpY2ggd2lsbCByZWFkIHRoZSBuZXh0ICBsaW5lIGFzIGEgY2hhcmFjdGVyIChhcyBvcHBvc2VkIHRvIGludGVnZXIgb3Igc29tZSBvdGhlciBkYXRhdCB5cGUpLCAoMykgdGhlIHNlcCBhcmd1bWVudCB0byB0ZWxsIFIgaG93IHRoZSBkYXRhIGFyZSBzZXBlcmF0ZWQgZS5nLiwgXG4gDQoNCiMgTm90ZSB0aGF0IHdoZW4gcmVhZGluZyBpbiB0aGUgZmlsZXMsIHRoZXJlIG1pZ2h0IGJlIGxpbmVzIGF0IHRoZSBzdGFydCBhbmQvb3IgdGhlIGVuZCB0aGF0IHdpbGwgbmVlZCB0byBiZSByZW1vdmVkIChpLmUuIHlvdSBzaG91bGQgY2xlYW4geW91IGRhdGEgaWYgbmVlZGVkKS4gDQpQb3MgPC0gImh0dHBzOi8vY2phY2tzMDQuZ2l0aHViLmlvLzY4Ny9EYXRhc2V0cy9wb3NpdGl2ZS13b3Jkcy50eHQiDQpQb3MgPC1zY2FuKFBvcywgY2hhcmFjdGVyKDApLHNlcCA9ICJcbiIpDQpOZWcgPC0iaHR0cHM6Ly9jamFja3MwNC5naXRodWIuaW8vNjg3L0RhdGFzZXRzL25lZ2F0aXZlLXdvcmRzLnR4dCINCk5lZyA8LXNjYW4oTmVnLCBjaGFyYWN0ZXIoMCksIHNlcCA9ICJcbiIpDQoNCmBgYA0KDQojIyMgU3RlcCAyOiBQcm9jZXNzIGluIHRoZSBNTEsgc3BlZWNoIA0KYGBge3J9DQojIFJlYWQgdGhlIE1MSyB0ZXh0IGZpbGUgdXNpbmcgdGhlIHJlYWRMaW5lcygpIGZ1bmN0aW9uLiBPbmx5IHRoZSBVUkwgaXMgcmVxdWlyZWQuDQojIEluc3BlY3QgdGhlIHZlY3RvciBhYm92ZS4gU29tZSBsaW5lcyBhcmUgYmxhbmsgIiIuIFJlbW92ZSB0aGVzZS4NCiMgQ3JlYXRlIGEgdGVybSBtYXRyaXguIFRoZXJlIGFyZSBzZXZlcmFsIHN0ZXBzIGhlcmUgYmVnaW5uaW5nIHdpdGgg
Y3JlYXRpbmcgYSB2ZWN0b3Igc291cmNlIGFuZCBtYWtpbmcgdGV4dCB0cmFuc2Zvcm1hdGlvbnMuIChDaGVjayBjaGFwdGVyIDE0IHdoZXJlIHNiYSBpcyB0cmFuc2Zvcm1lZCkNCiMgQ3JlYXRlIGEgbGlzdCBvZiBjb3VudHMgZm9yIGVhY2ggd29yZCANCg0KU3BlZWNoIDwtImh0dHA6Ly93d3cuY29yZXliamFja3Nvbi5jb20vNjg3L0RhdGFzZXRzL01MS3NwZWVjaC50eHQiDQpNTEsgPC1zY2FuKFNwZWVjaCwgY2hhcmFjdGVyKDApLHNlcCA9ICJcbiIpDQpsaWJyYXJ5KHRtKQ0Kd29yZHMudmVjIDwtVmVjdG9yU291cmNlKE1MSykNCndvcmRzLmNvcnB1cyA8LUNvcnB1cyh3b3Jkcy52ZWMpDQp3b3Jkcy5jb3JwdXMgPC10bV9tYXAod29yZHMuY29ycHVzLCBjb250ZW50X3RyYW5zZm9ybWVyKHRvbG93ZXIpKQ0Kd29yZHMuY29ycHVzIDwtIHRtX21hcCh3b3Jkcy5jb3JwdXMsIHJlbW92ZVB1bmN0dWF0aW9uKQ0Kd29yZHMuY29ycHVzIDwtIHRtX21hcCh3b3Jkcy5jb3JwdXMsIHJlbW92ZU51bWJlcnMpDQp3b3Jkcy5jb3JwdXMgPC0gdG1fbWFwKHdvcmRzLmNvcnB1cywgcmVtb3ZlV29yZHMsIHN0b3B3b3JkcygiZW5nbGlzaCIpKQ0KdGRtIDwtIFRlcm1Eb2N1bWVudE1hdHJpeCh3b3Jkcy5jb3JwdXMpDQptIDwtIGFzLm1hdHJpeCh0ZG0pDQp3b3JkY291bnRzIDwtcm93U3VtcyhtKQ0Kd29yZHMgPC0gc29ydCh3b3JkY291bnRzLCBkZWNyZWFzaW5nID0gVFJVRSkNCndvcmRzIDwtIG5hbWVzKHdvcmRjb3VudHMpDQpgYGANCg0KIyMjIFN0ZXAgMzogRGV0ZXJtaW5lIGhvdyBtYW55IHBvc2l0aXZlIHdvcmRzIHdlcmUgaW4gdGhlIHNwZWVjaCAgDQpgYGB7cn0NCiMgSGludDogb25lIHdheSB0byBkbyB0aGlzIGlzIHRvIHVzZSB0aGUg4oCYbWF0Y2jigJkgZnVuY3Rpb24gb24gdGhlIGxpc3Qgb2Ygd29yZHMgZnJvbSBTdGVwIDIgYW5kIHRoZSBwb3NpdGl2ZSB3b3JkcyBpbiB0aGUgbGlzdCBmcm9tIHRoZSBpbXBvcnQuIA0KDQogICMgc3VtIHRoZSB0b3RhbCBudW1iZXIgb2Ygd29yZHMgYW5kIHN0b3JlIHRoZSB2YWx1ZSB0byAidG90YWxXb3JkcyINCiAgDQogICMgY3JlYXRlIGEgdmVjdG9yICJ3b3JkcyIgdGhhdCBjb250YWlucyBhbGwgdGhlIHdvcmRzIGluICJ3b3JkQ291bnRzIg0KICANCiAgIyBsb2NhdGUgd2hpY2ggd29yZHMgaW4gIm1sayIgd2VyZSBwb3NpdGl2ZSAoYXBwZWFyZWQgaW4gcG9zaXRpdmUtd29yZCBsaXN0KQ0KICANCiAgIyBjYWxjdWxhdGUgdGhlIHRvdGFsIG51bWJlciBvZiBwb3NpdGl2ZSB3b3JkcyBpbiAibWxrIiBzcGVlY2ggKGluIHdvcmRDb3VudHMpIGFuZCBhc3NpZ24gdGhlIG51bWJlciB0byB0aGUgdmFyaWFibGUgInBUb3RhbCIuIFRoZSB3aGljaCgpIGZ1bmN0aW9uIG9uIHdvcmRzIHRoZSB2ZWN0b3IgYWJvdmUgd2lsbCBnaXZlIHlvdSB0aGUgaW5kZXggbnVtYmVyLiANCiANCiAgIyB2aWV3IHRoZSB0b3RhbCBudW1iZXIgb2YgcG9zaXRpdmUgd29yZHMgKDk1IHBvc2l0aXZlIHdvcmRzIGluIHRo
ZSBzcGVlY2gpDQogICANCiAgIyB2aWV3IHRoZSBwZXJjZW50YWdlIG9mIHBvc2l0aXZlIHdvcmRzICgxMS4yOTYwOCUgb2YgdGhlIHNwZWVjaCB3b3JkcyBhcmUgcG9zaXRpdmUpDQptYXRjaGVkUCA8LSBtYXRjaCh3b3JkcywgUG9zLCBub21hdGNoID0gMCkNCnBUb3RhbCA8LXN1bSh3b3JkY291bnRzW3doaWNoKG1hdGNoZWRQICE9IDApXSkNCnByaW50KHBUb3RhbCkNCnRvdGFsV29yZHMgPC0gc3VtKHdvcmRjb3VudHMpDQpwVG90YWwvdG90YWxXb3Jkcw0KYGBgDQoNCiMjIyBTdGVwIDQ6IERldGVybWluZSBob3cgbWFueSBuZWdhdGl2ZSB3b3JkcyB3ZXJlIGluIHRoZSBzcGVlY2ggDQpgYGB7cn0NCiMgSGludDogb25lIHdheSB0byBkbyB0aGlzIGlzIHRvIHVzZSB0aGUg4oCYbWF0Y2jigJkgZnVuY3Rpb24gb24gdGhlIGxpc3Qgb2Ygd29yZHMgZnJvbSBTdGVwIDIgYW5kIHRoZSBwb3NpdGl2ZSB3b3JkcyBpbiB0aGUgbGlzdCBmcm9tIHRoZSBpbXBvcnQuIA0KbWF0Y2hlZE4gPC0gbWF0Y2god29yZHMsIE5lZywgbm9tYXRjaCA9IDApDQpudG90YWwgPC1zdW0od29yZGNvdW50c1t3aGljaChtYXRjaGVkTiAhPSAwKV0pDQpwcmludChudG90YWwpDQpudG90YWwvdG90YWxXb3JkcyANCg0KYGBgDQoNCiMjIyBTdGVwIDU6IFJlZG8gdGhlIOKAmHBvc2l0aXZl4oCZIGFuZCDigJhuZWdhdGl2ZeKAmSBjYWxjdWxhdGlvbnMgZm9yIGVhY2ggMjUlIG9mIHRoZSBzcGVlY2ggDQpgYGB7cn0NCiMgQ29tcGFyZSB0aGUgcmVzdWx0cyAoZXguIGEgc2ltcGxlIGJhcmNoYXJ0IG9mIHRoZSA0IG51bWJlcnMpLiBJIHJlY29tbWVuZCB0YWtpbmcgZXh0cmFjdGluZyBxdWFydGVycyBvZiB0aGUgc3BlZWNoLCBzdG9yaW5nIGVhY2ggcXVhcnRlciBpbiBhIHZlY3RvciBhbmQgdGhlbiBjb25kdWN0aW5nIHRoZSBjYWxjdWxhdGlvbnMgb3ZlciBlYWNoIHF1YXJ0ZXIuIA0KY3V0cG9pbnQgPC0gcm91bmQobGVuZ3RoKHdvcmRzLmNvcnB1cykvNCkNCndvcmRzLmNvcnB1czEgPC0gd29yZHMuY29ycHVzWzE6Y3V0cG9pbnRdDQp0ZG0xIDwtIFRlcm1Eb2N1bWVudE1hdHJpeCh3b3Jkcy5jb3JwdXMxKQ0KbTEgPC0gYXMubWF0cml4KHRkbTEpDQoNCndvcmRjb3VudHMxIDwtcm93U3VtcyhtMSkNCndvcmRjb3VudHMxIDwtIHNvcnQod29yZGNvdW50czEsIGRlY3JlYXNpbmcgPSBUUlVFKQ0KdG90YWxXb3JkczEgPC0gc3VtKHdvcmRjb3VudHMxKQ0Kd29yZHMxIDwtIG5hbWVzKHdvcmRjb3VudHMxKQ0KDQptYXRjaGVkUDEgPC0gbWF0Y2god29yZHMxLCBQb3MsIG5vbWF0Y2ggPSAwKQ0KcHRvdGFsTnVtYmVyMSA8LSBzdW0od29yZGNvdW50czFbd2hpY2gobWF0Y2hlZFAxICE9MCldKQ0KcmF0aW9wMSA8LSBwdG90YWxOdW1iZXIxL3RvdGFsV29yZHMxDQpyYXRpb3AxDQoNCm1hdGNoZWROMSA8LSBtYXRjaCh3b3JkczEsIE5lZywgbm9tYXRjaCA9IDApDQpOdG90YWxOdW1iZXIxIDwtIHN1bSh3b3JkY291bnRzMVt3aGljaChtYXRjaGVkTjEgIT0wKV0pDQpyYXRp
b24xIDwtIE50b3RhbE51bWJlcjEvdG90YWxXb3JkczENCnJhdGlvbjENCg0KI1EyDQpjdXRwb2ludDIgPC0gcm91bmQobGVuZ3RoKHdvcmRzLmNvcnB1cykvMikNCndvcmRzLmNvcnB1czIgPC0gd29yZHMuY29ycHVzWzg6MTRdDQp0ZG0yIDwtIFRlcm1Eb2N1bWVudE1hdHJpeCh3b3Jkcy5jb3JwdXMyKQ0KbTIgPC0gYXMubWF0cml4KHRkbTIpDQoNCndvcmRjb3VudHMyIDwtcm93U3VtcyhtMikNCndvcmRjb3VudHMyIDwtIHNvcnQod29yZGNvdW50czIsIGRlY3JlYXNpbmcgPSBUUlVFKQ0KdG90YWxXb3JkczIgPC0gc3VtKHdvcmRjb3VudHMyKQ0Kd29yZHMyIDwtIG5hbWVzKHdvcmRjb3VudHMyKQ0KDQptYXRjaGVkUDIgPC0gbWF0Y2god29yZHMyLCBQb3MsIG5vbWF0Y2ggPSAwKQ0KcHRvdGFsTnVtYmVyMiA8LSBzdW0od29yZGNvdW50czJbd2hpY2gobWF0Y2hlZFAyICE9MCldKQ0KcmF0aW9wMiA8LSBwdG90YWxOdW1iZXIyL3RvdGFsV29yZHMyDQpyYXRpb3AyDQoNCm1hdGNoZWROMiA8LSBtYXRjaCh3b3JkczIsIE5lZywgbm9tYXRjaCA9IDApDQpOdG90YWxOdW1iZXIyIDwtIHN1bSh3b3JkY291bnRzMlt3aGljaChtYXRjaGVkTjIgIT0wKV0pDQpyYXRpb24yIDwtIE50b3RhbE51bWJlcjIvdG90YWxXb3JkczINCnJhdGlvbjINCg0KI1EzDQp3b3Jkcy5jb3JwdXMzIDwtIHdvcmRzLmNvcnB1c1s5OjIxXQ0KdGRtMyA8LSBUZXJtRG9jdW1lbnRNYXRyaXgod29yZHMuY29ycHVzMykNCm0zIDwtIGFzLm1hdHJpeCh0ZG0zKQ0KDQp3b3JkY291bnRzMyA8LXJvd1N1bXMobTMpDQp3b3JkY291bnRzMyA8LSBzb3J0KHdvcmRjb3VudHMzLCBkZWNyZWFzaW5nID0gVFJVRSkNCnRvdGFsV29yZHMzIDwtIHN1bSh3b3JkY291bnRzMykNCndvcmRzMyA8LSBuYW1lcyh3b3JkY291bnRzMykNCg0KbWF0Y2hlZFAzIDwtIG1hdGNoKHdvcmRzMywgUG9zLCBub21hdGNoID0gMCkNCnB0b3RhbE51bWJlcjMgPC0gc3VtKHdvcmRjb3VudHMzW3doaWNoKG1hdGNoZWRQMiAhPTApXSkNCnJhdGlvcDMgPC0gcHRvdGFsTnVtYmVyMy90b3RhbFdvcmRzMw0KcmF0aW9wMw0KDQptYXRjaGVkTjMgPC0gbWF0Y2god29yZHMzLCBOZWcsIG5vbWF0Y2ggPSAwKQ0KTnRvdGFsTnVtYmVyMyA8LSBzdW0od29yZGNvdW50czNbd2hpY2gobWF0Y2hlZE4zICE9MCldKQ0KcmF0aW9uMyA8LSBOdG90YWxOdW1iZXIzL3RvdGFsV29yZHMzDQpyYXRpb24zDQojUTQNCndvcmRzLmNvcnB1czQgPC0gd29yZHMuY29ycHVzWzIyOjI5XQ0KdGRtNCA8LSBUZXJtRG9jdW1lbnRNYXRyaXgod29yZHMuY29ycHVzNCkNCm00IDwtIGFzLm1hdHJpeCh0ZG00KQ0KDQp3b3JkY291bnRzNCA8LXJvd1N1bXMobTQpDQp3b3JkY291bnRzNCA8LSBzb3J0KHdvcmRjb3VudHM0LCBkZWNyZWFzaW5nID0gVFJVRSkNCnRvdGFsV29yZHM0IDwtIHN1bSh3b3JkY291bnRzNCkNCndvcmRzNCA8LSBuYW1lcyh3b3JkY291bnRzNCkNCg0KbWF0Y2hlZFA0IDwtIG1hdGNoKHdvcmRzNCwgUG9zLCBub21hdGNo
ID0gMCkNCnB0b3RhbE51bWJlcjQgPC0gc3VtKHdvcmRjb3VudHM0W3doaWNoKG1hdGNoZWRQMiAhPTApXSkNCnJhdGlvcDQgPC0gcHRvdGFsTnVtYmVyNC90b3RhbFdvcmRzNA0KcmF0aW9wNA0KDQptYXRjaGVkTjQgPC0gbWF0Y2god29yZHM0LCBOZWcsIG5vbWF0Y2ggPSAwKQ0KTnRvdGFsTnVtYmVyNCA8LSBzdW0od29yZGNvdW50czRbd2hpY2gobWF0Y2hlZE40ICE9MCldKQ0KcmF0aW9uNCA8LSBOdG90YWxOdW1iZXI0L3RvdGFsV29yZHM0DQpyYXRpb240DQoNClBvc2kgPC0gYyhyYXRpb3AxLCByYXRpb3AyLCByYXRpb3AzLCByYXRpb3A0KQ0KYmFycGxvdChQb3NpKQ0KDQpOZWdpIDwtIGMocmF0aW9uMSwgcmF0aW9uMiwgcmF0aW9uMywgcmF0aW9uNCkNCmJhcnBsb3QoTmVnaSkNCmBgYA0KDQo=