This program takes in a PDF file, uses Optical Character Recognition (OCR) to identify the text in that PDF, strips the text out, puts it into a Corpus, and analytically processes that Corpus data. Additionally, a tidy text mining approach is applied to the base text data to extract further analytical data.

First load package libraries that will be used in the program.

library(pdftools)
## Warning: package 'pdftools' was built under R version 3.6.3
## Using poppler version 0.73.0
library(tm)
## Warning: package 'tm' was built under R version 3.6.3
## Loading required package: NLP
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.6.3
## Loading required package: RColorBrewer
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
library(ggthemes)

library(tidytext)
## Warning: package 'tidytext' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(textdata)
## Warning: package 'textdata' was built under R version 3.6.3

Next we need to read in the PDF file. This line of code reads in the PDF, recognises the text with OCR, and finally stores the text as a character vector in a variable.

# Run OCR over the scanned PDF and keep the recognised text as a character
# vector. pdf_ocr_text() can also be restricted to specific pages, pointed at
# another OCR language, or given a password when the PDF is protected.
# NOTE(review): dpi = 1200 is a very high rendering resolution for a document
# of this size and is probably a large part of the run time -- confirm that a
# lower value does not give equally good text before changing it.
TextVector <- pdf_ocr_text(
  pdf = "D:/lifelabourofpeop07bootiala.pdf",
  language = "eng",
  dpi = 1200
)
## Converting page 1 to lifelabourofpeop07bootiala_1.png... done!
## Converting page 2 to lifelabourofpeop07bootiala_2.png... done!
## Converting page 3 to lifelabourofpeop07bootiala_3.png... done!
## Converting page 4 to lifelabourofpeop07bootiala_4.png... done!
## Converting page 5 to lifelabourofpeop07bootiala_5.png... done!
## Converting page 6 to lifelabourofpeop07bootiala_6.png... done!
## Converting page 7 to lifelabourofpeop07bootiala_7.png... done!
## Converting page 8 to lifelabourofpeop07bootiala_8.png... done!
## Converting page 9 to lifelabourofpeop07bootiala_9.png... done!
## Converting page 10 to lifelabourofpeop07bootiala_10.png... done!
## Converting page 11 to lifelabourofpeop07bootiala_11.png... done!
## Converting page 12 to lifelabourofpeop07bootiala_12.png... done!
## Converting page 13 to lifelabourofpeop07bootiala_13.png... done!
## Converting page 14 to lifelabourofpeop07bootiala_14.png... done!
## Converting page 15 to lifelabourofpeop07bootiala_15.png... done!
## Converting page 16 to lifelabourofpeop07bootiala_16.png... done!
## Converting page 17 to lifelabourofpeop07bootiala_17.png... done!
## Converting page 18 to lifelabourofpeop07bootiala_18.png... done!
## Converting page 19 to lifelabourofpeop07bootiala_19.png... done!
## Converting page 20 to lifelabourofpeop07bootiala_20.png... done!
## Converting page 21 to lifelabourofpeop07bootiala_21.png... done!
## Converting page 22 to lifelabourofpeop07bootiala_22.png... done!
## Converting page 23 to lifelabourofpeop07bootiala_23.png... done!
## Converting page 24 to lifelabourofpeop07bootiala_24.png... done!
## Converting page 25 to lifelabourofpeop07bootiala_25.png... done!
## Converting page 26 to lifelabourofpeop07bootiala_26.png... done!
## Converting page 27 to lifelabourofpeop07bootiala_27.png... done!
## Converting page 28 to lifelabourofpeop07bootiala_28.png... done!
## Converting page 29 to lifelabourofpeop07bootiala_29.png... done!
## Converting page 30 to lifelabourofpeop07bootiala_30.png... done!
## Converting page 31 to lifelabourofpeop07bootiala_31.png... done!
## Converting page 32 to lifelabourofpeop07bootiala_32.png... done!
## Converting page 33 to lifelabourofpeop07bootiala_33.png... done!
## Converting page 34 to lifelabourofpeop07bootiala_34.png... done!
## Converting page 35 to lifelabourofpeop07bootiala_35.png... done!
## Converting page 36 to lifelabourofpeop07bootiala_36.png... done!
## Converting page 37 to lifelabourofpeop07bootiala_37.png... done!
## Converting page 38 to lifelabourofpeop07bootiala_38.png... done!
## Converting page 39 to lifelabourofpeop07bootiala_39.png... done!
## Converting page 40 to lifelabourofpeop07bootiala_40.png... done!
## Converting page 41 to lifelabourofpeop07bootiala_41.png... done!
## Converting page 42 to lifelabourofpeop07bootiala_42.png... done!
## Converting page 43 to lifelabourofpeop07bootiala_43.png... done!
## Converting page 44 to lifelabourofpeop07bootiala_44.png... done!
## Converting page 45 to lifelabourofpeop07bootiala_45.png... done!
## Converting page 46 to lifelabourofpeop07bootiala_46.png... done!
## Converting page 47 to lifelabourofpeop07bootiala_47.png... done!
## Converting page 48 to lifelabourofpeop07bootiala_48.png... done!
## Converting page 49 to lifelabourofpeop07bootiala_49.png... done!
## Converting page 50 to lifelabourofpeop07bootiala_50.png... done!
## Converting page 51 to lifelabourofpeop07bootiala_51.png... done!
## Converting page 52 to lifelabourofpeop07bootiala_52.png... done!
## Converting page 53 to lifelabourofpeop07bootiala_53.png... done!
## Converting page 54 to lifelabourofpeop07bootiala_54.png... done!
## Converting page 55 to lifelabourofpeop07bootiala_55.png... done!
## Converting page 56 to lifelabourofpeop07bootiala_56.png... done!
## Converting page 57 to lifelabourofpeop07bootiala_57.png... done!
## Converting page 58 to lifelabourofpeop07bootiala_58.png... done!
## Converting page 59 to lifelabourofpeop07bootiala_59.png... done!
## Converting page 60 to lifelabourofpeop07bootiala_60.png... done!
## Converting page 61 to lifelabourofpeop07bootiala_61.png... done!
## Converting page 62 to lifelabourofpeop07bootiala_62.png... done!
## Converting page 63 to lifelabourofpeop07bootiala_63.png... done!
## Converting page 64 to lifelabourofpeop07bootiala_64.png... done!
## Converting page 65 to lifelabourofpeop07bootiala_65.png... done!
## Converting page 66 to lifelabourofpeop07bootiala_66.png... done!
## Converting page 67 to lifelabourofpeop07bootiala_67.png... done!
## Converting page 68 to lifelabourofpeop07bootiala_68.png... done!
## Converting page 69 to lifelabourofpeop07bootiala_69.png... done!
## Converting page 70 to lifelabourofpeop07bootiala_70.png... done!
## Converting page 71 to lifelabourofpeop07bootiala_71.png... done!
## Converting page 72 to lifelabourofpeop07bootiala_72.png... done!
## Converting page 73 to lifelabourofpeop07bootiala_73.png... done!
## Converting page 74 to lifelabourofpeop07bootiala_74.png... done!
## Converting page 75 to lifelabourofpeop07bootiala_75.png... done!
## Converting page 76 to lifelabourofpeop07bootiala_76.png... done!
## Converting page 77 to lifelabourofpeop07bootiala_77.png... done!
## Converting page 78 to lifelabourofpeop07bootiala_78.png... done!
## Converting page 79 to lifelabourofpeop07bootiala_79.png... done!
## Converting page 80 to lifelabourofpeop07bootiala_80.png... done!
## Converting page 81 to lifelabourofpeop07bootiala_81.png... done!
## Converting page 82 to lifelabourofpeop07bootiala_82.png... done!
## Converting page 83 to lifelabourofpeop07bootiala_83.png... done!
## Converting page 84 to lifelabourofpeop07bootiala_84.png... done!
## Converting page 85 to lifelabourofpeop07bootiala_85.png... done!
## Converting page 86 to lifelabourofpeop07bootiala_86.png... done!
## Converting page 87 to lifelabourofpeop07bootiala_87.png... done!
## Converting page 88 to lifelabourofpeop07bootiala_88.png... done!
## Converting page 89 to lifelabourofpeop07bootiala_89.png... done!
## Converting page 90 to lifelabourofpeop07bootiala_90.png... done!
## Converting page 91 to lifelabourofpeop07bootiala_91.png... done!
## Converting page 92 to lifelabourofpeop07bootiala_92.png... done!
## Converting page 93 to lifelabourofpeop07bootiala_93.png... done!
## Converting page 94 to lifelabourofpeop07bootiala_94.png... done!
## Converting page 95 to lifelabourofpeop07bootiala_95.png... done!
## Converting page 96 to lifelabourofpeop07bootiala_96.png... done!
## Converting page 97 to lifelabourofpeop07bootiala_97.png... done!
## Converting page 98 to lifelabourofpeop07bootiala_98.png... done!
## Converting page 99 to lifelabourofpeop07bootiala_99.png... done!
## Converting page 100 to lifelabourofpeop07bootiala_100.png... done!
## Converting page 101 to lifelabourofpeop07bootiala_101.png... done!
## Converting page 102 to lifelabourofpeop07bootiala_102.png... done!
## Converting page 103 to lifelabourofpeop07bootiala_103.png... done!
## Converting page 104 to lifelabourofpeop07bootiala_104.png... done!
## Converting page 105 to lifelabourofpeop07bootiala_105.png... done!
## Converting page 106 to lifelabourofpeop07bootiala_106.png... done!
## Converting page 107 to lifelabourofpeop07bootiala_107.png... done!
## Converting page 108 to lifelabourofpeop07bootiala_108.png... done!
## Converting page 109 to lifelabourofpeop07bootiala_109.png... done!
## Converting page 110 to lifelabourofpeop07bootiala_110.png... done!
## Converting page 111 to lifelabourofpeop07bootiala_111.png... done!
## Converting page 112 to lifelabourofpeop07bootiala_112.png... done!
## Converting page 113 to lifelabourofpeop07bootiala_113.png... done!
## Converting page 114 to lifelabourofpeop07bootiala_114.png... done!
## Converting page 115 to lifelabourofpeop07bootiala_115.png... done!
## Converting page 116 to lifelabourofpeop07bootiala_116.png... done!
## Converting page 117 to lifelabourofpeop07bootiala_117.png... done!
## Converting page 118 to lifelabourofpeop07bootiala_118.png... done!
## Converting page 119 to lifelabourofpeop07bootiala_119.png... done!
## Converting page 120 to lifelabourofpeop07bootiala_120.png... done!
## Converting page 121 to lifelabourofpeop07bootiala_121.png... done!
## Converting page 122 to lifelabourofpeop07bootiala_122.png... done!
## Converting page 123 to lifelabourofpeop07bootiala_123.png... done!
## Converting page 124 to lifelabourofpeop07bootiala_124.png... done!
## Converting page 125 to lifelabourofpeop07bootiala_125.png... done!
## Converting page 126 to lifelabourofpeop07bootiala_126.png... done!
## Converting page 127 to lifelabourofpeop07bootiala_127.png... done!
## Converting page 128 to lifelabourofpeop07bootiala_128.png... done!
## Converting page 129 to lifelabourofpeop07bootiala_129.png... done!
## Converting page 130 to lifelabourofpeop07bootiala_130.png... done!
## Converting page 131 to lifelabourofpeop07bootiala_131.png... done!
## Converting page 132 to lifelabourofpeop07bootiala_132.png... done!
## Converting page 133 to lifelabourofpeop07bootiala_133.png... done!
## Converting page 134 to lifelabourofpeop07bootiala_134.png... done!
## Converting page 135 to lifelabourofpeop07bootiala_135.png... done!
## Converting page 136 to lifelabourofpeop07bootiala_136.png... done!
## Converting page 137 to lifelabourofpeop07bootiala_137.png... done!
## Converting page 138 to lifelabourofpeop07bootiala_138.png... done!
## Converting page 139 to lifelabourofpeop07bootiala_139.png... done!
## Converting page 140 to lifelabourofpeop07bootiala_140.png... done!
## Converting page 141 to lifelabourofpeop07bootiala_141.png... done!
## Converting page 142 to lifelabourofpeop07bootiala_142.png... done!
## Converting page 143 to lifelabourofpeop07bootiala_143.png... done!
## Converting page 144 to lifelabourofpeop07bootiala_144.png... done!
## Converting page 145 to lifelabourofpeop07bootiala_145.png... done!
## Converting page 146 to lifelabourofpeop07bootiala_146.png... done!
## Converting page 147 to lifelabourofpeop07bootiala_147.png... done!
## Converting page 148 to lifelabourofpeop07bootiala_148.png... done!
## Converting page 149 to lifelabourofpeop07bootiala_149.png... done!
## Converting page 150 to lifelabourofpeop07bootiala_150.png... done!
## Converting page 151 to lifelabourofpeop07bootiala_151.png... done!
## Converting page 152 to lifelabourofpeop07bootiala_152.png... done!
## Converting page 153 to lifelabourofpeop07bootiala_153.png... done!
## Converting page 154 to lifelabourofpeop07bootiala_154.png... done!
## Converting page 155 to lifelabourofpeop07bootiala_155.png... done!
## Converting page 156 to lifelabourofpeop07bootiala_156.png... done!
## Converting page 157 to lifelabourofpeop07bootiala_157.png... done!
## Converting page 158 to lifelabourofpeop07bootiala_158.png... done!
## Converting page 159 to lifelabourofpeop07bootiala_159.png... done!
## Converting page 160 to lifelabourofpeop07bootiala_160.png... done!
## Converting page 161 to lifelabourofpeop07bootiala_161.png... done!
## Converting page 162 to lifelabourofpeop07bootiala_162.png... done!
## Converting page 163 to lifelabourofpeop07bootiala_163.png... done!
## Converting page 164 to lifelabourofpeop07bootiala_164.png... done!
## Converting page 165 to lifelabourofpeop07bootiala_165.png... done!
## Converting page 166 to lifelabourofpeop07bootiala_166.png... done!
## Converting page 167 to lifelabourofpeop07bootiala_167.png... done!
## Converting page 168 to lifelabourofpeop07bootiala_168.png... done!
## Converting page 169 to lifelabourofpeop07bootiala_169.png... done!
## Converting page 170 to lifelabourofpeop07bootiala_170.png... done!
## Converting page 171 to lifelabourofpeop07bootiala_171.png... done!
## Converting page 172 to lifelabourofpeop07bootiala_172.png... done!
## Converting page 173 to lifelabourofpeop07bootiala_173.png... done!
## Converting page 174 to lifelabourofpeop07bootiala_174.png... done!
## Converting page 175 to lifelabourofpeop07bootiala_175.png... done!
## Converting page 176 to lifelabourofpeop07bootiala_176.png... done!
## Converting page 177 to lifelabourofpeop07bootiala_177.png... done!
## Converting page 178 to lifelabourofpeop07bootiala_178.png... done!
## Converting page 179 to lifelabourofpeop07bootiala_179.png... done!
## Converting page 180 to lifelabourofpeop07bootiala_180.png... done!
## Converting page 181 to lifelabourofpeop07bootiala_181.png... done!
## Converting page 182 to lifelabourofpeop07bootiala_182.png... done!
## Converting page 183 to lifelabourofpeop07bootiala_183.png... done!
## Converting page 184 to lifelabourofpeop07bootiala_184.png... done!
## Converting page 185 to lifelabourofpeop07bootiala_185.png... done!
## Converting page 186 to lifelabourofpeop07bootiala_186.png... done!
## Converting page 187 to lifelabourofpeop07bootiala_187.png... done!
## Converting page 188 to lifelabourofpeop07bootiala_188.png... done!
## Converting page 189 to lifelabourofpeop07bootiala_189.png... done!
## Converting page 190 to lifelabourofpeop07bootiala_190.png... done!
## Converting page 191 to lifelabourofpeop07bootiala_191.png... done!
## Converting page 192 to lifelabourofpeop07bootiala_192.png... done!
## Converting page 193 to lifelabourofpeop07bootiala_193.png... done!
## Converting page 194 to lifelabourofpeop07bootiala_194.png... done!
## Converting page 195 to lifelabourofpeop07bootiala_195.png... done!
## Converting page 196 to lifelabourofpeop07bootiala_196.png... done!
## Converting page 197 to lifelabourofpeop07bootiala_197.png... done!
## Converting page 198 to lifelabourofpeop07bootiala_198.png... done!
## Converting page 199 to lifelabourofpeop07bootiala_199.png... done!
## Converting page 200 to lifelabourofpeop07bootiala_200.png... done!
## Converting page 201 to lifelabourofpeop07bootiala_201.png... done!
## Converting page 202 to lifelabourofpeop07bootiala_202.png... done!
## Converting page 203 to lifelabourofpeop07bootiala_203.png... done!
## Converting page 204 to lifelabourofpeop07bootiala_204.png... done!
## Converting page 205 to lifelabourofpeop07bootiala_205.png... done!
## Converting page 206 to lifelabourofpeop07bootiala_206.png... done!
## Converting page 207 to lifelabourofpeop07bootiala_207.png... done!
## Converting page 208 to lifelabourofpeop07bootiala_208.png... done!
## Converting page 209 to lifelabourofpeop07bootiala_209.png... done!
## Converting page 210 to lifelabourofpeop07bootiala_210.png... done!
## Converting page 211 to lifelabourofpeop07bootiala_211.png... done!
## Converting page 212 to lifelabourofpeop07bootiala_212.png... done!
## Converting page 213 to lifelabourofpeop07bootiala_213.png... done!
## Converting page 214 to lifelabourofpeop07bootiala_214.png... done!
## Converting page 215 to lifelabourofpeop07bootiala_215.png... done!
## Converting page 216 to lifelabourofpeop07bootiala_216.png... done!
## Converting page 217 to lifelabourofpeop07bootiala_217.png... done!
## Converting page 218 to lifelabourofpeop07bootiala_218.png... done!
## Converting page 219 to lifelabourofpeop07bootiala_219.png... done!
## Converting page 220 to lifelabourofpeop07bootiala_220.png... done!
## Converting page 221 to lifelabourofpeop07bootiala_221.png... done!
## Converting page 222 to lifelabourofpeop07bootiala_222.png... done!
## Converting page 223 to lifelabourofpeop07bootiala_223.png... done!
## Converting page 224 to lifelabourofpeop07bootiala_224.png... done!
## Converting page 225 to lifelabourofpeop07bootiala_225.png... done!
## Converting page 226 to lifelabourofpeop07bootiala_226.png... done!
## Converting page 227 to lifelabourofpeop07bootiala_227.png... done!
## Converting page 228 to lifelabourofpeop07bootiala_228.png... done!
## Converting page 229 to lifelabourofpeop07bootiala_229.png... done!
## Converting page 230 to lifelabourofpeop07bootiala_230.png... done!
## Converting page 231 to lifelabourofpeop07bootiala_231.png... done!
## Converting page 232 to lifelabourofpeop07bootiala_232.png... done!
## Converting page 233 to lifelabourofpeop07bootiala_233.png... done!
## Converting page 234 to lifelabourofpeop07bootiala_234.png... done!
## Converting page 235 to lifelabourofpeop07bootiala_235.png... done!
## Converting page 236 to lifelabourofpeop07bootiala_236.png... done!
## Converting page 237 to lifelabourofpeop07bootiala_237.png... done!
## Converting page 238 to lifelabourofpeop07bootiala_238.png... done!
## Converting page 239 to lifelabourofpeop07bootiala_239.png... done!
## Converting page 240 to lifelabourofpeop07bootiala_240.png... done!
## Converting page 241 to lifelabourofpeop07bootiala_241.png... done!
## Converting page 242 to lifelabourofpeop07bootiala_242.png... done!
## Converting page 243 to lifelabourofpeop07bootiala_243.png... done!
## Converting page 244 to lifelabourofpeop07bootiala_244.png... done!
## Converting page 245 to lifelabourofpeop07bootiala_245.png... done!
## Converting page 246 to lifelabourofpeop07bootiala_246.png... done!
## Converting page 247 to lifelabourofpeop07bootiala_247.png... done!
## Converting page 248 to lifelabourofpeop07bootiala_248.png... done!
## Converting page 249 to lifelabourofpeop07bootiala_249.png... done!
## Converting page 250 to lifelabourofpeop07bootiala_250.png... done!
## Converting page 251 to lifelabourofpeop07bootiala_251.png... done!
## Converting page 252 to lifelabourofpeop07bootiala_252.png... done!
## Converting page 253 to lifelabourofpeop07bootiala_253.png... done!
## Converting page 254 to lifelabourofpeop07bootiala_254.png... done!
## Converting page 255 to lifelabourofpeop07bootiala_255.png... done!
## Converting page 256 to lifelabourofpeop07bootiala_256.png... done!
## Converting page 257 to lifelabourofpeop07bootiala_257.png... done!
## Converting page 258 to lifelabourofpeop07bootiala_258.png... done!
## Converting page 259 to lifelabourofpeop07bootiala_259.png... done!
## Converting page 260 to lifelabourofpeop07bootiala_260.png... done!
## Converting page 261 to lifelabourofpeop07bootiala_261.png... done!
## Converting page 262 to lifelabourofpeop07bootiala_262.png... done!
## Converting page 263 to lifelabourofpeop07bootiala_263.png... done!
## Converting page 264 to lifelabourofpeop07bootiala_264.png... done!
## Converting page 265 to lifelabourofpeop07bootiala_265.png... done!
## Converting page 266 to lifelabourofpeop07bootiala_266.png... done!
## Converting page 267 to lifelabourofpeop07bootiala_267.png... done!
## Converting page 268 to lifelabourofpeop07bootiala_268.png... done!
## Converting page 269 to lifelabourofpeop07bootiala_269.png... done!
## Converting page 270 to lifelabourofpeop07bootiala_270.png... done!
## Converting page 271 to lifelabourofpeop07bootiala_271.png... done!
## Converting page 272 to lifelabourofpeop07bootiala_272.png... done!
## Converting page 273 to lifelabourofpeop07bootiala_273.png... done!
## Converting page 274 to lifelabourofpeop07bootiala_274.png... done!
## Converting page 275 to lifelabourofpeop07bootiala_275.png... done!
## Converting page 276 to lifelabourofpeop07bootiala_276.png... done!
## Converting page 277 to lifelabourofpeop07bootiala_277.png... done!
## Converting page 278 to lifelabourofpeop07bootiala_278.png... done!
## Converting page 279 to lifelabourofpeop07bootiala_279.png... done!
## Converting page 280 to lifelabourofpeop07bootiala_280.png... done!
## Converting page 281 to lifelabourofpeop07bootiala_281.png... done!
## Converting page 282 to lifelabourofpeop07bootiala_282.png... done!
## Converting page 283 to lifelabourofpeop07bootiala_283.png... done!
## Converting page 284 to lifelabourofpeop07bootiala_284.png... done!
## Converting page 285 to lifelabourofpeop07bootiala_285.png... done!
## Converting page 286 to lifelabourofpeop07bootiala_286.png... done!
## Converting page 287 to lifelabourofpeop07bootiala_287.png... done!
## Converting page 288 to lifelabourofpeop07bootiala_288.png... done!
## Converting page 289 to lifelabourofpeop07bootiala_289.png... done!
## Converting page 290 to lifelabourofpeop07bootiala_290.png... done!
## Converting page 291 to lifelabourofpeop07bootiala_291.png... done!
## Converting page 292 to lifelabourofpeop07bootiala_292.png... done!
## Converting page 293 to lifelabourofpeop07bootiala_293.png... done!
## Converting page 294 to lifelabourofpeop07bootiala_294.png... done!
## Converting page 295 to lifelabourofpeop07bootiala_295.png... done!
## Converting page 296 to lifelabourofpeop07bootiala_296.png... done!
## Converting page 297 to lifelabourofpeop07bootiala_297.png... done!
## Converting page 298 to lifelabourofpeop07bootiala_298.png... done!
## Converting page 299 to lifelabourofpeop07bootiala_299.png... done!
## Converting page 300 to lifelabourofpeop07bootiala_300.png... done!
## Converting page 301 to lifelabourofpeop07bootiala_301.png... done!
## Converting page 302 to lifelabourofpeop07bootiala_302.png... done!
## Converting page 303 to lifelabourofpeop07bootiala_303.png... done!
## Converting page 304 to lifelabourofpeop07bootiala_304.png... done!
## Converting page 305 to lifelabourofpeop07bootiala_305.png... done!
## Converting page 306 to lifelabourofpeop07bootiala_306.png... done!
## Converting page 307 to lifelabourofpeop07bootiala_307.png... done!
## Converting page 308 to lifelabourofpeop07bootiala_308.png... done!
## Converting page 309 to lifelabourofpeop07bootiala_309.png... done!
## Converting page 310 to lifelabourofpeop07bootiala_310.png... done!
## Converting page 311 to lifelabourofpeop07bootiala_311.png... done!
## Converting page 312 to lifelabourofpeop07bootiala_312.png... done!
## Converting page 313 to lifelabourofpeop07bootiala_313.png... done!
## Converting page 314 to lifelabourofpeop07bootiala_314.png... done!
## Converting page 315 to lifelabourofpeop07bootiala_315.png... done!
## Converting page 316 to lifelabourofpeop07bootiala_316.png... done!
## Converting page 317 to lifelabourofpeop07bootiala_317.png... done!
## Converting page 318 to lifelabourofpeop07bootiala_318.png... done!
## Converting page 319 to lifelabourofpeop07bootiala_319.png... done!
## Converting page 320 to lifelabourofpeop07bootiala_320.png... done!
## Converting page 321 to lifelabourofpeop07bootiala_321.png... done!
## Converting page 322 to lifelabourofpeop07bootiala_322.png... done!
## Converting page 323 to lifelabourofpeop07bootiala_323.png... done!
## Converting page 324 to lifelabourofpeop07bootiala_324.png... done!
## Converting page 325 to lifelabourofpeop07bootiala_325.png... done!
## Converting page 326 to lifelabourofpeop07bootiala_326.png... done!
## Converting page 327 to lifelabourofpeop07bootiala_327.png... done!
## Converting page 328 to lifelabourofpeop07bootiala_328.png... done!
## Converting page 329 to lifelabourofpeop07bootiala_329.png... done!
## Converting page 330 to lifelabourofpeop07bootiala_330.png... done!
## Converting page 331 to lifelabourofpeop07bootiala_331.png... done!
## Converting page 332 to lifelabourofpeop07bootiala_332.png... done!
## Converting page 333 to lifelabourofpeop07bootiala_333.png... done!
## Converting page 334 to lifelabourofpeop07bootiala_334.png... done!
## Converting page 335 to lifelabourofpeop07bootiala_335.png... done!
## Converting page 336 to lifelabourofpeop07bootiala_336.png... done!
## Converting page 337 to lifelabourofpeop07bootiala_337.png... done!
## Converting page 338 to lifelabourofpeop07bootiala_338.png... done!
## Converting page 339 to lifelabourofpeop07bootiala_339.png... done!
## Converting page 340 to lifelabourofpeop07bootiala_340.png... done!
## Converting page 341 to lifelabourofpeop07bootiala_341.png... done!
## Converting page 342 to lifelabourofpeop07bootiala_342.png... done!
## Converting page 343 to lifelabourofpeop07bootiala_343.png... done!
## Converting page 344 to lifelabourofpeop07bootiala_344.png... done!
## Converting page 345 to lifelabourofpeop07bootiala_345.png... done!
## Converting page 346 to lifelabourofpeop07bootiala_346.png... done!
## Converting page 347 to lifelabourofpeop07bootiala_347.png... done!
## Converting page 348 to lifelabourofpeop07bootiala_348.png... done!
## Converting page 349 to lifelabourofpeop07bootiala_349.png... done!
## Converting page 350 to lifelabourofpeop07bootiala_350.png... done!
## Converting page 351 to lifelabourofpeop07bootiala_351.png... done!
## Converting page 352 to lifelabourofpeop07bootiala_352.png... done!
## Converting page 353 to lifelabourofpeop07bootiala_353.png... done!
## Converting page 354 to lifelabourofpeop07bootiala_354.png... done!
## Converting page 355 to lifelabourofpeop07bootiala_355.png... done!
## Converting page 356 to lifelabourofpeop07bootiala_356.png... done!
## Converting page 357 to lifelabourofpeop07bootiala_357.png... done!
## Converting page 358 to lifelabourofpeop07bootiala_358.png... done!
## Converting page 359 to lifelabourofpeop07bootiala_359.png... done!
## Converting page 360 to lifelabourofpeop07bootiala_360.png... done!
## Converting page 361 to lifelabourofpeop07bootiala_361.png... done!
## Converting page 362 to lifelabourofpeop07bootiala_362.png... done!
## Converting page 363 to lifelabourofpeop07bootiala_363.png... done!
## Converting page 364 to lifelabourofpeop07bootiala_364.png... done!
## Converting page 365 to lifelabourofpeop07bootiala_365.png... done!
## Converting page 366 to lifelabourofpeop07bootiala_366.png... done!
## Converting page 367 to lifelabourofpeop07bootiala_367.png... done!
## Converting page 368 to lifelabourofpeop07bootiala_368.png... done!
## Converting page 369 to lifelabourofpeop07bootiala_369.png... done!
## Converting page 370 to lifelabourofpeop07bootiala_370.png... done!
## Converting page 371 to lifelabourofpeop07bootiala_371.png... done!
## Converting page 372 to lifelabourofpeop07bootiala_372.png... done!
## Converting page 373 to lifelabourofpeop07bootiala_373.png... done!
## Converting page 374 to lifelabourofpeop07bootiala_374.png... done!
## Converting page 375 to lifelabourofpeop07bootiala_375.png... done!
## Converting page 376 to lifelabourofpeop07bootiala_376.png... done!
## Converting page 377 to lifelabourofpeop07bootiala_377.png... done!
## Converting page 378 to lifelabourofpeop07bootiala_378.png... done!
## Converting page 379 to lifelabourofpeop07bootiala_379.png... done!
## Converting page 380 to lifelabourofpeop07bootiala_380.png... done!
## Converting page 381 to lifelabourofpeop07bootiala_381.png... done!
## Converting page 382 to lifelabourofpeop07bootiala_382.png... done!
## Converting page 383 to lifelabourofpeop07bootiala_383.png... done!
## Converting page 384 to lifelabourofpeop07bootiala_384.png... done!
## Converting page 385 to lifelabourofpeop07bootiala_385.png... done!
## Converting page 386 to lifelabourofpeop07bootiala_386.png... done!
## Converting page 387 to lifelabourofpeop07bootiala_387.png... done!
## Converting page 388 to lifelabourofpeop07bootiala_388.png... done!
## Converting page 389 to lifelabourofpeop07bootiala_389.png... done!
## Converting page 390 to lifelabourofpeop07bootiala_390.png... done!
## Converting page 391 to lifelabourofpeop07bootiala_391.png... done!
## Converting page 392 to lifelabourofpeop07bootiala_392.png... done!
## Converting page 393 to lifelabourofpeop07bootiala_393.png... done!
## Converting page 394 to lifelabourofpeop07bootiala_394.png... done!
## Converting page 395 to lifelabourofpeop07bootiala_395.png... done!
## Converting page 396 to lifelabourofpeop07bootiala_396.png... done!
## Converting page 397 to lifelabourofpeop07bootiala_397.png... done!
## Converting page 398 to lifelabourofpeop07bootiala_398.png... done!
## Converting page 399 to lifelabourofpeop07bootiala_399.png... done!
## Converting page 400 to lifelabourofpeop07bootiala_400.png... done!
## Converting page 401 to lifelabourofpeop07bootiala_401.png... done!
## Converting page 402 to lifelabourofpeop07bootiala_402.png... done!
## Converting page 403 to lifelabourofpeop07bootiala_403.png... done!
## Converting page 404 to lifelabourofpeop07bootiala_404.png... done!
## Converting page 405 to lifelabourofpeop07bootiala_405.png... done!
## Converting page 406 to lifelabourofpeop07bootiala_406.png... done!
## Converting page 407 to lifelabourofpeop07bootiala_407.png... done!
## Converting page 408 to lifelabourofpeop07bootiala_408.png... done!
## Converting page 409 to lifelabourofpeop07bootiala_409.png... done!
## Converting page 410 to lifelabourofpeop07bootiala_410.png... done!
## Converting page 411 to lifelabourofpeop07bootiala_411.png... done!
## Converting page 412 to lifelabourofpeop07bootiala_412.png... done!
## Converting page 413 to lifelabourofpeop07bootiala_413.png... done!
## Converting page 414 to lifelabourofpeop07bootiala_414.png... done!
## Converting page 415 to lifelabourofpeop07bootiala_415.png... done!
## Converting page 416 to lifelabourofpeop07bootiala_416.png... done!
## Converting page 417 to lifelabourofpeop07bootiala_417.png... done!
## Converting page 418 to lifelabourofpeop07bootiala_418.png... done!
## Converting page 419 to lifelabourofpeop07bootiala_419.png... done!
## Converting page 420 to lifelabourofpeop07bootiala_420.png... done!
## Converting page 421 to lifelabourofpeop07bootiala_421.png... done!
## Converting page 422 to lifelabourofpeop07bootiala_422.png... done!
## Converting page 423 to lifelabourofpeop07bootiala_423.png... done!
## Converting page 424 to lifelabourofpeop07bootiala_424.png... done!
## Converting page 425 to lifelabourofpeop07bootiala_425.png... done!
## Converting page 426 to lifelabourofpeop07bootiala_426.png... done!
## Converting page 427 to lifelabourofpeop07bootiala_427.png... done!
## Converting page 428 to lifelabourofpeop07bootiala_428.png... done!
## Converting page 429 to lifelabourofpeop07bootiala_429.png... done!
## Converting page 430 to lifelabourofpeop07bootiala_430.png... done!
## Converting page 431 to lifelabourofpeop07bootiala_431.png... done!
## Converting page 432 to lifelabourofpeop07bootiala_432.png... done!
## Converting page 433 to lifelabourofpeop07bootiala_433.png... done!
## Converting page 434 to lifelabourofpeop07bootiala_434.png... done!
## Converting page 435 to lifelabourofpeop07bootiala_435.png... done!
## Converting page 436 to lifelabourofpeop07bootiala_436.png... done!
## Converting page 437 to lifelabourofpeop07bootiala_437.png... done!
## Converting page 438 to lifelabourofpeop07bootiala_438.png... done!
## Converting page 439 to lifelabourofpeop07bootiala_439.png... done!
## Converting page 440 to lifelabourofpeop07bootiala_440.png... done!
## Converting page 441 to lifelabourofpeop07bootiala_441.png... done!
## Converting page 442 to lifelabourofpeop07bootiala_442.png... done!
## Converting page 443 to lifelabourofpeop07bootiala_443.png... done!
## Converting page 444 to lifelabourofpeop07bootiala_444.png... done!
## Converting page 445 to lifelabourofpeop07bootiala_445.png... done!
## Converting page 446 to lifelabourofpeop07bootiala_446.png... done!
## Converting page 447 to lifelabourofpeop07bootiala_447.png... done!
## Converting page 448 to lifelabourofpeop07bootiala_448.png... done!
## Converting page 449 to lifelabourofpeop07bootiala_449.png... done!
## Converting page 450 to lifelabourofpeop07bootiala_450.png... done!
## Converting page 451 to lifelabourofpeop07bootiala_451.png... done!
## Converting page 452 to lifelabourofpeop07bootiala_452.png... done!
## Converting page 453 to lifelabourofpeop07bootiala_453.png... done!
## Converting page 454 to lifelabourofpeop07bootiala_454.png... done!
## Converting page 455 to lifelabourofpeop07bootiala_455.png... done!
## Converting page 456 to lifelabourofpeop07bootiala_456.png... done!
## Converting page 457 to lifelabourofpeop07bootiala_457.png... done!
## Converting page 458 to lifelabourofpeop07bootiala_458.png... done!
## Converting page 459 to lifelabourofpeop07bootiala_459.png... done!
## Converting page 460 to lifelabourofpeop07bootiala_460.png... done!
## Converting page 461 to lifelabourofpeop07bootiala_461.png... done!
## Converting page 462 to lifelabourofpeop07bootiala_462.png... done!
## Converting page 463 to lifelabourofpeop07bootiala_463.png... done!
## Converting page 464 to lifelabourofpeop07bootiala_464.png... done!
#ADDITIONAL NOTE: TAKING TIME IS NORMAL. THIS COULD BE A BIT, JUST MAKE SURE THERE IS SOME PROGRESS.

Now, take the text vector data and put it into a Corpus stored as a variable.

# A plain character vector is the simplest source to start from; tm offers
# other source types should the text ever come from somewhere else.

TextCorpus <- TextVector %>%
  VectorSource() %>%
  Corpus()

These lines of code remove junk we don’t need for this analysis. Specifically, punctuation, numbers, stopwords, and blank space. It also transforms all text into lower case so the first word of a sentence is not recognized as a different word just because it is capitalized.

#strip punctuation characters from every document in the corpus
#NOTE(review): this runs before stopword removal; stopwords that contain an apostrophe may no longer match afterwards - confirm the ordering is intended
TextCorpus <- tm_map(TextCorpus, removePunctuation)
## Warning in tm_map.SimpleCorpus(TextCorpus, removePunctuation): transformation
## drops documents
#make everything lower case so a capitalized word is not counted as a different term
TextCorpus <- tm_map(TextCorpus, content_transformer(tolower))
## Warning in tm_map.SimpleCorpus(TextCorpus, content_transformer(tolower)):
## transformation drops documents
#remove numbers from the text analysis
TextCorpus <- tm_map(TextCorpus, removeNumbers)
## Warning in tm_map.SimpleCorpus(TextCorpus, removeNumbers): transformation drops
## documents
#remove English stopwords (short junk words like 'and', 'but', 'a', 'the'...)
TextCorpus <- tm_map(TextCorpus, removeWords, stopwords("english"))
## Warning in tm_map.SimpleCorpus(TextCorpus, removeWords, stopwords("english")):
## transformation drops documents
#clean up the blank space left behind by the removals above
TextCorpus <- tm_map(TextCorpus, stripWhitespace)
## Warning in tm_map.SimpleCorpus(TextCorpus, stripWhitespace): transformation
## drops documents
# Build a tm transformation that replaces every match of `pattern` with a
# single space, effectively removing junk from the text.
toSpace <- content_transformer(
  function(x, pattern) {
    gsub(pattern, " ", x)
  }
)



#DOCUMENT SPECIFIC CHANGES GO HERE
#Each call below uses toSpace to replace every match of the given pattern with a space.
#Blank out the string "booth".
TextCorpus <- tm_map(TextCorpus, toSpace, "booth")
## Warning in tm_map.SimpleCorpus(TextCorpus, toSpace, "booth"): transformation
## drops documents
#Blank out a backtick that follows a space.
TextCorpus <- tm_map(TextCorpus, toSpace, " `")
## Warning in tm_map.SimpleCorpus(TextCorpus, toSpace, " `"): transformation drops
## documents
#Blank out a left single quotation mark that follows a space.
TextCorpus <- tm_map(TextCorpus, toSpace, " ‘")
## Warning in tm_map.SimpleCorpus(TextCorpus, toSpace, " ‘"): transformation drops
## documents
#Blank out underscores.
TextCorpus <- tm_map(TextCorpus, toSpace, "_")
## Warning in tm_map.SimpleCorpus(TextCorpus, toSpace, "_"): transformation drops
## documents
#Further candidate patterns, currently disabled.
#NOTE(review): toSpace hands the pattern to gsub(), so "|" would act as a regular-expression operator rather than a literal bar if re-enabled - confirm before use.
#TextCorpus <- tm_map(TextCorpus, toSpace, "|")
#TextCorpus <- tm_map(TextCorpus, toSpace, " -")
#TextCorpus <- tm_map(TextCorpus, toSpace, ":")
#TextCorpus <- tm_map(TextCorpus, toSpace, "eee")

Utilize the Corpus to make a Document Term Matrix.

# Count how often each term occurs in each document of the cleaned corpus.
generalDTM <- TextCorpus %>%
  DocumentTermMatrix()

Inspect my DTM:

# Print the matrix dimensions, sparsity and a small sample of its cells.
generalDTM %>%
  inspect()
## <<DocumentTermMatrix (documents: 464, terms: 12192)>>
## Non-/sparse entries: 61030/5596058
## Sparsity           : 99%
## Maximal term length: 48
## Weighting          : term frequency (tf)
## Sample             :
##      Terms
## Docs  church god london may mission one people religious will work
##   183      5   0      0   2       0   0      3         0    0    0
##   185      0   3      0   2       0   0      0         0    0    1
##   186      1   0      1   0       1   3      1         0    1    5
##   216      2   0      4   0       6   1      0         1    0    2
##   217      1   3      1   0       3   1      2         0    1    8
##   221      2   1      0   1       0   1      0         0    2    0
##   222      2   2      0   2       3   6      1         0    1    0
##   247      0   0      0   2       0   0      0         0    8    0
##   79       4   3      0   3       0   1      0         0    2    4
##   80       3   6      0   1       1   2      1         0    1    4

Make a list of most used terms, and check to make sure everything matches okay.

# Total occurrences of every term: the column sums of the document-term matrix.
generalFreq <- generalDTM %>%
  as.matrix() %>%
  colSums()


# Sanity check: there should be exactly one total per term in the matrix.
length(generalFreq)
## [1] 12192

Order the list of most used terms.

# Index permutation that sorts the term totals from most to least frequent.
generalFreqOrdered <- order(
  generalFreq,
  decreasing = TRUE
)

Check the head of the ordered list. A few of the MOST used terms.

# Sort the totals first, then keep the leading entries.
head(generalFreq[generalFreqOrdered])
##    church      work       one       may religious      will 
##       904       670       515       456       365       365

check the tail of the ordered list. A few of the LEAST used terms. Sometimes this has random junk, honestly.

# Sort the totals first, then keep the trailing entries.
tail(generalFreq[generalFreqOrdered])
##      los regional     eves      oma      snl     “—ie 
##        1        1        1        1        1        1

Find frequently used terms: every term that occurs at least a given minimum number of times.

#Not running on first run because I don't want a wall of text if a ton of words meet the minimum frequency of 10.

#findFreqTerms(generalDTM,lowfreq=10)

Find word associations: for each given term, list the other terms whose counts across the pages correlate with it at or above a minimum correlation limit. Any terms can be specified; these are a few basic ones. The limit is set to 0.4 here, which is already high enough to avoid a wall of text.

# For each listed term, report the terms associated with it at or above the
# 0.4 correlation limit.
findAssocs(
  x = generalDTM,
  terms = c("poor", "social", "poverty", "work", "london", "research"),
  corlimit = 0.4
)
## $poor
## numeric(0)
## 
## $social
## numeric(0)
## 
## $poverty
## numeric(0)
## 
## $work
## numeric(0)
## 
## $london
## east 
## 0.41 
## 
## $research
## numeric(0)

Make a wordcloud

# Fix the random seed so the cloud layout is the same on every run; choose a
# different seed for a different-looking cloud.
set.seed(1337)

# Draw the cloud from the terms that occur at least 184 times, then add the
# main title and sub-title of the document being analysed.
wordcloud(
  words = names(generalFreq),
  freq = generalFreq,
  min.freq = 184,
  colors = brewer.pal(7, "Dark2")
)
title(
  main = "Life and Labor of the People in London\nThird Series: Religious Influences",
  sub = "Charles Booth"
)

make a plot of frequently used terms

# Turn the named vector of term totals into a data frame for ggplot2:
# one row per term, with its total number of occurrences.
generalData <- data.frame(term = names(generalFreq), occurrences = generalFreq)


# Bar chart of the terms that occur more than 260 times.
# The filter uses the data frame's own `occurrences` column, so it stays
# correct even if generalData is later reordered or subsetted; filtering on
# the separate generalFreq vector only works while both line up row for row.
p <- ggplot(subset(generalData, occurrences > 260), aes(term, occurrences)) +
  geom_col() +
  # Slant the term labels so neighbouring labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Title Here", caption = "Caption Here")


p

Sentiment Analysis

This stage can change a lot depending on the lexicon used to analyze the data, so I’ll do one numerical and one more categorical.

Get the first lexicon we want to use. In this case I’m using AFINN.

BEWARE!!!!
You will have to agree to some terms and conditions in the console before the lexicon is downloaded the first time.

# Fetch (and print) the AFINN lexicon: one numeric score per word.
get_sentiments("afinn")
## # A tibble: 2,477 x 2
##    word       value
##    <chr>      <dbl>
##  1 abandon       -2
##  2 abandoned     -2
##  3 abandons      -2
##  4 abducted      -2
##  5 abduction     -2
##  6 abductions    -2
##  7 abhor         -3
##  8 abhorred      -3
##  9 abhorrent     -3
## 10 abhors        -3
## # ... with 2,467 more rows

Change Corpus type and pre-process it for tidy sentiment analysis

# Re-wrap the cleaned corpus as a VCorpus, then convert it into a tidy tibble
# with one row per document.
generalVectorCorpus <- TextCorpus %>%
  VectorSource() %>%
  VCorpus()

generalTidy <- tidy(generalVectorCorpus)

generalTidy
## # A tibble: 464 x 8
##    author datetimestamp       description heading id    language origin text    
##    <lgl>  <dttm>              <lgl>       <lgl>   <chr> <chr>    <lgl>  <chr>   
##  1 NA     2020-06-29 23:19:00 NA          NA      1     en       NA     " — — —~
##  2 NA     2020-06-29 23:19:01 NA          NA      2     en       NA     " libra~
##  3 NA     2020-06-29 23:19:01 NA          NA      3     en       NA     " f hee~
##  4 NA     2020-06-29 23:19:01 NA          NA      4     en       NA     "te ¢ e~
##  5 NA     2020-06-29 23:19:01 NA          NA      5     en       NA     "tn pee~
##  6 NA     2020-06-29 23:19:01 NA          NA      6     en       NA     " ats d~
##  7 NA     2020-06-29 23:19:01 NA          NA      7     en       NA     "life l~
##  8 NA     2020-06-29 23:19:01 NA          NA      8     en       NA     " oh on~
##  9 NA     2020-06-29 23:19:01 NA          NA      9     en       NA     "fgoa l~
## 10 NA     2020-06-29 23:19:01 NA          NA      10    en       NA     "g norm~
## # ... with 454 more rows
# The document id arrives as text; store it as an integer page number.
generalTidy <- mutate(generalTidy, id = as.integer(id))

strip away useless information
keep just the page number and words on those pages.

# Keep only the page number and the text found on that page.
generalTidy2 <- select(generalTidy, id, text)

Get the words and text separated from the rest of the data.
Tokenize the text.

# Split each page's text into one word per row.
# Tokenize the slimmed-down table (page id + text) rather than the full
# metadata table, so the unused metadata columns are not repeated for every
# single word.
generalTidy3 <- generalTidy2 %>%
  unnest_tokens(output = word, input = text)

order by id

# Sort the words by page number. The sort column is passed directly:
# arrange() has no `data` argument, so `data = id` was only being treated as
# a named sort expression.
generalTidy3 <- generalTidy3 %>%
  arrange(id)

show the first 15 words by their pages.

# Preview: page number and word for the first 15 tokens.
head(select(generalTidy3, id, word), n = 15)
## # A tibble: 15 x 2
##       id word 
##    <int> <chr>
##  1     1 l    
##  2     1 e    
##  3     1 cite 
##  4     1 oe   
##  5     1 es   
##  6     1 bar  
##  7     1 ae   
##  8     1 od   
##  9     1 c    
## 10     1 ea   
## 11     1 oe   
## 12     1 ee   
## 13     1 oo   
## 14     1 os   
## 15     1 os

clean up the data a bit more.
Filter out Stopwords again, this time with the Tidy list of stopwords.
This code finds matches with stopwords and takes them out.

# Drop stopwords (tidytext's default list) and purely numeric tokens.
# The join key is spelled out so the join does not depend on whichever column
# names the two tables happen to share. Numeric tokens are recognised with a
# pattern instead of as.numeric(): that coercion warns on every ordinary word
# and also reads tokens such as "inf" or "infinity" as numbers, which would
# silently discard them.
generalMoreTidy <- generalTidy3 %>%
  anti_join(get_stopwords(), by = "word") %>%
  filter(!grepl("^[0-9]+([.,][0-9]+)*$", word))

plot to check the similarity with the first plot. This one will plot the 8 most used terms.

# Horizontal bar chart of the eight most frequent words, largest bar on top.
generalMoreTidy %>%
  count(word, sort = TRUE) %>%
  head(n = 8) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(mapping = aes(x = word, y = n)) +
  geom_col() +
  labs(x = NULL) +
  coord_flip()

Sentiment Analysis goodness starts here:

sentiment by page in a more clear way

# AFINN sentiment per page.
# 1. Count how often each word occurs on each page.
# 2. Keep only the words that have an AFINN score (inner join on the word).
# 3. Per page, add up score * occurrences in a single summarise() that yields
#    exactly one row per page. The multi-row summarise() previously used as
#    an intermediate step is deprecated in current dplyr.
# The result is an ungrouped table with columns `id` and `pageSentiment`.
sentimentByPage <- generalMoreTidy %>%
  count(id, word) %>%
  inner_join(get_sentiments("afinn"), by = "word") %>%
  group_by(id) %>%
  summarise(pageSentiment = sum(n * value), .groups = "drop")

#plot(sentimentByPage$id, sentimentByPage$pageSentiment)


# Quick base-graphics check: one bar per page that has any scored word.
barplot(
  height = sentimentByPage$pageSentiment,
  names.arg = sentimentByPage$id,
  main = "Sentiment by Page"
)

plot sentiment across pages. Using the foundation theme.

# AFINN sentiment per page as a column chart with a red zero reference line.
# theme_foundation() is a complete theme, so it must come BEFORE the theme()
# tweak: added afterwards it replaces the whole theme and throws away the
# rotated x-axis labels.
ggplot(sentimentByPage, aes(x = id, y = pageSentiment)) +
  geom_col(show.legend = FALSE) +
  # Horizontal reference line at zero sentiment, spanning the whole panel.
  geom_hline(yintercept = 0, col = "red", lwd = 1.25) +
  labs(
    x = "Page",
    y = "Aggregate Sentiment",
    title = "Life and Labor of the People in London\nThird Series: Religious Influences\nCharles Booth",
    caption = "Finn Årup Nielsen A new ANEW: Evaluation of a word list for sentiment analysis in microblogs. \nProceedings of the ESWC2011 Workshop on 'Making Sense of Microposts': Big things come in small packages \n718 in CEUR Workshop Proceedings 93-98. 2011 May. http://arxiv.org/abs/1103.2903.",
    subtitle = "Using AFINN lexicon"
  ) +
  theme_foundation() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Now I want to use a categorical lexicon.

# Fetch (and print) the Bing lexicon: each word is labelled positive or negative.
get_sentiments(lexicon = "bing")
## # A tibble: 6,786 x 2
##    word        sentiment
##    <chr>       <chr>    
##  1 2-faces     negative 
##  2 abnormal    negative 
##  3 abolish     negative 
##  4 abominable  negative 
##  5 abominably  negative 
##  6 abominate   negative 
##  7 abomination negative 
##  8 abort       negative 
##  9 aborted     negative 
## 10 aborts      negative 
## # ... with 6,776 more rows
# Bing sentiment per page.
# Count each word per page and keep only words found in the Bing lexicon.
# The table stays grouped by page so the summaries below are per page.
sentimentByPageBing <- generalMoreTidy %>%
  count(id, word) %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  group_by(id)

# Per-page totals of negative and positive word OCCURRENCES. The word count
# `n` is summed for the matching rows instead of being overwritten with a 0/1
# flag, so a word used five times on a page counts five times. This matches
# how the AFINN score weights each word by its number of occurrences.
sentimentByPageBingNegative <- sentimentByPageBing %>%
  summarise(n = sum(n[sentiment == "negative"]), .groups = "drop")

sentimentByPageBingPositive <- sentimentByPageBing %>%
  summarise(n = sum(n[sentiment == "positive"]), .groups = "drop")

# Put both totals side by side (n.x = positive, n.y = negative) and take the
# difference as the page's net sentiment.
catagoricalSentiment <- inner_join(
  x = sentimentByPageBingPositive,
  y = sentimentByPageBingNegative,
  by = "id"
)

catagoricalSentiment$sentiment <- catagoricalSentiment$n.x - catagoricalSentiment$n.y

#plot(sentimentByPage$id, sentimentByPage$pageSentiment)


# Quick base-graphics check: one bar per page that has any Bing word.
barplot(
  height = catagoricalSentiment$sentiment,
  names.arg = catagoricalSentiment$id,
  main = "Sentiment by Page Bing Lexicon"
)

# Bing sentiment per page as a column chart with a red zero reference line.
# theme_foundation() is a complete theme, so it must come BEFORE the theme()
# tweak: added afterwards it replaces the whole theme and throws away the
# rotated x-axis labels.
ggplot(catagoricalSentiment, aes(x = id, y = sentiment)) +
  geom_col(show.legend = FALSE) +
  # Horizontal reference line at zero sentiment, spanning the whole panel.
  geom_hline(yintercept = 0, col = "red", lwd = 1.25) +
  labs(
    x = "Page",
    y = "Aggregate Sentiment",
    title = "Life and Labor of the People in London\nThird Series: Religious Influences\nCharles Booth",
    caption = "Minqing Hu and Bing Liu, “Mining and summarizing customer reviews.”, \nProceedings of the ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (KDD-2004), \nSeattle, Washington, USA, Aug 22-25, 2004.",
    subtitle = "Using Bing lexicon"
  ) +
  theme_foundation() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Let's check for correlation between the lexicons.

# Pair the two lexicon scores page by page before comparing them.
# Each table only holds the pages on which its own lexicon matched at least
# one word, so row i of one table is not guaranteed to be the same page as
# row i of the other. Joining on the page id aligns them explicitly and keeps
# exactly the pages scored by both lexicons, with no hard-coded row range.
sentimentCompare <- inner_join(
  select(sentimentByPage, id, pageSentiment),
  select(catagoricalSentiment, id, sentiment),
  by = "id"
)

# Quick look: Bing score (x) against AFINN score (y), one point per page.
plot(sentimentCompare$sentiment, sentimentCompare$pageSentiment)

# Linear model of the AFINN page score as a function of the Bing page score.
sentimentLm <- lm(pageSentiment ~ sentiment, data = sentimentCompare)

summary(sentimentLm)
## (model summary output not shown; re-knit the document to regenerate it)

# Labelled scatter plot with the fitted regression line in red.
plot(
  pageSentiment ~ sentiment,
  data = sentimentCompare,
  xlab = "Bing Lexicon Sentiment",
  ylab = "AFINN Lexicon Sentiment",
  main = "Sentiment Correlation of Bing and AFINN Lexicons on \nSentiment Analysis of 'Life and Labor of the People in London'",
  type = "p",
  col = "blue"
)
abline(sentimentLm, col = "red")

# Distribution of AFINN page scores for each distinct Bing page score.
boxplot(
  pageSentiment ~ sentiment,
  data = sentimentCompare,
  xlab = "Bing Lexicon Sentiment",
  ylab = "AFINN Lexicon Sentiment",
  main = "Sentiment Correlation of Bing and AFINN Lexicons on \nSentiment Analysis of 'Life and Labor of the People in London'",
  type = "p",
  col = "blue"
)