Homework 4 | Federalist Papers
Set the working directory & load libraries
# Make the homework folder the working directory so the relative data path used
# later resolves.
# NOTE(review): this is an absolute, machine-specific path; setwd() errors if
# the directory does not exist, so the script only runs as-is on this machine.
setwd("/Users/emma/Documents/SYRACUSE/IST707/Week 4")
# Names of packages used by this analysis.
# NOTE(review): `okgs` is not referenced anywhere in the visible code (possibly
# a typo for `pkgs`) -- confirm whether it is needed.
okgs <- c("cluster", "tidyverse", "FactoMineR","factoextra" )
library(cluster)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.1 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
Load in data & omit any NAs
# Read the Federalist Papers data set and discard every row that contains a
# missing value.
fed_papers <- na.omit(read.csv("fedPapers85.csv"))
Analyze fed_papers
# Show the dimensions of the data and the type and first values of each column.
str(fed_papers)
## 'data.frame': 85 obs. of 72 variables:
## $ author : chr "dispt" "dispt" "dispt" "dispt" ...
## $ filename: chr "dispt_fed_49.txt" "dispt_fed_50.txt" "dispt_fed_51.txt" "dispt_fed_52.txt" ...
## $ a : num 0.28 0.177 0.339 0.27 0.303 0.245 0.349 0.414 0.248 0.442 ...
## $ all : num 0.052 0.063 0.09 0.024 0.054 0.059 0.036 0.083 0.04 0.062 ...
## $ also : num 0.009 0.013 0.008 0.016 0.027 0.007 0.007 0.009 0.007 0.006 ...
## $ an : num 0.096 0.038 0.03 0.024 0.034 0.067 0.029 0.018 0.04 0.075 ...
## $ and : num 0.358 0.393 0.301 0.262 0.404 0.282 0.335 0.478 0.356 0.423 ...
## $ any : num 0.026 0.063 0.008 0.056 0.04 0.052 0.058 0.046 0.034 0.037 ...
## $ are : num 0.131 0.051 0.068 0.064 0.128 0.111 0.087 0.11 0.154 0.093 ...
## $ as : num 0.122 0.139 0.203 0.111 0.148 0.252 0.073 0.074 0.161 0.1 ...
## $ at : num 0.017 0.114 0.023 0.056 0.013 0.015 0.116 0.037 0.047 0.031 ...
## $ be : num 0.411 0.393 0.474 0.365 0.344 0.297 0.378 0.331 0.289 0.379 ...
## $ been : num 0.026 0.165 0.015 0.127 0.047 0.03 0.044 0.046 0.027 0.025 ...
## $ but : num 0.009 0 0.038 0.032 0.061 0.037 0.007 0.055 0.027 0.037 ...
## $ by : num 0.14 0.139 0.173 0.167 0.209 0.186 0.102 0.092 0.168 0.174 ...
## $ can : num 0.035 0 0.023 0.056 0.088 0 0.058 0.037 0.047 0.056 ...
## $ do : num 0.026 0.013 0 0 0 0 0.015 0.028 0 0 ...
## $ down : num 0 0 0.008 0 0 0.007 0 0 0 0 ...
## $ even : num 0.009 0.025 0.015 0.024 0.02 0.007 0.007 0.018 0 0.006 ...
## $ every : num 0.044 0 0.023 0.04 0.027 0.007 0.087 0.064 0.081 0.05 ...
## $ for. : num 0.096 0.076 0.098 0.103 0.141 0.067 0.116 0.055 0.127 0.1 ...
## $ from : num 0.044 0.101 0.053 0.079 0.074 0.096 0.08 0.083 0.074 0.124 ...
## $ had : num 0.035 0.101 0.008 0.016 0 0.022 0.015 0.009 0.007 0 ...
## $ has : num 0.017 0.013 0.015 0.024 0.054 0.015 0.036 0.037 0.02 0.019 ...
## $ have : num 0.044 0.152 0.023 0.143 0.047 0.119 0.044 0.074 0.074 0.044 ...
## $ her : num 0 0 0 0 0 0 0.007 0 0.034 0.025 ...
## $ his : num 0.017 0 0 0.024 0.02 0.067 0 0.018 0.02 0.05 ...
## $ if. : num 0 0.025 0.023 0.04 0.034 0.03 0.029 0 0 0.025 ...
## $ in. : num 0.262 0.291 0.308 0.238 0.263 0.401 0.189 0.267 0.248 0.274 ...
## $ into : num 0.009 0.025 0.038 0.008 0.013 0.037 0 0.037 0.013 0.037 ...
## $ is : num 0.157 0.038 0.15 0.151 0.189 0.26 0.167 0.083 0.208 0.23 ...
## $ it : num 0.175 0.127 0.173 0.222 0.108 0.156 0.102 0.165 0.134 0.131 ...
## $ its : num 0.07 0.038 0.03 0.048 0.013 0.015 0 0.046 0.02 0.019 ...
## $ may : num 0.035 0.038 0.12 0.056 0.047 0.074 0.08 0.092 0.027 0.106 ...
## $ more : num 0.026 0 0.038 0.056 0.067 0.045 0.08 0.064 0.06 0.081 ...
## $ must : num 0.026 0.013 0.083 0.071 0.013 0.015 0.044 0.018 0.027 0.068 ...
## $ my : num 0 0 0 0 0 0 0.007 0 0 0 ...
## $ no : num 0.035 0 0.03 0.032 0.047 0.059 0.022 0.018 0.02 0.044 ...
## $ not : num 0.114 0.127 0.068 0.087 0.128 0.134 0.102 0.101 0.094 0.106 ...
## $ now : num 0 0 0 0 0 0 0.007 0 0.007 0.012 ...
## $ of : num 0.9 0.747 0.858 0.802 0.869 ...
## $ on : num 0.14 0.139 0.15 0.143 0.054 0.141 0.051 0.083 0.127 0.118 ...
## $ one : num 0.026 0.025 0.03 0.032 0.047 0.052 0.073 0.046 0.06 0.031 ...
## $ only : num 0.035 0 0.023 0.048 0.027 0.022 0.007 0.046 0.02 0.012 ...
## $ or : num 0.096 0.114 0.06 0.064 0.081 0.074 0.153 0.037 0.154 0.081 ...
## $ our : num 0.017 0 0 0.016 0.027 0.03 0.051 0 0.007 0.025 ...
## $ shall : num 0.017 0 0.008 0.016 0 0.015 0.007 0 0.02 0 ...
## $ should : num 0.017 0.013 0.068 0.032 0 0.03 0.007 0 0 0.012 ...
## $ so : num 0.035 0.013 0.038 0.04 0.027 0.007 0.051 0.018 0.04 0.05 ...
## $ some : num 0.009 0.063 0.03 0.024 0.067 0.045 0.007 0.028 0.027 0.025 ...
## $ such : num 0.026 0 0.045 0.008 0.027 0.015 0.015 0 0.013 0.031 ...
## $ than : num 0.009 0 0.023 0 0.047 0.03 0.109 0.055 0.067 0.044 ...
## $ that : num 0.184 0.152 0.188 0.238 0.162 0.208 0.233 0.165 0.208 0.218 ...
## $ the : num 1.43 1.25 1.49 1.33 1.19 ...
## $ their : num 0.114 0.165 0.053 0.071 0.027 0.089 0.109 0.083 0.154 0.081 ...
## $ then : num 0 0 0.015 0.008 0.007 0.007 0.015 0.009 0.007 0.012 ...
## $ there : num 0.009 0 0.015 0 0.007 0.007 0.036 0.028 0.02 0 ...
## $ things : num 0.009 0 0 0 0 0 0 0 0 0.012 ...
## $ this : num 0.044 0.051 0.075 0.103 0.094 0.126 0.08 0.11 0.067 0.093 ...
## $ to : num 0.507 0.355 0.361 0.532 0.485 0.445 0.56 0.34 0.49 0.498 ...
## $ up : num 0 0 0 0 0 0 0.007 0 0 0 ...
## $ upon : num 0 0.013 0 0 0 0 0 0 0 0 ...
## $ was : num 0.009 0.051 0.008 0.087 0.027 0.007 0.015 0.018 0.027 0 ...
## $ were : num 0.017 0 0.015 0.079 0.02 0.03 0.029 0.009 0.007 0 ...
## $ what : num 0 0 0.008 0.008 0.02 0.015 0.015 0.009 0.02 0.025 ...
## $ when : num 0.009 0 0 0.024 0.007 0.037 0.007 0 0.02 0.012 ...
## $ which : num 0.175 0.114 0.105 0.167 0.155 0.186 0.211 0.175 0.201 0.199 ...
## $ who : num 0.044 0.038 0.008 0 0.027 0.045 0.022 0.018 0.04 0.031 ...
## $ will : num 0.009 0.089 0.173 0.079 0.168 0.111 0.145 0.267 0.154 0.106 ...
## $ with : num 0.087 0.063 0.045 0.079 0.074 0.089 0.073 0.129 0.027 0.081 ...
## $ would : num 0.192 0.139 0.068 0.064 0.04 0.037 0.073 0.037 0.04 0.031 ...
## $ your : num 0 0 0 0 0 0 0 0 0 0 ...
# Per-column summary: min, quartiles, mean and max for the numeric columns,
# length/class/mode for the character columns.
summary(fed_papers)
## author filename a all
## Length:85 Length:85 Min. :0.0960 Min. :0.01500
## Class :character Class :character 1st Qu.:0.2400 1st Qu.:0.03500
## Mode :character Mode :character Median :0.2990 Median :0.05000
## Mean :0.2932 Mean :0.05284
## 3rd Qu.:0.3490 3rd Qu.:0.06600
## Max. :0.4660 Max. :0.12700
## also an and any
## Min. :0.000000 Min. :0.00900 Min. :0.2170 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.04900 1st Qu.:0.3190 1st Qu.:0.02500
## Median :0.007000 Median :0.07100 Median :0.3580 Median :0.04300
## Mean :0.007659 Mean :0.06839 Mean :0.3846 Mean :0.04161
## 3rd Qu.:0.013000 3rd Qu.:0.08500 3rd Qu.:0.4130 3rd Qu.:0.05600
## Max. :0.047000 Max. :0.17900 Max. :0.8210 Max. :0.11400
## are as at be
## Min. :0.01300 Min. :0.0270 Min. :0.00000 Min. :0.0400
## 1st Qu.:0.05100 1st Qu.:0.1000 1st Qu.:0.02600 1st Qu.:0.2580
## Median :0.06800 Median :0.1240 Median :0.03800 Median :0.3070
## Mean :0.07707 Mean :0.1242 Mean :0.04427 Mean :0.3012
## 3rd Qu.:0.10200 3rd Qu.:0.1440 3rd Qu.:0.06300 3rd Qu.:0.3580
## Max. :0.16300 Max. :0.2520 Max. :0.11800 Max. :0.4810
## been but by can
## Min. :0.00000 Min. :0.00000 Min. :0.0270 Min. :0.00000
## 1st Qu.:0.03000 1st Qu.:0.02200 1st Qu.:0.0920 1st Qu.:0.01400
## Median :0.05300 Median :0.03200 Median :0.1240 Median :0.02900
## Mean :0.05967 Mean :0.03232 Mean :0.1272 Mean :0.03558
## 3rd Qu.:0.08400 3rd Qu.:0.04200 3rd Qu.:0.1620 3rd Qu.:0.05200
## Max. :0.16500 Max. :0.08900 Max. :0.2640 Max. :0.11000
## do down even every
## Min. :0.000000 Min. :0.000000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.00900
## Median :0.006000 Median :0.000000 Median :0.0100 Median :0.02200
## Mean :0.006259 Mean :0.001529 Mean :0.0114 Mean :0.02391
## 3rd Qu.:0.010000 3rd Qu.:0.000000 3rd Qu.:0.0180 3rd Qu.:0.03400
## Max. :0.028000 Max. :0.017000 Max. :0.0370 Max. :0.08700
## for. from had has
## Min. :0.03000 Min. :0.02600 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.07000 1st Qu.:0.05700 1st Qu.:0.00800 1st Qu.:0.02500
## Median :0.08800 Median :0.07800 Median :0.01600 Median :0.04600
## Mean :0.09376 Mean :0.07978 Mean :0.02116 Mean :0.04442
## 3rd Qu.:0.11400 3rd Qu.:0.09800 3rd Qu.:0.02700 3rd Qu.:0.05700
## Max. :0.21300 Max. :0.16200 Max. :0.14100 Max. :0.11400
## have her his if.
## Min. :0.01100 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.07300 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.01600
## Median :0.09000 Median :0.000000 Median :0.01400 Median :0.02600
## Mean :0.09474 Mean :0.008094 Mean :0.02862 Mean :0.02733
## 3rd Qu.:0.12400 3rd Qu.:0.007000 3rd Qu.:0.03900 3rd Qu.:0.03400
## Max. :0.18500 Max. :0.150000 Max. :0.24700 Max. :0.09900
## in. into is it
## Min. :0.1890 Min. :0.00000 Min. :0.0280 Min. :0.0750
## 1st Qu.:0.2670 1st Qu.:0.01000 1st Qu.:0.1180 1st Qu.:0.1290
## Median :0.3040 Median :0.02200 Median :0.1510 Median :0.1510
## Mean :0.3174 Mean :0.02409 Mean :0.1563 Mean :0.1567
## 3rd Qu.:0.3550 3rd Qu.:0.03400 3rd Qu.:0.1960 3rd Qu.:0.1900
## Max. :0.4990 Max. :0.10500 Max. :0.3230 Max. :0.2840
## its may more must
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.03000 1st Qu.:0.03600 1st Qu.:0.02300 1st Qu.:0.01400
## Median :0.04200 Median :0.05600 Median :0.04400 Median :0.02700
## Mean :0.04836 Mean :0.06181 Mean :0.04561 Mean :0.03305
## 3rd Qu.:0.06400 3rd Qu.:0.08500 3rd Qu.:0.06100 3rd Qu.:0.04400
## Max. :0.15000 Max. :0.13400 Max. :0.13000 Max. :0.11100
## my no not now
## Min. :0.000000 Min. :0.00000 Min. :0.02000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.02000 1st Qu.:0.07500 1st Qu.:0.000000
## Median :0.000000 Median :0.02900 Median :0.09500 Median :0.005000
## Mean :0.003259 Mean :0.03236 Mean :0.09248 Mean :0.006035
## 3rd Qu.:0.005000 3rd Qu.:0.04300 3rd Qu.:0.11200 3rd Qu.:0.010000
## Max. :0.056000 Max. :0.08300 Max. :0.14800 Max. :0.026000
## of on one only
## Min. :0.5620 Min. :0.00000 Min. :0.00500 Min. :0.00000
## 1st Qu.:0.8560 1st Qu.:0.04300 1st Qu.:0.02700 1st Qu.:0.01000
## Median :0.9020 Median :0.06200 Median :0.03600 Median :0.02200
## Mean :0.9094 Mean :0.06926 Mean :0.04079 Mean :0.02288
## 3rd Qu.:0.9690 3rd Qu.:0.09700 3rd Qu.:0.05000 3rd Qu.:0.03400
## Max. :1.2110 Max. :0.15600 Max. :0.13500 Max. :0.06500
## or our shall should
## Min. :0.02700 Min. :0.000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.07000 1st Qu.:0.000 1st Qu.:0.00600 1st Qu.:0.01000
## Median :0.08100 Median :0.013 Median :0.01400 Median :0.02700
## Mean :0.09674 Mean :0.023 Mean :0.01875 Mean :0.02656
## 3rd Qu.:0.11600 3rd Qu.:0.028 3rd Qu.:0.02700 3rd Qu.:0.03800
## Max. :0.32100 Max. :0.199 Max. :0.07900 Max. :0.09100
## so some such than
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.01800 1st Qu.:0.00900 1st Qu.:0.01800 1st Qu.:0.02700
## Median :0.02900 Median :0.01700 Median :0.02900 Median :0.04300
## Mean :0.02982 Mean :0.01989 Mean :0.02922 Mean :0.04396
## 3rd Qu.:0.04000 3rd Qu.:0.02800 3rd Qu.:0.03800 3rd Qu.:0.05500
## Max. :0.07200 Max. :0.06700 Max. :0.08500 Max. :0.15000
## that the their then
## Min. :0.081 Min. :0.669 Min. :0.00500 Min. :0.000000
## 1st Qu.:0.171 1st Qu.:1.178 1st Qu.:0.05500 1st Qu.:0.000000
## Median :0.208 Median :1.275 Median :0.08600 Median :0.006000
## Mean :0.212 Mean :1.281 Mean :0.08553 Mean :0.006082
## 3rd Qu.:0.244 3rd Qu.:1.423 3rd Qu.:0.10600 3rd Qu.:0.010000
## Max. :0.380 Max. :1.803 Max. :0.18300 Max. :0.021000
## there things this to
## Min. :0.00000 Min. :0.000000 Min. :0.00900 Min. :0.3330
## 1st Qu.:0.00900 1st Qu.:0.000000 1st Qu.:0.06900 1st Qu.:0.4690
## Median :0.02200 Median :0.000000 Median :0.09000 Median :0.5400
## Mean :0.02638 Mean :0.002659 Mean :0.08701 Mean :0.5358
## 3rd Qu.:0.03900 3rd Qu.:0.006000 3rd Qu.:0.10500 3rd Qu.:0.6060
## Max. :0.10500 Max. :0.015000 Max. :0.15300 Max. :0.7760
## up upon was were
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00900 1st Qu.:0.00700
## Median :0.000000 Median :0.02800 Median :0.01500 Median :0.01500
## Mean :0.003482 Mean :0.02922 Mean :0.02584 Mean :0.02022
## 3rd Qu.:0.006000 3rd Qu.:0.05000 3rd Qu.:0.03200 3rd Qu.:0.02900
## Max. :0.032000 Max. :0.10200 Max. :0.18900 Max. :0.10800
## what when which who
## Min. :0.00000 Min. :0.00000 Min. :0.0810 Min. :0.00000
## 1st Qu.:0.00500 1st Qu.:0.00000 1st Qu.:0.1180 1st Qu.:0.01600
## Median :0.01000 Median :0.00900 Median :0.1520 Median :0.02700
## Mean :0.01286 Mean :0.01174 Mean :0.1578 Mean :0.03253
## 3rd Qu.:0.02000 3rd Qu.:0.01500 3rd Qu.:0.1830 3rd Qu.:0.04400
## Max. :0.06000 Max. :0.07300 Max. :0.2760 Max. :0.12900
## will with would your
## Min. :0.00600 Min. :0.02700 Min. :0.0090 Min. :0.000000
## 1st Qu.:0.05200 1st Qu.:0.06100 1st Qu.:0.0420 1st Qu.:0.000000
## Median :0.08100 Median :0.07900 Median :0.0780 Median :0.000000
## Mean :0.09865 Mean :0.07968 Mean :0.1017 Mean :0.002024
## 3rd Qu.:0.13500 3rd Qu.:0.09200 3rd Qu.:0.1470 3rd Qu.:0.000000
## Max. :0.34000 Max. :0.15000 Max. :0.3820 Max. :0.074000
Remove filename & author
# Keep only the numeric feature columns by dropping the first two columns
# (author and filename), then confirm the structure of the result.
clean_fed_papers <- fed_papers[, -(1:2)]
str(clean_fed_papers)
## 'data.frame': 85 obs. of 70 variables:
## $ a : num 0.28 0.177 0.339 0.27 0.303 0.245 0.349 0.414 0.248 0.442 ...
## $ all : num 0.052 0.063 0.09 0.024 0.054 0.059 0.036 0.083 0.04 0.062 ...
## $ also : num 0.009 0.013 0.008 0.016 0.027 0.007 0.007 0.009 0.007 0.006 ...
## $ an : num 0.096 0.038 0.03 0.024 0.034 0.067 0.029 0.018 0.04 0.075 ...
## $ and : num 0.358 0.393 0.301 0.262 0.404 0.282 0.335 0.478 0.356 0.423 ...
## $ any : num 0.026 0.063 0.008 0.056 0.04 0.052 0.058 0.046 0.034 0.037 ...
## $ are : num 0.131 0.051 0.068 0.064 0.128 0.111 0.087 0.11 0.154 0.093 ...
## $ as : num 0.122 0.139 0.203 0.111 0.148 0.252 0.073 0.074 0.161 0.1 ...
## $ at : num 0.017 0.114 0.023 0.056 0.013 0.015 0.116 0.037 0.047 0.031 ...
## $ be : num 0.411 0.393 0.474 0.365 0.344 0.297 0.378 0.331 0.289 0.379 ...
## $ been : num 0.026 0.165 0.015 0.127 0.047 0.03 0.044 0.046 0.027 0.025 ...
## $ but : num 0.009 0 0.038 0.032 0.061 0.037 0.007 0.055 0.027 0.037 ...
## $ by : num 0.14 0.139 0.173 0.167 0.209 0.186 0.102 0.092 0.168 0.174 ...
## $ can : num 0.035 0 0.023 0.056 0.088 0 0.058 0.037 0.047 0.056 ...
## $ do : num 0.026 0.013 0 0 0 0 0.015 0.028 0 0 ...
## $ down : num 0 0 0.008 0 0 0.007 0 0 0 0 ...
## $ even : num 0.009 0.025 0.015 0.024 0.02 0.007 0.007 0.018 0 0.006 ...
## $ every : num 0.044 0 0.023 0.04 0.027 0.007 0.087 0.064 0.081 0.05 ...
## $ for. : num 0.096 0.076 0.098 0.103 0.141 0.067 0.116 0.055 0.127 0.1 ...
## $ from : num 0.044 0.101 0.053 0.079 0.074 0.096 0.08 0.083 0.074 0.124 ...
## $ had : num 0.035 0.101 0.008 0.016 0 0.022 0.015 0.009 0.007 0 ...
## $ has : num 0.017 0.013 0.015 0.024 0.054 0.015 0.036 0.037 0.02 0.019 ...
## $ have : num 0.044 0.152 0.023 0.143 0.047 0.119 0.044 0.074 0.074 0.044 ...
## $ her : num 0 0 0 0 0 0 0.007 0 0.034 0.025 ...
## $ his : num 0.017 0 0 0.024 0.02 0.067 0 0.018 0.02 0.05 ...
## $ if. : num 0 0.025 0.023 0.04 0.034 0.03 0.029 0 0 0.025 ...
## $ in. : num 0.262 0.291 0.308 0.238 0.263 0.401 0.189 0.267 0.248 0.274 ...
## $ into : num 0.009 0.025 0.038 0.008 0.013 0.037 0 0.037 0.013 0.037 ...
## $ is : num 0.157 0.038 0.15 0.151 0.189 0.26 0.167 0.083 0.208 0.23 ...
## $ it : num 0.175 0.127 0.173 0.222 0.108 0.156 0.102 0.165 0.134 0.131 ...
## $ its : num 0.07 0.038 0.03 0.048 0.013 0.015 0 0.046 0.02 0.019 ...
## $ may : num 0.035 0.038 0.12 0.056 0.047 0.074 0.08 0.092 0.027 0.106 ...
## $ more : num 0.026 0 0.038 0.056 0.067 0.045 0.08 0.064 0.06 0.081 ...
## $ must : num 0.026 0.013 0.083 0.071 0.013 0.015 0.044 0.018 0.027 0.068 ...
## $ my : num 0 0 0 0 0 0 0.007 0 0 0 ...
## $ no : num 0.035 0 0.03 0.032 0.047 0.059 0.022 0.018 0.02 0.044 ...
## $ not : num 0.114 0.127 0.068 0.087 0.128 0.134 0.102 0.101 0.094 0.106 ...
## $ now : num 0 0 0 0 0 0 0.007 0 0.007 0.012 ...
## $ of : num 0.9 0.747 0.858 0.802 0.869 ...
## $ on : num 0.14 0.139 0.15 0.143 0.054 0.141 0.051 0.083 0.127 0.118 ...
## $ one : num 0.026 0.025 0.03 0.032 0.047 0.052 0.073 0.046 0.06 0.031 ...
## $ only : num 0.035 0 0.023 0.048 0.027 0.022 0.007 0.046 0.02 0.012 ...
## $ or : num 0.096 0.114 0.06 0.064 0.081 0.074 0.153 0.037 0.154 0.081 ...
## $ our : num 0.017 0 0 0.016 0.027 0.03 0.051 0 0.007 0.025 ...
## $ shall : num 0.017 0 0.008 0.016 0 0.015 0.007 0 0.02 0 ...
## $ should: num 0.017 0.013 0.068 0.032 0 0.03 0.007 0 0 0.012 ...
## $ so : num 0.035 0.013 0.038 0.04 0.027 0.007 0.051 0.018 0.04 0.05 ...
## $ some : num 0.009 0.063 0.03 0.024 0.067 0.045 0.007 0.028 0.027 0.025 ...
## $ such : num 0.026 0 0.045 0.008 0.027 0.015 0.015 0 0.013 0.031 ...
## $ than : num 0.009 0 0.023 0 0.047 0.03 0.109 0.055 0.067 0.044 ...
## $ that : num 0.184 0.152 0.188 0.238 0.162 0.208 0.233 0.165 0.208 0.218 ...
## $ the : num 1.43 1.25 1.49 1.33 1.19 ...
## $ their : num 0.114 0.165 0.053 0.071 0.027 0.089 0.109 0.083 0.154 0.081 ...
## $ then : num 0 0 0.015 0.008 0.007 0.007 0.015 0.009 0.007 0.012 ...
## $ there : num 0.009 0 0.015 0 0.007 0.007 0.036 0.028 0.02 0 ...
## $ things: num 0.009 0 0 0 0 0 0 0 0 0.012 ...
## $ this : num 0.044 0.051 0.075 0.103 0.094 0.126 0.08 0.11 0.067 0.093 ...
## $ to : num 0.507 0.355 0.361 0.532 0.485 0.445 0.56 0.34 0.49 0.498 ...
## $ up : num 0 0 0 0 0 0 0.007 0 0 0 ...
## $ upon : num 0 0.013 0 0 0 0 0 0 0 0 ...
## $ was : num 0.009 0.051 0.008 0.087 0.027 0.007 0.015 0.018 0.027 0 ...
## $ were : num 0.017 0 0.015 0.079 0.02 0.03 0.029 0.009 0.007 0 ...
## $ what : num 0 0 0.008 0.008 0.02 0.015 0.015 0.009 0.02 0.025 ...
## $ when : num 0.009 0 0 0.024 0.007 0.037 0.007 0 0.02 0.012 ...
## $ which : num 0.175 0.114 0.105 0.167 0.155 0.186 0.211 0.175 0.201 0.199 ...
## $ who : num 0.044 0.038 0.008 0 0.027 0.045 0.022 0.018 0.04 0.031 ...
## $ will : num 0.009 0.089 0.173 0.079 0.168 0.111 0.145 0.267 0.154 0.106 ...
## $ with : num 0.087 0.063 0.045 0.079 0.074 0.089 0.073 0.129 0.027 0.081 ...
## $ would : num 0.192 0.139 0.068 0.064 0.04 0.037 0.073 0.037 0.04 0.031 ...
## $ your : num 0 0 0 0 0 0 0 0 0 0 ...
K-Means
Analyze using 5 centroids
# Cluster the papers into 5 groups with k-means, trying 30 random starts and
# keeping the best solution.
# A fixed (arbitrary) seed makes the random starts, and therefore the cluster
# ids, reproducible from one run to the next.
set.seed(707)
km <- kmeans(clean_fed_papers, centers = 5, nstart = 30)

# Attach each paper's cluster id to the original data. The column is named
# km.cluster explicitly instead of relying on data.frame()'s automatic naming.
cluster_assignment <- data.frame(fed_papers, km.cluster = km$cluster)
cluster_assignment$author <- as.factor(cluster_assignment$author)
# Drop the filename column by name rather than by position.
cluster_assignment$filename <- NULL
head(cluster_assignment)
## author a all also an and any are as at be been
## 1 dispt 0.280 0.052 0.009 0.096 0.358 0.026 0.131 0.122 0.017 0.411 0.026
## 2 dispt 0.177 0.063 0.013 0.038 0.393 0.063 0.051 0.139 0.114 0.393 0.165
## 3 dispt 0.339 0.090 0.008 0.030 0.301 0.008 0.068 0.203 0.023 0.474 0.015
## 4 dispt 0.270 0.024 0.016 0.024 0.262 0.056 0.064 0.111 0.056 0.365 0.127
## 5 dispt 0.303 0.054 0.027 0.034 0.404 0.040 0.128 0.148 0.013 0.344 0.047
## 6 dispt 0.245 0.059 0.007 0.067 0.282 0.052 0.111 0.252 0.015 0.297 0.030
## but by can do down even every for. from had has have her
## 1 0.009 0.140 0.035 0.026 0.000 0.009 0.044 0.096 0.044 0.035 0.017 0.044 0
## 2 0.000 0.139 0.000 0.013 0.000 0.025 0.000 0.076 0.101 0.101 0.013 0.152 0
## 3 0.038 0.173 0.023 0.000 0.008 0.015 0.023 0.098 0.053 0.008 0.015 0.023 0
## 4 0.032 0.167 0.056 0.000 0.000 0.024 0.040 0.103 0.079 0.016 0.024 0.143 0
## 5 0.061 0.209 0.088 0.000 0.000 0.020 0.027 0.141 0.074 0.000 0.054 0.047 0
## 6 0.037 0.186 0.000 0.000 0.007 0.007 0.007 0.067 0.096 0.022 0.015 0.119 0
## his if. in. into is it its may more must my no not
## 1 0.017 0.000 0.262 0.009 0.157 0.175 0.070 0.035 0.026 0.026 0 0.035 0.114
## 2 0.000 0.025 0.291 0.025 0.038 0.127 0.038 0.038 0.000 0.013 0 0.000 0.127
## 3 0.000 0.023 0.308 0.038 0.150 0.173 0.030 0.120 0.038 0.083 0 0.030 0.068
## 4 0.024 0.040 0.238 0.008 0.151 0.222 0.048 0.056 0.056 0.071 0 0.032 0.087
## 5 0.020 0.034 0.263 0.013 0.189 0.108 0.013 0.047 0.067 0.013 0 0.047 0.128
## 6 0.067 0.030 0.401 0.037 0.260 0.156 0.015 0.074 0.045 0.015 0 0.059 0.134
## now of on one only or our shall should so some such than
## 1 0 0.900 0.140 0.026 0.035 0.096 0.017 0.017 0.017 0.035 0.009 0.026 0.009
## 2 0 0.747 0.139 0.025 0.000 0.114 0.000 0.000 0.013 0.013 0.063 0.000 0.000
## 3 0 0.858 0.150 0.030 0.023 0.060 0.000 0.008 0.068 0.038 0.030 0.045 0.023
## 4 0 0.802 0.143 0.032 0.048 0.064 0.016 0.016 0.032 0.040 0.024 0.008 0.000
## 5 0 0.869 0.054 0.047 0.027 0.081 0.027 0.000 0.000 0.027 0.067 0.027 0.047
## 6 0 0.876 0.141 0.052 0.022 0.074 0.030 0.015 0.030 0.007 0.045 0.015 0.030
## that the their then there things this to up upon was were what
## 1 0.184 1.425 0.114 0.000 0.009 0.009 0.044 0.507 0 0.000 0.009 0.017 0.000
## 2 0.152 1.254 0.165 0.000 0.000 0.000 0.051 0.355 0 0.013 0.051 0.000 0.000
## 3 0.188 1.490 0.053 0.015 0.015 0.000 0.075 0.361 0 0.000 0.008 0.015 0.008
## 4 0.238 1.326 0.071 0.008 0.000 0.000 0.103 0.532 0 0.000 0.087 0.079 0.008
## 5 0.162 1.193 0.027 0.007 0.007 0.000 0.094 0.485 0 0.000 0.027 0.020 0.020
## 6 0.208 1.469 0.089 0.007 0.007 0.000 0.126 0.445 0 0.000 0.007 0.030 0.015
## when which who will with would your km.cluster
## 1 0.009 0.175 0.044 0.009 0.087 0.192 0 2
## 2 0.000 0.114 0.038 0.089 0.063 0.139 0 5
## 3 0.000 0.105 0.008 0.173 0.045 0.068 0 2
## 4 0.024 0.167 0.000 0.079 0.079 0.064 0 2
## 5 0.007 0.155 0.027 0.168 0.074 0.040 0 5
## 6 0.037 0.186 0.045 0.111 0.089 0.037 0 2
# Plot each paper's cluster id against its row index, one colour per author.
# The author factor is built once and both the point colours and the legend are
# derived from its levels, so the legend always matches the plot. Previously the
# legend used unique() order with hard-coded colour names, while the points were
# coloured by factor code (sorted levels) through the active palette.
local({
  author_factor <- factor(fed_papers$author)
  author_colours <- seq_along(levels(author_factor))
  plot(km$cluster, col = author_colours[as.integer(author_factor)])
  # pch = 1 shows the same open-circle symbol that plot() draws by default.
  legend(60, 4, legend = levels(author_factor), col = author_colours, pch = 1)
})

# Visualize the k-means result: every paper drawn as a labelled point, with the
# cluster centres shown.
fviz_cluster(
  object = km,
  data = clean_fed_papers,
  geom = c("point", "text"),
  show.clust.cent = TRUE
)

Analyze using hierarchical analysis
# Hierarchical clustering with complete linkage, cut into 5 groups.
hc.cut <- hcut(x = clean_fed_papers, k = 5, hc_method = "complete")
# Dendrogram with leaf labels and a rectangle around each group.
fviz_dend(x = hc.cut, show_labels = TRUE, rect = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Hierarchical clustering with single linkage, cut into 5 groups.
hc.cut.sing <- hcut(x = clean_fed_papers, k = 5, hc_method = "single")
# Dendrogram without leaf labels, with a rectangle around each group.
fviz_dend(x = hc.cut.sing, show_labels = FALSE, rect = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

Plot K-Means analysis using GGPlot
# Bar chart of the number of papers in each cluster, filled by author.
# The cluster id is taken from the km.cluster column of cluster_assignment, so
# the plot depends only on its own data argument and not on whatever the global
# `km` object holds when the plot is drawn.
ggplot(data = cluster_assignment, aes(y = km.cluster, fill = author)) +
  geom_bar() +
  ggtitle("Count of Clusters by Author") +
  theme(plot.title = element_text(hjust = 0.5))

K-Means; more centroids
Analyze using 15 centroids
# Re-run k-means with 15 groups, trying 50 random starts and keeping the best
# solution.
# A fixed (arbitrary) seed makes the random starts, and therefore the cluster
# ids, reproducible from one run to the next.
set.seed(707)
km <- kmeans(clean_fed_papers, centers = 15, nstart = 50)

# Attach each paper's cluster id to the original data. The column is named
# km.cluster explicitly instead of relying on data.frame()'s automatic naming.
cluster_assignment <- data.frame(fed_papers, km.cluster = km$cluster)
cluster_assignment$author <- as.factor(cluster_assignment$author)
# Drop the filename column by name rather than by position.
cluster_assignment$filename <- NULL
head(cluster_assignment)
## author a all also an and any are as at be been
## 1 dispt 0.280 0.052 0.009 0.096 0.358 0.026 0.131 0.122 0.017 0.411 0.026
## 2 dispt 0.177 0.063 0.013 0.038 0.393 0.063 0.051 0.139 0.114 0.393 0.165
## 3 dispt 0.339 0.090 0.008 0.030 0.301 0.008 0.068 0.203 0.023 0.474 0.015
## 4 dispt 0.270 0.024 0.016 0.024 0.262 0.056 0.064 0.111 0.056 0.365 0.127
## 5 dispt 0.303 0.054 0.027 0.034 0.404 0.040 0.128 0.148 0.013 0.344 0.047
## 6 dispt 0.245 0.059 0.007 0.067 0.282 0.052 0.111 0.252 0.015 0.297 0.030
## but by can do down even every for. from had has have her
## 1 0.009 0.140 0.035 0.026 0.000 0.009 0.044 0.096 0.044 0.035 0.017 0.044 0
## 2 0.000 0.139 0.000 0.013 0.000 0.025 0.000 0.076 0.101 0.101 0.013 0.152 0
## 3 0.038 0.173 0.023 0.000 0.008 0.015 0.023 0.098 0.053 0.008 0.015 0.023 0
## 4 0.032 0.167 0.056 0.000 0.000 0.024 0.040 0.103 0.079 0.016 0.024 0.143 0
## 5 0.061 0.209 0.088 0.000 0.000 0.020 0.027 0.141 0.074 0.000 0.054 0.047 0
## 6 0.037 0.186 0.000 0.000 0.007 0.007 0.007 0.067 0.096 0.022 0.015 0.119 0
## his if. in. into is it its may more must my no not
## 1 0.017 0.000 0.262 0.009 0.157 0.175 0.070 0.035 0.026 0.026 0 0.035 0.114
## 2 0.000 0.025 0.291 0.025 0.038 0.127 0.038 0.038 0.000 0.013 0 0.000 0.127
## 3 0.000 0.023 0.308 0.038 0.150 0.173 0.030 0.120 0.038 0.083 0 0.030 0.068
## 4 0.024 0.040 0.238 0.008 0.151 0.222 0.048 0.056 0.056 0.071 0 0.032 0.087
## 5 0.020 0.034 0.263 0.013 0.189 0.108 0.013 0.047 0.067 0.013 0 0.047 0.128
## 6 0.067 0.030 0.401 0.037 0.260 0.156 0.015 0.074 0.045 0.015 0 0.059 0.134
## now of on one only or our shall should so some such than
## 1 0 0.900 0.140 0.026 0.035 0.096 0.017 0.017 0.017 0.035 0.009 0.026 0.009
## 2 0 0.747 0.139 0.025 0.000 0.114 0.000 0.000 0.013 0.013 0.063 0.000 0.000
## 3 0 0.858 0.150 0.030 0.023 0.060 0.000 0.008 0.068 0.038 0.030 0.045 0.023
## 4 0 0.802 0.143 0.032 0.048 0.064 0.016 0.016 0.032 0.040 0.024 0.008 0.000
## 5 0 0.869 0.054 0.047 0.027 0.081 0.027 0.000 0.000 0.027 0.067 0.027 0.047
## 6 0 0.876 0.141 0.052 0.022 0.074 0.030 0.015 0.030 0.007 0.045 0.015 0.030
## that the their then there things this to up upon was were what
## 1 0.184 1.425 0.114 0.000 0.009 0.009 0.044 0.507 0 0.000 0.009 0.017 0.000
## 2 0.152 1.254 0.165 0.000 0.000 0.000 0.051 0.355 0 0.013 0.051 0.000 0.000
## 3 0.188 1.490 0.053 0.015 0.015 0.000 0.075 0.361 0 0.000 0.008 0.015 0.008
## 4 0.238 1.326 0.071 0.008 0.000 0.000 0.103 0.532 0 0.000 0.087 0.079 0.008
## 5 0.162 1.193 0.027 0.007 0.007 0.000 0.094 0.485 0 0.000 0.027 0.020 0.020
## 6 0.208 1.469 0.089 0.007 0.007 0.000 0.126 0.445 0 0.000 0.007 0.030 0.015
## when which who will with would your km.cluster
## 1 0.009 0.175 0.044 0.009 0.087 0.192 0 8
## 2 0.000 0.114 0.038 0.089 0.063 0.139 0 5
## 3 0.000 0.105 0.008 0.173 0.045 0.068 0 8
## 4 0.024 0.167 0.000 0.079 0.079 0.064 0 5
## 5 0.007 0.155 0.027 0.168 0.074 0.040 0 2
## 6 0.037 0.186 0.045 0.111 0.089 0.037 0 8
Plot using GGPlot
# Bar chart of the number of papers in each cluster, filled by author.
# The cluster id is taken from the km.cluster column of cluster_assignment, so
# the plot depends only on its own data argument and not on whatever the global
# `km` object holds when the plot is drawn.
ggplot(data = cluster_assignment, aes(y = km.cluster, fill = author)) +
  geom_bar() +
  ggtitle("Count of Clusters by Author") +
  theme(plot.title = element_text(hjust = 0.5))

Visualize which author the disputed documents may have been written by.
# Pair every paper (author, filename and all feature columns) with its cluster id.
potential_papers <- tibble(fed_papers, cluster = km$cluster)
# Distinct cluster ids that contain at least one disputed paper. The cluster
# column is selected by name, so the lookup does not silently pick the wrong
# column if the number of feature columns changes.
clusters_w_dispt <- unique(
  cluster_assignment[which(cluster_assignment$author == "dispt"), "km.cluster"]
)
# Empty placeholder tibble; tibble() replaces the deprecated data_frame().
files <- tibble()
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## Please use `tibble()` instead.
# Plot and list the papers that share a cluster with disputed papers.
#
# Takes the x-th entry of the global `clusters_w_dispt` as the cluster id, plots
# the authors of the papers in that cluster and prints the author, filename and
# cluster of each of those papers. Reads the globals `cluster_assignment` and
# `potential_papers`.
#
# x: position in `clusters_w_dispt` of the cluster to show.
# Returns the value of the final print() call.
plot_disp <- function(x) {
  cluster_id <- clusters_w_dispt[x]

  # Columns are selected by name instead of by position (1, 2, 73), so the
  # function keeps working if the number of feature columns changes.
  cluster_authors <- cluster_assignment[
    which(cluster_assignment$km.cluster == cluster_id),
    "author"
  ]
  # Named cluster_files so it is not mistaken for the global `files`, which this
  # function never modified.
  cluster_files <- potential_papers[
    which(potential_papers$cluster == cluster_id),
    c("author", "filename", "cluster")
  ]

  plot(cluster_authors)
  print(cluster_files)
}
# Show the plot and file listing for every cluster that holds a disputed paper.
# seq_along() yields no iterations for an empty vector and removes the manual
# counter bookkeeping of the previous while loop.
for (i in seq_along(clusters_w_dispt)) {
  plot_disp(i)
}

## # A tibble: 7 x 3
## author filename cluster
## <chr> <chr> <int>
## 1 dispt dispt_fed_49.txt 8
## 2 dispt dispt_fed_51.txt 8
## 3 dispt dispt_fed_54.txt 8
## 4 dispt dispt_fed_57.txt 8
## 5 Madison Madison_fed_39.txt 8
## 6 Madison Madison_fed_43.txt 8
## 7 Madison Madison_fed_44.txt 8

## # A tibble: 6 x 3
## author filename cluster
## <chr> <chr> <int>
## 1 dispt dispt_fed_50.txt 5
## 2 dispt dispt_fed_52.txt 5
## 3 dispt dispt_fed_63.txt 5
## 4 Madison Madison_fed_14.txt 5
## 5 Madison Madison_fed_41.txt 5
## 6 Madison Madison_fed_58.txt 5

## # A tibble: 5 x 3
## author filename cluster
## <chr> <chr> <int>
## 1 dispt dispt_fed_53.txt 2
## 2 dispt dispt_fed_55.txt 2
## 3 dispt dispt_fed_56.txt 2
## 4 dispt dispt_fed_62.txt 2
## 5 Hamilton Hamilton_fed_1.txt 2
# Print the global `files` tibble.
# NOTE(review): this is still the empty tibble created before plot_disp() was
# defined; plot_disp() only assigns to a local variable of the same name, so
# nothing is ever stored here -- confirm whether collecting the rows was intended.
files
## # A tibble: 0 x 0