Homework 4 | Federalist Papers

Set WD & read in libraries

# NOTE(review): absolute, user-specific path; the script only runs on a
# machine with this exact directory layout. Consider a project-relative path.
setwd("/Users/emma/Documents/SYRACUSE/IST707/Week 4")
# NOTE(review): `okgs` is not referenced anywhere else in the visible source
# (possibly a typo for `pkgs`); the packages are attached one by one below.
okgs <- c("cluster", "tidyverse", "FactoMineR","factoextra" )
library(cluster)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.1     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Load in data & omit any NAs

# Read the word-frequency table and drop every row that contains an NA.
fed_papers <- na.omit(read.csv("fedPapers85.csv"))

Analyze fed_papers

# Print a compact overview of the table: dimensions, column names, column
# types, and the first few values of each column.
str(fed_papers)
## 'data.frame':    85 obs. of  72 variables:
##  $ author  : chr  "dispt" "dispt" "dispt" "dispt" ...
##  $ filename: chr  "dispt_fed_49.txt" "dispt_fed_50.txt" "dispt_fed_51.txt" "dispt_fed_52.txt" ...
##  $ a       : num  0.28 0.177 0.339 0.27 0.303 0.245 0.349 0.414 0.248 0.442 ...
##  $ all     : num  0.052 0.063 0.09 0.024 0.054 0.059 0.036 0.083 0.04 0.062 ...
##  $ also    : num  0.009 0.013 0.008 0.016 0.027 0.007 0.007 0.009 0.007 0.006 ...
##  $ an      : num  0.096 0.038 0.03 0.024 0.034 0.067 0.029 0.018 0.04 0.075 ...
##  $ and     : num  0.358 0.393 0.301 0.262 0.404 0.282 0.335 0.478 0.356 0.423 ...
##  $ any     : num  0.026 0.063 0.008 0.056 0.04 0.052 0.058 0.046 0.034 0.037 ...
##  $ are     : num  0.131 0.051 0.068 0.064 0.128 0.111 0.087 0.11 0.154 0.093 ...
##  $ as      : num  0.122 0.139 0.203 0.111 0.148 0.252 0.073 0.074 0.161 0.1 ...
##  $ at      : num  0.017 0.114 0.023 0.056 0.013 0.015 0.116 0.037 0.047 0.031 ...
##  $ be      : num  0.411 0.393 0.474 0.365 0.344 0.297 0.378 0.331 0.289 0.379 ...
##  $ been    : num  0.026 0.165 0.015 0.127 0.047 0.03 0.044 0.046 0.027 0.025 ...
##  $ but     : num  0.009 0 0.038 0.032 0.061 0.037 0.007 0.055 0.027 0.037 ...
##  $ by      : num  0.14 0.139 0.173 0.167 0.209 0.186 0.102 0.092 0.168 0.174 ...
##  $ can     : num  0.035 0 0.023 0.056 0.088 0 0.058 0.037 0.047 0.056 ...
##  $ do      : num  0.026 0.013 0 0 0 0 0.015 0.028 0 0 ...
##  $ down    : num  0 0 0.008 0 0 0.007 0 0 0 0 ...
##  $ even    : num  0.009 0.025 0.015 0.024 0.02 0.007 0.007 0.018 0 0.006 ...
##  $ every   : num  0.044 0 0.023 0.04 0.027 0.007 0.087 0.064 0.081 0.05 ...
##  $ for.    : num  0.096 0.076 0.098 0.103 0.141 0.067 0.116 0.055 0.127 0.1 ...
##  $ from    : num  0.044 0.101 0.053 0.079 0.074 0.096 0.08 0.083 0.074 0.124 ...
##  $ had     : num  0.035 0.101 0.008 0.016 0 0.022 0.015 0.009 0.007 0 ...
##  $ has     : num  0.017 0.013 0.015 0.024 0.054 0.015 0.036 0.037 0.02 0.019 ...
##  $ have    : num  0.044 0.152 0.023 0.143 0.047 0.119 0.044 0.074 0.074 0.044 ...
##  $ her     : num  0 0 0 0 0 0 0.007 0 0.034 0.025 ...
##  $ his     : num  0.017 0 0 0.024 0.02 0.067 0 0.018 0.02 0.05 ...
##  $ if.     : num  0 0.025 0.023 0.04 0.034 0.03 0.029 0 0 0.025 ...
##  $ in.     : num  0.262 0.291 0.308 0.238 0.263 0.401 0.189 0.267 0.248 0.274 ...
##  $ into    : num  0.009 0.025 0.038 0.008 0.013 0.037 0 0.037 0.013 0.037 ...
##  $ is      : num  0.157 0.038 0.15 0.151 0.189 0.26 0.167 0.083 0.208 0.23 ...
##  $ it      : num  0.175 0.127 0.173 0.222 0.108 0.156 0.102 0.165 0.134 0.131 ...
##  $ its     : num  0.07 0.038 0.03 0.048 0.013 0.015 0 0.046 0.02 0.019 ...
##  $ may     : num  0.035 0.038 0.12 0.056 0.047 0.074 0.08 0.092 0.027 0.106 ...
##  $ more    : num  0.026 0 0.038 0.056 0.067 0.045 0.08 0.064 0.06 0.081 ...
##  $ must    : num  0.026 0.013 0.083 0.071 0.013 0.015 0.044 0.018 0.027 0.068 ...
##  $ my      : num  0 0 0 0 0 0 0.007 0 0 0 ...
##  $ no      : num  0.035 0 0.03 0.032 0.047 0.059 0.022 0.018 0.02 0.044 ...
##  $ not     : num  0.114 0.127 0.068 0.087 0.128 0.134 0.102 0.101 0.094 0.106 ...
##  $ now     : num  0 0 0 0 0 0 0.007 0 0.007 0.012 ...
##  $ of      : num  0.9 0.747 0.858 0.802 0.869 ...
##  $ on      : num  0.14 0.139 0.15 0.143 0.054 0.141 0.051 0.083 0.127 0.118 ...
##  $ one     : num  0.026 0.025 0.03 0.032 0.047 0.052 0.073 0.046 0.06 0.031 ...
##  $ only    : num  0.035 0 0.023 0.048 0.027 0.022 0.007 0.046 0.02 0.012 ...
##  $ or      : num  0.096 0.114 0.06 0.064 0.081 0.074 0.153 0.037 0.154 0.081 ...
##  $ our     : num  0.017 0 0 0.016 0.027 0.03 0.051 0 0.007 0.025 ...
##  $ shall   : num  0.017 0 0.008 0.016 0 0.015 0.007 0 0.02 0 ...
##  $ should  : num  0.017 0.013 0.068 0.032 0 0.03 0.007 0 0 0.012 ...
##  $ so      : num  0.035 0.013 0.038 0.04 0.027 0.007 0.051 0.018 0.04 0.05 ...
##  $ some    : num  0.009 0.063 0.03 0.024 0.067 0.045 0.007 0.028 0.027 0.025 ...
##  $ such    : num  0.026 0 0.045 0.008 0.027 0.015 0.015 0 0.013 0.031 ...
##  $ than    : num  0.009 0 0.023 0 0.047 0.03 0.109 0.055 0.067 0.044 ...
##  $ that    : num  0.184 0.152 0.188 0.238 0.162 0.208 0.233 0.165 0.208 0.218 ...
##  $ the     : num  1.43 1.25 1.49 1.33 1.19 ...
##  $ their   : num  0.114 0.165 0.053 0.071 0.027 0.089 0.109 0.083 0.154 0.081 ...
##  $ then    : num  0 0 0.015 0.008 0.007 0.007 0.015 0.009 0.007 0.012 ...
##  $ there   : num  0.009 0 0.015 0 0.007 0.007 0.036 0.028 0.02 0 ...
##  $ things  : num  0.009 0 0 0 0 0 0 0 0 0.012 ...
##  $ this    : num  0.044 0.051 0.075 0.103 0.094 0.126 0.08 0.11 0.067 0.093 ...
##  $ to      : num  0.507 0.355 0.361 0.532 0.485 0.445 0.56 0.34 0.49 0.498 ...
##  $ up      : num  0 0 0 0 0 0 0.007 0 0 0 ...
##  $ upon    : num  0 0.013 0 0 0 0 0 0 0 0 ...
##  $ was     : num  0.009 0.051 0.008 0.087 0.027 0.007 0.015 0.018 0.027 0 ...
##  $ were    : num  0.017 0 0.015 0.079 0.02 0.03 0.029 0.009 0.007 0 ...
##  $ what    : num  0 0 0.008 0.008 0.02 0.015 0.015 0.009 0.02 0.025 ...
##  $ when    : num  0.009 0 0 0.024 0.007 0.037 0.007 0 0.02 0.012 ...
##  $ which   : num  0.175 0.114 0.105 0.167 0.155 0.186 0.211 0.175 0.201 0.199 ...
##  $ who     : num  0.044 0.038 0.008 0 0.027 0.045 0.022 0.018 0.04 0.031 ...
##  $ will    : num  0.009 0.089 0.173 0.079 0.168 0.111 0.145 0.267 0.154 0.106 ...
##  $ with    : num  0.087 0.063 0.045 0.079 0.074 0.089 0.073 0.129 0.027 0.081 ...
##  $ would   : num  0.192 0.139 0.068 0.064 0.04 0.037 0.073 0.037 0.04 0.031 ...
##  $ your    : num  0 0 0 0 0 0 0 0 0 0 ...
# Print per-column summaries: min / quartiles / mean / max for the numeric
# word-frequency columns, length / class / mode for the character columns.
summary(fed_papers)
##     author            filename               a               all         
##  Length:85          Length:85          Min.   :0.0960   Min.   :0.01500  
##  Class :character   Class :character   1st Qu.:0.2400   1st Qu.:0.03500  
##  Mode  :character   Mode  :character   Median :0.2990   Median :0.05000  
##                                        Mean   :0.2932   Mean   :0.05284  
##                                        3rd Qu.:0.3490   3rd Qu.:0.06600  
##                                        Max.   :0.4660   Max.   :0.12700  
##       also                an               and              any         
##  Min.   :0.000000   Min.   :0.00900   Min.   :0.2170   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.04900   1st Qu.:0.3190   1st Qu.:0.02500  
##  Median :0.007000   Median :0.07100   Median :0.3580   Median :0.04300  
##  Mean   :0.007659   Mean   :0.06839   Mean   :0.3846   Mean   :0.04161  
##  3rd Qu.:0.013000   3rd Qu.:0.08500   3rd Qu.:0.4130   3rd Qu.:0.05600  
##  Max.   :0.047000   Max.   :0.17900   Max.   :0.8210   Max.   :0.11400  
##       are                as               at                be        
##  Min.   :0.01300   Min.   :0.0270   Min.   :0.00000   Min.   :0.0400  
##  1st Qu.:0.05100   1st Qu.:0.1000   1st Qu.:0.02600   1st Qu.:0.2580  
##  Median :0.06800   Median :0.1240   Median :0.03800   Median :0.3070  
##  Mean   :0.07707   Mean   :0.1242   Mean   :0.04427   Mean   :0.3012  
##  3rd Qu.:0.10200   3rd Qu.:0.1440   3rd Qu.:0.06300   3rd Qu.:0.3580  
##  Max.   :0.16300   Max.   :0.2520   Max.   :0.11800   Max.   :0.4810  
##       been              but                by              can         
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0270   Min.   :0.00000  
##  1st Qu.:0.03000   1st Qu.:0.02200   1st Qu.:0.0920   1st Qu.:0.01400  
##  Median :0.05300   Median :0.03200   Median :0.1240   Median :0.02900  
##  Mean   :0.05967   Mean   :0.03232   Mean   :0.1272   Mean   :0.03558  
##  3rd Qu.:0.08400   3rd Qu.:0.04200   3rd Qu.:0.1620   3rd Qu.:0.05200  
##  Max.   :0.16500   Max.   :0.08900   Max.   :0.2640   Max.   :0.11000  
##        do                down               even            every        
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.00900  
##  Median :0.006000   Median :0.000000   Median :0.0100   Median :0.02200  
##  Mean   :0.006259   Mean   :0.001529   Mean   :0.0114   Mean   :0.02391  
##  3rd Qu.:0.010000   3rd Qu.:0.000000   3rd Qu.:0.0180   3rd Qu.:0.03400  
##  Max.   :0.028000   Max.   :0.017000   Max.   :0.0370   Max.   :0.08700  
##       for.              from              had               has         
##  Min.   :0.03000   Min.   :0.02600   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.07000   1st Qu.:0.05700   1st Qu.:0.00800   1st Qu.:0.02500  
##  Median :0.08800   Median :0.07800   Median :0.01600   Median :0.04600  
##  Mean   :0.09376   Mean   :0.07978   Mean   :0.02116   Mean   :0.04442  
##  3rd Qu.:0.11400   3rd Qu.:0.09800   3rd Qu.:0.02700   3rd Qu.:0.05700  
##  Max.   :0.21300   Max.   :0.16200   Max.   :0.14100   Max.   :0.11400  
##       have              her                his               if.         
##  Min.   :0.01100   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.07300   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.01600  
##  Median :0.09000   Median :0.000000   Median :0.01400   Median :0.02600  
##  Mean   :0.09474   Mean   :0.008094   Mean   :0.02862   Mean   :0.02733  
##  3rd Qu.:0.12400   3rd Qu.:0.007000   3rd Qu.:0.03900   3rd Qu.:0.03400  
##  Max.   :0.18500   Max.   :0.150000   Max.   :0.24700   Max.   :0.09900  
##       in.              into               is               it        
##  Min.   :0.1890   Min.   :0.00000   Min.   :0.0280   Min.   :0.0750  
##  1st Qu.:0.2670   1st Qu.:0.01000   1st Qu.:0.1180   1st Qu.:0.1290  
##  Median :0.3040   Median :0.02200   Median :0.1510   Median :0.1510  
##  Mean   :0.3174   Mean   :0.02409   Mean   :0.1563   Mean   :0.1567  
##  3rd Qu.:0.3550   3rd Qu.:0.03400   3rd Qu.:0.1960   3rd Qu.:0.1900  
##  Max.   :0.4990   Max.   :0.10500   Max.   :0.3230   Max.   :0.2840  
##       its               may               more              must        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.03000   1st Qu.:0.03600   1st Qu.:0.02300   1st Qu.:0.01400  
##  Median :0.04200   Median :0.05600   Median :0.04400   Median :0.02700  
##  Mean   :0.04836   Mean   :0.06181   Mean   :0.04561   Mean   :0.03305  
##  3rd Qu.:0.06400   3rd Qu.:0.08500   3rd Qu.:0.06100   3rd Qu.:0.04400  
##  Max.   :0.15000   Max.   :0.13400   Max.   :0.13000   Max.   :0.11100  
##        my                 no               not               now          
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.02000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.02000   1st Qu.:0.07500   1st Qu.:0.000000  
##  Median :0.000000   Median :0.02900   Median :0.09500   Median :0.005000  
##  Mean   :0.003259   Mean   :0.03236   Mean   :0.09248   Mean   :0.006035  
##  3rd Qu.:0.005000   3rd Qu.:0.04300   3rd Qu.:0.11200   3rd Qu.:0.010000  
##  Max.   :0.056000   Max.   :0.08300   Max.   :0.14800   Max.   :0.026000  
##        of               on               one               only        
##  Min.   :0.5620   Min.   :0.00000   Min.   :0.00500   Min.   :0.00000  
##  1st Qu.:0.8560   1st Qu.:0.04300   1st Qu.:0.02700   1st Qu.:0.01000  
##  Median :0.9020   Median :0.06200   Median :0.03600   Median :0.02200  
##  Mean   :0.9094   Mean   :0.06926   Mean   :0.04079   Mean   :0.02288  
##  3rd Qu.:0.9690   3rd Qu.:0.09700   3rd Qu.:0.05000   3rd Qu.:0.03400  
##  Max.   :1.2110   Max.   :0.15600   Max.   :0.13500   Max.   :0.06500  
##        or               our            shall             should       
##  Min.   :0.02700   Min.   :0.000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.07000   1st Qu.:0.000   1st Qu.:0.00600   1st Qu.:0.01000  
##  Median :0.08100   Median :0.013   Median :0.01400   Median :0.02700  
##  Mean   :0.09674   Mean   :0.023   Mean   :0.01875   Mean   :0.02656  
##  3rd Qu.:0.11600   3rd Qu.:0.028   3rd Qu.:0.02700   3rd Qu.:0.03800  
##  Max.   :0.32100   Max.   :0.199   Max.   :0.07900   Max.   :0.09100  
##        so               some              such              than        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.01800   1st Qu.:0.00900   1st Qu.:0.01800   1st Qu.:0.02700  
##  Median :0.02900   Median :0.01700   Median :0.02900   Median :0.04300  
##  Mean   :0.02982   Mean   :0.01989   Mean   :0.02922   Mean   :0.04396  
##  3rd Qu.:0.04000   3rd Qu.:0.02800   3rd Qu.:0.03800   3rd Qu.:0.05500  
##  Max.   :0.07200   Max.   :0.06700   Max.   :0.08500   Max.   :0.15000  
##       that            the            their              then         
##  Min.   :0.081   Min.   :0.669   Min.   :0.00500   Min.   :0.000000  
##  1st Qu.:0.171   1st Qu.:1.178   1st Qu.:0.05500   1st Qu.:0.000000  
##  Median :0.208   Median :1.275   Median :0.08600   Median :0.006000  
##  Mean   :0.212   Mean   :1.281   Mean   :0.08553   Mean   :0.006082  
##  3rd Qu.:0.244   3rd Qu.:1.423   3rd Qu.:0.10600   3rd Qu.:0.010000  
##  Max.   :0.380   Max.   :1.803   Max.   :0.18300   Max.   :0.021000  
##      there             things              this               to        
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00900   Min.   :0.3330  
##  1st Qu.:0.00900   1st Qu.:0.000000   1st Qu.:0.06900   1st Qu.:0.4690  
##  Median :0.02200   Median :0.000000   Median :0.09000   Median :0.5400  
##  Mean   :0.02638   Mean   :0.002659   Mean   :0.08701   Mean   :0.5358  
##  3rd Qu.:0.03900   3rd Qu.:0.006000   3rd Qu.:0.10500   3rd Qu.:0.6060  
##  Max.   :0.10500   Max.   :0.015000   Max.   :0.15300   Max.   :0.7760  
##        up                upon              was               were        
##  Min.   :0.000000   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00900   1st Qu.:0.00700  
##  Median :0.000000   Median :0.02800   Median :0.01500   Median :0.01500  
##  Mean   :0.003482   Mean   :0.02922   Mean   :0.02584   Mean   :0.02022  
##  3rd Qu.:0.006000   3rd Qu.:0.05000   3rd Qu.:0.03200   3rd Qu.:0.02900  
##  Max.   :0.032000   Max.   :0.10200   Max.   :0.18900   Max.   :0.10800  
##       what              when             which             who         
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0810   Min.   :0.00000  
##  1st Qu.:0.00500   1st Qu.:0.00000   1st Qu.:0.1180   1st Qu.:0.01600  
##  Median :0.01000   Median :0.00900   Median :0.1520   Median :0.02700  
##  Mean   :0.01286   Mean   :0.01174   Mean   :0.1578   Mean   :0.03253  
##  3rd Qu.:0.02000   3rd Qu.:0.01500   3rd Qu.:0.1830   3rd Qu.:0.04400  
##  Max.   :0.06000   Max.   :0.07300   Max.   :0.2760   Max.   :0.12900  
##       will              with             would             your         
##  Min.   :0.00600   Min.   :0.02700   Min.   :0.0090   Min.   :0.000000  
##  1st Qu.:0.05200   1st Qu.:0.06100   1st Qu.:0.0420   1st Qu.:0.000000  
##  Median :0.08100   Median :0.07900   Median :0.0780   Median :0.000000  
##  Mean   :0.09865   Mean   :0.07968   Mean   :0.1017   Mean   :0.002024  
##  3rd Qu.:0.13500   3rd Qu.:0.09200   3rd Qu.:0.1470   3rd Qu.:0.000000  
##  Max.   :0.34000   Max.   :0.15000   Max.   :0.3820   Max.   :0.074000

Remove filename & author

# Keep only the numeric word-frequency columns for clustering.
# The two identifier columns are dropped by name rather than by position, so
# a change in the CSV column order cannot silently remove the wrong columns.
clean_fed_papers <- fed_papers[
  ,
  setdiff(names(fed_papers), c("author", "filename")),
  drop = FALSE
]
str(clean_fed_papers)
## 'data.frame':    85 obs. of  70 variables:
##  $ a     : num  0.28 0.177 0.339 0.27 0.303 0.245 0.349 0.414 0.248 0.442 ...
##  $ all   : num  0.052 0.063 0.09 0.024 0.054 0.059 0.036 0.083 0.04 0.062 ...
##  $ also  : num  0.009 0.013 0.008 0.016 0.027 0.007 0.007 0.009 0.007 0.006 ...
##  $ an    : num  0.096 0.038 0.03 0.024 0.034 0.067 0.029 0.018 0.04 0.075 ...
##  $ and   : num  0.358 0.393 0.301 0.262 0.404 0.282 0.335 0.478 0.356 0.423 ...
##  $ any   : num  0.026 0.063 0.008 0.056 0.04 0.052 0.058 0.046 0.034 0.037 ...
##  $ are   : num  0.131 0.051 0.068 0.064 0.128 0.111 0.087 0.11 0.154 0.093 ...
##  $ as    : num  0.122 0.139 0.203 0.111 0.148 0.252 0.073 0.074 0.161 0.1 ...
##  $ at    : num  0.017 0.114 0.023 0.056 0.013 0.015 0.116 0.037 0.047 0.031 ...
##  $ be    : num  0.411 0.393 0.474 0.365 0.344 0.297 0.378 0.331 0.289 0.379 ...
##  $ been  : num  0.026 0.165 0.015 0.127 0.047 0.03 0.044 0.046 0.027 0.025 ...
##  $ but   : num  0.009 0 0.038 0.032 0.061 0.037 0.007 0.055 0.027 0.037 ...
##  $ by    : num  0.14 0.139 0.173 0.167 0.209 0.186 0.102 0.092 0.168 0.174 ...
##  $ can   : num  0.035 0 0.023 0.056 0.088 0 0.058 0.037 0.047 0.056 ...
##  $ do    : num  0.026 0.013 0 0 0 0 0.015 0.028 0 0 ...
##  $ down  : num  0 0 0.008 0 0 0.007 0 0 0 0 ...
##  $ even  : num  0.009 0.025 0.015 0.024 0.02 0.007 0.007 0.018 0 0.006 ...
##  $ every : num  0.044 0 0.023 0.04 0.027 0.007 0.087 0.064 0.081 0.05 ...
##  $ for.  : num  0.096 0.076 0.098 0.103 0.141 0.067 0.116 0.055 0.127 0.1 ...
##  $ from  : num  0.044 0.101 0.053 0.079 0.074 0.096 0.08 0.083 0.074 0.124 ...
##  $ had   : num  0.035 0.101 0.008 0.016 0 0.022 0.015 0.009 0.007 0 ...
##  $ has   : num  0.017 0.013 0.015 0.024 0.054 0.015 0.036 0.037 0.02 0.019 ...
##  $ have  : num  0.044 0.152 0.023 0.143 0.047 0.119 0.044 0.074 0.074 0.044 ...
##  $ her   : num  0 0 0 0 0 0 0.007 0 0.034 0.025 ...
##  $ his   : num  0.017 0 0 0.024 0.02 0.067 0 0.018 0.02 0.05 ...
##  $ if.   : num  0 0.025 0.023 0.04 0.034 0.03 0.029 0 0 0.025 ...
##  $ in.   : num  0.262 0.291 0.308 0.238 0.263 0.401 0.189 0.267 0.248 0.274 ...
##  $ into  : num  0.009 0.025 0.038 0.008 0.013 0.037 0 0.037 0.013 0.037 ...
##  $ is    : num  0.157 0.038 0.15 0.151 0.189 0.26 0.167 0.083 0.208 0.23 ...
##  $ it    : num  0.175 0.127 0.173 0.222 0.108 0.156 0.102 0.165 0.134 0.131 ...
##  $ its   : num  0.07 0.038 0.03 0.048 0.013 0.015 0 0.046 0.02 0.019 ...
##  $ may   : num  0.035 0.038 0.12 0.056 0.047 0.074 0.08 0.092 0.027 0.106 ...
##  $ more  : num  0.026 0 0.038 0.056 0.067 0.045 0.08 0.064 0.06 0.081 ...
##  $ must  : num  0.026 0.013 0.083 0.071 0.013 0.015 0.044 0.018 0.027 0.068 ...
##  $ my    : num  0 0 0 0 0 0 0.007 0 0 0 ...
##  $ no    : num  0.035 0 0.03 0.032 0.047 0.059 0.022 0.018 0.02 0.044 ...
##  $ not   : num  0.114 0.127 0.068 0.087 0.128 0.134 0.102 0.101 0.094 0.106 ...
##  $ now   : num  0 0 0 0 0 0 0.007 0 0.007 0.012 ...
##  $ of    : num  0.9 0.747 0.858 0.802 0.869 ...
##  $ on    : num  0.14 0.139 0.15 0.143 0.054 0.141 0.051 0.083 0.127 0.118 ...
##  $ one   : num  0.026 0.025 0.03 0.032 0.047 0.052 0.073 0.046 0.06 0.031 ...
##  $ only  : num  0.035 0 0.023 0.048 0.027 0.022 0.007 0.046 0.02 0.012 ...
##  $ or    : num  0.096 0.114 0.06 0.064 0.081 0.074 0.153 0.037 0.154 0.081 ...
##  $ our   : num  0.017 0 0 0.016 0.027 0.03 0.051 0 0.007 0.025 ...
##  $ shall : num  0.017 0 0.008 0.016 0 0.015 0.007 0 0.02 0 ...
##  $ should: num  0.017 0.013 0.068 0.032 0 0.03 0.007 0 0 0.012 ...
##  $ so    : num  0.035 0.013 0.038 0.04 0.027 0.007 0.051 0.018 0.04 0.05 ...
##  $ some  : num  0.009 0.063 0.03 0.024 0.067 0.045 0.007 0.028 0.027 0.025 ...
##  $ such  : num  0.026 0 0.045 0.008 0.027 0.015 0.015 0 0.013 0.031 ...
##  $ than  : num  0.009 0 0.023 0 0.047 0.03 0.109 0.055 0.067 0.044 ...
##  $ that  : num  0.184 0.152 0.188 0.238 0.162 0.208 0.233 0.165 0.208 0.218 ...
##  $ the   : num  1.43 1.25 1.49 1.33 1.19 ...
##  $ their : num  0.114 0.165 0.053 0.071 0.027 0.089 0.109 0.083 0.154 0.081 ...
##  $ then  : num  0 0 0.015 0.008 0.007 0.007 0.015 0.009 0.007 0.012 ...
##  $ there : num  0.009 0 0.015 0 0.007 0.007 0.036 0.028 0.02 0 ...
##  $ things: num  0.009 0 0 0 0 0 0 0 0 0.012 ...
##  $ this  : num  0.044 0.051 0.075 0.103 0.094 0.126 0.08 0.11 0.067 0.093 ...
##  $ to    : num  0.507 0.355 0.361 0.532 0.485 0.445 0.56 0.34 0.49 0.498 ...
##  $ up    : num  0 0 0 0 0 0 0.007 0 0 0 ...
##  $ upon  : num  0 0.013 0 0 0 0 0 0 0 0 ...
##  $ was   : num  0.009 0.051 0.008 0.087 0.027 0.007 0.015 0.018 0.027 0 ...
##  $ were  : num  0.017 0 0.015 0.079 0.02 0.03 0.029 0.009 0.007 0 ...
##  $ what  : num  0 0 0.008 0.008 0.02 0.015 0.015 0.009 0.02 0.025 ...
##  $ when  : num  0.009 0 0 0.024 0.007 0.037 0.007 0 0.02 0.012 ...
##  $ which : num  0.175 0.114 0.105 0.167 0.155 0.186 0.211 0.175 0.201 0.199 ...
##  $ who   : num  0.044 0.038 0.008 0 0.027 0.045 0.022 0.018 0.04 0.031 ...
##  $ will  : num  0.009 0.089 0.173 0.079 0.168 0.111 0.145 0.267 0.154 0.106 ...
##  $ with  : num  0.087 0.063 0.045 0.079 0.074 0.089 0.073 0.129 0.027 0.081 ...
##  $ would : num  0.192 0.139 0.068 0.064 0.04 0.037 0.073 0.037 0.04 0.031 ...
##  $ your  : num  0 0 0 0 0 0 0 0 0 0 ...

K-Means

Analyze using 5 centroids

# kmeans() picks its starting centres at random, so without a fixed seed the
# partition and the (arbitrary) cluster numbering can change on every run.
set.seed(707)
km <- kmeans(clean_fed_papers, centers = 5, nstart = 30)

# One row per paper: author (as a factor), the word frequencies, and the
# assigned cluster in the last column, explicitly named `km.cluster`.
cluster_assignment <- data.frame(fed_papers, km.cluster = km$cluster)
cluster_assignment$author <- as.factor(cluster_assignment$author)
# Drop the file name by name instead of by column position.
cluster_assignment$filename <- NULL
head(cluster_assignment)
##   author     a   all  also    an   and   any   are    as    at    be  been
## 1  dispt 0.280 0.052 0.009 0.096 0.358 0.026 0.131 0.122 0.017 0.411 0.026
## 2  dispt 0.177 0.063 0.013 0.038 0.393 0.063 0.051 0.139 0.114 0.393 0.165
## 3  dispt 0.339 0.090 0.008 0.030 0.301 0.008 0.068 0.203 0.023 0.474 0.015
## 4  dispt 0.270 0.024 0.016 0.024 0.262 0.056 0.064 0.111 0.056 0.365 0.127
## 5  dispt 0.303 0.054 0.027 0.034 0.404 0.040 0.128 0.148 0.013 0.344 0.047
## 6  dispt 0.245 0.059 0.007 0.067 0.282 0.052 0.111 0.252 0.015 0.297 0.030
##     but    by   can    do  down  even every  for.  from   had   has  have her
## 1 0.009 0.140 0.035 0.026 0.000 0.009 0.044 0.096 0.044 0.035 0.017 0.044   0
## 2 0.000 0.139 0.000 0.013 0.000 0.025 0.000 0.076 0.101 0.101 0.013 0.152   0
## 3 0.038 0.173 0.023 0.000 0.008 0.015 0.023 0.098 0.053 0.008 0.015 0.023   0
## 4 0.032 0.167 0.056 0.000 0.000 0.024 0.040 0.103 0.079 0.016 0.024 0.143   0
## 5 0.061 0.209 0.088 0.000 0.000 0.020 0.027 0.141 0.074 0.000 0.054 0.047   0
## 6 0.037 0.186 0.000 0.000 0.007 0.007 0.007 0.067 0.096 0.022 0.015 0.119   0
##     his   if.   in.  into    is    it   its   may  more  must my    no   not
## 1 0.017 0.000 0.262 0.009 0.157 0.175 0.070 0.035 0.026 0.026  0 0.035 0.114
## 2 0.000 0.025 0.291 0.025 0.038 0.127 0.038 0.038 0.000 0.013  0 0.000 0.127
## 3 0.000 0.023 0.308 0.038 0.150 0.173 0.030 0.120 0.038 0.083  0 0.030 0.068
## 4 0.024 0.040 0.238 0.008 0.151 0.222 0.048 0.056 0.056 0.071  0 0.032 0.087
## 5 0.020 0.034 0.263 0.013 0.189 0.108 0.013 0.047 0.067 0.013  0 0.047 0.128
## 6 0.067 0.030 0.401 0.037 0.260 0.156 0.015 0.074 0.045 0.015  0 0.059 0.134
##   now    of    on   one  only    or   our shall should    so  some  such  than
## 1   0 0.900 0.140 0.026 0.035 0.096 0.017 0.017  0.017 0.035 0.009 0.026 0.009
## 2   0 0.747 0.139 0.025 0.000 0.114 0.000 0.000  0.013 0.013 0.063 0.000 0.000
## 3   0 0.858 0.150 0.030 0.023 0.060 0.000 0.008  0.068 0.038 0.030 0.045 0.023
## 4   0 0.802 0.143 0.032 0.048 0.064 0.016 0.016  0.032 0.040 0.024 0.008 0.000
## 5   0 0.869 0.054 0.047 0.027 0.081 0.027 0.000  0.000 0.027 0.067 0.027 0.047
## 6   0 0.876 0.141 0.052 0.022 0.074 0.030 0.015  0.030 0.007 0.045 0.015 0.030
##    that   the their  then there things  this    to up  upon   was  were  what
## 1 0.184 1.425 0.114 0.000 0.009  0.009 0.044 0.507  0 0.000 0.009 0.017 0.000
## 2 0.152 1.254 0.165 0.000 0.000  0.000 0.051 0.355  0 0.013 0.051 0.000 0.000
## 3 0.188 1.490 0.053 0.015 0.015  0.000 0.075 0.361  0 0.000 0.008 0.015 0.008
## 4 0.238 1.326 0.071 0.008 0.000  0.000 0.103 0.532  0 0.000 0.087 0.079 0.008
## 5 0.162 1.193 0.027 0.007 0.007  0.000 0.094 0.485  0 0.000 0.027 0.020 0.020
## 6 0.208 1.469 0.089 0.007 0.007  0.000 0.126 0.445  0 0.000 0.007 0.030 0.015
##    when which   who  will  with would your km.cluster
## 1 0.009 0.175 0.044 0.009 0.087 0.192    0          2
## 2 0.000 0.114 0.038 0.089 0.063 0.139    0          5
## 3 0.000 0.105 0.008 0.173 0.045 0.068    0          2
## 4 0.024 0.167 0.000 0.079 0.079 0.064    0          2
## 5 0.007 0.155 0.027 0.168 0.074 0.040    0          5
## 6 0.037 0.186 0.045 0.111 0.089 0.037    0          2
# Scatter of cluster id (y) against row index (x), coloured by author.
# Points are coloured by the factor's integer level codes, so the legend is
# built from the same levels and the same codes; labels and colours cannot
# drift apart the way hand-typed colour names can.
author_fct <- as.factor(fed_papers$author)
plot(km$cluster, col = author_fct)
legend(
  60, 4,
  legend = levels(author_fct),
  col = seq_along(levels(author_fct)),
  pch = 1  # the plot draws points, so the legend key shows points, not lines
)

# Visualise the k-means result for the word-frequency table, drawing both
# points and text labels and marking the cluster centres.
fviz_cluster(
  km,
  data = clean_fed_papers,
  geom = c("point", "text"),
  show.clust.cent = TRUE
)

Analyze using hierarchical analysis

# Hierarchical clustering with complete linkage, cut into 5 groups.
hc.cut <- hcut(
  clean_fed_papers,
  k = 5,
  hc_method = "complete"
)
# Dendrogram with leaf labels and a rectangle around each group.
fviz_dend(hc.cut, rect = TRUE, show_labels = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# Same analysis with single linkage, again cut into 5 groups.
hc.cut.sing <- hcut(
  clean_fed_papers,
  k = 5,
  hc_method = "single"
)
# Dendrogram with a rectangle around each group; leaf labels are hidden.
fviz_dend(hc.cut.sing, rect = TRUE, show_labels = FALSE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

Plot K-Means analysis using GGPlot

# Bar chart of papers per cluster, with each bar split by author.
# The cluster id is taken from the `km.cluster` column of the plotted data
# frame instead of the global `km$cluster` vector, so the aesthetic always
# stays aligned with the rows being drawn (the axis label becomes
# "km.cluster").
ggplot(data = cluster_assignment, aes(y = km.cluster, fill = author)) +
  geom_bar() +
  ggtitle("Count of Clusters by Author") +
  theme(plot.title = element_text(hjust = 0.5))

K-Means; more centroids

Analyze using 15 centroids

# kmeans() picks its starting centres at random, so without a fixed seed the
# partition and the (arbitrary) cluster numbering can change on every run.
set.seed(707)
km <- kmeans(clean_fed_papers, centers = 15, nstart = 50)

# Rebuild the per-paper table for the 15-cluster solution: author (as a
# factor), the word frequencies, and the cluster id in `km.cluster`.
cluster_assignment <- data.frame(fed_papers, km.cluster = km$cluster)
cluster_assignment$author <- as.factor(cluster_assignment$author)
# Drop the file name by name instead of by column position.
cluster_assignment$filename <- NULL
head(cluster_assignment)
##   author     a   all  also    an   and   any   are    as    at    be  been
## 1  dispt 0.280 0.052 0.009 0.096 0.358 0.026 0.131 0.122 0.017 0.411 0.026
## 2  dispt 0.177 0.063 0.013 0.038 0.393 0.063 0.051 0.139 0.114 0.393 0.165
## 3  dispt 0.339 0.090 0.008 0.030 0.301 0.008 0.068 0.203 0.023 0.474 0.015
## 4  dispt 0.270 0.024 0.016 0.024 0.262 0.056 0.064 0.111 0.056 0.365 0.127
## 5  dispt 0.303 0.054 0.027 0.034 0.404 0.040 0.128 0.148 0.013 0.344 0.047
## 6  dispt 0.245 0.059 0.007 0.067 0.282 0.052 0.111 0.252 0.015 0.297 0.030
##     but    by   can    do  down  even every  for.  from   had   has  have her
## 1 0.009 0.140 0.035 0.026 0.000 0.009 0.044 0.096 0.044 0.035 0.017 0.044   0
## 2 0.000 0.139 0.000 0.013 0.000 0.025 0.000 0.076 0.101 0.101 0.013 0.152   0
## 3 0.038 0.173 0.023 0.000 0.008 0.015 0.023 0.098 0.053 0.008 0.015 0.023   0
## 4 0.032 0.167 0.056 0.000 0.000 0.024 0.040 0.103 0.079 0.016 0.024 0.143   0
## 5 0.061 0.209 0.088 0.000 0.000 0.020 0.027 0.141 0.074 0.000 0.054 0.047   0
## 6 0.037 0.186 0.000 0.000 0.007 0.007 0.007 0.067 0.096 0.022 0.015 0.119   0
##     his   if.   in.  into    is    it   its   may  more  must my    no   not
## 1 0.017 0.000 0.262 0.009 0.157 0.175 0.070 0.035 0.026 0.026  0 0.035 0.114
## 2 0.000 0.025 0.291 0.025 0.038 0.127 0.038 0.038 0.000 0.013  0 0.000 0.127
## 3 0.000 0.023 0.308 0.038 0.150 0.173 0.030 0.120 0.038 0.083  0 0.030 0.068
## 4 0.024 0.040 0.238 0.008 0.151 0.222 0.048 0.056 0.056 0.071  0 0.032 0.087
## 5 0.020 0.034 0.263 0.013 0.189 0.108 0.013 0.047 0.067 0.013  0 0.047 0.128
## 6 0.067 0.030 0.401 0.037 0.260 0.156 0.015 0.074 0.045 0.015  0 0.059 0.134
##   now    of    on   one  only    or   our shall should    so  some  such  than
## 1   0 0.900 0.140 0.026 0.035 0.096 0.017 0.017  0.017 0.035 0.009 0.026 0.009
## 2   0 0.747 0.139 0.025 0.000 0.114 0.000 0.000  0.013 0.013 0.063 0.000 0.000
## 3   0 0.858 0.150 0.030 0.023 0.060 0.000 0.008  0.068 0.038 0.030 0.045 0.023
## 4   0 0.802 0.143 0.032 0.048 0.064 0.016 0.016  0.032 0.040 0.024 0.008 0.000
## 5   0 0.869 0.054 0.047 0.027 0.081 0.027 0.000  0.000 0.027 0.067 0.027 0.047
## 6   0 0.876 0.141 0.052 0.022 0.074 0.030 0.015  0.030 0.007 0.045 0.015 0.030
##    that   the their  then there things  this    to up  upon   was  were  what
## 1 0.184 1.425 0.114 0.000 0.009  0.009 0.044 0.507  0 0.000 0.009 0.017 0.000
## 2 0.152 1.254 0.165 0.000 0.000  0.000 0.051 0.355  0 0.013 0.051 0.000 0.000
## 3 0.188 1.490 0.053 0.015 0.015  0.000 0.075 0.361  0 0.000 0.008 0.015 0.008
## 4 0.238 1.326 0.071 0.008 0.000  0.000 0.103 0.532  0 0.000 0.087 0.079 0.008
## 5 0.162 1.193 0.027 0.007 0.007  0.000 0.094 0.485  0 0.000 0.027 0.020 0.020
## 6 0.208 1.469 0.089 0.007 0.007  0.000 0.126 0.445  0 0.000 0.007 0.030 0.015
##    when which   who  will  with would your km.cluster
## 1 0.009 0.175 0.044 0.009 0.087 0.192    0          8
## 2 0.000 0.114 0.038 0.089 0.063 0.139    0          5
## 3 0.000 0.105 0.008 0.173 0.045 0.068    0          8
## 4 0.024 0.167 0.000 0.079 0.079 0.064    0          5
## 5 0.007 0.155 0.027 0.168 0.074 0.040    0          2
## 6 0.037 0.186 0.045 0.111 0.089 0.037    0          8

Plot using GGPlot

# Bar chart of papers per cluster, with each bar split by author.
# The cluster id is taken from the `km.cluster` column of the plotted data
# frame instead of the global `km$cluster` vector, so the aesthetic always
# stays aligned with the rows being drawn (the axis label becomes
# "km.cluster").
ggplot(data = cluster_assignment, aes(y = km.cluster, fill = author)) +
  geom_bar() +
  ggtitle("Count of Clusters by Author") +
  theme(plot.title = element_text(hjust = 0.5))

Visualize which author the disputed documents may have been written by.

# Full table (author, filename, word frequencies) plus the cluster id.
potential_papers <- tibble(fed_papers, cluster = km$cluster)

# Distinct cluster ids that contain at least one disputed paper. Columns are
# addressed by name rather than by hard-coded position.
clusters_w_dispt <- unique(
  cluster_assignment$km.cluster[cluster_assignment$author == "dispt"]
)

# Plot and list the papers that share a cluster with disputed papers.
#
# x: position in `clusters_w_dispt` of the cluster to show.
#
# Draws the author counts for that cluster, prints the matching
# author / filename / cluster rows, and returns those rows invisibly.
plot_disp <- function(x) {
  cluster_id <- clusters_w_dispt[x]
  in_cluster <- cluster_assignment$km.cluster == cluster_id
  cluster_plot <- cluster_assignment$author[in_cluster]
  files <- potential_papers[
    potential_papers$cluster == cluster_id,
    c("author", "filename", "cluster")
  ]
  plot(cluster_plot)
  print(files)
  invisible(files)
}

# Run the report for every cluster that holds a disputed paper and keep the
# listed rows. An assignment inside plot_disp() only creates a local `files`,
# so the results are collected from the return values here.
files <- bind_rows(lapply(seq_along(clusters_w_dispt), plot_disp))

## # A tibble: 7 x 3
##   author  filename           cluster
##   <chr>   <chr>                <int>
## 1 dispt   dispt_fed_49.txt         8
## 2 dispt   dispt_fed_51.txt         8
## 3 dispt   dispt_fed_54.txt         8
## 4 dispt   dispt_fed_57.txt         8
## 5 Madison Madison_fed_39.txt       8
## 6 Madison Madison_fed_43.txt       8
## 7 Madison Madison_fed_44.txt       8

## # A tibble: 6 x 3
##   author  filename           cluster
##   <chr>   <chr>                <int>
## 1 dispt   dispt_fed_50.txt         5
## 2 dispt   dispt_fed_52.txt         5
## 3 dispt   dispt_fed_63.txt         5
## 4 Madison Madison_fed_14.txt       5
## 5 Madison Madison_fed_41.txt       5
## 6 Madison Madison_fed_58.txt       5

## # A tibble: 5 x 3
##   author   filename           cluster
##   <chr>    <chr>                <int>
## 1 dispt    dispt_fed_53.txt         2
## 2 dispt    dispt_fed_55.txt         2
## 3 dispt    dispt_fed_56.txt         2
## 4 dispt    dispt_fed_62.txt         2
## 5 Hamilton Hamilton_fed_1.txt       2
# Print the top-level `files` table.
files
## # A tibble: 0 x 0