Demonstration code for extracting metrics from PDF files.
This notebook processes a single document; similar code will be used to process an entire directory.
For more details on how this notebook works, please see
suppressPackageStartupMessages(library("ggplot2")) # For viz
suppressPackageStartupMessages(library(pdftools)) # for reading PDF files
# https://cran.r-project.org/web/packages/pdftools/index.html
suppressPackageStartupMessages(library(lattice)) # for some graphics
suppressPackageStartupMessages(library(quanteda)) # for text_readability
suppressPackageStartupMessages(library(syuzhet)) # For Sentiment
suppressPackageStartupMessages(library(tictoc)) # For Timing
suppressPackageStartupMessages(library(udpipe)) # For nlp
#https://cran.r-project.org/web/packages/udpipe/index.html
# Min-max rescale numeric values onto the interval [0, 1].
#
# Args:
#   m: numeric values. NA entries are ignored when locating the minimum and
#      maximum, and they stay NA in the result.
# Returns:
#   An object shaped like `m` in which the smallest observed value maps to 0
#   and the largest to 1. When every observed value is identical the range is
#   zero, so each observed value maps to 0 rather than the NaN that a 0 / 0
#   division would produce.
normalit <- function(m) {
  rng <- range(m, na.rm = TRUE)
  span <- rng[2] - rng[1]
  if (isTRUE(span == 0)) {
    # Constant input: the numerator is already 0, so divide by 1 to avoid 0 / 0.
    span <- 1
  }
  (m - rng[1]) / span
}
# Extract the text of the source PDF into `text`, timing the read with
# tic()/toc().
tic()
filename <- "../PDF_Input/Calculus.pdf"
text <- pdftools::pdf_text(filename)
toc()
## 2.36 sec elapsed
# Show the second element of `text` as a quick check of the extraction.
print(text[2])
## [1] " Contents\r\nCHAPTER 1 Introduction to Calculus\r\n 1.1 Velocity and Distance\r\n 1.2 Calculus Without Limits\r\n 1.3 The Velocity at an Instant\r\n 1.4 Circular Motion\r\n 1.5 A Review of Trigonometry\r\n 1.6 A Thousand Points of Light\r\n 1.7 Computing in Calculus\r\nCHAPTER 2 Derivatives\r\n The Derivative of a Function\r\n Powers and Polynomials\r\n The Slope and the Tangent Line\r\n Derivative of the Sine and Cosine\r\n The Product and Quotient and Power Rules\r\n Limits\r\n Continuous Functions\r\nCHAPTER 3 Applications of the Derivative\r\n 3.1 Linear Approximation\r\n 3.2 Maximum and Minimum Problems\r\n 3.3 Second Derivatives: Minimum vs. Maximum\r\n 3.4 Graphs\r\n 3.5 Ellipses, Parabolas, and Hyperbolas\r\n 3.6 ,\r\n Iterations x,+ = F(x,)\r\n 3.7 Newton's Method and Chaos\r\n 3.8 The Mean Value Theorem and l'H8pital's Rule\r\n"
# Label every element of `text` with one of five consecutive sections and
# store the labels next to the raw text in raw.df.
tic()
# Four sections get `section_size` elements each; the fifth also absorbs the
# remainder left over when the total is not a multiple of five.
section_size <- length(text) %/% 5
final_bit <- length(text) %% 5
doc_id_text <- rep(
  c("Section 01", "Section 02", "Section 03", "Section 04", "Section 05"),
  times = c(rep(section_size, 4), section_size + final_bit)
)
raw.df <- data.frame(
  doc_id = doc_id_text,
  raw_text = text,
  stringsAsFactors = FALSE
)
toc()
## 0 sec elapsed
# List the columns of raw.df.
names(raw.df)
## [1] "doc_id" "raw_text"
# Show the raw text held in rows 3 to 5 of raw.df.
raw.df$raw_text[3:5]
## [1] " Contents\r\nCHAPTER 4 The Chain Rule\r\n 4.1 Derivatives by the Chain Rule\r\n 4.2 Implicit Differentiation and Related Rates\r\n 4.3 Inverse Functions and Their Derivatives\r\n 4.4 Inverses of Trigonometric Functions\r\nCHAPTER 5 Integrals\r\n 5.1 The Idea of the Integral 177\r\n 5.2 Antiderivatives 182\r\n 5.3 Summation vs. Integration 187\r\n 5.4 Indefinite Integrals and Substitutions 195\r\n 5.5 The Definite Integral 201\r\n 5.6 Properties of the Integral and the Average Value 206\r\n 5.7 The Fundamental Theorem and Its Consequences 213\r\n 5.8 Numerical Integration 220\r\nCHAPTER 6 Exponentials and Logarithms\r\n 6.1 An Overview 228\r\n 6.2 The Exponential ex 236\r\n 6.3 Growth and Decay in Science and Economics 242\r\n 6.4 Logarithms 252\r\n 6.5 Separable Equations Including the Logistic Equation 259\r\n 6.6 Powers Instead of Exponentials 267\r\n 6.7 Hyperbolic Functions 277\r\nCHAPTER 7 Techniques of Integration\r\n 7.1 Integration by Parts\r\n 7.2 Trigonometric Integrals\r\n 7.3 Trigonometric Substitutions\r\n 7.4 Partial Fractions\r\n 7.5 Improper Integrals\r\nCHAPTER 8 Applications of the Integral\r\n 8.1 Areas and Volumes by Slices\r\n 8.2 Length of a Plane Curve\r\n 8.3 Area of a Surface of Revolution\r\n 8.4 Probability and Calculus\r\n 8.5 Masses and Moments\r\n 8.6 Force, Work, and Energy\r\n"
## [2] " Contents\r\nCHAPTER 9 Polar Coordinates and Complex Numbers\r\n 9.1 Polar Coordinates 348\r\n 9.2 Polar Equations and Graphs 351\r\n 9.3 Slope, Length, and Area for Polar Curves 356\r\n 9.4 Complex Numbers 360\r\nCHAPTER 10 Infinite Series\r\n 10.1 The Geometric Series\r\n 10.2 Convergence Tests: Positive Series\r\n 10.3 Convergence Tests: All Series\r\n 10.4 The Taylor Series for ex, sin x, and cos x\r\n 10.5 Power Series\r\nCHAPTER 11 Vectors and Matrices\r\n 11.1 Vectors and Dot Products\r\n 11.2 Planes and Projections\r\n 11.3 Cross Products and Determinants\r\n 11.4 Matrices and Linear Equations\r\n 11.5 Linear Algebra in Three Dimensions\r\nCHAPTER 12 Motion along a Curve\r\n 12.1 The Position Vector 446\r\n 12.2 Plane Motion: Projectiles and Cycloids 453\r\n 12.3 Tangent Vector and Normal Vector 459\r\n 12.4 Polar Coordinates and Planetary Motion 464\r\nCHAPTER 13 Partial Derivatives\r\n 13.1 Surfaces and Level Curves 472\r\n 13.2 Partial Derivatives 475\r\n 13.3 Tangent Planes and Linear Approximations 480\r\n 13.4 Directional Derivatives and Gradients 490\r\n 13.5 The Chain Rule 497\r\n 13.6 Maxima, Minima, and Saddle Points 504\r\n 13.7 Constraints and Lagrange Multipliers 514\r\n"
## [3] " Contents\r\nCHAPTER 14 Multiple Integrals\r\n 14.1 Double Integrals\r\n 14.2 Changing to Better Coordinates\r\n 14.3 Triple Integrals\r\n 14.4 Cylindrical and Spherical Coordinates\r\nCHAPTER 15 Vector Calculus\r\n 15.1 Vector Fields\r\n 15.2 Line Integrals\r\n 15.3 Green's Theorem\r\n 15.4 Surface Integrals\r\n 15.5 The Divergence Theorem\r\n 15.6 Stokes' Theorem and the Curl of F\r\nCHAPTER 16 Mathematics after Calculus\r\n 16.1 Linear Algebra\r\n 16.2 Differential Equations\r\n 16.3 Discrete Mathematics\r\n Study Guide For Chapter 1\r\n Answers to Odd-Numbered Problems\r\n Index\r\n Table of Integrals\r\n"
# Compute readability measures for every document in raw.df, then split each
# document name into `section` and `subsection` columns.
tic()
# Ask for every measure explicitly instead of relying on the package default,
# so the result always carries the full set of measure columns.
# NOTE(review): the default `measure` appears to differ between quanteda
# releases - confirm against the installed version.
tr.df <- textstat_readability(
  corpus(raw.df, docid_field = "doc_id", text_field = "raw_text"),
  measure = "all"
)
# Split document names on literal dots.
# NOTE(review): presumably the names look like "<section>.<n>" because the
# repeated doc_id values are made unique by quanteda - verify.
spl <- strsplit(as.character(tr.df$document), ".", fixed = TRUE)
# `collapse` is a literal separator, not a regex, so the pieces are re-joined
# with a plain "." and no backslash. vapply() guarantees a character vector
# even when there are no documents.
tr.df$section <- vapply(
  spl,
  function(parts) paste(head(parts, -1), collapse = "."),
  character(1)
)
tr.df$subsection <- vapply(
  spl,
  function(parts) paste(tail(parts, -1), collapse = "."),
  character(1)
)
toc()
## 5.47 sec elapsed
# List the columns of tr.df, including the section/subsection columns added above.
names(tr.df)
## [1] "document" "ARI"
## [3] "ARI.simple" "Bormuth"
## [5] "Bormuth.GP" "Coleman"
## [7] "Coleman.C2" "Coleman.Liau"
## [9] "Coleman.Liau.grade" "Coleman.Liau.short"
## [11] "Dale.Chall" "Dale.Chall.old"
## [13] "Dale.Chall.PSK" "Danielson.Bryan"
## [15] "Danielson.Bryan.2" "Dickes.Steiwer"
## [17] "DRP" "ELF"
## [19] "Farr.Jenkins.Paterson" "Flesch"
## [21] "Flesch.PSK" "Flesch.Kincaid"
## [23] "FOG" "FOG.PSK"
## [25] "FOG.NRI" "FORCAST"
## [27] "FORCAST.RGL" "Fucks"
## [29] "Linsear.Write" "LIW"
## [31] "nWS" "nWS.2"
## [33] "nWS.3" "nWS.4"
## [35] "RIX" "Scrabble"
## [37] "SMOG" "SMOG.C"
## [39] "SMOG.simple" "SMOG.de"
## [41] "Spache" "Spache.old"
## [43] "Strain" "Traenkle.Bailer"
## [45] "Traenkle.Bailer.2" "Wheeler.Smith"
## [47] "meanSentenceLength" "meanWordSyllables"
## [49] "section" "subsection"
# Compute every readability measure for the `data_corpus_inaugural` corpus,
# then correlate the measures with one another.
inaugReadability <- textstat_readability(data_corpus_inaugural, measure = "all")
# Drop the first column so that only the numeric measure columns reach cor().
cor(inaugReadability[-1])
## ARI ARI.simple Bormuth Bormuth.GP
## ARI 1.00000000 0.999985813 -0.99188017 0.90232231
## ARI.simple 0.99998581 1.000000000 -0.99254312 0.90363285
## Bormuth -0.99188017 -0.992543125 1.00000000 -0.92665498
## Bormuth.GP 0.90232231 0.903632850 -0.92665498 1.00000000
## Coleman -0.57842561 -0.574425719 0.47841665 -0.34186891
## Coleman.C2 -0.76781514 -0.764705749 0.68780777 -0.51669363
## Coleman.Liau -0.80785899 -0.804736665 0.72721217 -0.55936385
## Coleman.Liau.grade 0.80785899 0.804736665 -0.72721217 0.55936385
## Coleman.Liau.short 0.80788731 0.804765212 -0.72724574 0.55939187
## Dale.Chall -0.99099423 -0.991693434 0.99996713 -0.92746308
## Dale.Chall.old 0.99099423 0.991693434 -0.99996713 0.92746308
## Dale.Chall.PSK 0.99099423 0.991693434 -0.99996713 0.92746308
## Danielson.Bryan 0.99708146 0.996677212 -0.97962805 0.88486422
## Danielson.Bryan.2 -0.31030344 -0.305238155 0.18706023 -0.04006627
## Dickes.Steiwer -0.99844204 -0.998703929 0.99691688 -0.91329434
## DRP 0.99188017 0.992543125 -1.00000000 0.92665498
## ELF 0.99594813 0.995835853 -0.98541527 0.89837506
## Farr.Jenkins.Paterson -0.99154201 -0.992219215 0.99998536 -0.92696268
## Flesch -0.96831214 -0.967086924 0.93162624 -0.82257543
## Flesch.PSK 0.98543988 0.984630595 -0.95852762 0.85630865
## Flesch.Kincaid 0.99828644 0.998096618 -0.98602857 0.89508980
## FOG 0.99534334 0.995012384 -0.97973311 0.88272558
## FOG.PSK 0.99804952 0.998321809 -0.99679394 0.91366411
## FOG.NRI 0.49053097 0.489283365 -0.45717614 0.33617976
## FORCAST 0.57842561 0.574425719 -0.47841665 0.34186891
## FORCAST.RGL 0.57842561 0.574425719 -0.47841665 0.34186891
## Fucks 0.99773065 0.998049346 -0.99751520 0.91989373
## Linsear.Write 0.98324412 0.982726746 -0.96316927 0.88236447
## LIW 0.99534363 0.994936160 -0.97782198 0.88013733
## nWS 0.97160116 0.970438242 -0.93628326 0.82182649
## nWS.2 0.97550799 0.974435314 -0.94231463 0.82921349
## nWS.3 0.98501356 0.984272471 -0.95970981 0.85402853
## nWS.4 0.99487148 0.994513256 -0.97861519 0.88101179
## RIX 0.99222014 0.991916139 -0.97704126 0.89115856
## Scrabble -0.01121825 -0.009365094 -0.03334969 -0.01582000
## SMOG 0.97393941 0.972959989 -0.94303306 0.82113760
## SMOG.C 0.97457741 0.973614539 -0.94405248 0.82339465
## SMOG.simple 0.97393941 0.972959989 -0.94303306 0.82113760
## SMOG.de 0.97393941 0.972959989 -0.94303306 0.82113760
## Spache 0.99099423 0.991693434 -0.99996713 0.92746308
## Spache.old 0.99099423 0.991693434 -0.99996713 0.92746308
## Strain 0.99760642 0.997863573 -0.99596056 0.92013679
## Traenkle.Bailer -0.99922385 -0.999411499 0.99593169 -0.91015126
## Traenkle.Bailer.2 -0.75491779 -0.751448659 0.66631519 -0.51471861
## Wheeler.Smith 0.99594813 0.995835853 -0.98541527 0.89837506
## meanSentenceLength 0.99099423 0.991693434 -0.99996713 0.92746308
## meanWordSyllables 0.67621719 0.672476162 -0.58187505 0.44795440
## Coleman Coleman.C2 Coleman.Liau
## ARI -0.5784256 -0.7678151 -0.8078590
## ARI.simple -0.5744257 -0.7647057 -0.8047367
## Bormuth 0.4784167 0.6878078 0.7272122
## Bormuth.GP -0.3418689 -0.5166936 -0.5593638
## Coleman 1.0000000 0.9578621 0.9027073
## Coleman.C2 0.9578621 1.0000000 0.9676044
## Coleman.Liau 0.9027073 0.9676044 1.0000000
## Coleman.Liau.grade -0.9027073 -0.9676044 -1.0000000
## Coleman.Liau.short -0.9026909 -0.9676038 -1.0000000
## Dale.Chall 0.4728710 0.6830083 0.7223808
## Dale.Chall.old -0.4728710 -0.6830083 -0.7223808
## Dale.Chall.PSK -0.4728710 -0.6830083 -0.7223808
## Danielson.Bryan -0.6320177 -0.8068048 -0.8471402
## Danielson.Bryan.2 0.8922077 0.7931428 0.8066665
## Dickes.Steiwer 0.5390065 0.7368282 0.7764052
## DRP -0.4784167 -0.6878078 -0.7272122
## ELF -0.6127006 -0.7903882 -0.8134246
## Farr.Jenkins.Paterson 0.4768093 0.6862258 0.7252233
## Flesch 0.7524555 0.8921701 0.9145879
## Flesch.PSK -0.6991742 -0.8564052 -0.8829007
## Flesch.Kincaid -0.6124186 -0.7933597 -0.8255002
## FOG -0.6279137 -0.8051202 -0.8390619
## FOG.PSK -0.5401789 -0.7376615 -0.7749720
## FOG.NRI -0.4679026 -0.5350724 -0.5426918
## FORCAST -1.0000000 -0.9578621 -0.9027073
## FORCAST.RGL -1.0000000 -0.9578621 -0.9027073
## Fucks -0.5305954 -0.7279653 -0.7681848
## Linsear.Write -0.6540873 -0.8139821 -0.8446074
## LIW -0.6415138 -0.8144833 -0.8466378
## nWS -0.7435480 -0.8869623 -0.9108590
## nWS.2 -0.7297189 -0.8775918 -0.9042061
## nWS.3 -0.6826246 -0.8444418 -0.8757884
## nWS.4 -0.6316756 -0.8078997 -0.8416766
## RIX -0.6292106 -0.7992488 -0.8296056
## Scrabble 0.3522408 0.2472068 0.1918831
## SMOG -0.7133315 -0.8681187 -0.8952482
## SMOG.C -0.7113151 -0.8663610 -0.8936817
## SMOG.simple -0.7133315 -0.8681187 -0.8952482
## SMOG.de -0.7133315 -0.8681187 -0.8952482
## Spache -0.4728710 -0.6830083 -0.7223808
## Spache.old -0.4728710 -0.6830083 -0.7223808
## Strain -0.5461293 -0.7394040 -0.7744192
## Traenkle.Bailer 0.5491468 0.7451720 0.7853284
## Traenkle.Bailer.2 0.9206771 0.9574552 0.9904793
## Wheeler.Smith -0.6127006 -0.7903882 -0.8134246
## meanSentenceLength -0.4728710 -0.6830083 -0.7223808
## meanWordSyllables -0.9645525 -0.9629188 -0.9543023
## Coleman.Liau.grade Coleman.Liau.short Dale.Chall
## ARI 0.8078590 0.8078873 -0.99099423
## ARI.simple 0.8047367 0.8047652 -0.99169343
## Bormuth -0.7272122 -0.7272457 0.99996713
## Bormuth.GP 0.5593638 0.5593919 -0.92746308
## Coleman -0.9027073 -0.9026909 0.47287100
## Coleman.C2 -0.9676044 -0.9676038 0.68300831
## Coleman.Liau -1.0000000 -1.0000000 0.72238083
## Coleman.Liau.grade 1.0000000 1.0000000 -0.72238083
## Coleman.Liau.short 1.0000000 1.0000000 -0.72241463
## Dale.Chall -0.7223808 -0.7224146 1.00000000
## Dale.Chall.old 0.7223808 0.7224146 -1.00000000
## Dale.Chall.PSK 0.7223808 0.7224146 -1.00000000
## Danielson.Bryan 0.8471402 0.8471650 -0.97829531
## Danielson.Bryan.2 -0.8066665 -0.8066351 0.18028533
## Dickes.Steiwer -0.7764052 -0.7764357 0.99638999
## DRP 0.7272122 0.7272457 -0.99996713
## ELF 0.8134246 0.8134514 -0.98451149
## Farr.Jenkins.Paterson -0.7252233 -0.7252569 0.99998999
## Flesch -0.9145879 -0.9146054 0.92913685
## Flesch.PSK 0.8829007 0.8829221 -0.95657238
## Flesch.Kincaid 0.8255002 0.8255270 -0.98488025
## FOG 0.8390619 0.8390877 -0.97841468
## FOG.PSK 0.7749720 0.7750026 -0.99626213
## FOG.NRI 0.5426918 0.5426992 -0.45492516
## FORCAST 0.9027073 0.9026909 -0.47287100
## FORCAST.RGL 0.9027073 0.9026909 -0.47287100
## Fucks 0.7681848 0.7682153 -0.99711281
## Linsear.Write 0.8446074 0.8446295 -0.96173384
## LIW 0.8466378 0.8466626 -0.97649153
## nWS 0.9108590 0.9108773 -0.93396357
## nWS.2 0.9042061 0.9042253 -0.94010518
## nWS.3 0.8757884 0.8758106 -0.95786413
## nWS.4 0.8416766 0.8417021 -0.97726151
## RIX 0.8296056 0.8296301 -0.97599601
## Scrabble -0.1918831 -0.1918618 -0.03546462
## SMOG 0.8952482 0.8952689 -0.94089430
## SMOG.C 0.8936817 0.8937024 -0.94194286
## SMOG.simple 0.8952482 0.8952689 -0.94089430
## SMOG.de 0.8952482 0.8952689 -0.94089430
## Spache 0.7223808 0.7224146 -1.00000000
## Spache.old 0.7223808 0.7224146 -1.00000000
## Strain 0.7744192 0.7744491 -0.99544237
## Traenkle.Bailer -0.7853284 -0.7853584 0.99529866
## Traenkle.Bailer.2 -0.9904793 -0.9904734 0.66117918
## Wheeler.Smith 0.8134246 0.8134514 -0.98451149
## meanSentenceLength 0.7223808 0.7224146 -1.00000000
## meanWordSyllables 0.9543023 0.9542911 -0.57632421
## Dale.Chall.old Dale.Chall.PSK Danielson.Bryan
## ARI 0.99099423 0.99099423 0.99708146
## ARI.simple 0.99169343 0.99169343 0.99667721
## Bormuth -0.99996713 -0.99996713 -0.97962805
## Bormuth.GP 0.92746308 0.92746308 0.88486422
## Coleman -0.47287100 -0.47287100 -0.63201768
## Coleman.C2 -0.68300831 -0.68300831 -0.80680483
## Coleman.Liau -0.72238083 -0.72238083 -0.84714024
## Coleman.Liau.grade 0.72238083 0.72238083 0.84714024
## Coleman.Liau.short 0.72241463 0.72241463 0.84716497
## Dale.Chall -1.00000000 -1.00000000 -0.97829531
## Dale.Chall.old 1.00000000 1.00000000 0.97829531
## Dale.Chall.PSK 1.00000000 1.00000000 0.97829531
## Danielson.Bryan 0.97829531 0.97829531 1.00000000
## Danielson.Bryan.2 -0.18028533 -0.18028533 -0.37865977
## Dickes.Steiwer -0.99638999 -0.99638999 -0.99178586
## DRP 0.99996713 0.99996713 0.97962805
## ELF 0.98451149 0.98451149 0.99535015
## Farr.Jenkins.Paterson -0.99998999 -0.99998999 -0.97914589
## Flesch -0.92913685 -0.92913685 -0.98231683
## Flesch.PSK 0.95657238 0.95657238 0.99363576
## Flesch.Kincaid 0.98488025 0.98488025 0.99786963
## FOG 0.97841468 0.97841468 0.99695239
## FOG.PSK 0.99626213 0.99626213 0.99126431
## FOG.NRI 0.45492516 0.45492516 0.50283748
## FORCAST 0.47287100 0.47287100 0.63201768
## FORCAST.RGL 0.47287100 0.47287100 0.63201768
## Fucks 0.99711281 0.99711281 0.99077571
## Linsear.Write 0.96173384 0.96173384 0.98940268
## LIW 0.97649153 0.97649153 0.99817496
## nWS 0.93396357 0.93396357 0.98486561
## nWS.2 0.94010518 0.94010518 0.98751970
## nWS.3 0.95786413 0.95786413 0.99235519
## nWS.4 0.97726151 0.97726151 0.99686096
## RIX 0.97599601 0.97599601 0.99500484
## Scrabble 0.03546462 0.03546462 -0.03679625
## SMOG 0.94089430 0.94089430 0.98460337
## SMOG.C 0.94194286 0.94194286 0.98509384
## SMOG.simple 0.94089430 0.94089430 0.98460337
## SMOG.de 0.94089430 0.94089430 0.98460337
## Spache 1.00000000 1.00000000 0.97829531
## Spache.old 1.00000000 1.00000000 0.97829531
## Strain 0.99544237 0.99544237 0.99164686
## Traenkle.Bailer -0.99529866 -0.99529866 -0.99349190
## Traenkle.Bailer.2 -0.66117918 -0.66117918 -0.79995865
## Wheeler.Smith 0.98451149 0.98451149 0.99535015
## meanSentenceLength 1.00000000 1.00000000 0.97829531
## meanWordSyllables 0.57632421 0.57632421 0.72593204
## Danielson.Bryan.2 Dickes.Steiwer DRP
## ARI -0.31030344 -0.99844204 0.99188017
## ARI.simple -0.30523815 -0.99870393 0.99254312
## Bormuth 0.18706023 0.99691688 -1.00000000
## Bormuth.GP -0.04006627 -0.91329434 0.92665498
## Coleman 0.89220768 0.53900648 -0.47841665
## Coleman.C2 0.79314280 0.73682822 -0.68780777
## Coleman.Liau 0.80666655 0.77640516 -0.72721217
## Coleman.Liau.grade -0.80666655 -0.77640516 0.72721217
## Coleman.Liau.short -0.80663507 -0.77643567 0.72724574
## Dale.Chall 0.18028533 0.99638999 -0.99996713
## Dale.Chall.old -0.18028533 -0.99638999 0.99996713
## Dale.Chall.PSK -0.18028533 -0.99638999 0.99996713
## Danielson.Bryan -0.37865977 -0.99178586 0.97962805
## Danielson.Bryan.2 1.00000000 0.26101392 -0.18706023
## Dickes.Steiwer 0.26101392 1.00000000 -0.99691688
## DRP -0.18706023 -0.99691688 1.00000000
## ELF -0.32598561 -0.99345440 0.98541527
## Farr.Jenkins.Paterson 0.18438177 0.99672456 -0.99998536
## Flesch 0.51737927 0.95524130 -0.93162624
## Flesch.PSK -0.44828841 -0.97634394 0.95852762
## Flesch.Kincaid -0.34148774 -0.99512741 0.98602857
## FOG -0.36653699 -0.99112313 0.97973311
## FOG.PSK -0.25909248 -0.99945426 0.99679394
## FOG.NRI -0.36711298 -0.46497779 0.45717614
## FORCAST -0.89220768 -0.53900648 0.47841665
## FORCAST.RGL -0.89220768 -0.53900648 0.47841665
## Fucks -0.24974568 -0.99940700 0.99751520
## Linsear.Write -0.39525523 -0.97750750 0.96316927
## LIW -0.37953455 -0.99028645 0.97782198
## nWS -0.50773654 -0.95947137 0.93628326
## nWS.2 -0.49287612 -0.96426773 0.94231463
## nWS.3 -0.43696629 -0.97697873 0.95970981
## nWS.4 -0.37128519 -0.99039744 0.97861519
## RIX -0.35912065 -0.98825075 0.97704126
## Scrabble 0.32114325 -0.01434789 0.03334969
## SMOG -0.47640589 -0.96367176 0.94303306
## SMOG.C -0.47358728 -0.96447120 0.94405248
## SMOG.simple -0.47640589 -0.96367176 0.94303306
## SMOG.de -0.47640589 -0.96367176 0.94303306
## Spache -0.18028533 -0.99638999 0.99996713
## Spache.old -0.18028533 -0.99638999 0.99996713
## Strain -0.26089612 -0.99875609 0.99596056
## Traenkle.Bailer 0.27409970 0.99959449 -0.99593169
## Traenkle.Bailer.2 0.85092550 0.71952902 -0.66631519
## Wheeler.Smith -0.32598561 -0.99345440 0.98541527
## meanSentenceLength -0.18028533 -0.99638999 0.99996713
## meanWordSyllables -0.87721338 -0.63935562 0.58187505
## ELF Farr.Jenkins.Paterson Flesch
## ARI 0.99594813 -0.99154201 -0.9683121
## ARI.simple 0.99583585 -0.99221922 -0.9670869
## Bormuth -0.98541527 0.99998536 0.9316262
## Bormuth.GP 0.89837506 -0.92696268 -0.8225754
## Coleman -0.61270055 0.47680930 0.7524555
## Coleman.C2 -0.79038825 0.68622584 0.8921701
## Coleman.Liau -0.81342458 0.72522331 0.9145879
## Coleman.Liau.grade 0.81342458 -0.72522331 -0.9145879
## Coleman.Liau.short 0.81345135 -0.72525694 -0.9146054
## Dale.Chall -0.98451149 0.99998999 0.9291368
## Dale.Chall.old 0.98451149 -0.99998999 -0.9291368
## Dale.Chall.PSK 0.98451149 -0.99998999 -0.9291368
## Danielson.Bryan 0.99535015 -0.97914589 -0.9823168
## Danielson.Bryan.2 -0.32598561 0.18438177 0.5173793
## Dickes.Steiwer -0.99345440 0.99672456 0.9552413
## DRP 0.98541527 -0.99998536 -0.9316262
## ELF 1.00000000 -0.98524897 -0.9737553
## Farr.Jenkins.Paterson -0.98524897 1.00000000 0.9307176
## Flesch -0.97375531 0.93071764 1.0000000
## Flesch.PSK 0.98827872 -0.95781641 -0.9965628
## Flesch.Kincaid 0.99727425 -0.98561541 -0.9791403
## FOG 0.99443432 -0.97924413 -0.9834416
## FOG.PSK 0.99386237 -0.99660297 -0.9567470
## FOG.NRI 0.49527062 -0.45620440 -0.5313693
## FORCAST 0.61270055 -0.47680930 -0.7524555
## FORCAST.RGL 0.61270055 -0.47680930 -0.7524555
## Fucks 0.99362322 -0.99740292 -0.9519647
## Linsear.Write 0.98844709 -0.96273644 -0.9842487
## LIW 0.99591724 -0.97739468 -0.9839516
## nWS 0.97678636 -0.93548749 -0.9980699
## nWS.2 0.97966902 -0.94154405 -0.9974721
## nWS.3 0.98635632 -0.95902099 -0.9933395
## nWS.4 0.99411156 -0.97811285 -0.9843084
## RIX 0.99677384 -0.97683788 -0.9779743
## Scrabble -0.04113608 -0.03359018 0.1056726
## SMOG 0.97747350 -0.94224804 -0.9945369
## SMOG.C 0.97820253 -0.94328383 -0.9944270
## SMOG.simple 0.97747350 -0.94224804 -0.9945369
## SMOG.de 0.97747350 -0.94224804 -0.9945369
## Spache 0.98451149 -0.99998999 -0.9291368
## Spache.old 0.98451149 -0.99998999 -0.9291368
## Strain 0.99576469 -0.99581541 -0.9579642
## Traenkle.Bailer -0.99437253 0.99568736 0.9590869
## Traenkle.Bailer.2 -0.76363493 0.66426051 0.8800874
## Wheeler.Smith 1.00000000 -0.98524897 -0.9737553
## meanSentenceLength 0.98451149 -0.99998999 -0.9291368
## meanWordSyllables 0.69782528 -0.57983300 -0.8376402
## Flesch.PSK Flesch.Kincaid FOG FOG.PSK
## ARI 0.98543988 0.99828644 0.99534334 0.9980495
## ARI.simple 0.98463060 0.99809662 0.99501238 0.9983218
## Bormuth -0.95852762 -0.98602857 -0.97973311 -0.9967939
## Bormuth.GP 0.85630865 0.89508980 0.88272558 0.9136641
## Coleman -0.69917416 -0.61241856 -0.62791369 -0.5401789
## Coleman.C2 -0.85640522 -0.79335974 -0.80512019 -0.7376615
## Coleman.Liau -0.88290067 -0.82550024 -0.83906193 -0.7749720
## Coleman.Liau.grade 0.88290067 0.82550024 0.83906193 0.7749720
## Coleman.Liau.short 0.88292206 0.82552702 0.83908765 0.7750026
## Dale.Chall -0.95657238 -0.98488025 -0.97841468 -0.9962621
## Dale.Chall.old 0.95657238 0.98488025 0.97841468 0.9962621
## Dale.Chall.PSK 0.95657238 0.98488025 0.97841468 0.9962621
## Danielson.Bryan 0.99363576 0.99786963 0.99695239 0.9912643
## Danielson.Bryan.2 -0.44828841 -0.34148774 -0.36653699 -0.2590925
## Dickes.Steiwer -0.97634394 -0.99512741 -0.99112313 -0.9994543
## DRP 0.95852762 0.98602857 0.97973311 0.9967939
## ELF 0.98827872 0.99727425 0.99443432 0.9938624
## Farr.Jenkins.Paterson -0.95781641 -0.98561541 -0.97924413 -0.9966030
## Flesch -0.99656281 -0.97914032 -0.98344165 -0.9567470
## Flesch.PSK 1.00000000 0.99260681 0.99454929 0.9775024
## Flesch.Kincaid 0.99260681 1.00000000 0.99846226 0.9957626
## FOG 0.99454929 0.99846226 1.00000000 0.9926083
## FOG.PSK 0.97750238 0.99576264 0.99260833 1.0000000
## FOG.NRI 0.52085197 0.49896862 0.50751706 0.4793132
## FORCAST 0.69917416 0.61241856 0.62791369 0.5401789
## FORCAST.RGL 0.69917416 0.61241856 0.62791369 0.5401789
## Fucks 0.97392268 0.99398941 0.98936219 0.9991427
## Linsear.Write 0.99144816 0.98967358 0.99372655 0.9801897
## LIW 0.99452041 0.99764432 0.99741442 0.9903981
## nWS 0.99612259 0.98088847 0.98637622 0.9608083
## nWS.2 0.99702731 0.98398342 0.98925526 0.9656186
## nWS.3 0.99774824 0.99180645 0.99654303 0.9790944
## nWS.4 0.99497427 0.99823466 0.99998483 0.9919248
## RIX 0.98969704 0.99457144 0.99391070 0.9886425
## Scrabble -0.07536487 -0.03002267 -0.02275926 0.0113141
## SMOG 0.99489006 0.98304183 0.99004142 0.9664107
## SMOG.C 0.99503836 0.98356657 0.99047862 0.9672092
## SMOG.simple 0.99489006 0.98304183 0.99004142 0.9664107
## SMOG.de 0.99489006 0.98304183 0.99004142 0.9664107
## Spache 0.95657238 0.98488025 0.97841468 0.9962621
## Spache.old 0.95657238 0.98488025 0.97841468 0.9962621
## Strain 0.97827829 0.99588242 0.99196627 0.9992502
## Traenkle.Bailer -0.97913122 -0.99632949 -0.99246787 -0.9993755
## Traenkle.Bailer.2 -0.84198853 -0.77570251 -0.79053472 -0.7187449
## Wheeler.Smith 0.98827872 0.99727425 0.99443432 0.9938624
## meanSentenceLength 0.95657238 0.98488025 0.97841468 0.9962621
## meanWordSyllables 0.78951164 0.70918290 0.72824185 0.6428726
## FOG.NRI FORCAST FORCAST.RGL Fucks
## ARI 0.4905310 0.5784256 0.5784256 0.997730647
## ARI.simple 0.4892834 0.5744257 0.5744257 0.998049346
## Bormuth -0.4571761 -0.4784167 -0.4784167 -0.997515204
## Bormuth.GP 0.3361798 0.3418689 0.3418689 0.919893732
## Coleman -0.4679026 -1.0000000 -1.0000000 -0.530595415
## Coleman.C2 -0.5350724 -0.9578621 -0.9578621 -0.727965258
## Coleman.Liau -0.5426918 -0.9027073 -0.9027073 -0.768184772
## Coleman.Liau.grade 0.5426918 0.9027073 0.9027073 0.768184772
## Coleman.Liau.short 0.5426992 0.9026909 0.9026909 0.768215251
## Dale.Chall -0.4549252 -0.4728710 -0.4728710 -0.997112805
## Dale.Chall.old 0.4549252 0.4728710 0.4728710 0.997112805
## Dale.Chall.PSK 0.4549252 0.4728710 0.4728710 0.997112805
## Danielson.Bryan 0.5028375 0.6320177 0.6320177 0.990775706
## Danielson.Bryan.2 -0.3671130 -0.8922077 -0.8922077 -0.249745678
## Dickes.Steiwer -0.4649778 -0.5390065 -0.5390065 -0.999407002
## DRP 0.4571761 0.4784167 0.4784167 0.997515204
## ELF 0.4952706 0.6127006 0.6127006 0.993623225
## Farr.Jenkins.Paterson -0.4562044 -0.4768093 -0.4768093 -0.997402917
## Flesch -0.5313693 -0.7524555 -0.7524555 -0.951964715
## Flesch.PSK 0.5208520 0.6991742 0.6991742 0.973922677
## Flesch.Kincaid 0.4989686 0.6124186 0.6124186 0.993989409
## FOG 0.5075171 0.6279137 0.6279137 0.989362193
## FOG.PSK 0.4793132 0.5401789 0.5401789 0.999142674
## FOG.NRI 1.0000000 0.4679026 0.4679026 0.472326429
## FORCAST 0.4679026 1.0000000 1.0000000 0.530595415
## FORCAST.RGL 0.4679026 1.0000000 1.0000000 0.530595415
## Fucks 0.4723264 0.5305954 0.5305954 1.000000000
## Linsear.Write 0.5059810 0.6540873 0.6540873 0.977258794
## LIW 0.5041603 0.6415138 0.6415138 0.988738238
## nWS 0.5303825 0.7435480 0.7435480 0.956043120
## nWS.2 0.5280599 0.7297189 0.7297189 0.960995665
## nWS.3 0.5225015 0.6826246 0.6826246 0.974240643
## nWS.4 0.5086192 0.6316756 0.6316756 0.988571335
## RIX 0.4923100 0.6292106 0.6292106 0.988410496
## Scrabble -0.1428063 -0.3522408 -0.3522408 0.008523205
## SMOG 0.5367565 0.7133315 0.7133315 0.960356032
## SMOG.C 0.5360437 0.7113151 0.7113151 0.961281850
## SMOG.simple 0.5367565 0.7133315 0.7133315 0.960356032
## SMOG.de 0.5367565 0.7133315 0.7133315 0.960356032
## Spache 0.4549252 0.4728710 0.4728710 0.997112805
## Spache.old 0.4549252 0.4728710 0.4728710 0.997112805
## Strain 0.4766100 0.5461293 0.5461293 0.999278833
## Traenkle.Bailer -0.4831579 -0.5491468 -0.5491468 -0.999275945
## Traenkle.Bailer.2 -0.5380406 -0.9206771 -0.9206771 -0.712342431
## Wheeler.Smith 0.4952706 0.6127006 0.6127006 0.993623225
## meanSentenceLength 0.4549252 0.4728710 0.4728710 0.997112805
## meanWordSyllables 0.5024015 0.9645525 0.9645525 0.631045601
## Linsear.Write LIW nWS nWS.2
## ARI 0.98324412 0.99534363 0.9716012 0.97550799
## ARI.simple 0.98272675 0.99493616 0.9704382 0.97443531
## Bormuth -0.96316927 -0.97782198 -0.9362833 -0.94231463
## Bormuth.GP 0.88236447 0.88013733 0.8218265 0.82921349
## Coleman -0.65408730 -0.64151385 -0.7435480 -0.72971888
## Coleman.C2 -0.81398206 -0.81448329 -0.8869623 -0.87759182
## Coleman.Liau -0.84460735 -0.84663781 -0.9108590 -0.90420615
## Coleman.Liau.grade 0.84460735 0.84663781 0.9108590 0.90420615
## Coleman.Liau.short 0.84462952 0.84666264 0.9108773 0.90422529
## Dale.Chall -0.96173384 -0.97649153 -0.9339636 -0.94010518
## Dale.Chall.old 0.96173384 0.97649153 0.9339636 0.94010518
## Dale.Chall.PSK 0.96173384 0.97649153 0.9339636 0.94010518
## Danielson.Bryan 0.98940268 0.99817496 0.9848656 0.98751970
## Danielson.Bryan.2 -0.39525523 -0.37953455 -0.5077365 -0.49287612
## Dickes.Steiwer -0.97750750 -0.99028645 -0.9594714 -0.96426773
## DRP 0.96316927 0.97782198 0.9362833 0.94231463
## ELF 0.98844709 0.99591724 0.9767864 0.97966902
## Farr.Jenkins.Paterson -0.96273644 -0.97739468 -0.9354875 -0.94154405
## Flesch -0.98424870 -0.98395156 -0.9980699 -0.99747205
## Flesch.PSK 0.99144816 0.99452041 0.9961226 0.99702731
## Flesch.Kincaid 0.98967358 0.99764432 0.9808885 0.98398342
## FOG 0.99372655 0.99741442 0.9863762 0.98925526
## FOG.PSK 0.98018969 0.99039812 0.9608083 0.96561861
## FOG.NRI 0.50598103 0.50416033 0.5303825 0.52805992
## FORCAST 0.65408730 0.64151385 0.7435480 0.72971888
## FORCAST.RGL 0.65408730 0.64151385 0.7435480 0.72971888
## Fucks 0.97725879 0.98873824 0.9560431 0.96099567
## Linsear.Write 1.00000000 0.99064913 0.9868813 0.98906653
## LIW 0.99064913 1.00000000 0.9871894 0.98960771
## nWS 0.98688126 0.98718935 1.0000000 0.99978564
## nWS.2 0.98906653 0.98960771 0.9997856 1.00000000
## nWS.3 0.99452970 0.99372252 0.9954778 0.99701030
## nWS.4 0.99399250 0.99738312 0.9871908 0.98998119
## RIX 0.99270597 0.99722072 0.9816466 0.98430393
## Scrabble -0.03242714 -0.03879638 -0.0868361 -0.07743888
## SMOG 0.99042784 0.98677649 0.9968232 0.99752895
## SMOG.C 0.99111709 0.98723721 0.9967751 0.99753895
## SMOG.simple 0.99042784 0.98677649 0.9968232 0.99752895
## SMOG.de 0.99042784 0.98677649 0.9968232 0.99752895
## Spache 0.96173384 0.97649153 0.9339636 0.94010518
## Spache.old 0.96173384 0.97649153 0.9339636 0.94010518
## Strain 0.98250555 0.99073666 0.9611723 0.96577125
## Traenkle.Bailer -0.97898368 -0.99190213 -0.9628587 -0.96743426
## Traenkle.Bailer.2 -0.80373175 -0.80039428 -0.8744090 -0.86651511
## Wheeler.Smith 0.98844709 0.99591724 0.9767864 0.97966902
## meanSentenceLength 0.96173384 0.97649153 0.9339636 0.94010518
## meanWordSyllables 0.75466877 0.73221003 0.8262434 0.81584881
## nWS.3 nWS.4 RIX Scrabble
## ARI 0.98501356 0.99487148 0.99222014 -0.0112182516
## ARI.simple 0.98427247 0.99451326 0.99191614 -0.0093650938
## Bormuth -0.95970981 -0.97861519 -0.97704126 -0.0333496851
## Bormuth.GP 0.85402853 0.88101179 0.89115856 -0.0158199992
## Coleman -0.68262459 -0.63167564 -0.62921060 0.3522407942
## Coleman.C2 -0.84444185 -0.80789970 -0.79924879 0.2472067525
## Coleman.Liau -0.87578843 -0.84167664 -0.82960560 0.1918831342
## Coleman.Liau.grade 0.87578843 0.84167664 0.82960560 -0.1918831342
## Coleman.Liau.short 0.87581059 0.84170213 0.82963012 -0.1918617971
## Dale.Chall -0.95786413 -0.97726151 -0.97599601 -0.0354646177
## Dale.Chall.old 0.95786413 0.97726151 0.97599601 0.0354646177
## Dale.Chall.PSK 0.95786413 0.97726151 0.97599601 0.0354646177
## Danielson.Bryan 0.99235519 0.99686096 0.99500484 -0.0367962483
## Danielson.Bryan.2 -0.43696629 -0.37128519 -0.35912065 0.3211432536
## Dickes.Steiwer -0.97697873 -0.99039744 -0.98825075 -0.0143478880
## DRP 0.95970981 0.97861519 0.97704126 0.0333496851
## ELF 0.98635632 0.99411156 0.99677384 -0.0411360767
## Farr.Jenkins.Paterson -0.95902099 -0.97811285 -0.97683788 -0.0335901823
## Flesch -0.99333954 -0.98430842 -0.97797435 0.1056725636
## Flesch.PSK 0.99774824 0.99497427 0.98969704 -0.0753648675
## Flesch.Kincaid 0.99180645 0.99823466 0.99457144 -0.0300226725
## FOG 0.99654303 0.99998483 0.99391070 -0.0227592579
## FOG.PSK 0.97909440 0.99192476 0.98864254 0.0113141021
## FOG.NRI 0.52250155 0.50861922 0.49230997 -0.1428062743
## FORCAST 0.68262459 0.63167564 0.62921060 -0.3522407942
## FORCAST.RGL 0.68262459 0.63167564 0.62921060 -0.3522407942
## Fucks 0.97424064 0.98857134 0.98841050 0.0085232049
## Linsear.Write 0.99452970 0.99399250 0.99270597 -0.0324271351
## LIW 0.99372252 0.99738312 0.99722072 -0.0387963848
## nWS 0.99547782 0.98719082 0.98164655 -0.0868361035
## nWS.2 0.99701030 0.98998119 0.98430393 -0.0774388847
## nWS.3 1.00000000 0.99698554 0.98905195 -0.0458903170
## nWS.4 0.99698554 1.00000000 0.99380128 -0.0242978124
## RIX 0.98905195 0.99380128 1.00000000 -0.0444703290
## Scrabble -0.04589032 -0.02429781 -0.04447033 1.0000000000
## SMOG 0.99778573 0.99076681 0.98160988 -0.0511186091
## SMOG.C 0.99797185 0.99118746 0.98237501 -0.0501480088
## SMOG.simple 0.99778573 0.99076681 0.98160988 -0.0511186091
## SMOG.de 0.99778573 0.99076681 0.98160988 -0.0511186091
## Spache 0.95786413 0.97726151 0.97599601 0.0354646177
## Spache.old 0.95786413 0.97726151 0.97599601 0.0354646177
## Strain 0.97853156 0.99128781 0.99114270 -0.0005155977
## Traenkle.Bailer -0.97928651 -0.99180632 -0.98944923 -0.0023260095
## Traenkle.Bailer.2 -0.83294548 -0.79351594 -0.78537286 0.2500591399
## Wheeler.Smith 0.98635632 0.99411156 0.99677384 -0.0411360767
## meanSentenceLength 0.95786413 0.97726151 0.97599601 0.0354646177
## meanWordSyllables 0.78047895 0.73186125 0.71973074 -0.2859592235
## SMOG SMOG.C SMOG.simple SMOG.de
## ARI 0.97393941 0.97457741 0.97393941 0.97393941
## ARI.simple 0.97295999 0.97361454 0.97295999 0.97295999
## Bormuth -0.94303306 -0.94405248 -0.94303306 -0.94303306
## Bormuth.GP 0.82113760 0.82339465 0.82113760 0.82113760
## Coleman -0.71333148 -0.71131513 -0.71333148 -0.71333148
## Coleman.C2 -0.86811865 -0.86636103 -0.86811865 -0.86811865
## Coleman.Liau -0.89524825 -0.89368168 -0.89524825 -0.89524825
## Coleman.Liau.grade 0.89524825 0.89368168 0.89524825 0.89524825
## Coleman.Liau.short 0.89526894 0.89370243 0.89526894 0.89526894
## Dale.Chall -0.94089430 -0.94194286 -0.94089430 -0.94089430
## Dale.Chall.old 0.94089430 0.94194286 0.94089430 0.94089430
## Dale.Chall.PSK 0.94089430 0.94194286 0.94089430 0.94089430
## Danielson.Bryan 0.98460337 0.98509384 0.98460337 0.98460337
## Danielson.Bryan.2 -0.47640589 -0.47358728 -0.47640589 -0.47640589
## Dickes.Steiwer -0.96367176 -0.96447120 -0.96367176 -0.96367176
## DRP 0.94303306 0.94405248 0.94303306 0.94303306
## ELF 0.97747350 0.97820253 0.97747350 0.97747350
## Farr.Jenkins.Paterson -0.94224804 -0.94328383 -0.94224804 -0.94224804
## Flesch -0.99453689 -0.99442699 -0.99453689 -0.99453689
## Flesch.PSK 0.99489006 0.99503836 0.99489006 0.99489006
## Flesch.Kincaid 0.98304183 0.98356657 0.98304183 0.98304183
## FOG 0.99004142 0.99047862 0.99004142 0.99004142
## FOG.PSK 0.96641067 0.96720922 0.96641067 0.96641067
## FOG.NRI 0.53675653 0.53604370 0.53675653 0.53675653
## FORCAST 0.71333148 0.71131513 0.71333148 0.71333148
## FORCAST.RGL 0.71333148 0.71131513 0.71333148 0.71333148
## Fucks 0.96035603 0.96128185 0.96035603 0.96035603
## Linsear.Write 0.99042784 0.99111709 0.99042784 0.99042784
## LIW 0.98677649 0.98723721 0.98677649 0.98677649
## nWS 0.99682318 0.99677506 0.99682318 0.99682318
## nWS.2 0.99752895 0.99753895 0.99752895 0.99752895
## nWS.3 0.99778573 0.99797185 0.99778573 0.99778573
## nWS.4 0.99076681 0.99118746 0.99076681 0.99076681
## RIX 0.98160988 0.98237501 0.98160988 0.98160988
## Scrabble -0.05111861 -0.05014801 -0.05111861 -0.05111861
## SMOG 1.00000000 0.99998563 1.00000000 1.00000000
## SMOG.C 0.99998563 1.00000000 0.99998563 0.99998563
## SMOG.simple 1.00000000 0.99998563 1.00000000 1.00000000
## SMOG.de 1.00000000 0.99998563 1.00000000 1.00000000
## Spache 0.94089430 0.94194286 0.94089430 0.94089430
## Spache.old 0.94089430 0.94194286 0.94089430 0.94089430
## Strain 0.96573029 0.96664268 0.96573029 0.96573029
## Traenkle.Bailer -0.96666632 -0.96741430 -0.96666632 -0.96666632
## Traenkle.Bailer.2 -0.85456811 -0.85299245 -0.85456811 -0.85456811
## Wheeler.Smith 0.97747350 0.97820253 0.97747350 0.97747350
## meanSentenceLength 0.94089430 0.94194286 0.94089430 0.94089430
## meanWordSyllables 0.80819548 0.80640351 0.80819548 0.80819548
## Spache Spache.old Strain
## ARI 0.99099423 0.99099423 0.9976064239
## ARI.simple 0.99169343 0.99169343 0.9978635733
## Bormuth -0.99996713 -0.99996713 -0.9959605646
## Bormuth.GP 0.92746308 0.92746308 0.9201367937
## Coleman -0.47287100 -0.47287100 -0.5461292792
## Coleman.C2 -0.68300831 -0.68300831 -0.7394040240
## Coleman.Liau -0.72238083 -0.72238083 -0.7744192348
## Coleman.Liau.grade 0.72238083 0.72238083 0.7744192348
## Coleman.Liau.short 0.72241463 0.72241463 0.7744490784
## Dale.Chall -1.00000000 -1.00000000 -0.9954423736
## Dale.Chall.old 1.00000000 1.00000000 0.9954423736
## Dale.Chall.PSK 1.00000000 1.00000000 0.9954423736
## Danielson.Bryan 0.97829531 0.97829531 0.9916468644
## Danielson.Bryan.2 -0.18028533 -0.18028533 -0.2608961180
## Dickes.Steiwer -0.99638999 -0.99638999 -0.9987560895
## DRP 0.99996713 0.99996713 0.9959605646
## ELF 0.98451149 0.98451149 0.9957646921
## Farr.Jenkins.Paterson -0.99998999 -0.99998999 -0.9958154056
## Flesch -0.92913685 -0.92913685 -0.9579641510
## Flesch.PSK 0.95657238 0.95657238 0.9782782859
## Flesch.Kincaid 0.98488025 0.98488025 0.9958824210
## FOG 0.97841468 0.97841468 0.9919662659
## FOG.PSK 0.99626213 0.99626213 0.9992501846
## FOG.NRI 0.45492516 0.45492516 0.4766100001
## FORCAST 0.47287100 0.47287100 0.5461292792
## FORCAST.RGL 0.47287100 0.47287100 0.5461292792
## Fucks 0.99711281 0.99711281 0.9992788331
## Linsear.Write 0.96173384 0.96173384 0.9825055506
## LIW 0.97649153 0.97649153 0.9907366605
## nWS 0.93396357 0.93396357 0.9611722939
## nWS.2 0.94010518 0.94010518 0.9657712464
## nWS.3 0.95786413 0.95786413 0.9785315584
## nWS.4 0.97726151 0.97726151 0.9912878102
## RIX 0.97599601 0.97599601 0.9911426991
## Scrabble 0.03546462 0.03546462 -0.0005155977
## SMOG 0.94089430 0.94089430 0.9657302854
## SMOG.C 0.94194286 0.94194286 0.9666426822
## SMOG.simple 0.94089430 0.94089430 0.9657302854
## SMOG.de 0.94089430 0.94089430 0.9657302854
## Spache 1.00000000 1.00000000 0.9954423736
## Spache.old 1.00000000 1.00000000 0.9954423736
## Strain 0.99544237 0.99544237 1.0000000000
## Traenkle.Bailer -0.99529866 -0.99529866 -0.9987681336
## Traenkle.Bailer.2 -0.66117918 -0.66117918 -0.7203881294
## Wheeler.Smith 0.98451149 0.98451149 0.9957646921
## meanSentenceLength 1.00000000 1.00000000 0.9954423736
## meanWordSyllables 0.57632421 0.57632421 0.6467738310
## Traenkle.Bailer Traenkle.Bailer.2 Wheeler.Smith
## ARI -0.999223850 -0.7549178 0.99594813
## ARI.simple -0.999411499 -0.7514487 0.99583585
## Bormuth 0.995931694 0.6663152 -0.98541527
## Bormuth.GP -0.910151256 -0.5147186 0.89837506
## Coleman 0.549146837 0.9206771 -0.61270055
## Coleman.C2 0.745171965 0.9574552 -0.79038825
## Coleman.Liau 0.785328440 0.9904793 -0.81342458
## Coleman.Liau.grade -0.785328440 -0.9904793 0.81342458
## Coleman.Liau.short -0.785358433 -0.9904734 0.81345135
## Dale.Chall 0.995298661 0.6611792 -0.98451149
## Dale.Chall.old -0.995298661 -0.6611792 0.98451149
## Dale.Chall.PSK -0.995298661 -0.6611792 0.98451149
## Danielson.Bryan -0.993491902 -0.7999586 0.99535015
## Danielson.Bryan.2 0.274099704 0.8509255 -0.32598561
## Dickes.Steiwer 0.999594489 0.7195290 -0.99345440
## DRP -0.995931694 -0.6663152 0.98541527
## ELF -0.994372531 -0.7636349 1.00000000
## Farr.Jenkins.Paterson 0.995687365 0.6642605 -0.98524897
## Flesch 0.959086871 0.8800874 -0.97375531
## Flesch.PSK -0.979131224 -0.8419885 0.98827872
## Flesch.Kincaid -0.996329486 -0.7757025 0.99727425
## FOG -0.992467872 -0.7905347 0.99443432
## FOG.PSK -0.999375460 -0.7187449 0.99386237
## FOG.NRI -0.483157912 -0.5380406 0.49527062
## FORCAST -0.549146837 -0.9206771 0.61270055
## FORCAST.RGL -0.549146837 -0.9206771 0.61270055
## Fucks -0.999275945 -0.7123424 0.99362322
## Linsear.Write -0.978983682 -0.8037317 0.98844709
## LIW -0.991902133 -0.8003943 0.99591724
## nWS -0.962858718 -0.8744090 0.97678636
## nWS.2 -0.967434256 -0.8665151 0.97966902
## nWS.3 -0.979286510 -0.8329455 0.98635632
## nWS.4 -0.991806322 -0.7935159 0.99411156
## RIX -0.989449234 -0.7853729 0.99677384
## Scrabble -0.002326009 0.2500591 -0.04113608
## SMOG -0.966666322 -0.8545681 0.97747350
## SMOG.C -0.967414302 -0.8529924 0.97820253
## SMOG.simple -0.966666322 -0.8545681 0.97747350
## SMOG.de -0.966666322 -0.8545681 0.97747350
## Spache -0.995298661 -0.6611792 0.98451149
## Spache.old -0.995298661 -0.6611792 0.98451149
## Strain -0.998768134 -0.7203881 0.99576469
## Traenkle.Bailer 1.000000000 0.7306470 -0.99437253
## Traenkle.Bailer.2 0.730647041 1.0000000 -0.76363493
## Wheeler.Smith -0.994372531 -0.7636349 1.00000000
## meanSentenceLength -0.995298661 -0.6611792 0.98451149
## meanWordSyllables -0.649467672 -0.9684616 0.69782528
## meanSentenceLength meanWordSyllables
## ARI 0.99099423 0.6762172
## ARI.simple 0.99169343 0.6724762
## Bormuth -0.99996713 -0.5818750
## Bormuth.GP 0.92746308 0.4479544
## Coleman -0.47287100 -0.9645525
## Coleman.C2 -0.68300831 -0.9629188
## Coleman.Liau -0.72238083 -0.9543023
## Coleman.Liau.grade 0.72238083 0.9543023
## Coleman.Liau.short 0.72241463 0.9542911
## Dale.Chall -1.00000000 -0.5763242
## Dale.Chall.old 1.00000000 0.5763242
## Dale.Chall.PSK 1.00000000 0.5763242
## Danielson.Bryan 0.97829531 0.7259320
## Danielson.Bryan.2 -0.18028533 -0.8772134
## Dickes.Steiwer -0.99638999 -0.6393556
## DRP 0.99996713 0.5818750
## ELF 0.98451149 0.6978253
## Farr.Jenkins.Paterson -0.99998999 -0.5798330
## Flesch -0.92913685 -0.8376402
## Flesch.PSK 0.95657238 0.7895116
## Flesch.Kincaid 0.98488025 0.7091829
## FOG 0.97841468 0.7282418
## FOG.PSK 0.99626213 0.6428726
## FOG.NRI 0.45492516 0.5024015
## FORCAST 0.47287100 0.9645525
## FORCAST.RGL 0.47287100 0.9645525
## Fucks 0.99711281 0.6310456
## Linsear.Write 0.96173384 0.7546688
## LIW 0.97649153 0.7322100
## nWS 0.93396357 0.8262434
## nWS.2 0.94010518 0.8158488
## nWS.3 0.95786413 0.7804790
## nWS.4 0.97726151 0.7318613
## RIX 0.97599601 0.7197307
## Scrabble 0.03546462 -0.2859592
## SMOG 0.94089430 0.8081955
## SMOG.C 0.94194286 0.8064035
## SMOG.simple 0.94089430 0.8081955
## SMOG.de 0.94089430 0.8081955
## Spache 1.00000000 0.5763242
## Spache.old 1.00000000 0.5763242
## Strain 0.99544237 0.6467738
## Traenkle.Bailer -0.99529866 -0.6494677
## Traenkle.Bailer.2 -0.66117918 -0.9684616
## Wheeler.Smith 0.98451149 0.6978253
## meanSentenceLength 1.00000000 0.5763242
## meanWordSyllables 0.57632421 1.0000000
tic()
# Box plots of the min-max normalised Flesch-Kincaid score for each section.
boxplot(normalit(Flesch.Kincaid) ~ section,
        data = tr.df,
        main = "Reading level by Section ")
# Same plot restricted to the first four sections. The trailing comma in
# tr.df[rows, ] is essential: without it the logical vector is used to pick
# columns rather than rows. droplevels() removes the now-empty section so it
# does not get an empty slot when `section` is a factor.
# Note: normalit() is evaluated on the subset, so values are rescaled to the
# range of these four sections only.
first_four_rows <- tr.df$section %in%
  c("Section 01", "Section 02", "Section 03", "Section 04")
boxplot(normalit(Flesch.Kincaid) ~ section,
        data = droplevels(tr.df[first_four_rows, ]),
        main = "Reading level by Section ")
toc()
## 0.09 sec elapsed
tic()
# Score every text element with get_nrc_sentiment(), then put the section
# label of each element in front of its row of scores.
Sentiment <- get_nrc_sentiment(raw.df[["raw_text"]])
sentiment.df <- data.frame(
  section_id = doc_id_text,
  Sentiment,
  stringsAsFactors = FALSE
)
toc()
## 7.03 sec elapsed
tic()
# Overall sentiment: total score per NRC category over the whole document.
# Sentiment has one row per text element and one column per category, so
# after transposing the categories are the rows and rowSums() adds up every
# element. (The previous td[2:length(td)] slice left the first element out.)
td <- data.frame(t(Sentiment))
td_Rowsum <- data.frame(count = rowSums(td))
td_Rowsum <- cbind("sentiment" = rownames(td_Rowsum), td_Rowsum)
rownames(td_Rowsum) <- NULL
td_Plot <- td_Rowsum[1:10, ]
# Relabel by matching on the category name. Overwriting levels() positionally
# mislabels the bars when the column is a factor (levels are alphabetical)
# and has no effect when it is a character column.
td_Plot$sentiment <- factor(
  td_Plot$sentiment,
  levels = c("negative", "anger", "anticipation", "disgust", "fear",
             "joy", "sadness", "surprise", "trust", "positive"),
  labels = c("Negative", "Anger", "Anticipation", "Disgust", "Fear",
             "Joy", "Sadness", "Surprise", "Trust", "Positive")
)
# geom_col() draws one bar per category with height `count`; it replaces the
# deprecated qplot(..., weight = count, geom = "bar").
ggplot(td_Plot, aes(x = sentiment, y = count, fill = sentiment)) +
  geom_col() +
  ggtitle("Doug's book Collection Overall sentiment analysis")
# One sentiment bar chart per section, in section order.
# Category names as they appear in the score columns, and the display labels
# they map to (matched by name, not by position).
nrc_levels <- c("negative", "anger", "anticipation", "disgust", "fear",
                "joy", "sadness", "surprise", "trust", "positive")
nrc_labels <- c("Negative", "Anger", "Anticipation", "Disgust", "Fear",
                "Joy", "Sadness", "Surprise", "Trust", "Positive")
for (doc_id in sort(unique(sentiment.df$section_id))) {
  tmp.df <- sentiment.df[sentiment.df$section_id == doc_id, ]
  # Columns 2:11 are the ten sentiment scores (column 1 is section_id).
  td <- data.frame(t(tmp.df[, c(2:11)]))
  # Sum over every element of the section. The previous td[2:length(td)]
  # slice skipped the first element and failed for a one-element section.
  td_Rowsum <- data.frame(count = rowSums(td))
  td_Rowsum <- cbind("sentiment" = rownames(td_Rowsum), td_Rowsum)
  rownames(td_Rowsum) <- NULL
  td_Plot <- td_Rowsum[1:10, ]
  td_Plot$sentiment <- factor(td_Plot$sentiment,
                              levels = nrc_levels,
                              labels = nrc_labels)
  # Plots must be printed explicitly inside a loop. geom_col() replaces the
  # deprecated qplot(..., weight = count, geom = "bar").
  print(
    ggplot(td_Plot, aes(x = sentiment, y = count, fill = sentiment)) +
      geom_col() +
      ggtitle(paste0("Doug's book Collection Overall sentiment analysis for ", doc_id))
  )
}
toc()
## 2.66 sec elapsed
# Load the English UDPipe model from disk.
udmodel_english <- udpipe_load_model(
  file = "../udpipe/english-ud-2.0-170801.udpipe"
)
tic()
# Annotate every element of `text`. The document id is the file name followed
# by the section label, so elements of the same section share one doc_id.
ann.raw <- udpipe_annotate(
  udmodel_english,
  x = text,
  doc_id = paste(filename, doc_id_text)
)
# Flatten the annotation result into a data frame.
ann.df <- data.frame(ann.raw)
toc()
## 484.06 sec elapsed
# Small demonstration: annotate three sentences that all contain "average".
demo.raw <- udpipe_annotate(
  udmodel_english,
  c("What is the average temperature?",
    "Can you average those numbers?",
    "What does the average say? ")
)
demo.df <- data.frame(demo.raw)
#
# Same word, different part of speech.
#
# Columns are selected by name rather than by position (previously
# c(4, 6, 8)) because the column layout of the annotation data frame is not
# the same in every udpipe version.
demo.df[demo.df$token == "average", c("sentence", "token", "upos")]
# First two annotated tokens, all columns.
demo.df[c(1:2), ]
# Example: recode a vector of universal part-of-speech tags into the
# one-letter codes used for phrase patterns (result shown below).
x <- c(
  "PROPN", "SCONJ", "ADJ", "NOUN", "VERB", "INTJ",
  "DET", "VERB", "PROPN", "AUX", "NUM", "NUM",
  "X", "SCONJ", "PRON", "PUNCT", "ADP", "X",
  "PUNCT", "AUX", "PROPN", "ADP", "X", "PROPN",
  "ADP", "DET", "CCONJ", "INTJ", "NOUN", "PROPN"
)
as_phrasemachine(x, type = "upos")
## [1] "N" "C" "A" "N" "V" "O" "D" "V" "N" "V" "A" "A" "O" "C" "N" "O" "P"
## [18] "O" "O" "V" "N" "P" "O" "N" "P" "D" "C" "O" "N" "N"
tic()
# Simple noun-phrase keywords over the whole annotated document.
tmp.df <- ann.df
# Recode the part-of-speech tags to one-letter codes so that a phrase can be
# described with a regular expression over those letters.
tmp.df$phrase_tag <- as_phrasemachine(tmp.df$upos, type = "upos")
phrases <- keywords_phrases(
  x = tmp.df$phrase_tag,
  term = tmp.df$token,
  pattern = "(A|N)*N(P+D*(A|N)*N)*",
  is_regex = TRUE,
  detailed = FALSE
)
# Keep multi-word phrases that occur more than three times.
phrase_rows <- phrases$ngram > 1 & phrases$freq > 3
phrases <- phrases[phrase_rows & !is.na(phrase_rows), ]
# Factor levels in reversed row order fix the order of the bars.
phrases$key <- factor(phrases$keyword, levels = rev(phrases$keyword))
# Chart the 20 most frequent phrases.
top_phrases <- head(phrases[order(phrases$freq, decreasing = TRUE), ], 20)
print(
  barchart(
    key ~ freq,
    data = top_phrases,
    col = "green",
    main = "Keywords - simple noun phrases ",
    xlab = "Frequency keywords"
  )
)
# Repeat the noun-phrase keyword chart for each annotated document id.
for (doc in unique(ann.df$doc_id)) {
  tmp.df <- ann.df[ann.df$doc_id == doc, ]
  tmp.df$phrase_tag <- as_phrasemachine(tmp.df$upos, type = "upos")
  phrases <- keywords_phrases(
    x = tmp.df$phrase_tag,
    term = tmp.df$token,
    pattern = "(A|N)*N(P+D*(A|N)*N)*",
    is_regex = TRUE,
    detailed = FALSE
  )
  # Keep multi-word phrases that occur more than three times in this document.
  phrase_rows <- phrases$ngram > 1 & phrases$freq > 3
  phrases <- phrases[phrase_rows & !is.na(phrase_rows), ]
  phrases$key <- factor(phrases$keyword, levels = rev(phrases$keyword))
  # Chart the 20 most frequent phrases; print() is required inside a loop.
  top_phrases <- head(phrases[order(phrases$freq, decreasing = TRUE), ], 20)
  print(
    barchart(
      key ~ freq,
      data = top_phrases,
      col = "Green",
      main = paste0("Keywords - simple noun phrases ", doc),
      xlab = "Frequency"
    )
  )
}
toc()
## 47.68 sec elapsed
# RAKE keywords over the whole annotated document, built from lemmas and
# restricted to nouns and adjectives.
tmp.df <- ann.df
noun_or_adj <- is.element(tmp.df$upos, c("NOUN", "ADJ"))
rake_keywords <- keywords_rake(
  x = tmp.df,
  term = "lemma",
  group = "doc_id",
  relevant = noun_or_adj
)
# The factor is built before filtering, so its levels follow the reversed
# row order of the full keyword table.
rake_keywords$key <- factor(
  rake_keywords$keyword,
  levels = rev(rake_keywords$keyword)
)
# Keep multi-word keywords that occur more than three times.
rake_rows <- rake_keywords$ngram > 1 & rake_keywords$freq > 3
rake_keywords <- rake_keywords[rake_rows & !is.na(rake_rows), ]
# Chart the rake score of the 20 most frequent keywords.
top_rake <- head(
  rake_keywords[order(rake_keywords$freq, decreasing = TRUE), ],
  20
)
print(
  barchart(
    key ~ rake,
    data = top_rake,
    col = "blue",
    main = "Keywords identified by RAKE ",
    xlab = "Rake"
  )
)
# Repeat the RAKE keyword chart for each annotated document id.
for (doc in unique(ann.df$doc_id)) {
  tmp.df <- ann.df[ann.df$doc_id == doc, ]
  # Only nouns and adjectives are candidates for keywords.
  noun_or_adj <- is.element(tmp.df$upos, c("NOUN", "ADJ"))
  rake_keywords <- keywords_rake(
    x = tmp.df,
    term = "lemma",
    group = "doc_id",
    relevant = noun_or_adj
  )
  rake_keywords$key <- factor(
    rake_keywords$keyword,
    levels = rev(rake_keywords$keyword)
  )
  # Keep multi-word keywords that occur more than three times.
  rake_rows <- rake_keywords$ngram > 1 & rake_keywords$freq > 3
  rake_keywords <- rake_keywords[rake_rows & !is.na(rake_rows), ]
  # Chart the rake score of the 20 most frequent keywords; print() is
  # required inside a loop.
  top_rake <- head(
    rake_keywords[order(rake_keywords$freq, decreasing = TRUE), ],
    20
  )
  print(
    barchart(
      key ~ rake,
      data = top_rake,
      col = "blue",
      main = paste0("Keywords identified by RAKE ", doc),
      xlab = "Rake Keyword"
    )
  )
}