Packages

library(stylo)

Corpus all

## Reference corpus: every file found in corpus_all/, each text loaded whole
## (no sampling). This corpus does not contain Philebus.
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "no.sampling",
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## Test text: Philebus.txt from the working directory, split into samples
## of size 1000 (sample.size).
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  corpus.dir = getwd(),
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "normal.sampling",
  sample.size = 1000,
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## Unite both: reference texts first, Philebus samples appended at the end.
corpus1000 <- c(corpus_all, test_corpus) ## 72 elements, last 17 = Philebus

MFW & Frequencies

## Frequency-ranked word list of the united corpus: keep the top 101 entries,
## then drop the one at rank 71 (Socrates), which leaves 100 words.
mfw <- make.frequency.list(corpus1000)[1:101][-71]
mfw
##   [1] "ὁ"        "καί"      "εἰμί"     "δέ"       "οὗτος"    "ἐγώ"     
##   [7] "αὐτός"    "οὐ"       "τε"       "μέν"      "ἄν"       "τις"     
##  [13] "ὅς"       "λέγω"     "γάρ"      "ἠέ"       "ἐν"       "δή"      
##  [19] "γε"       "ἀλλά"     "ἄλλος"    "σύ"       "πᾶς"      "φημί"    
##  [25] "μή"       "ὅστις"    "γίγνομαι" "ὡς"       "περί"     "τίς"     
##  [31] "οὖν"      "ὦ"        "ἔχω"      "εἰ"       "πρός"     "λόγος"   
##  [37] "πολύς"    "κατά"     "εἰς"      "τοιοῦτος" "οὕτως"    "ἑαυτοῦ"  
##  [43] "δοκέω"    "ποιέω"    "οὐδείς"   "ἐκ"       "νῦν"      "εἶπον"   
##  [49] "εἷς"      "διά"      "οἴομαι"   "ἐπί"      "καλός"    "ἀγαθός"  
##  [55] "ἐκεῖνος"  "οὐδέ"     "πόλις"    "οὔτε"     "ἐάν"      "φαίνω"   
##  [61] "μέγας"    "οἷος"     "ἄνθρωπος" "πρότερος" "αὖ"       "δέομαι"  
##  [67] "ὑπό"      "πῶς"      "ἀίω"      "ἕτερος"   "ὀρθός"    "ἕκαστος" 
##  [73] "ἀληθής"   "πάνυ"     "οἶδα"     "ἕ"        "ψυχή"     "ὅσος"    
##  [79] "οὐκοῦν"   "ἄρα"      "παρά"     "κακός"    "μετά"     "βούλομαι"
##  [85] "ἔοικα"    "ἀνήρ"     "ὥσπερ"    "ἔτι"      "ἆρα"      "θεός"    
##  [91] "νόμος"    "ὅδε"      "σῶμα"     "πού"      "δίκαιος"  "φύσις"   
##  [97] "ποτέ"     "μήν"      "μᾶλλον"   "ἀεί"
## Frequencies of the selected words in every text/sample, converted to a
## plain data frame (one row per text or sample, one column per word).
## NOTE(review): absent.sensitive = FALSE presumably keeps every requested
## word as a column even when it is missing from the corpus; confirm against
## the stylo documentation.
freq <- as.data.frame.matrix(
  as.table(
    make.table.of.frequencies(
      corpus1000,
      mfw,
      absent.sensitive = FALSE
    )
  )
)
dim(freq)
## [1]  72 100

Make Shortlist (z-scores computed over all texts)

## Labels of the 17 Philebus samples: "Philebus_1" ... "Philebus_17".
Phlb <- paste0("Philebus_", 1:17)

## Pairwise Delta distances between all rows of freq, as a square matrix.
d <- dist.delta(freq)
dm <- as.matrix(d)

## The first 55 rows are the reference texts; keep their labels.
my_rows <- rownames(dm)[seq_len(55)]

## Distances from each reference text (rows) to each Philebus sample
## (columns), stored as a data frame.
sdm <- as.data.frame(dm[seq_len(55), is.element(colnames(dm), Phlb)])
rownames(sdm) <- my_rows

## For every Philebus sample (one column of sdm), list the five reference
## texts with the smallest distance, nearest first.
## Row labels are taken from the columns actually present in sdm rather than
## from a separately typed list, so they cannot drift out of step with the
## data if the column order or count ever changes.
n <- ncol(sdm)
x <- t(vapply(
  sdm,
  function(distances) rownames(sdm)[order(distances)][1:5],
  character(5)
))
x
##             [,1]         [,2]        [,3]         [,4]         [,5]        
## Philebus_1  "Sophist"    "Statesman" "Laws7"      "Laws4"      "Protagoras"
## Philebus_2  "Laws10"     "Phaedrus"  "Sophist"    "Statesman"  "Laws7"     
## Philebus_3  "Sophist"    "Statesman" "Laws3"      "Theaetetus" "Laws4"     
## Philebus_4  "Sophist"    "Laws2"     "Theaetetus" "Laws7"      "Statesman" 
## Philebus_5  "Laws1"      "Laws4"     "Laws7"      "Statesman"  "Laws3"     
## Philebus_6  "Statesman"  "Laws1"     "Laws7"      "Laws2"      "Laws3"     
## Philebus_7  "Statesman"  "Laws2"     "Laws7"      "Laws3"      "Laws1"     
## Philebus_8  "Sophist"    "Statesman" "Laws3"      "Laws2"      "Laws4"     
## Philebus_9  "Theaetetus" "Sophist"   "Laws2"      "Laws1"      "Republic1" 
## Philebus_10 "Statesman"  "Laws3"     "Laws2"      "Sophist"    "Laws1"     
## Philebus_11 "Laws1"      "Gorgias"   "Laches"     "Republic6"  "Laws2"     
## Philebus_12 "Statesman"  "Laws7"     "Laws3"      "Laws4"      "Laws10"    
## Philebus_13 "Sophist"    "Statesman" "Laws3"      "Laws2"      "Laws7"     
## Philebus_14 "Sophist"    "Statesman" "Laws2"      "Laws3"      "Phaedrus"  
## Philebus_15 "Laws7"      "Statesman" "Cratylus"   "Laws3"      "Sophist"   
## Philebus_16 "Laws2"      "Statesman" "Laws1"      "Laws3"      "Cratylus"  
## Philebus_17 "Laws1"      "Laws3"     "Laws2"      "Laws7"      "Statesman"

Make Shortlist (z-scores computed over the reference corpus only)

## Delta classification of the 17 Philebus samples (rows 56-72 of freq)
## against the 55 reference texts (rows 1-55), reporting the five nearest
## candidates per sample. z.scores.both.sets is switched off here.
delta1 <- perform.delta(
  training.set = freq[1:55, ],
  test.set = freq[56:72, ],
  distance = "delta",
  no.of.candidates = 5,
  z.scores.both.sets = FALSE
)
delta1[["ranking"]]
##             1           2            3            4           5           
## Philebus_1  "Sophist"   "Statesman"  "Laws7"      "Laws4"     "Protagoras"
## Philebus_2  "Sophist"   "Laws10"     "Statesman"  "Phaedrus"  "Laws3"     
## Philebus_3  "Sophist"   "Statesman"  "Laws3"      "Laws4"     "Theaetetus"
## Philebus_4  "Sophist"   "Laws2"      "Theaetetus" "Statesman" "Laws7"     
## Philebus_5  "Laws4"     "Laws1"      "Laws7"      "Statesman" "Laws3"     
## Philebus_6  "Laws1"     "Statesman"  "Laws2"      "Laws7"     "Laws3"     
## Philebus_7  "Statesman" "Laws2"      "Laws7"      "Epinomis"  "Laws3"     
## Philebus_8  "Sophist"   "Statesman"  "Laws3"      "Laws2"     "Laws4"     
## Philebus_9  "Sophist"   "Theaetetus" "Laws2"      "Laws1"     "Republic1" 
## Philebus_10 "Statesman" "Laws3"      "Sophist"    "Laws2"     "Laws1"     
## Philebus_11 "Laws1"     "Laws2"      "Gorgias"    "Sophist"   "Laches"    
## Philebus_12 "Statesman" "Laws7"      "Republic9"  "Laws10"    "Laws4"     
## Philebus_13 "Sophist"   "Statesman"  "Laws3"      "Laws2"     "Laws4"     
## Philebus_14 "Sophist"   "Statesman"  "Laws2"      "Laws3"     "Theaetetus"
## Philebus_15 "Laws7"     "Cratylus"   "Laws3"      "Sophist"   "Theaetetus"
## Philebus_16 "Laws2"     "Statesman"  "Laws1"      "Laws3"     "Cratylus"  
## Philebus_17 "Laws1"     "Laws3"      "Laws2"      "Laws7"     "Statesman" 
## attr(,"description")
## [1] "predicted classes with their runner-ups"
## Delta scores belonging to the ranked candidates of delta1.
delta1[["scores"]]
##                     1        2        3        4        5
## Philebus_1  0.9676037 1.122057 1.170879 1.195967 1.207110
## Philebus_2  1.2065061 1.207241 1.225184 1.233250 1.238732
## Philebus_3  1.2549495 1.273373 1.311981 1.367779 1.399472
## Philebus_4  1.0413330 1.136596 1.154685 1.182364 1.203936
## Philebus_5  1.1044829 1.118566 1.134280 1.142611 1.164133
## Philebus_6  1.3016101 1.310024 1.332382 1.358906 1.384327
## Philebus_7  1.1754862 1.193079 1.211349 1.238194 1.239209
## Philebus_8  1.2443948 1.373784 1.384514 1.418846 1.497310
## Philebus_9  1.4273551 1.431466 1.434651 1.495979 1.522404
## Philebus_10 1.3215110 1.406722 1.439911 1.441624 1.456667
## Philebus_11 1.2624594 1.285467 1.302802 1.312611 1.316706
## Philebus_12 1.1075352 1.140141 1.189376 1.197329 1.203037
## Philebus_13 1.0875262 1.106343 1.124710 1.136040 1.181927
## Philebus_14 1.1322134 1.180965 1.254489 1.266012 1.280901
## Philebus_15 1.1119978 1.113894 1.123937 1.124668 1.131898
## Philebus_16 1.3174663 1.371384 1.388362 1.413595 1.434500
## Philebus_17 1.0080851 1.052048 1.056533 1.109706 1.127742
## attr(,"description")
## [1] "Delta scores, ordered according to candidates"
## for comparison: the same run with z.scores.both.sets = TRUE

## Same split of freq as for delta1 (rows 1-55 as training set, rows 56-72
## as test set), but with z.scores.both.sets switched on.
delta2 <- perform.delta(
  training.set = freq[1:55, ],
  test.set = freq[56:72, ],
  distance = "delta",
  no.of.candidates = 5,
  z.scores.both.sets = TRUE
)
delta2[["ranking"]]
##             1            2           3            4            5           
## Philebus_1  "Sophist"    "Statesman" "Laws7"      "Laws4"      "Protagoras"
## Philebus_2  "Laws10"     "Phaedrus"  "Sophist"    "Statesman"  "Laws7"     
## Philebus_3  "Sophist"    "Statesman" "Laws3"      "Theaetetus" "Laws4"     
## Philebus_4  "Sophist"    "Laws2"     "Theaetetus" "Laws7"      "Statesman" 
## Philebus_5  "Laws1"      "Laws4"     "Laws7"      "Statesman"  "Laws3"     
## Philebus_6  "Statesman"  "Laws1"     "Laws7"      "Laws2"      "Laws3"     
## Philebus_7  "Statesman"  "Laws2"     "Laws7"      "Laws3"      "Laws1"     
## Philebus_8  "Sophist"    "Statesman" "Laws3"      "Laws2"      "Laws4"     
## Philebus_9  "Theaetetus" "Sophist"   "Laws2"      "Laws1"      "Republic1" 
## Philebus_10 "Statesman"  "Laws3"     "Laws2"      "Sophist"    "Laws1"     
## Philebus_11 "Laws1"      "Gorgias"   "Laches"     "Republic6"  "Laws2"     
## Philebus_12 "Statesman"  "Laws7"     "Laws3"      "Laws4"      "Laws10"    
## Philebus_13 "Sophist"    "Statesman" "Laws3"      "Laws2"      "Laws7"     
## Philebus_14 "Sophist"    "Statesman" "Laws2"      "Laws3"      "Phaedrus"  
## Philebus_15 "Laws7"      "Statesman" "Cratylus"   "Laws3"      "Sophist"   
## Philebus_16 "Laws2"      "Statesman" "Laws1"      "Laws3"      "Cratylus"  
## Philebus_17 "Laws1"      "Laws3"     "Laws2"      "Laws7"      "Statesman" 
## attr(,"description")
## [1] "predicted classes with their runner-ups"
## Delta scores belonging to the ranked candidates of delta2.
delta2[["scores"]]
##                     1         2         3         4         5
## Philebus_1  0.8243880 0.9461000 0.9811575 1.0102986 1.0130875
## Philebus_2  1.0002920 1.0084294 1.0155985 1.0294057 1.0341863
## Philebus_3  1.0240379 1.0397440 1.0993220 1.1245411 1.1360292
## Philebus_4  0.8629180 0.9334550 0.9529578 0.9696327 0.9743726
## Philebus_5  0.9138170 0.9158632 0.9166366 0.9468616 0.9492765
## Philebus_6  1.0699696 1.0736744 1.1046025 1.1064011 1.1230333
## Philebus_7  0.9363344 0.9729947 0.9917472 0.9930119 1.0053274
## Philebus_8  0.9905226 1.0803036 1.1102350 1.1357096 1.1979431
## Philebus_9  1.1151917 1.1238661 1.1418482 1.1781119 1.2011876
## Philebus_10 1.0673672 1.1204329 1.1521679 1.1528030 1.1681888
## Philebus_11 1.0585023 1.0605358 1.0712905 1.0830221 1.0843613
## Philebus_12 0.9047089 0.9307059 0.9850718 0.9912397 0.9955936
## Philebus_13 0.9146190 0.9256685 0.9499466 0.9501581 0.9939371
## Philebus_14 0.9225994 0.9646321 1.0253777 1.0403143 1.0483780
## Philebus_15 0.9130220 0.9303398 0.9312650 0.9319110 0.9344285
## Philebus_16 1.0258327 1.0716923 1.0914840 1.1055128 1.1317762
## Philebus_17 0.8216427 0.8580213 0.8655289 0.8919713 0.9251122
## attr(,"description")
## [1] "Delta scores, ordered according to candidates"