Packages

library(stylo)

Corpus 500

## Reference corpus: files = "all" from the "corpus_all" directory, split with
## normal sampling into samples of size 500.
## This corpus does not contain Philebus.
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "normal.sampling",
  sample.size = 500,
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## Text under investigation: Philebus.txt from the working directory, parsed
## and sampled with exactly the same settings as the reference corpus.
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  corpus.dir = getwd(),
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "normal.sampling",
  sample.size = 500,
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## Unite both corpora; the Philebus samples come after the reference samples.
corpus500 <- c(corpus_all, test_corpus)

MFW & Frequencies

## Frequency list of the combined corpus, cut down to its first 100 entries.
mfw <- make.frequency.list(corpus500)[1:100]

## Remove Socrates, which sits at position 73 of that list.
## NOTE(review): the position is hard-coded and only valid for this exact
## corpus; re-check it whenever the input texts change.
mfw <- mfw[-73]

mfw
##  [1] "ὁ"        "καί"      "εἰμί"     "δέ"       "οὗτος"    "ἐγώ"     
##  [7] "αὐτός"    "οὐ"       "τε"       "μέν"      "ἄν"       "τις"     
## [13] "ὅς"       "λέγω"     "γάρ"      "ἠέ"       "ἐν"       "δή"      
## [19] "γε"       "ἀλλά"     "ἄλλος"    "σύ"       "πᾶς"      "φημί"    
## [25] "ὅστις"    "μή"       "γίγνομαι" "ὡς"       "περί"     "τίς"     
## [31] "οὖν"      "ὦ"        "ἔχω"      "εἰ"       "πρός"     "λόγος"   
## [37] "πολύς"    "κατά"     "εἰς"      "τοιοῦτος" "οὕτως"    "ἑαυτοῦ"  
## [43] "δοκέω"    "οὐδείς"   "ποιέω"    "ἐκ"       "νῦν"      "εἶπον"   
## [49] "διά"      "εἷς"      "οἴομαι"   "ἐπί"      "καλός"    "ἐκεῖνος" 
## [55] "ἀγαθός"   "πόλις"    "οὐδέ"     "οὔτε"     "ἐάν"      "μέγας"   
## [61] "φαίνω"    "οἷος"     "ἄνθρωπος" "πρότερος" "αὖ"       "δέομαι"  
## [67] "ἕτερος"   "πῶς"      "ὑπό"      "ἀίω"      "ἕκαστος"  "ὀρθός"   
## [73] "ἀληθής"   "πάνυ"     "οἶδα"     "ἕ"        "ψυχή"     "οὐκοῦν"  
## [79] "ὅσος"     "ἄρα"      "κακός"    "μετά"     "παρά"     "βούλομαι"
## [85] "ἔοικα"    "ὥσπερ"    "ἀνήρ"     "ἆρα"      "ἔτι"      "θεός"    
## [91] "ὅδε"      "πού"      "σῶμα"     "νόμος"    "φύσις"    "δίκαιος" 
## [97] "ποτέ"     "μήν"      "ἀεί"
## Table of frequencies of the selected words for every sample in corpus500,
## converted to a plain data frame (one row per sample, one column per word).
freq <- as.data.frame.matrix(
  as.table(
    make.table.of.frequencies(
      corpus500,
      mfw,
      absent.sensitive = FALSE
    )
  )
)

## Sanity check: number of samples x number of words.
dim(freq)
## [1] 1092   99

Make Shortlist 3000

## Delta distances between all samples (rows of freq), as a full matrix.
d <- dist.delta(freq)
dm <- as.matrix(d)

## Names of the 35 Philebus samples.
Phlb <- paste0("Philebus_", 1:35)

## Fail early, and with a readable message, if any of them is absent.
if (!all(Phlb %in% colnames(dm))) {
  stop(
    "distance matrix is missing Philebus samples: ",
    paste(setdiff(Phlb, colnames(dm)), collapse = ", "),
    call. = FALSE
  )
}

## Candidate rows are all samples that are not Philebus samples. Deriving them
## from the names (instead of a fixed row count) keeps the selection correct
## when the size of the reference corpus changes.
my_rows <- rownames(dm)[!(rownames(dm) %in% Phlb)]

## Subset by name, save as df: candidates in rows, Philebus samples in columns.
## Indexing columns with Phlb guarantees that the column order matches Phlb;
## drop = FALSE keeps the matrix shape even for a single row or column.
sdm <- as.data.frame(dm[my_rows, Phlb, drop = FALSE])
rownames(sdm) <- my_rows

## select 5 minimal values for each block
## Select the 5 minimal values for each block: for every column of sdm, find
## the names of the 5 rows with the smallest distances, closest first.
n <- ncol(sdm)

## Preallocate the result (one row per column of sdm, 5 neighbours each)
## instead of growing it with rbind() on every iteration. Rows are labelled
## with the columns actually processed, so labels cannot drift out of sync
## with the data.
x <- matrix(
  NA_character_,
  nrow = n,
  ncol = 5,
  dimnames = list(colnames(sdm), NULL)
)

## seq_len() makes the loop a no-op when sdm has no columns.
for (i in seq_len(n)) {
  o <- order(sdm[[i]])
  z <- rownames(sdm)[o]
  ## Entries are NA if sdm has fewer than 5 rows.
  z <- z[1:5]
  x[i, ] <- z
}
x
##             [,1]           [,2]           [,3]           [,4]          
## Philebus_1  "Cratylus_12"  "Cratylus_21"  "Sophist_20"   "Sophist_21"  
## Philebus_2  "Republic6_15" "Statesman_4"  "Sophist_14"   "Laws7_16"    
## Philebus_3  "Epinomis_9"   "Parmenides_3" "Laws10_7"     "Laws11_5"    
## Philebus_4  "Statesman_4"  "Phaedrus_7"   "Gorgias_52"   "Laws11_7"    
## Philebus_5  "Sophist_14"   "Statesman_29" "Statesman_7"  "Republic7_11"
## Philebus_6  "Statesman_4"  "Statesman_19" "Sophist_16"   "Laws3_11"    
## Philebus_7  "Statesman_6"  "Laws7_22"     "Statesman_4"  "Republic3_12"
## Philebus_8  "Phaedrus_11"  "Statesman_6"  "Laws1_14"     "Statesman_3" 
## Philebus_9  "Laws7_15"     "Epinomis_9"   "Epinomis_4"   "Laws10_9"    
## Philebus_10 "Laws7_21"     "Laws7_22"     "Laws4_4"      "Laws7_3"     
## Philebus_11 "Laws2_4"      "Laws7_3"      "Laws7_16"     "Statesman_16"
## Philebus_12 "Republic6_12" "Laws7_16"     "Laws1_9"      "Charmides_1" 
## Philebus_13 "Symposium_24" "Republic7_14" "Statesman_13" "Statesman_15"
## Philebus_14 "Laws7_19"     "Sophist_4"    "Statesman_16" "Laws7_18"    
## Philebus_15 "Statesman_15" "Statesman_5"  "Statesman_31" "Statesman_12"
## Philebus_16 "Sophist_28"   "Sophist_3"    "Sophist_17"   "Statesman_3" 
## Philebus_17 "Laws7_3"      "Laws7_23"     "Meno_18"      "Laws2_9"     
## Philebus_18 "Laws2_4"      "Sophist_30"   "Laws1_15"     "Laws1_14"    
## Philebus_19 "Sophist_30"   "Phaedrus_23"  "Laws1_14"     "Laws3_1"     
## Philebus_20 "Statesman_15" "Sophist_30"   "Laws7_1"      "Laws7_18"    
## Philebus_21 "Laws9_6"      "Laws2_4"      "Laws7_3"      "Sophist_14"  
## Philebus_22 "Sophist_9"    "Theages_1"    "Cratylus_5"   "Cratylus_17" 
## Philebus_23 "Laws7_19"     "Statesman_15" "Statesman_16" "Laws2_14"    
## Philebus_24 "Epistles_20"  "Republic8_15" "Statesman_31" "Republic7_8" 
## Philebus_25 "Statesman_15" "Statesman_16" "Statesman_7"  "Statesman_3" 
## Philebus_26 "Republic1_2"  "Phaedrus_2"   "Republic6_9"  "Statesman_3" 
## Philebus_27 "Statesman_15" "Statesman_7"  "Statesman_31" "Statesman_13"
## Philebus_28 "Sophist_32"   "Statesman_31" "Sophist_8"    "Sophist_3"   
## Philebus_29 "Statesman_13" "Statesman_15" "Statesman_3"  "Laws3_1"     
## Philebus_30 "Epistles_7"   "Theages_1"    "Laws7_3"      "Sophist_4"   
## Philebus_31 "Laws7_16"     "Laws3_7"      "Statesman_7"  "Sophist_11"  
## Philebus_32 "Statesman_3"  "Statesman_15" "Laws7_3"      "Statesman_12"
## Philebus_33 "Sophist_11"   "Republic1_2"  "Statesman_13" "Statesman_15"
## Philebus_34 "Laws6_20"     "Laws7_21"     "Laws1_15"     "Laws1_11"    
## Philebus_35 "Statesman_15" "Laws9_6"      "Laws7_3"      "Statesman_19"
##             [,5]           
## Philebus_1  "Statesman_6"  
## Philebus_2  "Gorgias_16"   
## Philebus_3  "Laws3_16"     
## Philebus_4  "Sophist_22"   
## Philebus_5  "Republic7_8"  
## Philebus_6  "Alcibiades2_7"
## Philebus_7  "Sophist_14"   
## Philebus_8  "Laws2_7"      
## Philebus_9  "Laws5_10"     
## Philebus_10 "Sophist_14"   
## Philebus_11 "Lysis_3"      
## Philebus_12 "Cratylus_17"  
## Philebus_13 "Laws1_4"      
## Philebus_14 "Statesman_14" 
## Philebus_15 "Statesman_2"  
## Philebus_16 "Statesman_31" 
## Philebus_17 "Statesman_16" 
## Philebus_18 "Statesman_33" 
## Philebus_19 "Statesman_5"  
## Philebus_20 "Laws7_3"      
## Philebus_21 "Laws7_16"     
## Philebus_22 "Republic1_15" 
## Philebus_23 "Laws3_1"      
## Philebus_24 "Republic8_14" 
## Philebus_25 "Laws2_14"     
## Philebus_26 "Statesman_15" 
## Philebus_27 "Laws10_9"     
## Philebus_28 "Laws1_13"     
## Philebus_29 "Theaetetus_43"
## Philebus_30 "Theaetetus_1" 
## Philebus_31 "Theaetetus_1" 
## Philebus_32 "Laws7_16"     
## Philebus_33 "Laws7_3"      
## Philebus_34 "Republic6_12" 
## Philebus_35 "Laws7_22"