Packages

library(stylo)

Corpus 1000

## Reference corpus: every file found in the "corpus_all" directory, parsed
## with normal.sampling at sample.size = 1000.
## this corpus does not contain Philebus
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  encoding = "UTF-8",
  preserve.case = FALSE,
  sampling = "normal.sampling",
  sample.size = 1000
)

## Test text: Philebus.txt, looked up in the current working directory and
## parsed with exactly the same settings as the reference corpus.
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  corpus.dir = getwd(),
  markup.type = "plain",
  corpus.lang = "Other",
  encoding = "UTF-8",
  preserve.case = FALSE,
  sampling = "normal.sampling",
  sample.size = 1000
)

## unite: reference samples first, Philebus samples appended at the end
corpus1000 <- c(corpus_all, test_corpus) ## 534 elements, last 17 = Philebus

MFW & Frequencies

## Frequency list of the combined corpus: keep the first 101 entries, then
## drop the entry at position 72 (Socrates), which leaves 100 features.
## NOTE: the removal is positional, so it is tied to this exact corpus.
mfw <- make.frequency.list(corpus1000)[seq_len(101)][-72]
mfw
##   [1] "ὁ"        "καί"      "εἰμί"     "δέ"       "οὗτος"    "ἐγώ"     
##   [7] "αὐτός"    "οὐ"       "τε"       "μέν"      "ἄν"       "τις"     
##  [13] "ὅς"       "λέγω"     "γάρ"      "ἠέ"       "ἐν"       "δή"      
##  [19] "γε"       "ἀλλά"     "ἄλλος"    "σύ"       "πᾶς"      "φημί"    
##  [25] "ὅστις"    "μή"       "γίγνομαι" "ὡς"       "περί"     "τίς"     
##  [31] "οὖν"      "ὦ"        "ἔχω"      "εἰ"       "πρός"     "λόγος"   
##  [37] "πολύς"    "κατά"     "εἰς"      "τοιοῦτος" "οὕτως"    "ἑαυτοῦ"  
##  [43] "δοκέω"    "ποιέω"    "οὐδείς"   "ἐκ"       "εἶπον"    "νῦν"     
##  [49] "εἷς"      "οἴομαι"   "διά"      "καλός"    "ἐπί"      "ἐκεῖνος" 
##  [55] "ἀγαθός"   "πόλις"    "οὐδέ"     "οὔτε"     "ἐάν"      "μέγας"   
##  [61] "φαίνω"    "οἷος"     "πρότερος" "ἄνθρωπος" "αὖ"       "δέομαι"  
##  [67] "πῶς"      "ἕτερος"   "ἀίω"      "ὑπό"      "ἕκαστος"  "ὀρθός"   
##  [73] "πάνυ"     "ἀληθής"   "οἶδα"     "ἕ"        "ὅσος"     "ψυχή"    
##  [79] "οὐκοῦν"   "ἄρα"      "κακός"    "μετά"     "παρά"     "βούλομαι"
##  [85] "ἆρα"      "ὥσπερ"    "ἔοικα"    "ἀνήρ"     "ἔτι"      "θεός"    
##  [91] "σῶμα"     "ὅδε"      "πού"      "νόμος"    "δίκαιος"  "φύσις"   
##  [97] "ποτέ"     "μήν"      "ἀεί"      "μᾶλλον"
## Table of frequencies for the features in mfw, one row per corpus sample,
## then converted (table -> data frame) for the distance computation below.
freq <- make.table.of.frequencies(corpus1000, mfw, absent.sensitive = FALSE)
freq <- as.data.frame.matrix(as.table(freq))
dim(freq)
## [1] 534 100

Make Shortlist 3000

## Pairwise distances between all samples, computed with dist.delta() on the
## frequency table.
d <- dist.delta(freq)
dm <- as.matrix(d) ## distance as matrix 534 x 534

## Names of the Philebus samples (the text under test).
Phlb <- paste0("Philebus_", seq_len(17))

## Fail early if any expected Philebus sample is missing from the matrix,
## instead of silently producing a narrower table.
stopifnot(all(Phlb %in% colnames(dm)))

## Reference rows are all samples that are NOT Philebus. Selecting them by
## name rather than by the fixed range 1:517 keeps the subset correct if the
## reference corpus changes size or the samples are ordered differently.
is_reference <- !(rownames(dm) %in% Phlb)
my_rows <- rownames(dm)[is_reference]

## subset columns, save as df
## Columns are picked by name so they are guaranteed to come out in Phlb
## order; drop = FALSE keeps a matrix even for a single row or column.
sdm <- as.data.frame(dm[is_reference, Phlb, drop = FALSE])
rownames(sdm) <- my_rows

## select 5 minimal values for each block
## For every column of sdm (one Philebus sample), take the names of the
## n_nearest rows with the smallest distance, closest first. The result is a
## character matrix with one row per Philebus sample and n_nearest columns.
n <- ncol(sdm)
n_nearest <- 5

## vapply() builds the whole matrix in one step (no rbind() growth inside a
## loop) and checks that every column yields exactly n_nearest names;
## seq_len() is safe when sdm has zero columns.
x <- t(vapply(
  seq_len(n),
  function(i) rownames(sdm)[order(sdm[, i])][seq_len(n_nearest)],
  character(n_nearest)
))
rownames(x) <- Phlb
x
##             [,1]          [,2]          [,3]           [,4]          
## Philebus_1  "Sophist_11"  "Statesman_2" "Laws7_10"     "Sophist_10"  
## Philebus_2  "Epistles_4"  "Laws7_8"     "Laws7_11"     "Epinomis_5"  
## Philebus_3  "Sophist_1"   "Sophist_11"  "Laws6_1"      "Laws1_7"     
## Philebus_4  "Laws2_4"     "Sophist_11"  "Statesman_2"  "Laws7_11"    
## Philebus_5  "Laws7_11"    "Laws4_2"     "Laws7_2"      "Laws7_8"     
## Philebus_6  "Laws4_2"     "Laws7_8"     "Sophist_9"    "Sophist_6"   
## Philebus_7  "Laws3_5"     "Statesman_7" "Statesman_8"  "Epinomis_5"  
## Philebus_8  "Sophist_6"   "Statesman_3" "Sophist_2"    "Statesman_1" 
## Philebus_9  "Laws7_11"    "Laws2_2"     "Laws3_7"      "Sophist_10"  
## Philebus_10 "Laws2_7"     "Sophist_11"  "Statesman_16" "Statesman_3" 
## Philebus_11 "Sophist_9"   "Laws10_6"    "Laws2_2"      "Laws7_11"    
## Philebus_12 "Laws2_5"     "Laws8_2"     "Laws8_3"      "Republic8_7" 
## Philebus_13 "Sophist_6"   "Laws3_7"     "Statesman_8"  "Theaetetus_1"
## Philebus_14 "Sophist_6"   "Laws1_7"     "Statesman_8"  "Sophist_11"  
## Philebus_15 "Lovers_2"    "Statesman_7" "Laws7_8"      "Theaetetus_1"
## Philebus_16 "Statesman_1" "Laws2_5"     "Laws2_1"      "Laws3_4"     
## Philebus_17 "Laws3_7"     "Laws1_8"     "Laws7_11"     "Laws6_10"    
##             [,5]          
## Philebus_1  "Laws9_3"     
## Philebus_2  "Laws3_8"     
## Philebus_3  "Epistles_12" 
## Philebus_4  "Sophist_6"   
## Philebus_5  "Statesman_2" 
## Philebus_6  "Statesman_1" 
## Philebus_7  "Cratylus_14" 
## Philebus_8  "Statesman_16"
## Philebus_9  "Laws2_1"     
## Philebus_10 "Laws1_7"     
## Philebus_11 "Laws9_3"     
## Philebus_12 "Statesman_8" 
## Philebus_13 "Republic1_1" 
## Philebus_14 "Statesman_16"
## Philebus_15 "Laws4_2"     
## Philebus_16 "Laws3_3"     
## Philebus_17 "Laws6_1"