Packages

library(stylo)

1000-word blocks

## "random.sampling" draws the reference samples at random, so the seed is
## fixed here to make the samples -- and every distance computed from them --
## identical from one run to the next. The value itself is arbitrary.
set.seed(1000)

## Reference corpus: all files found in corpus_all/ (this corpus does not
## contain Philebus). Three random samples of 1000 words are requested per
## text, drawn with replacement.
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "random.sampling",
  preserve.case = FALSE,
  encoding = "UTF-8",
  number.of.samples = 3,
  sampling.with.replacement = TRUE,
  sample.size = 1000
)

## Test corpus: Philebus.txt, read from the working directory and cut into
## 1000-word samples with "normal.sampling" and no overlap between samples.
## Features are single words (features = "w", ngram.size = 1).
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  markup.type = "plain",
  corpus.lang = "Other",
  sample.size = 1000,
  sampling = "normal.sampling",
  sample.overlap = 0,
  features = "w",
  ngram.size = 1,
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## Reference samples first, Philebus samples last; later steps rely on
## this ordering.
corpus1000 <- c(corpus_all, test_corpus)

## Frequency-ordered word list of the combined corpus, cut down to (at most)
## the first 100 entries. min() prevents NA padding if the list is shorter.
mfw <- make.frequency.list(corpus1000)
mfw <- mfw[seq_len(min(100, length(mfw)))]
mfw
##   [1] "ὁ"        "καί"      "εἰμί"     "δέ"       "οὗτος"    "ἐγώ"     
##   [7] "αὐτός"    "οὐ"       "τε"       "μέν"      "ἄν"       "τις"     
##  [13] "ὅς"       "λέγω"     "ἠέ"       "γάρ"      "ἐν"       "γε"      
##  [19] "δή"       "ἀλλά"     "σύ"       "ἄλλος"    "πᾶς"      "φημί"    
##  [25] "τίς"      "μή"       "γίγνομαι" "ὅστις"    "περί"     "ὡς"      
##  [31] "οὖν"      "ὦ"        "εἰ"       "ἔχω"      "πρός"     "πολύς"   
##  [37] "λόγος"    "τοιοῦτος" "εἰς"      "κατά"     "οὕτως"    "δοκέω"   
##  [43] "ποιέω"    "νῦν"      "ἑαυτοῦ"   "εἶπον"    "οὐδείς"   "ἀγαθός"  
##  [49] "καλός"    "οἴομαι"   "ἐκ"       "πόλις"    "διά"      "οὐδέ"    
##  [55] "ἐπί"      "ἐάν"      "εἷς"      "ὀρθός"    "σωκράτης" "πρότερος"
##  [61] "ἐκεῖνος"  "οἷος"     "ἄνθρωπος" "φαίνω"    "πῶς"      "οὔτε"    
##  [67] "οὐκοῦν"   "μέγας"    "ἀληθής"   "δέομαι"   "πάνυ"     "ἀίω"     
##  [73] "ὑπό"      "αὖ"       "νόμος"    "μετά"     "ἕ"        "ἄρα"     
##  [79] "οἶδα"     "ἕτερος"   "βούλομαι" "ψυχή"     "ἡδονά"    "παρά"    
##  [85] "ὅσος"     "κακός"    "ἆρα"      "ἕκαστος"  "ἔτι"      "ἀνήρ"    
##  [91] "ἔοικα"    "ποτέ"     "θεός"     "ὥσπερ"    "ὅδε"      "δίκαιος" 
##  [97] "σῶμα"     "μήν"      "μόνος"    "πού"
## Table of frequencies of the words in `mfw` for every sample in corpus1000,
## converted to a plain data frame (samples in rows).
freq1000 <- as.data.frame.matrix(
  as.table(
    make.table.of.frequencies(corpus1000, mfw, absent.sensitive = FALSE)
  )
)

## Delta distances between all pairs of samples, as a full square matrix.
d1000 <- dist.delta(freq1000)
dm1000 <- as.matrix(d1000)

## Names of the 17 Philebus test blocks.
Phlb <- paste0("Philebus_", 1:17)

## Every sample that is not a Philebus block is a reference sample. Deriving
## the rows from the names avoids hard-coding how many reference samples
## the corpus happens to contain.
is_reference <- !(rownames(dm1000) %in% Phlb)
my_rows1000 <- rownames(dm1000)[is_reference]

## subset columns, save as df: reference samples in rows, Philebus blocks in
## columns. drop = FALSE keeps the matrix shape even if only a single row or
## column is selected.
sdm1000 <- as.data.frame(
  dm1000[is_reference, colnames(dm1000) %in% Phlb, drop = FALSE]
)
rownames(sdm1000) <- my_rows1000
## select 5 minimal values for each block: for every column of sdm1000, take
## the names of the five rows with the smallest distance, closest first.
## The result has one row per column of sdm1000 and five columns.
n <- ncol(sdm1000)

## Preallocate the result and label its rows with the actual column names of
## sdm1000, so each row is guaranteed to describe the column it was computed
## from (independent of the order or length of any separate name vector).
x <- matrix(
  NA_character_,
  nrow = n,
  ncol = 5,
  dimnames = list(colnames(sdm1000), NULL)
)

## seq_len() makes the loop a no-op when sdm1000 has no columns.
for (i in seq_len(n)) {
  nearest <- order(sdm1000[[i]])[1:5]
  x[i, ] <- rownames(sdm1000)[nearest]
}
x
##             [,1]           [,2]           [,3]           [,4]            
## Philebus_1  "Sophist_3"    "Protagoras_2" "Laws1_2"      "Sophist_1"     
## Philebus_2  "Laws1_2"      "Symposium_2"  "Laws10_1"     "Laws3_3"       
## Philebus_3  "Sophist_2"    "Laws3_2"      "Sophist_1"    "Sophist_3"     
## Philebus_4  "Statesman_3"  "Phaedrus_1"   "Theaetetus_3" "Republic7_2"   
## Philebus_5  "Laws2_1"      "Statesman_3"  "Laws10_1"     "Republic7_2"   
## Philebus_6  "Statesman_3"  "Laws2_2"      "Laws7_1"      "HippiasMinor_3"
## Philebus_7  "Laws3_2"      "Laws7_1"      "Statesman_1"  "Laws7_2"       
## Philebus_8  "Laws3_3"      "Sophist_3"    "Statesman_3"  "Sophist_2"     
## Philebus_9  "Laws2_2"      "Laws1_3"      "Theaetetus_1" "Laws7_2"       
## Philebus_10 "Statesman_1"  "Laws1_2"      "Laws10_1"     "Laws3_3"       
## Philebus_11 "Laws1_3"      "Theaetetus_1" "Gorgias_3"    "Laches_2"      
## Philebus_12 "Statesman_1"  "Statesman_2"  "Laws10_1"     "Republic7_1"   
## Philebus_13 "Statesman_1"  "Sophist_2"    "Sophist_1"    "Laws1_2"       
## Philebus_14 "Sophist_1"    "Statesman_3"  "Laws10_1"     "Republic9_2"   
## Philebus_15 "Theaetetus_1" "Theaetetus_3" "Cratylus_2"   "Laws7_3"       
## Philebus_16 "Laws1_3"      "Laws3_2"      "Laws1_2"      "Laws3_3"       
## Philebus_17 "Laws1_2"      "Laws1_3"      "Laws7_1"      "Laws2_3"       
##             [,5]            
## Philebus_1  "Laws7_2"       
## Philebus_2  "Phaedrus_2"    
## Philebus_3  "Phaedrus_2"    
## Philebus_4  "Sophist_1"     
## Philebus_5  "Statesman_1"   
## Philebus_6  "Statesman_1"   
## Philebus_7  "Laws7_3"       
## Philebus_8  "Sophist_1"     
## Philebus_9  "Laws2_1"       
## Philebus_10 "Laws2_1"       
## Philebus_11 "HippiasMinor_3"
## Philebus_12 "Laws12_1"      
## Philebus_13 "Laws2_1"       
## Philebus_14 "Statesman_1"   
## Philebus_15 "Statesman_1"   
## Philebus_16 "Laws2_1"       
## Philebus_17 "Laws1_1"
## Delta classification of the Philebus blocks against the reference samples.
## Rows 1-165 of freq1000 form the training set and rows 166-182 the test
## set; the five best candidates are kept for every test sample.
training_rows1000 <- 1:165
test_rows1000 <- 166:182

delta1000 <- perform.delta(
  training.set = freq1000[training_rows1000, ],
  test.set = freq1000[test_rows1000, ],
  distance = "delta",
  no.of.candidates = 5,
  z.scores.both.sets = TRUE
)

## Candidates for each test sample, best match first.
delta1000$ranking
##             1            2            3            4             
## Philebus_1  "Sophist"    "Protagoras" "Laws1"      "Sophist"     
## Philebus_2  "Laws1"      "Symposium"  "Laws10"     "Laws3"       
## Philebus_3  "Sophist"    "Laws3"      "Sophist"    "Sophist"     
## Philebus_4  "Statesman"  "Phaedrus"   "Theaetetus" "Republic7"   
## Philebus_5  "Laws2"      "Statesman"  "Laws10"     "Republic7"   
## Philebus_6  "Statesman"  "Laws2"      "Laws7"      "HippiasMinor"
## Philebus_7  "Laws3"      "Laws7"      "Statesman"  "Laws7"       
## Philebus_8  "Laws3"      "Sophist"    "Statesman"  "Sophist"     
## Philebus_9  "Laws2"      "Laws1"      "Theaetetus" "Laws7"       
## Philebus_10 "Statesman"  "Laws1"      "Laws10"     "Laws3"       
## Philebus_11 "Laws1"      "Theaetetus" "Gorgias"    "Laches"      
## Philebus_12 "Statesman"  "Statesman"  "Laws10"     "Republic7"   
## Philebus_13 "Statesman"  "Sophist"    "Sophist"    "Laws1"       
## Philebus_14 "Sophist"    "Statesman"  "Laws10"     "Republic9"   
## Philebus_15 "Theaetetus" "Theaetetus" "Cratylus"   "Laws7"       
## Philebus_16 "Laws1"      "Laws3"      "Laws1"      "Laws3"       
## Philebus_17 "Laws1"      "Laws1"      "Laws7"      "Laws2"       
##             5             
## Philebus_1  "Laws7"       
## Philebus_2  "Phaedrus"    
## Philebus_3  "Phaedrus"    
## Philebus_4  "Sophist"     
## Philebus_5  "Statesman"   
## Philebus_6  "Statesman"   
## Philebus_7  "Laws7"       
## Philebus_8  "Sophist"     
## Philebus_9  "Laws2"       
## Philebus_10 "Laws2"       
## Philebus_11 "HippiasMinor"
## Philebus_12 "Laws12"      
## Philebus_13 "Laws2"       
## Philebus_14 "Statesman"   
## Philebus_15 "Statesman"   
## Philebus_16 "Laws2"       
## Philebus_17 "Laws1"       
## attr(,"description")
## [1] "predicted classes with their runner-ups"
## Delta scores of the ranked candidates, in the same order as the ranking.
delta1000[["scores"]]
##                     1         2         3         4         5
## Philebus_1  0.8571077 0.8862542 0.9209077 0.9342242 0.9373487
## Philebus_2  0.8627945 0.9243215 0.9256416 0.9317832 0.9362680
## Philebus_3  0.9548043 0.9644187 0.9724751 0.9953928 0.9995489
## Philebus_4  0.8711599 0.8723084 0.9033674 0.9166716 0.9194434
## Philebus_5  0.7639212 0.7788644 0.7860787 0.7957408 0.8042644
## Philebus_6  0.9189054 0.9304804 0.9368253 0.9465218 0.9543760
## Philebus_7  0.8797749 0.8815430 0.8949324 0.8984935 0.8994268
## Philebus_8  0.9180316 0.9303233 0.9569155 0.9692315 0.9853906
## Philebus_9  1.0017521 1.0344143 1.0523057 1.0603652 1.0704190
## Philebus_10 0.9244900 0.9317332 0.9436964 0.9700488 0.9972218
## Philebus_11 0.9086691 0.9142053 0.9210046 0.9448190 0.9572873
## Philebus_12 0.8167080 0.8773637 0.8909533 0.8951696 0.8977139
## Philebus_13 0.8301134 0.8358716 0.8374085 0.8521541 0.8715899
## Philebus_14 0.8209662 0.8237288 0.8639593 0.9100729 0.9102564
## Philebus_15 0.8044127 0.8210058 0.8375746 0.8399280 0.8609914
## Philebus_16 0.9309036 0.9422520 0.9634839 0.9701466 0.9701597
## Philebus_17 0.7839562 0.7945683 0.8043762 0.8110594 0.8162090
## attr(,"description")
## [1] "Delta scores, ordered according to candidates"