## Corpus 1000
## Reference corpus: every file found in "corpus_all", read as plain UTF-8
## text, case not preserved, sampled with "normal.sampling" at 1000 words
## per sample. This corpus does not contain Philebus.
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "normal.sampling",
  sample.size = 1000,
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## Text under test: Philebus.txt from the current working directory, loaded
## with exactly the same settings as the reference corpus.
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  corpus.dir = getwd(),
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "normal.sampling",
  sample.size = 1000,
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## unite: reference samples first, Philebus samples appended at the end
corpus1000 <- c(corpus_all, test_corpus) ## 534 elements, last 17 = Philebus
## MFW & Frequencies
## Build the word list of the combined corpus, keep its first 101 entries
## and drop the entry at position 72 (Socrates), which leaves 100 words.
## NOTE(review): the removal is positional -- confirm that position 72 is
## still Socrates whenever the corpus or the sampling changes.
mfw <- make.frequency.list(corpus1000)
mfw <- mfw[seq_len(101)][-72]
mfw
## [1] "ὁ" "καί" "εἰμί" "δέ" "οὗτος" "ἐγώ"
## [7] "αὐτός" "οὐ" "τε" "μέν" "ἄν" "τις"
## [13] "ὅς" "λέγω" "γάρ" "ἠέ" "ἐν" "δή"
## [19] "γε" "ἀλλά" "ἄλλος" "σύ" "πᾶς" "φημί"
## [25] "ὅστις" "μή" "γίγνομαι" "ὡς" "περί" "τίς"
## [31] "οὖν" "ὦ" "ἔχω" "εἰ" "πρός" "λόγος"
## [37] "πολύς" "κατά" "εἰς" "τοιοῦτος" "οὕτως" "ἑαυτοῦ"
## [43] "δοκέω" "ποιέω" "οὐδείς" "ἐκ" "εἶπον" "νῦν"
## [49] "εἷς" "οἴομαι" "διά" "καλός" "ἐπί" "ἐκεῖνος"
## [55] "ἀγαθός" "πόλις" "οὐδέ" "οὔτε" "ἐάν" "μέγας"
## [61] "φαίνω" "οἷος" "πρότερος" "ἄνθρωπος" "αὖ" "δέομαι"
## [67] "πῶς" "ἕτερος" "ἀίω" "ὑπό" "ἕκαστος" "ὀρθός"
## [73] "πάνυ" "ἀληθής" "οἶδα" "ἕ" "ὅσος" "ψυχή"
## [79] "οὐκοῦν" "ἄρα" "κακός" "μετά" "παρά" "βούλομαι"
## [85] "ἆρα" "ὥσπερ" "ἔοικα" "ἀνήρ" "ἔτι" "θεός"
## [91] "σῶμα" "ὅδε" "πού" "νόμος" "δίκαιος" "φύσις"
## [97] "ποτέ" "μήν" "ἀεί" "μᾶλλον"
## Frequencies of the selected words in every sample of the combined corpus.
freq <- make.table.of.frequencies(corpus1000, mfw, absent.sensitive = FALSE)
## Convert via a table and call the matrix method explicitly so the result
## keeps the wide layout (534 samples x 100 words, see dim() below).
freq <- as.table(freq)
freq <- as.data.frame.matrix(freq)
dim(freq)
## [1] 534 100
## Make Shortlist 3000
## Delta distances between all samples, computed on the frequency table.
d <- dist.delta(freq)
dm <- as.matrix(d) ## distance as matrix 534 x 534

## Names of the 17 Philebus samples: "Philebus_1" ... "Philebus_17".
Phlb <- paste0("Philebus_", seq_len(17))

## Reference samples are all rows that are not Philebus samples. Deriving
## them from the names (instead of a fixed 1:517) keeps the split correct
## if the size of the reference corpus changes.
is_phlb <- rownames(dm) %in% Phlb
my_rows <- rownames(dm)[!is_phlb]

## subset columns, save as df
## Columns are selected by name, so they are guaranteed to be in the order
## of Phlb; a missing sample name fails loudly instead of shifting labels.
## Row names are carried over from dm by the subsetting itself.
sdm <- as.data.frame(dm[!is_phlb, Phlb, drop = FALSE])

## select 5 minimal values for each block
## For every Philebus sample (column) take the names of the n_nearest
## reference samples with the smallest distance, closest first. vapply()
## yields one column per Philebus sample; t() turns that into one row per
## sample, with the row names taken from the column names of sdm.
n <- ncol(sdm)
n_nearest <- 5
x <- t(vapply(
  sdm,
  function(dists) rownames(sdm)[order(dists)][seq_len(n_nearest)],
  character(n_nearest)
))
x
## [,1] [,2] [,3] [,4]
## Philebus_1 "Sophist_11" "Statesman_2" "Laws7_10" "Sophist_10"
## Philebus_2 "Epistles_4" "Laws7_8" "Laws7_11" "Epinomis_5"
## Philebus_3 "Sophist_1" "Sophist_11" "Laws6_1" "Laws1_7"
## Philebus_4 "Laws2_4" "Sophist_11" "Statesman_2" "Laws7_11"
## Philebus_5 "Laws7_11" "Laws4_2" "Laws7_2" "Laws7_8"
## Philebus_6 "Laws4_2" "Laws7_8" "Sophist_9" "Sophist_6"
## Philebus_7 "Laws3_5" "Statesman_7" "Statesman_8" "Epinomis_5"
## Philebus_8 "Sophist_6" "Statesman_3" "Sophist_2" "Statesman_1"
## Philebus_9 "Laws7_11" "Laws2_2" "Laws3_7" "Sophist_10"
## Philebus_10 "Laws2_7" "Sophist_11" "Statesman_16" "Statesman_3"
## Philebus_11 "Sophist_9" "Laws10_6" "Laws2_2" "Laws7_11"
## Philebus_12 "Laws2_5" "Laws8_2" "Laws8_3" "Republic8_7"
## Philebus_13 "Sophist_6" "Laws3_7" "Statesman_8" "Theaetetus_1"
## Philebus_14 "Sophist_6" "Laws1_7" "Statesman_8" "Sophist_11"
## Philebus_15 "Lovers_2" "Statesman_7" "Laws7_8" "Theaetetus_1"
## Philebus_16 "Statesman_1" "Laws2_5" "Laws2_1" "Laws3_4"
## Philebus_17 "Laws3_7" "Laws1_8" "Laws7_11" "Laws6_10"
## [,5]
## Philebus_1 "Laws9_3"
## Philebus_2 "Laws3_8"
## Philebus_3 "Epistles_12"
## Philebus_4 "Sophist_6"
## Philebus_5 "Statesman_2"
## Philebus_6 "Statesman_1"
## Philebus_7 "Cratylus_14"
## Philebus_8 "Statesman_16"
## Philebus_9 "Laws2_1"
## Philebus_10 "Laws1_7"
## Philebus_11 "Laws9_3"
## Philebus_12 "Statesman_8"
## Philebus_13 "Republic1_1"
## Philebus_14 "Statesman_16"
## Philebus_15 "Laws4_2"
## Philebus_16 "Laws3_3"
## Philebus_17 "Laws6_1"