Corpus 500
## Reference corpus: every file found in the "corpus_all" directory, parsed
## with normal.sampling at sample.size = 500, case folded, read as UTF-8.
## This corpus does not contain Philebus.
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "normal.sampling",
  sample.size = 500,
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## Test text: Philebus.txt from the current working directory, parsed with
## exactly the same options as the reference corpus.
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  corpus.dir = getwd(),
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "normal.sampling",
  sample.size = 500,
  preserve.case = FALSE,
  encoding = "UTF-8"
)

## Unite: reference samples first, Philebus samples appended at the end.
corpus500 <- c(corpus_all, test_corpus)
Most Frequent Words (MFW) & Frequencies
## Build the frequency list for the combined corpus and keep its first
## 100 entries.
mfw <- make.frequency.list(corpus500)[seq_len(100)]

## Remove Socrates (entry 73), leaving 99 words.
## NOTE(review): the removal is position-based; the index presumably has to
## be re-checked whenever the corpus (and hence the ranking) changes.
mfw <- mfw[-73]

## Show the final word list.
print(mfw)
## [1] "ὁ" "καί" "εἰμί" "δέ" "οὗτος" "ἐγώ"
## [7] "αὐτός" "οὐ" "τε" "μέν" "ἄν" "τις"
## [13] "ὅς" "λέγω" "γάρ" "ἠέ" "ἐν" "δή"
## [19] "γε" "ἀλλά" "ἄλλος" "σύ" "πᾶς" "φημί"
## [25] "ὅστις" "μή" "γίγνομαι" "ὡς" "περί" "τίς"
## [31] "οὖν" "ὦ" "ἔχω" "εἰ" "πρός" "λόγος"
## [37] "πολύς" "κατά" "εἰς" "τοιοῦτος" "οὕτως" "ἑαυτοῦ"
## [43] "δοκέω" "οὐδείς" "ποιέω" "ἐκ" "νῦν" "εἶπον"
## [49] "διά" "εἷς" "οἴομαι" "ἐπί" "καλός" "ἐκεῖνος"
## [55] "ἀγαθός" "πόλις" "οὐδέ" "οὔτε" "ἐάν" "μέγας"
## [61] "φαίνω" "οἷος" "ἄνθρωπος" "πρότερος" "αὖ" "δέομαι"
## [67] "ἕτερος" "πῶς" "ὑπό" "ἀίω" "ἕκαστος" "ὀρθός"
## [73] "ἀληθής" "πάνυ" "οἶδα" "ἕ" "ψυχή" "οὐκοῦν"
## [79] "ὅσος" "ἄρα" "κακός" "μετά" "παρά" "βούλομαι"
## [85] "ἔοικα" "ὥσπερ" "ἀνήρ" "ἆρα" "ἔτι" "θεός"
## [91] "ὅδε" "πού" "σῶμα" "νόμος" "φύσις" "δίκαιος"
## [97] "ποτέ" "μήν" "ἀεί"
## Frequencies of the selected words (columns) in every sample (rows) of the
## combined corpus.
freq <- make.table.of.frequencies(corpus500, mfw, absent.sensitive = FALSE)

## Convert the frequency table to a plain data frame via as.table().
freq <- as.data.frame.matrix(as.table(freq))

## Report the dimensions: number of samples x number of words.
print(dim(freq))
## [1] 1092 99
Make Shortlist 3000
## Pairwise distances between all samples, computed by dist.delta() on the
## frequency table, then expanded to a full square matrix so that samples
## can be addressed by name.
d <- dist.delta(freq)
dm <- as.matrix(d)

## Names of the 35 Philebus samples (the text under test).
Phlb <- paste0("Philebus_", seq_len(35))

## Split the samples by name instead of by a hard-coded row count:
## reference samples are all rows that are not Philebus samples,
## test samples are the Philebus columns.
phlb_rows <- rownames(dm) %in% Phlb
phlb_cols <- colnames(dm) %in% Phlb
my_rows <- rownames(dm)[!phlb_rows]

## subset columns, save as df
## (rows = reference samples, columns = Philebus samples; drop = FALSE keeps
## the matrix shape even if only one row or column is selected)
sdm <- as.data.frame(dm[!phlb_rows, phlb_cols, drop = FALSE])
rownames(sdm) <- my_rows

## select 5 minimal values for each block
## For every Philebus sample, take the names of the n_nearest reference
## samples with the smallest distance. vapply() names each result after the
## column it was computed from, so the row labels of x always match the
## actual column order of sdm.
n <- ncol(sdm)
n_nearest <- 5
x <- t(vapply(
  sdm,
  function(dists) rownames(sdm)[order(dists)][seq_len(n_nearest)],
  character(n_nearest)
))
x
## [,1] [,2] [,3] [,4]
## Philebus_1 "Cratylus_12" "Cratylus_21" "Sophist_20" "Sophist_21"
## Philebus_2 "Republic6_15" "Statesman_4" "Sophist_14" "Laws7_16"
## Philebus_3 "Epinomis_9" "Parmenides_3" "Laws10_7" "Laws11_5"
## Philebus_4 "Statesman_4" "Phaedrus_7" "Gorgias_52" "Laws11_7"
## Philebus_5 "Sophist_14" "Statesman_29" "Statesman_7" "Republic7_11"
## Philebus_6 "Statesman_4" "Statesman_19" "Sophist_16" "Laws3_11"
## Philebus_7 "Statesman_6" "Laws7_22" "Statesman_4" "Republic3_12"
## Philebus_8 "Phaedrus_11" "Statesman_6" "Laws1_14" "Statesman_3"
## Philebus_9 "Laws7_15" "Epinomis_9" "Epinomis_4" "Laws10_9"
## Philebus_10 "Laws7_21" "Laws7_22" "Laws4_4" "Laws7_3"
## Philebus_11 "Laws2_4" "Laws7_3" "Laws7_16" "Statesman_16"
## Philebus_12 "Republic6_12" "Laws7_16" "Laws1_9" "Charmides_1"
## Philebus_13 "Symposium_24" "Republic7_14" "Statesman_13" "Statesman_15"
## Philebus_14 "Laws7_19" "Sophist_4" "Statesman_16" "Laws7_18"
## Philebus_15 "Statesman_15" "Statesman_5" "Statesman_31" "Statesman_12"
## Philebus_16 "Sophist_28" "Sophist_3" "Sophist_17" "Statesman_3"
## Philebus_17 "Laws7_3" "Laws7_23" "Meno_18" "Laws2_9"
## Philebus_18 "Laws2_4" "Sophist_30" "Laws1_15" "Laws1_14"
## Philebus_19 "Sophist_30" "Phaedrus_23" "Laws1_14" "Laws3_1"
## Philebus_20 "Statesman_15" "Sophist_30" "Laws7_1" "Laws7_18"
## Philebus_21 "Laws9_6" "Laws2_4" "Laws7_3" "Sophist_14"
## Philebus_22 "Sophist_9" "Theages_1" "Cratylus_5" "Cratylus_17"
## Philebus_23 "Laws7_19" "Statesman_15" "Statesman_16" "Laws2_14"
## Philebus_24 "Epistles_20" "Republic8_15" "Statesman_31" "Republic7_8"
## Philebus_25 "Statesman_15" "Statesman_16" "Statesman_7" "Statesman_3"
## Philebus_26 "Republic1_2" "Phaedrus_2" "Republic6_9" "Statesman_3"
## Philebus_27 "Statesman_15" "Statesman_7" "Statesman_31" "Statesman_13"
## Philebus_28 "Sophist_32" "Statesman_31" "Sophist_8" "Sophist_3"
## Philebus_29 "Statesman_13" "Statesman_15" "Statesman_3" "Laws3_1"
## Philebus_30 "Epistles_7" "Theages_1" "Laws7_3" "Sophist_4"
## Philebus_31 "Laws7_16" "Laws3_7" "Statesman_7" "Sophist_11"
## Philebus_32 "Statesman_3" "Statesman_15" "Laws7_3" "Statesman_12"
## Philebus_33 "Sophist_11" "Republic1_2" "Statesman_13" "Statesman_15"
## Philebus_34 "Laws6_20" "Laws7_21" "Laws1_15" "Laws1_11"
## Philebus_35 "Statesman_15" "Laws9_6" "Laws7_3" "Statesman_19"
## [,5]
## Philebus_1 "Statesman_6"
## Philebus_2 "Gorgias_16"
## Philebus_3 "Laws3_16"
## Philebus_4 "Sophist_22"
## Philebus_5 "Republic7_8"
## Philebus_6 "Alcibiades2_7"
## Philebus_7 "Sophist_14"
## Philebus_8 "Laws2_7"
## Philebus_9 "Laws5_10"
## Philebus_10 "Sophist_14"
## Philebus_11 "Lysis_3"
## Philebus_12 "Cratylus_17"
## Philebus_13 "Laws1_4"
## Philebus_14 "Statesman_14"
## Philebus_15 "Statesman_2"
## Philebus_16 "Statesman_31"
## Philebus_17 "Statesman_16"
## Philebus_18 "Statesman_33"
## Philebus_19 "Statesman_5"
## Philebus_20 "Laws7_3"
## Philebus_21 "Laws7_16"
## Philebus_22 "Republic1_15"
## Philebus_23 "Laws3_1"
## Philebus_24 "Republic8_14"
## Philebus_25 "Laws2_14"
## Philebus_26 "Statesman_15"
## Philebus_27 "Laws10_9"
## Philebus_28 "Laws1_13"
## Philebus_29 "Theaetetus_43"
## Philebus_30 "Theaetetus_1"
## Philebus_31 "Theaetetus_1"
## Philebus_32 "Laws7_16"
## Philebus_33 "Laws7_3"
## Philebus_34 "Republic6_12"
## Philebus_35 "Laws7_22"