## 1000-word blocks
## Reference corpus: every file in "corpus_all", 3 random samples of size 1000
## per text, drawn with replacement. This corpus does not contain Philebus.
## NOTE(review): the random sampling is not seeded in the visible code, so the
## results printed below are presumably not reproducible run-to-run -- confirm
## whether a set.seed() call exists earlier in the file.
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  encoding = "UTF-8",
  preserve.case = FALSE,
  sampling = "random.sampling",
  sample.size = 1000,
  number.of.samples = 3,
  sampling.with.replacement = TRUE
)

## Test text: Philebus.txt cut into 1000-word samples with no overlap,
## using single words (ngram.size = 1) as features.
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  markup.type = "plain",
  corpus.lang = "Other",
  encoding = "UTF-8",
  preserve.case = FALSE,
  features = "w",
  ngram.size = 1,
  sampling = "normal.sampling",
  sample.size = 1000,
  sample.overlap = 0
)

## Reference samples first, Philebus samples appended at the end.
corpus1000 <- c(corpus_all, test_corpus)

## Keep the first 100 entries of the frequency list built over both corpora
## (presumably ordered by descending frequency -- see the printout below).
mfw <- make.frequency.list(corpus1000)[1:100]
mfw
## [1] "ὁ" "καί" "εἰμί" "δέ" "οὗτος" "ἐγώ"
## [7] "αὐτός" "οὐ" "τε" "μέν" "ἄν" "τις"
## [13] "ὅς" "λέγω" "ἠέ" "γάρ" "ἐν" "γε"
## [19] "δή" "ἀλλά" "σύ" "ἄλλος" "πᾶς" "φημί"
## [25] "τίς" "μή" "γίγνομαι" "ὅστις" "περί" "ὡς"
## [31] "οὖν" "ὦ" "εἰ" "ἔχω" "πρός" "πολύς"
## [37] "λόγος" "τοιοῦτος" "εἰς" "κατά" "οὕτως" "δοκέω"
## [43] "ποιέω" "νῦν" "ἑαυτοῦ" "εἶπον" "οὐδείς" "ἀγαθός"
## [49] "καλός" "οἴομαι" "ἐκ" "πόλις" "διά" "οὐδέ"
## [55] "ἐπί" "ἐάν" "εἷς" "ὀρθός" "σωκράτης" "πρότερος"
## [61] "ἐκεῖνος" "οἷος" "ἄνθρωπος" "φαίνω" "πῶς" "οὔτε"
## [67] "οὐκοῦν" "μέγας" "ἀληθής" "δέομαι" "πάνυ" "ἀίω"
## [73] "ὑπό" "αὖ" "νόμος" "μετά" "ἕ" "ἄρα"
## [79] "οἶδα" "ἕτερος" "βούλομαι" "ψυχή" "ἡδονά" "παρά"
## [85] "ὅσος" "κακός" "ἆρα" "ἕκαστος" "ἔτι" "ἀνήρ"
## [91] "ἔοικα" "ποτέ" "θεός" "ὥσπερ" "ὅδε" "δίκαιος"
## [97] "σῶμα" "μήν" "μόνος" "πού"
## Frequencies of the selected words (mfw) for every sample in corpus1000,
## converted to a plain data frame (one row per sample, one column per word).
freq1000 <- make.table.of.frequencies(
  corpus1000,
  mfw,
  absent.sensitive = FALSE
)
freq1000 <- as.data.frame.matrix(as.table(freq1000))

## Pairwise Delta distances between all samples, then the same distances as an
## ordinary square matrix so they can be indexed by row and column name.
d1000 <- dist.delta(freq1000)
dm1000 <- as.matrix(d1000)
## Test blocks: every row of the distance matrix labelled "Philebus_<k>".
## Taken from the labels themselves instead of being typed out, so the number
## of Philebus blocks is not hard-coded.
Phlb <- rownames(dm1000)[startsWith(rownames(dm1000), "Philebus_")]
stopifnot(length(Phlb) > 0)

## Candidate blocks: all remaining rows (replaces the magic index 1:165, which
## silently breaks when the number of reference samples changes).
my_rows1000 <- setdiff(rownames(dm1000), Phlb)

## subset columns, save as df: rows = candidate blocks, columns = Philebus
## blocks. Indexing by name keeps the row names (no separate rownames<- step)
## and guarantees the column order matches Phlb; drop = FALSE keeps a matrix
## even if only one row or column is selected.
sdm1000 <- as.data.frame(dm1000[my_rows1000, Phlb, drop = FALSE])

## select 5 minimal values for each block
## The result matrix is preallocated (one row per Philebus block, five columns)
## rather than grown with rbind(), and seq_len() makes the loop a no-op when
## there are no columns.
n <- ncol(sdm1000)
x <- matrix(NA_character_, nrow = n, ncol = 5, dimnames = list(Phlb, NULL))
for (i in seq_len(n)) {
  nearest <- order(sdm1000[[i]])
  ## Labels of the five candidate blocks with the smallest distance;
  ## padded with NA if fewer than five candidates exist.
  x[i, ] <- my_rows1000[nearest][1:5]
}
x
## [,1] [,2] [,3] [,4]
## Philebus_1 "Sophist_3" "Protagoras_2" "Laws1_2" "Sophist_1"
## Philebus_2 "Laws1_2" "Symposium_2" "Laws10_1" "Laws3_3"
## Philebus_3 "Sophist_2" "Laws3_2" "Sophist_1" "Sophist_3"
## Philebus_4 "Statesman_3" "Phaedrus_1" "Theaetetus_3" "Republic7_2"
## Philebus_5 "Laws2_1" "Statesman_3" "Laws10_1" "Republic7_2"
## Philebus_6 "Statesman_3" "Laws2_2" "Laws7_1" "HippiasMinor_3"
## Philebus_7 "Laws3_2" "Laws7_1" "Statesman_1" "Laws7_2"
## Philebus_8 "Laws3_3" "Sophist_3" "Statesman_3" "Sophist_2"
## Philebus_9 "Laws2_2" "Laws1_3" "Theaetetus_1" "Laws7_2"
## Philebus_10 "Statesman_1" "Laws1_2" "Laws10_1" "Laws3_3"
## Philebus_11 "Laws1_3" "Theaetetus_1" "Gorgias_3" "Laches_2"
## Philebus_12 "Statesman_1" "Statesman_2" "Laws10_1" "Republic7_1"
## Philebus_13 "Statesman_1" "Sophist_2" "Sophist_1" "Laws1_2"
## Philebus_14 "Sophist_1" "Statesman_3" "Laws10_1" "Republic9_2"
## Philebus_15 "Theaetetus_1" "Theaetetus_3" "Cratylus_2" "Laws7_3"
## Philebus_16 "Laws1_3" "Laws3_2" "Laws1_2" "Laws3_3"
## Philebus_17 "Laws1_2" "Laws1_3" "Laws7_1" "Laws2_3"
## [,5]
## Philebus_1 "Laws7_2"
## Philebus_2 "Phaedrus_2"
## Philebus_3 "Phaedrus_2"
## Philebus_4 "Sophist_1"
## Philebus_5 "Statesman_1"
## Philebus_6 "Statesman_1"
## Philebus_7 "Laws7_3"
## Philebus_8 "Sophist_1"
## Philebus_9 "Laws2_1"
## Philebus_10 "Laws2_1"
## Philebus_11 "HippiasMinor_3"
## Philebus_12 "Laws12_1"
## Philebus_13 "Laws2_1"
## Philebus_14 "Statesman_1"
## Philebus_15 "Statesman_1"
## Philebus_16 "Laws2_1"
## Philebus_17 "Laws1_1"
## Split the frequency table by row label instead of by the hard-coded ranges
## 1:165 / 166:182, which only hold for exactly 165 reference samples followed
## by exactly 17 Philebus samples.
is_phlb1000 <- startsWith(rownames(freq1000), "Philebus_")
stopifnot(any(is_phlb1000), !all(is_phlb1000))

## Delta classification of each Philebus block against the reference samples,
## keeping the 5 best candidates per block.
## NOTE(review): z.scores.both.sets = TRUE presumably scales both sets together,
## matching dist.delta() on the full table above -- confirm in the stylo docs.
delta1000 <- perform.delta(
  training.set = freq1000[!is_phlb1000, ],
  test.set = freq1000[is_phlb1000, ],
  distance = "delta",
  no.of.candidates = 5,
  z.scores.both.sets = TRUE
)

## Candidate classes for each Philebus block, best match first.
delta1000$ranking
## 1 2 3 4
## Philebus_1 "Sophist" "Protagoras" "Laws1" "Sophist"
## Philebus_2 "Laws1" "Symposium" "Laws10" "Laws3"
## Philebus_3 "Sophist" "Laws3" "Sophist" "Sophist"
## Philebus_4 "Statesman" "Phaedrus" "Theaetetus" "Republic7"
## Philebus_5 "Laws2" "Statesman" "Laws10" "Republic7"
## Philebus_6 "Statesman" "Laws2" "Laws7" "HippiasMinor"
## Philebus_7 "Laws3" "Laws7" "Statesman" "Laws7"
## Philebus_8 "Laws3" "Sophist" "Statesman" "Sophist"
## Philebus_9 "Laws2" "Laws1" "Theaetetus" "Laws7"
## Philebus_10 "Statesman" "Laws1" "Laws10" "Laws3"
## Philebus_11 "Laws1" "Theaetetus" "Gorgias" "Laches"
## Philebus_12 "Statesman" "Statesman" "Laws10" "Republic7"
## Philebus_13 "Statesman" "Sophist" "Sophist" "Laws1"
## Philebus_14 "Sophist" "Statesman" "Laws10" "Republic9"
## Philebus_15 "Theaetetus" "Theaetetus" "Cratylus" "Laws7"
## Philebus_16 "Laws1" "Laws3" "Laws1" "Laws3"
## Philebus_17 "Laws1" "Laws1" "Laws7" "Laws2"
## 5
## Philebus_1 "Laws7"
## Philebus_2 "Phaedrus"
## Philebus_3 "Phaedrus"
## Philebus_4 "Sophist"
## Philebus_5 "Statesman"
## Philebus_6 "Statesman"
## Philebus_7 "Laws7"
## Philebus_8 "Sophist"
## Philebus_9 "Laws2"
## Philebus_10 "Laws2"
## Philebus_11 "HippiasMinor"
## Philebus_12 "Laws12"
## Philebus_13 "Laws2"
## Philebus_14 "Statesman"
## Philebus_15 "Statesman"
## Philebus_16 "Laws2"
## Philebus_17 "Laws1"
## attr(,"description")
## [1] "predicted classes with their runner-ups"
## Delta scores belonging to the ranked candidates above (same row/column
## layout as the ranking table).
delta1000[["scores"]]
## 1 2 3 4 5
## Philebus_1 0.8571077 0.8862542 0.9209077 0.9342242 0.9373487
## Philebus_2 0.8627945 0.9243215 0.9256416 0.9317832 0.9362680
## Philebus_3 0.9548043 0.9644187 0.9724751 0.9953928 0.9995489
## Philebus_4 0.8711599 0.8723084 0.9033674 0.9166716 0.9194434
## Philebus_5 0.7639212 0.7788644 0.7860787 0.7957408 0.8042644
## Philebus_6 0.9189054 0.9304804 0.9368253 0.9465218 0.9543760
## Philebus_7 0.8797749 0.8815430 0.8949324 0.8984935 0.8994268
## Philebus_8 0.9180316 0.9303233 0.9569155 0.9692315 0.9853906
## Philebus_9 1.0017521 1.0344143 1.0523057 1.0603652 1.0704190
## Philebus_10 0.9244900 0.9317332 0.9436964 0.9700488 0.9972218
## Philebus_11 0.9086691 0.9142053 0.9210046 0.9448190 0.9572873
## Philebus_12 0.8167080 0.8773637 0.8909533 0.8951696 0.8977139
## Philebus_13 0.8301134 0.8358716 0.8374085 0.8521541 0.8715899
## Philebus_14 0.8209662 0.8237288 0.8639593 0.9100729 0.9102564
## Philebus_15 0.8044127 0.8210058 0.8375746 0.8399280 0.8609914
## Philebus_16 0.9309036 0.9422520 0.9634839 0.9701466 0.9701597
## Philebus_17 0.7839562 0.7945683 0.8043762 0.8110594 0.8162090
## attr(,"description")
## [1] "Delta scores, ordered according to candidates"