Corpus all
## Reference corpus: every file found in "corpus_all", loaded without sampling.
## This corpus does not contain Philebus.
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  encoding = "UTF-8",
  preserve.case = FALSE,
  sampling = "no.sampling"
)
## Test text: Philebus.txt from the working directory, split by normal
## sampling with sample.size = 1000.
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  corpus.dir = getwd(),
  markup.type = "plain",
  corpus.lang = "Other",
  encoding = "UTF-8",
  preserve.case = FALSE,
  sampling = "normal.sampling",
  sample.size = 1000
)
## unite
corpus1000 <- c(corpus_all, test_corpus) ## 72 elements, last 17 = Philebus
MFW & Frequencies
## Most frequent words of the combined corpus: take the first 101 entries of
## the frequency list, then drop entry 71 (Socrates), which leaves 100 words.
mfw <- make.frequency.list(corpus1000)[seq_len(101)][-71]
mfw
## [1] "ὁ" "καί" "εἰμί" "δέ" "οὗτος" "ἐγώ"
## [7] "αὐτός" "οὐ" "τε" "μέν" "ἄν" "τις"
## [13] "ὅς" "λέγω" "γάρ" "ἠέ" "ἐν" "δή"
## [19] "γε" "ἀλλά" "ἄλλος" "σύ" "πᾶς" "φημί"
## [25] "μή" "ὅστις" "γίγνομαι" "ὡς" "περί" "τίς"
## [31] "οὖν" "ὦ" "ἔχω" "εἰ" "πρός" "λόγος"
## [37] "πολύς" "κατά" "εἰς" "τοιοῦτος" "οὕτως" "ἑαυτοῦ"
## [43] "δοκέω" "ποιέω" "οὐδείς" "ἐκ" "νῦν" "εἶπον"
## [49] "εἷς" "διά" "οἴομαι" "ἐπί" "καλός" "ἀγαθός"
## [55] "ἐκεῖνος" "οὐδέ" "πόλις" "οὔτε" "ἐάν" "φαίνω"
## [61] "μέγας" "οἷος" "ἄνθρωπος" "πρότερος" "αὖ" "δέομαι"
## [67] "ὑπό" "πῶς" "ἀίω" "ἕτερος" "ὀρθός" "ἕκαστος"
## [73] "ἀληθής" "πάνυ" "οἶδα" "ἕ" "ψυχή" "ὅσος"
## [79] "οὐκοῦν" "ἄρα" "παρά" "κακός" "μετά" "βούλομαι"
## [85] "ἔοικα" "ἀνήρ" "ὥσπερ" "ἔτι" "ἆρα" "θεός"
## [91] "νόμος" "ὅδε" "σῶμα" "πού" "δίκαιος" "φύσις"
## [97] "ποτέ" "μήν" "μᾶλλον" "ἀεί"
## Frequencies of the selected words (mfw) in each element of corpus1000,
## converted table -> data frame; dim() shows texts x words.
freq <- as.data.frame.matrix(
  as.table(
    make.table.of.frequencies(
      corpus1000,
      mfw,
      absent.sensitive = FALSE
    )
  )
)
dim(freq)
## [1] 72 100
Make Shortlist (z-scores all)
## Delta distances between all rows of freq, expanded from a dist object to a
## full square matrix so that rows and columns can be subset by name.
d <- dist.delta(freq)
dm <- as.matrix(d)
## Rows 1-55 are the candidate texts; the Philebus samples come after them.
my_rows <- rownames(dm)[1:55]
## subset columns, save as df
Phlb <- paste0("Philebus_", 1:17)
## drop = FALSE keeps a matrix even when only a single column matches.
sdm <- as.data.frame(dm[1:55, colnames(dm) %in% Phlb, drop = FALSE])
rownames(sdm) <- my_rows
## select 5 minimal values for each block
## For every Philebus column, order the candidates by distance and keep the
## names of the five nearest. vapply() builds the whole result in one step
## (no rbind() growth inside a loop) and labels it with the names of the
## columns that were actually selected, so row labels cannot drift out of
## step with the data. t() puts one Philebus sample on each row.
x <- t(vapply(
  sdm,
  function(dists) rownames(sdm)[order(dists)][1:5],
  character(5)
))
x
## [,1] [,2] [,3] [,4] [,5]
## Philebus_1 "Sophist" "Statesman" "Laws7" "Laws4" "Protagoras"
## Philebus_2 "Laws10" "Phaedrus" "Sophist" "Statesman" "Laws7"
## Philebus_3 "Sophist" "Statesman" "Laws3" "Theaetetus" "Laws4"
## Philebus_4 "Sophist" "Laws2" "Theaetetus" "Laws7" "Statesman"
## Philebus_5 "Laws1" "Laws4" "Laws7" "Statesman" "Laws3"
## Philebus_6 "Statesman" "Laws1" "Laws7" "Laws2" "Laws3"
## Philebus_7 "Statesman" "Laws2" "Laws7" "Laws3" "Laws1"
## Philebus_8 "Sophist" "Statesman" "Laws3" "Laws2" "Laws4"
## Philebus_9 "Theaetetus" "Sophist" "Laws2" "Laws1" "Republic1"
## Philebus_10 "Statesman" "Laws3" "Laws2" "Sophist" "Laws1"
## Philebus_11 "Laws1" "Gorgias" "Laches" "Republic6" "Laws2"
## Philebus_12 "Statesman" "Laws7" "Laws3" "Laws4" "Laws10"
## Philebus_13 "Sophist" "Statesman" "Laws3" "Laws2" "Laws7"
## Philebus_14 "Sophist" "Statesman" "Laws2" "Laws3" "Phaedrus"
## Philebus_15 "Laws7" "Statesman" "Cratylus" "Laws3" "Sophist"
## Philebus_16 "Laws2" "Statesman" "Laws1" "Laws3" "Cratylus"
## Philebus_17 "Laws1" "Laws3" "Laws2" "Laws7" "Statesman"
Make Shortlist (z-scores corpus)
## Delta classification: rows 1-55 of freq form the training set, rows 56-72
## (the Philebus samples) the test set; the five nearest candidates are kept.
## Here z.scores.both.sets is FALSE.
delta1 <- perform.delta(
  training.set = freq[1:55, ],
  test.set = freq[56:72, ],
  distance = "delta",
  z.scores.both.sets = FALSE,
  no.of.candidates = 5
)
delta1[["ranking"]]
## 1 2 3 4 5
## Philebus_1 "Sophist" "Statesman" "Laws7" "Laws4" "Protagoras"
## Philebus_2 "Sophist" "Laws10" "Statesman" "Phaedrus" "Laws3"
## Philebus_3 "Sophist" "Statesman" "Laws3" "Laws4" "Theaetetus"
## Philebus_4 "Sophist" "Laws2" "Theaetetus" "Statesman" "Laws7"
## Philebus_5 "Laws4" "Laws1" "Laws7" "Statesman" "Laws3"
## Philebus_6 "Laws1" "Statesman" "Laws2" "Laws7" "Laws3"
## Philebus_7 "Statesman" "Laws2" "Laws7" "Epinomis" "Laws3"
## Philebus_8 "Sophist" "Statesman" "Laws3" "Laws2" "Laws4"
## Philebus_9 "Sophist" "Theaetetus" "Laws2" "Laws1" "Republic1"
## Philebus_10 "Statesman" "Laws3" "Sophist" "Laws2" "Laws1"
## Philebus_11 "Laws1" "Laws2" "Gorgias" "Sophist" "Laches"
## Philebus_12 "Statesman" "Laws7" "Republic9" "Laws10" "Laws4"
## Philebus_13 "Sophist" "Statesman" "Laws3" "Laws2" "Laws4"
## Philebus_14 "Sophist" "Statesman" "Laws2" "Laws3" "Theaetetus"
## Philebus_15 "Laws7" "Cratylus" "Laws3" "Sophist" "Theaetetus"
## Philebus_16 "Laws2" "Statesman" "Laws1" "Laws3" "Cratylus"
## Philebus_17 "Laws1" "Laws3" "Laws2" "Laws7" "Statesman"
## attr(,"description")
## [1] "predicted classes with their runner-ups"
## Delta scores belonging to the candidates ranked for delta1.
delta1[["scores"]]
## 1 2 3 4 5
## Philebus_1 0.9676037 1.122057 1.170879 1.195967 1.207110
## Philebus_2 1.2065061 1.207241 1.225184 1.233250 1.238732
## Philebus_3 1.2549495 1.273373 1.311981 1.367779 1.399472
## Philebus_4 1.0413330 1.136596 1.154685 1.182364 1.203936
## Philebus_5 1.1044829 1.118566 1.134280 1.142611 1.164133
## Philebus_6 1.3016101 1.310024 1.332382 1.358906 1.384327
## Philebus_7 1.1754862 1.193079 1.211349 1.238194 1.239209
## Philebus_8 1.2443948 1.373784 1.384514 1.418846 1.497310
## Philebus_9 1.4273551 1.431466 1.434651 1.495979 1.522404
## Philebus_10 1.3215110 1.406722 1.439911 1.441624 1.456667
## Philebus_11 1.2624594 1.285467 1.302802 1.312611 1.316706
## Philebus_12 1.1075352 1.140141 1.189376 1.197329 1.203037
## Philebus_13 1.0875262 1.106343 1.124710 1.136040 1.181927
## Philebus_14 1.1322134 1.180965 1.254489 1.266012 1.280901
## Philebus_15 1.1119978 1.113894 1.123937 1.124668 1.131898
## Philebus_16 1.3174663 1.371384 1.388362 1.413595 1.434500
## Philebus_17 1.0080851 1.052048 1.056533 1.109706 1.127742
## attr(,"description")
## [1] "Delta scores, ordered according to candidates"
## For comparison: the same training/test split and settings as delta1, but
## with z.scores.both.sets = TRUE.
delta2 <- perform.delta(
  training.set = freq[1:55, ],
  test.set = freq[56:72, ],
  distance = "delta",
  z.scores.both.sets = TRUE,
  no.of.candidates = 5
)
delta2[["ranking"]]
## 1 2 3 4 5
## Philebus_1 "Sophist" "Statesman" "Laws7" "Laws4" "Protagoras"
## Philebus_2 "Laws10" "Phaedrus" "Sophist" "Statesman" "Laws7"
## Philebus_3 "Sophist" "Statesman" "Laws3" "Theaetetus" "Laws4"
## Philebus_4 "Sophist" "Laws2" "Theaetetus" "Laws7" "Statesman"
## Philebus_5 "Laws1" "Laws4" "Laws7" "Statesman" "Laws3"
## Philebus_6 "Statesman" "Laws1" "Laws7" "Laws2" "Laws3"
## Philebus_7 "Statesman" "Laws2" "Laws7" "Laws3" "Laws1"
## Philebus_8 "Sophist" "Statesman" "Laws3" "Laws2" "Laws4"
## Philebus_9 "Theaetetus" "Sophist" "Laws2" "Laws1" "Republic1"
## Philebus_10 "Statesman" "Laws3" "Laws2" "Sophist" "Laws1"
## Philebus_11 "Laws1" "Gorgias" "Laches" "Republic6" "Laws2"
## Philebus_12 "Statesman" "Laws7" "Laws3" "Laws4" "Laws10"
## Philebus_13 "Sophist" "Statesman" "Laws3" "Laws2" "Laws7"
## Philebus_14 "Sophist" "Statesman" "Laws2" "Laws3" "Phaedrus"
## Philebus_15 "Laws7" "Statesman" "Cratylus" "Laws3" "Sophist"
## Philebus_16 "Laws2" "Statesman" "Laws1" "Laws3" "Cratylus"
## Philebus_17 "Laws1" "Laws3" "Laws2" "Laws7" "Statesman"
## attr(,"description")
## [1] "predicted classes with their runner-ups"
## Delta scores belonging to the candidates ranked for delta2.
delta2[["scores"]]
## 1 2 3 4 5
## Philebus_1 0.8243880 0.9461000 0.9811575 1.0102986 1.0130875
## Philebus_2 1.0002920 1.0084294 1.0155985 1.0294057 1.0341863
## Philebus_3 1.0240379 1.0397440 1.0993220 1.1245411 1.1360292
## Philebus_4 0.8629180 0.9334550 0.9529578 0.9696327 0.9743726
## Philebus_5 0.9138170 0.9158632 0.9166366 0.9468616 0.9492765
## Philebus_6 1.0699696 1.0736744 1.1046025 1.1064011 1.1230333
## Philebus_7 0.9363344 0.9729947 0.9917472 0.9930119 1.0053274
## Philebus_8 0.9905226 1.0803036 1.1102350 1.1357096 1.1979431
## Philebus_9 1.1151917 1.1238661 1.1418482 1.1781119 1.2011876
## Philebus_10 1.0673672 1.1204329 1.1521679 1.1528030 1.1681888
## Philebus_11 1.0585023 1.0605358 1.0712905 1.0830221 1.0843613
## Philebus_12 0.9047089 0.9307059 0.9850718 0.9912397 0.9955936
## Philebus_13 0.9146190 0.9256685 0.9499466 0.9501581 0.9939371
## Philebus_14 0.9225994 0.9646321 1.0253777 1.0403143 1.0483780
## Philebus_15 0.9130220 0.9303398 0.9312650 0.9319110 0.9344285
## Philebus_16 1.0258327 1.0716923 1.0914840 1.1055128 1.1317762
## Philebus_17 0.8216427 0.8580213 0.8655289 0.8919713 0.9251122
## attr(,"description")
## [1] "Delta scores, ordered according to candidates"