## 3000-word blocks
## Read and tokenise every file in the "corpus_all" directory as plain UTF-8
## text, without sampling and with case folded.
corpus_all <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "corpus_all",
  markup.type = "plain",
  corpus.lang = "Other",
  sampling = "no.sampling",
  preserve.case = FALSE,
  encoding = "UTF-8"
) ## this corpus does not contain Philebus
## delete dialogues < 3000 words
## A logical keep-mask is used instead of negative indexing: with an empty
## index vector, corpus_all[-q] would return an EMPTY corpus whenever no text
## is shorter than 3000 words, rather than the full corpus.
too_short <- lengths(corpus_all, use.names = FALSE) < 3000
## positions of the dropped texts (kept under its original name)
q <- which(too_short)
corpus3000 <- corpus_all[!too_short]
## Training material: random 3000-word samples, drawn with replacement,
## 3 samples requested via number.of.samples.
## NOTE(review): no set.seed() is visible before this call, so the samples
## (and every result printed below) presumably differ between runs - confirm.
samples3000 <- make.samples(
  corpus3000,
  sample.size = 3000,
  sampling = "random.sampling",
  sampling.with.replacement = TRUE,
  number.of.samples = 3
)
## Test material: Philebus.txt cut into 3000-word blocks with no overlap,
## using single words as features.
test_corpus <- load.corpus.and.parse(
  files = "Philebus.txt",
  markup.type = "plain",
  corpus.lang = "Other",
  sample.size = 3000,
  sampling = "normal.sampling",
  sample.overlap = 0,
  features = "w",
  ngram.size = 1,
  preserve.case = FALSE,
  encoding = "UTF-8"
)
## From here on corpus3000 holds the training samples followed by the
## Philebus blocks (the name is reused, replacing the filtered corpus).
corpus3000 <- c(samples3000, test_corpus)
## Frequency list built over the training samples plus the Philebus blocks,
## cut down to its first 100 entries.
mfw <- make.frequency.list(corpus3000)[seq_len(100)]
mfw
## [1] "ὁ" "καί" "δέ" "εἰμί" "οὗτος" "ἐγώ"
## [7] "οὐ" "αὐτός" "τε" "μέν" "ἄν" "ὅς"
## [13] "τις" "λέγω" "γάρ" "ἠέ" "ἐν" "δή"
## [19] "γε" "ἀλλά" "ἄλλος" "φημί" "σύ" "πᾶς"
## [25] "ὅστις" "γίγνομαι" "μή" "ὡς" "περί" "τίς"
## [31] "οὖν" "ὦ" "εἰ" "ἔχω" "πρός" "πολύς"
## [37] "λόγος" "εἰς" "τοιοῦτος" "κατά" "οὕτως" "ἑαυτοῦ"
## [43] "δοκέω" "ποιέω" "ἐκ" "νῦν" "πόλις" "οὐδείς"
## [49] "εἶπον" "οἴομαι" "ἀγαθός" "ἐπί" "καλός" "διά"
## [55] "εἷς" "ἐκεῖνος" "οὐδέ" "ἐάν" "οὔτε" "οἷος"
## [61] "πρότερος" "δέομαι" "μέγας" "ὑπό" "ἄνθρωπος" "σωκράτης"
## [67] "ἀίω" "ὀρθός" "ἀληθής" "αὖ" "πῶς" "φαίνω"
## [73] "οἶδα" "κακός" "ἕκαστος" "ἕ" "παρά" "οὐκοῦν"
## [79] "ἀνήρ" "ὅσος" "ψυχή" "ἄρα" "πάνυ" "μετά"
## [85] "ἕτερος" "θεός" "νόμος" "βούλομαι" "ἆρα" "ὥσπερ"
## [91] "ἔτι" "ἔοικα" "ὅδε" "πού" "δίκαιος" "ποτέ"
## [97] "φύσις" "μᾶλλον" "μόνος" "ναί"
## Frequencies of the selected words in every sample, as a plain data frame
freq3000 <- as.data.frame.matrix(
  as.table(make.table.of.frequencies(corpus3000, mfw, absent.sensitive = FALSE))
)
## distances between all samples, expanded to a full square matrix
d3000 <- dist.delta(freq3000)
dm3000 <- as.matrix(d3000)
## The training samples were placed first when corpus3000 was assembled, so
## their count identifies the training rows (1:153 in the recorded run).
## Deriving it from samples3000 avoids a magic number that silently goes
## stale when the set of retained texts changes.
train_rows3000 <- seq_along(samples3000)
my_rows3000 <- rownames(dm3000)[train_rows3000]
Phlb <- c("Philebus_1", "Philebus_2", "Philebus_3", "Philebus_4", "Philebus_5")
## subset columns, save as df
## drop = FALSE keeps a matrix even if only one Philebus column is present
sdm3000 <- as.data.frame(
  dm3000[train_rows3000, colnames(dm3000) %in% Phlb, drop = FALSE]
)
rownames(sdm3000) <- my_rows3000
## select 5 minimal values for each block
## For every Philebus column of sdm3000, order the training samples by their
## distance and keep the names of the 5 closest ones. vapply() builds the
## whole result at once (no rbind() growth, no 1:n pitfall when there are no
## columns) and guarantees exactly 5 character entries per block.
x <- t(vapply(
  sdm3000,
  function(dists) rownames(sdm3000)[order(dists)][1:5],
  character(5)
))
## Rows of x are labelled with the column names of sdm3000 (the Philebus
## block names), so each label always belongs to the column it was computed
## from.
x
## [,1] [,2] [,3] [,4]
## Philebus_1 "Statesman_3" "Sophist_2" "Sophist_1" "Sophist_3"
## Philebus_2 "Laws4_3" "Laws1_1" "Laws7_3" "Laws2_1"
## Philebus_3 "Sophist_2" "Laws2_3" "Statesman_2" "Cratylus_2"
## Philebus_4 "Statesman_2" "Statesman_1" "Laws2_3" "Laws7_1"
## Philebus_5 "Statesman_2" "Theaetetus_2" "Statesman_3" "Laws2_3"
## [,5]
## Philebus_1 "Laws7_3"
## Philebus_2 "Laws7_1"
## Philebus_3 "Laws2_1"
## Philebus_4 "Statesman_3"
## Philebus_5 "Statesman_1"
## Classify each Philebus block against the training samples with Delta,
## keeping the 5 best candidates per block.
## Row positions are derived from the data instead of being hard-coded:
## the training samples are the first length(samples3000) rows of freq3000
## (1:153 in the recorded run); the test rows are looked up by the block
## names in Phlb (154:158 in the recorded run).
train_idx <- seq_along(samples3000)
test_idx <- match(Phlb, rownames(freq3000))
## fail loudly rather than pass NA rows to perform.delta()
stopifnot(!anyNA(test_idx))
delta3000 <- perform.delta(
  training.set = freq3000[train_idx, ],
  test.set = freq3000[test_idx, ],
  distance = "delta",
  no.of.candidates = 5,
  z.scores.both.sets = TRUE
)
delta3000$ranking
## 1 2 3 4 5
## Philebus_1 "Statesman" "Sophist" "Sophist" "Sophist" "Laws7"
## Philebus_2 "Laws4" "Laws1" "Laws7" "Laws2" "Laws7"
## Philebus_3 "Sophist" "Laws2" "Statesman" "Cratylus" "Laws2"
## Philebus_4 "Statesman" "Statesman" "Laws2" "Laws7" "Statesman"
## Philebus_5 "Statesman" "Theaetetus" "Statesman" "Laws2" "Statesman"
## attr(,"description")
## [1] "predicted classes with their runner-ups"
## Scores stored alongside the ranking, one row per Philebus block
delta3000[["scores"]]
## 1 2 3 4 5
## Philebus_1 0.7649767 0.7833329 0.8271692 0.8280086 0.8622148
## Philebus_2 0.7620637 0.7912929 0.8028715 0.8082739 0.8119822
## Philebus_3 0.8843033 0.8853721 0.8910809 0.8979043 0.9026789
## Philebus_4 0.7795680 0.8022373 0.8043081 0.8067096 0.8111693
## Philebus_5 0.7226361 0.7843164 0.7868572 0.8031154 0.8052725
## attr(,"description")
## [1] "Delta scores, ordered according to candidates"