Packages
library(stylo)
library(dendextend)
library(ape)
Training & Test Corpora: Load and Parse
- Set 1: Plato 1 (HiMa., Euthphr.) vs. Plato 2 (R. 2 and 3) vs. Plato 3 (Lg. 6 and 11)
- Set 2: Plato 1 (Prt., Grg.) vs. Plato 2 (R. 8 and 9) vs. Plato 3 (Lg. 8 and 9)
# Training corpora 1 and 2: every file in the given directory is loaded as one
# unsampled plain-text document, lower-cased, read as UTF-8.
tr_corpus_1 <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "training_corpus1",
  markup.type= "plain",
  corpus.lang = "Other",
  sampling = "no.sampling",
  preserve.case = FALSE,
  encoding = "UTF-8"
)
tr_corpus_2 <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "training_corpus2",
  markup.type= "plain",
  corpus.lang = "Other",
  sampling = "no.sampling",
  preserve.case = FALSE,
  encoding = "UTF-8"
)
# Test text: Philebus.txt, tokenised as single words and cut into
# non-overlapping samples of 5000 tokens each.
# NOTE(review): no corpus.dir is given, so the file is presumably looked up
# relative to the current working directory -- confirm.
test_5000 <- load.corpus.and.parse(
  files = "Philebus.txt",
  markup.type= "plain",
  corpus.lang = "Other",
  sample.size = 5000,
  sampling = "normal.sampling",
  sample.overlap = 0,
  features = "w",
  ngram.size = 1,
  preserve.case = FALSE,
  encoding = "UTF-8"
)
Most frequent words
# One frequency list per training corpus.
my_freq1 <- make.frequency.list(tr_corpus_1)
my_freq2 <- make.frequency.list(tr_corpus_2)
# Keep only the words that occur in both corpora.
my_freq_intersect <- intersect(my_freq1, my_freq2)
# Take the first 100 shared words, then drop four of them by position.
# NOTE(review): the reason for excluding positions 41, 56, 77 and 86 is not
# recorded here -- confirm which words these are and why they are removed.
mfw <- my_freq_intersect[seq_len(100)][-c(41, 56, 77, 86)]
mfw
## [1] "ὁ" "καί" "δέ" "εἰμί" "οὗτος" "ἐγώ"
## [7] "τε" "οὐ" "ἄν" "αὐτός" "μέν" "ὅς"
## [13] "τις" "ἠέ" "γάρ" "μή" "λέγω" "ἐν"
## [19] "πᾶς" "γε" "δή" "ὡς" "ἀλλά" "φημί"
## [25] "ὅστις" "ἄλλος" "περί" "γίγνομαι" "ὦ" "τίς"
## [31] "σύ" "καλός" "οὖν" "τοιοῦτος" "ἔχω" "πρός"
## [37] "ἐάν" "εἰ" "εἰς" "κατά" "ποιέω" "πολύς"
## [43] "ἑαυτοῦ" "οὕτως" "θεός" "λόγος" "δοκέω" "ἐκ"
## [49] "διά" "δέομαι" "ἐπί" "νόμος" "εἶπον" "οὐδέ"
## [55] "ὑπό" "πρότερος" "ἐκεῖνος" "ἀίω" "νῦν" "οὐδείς"
## [61] "ἄνθρωπος" "οἴομαι" "ἀγαθός" "ἄρα" "ἕκαστος" "μέγας"
## [67] "ὀρθός" "αὖ" "φαίνω" "οἷος" "ἕ" "ὅσος"
## [73] "παῖς" "πάνυ" "κακός" "ἦ" "ἀνήρ" "πῶς"
## [79] "ὅσιος" "πού" "οἶδα" "παρά" "ἀρχή" "βούλομαι"
## [85] "ἀληθής" "ἔοικα" "οὐκοῦν" "θέα" "ἑκάτερος" "μάλιστα"
## [91] "χρή" "ἀμφότερος" "μηδείς" "ἆρα" "μήτε" "μετά"
Tables of Frequencies
# Frequency tables restricted to the `mfw` feature list, converted to plain
# data frames (one row per text / sample, one column per word).
#
# absent.sensitive = FALSE is set on every table so that each one keeps a
# column for every word in `mfw`, even if that word never occurs in the corpus
# at hand. The tables are later combined with rbind(), which requires identical
# columns; this also matches how freq_test_5000 and the tables for corpora 3
# and 4 are built.
freq_corp1 <- as.data.frame.matrix(as.table(
  make.table.of.frequencies(tr_corpus_1, mfw, absent.sensitive = FALSE)
))
freq_corp2 <- as.data.frame.matrix(as.table(
  make.table.of.frequencies(tr_corpus_2, mfw, absent.sensitive = FALSE)
))
freq_test_5000 <- as.data.frame.matrix(as.table(
  make.table.of.frequencies(test_5000, mfw, absent.sensitive = FALSE)
))
Clusters: Phlb. vs. Set 1
# Stack the Set 1 training texts and the Philebus samples into one table.
dataset1 <- rbind(freq_corp1, freq_test_5000)
# Hierarchical clustering (hclust() defaults) on the Delta distance matrix.
hc1 <- hclust(dist.delta(dataset1))
hcd1 <- as.dendrogram(hc1)
# Plot margins: bottom, left, top = 2 lines; right = 6 lines
# (presumably to leave room for the leaf labels of the horizontal tree).
par(mar = c(2, 2, 2, 6))
# Colour branches and labels by a 4-cluster cut, then draw the tree sideways.
hcd1 %>%
  set("branches_k_color", k = 4) %>%
  set("labels_col", k = 4) %>%
  plot(horiz = TRUE)
# Dashed red vertical reference line at x = 1.
abline(v = 1, col = "red", lty = 2)

Clusters: Phlb. vs. Set 2
# Stack the Set 2 training texts and the Philebus samples into one table.
dataset2 <- rbind(freq_corp2, freq_test_5000)
# Hierarchical clustering (hclust() defaults) on the Delta distance matrix.
hc2 <- hclust(dist.delta(dataset2))
hcd2 <- as.dendrogram(hc2)
# Plot margins: bottom, left, top = 2 lines; right = 6 lines.
par(mar = c(2, 2, 2, 6))
# Colour branches and labels by a 4-cluster cut, then draw the tree sideways.
hcd2 %>%
  set("branches_k_color", k = 4) %>%
  set("labels_col", k = 4) %>%
  plot(horiz = TRUE)
# Dashed red vertical reference line at x = 1.
abline(v = 1, col = "red", lty = 2)

Training Corpora 3 & 4
# Training corpora 3 and 4: loaded with the same settings as corpora 1 and 2
# (all files in the directory, plain text, no sampling, lower-cased, UTF-8).
tr_corpus_3 <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "training_corpus3",
  markup.type= "plain",
  corpus.lang = "Other",
  sampling = "no.sampling",
  preserve.case = FALSE,
  encoding = "UTF-8"
)
tr_corpus_4 <- load.corpus.and.parse(
  files = "all",
  corpus.dir = "training_corpus4",
  markup.type= "plain",
  corpus.lang = "Other",
  sampling = "no.sampling",
  preserve.case = FALSE,
  encoding = "UTF-8"
)
# Frequency tables over the existing `mfw` list (derived from corpora 1 and 2).
# absent.sensitive = FALSE keeps a column for every word in `mfw`, including
# words that do not occur in these corpora.
freq_corp3 <- as.data.frame.matrix(as.table(
  make.table.of.frequencies(tr_corpus_3, mfw, absent.sensitive = FALSE)
))
freq_corp4 <- as.data.frame.matrix(as.table(
  make.table.of.frequencies(tr_corpus_4, mfw, absent.sensitive = FALSE)
))
Clusters: Phlb. vs. Sets 3 & 4
# Stack the Set 3 training texts and the Philebus samples into one table.
dataset3 <- rbind(freq_corp3, freq_test_5000)
# Hierarchical clustering (hclust() defaults) on the Delta distance matrix.
hc3 <- hclust(dist.delta(dataset3))
hcd3 <- as.dendrogram(hc3)
# Plot margins: bottom, left, top = 2 lines; right = 6 lines.
par(mar = c(2, 2, 2, 6))
# Colour branches and labels by a 4-cluster cut, then draw the tree sideways.
hcd3 %>%
  set("branches_k_color", k = 4) %>%
  set("labels_col", k = 4) %>%
  plot(horiz = TRUE)
# Dashed red vertical reference line at x = 1.
abline(v = 1, col = "red", lty = 2)

# Stack the Set 4 training texts and the Philebus samples into one table.
dataset4 <- rbind(freq_corp4, freq_test_5000)
# Hierarchical clustering (hclust() defaults) on the Delta distance matrix.
hc4 <- hclust(dist.delta(dataset4))
hcd4 <- as.dendrogram(hc4)
# Plot margins: bottom, left, top = 2 lines; right = 6 lines.
par(mar = c(2, 2, 2, 6))
# Colour branches and labels by a 4-cluster cut, then draw the tree sideways.
hcd4 %>%
  set("branches_k_color", k = 4) %>%
  set("labels_col", k = 4) %>%
  plot(horiz = TRUE)
# Dashed red vertical reference line at x = 1.
abline(v = 1, col = "red", lty = 2)

Distance tables (Sets 1-4)
# Print the pairwise Delta distances for dataset1 as a full square matrix.
as.matrix(dist.delta(dataset1))
## Pl1_Euthyphro Pl1_HippiasMajor Pl2_Republic2 Pl2_Republic3
## Pl1_Euthyphro 0.0000000 0.7982304 1.1324481 1.1371370
## Pl1_HippiasMajor 0.7982304 0.0000000 1.0653264 1.0543927
## Pl2_Republic2 1.1324481 1.0653264 0.0000000 0.6271313
## Pl2_Republic3 1.1371370 1.0543927 0.6271313 0.0000000
## Pl3_Laws11 1.6299021 1.6566945 1.3089634 1.1282576
## Pl3_Laws6 1.5568739 1.6097192 1.2174111 1.1233399
## Philebus_1 1.2236525 1.1317020 1.0560632 1.0207462
## Philebus_2 1.1565279 1.2034091 1.1539192 1.1055276
## Philebus_3 1.1746438 1.1530257 1.0076794 0.8882321
## Pl3_Laws11 Pl3_Laws6 Philebus_1 Philebus_2 Philebus_3
## Pl1_Euthyphro 1.6299021 1.5568739 1.2236525 1.1565279 1.1746438
## Pl1_HippiasMajor 1.6566945 1.6097192 1.1317020 1.2034091 1.1530257
## Pl2_Republic2 1.3089634 1.2174111 1.0560632 1.1539192 1.0076794
## Pl2_Republic3 1.1282576 1.1233399 1.0207462 1.1055276 0.8882321
## Pl3_Laws11 0.0000000 0.7304595 1.3278116 1.4338780 1.2012549
## Pl3_Laws6 0.7304595 0.0000000 1.1507042 1.2296972 1.0871940
## Philebus_1 1.3278116 1.1507042 0.0000000 0.8806585 0.7948277
## Philebus_2 1.4338780 1.2296972 0.8806585 0.0000000 0.6753248
## Philebus_3 1.2012549 1.0871940 0.7948277 0.6753248 0.0000000
# Print the pairwise Delta distances for dataset2 as a full square matrix.
as.matrix(dist.delta(dataset2))
## Pl1_Gorgias Pl1_Protagoras Pl2_Republic8 Pl2_Republic9 Pl3_Laws8
## Pl1_Gorgias 0.000000 0.678999 1.229048 1.112928 1.5203558
## Pl1_Protagoras 0.678999 0.000000 1.127090 1.003995 1.3565115
## Pl2_Republic8 1.229048 1.127090 0.000000 0.644581 1.2449511
## Pl2_Republic9 1.112928 1.003995 0.644581 0.000000 1.3141871
## Pl3_Laws8 1.520356 1.356512 1.244951 1.314187 0.0000000
## Pl3_Laws9 1.514058 1.434185 1.207720 1.360203 0.7472828
## Philebus_1 1.146171 1.075359 1.191393 1.178378 1.2776216
## Philebus_2 1.237692 1.258824 1.191461 1.194538 1.3855281
## Philebus_3 1.056840 1.079718 1.019441 1.046299 1.1524515
## Pl3_Laws9 Philebus_1 Philebus_2 Philebus_3
## Pl1_Gorgias 1.5140575 1.1461713 1.2376919 1.0568399
## Pl1_Protagoras 1.4341850 1.0753588 1.2588242 1.0797184
## Pl2_Republic8 1.2077201 1.1913926 1.1914614 1.0194409
## Pl2_Republic9 1.3602027 1.1783782 1.1945378 1.0462986
## Pl3_Laws8 0.7472828 1.2776216 1.3855281 1.1524515
## Pl3_Laws9 0.0000000 1.3045668 1.4093791 1.2131989
## Philebus_1 1.3045668 0.0000000 1.0133291 0.9066277
## Philebus_2 1.4093791 1.0133291 0.0000000 0.7796193
## Philebus_3 1.2131989 0.9066277 0.7796193 0.0000000
# Print the pairwise Delta distances for dataset3 as a full square matrix.
as.matrix(dist.delta(dataset3))
## Pl1_Euthyphro Pl1_HippiasMajor Pl2_Republic2 Pl2_Republic3
## Pl1_Euthyphro 0.0000000 0.9080436 1.3344141 1.3457935
## Pl1_HippiasMajor 0.9080436 0.0000000 1.2410607 1.2306269
## Pl2_Republic2 1.3344141 1.2410607 0.0000000 0.7606477
## Pl2_Republic3 1.3457935 1.2306269 0.7606477 0.0000000
## Pl3_Laws1 1.3647860 1.3923041 1.0870141 0.9845900
## Pl3_Laws2 1.3176924 1.2876555 1.0000289 0.9560728
## Philebus_1 1.4431512 1.3038777 1.2249446 1.1982442
## Philebus_2 1.3463579 1.3866101 1.3640438 1.3187183
## Philebus_3 1.3826356 1.3499426 1.2209616 1.0673194
## Pl3_Laws1 Pl3_Laws2 Philebus_1 Philebus_2 Philebus_3
## Pl1_Euthyphro 1.3647860 1.3176924 1.4431512 1.3463579 1.3826356
## Pl1_HippiasMajor 1.3923041 1.2876555 1.3038777 1.3866101 1.3499426
## Pl2_Republic2 1.0870141 1.0000289 1.2249446 1.3640438 1.2209616
## Pl2_Republic3 0.9845900 0.9560728 1.1982442 1.3187183 1.0673194
## Pl3_Laws1 0.0000000 0.6402198 1.0146754 1.0655754 0.9104088
## Pl3_Laws2 0.6402198 0.0000000 0.9209539 1.0197191 0.9071319
## Philebus_1 1.0146754 0.9209539 0.0000000 1.0567281 0.9713519
## Philebus_2 1.0655754 1.0197191 1.0567281 0.0000000 0.7900575
## Philebus_3 0.9104088 0.9071319 0.9713519 0.7900575 0.0000000
# Print the pairwise Delta distances for dataset4 as a full square matrix.
as.matrix(dist.delta(dataset4))
## Pl1_Gorgias Pl1_Protagoras Pl2_Republic8 Pl2_Republic9 Pl3_Laws1
## Pl1_Gorgias 0.0000000 0.7652905 1.3573703 1.2197161 1.2619296
## Pl1_Protagoras 0.7652905 0.0000000 1.2502423 1.1152130 1.1573086
## Pl2_Republic8 1.3573703 1.2502423 0.0000000 0.7146617 1.1239121
## Pl2_Republic9 1.2197161 1.1152130 0.7146617 0.0000000 1.1973106
## Pl3_Laws1 1.2619296 1.1573086 1.1239121 1.1973106 0.0000000
## Pl3_Laws2 1.2417717 1.1907925 1.0919095 1.2015376 0.7014496
## Philebus_1 1.2485689 1.1923176 1.3271783 1.3096179 1.0698683
## Philebus_2 1.3619493 1.3970653 1.3290277 1.3332667 1.1702709
## Philebus_3 1.1865464 1.2355921 1.1636352 1.1950003 1.0115704
## Pl3_Laws2 Philebus_1 Philebus_2 Philebus_3
## Pl1_Gorgias 1.2417717 1.248569 1.3619493 1.1865464
## Pl1_Protagoras 1.1907925 1.192318 1.3970653 1.2355921
## Pl2_Republic8 1.0919095 1.327178 1.3290277 1.1636352
## Pl2_Republic9 1.2015376 1.309618 1.3332667 1.1950003
## Pl3_Laws1 0.7014496 1.069868 1.1702709 1.0115704
## Pl3_Laws2 0.0000000 1.018049 1.1573076 1.0281623
## Philebus_1 1.0180490 0.000000 1.1258875 1.0368510
## Philebus_2 1.1573076 1.125888 0.0000000 0.8680829
## Philebus_3 1.0281623 1.036851 0.8680829 0.0000000