Packages
library(stylo)
##
## ### stylo version: 0.7.4 ###
##
## If you plan to cite this software (please do!), use the following reference:
## Eder, M., Rybicki, J. and Kestemont, M. (2016). Stylometry with R:
## a package for computational text analysis. R Journal 8(1): 107-121.
## <https://journal.r-project.org/archive/2016/RJ-2016-007/index.html>
##
## To get full BibTeX entry, type: citation("stylo")
Load parsed Isocrates’ speeches
# Read the two speeches from the "corpus" directory into one corpus object.
speech_files <- c("Antidosis.txt", "Panathenaicus.txt")
corpus <- load.corpus(
  files = speech_files,
  corpus.dir = "corpus"
)
Make random samples
# Draw 16 random samples (length 1000, with replacement) from each text.
samples <- make.samples(
  corpus,
  sample.size = 1000,
  sampling = "random.sampling",
  sampling.with.replacement = TRUE,
  number.of.samples = 16
) ## returns list of 32
## Antidosis.txt
## - text length (in words): 17618
## - nr. of random samples: 16
## - sample length: 1000
## Panathenaicus.txt
## - text length (in words): 15862
## - nr. of random samples: 16
## - sample length: 1000
Randomly distribute samples between profiles
# Draw 16 sample indices per profile, each between 1 and 32 (repeats allowed).
# runif() does not return its upper bound, so flooring values drawn from
# [1, 33) yields the integers 1..32 and never 33.
# NOTE(review): no set.seed() is called, so the draws differ on every run.
draw_indices <- function(how_many = 16) {
  floor(runif(how_many, min = 1, max = 33))
}
num1 <- draw_indices()
num1
## [1] 25 19 4 22 17 6 23 6 11 28 7 2 23 21 25 4
num2 <- draw_indices()
num2
## [1] 32 25 19 21 27 31 1 20 15 22 10 5 3 29 7 20
## Some indices occur in both sets, which is desirable: the shared samples
## make the two profiles stylistically closer.
intersect(num1, num2)
## [1] 25 19 22 7 21
Create 4 texts by 2 “Isocrates” profiles (two texts per profile)
## Assemble each "text" from 8 of the drawn samples: select the list
## elements by index, then flatten them into a single unnamed vector.
merge_samples <- function(idx) {
  unlist(samples[idx], recursive = TRUE, use.names = FALSE)
}
first_half <- 1:8
second_half <- 9:16
Isoc1_Text1 <- merge_samples(num1[first_half])
Isoc1_Text2 <- merge_samples(num1[second_half])
Isoc2_Text1 <- merge_samples(num2[first_half])
Isoc2_Text2 <- merge_samples(num2[second_half])
Write files
# Write each merged text to a file named after it, one element per line,
# without row names, column names, or quoting.
# NOTE(review): files are written to the working directory, while the later
# size.penalize() call reads from "corpus_twins" -- confirm they are moved there.
twin_texts <- list(
  Isoc1_Text1 = Isoc1_Text1,
  Isoc1_Text2 = Isoc1_Text2,
  Isoc2_Text1 = Isoc2_Text1,
  Isoc2_Text2 = Isoc2_Text2
)
for (file_name in names(twin_texts)) {
  write.table(
    twin_texts[[file_name]],
    file_name,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )
}
Penalize (run size.penalize on the four texts)
# Run stylo's size.penalize() on the texts in "corpus_twins" using the
# "delta" classification method: 3 MFW settings x 3 sample sizes,
# 100 iterations each. Then print the per-text accuracy matrices.
mfw_levels <- c(35, 70, 100)
coverage_sizes <- c(500, 1000, 1500)
sp <- size.penalize(
  mfw = mfw_levels,
  corpus.dir = "corpus_twins",
  sample.size.coverage = coverage_sizes,
  iterations = 100,
  classification.method = "delta"
)
sp$accuracy.scores
## $Isoc1_Text1
## 500 1000 1500
## mfw_35 0.43 0.54 0.63
## mfw_70 0.38 0.47 0.58
## mfw_100 0.51 0.61 0.64
##
## $Isoc1_Text2
## 500 1000 1500
## mfw_35 0.33 0.27 0.29
## mfw_70 0.30 0.19 0.08
## mfw_100 0.29 0.16 0.07
##
## $Isoc2_Text1
## 500 1000 1500
## mfw_35 0.32 0.26 0.39
## mfw_70 0.38 0.44 0.58
## mfw_100 0.32 0.37 0.66
##
## $Isoc2_Text2
## 500 1000 1500
## mfw_35 0.31 0.32 0.42
## mfw_70 0.31 0.28 0.29
## mfw_100 0.26 0.22 0.22
##
## attr(,"description")
## [1] "accuracy scores for the tested texts"
Mean & SD
## Collect every value from the per-text accuracy matrices into one plain
## numeric vector (each matrix flattened column by column, texts in list
## order), then summarise it.
## A single unlist(lapply(...)) replaces the index loop: it does not grow
## the vector on every iteration and, unlike 1:length(x), it is safe when
## the list is empty.
vec <- unlist(lapply(sp$accuracy.scores, as.vector), use.names = FALSE)
mean(vec)
## [1] 0.3644444
sd(vec)
## [1] 0.1528481