Packages

library(stylo)
## 
## ### stylo version: 0.7.4 ###
## 
## If you plan to cite this software (please do!), use the following reference:
##     Eder, M., Rybicki, J. and Kestemont, M. (2016). Stylometry with R:
##     a package for computational text analysis. R Journal 8(1): 107-121.
##     <https://journal.r-project.org/archive/2016/RJ-2016-007/index.html>
## 
## To get full BibTeX entry, type: citation("stylo")

Load parsed Isocrates’ speeches

# Read the two speech files from the "corpus" directory into one corpus object.
speech_files <- c("Antidosis.txt", "Panathenaicus.txt")
corpus <- load.corpus(files = speech_files, corpus.dir = "corpus")

Make random samples

# Draw 16 random samples of 1000 words each (with replacement) from every
# text in the corpus; with two texts this gives a list of 32 samples.
samples <- make.samples(
  corpus,
  sample.size = 1000,
  sampling = "random.sampling",
  sampling.with.replacement = TRUE,
  number.of.samples = 16
) ## returns list of 32
## Antidosis.txt
##  - text length (in words): 17618
##  - nr. of random samples: 16
##  - sample length: 1000
## Panathenaicus.txt
##  - text length (in words): 15862
##  - nr. of random samples: 16
##  - sample length: 1000

Randomly distribute samples between profiles

# Draw two sets of 16 sample indices, one set per artificial author profile.
# sample.int() picks whole numbers in 1..length(samples) directly, so the
# index range follows the sample list instead of a hard-coded upper bound.
# replace = TRUE keeps the original behaviour: an index may occur more than
# once within a set, and the two sets may share indices.
# NOTE(review): no seed is set in the visible script, so the indices printed
# below come from one particular run and will differ on a rerun.
n_samples <- length(samples)
num1 <- sample.int(n_samples, size = 16, replace = TRUE)
num1
##  [1] 25 19  4 22 17  6 23  6 11 28  7  2 23 21 25  4
num2 <- sample.int(n_samples, size = 16, replace = TRUE)
num2
##  [1] 32 25 19 21 27 31  1 20 15 22 10  5  3 29  7 20
# A few samples appear in both sets, which is good: it makes our profiles
# stylistically closer.
intersect(num1, num2)
## [1] 25 19 22  7 21

Create 4 texts by 2 "Isocrateses" (two texts for each artificial author profile)

# Build one "text" from a set of sample indices: pick those samples from the
# list and flatten them into a single unnamed vector.
merge_samples <- function(idx) {
  unlist(samples[idx], recursive = TRUE, use.names = FALSE)
}

# Each profile (num1, num2) supplies two texts of 8 samples each:
# the first 8 indices form Text1, the last 8 form Text2.
first_half <- 1:8
second_half <- 9:16

Isoc1_Text1 <- merge_samples(num1[first_half])
Isoc1_Text2 <- merge_samples(num1[second_half])
Isoc2_Text1 <- merge_samples(num2[first_half])
Isoc2_Text2 <- merge_samples(num2[second_half])

Write files

# Write each merged text to a file named after its variable, one element per
# line, without row names, column names or quotes.
# NOTE(review): the files land in the current working directory, while the
# size.penalize() call further down reads from "corpus_twins" -- presumably
# the files are moved there by hand; confirm.
twin_texts <- list(
  Isoc1_Text1 = Isoc1_Text1,
  Isoc1_Text2 = Isoc1_Text2,
  Isoc2_Text1 = Isoc2_Text1,
  Isoc2_Text2 = Isoc2_Text2
)
for (file_name in names(twin_texts)) {
  write.table(
    twin_texts[[file_name]],
    file = file_name,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )
}

Penalize

# Run stylo's size.penalize() on the texts in "corpus_twins" using the
# "delta" classification method with 100 iterations, for three mfw settings
# and three sample sizes.
mfw_levels <- c(35, 70, 100)
sample_sizes <- c(500, 1000, 1500)
sp <- size.penalize(
  mfw = mfw_levels,
  corpus.dir = "corpus_twins",
  sample.size.coverage = sample_sizes,
  iterations = 100,
  classification.method = "delta"
)
# Print one accuracy matrix per tested text
# (rows: mfw setting, columns: sample size).
sp$accuracy.scores
## $Isoc1_Text1
##          500 1000 1500
## mfw_35  0.43 0.54 0.63
## mfw_70  0.38 0.47 0.58
## mfw_100 0.51 0.61 0.64
## 
## $Isoc1_Text2
##          500 1000 1500
## mfw_35  0.33 0.27 0.29
## mfw_70  0.30 0.19 0.08
## mfw_100 0.29 0.16 0.07
## 
## $Isoc2_Text1
##          500 1000 1500
## mfw_35  0.32 0.26 0.39
## mfw_70  0.38 0.44 0.58
## mfw_100 0.32 0.37 0.66
## 
## $Isoc2_Text2
##          500 1000 1500
## mfw_35  0.31 0.32 0.42
## mfw_70  0.31 0.28 0.29
## mfw_100 0.26 0.22 0.22
## 
## attr(,"description")
## [1] "accuracy scores for the tested texts"

Mean & SD

# Pool every cell of every accuracy matrix into one plain numeric vector.
# lapply() + unlist() replaces the index loop: it is safe for an empty list
# (1:length(x) would iterate over c(1, 0)) and does not re-grow the vector
# on every iteration. Values keep the same order as before: text by text,
# each matrix read column by column.
vec <- unlist(lapply(sp$accuracy.scores, as.vector), use.names = FALSE)
# Overall mean and standard deviation across all texts and settings.
mean(vec)
## [1] 0.3644444
sd(vec)
## [1] 0.1528481