Code for calculating the confidence intervals reported in the GECCO benchmarks paper.
# Success counts on each of the 29 benchmarks, one vector per method being
# compared (lexicase, tourney, ifs). Position i refers to the same benchmark
# in all three vectors; the prop.test() calls in this file treat every count
# as successes out of 100 trials.
lex <- c(
  98, 5, 1, 7, 6, 0, 51, 0, 2, 0,
  66, 21, 16, 8, 78, 0, 6, 1, 8, 0,
  45, 2, 0, 0, 7, 4, 45, 81, 18
)
tourney <- c(
  68, 3, 0, 3, 0, 0, 8, 0, 0, 0,
  7, 8, 14, 0, 46, 0, 2, 0, 0, 0,
  10, 0, 0, 0, 0, 0, 7, 75, 1
)
ifs <- c(
  72, 3, 0, 6, 0, 0, 16, 0, 0, 0,
  10, 4, 13, 0, 64, 0, 0, 0, 0, 0,
  8, 0, 0, 0, 1, 0, 43, 98, 7
)
# Per-benchmark difference in success proportion, lexicase minus tourney.
# The counts are out of 100 trials (see the prop.test() calls in this file),
# hence the division by 100. Positive values favour lexicase.
(lex - tourney) / 100
## [1] 0.30 0.02 0.01 0.04 0.06 0.00 0.43 0.00 0.02 0.00 0.59 0.13 0.02 0.08
## [15] 0.32 0.00 0.04 0.01 0.08 0.00 0.35 0.02 0.00 0.00 0.07 0.04 0.38 0.06
## [29] 0.17
# Per-benchmark difference in success proportion, lexicase minus ifs.
# The counts are out of 100 trials (see the prop.test() calls in this file),
# hence the division by 100. Positive values favour lexicase.
(lex - ifs) / 100
## [1] 0.26 0.02 0.01 0.01 0.06 0.00 0.35 0.00 0.02 0.00 0.56
## [12] 0.17 0.03 0.08 0.14 0.00 0.06 0.01 0.08 0.00 0.37 0.02
## [23] 0.00 0.00 0.06 0.04 0.02 -0.17 0.11
# Lower bounds of confidence intervals, lexicase and tourney.
# One two-sample prop.test() per benchmark on the difference in success
# proportion, treating each count as successes out of 100 trials and using
# prop.test()'s default confidence level. prop.test() warns ("Chi-squared
# approximation may be incorrect") for some benchmarks; those warnings are
# deliberately left visible rather than suppressed.
# vapply() guarantees exactly one numeric per benchmark, and seq_along() ties
# the index range to the data instead of a hard-coded 29.
vapply(
  seq_along(lex),
  function(x) prop.test(c(lex[x], tourney[x]), c(100, 100))$conf.int[1],
  numeric(1)
)
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## [1] 0.194543516 -0.044245345 -0.019501395 -0.030155251 0.003453434
## [6] 0.000000000 0.308523010 0.000000000 -0.017439496 0.000000000
## [11] 0.474543738 0.024081838 -0.088934509 0.016827510 0.182979816
## [16] 0.000000000 -0.024032478 -0.019501395 0.016827510 0.000000000
## [21] 0.226136382 -0.017439496 0.000000000 0.000000000 0.009992104
## [26] -0.008407293 0.260417153 -0.064519607 0.082216341
# Upper bounds of confidence intervals, lexicase and tourney.
# One two-sample prop.test() per benchmark on the difference in success
# proportion, treating each count as successes out of 100 trials and using
# prop.test()'s default confidence level. prop.test() warns ("Chi-squared
# approximation may be incorrect") for some benchmarks; those warnings are
# deliberately left visible rather than suppressed.
# vapply() guarantees exactly one numeric per benchmark, and seq_along() ties
# the index range to the data instead of a hard-coded 29.
vapply(
  seq_along(lex),
  function(x) prop.test(c(lex[x], tourney[x]), c(100, 100))$conf.int[2],
  numeric(1)
)
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], tourney[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## [1] 0.40545648 0.08424535 0.03950140 0.11015525 0.11654657 0.00000000
## [7] 0.55147699 0.00000000 0.05743950 0.00000000 0.70545626 0.23591816
## [13] 0.12893451 0.14317249 0.45702018 0.00000000 0.10403248 0.03950140
## [19] 0.14317249 0.00000000 0.47386362 0.05743950 0.00000000 0.00000000
## [25] 0.13000790 0.08840729 0.49958285 0.18451961 0.25778366
# Lower bounds of confidence intervals, lexicase and ifs.
# One two-sample prop.test() per benchmark on the difference in success
# proportion, treating each count as successes out of 100 trials and using
# prop.test()'s default confidence level. prop.test() warns ("Chi-squared
# approximation may be incorrect") for some benchmarks; those warnings are
# deliberately left visible rather than suppressed.
# vapply() guarantees exactly one numeric per benchmark, and seq_along() ties
# the index range to the data instead of a hard-coded 29.
vapply(
  seq_along(lex),
  function(x) prop.test(c(lex[x], ifs[x]), c(100, 100))$conf.int[1],
  numeric(1)
)
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## [1] 0.157819162 -0.044245345 -0.019501395 -0.068318171 0.003453434
## [6] 0.000000000 0.218498040 0.000000000 -0.017439496 0.000000000
## [11] 0.440102109 0.071410495 -0.077506977 0.016827510 0.005731482
## [16] 0.000000000 0.003453434 -0.019501395 0.016827510 0.000000000
## [21] 0.248937296 -0.017439496 0.000000000 0.000000000 -0.003675824
## [26] -0.008407293 -0.127560991 -0.261639029 0.009607617
# Upper bounds of confidence intervals, lexicase and ifs.
# One two-sample prop.test() per benchmark on the difference in success
# proportion, treating each count as successes out of 100 trials and using
# prop.test()'s default confidence level. prop.test() warns ("Chi-squared
# approximation may be incorrect") for some benchmarks; those warnings are
# deliberately left visible rather than suppressed.
# vapply() guarantees exactly one numeric per benchmark, and seq_along() ties
# the index range to the data instead of a hard-coded 29.
vapply(
  seq_along(lex),
  function(x) prop.test(c(lex[x], ifs[x]), c(100, 100))$conf.int[2],
  numeric(1)
)
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## Warning in prop.test(c(lex[x], ifs[x]), c(100, 100)): Chi-squared
## approximation may be incorrect
## [1] 0.36218084 0.08424535 0.03950140 0.08831817 0.11654657
## [6] 0.00000000 0.48150196 0.00000000 0.05743950 0.00000000
## [11] 0.67989789 0.26858950 0.13750698 0.14317249 0.27426852
## [16] 0.00000000 0.11654657 0.03950140 0.14317249 0.00000000
## [21] 0.49106270 0.05743950 0.00000000 0.00000000 0.12367582
## [26] 0.08840729 0.16756099 -0.07836097 0.21039238
;; Print one LaTeX table row per benchmark. Each row contains, first for
;; lexicase vs. tourney and then for lexicase vs. ifs, the difference in
;; success proportion followed by its confidence interval [lower, upper],
;; every number formatted to two decimals. The vectors below hold the values
;; printed by the R statements earlier in this file.
(let [lex-tourn-diff [0.30 0.02 0.01 0.04 0.06 0.00 0.43 0.00 0.02 0.00
                      0.59 0.13 0.02 0.08 0.32 0.00 0.04 0.01 0.08 0.00
                      0.35 0.02 0.00 0.00 0.07 0.04 0.38 0.06 0.17]
      lex-ifs-diff [0.26 0.02 0.01 0.01 0.06 0.00 0.35 0.00 0.02 0.00
                    0.56 0.17 0.03 0.08 0.14 0.00 0.06 0.01 0.08 0.00
                    0.37 0.02 0.00 0.00 0.06 0.04 0.02 -0.17 0.11]
      lex-tourn-low [0.194543516 -0.044245345 -0.019501395 -0.030155251 0.003453434
                     0.000000000 0.308523010 0.000000000 -0.017439496 0.000000000
                     0.474543738 0.024081838 -0.088934509 0.016827510 0.182979816
                     0.000000000 -0.024032478 -0.019501395 0.016827510 0.000000000
                     0.226136382 -0.017439496 0.000000000 0.000000000 0.009992104
                     -0.008407293 0.260417153 -0.064519607 0.082216341]
      lex-tourn-up [0.40545648 0.08424535 0.03950140 0.11015525 0.11654657
                    0.00000000 0.55147699 0.00000000 0.05743950 0.00000000
                    0.70545626 0.23591816 0.12893451 0.14317249 0.45702018
                    0.00000000 0.10403248 0.03950140 0.14317249 0.00000000
                    0.47386362 0.05743950 0.00000000 0.00000000 0.13000790
                    0.08840729 0.49958285 0.18451961 0.25778366]
      lex-ifs-low [0.157819162 -0.044245345 -0.019501395 -0.068318171 0.003453434
                   0.000000000 0.218498040 0.000000000 -0.017439496 0.000000000
                   0.440102109 0.071410495 -0.077506977 0.016827510 0.005731482
                   0.000000000 0.003453434 -0.019501395 0.016827510 0.000000000
                   0.248937296 -0.017439496 0.000000000 0.000000000 -0.003675824
                   -0.008407293 -0.127560991 -0.261639029 0.009607617]
      lex-ifs-up [0.36218084 0.08424535 0.03950140 0.08831817 0.11654657
                  0.00000000 0.48150196 0.00000000 0.05743950 0.00000000
                  0.67989789 0.26858950 0.13750698 0.14317249 0.27426852
                  0.00000000 0.11654657 0.03950140 0.14317249 0.00000000
                  0.49106270 0.05743950 0.00000000 0.00000000 0.12367582
                  0.08840729 0.16756099 -0.07836097 0.21039238]
      row-format "$%.2f$ & $[%.2f, %.2f]$ & $%.2f$ & $[%.2f, %.2f]$ \\tabularnewline"
      ;; Transpose the six column vectors into per-benchmark tuples, already
      ;; arranged in the order the format string consumes them:
      ;; tourney diff, low, up, then ifs diff, low, up.
      rows (map vector
                lex-tourn-diff lex-tourn-low lex-tourn-up
                lex-ifs-diff lex-ifs-low lex-ifs-up)]
  (doseq [row rows]
    (println (apply format row-format row))))
Friedman’s rank sum test for comparing multiple treatments (the three selection methods) measured on the same set of subjects (the 29 benchmarks).
# Benchmark-by-method matrix of success counts: one row per benchmark (row
# names "1", "2", ...) and one column per method. matrix() fills column by
# column, so the concatenation order lex, tourney, ifs must match the column
# names given in dimnames.
# The guard stops vectors of unequal length from being silently mis-shaped,
# and the row count is taken from the data rather than hard-coded.
stopifnot(
  length(tourney) == length(lex),
  length(ifs) == length(lex)
)
benchResults <- matrix(
  c(lex, tourney, ifs),
  nrow = length(lex),
  dimnames = list(seq_along(lex), c("lexicase", "tourney", "ifs"))
)
# Rank the methods within each benchmark (row). Negating the counts makes
# rank 1 the method with the most successes; ties share the average rank.
# The result has one row per method and one column per benchmark.
apply(-benchResults, 1, rank)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
## lexicase 1 1.0 1.0 1 1.0 2 1 2 1.0 2 1 1 1 1.0 1 2 1 1.0 1.0 2 1
## tourney 3 2.5 2.5 3 2.5 2 3 2 2.5 2 3 2 2 2.5 3 2 2 2.5 2.5 2 2
## ifs 2 2.5 2.5 2 2.5 2 2 2 2.5 2 2 3 3 2.5 2 2 3 2.5 2.5 2 3
## 22 23 24 25 26 27 28 29
## lexicase 1.0 2 2 1 1.0 1 2 1
## tourney 2.5 2 2 3 2.5 3 3 3
## ifs 2.5 2 2 2 2.5 2 1 2
# Mean rank of each method across all benchmarks (lower is better): rank the
# methods within every benchmark on negated counts, so that rank 1 is the
# method with the most successes, then average each method's ranks.
rowMeans(apply(-benchResults, 1, rank))
## lexicase tourney ifs
## 1.275862 2.465517 2.258621
# Friedman rank sum test on the benchmark-by-method matrix. For a matrix
# argument, friedman.test() takes rows (benchmarks) as blocks and columns
# (methods) as groups, testing whether the methods differ across benchmarks.
friedman.test(benchResults)
##
## Friedman rank sum test
##
## data: benchResults
## Friedman chi-squared = 34.4051, df = 2, p-value = 3.381e-08