library(MASS)

Data

Generalization

# Load the generalization table. Anything after a "#" in the CSV is treated as
# a comment (comment.char = "#") and the first row supplies the column names.
# The path is written with plain spaces: "\ " is not a valid escape sequence
# inside an R string and makes the parser reject the whole statement.
generalizationData <- read.csv(
  file.path(
    "/Users/helmuth/Dropbox/Research/My Papers",
    "Simplification and Generalization/Data/Generalization",
    "perct_generalized_table.csv"
  ),
  comment.char = "#",
  header = TRUE
)

# Columns that take part in the ranking and Friedman analysis below.
keeps <- c(
  "none", "Genome", "GenomeBacktracking",
  "GenomeBacktrackingNoop", "GenomeNoop", "Program"
)

# Numeric matrix restricted to the kept columns, in the order given by `keeps`.
generalizationMatrix <- data.matrix(generalizationData[keeps])

Sizes

# Load the program-size table. Anything after a "#" in the CSV is treated as a
# comment (comment.char = "#") and the first row supplies the column names.
# The path is written with plain spaces: "\ " is not a valid escape sequence
# inside an R string and makes the parser reject the whole statement.
sizeData <- read.csv(
  file.path(
    "/Users/helmuth/Dropbox/Research/My Papers",
    "Simplification and Generalization/Data/Generalization",
    "prog_size_table.csv"
  ),
  comment.char = "#",
  header = TRUE
)

# Columns that take part in the ranking and Friedman analysis below.
keeps <- c(
  "none", "Genome", "GenomeBacktracking",
  "GenomeBacktrackingNoop", "GenomeNoop", "Program"
)

# Numeric matrix restricted to the kept columns, in the order given by `keeps`.
sizeMatrix <- data.matrix(sizeData[keeps])

Average ranks and Friedman’s test

Friedman’s test compares several treatments (here, the columns of each matrix) that were all measured on the same set of subjects (here, the rows).

Generalization

# Rank the columns within each row of generalizationMatrix. Ranking 1 - value
# reverses the order, so the largest value in a row receives rank 1; tied
# values share their average rank. The printed result has one column per
# input row.
apply(
  X = generalizationMatrix,
  MARGIN = 1,
  FUN = function(rowValues) rank(1 - rowValues)
)
##                        [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## none                    3.5    6  3.5    6  1.0  3.5  3.5    6    6     6
## Genome                  3.5    4  3.5    2  4.0  3.5  3.5    1    5     4
## GenomeBacktracking      3.5    2  3.5    3  2.5  3.5  3.5    4    2     2
## GenomeBacktrackingNoop  3.5    1  3.5    4  2.5  3.5  3.5    2    3     1
## GenomeNoop              3.5    3  3.5    1  5.0  3.5  3.5    3    4     3
## Program                 3.5    5  3.5    5  6.0  3.5  3.5    5    1     5
##                        [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18]
## none                       6     6     6     6   3.5     6     6     6
## Genome                     3     3     3     1   3.5     2     5     4
## GenomeBacktracking         4     4     3     3   3.5     1     3     2
## GenomeBacktrackingNoop     5     1     3     2   3.5     4     1     3
## GenomeNoop                 1     2     3     4   3.5     5     2     5
## Program                    2     5     3     5   3.5     3     4     1
##                        [,19] [,20] [,21] [,22] [,23] [,24]
## none                     3.5   3.5     6     6   3.5     6
## Genome                   3.5   3.5     3     3   3.5     5
## GenomeBacktracking       3.5   3.5     2     3   3.5     4
## GenomeBacktrackingNoop   3.5   3.5     1     3   3.5     1
## GenomeNoop               3.5   3.5     4     3   3.5     3
## Program                  3.5   3.5     5     3   3.5     2
# Average, over all rows of generalizationMatrix, of the within-row ranks of
# each column (largest value in a row = rank 1).
rowMeans(
  apply(generalizationMatrix, 1, function(rowValues) rank(1 - rowValues))
)
##                   none                 Genome     GenomeBacktracking 
##               4.958333               3.333333               3.020833 
## GenomeBacktrackingNoop             GenomeNoop                Program 
##               2.729167               3.291667               3.666667
# Friedman rank sum test on the matrix: rows act as blocks, columns as groups.
friedman.test(y = generalizationMatrix)
## 
##  Friedman rank sum test
## 
## data:  generalizationMatrix
## Friedman chi-squared = 33.844, df = 5, p-value = 2.558e-06
# Copy of the generalization columns under short axis labels, ordered
# left to right by the ascending mean ranks printed above.
generalizationDataSorted <- data.frame(
  GBN = generalizationData$GenomeBacktrackingNoop,
  GB = generalizationData$GenomeBacktracking,
  GN = generalizationData$GenomeNoop,
  G = generalizationData$Genome,
  P = generalizationData$Program,
  n = generalizationData$none
)

# Parallel-coordinates plot with one line, and one rainbow colour, per row.
parcoord(
  generalizationDataSorted,
  col = rainbow(nrow(generalizationMatrix)),
  var.label = TRUE
)

# Same plot restricted to the vertical window [0.7, 1].
# NOTE(review): parcoord() appears to rescale every column to [0, 1] before
# plotting, so this window would be in rescaled units rather than raw
# generalization values -- confirm that this is the intended zoom.
parcoord(
  generalizationDataSorted,
  col = rainbow(nrow(generalizationMatrix)),
  var.label = TRUE,
  ylim = c(0.7, 1)
)

Size

# Rank the columns within each row of sizeMatrix: the smallest value in a row
# receives rank 1 and tied values share their average rank. The printed result
# has one column per input row.
apply(X = sizeMatrix, MARGIN = 1, FUN = rank)
##                        [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## none                      6    6    6    6  6.0    6  6.0    6    6     6
## Genome                    5    5    5    5  4.5    5  3.5    5    5     5
## GenomeBacktracking        3    4    4    3  2.0    2  3.5    3    3     4
## GenomeBacktrackingNoop    2    2    3    1  3.0    3  3.5    2    1     1
## GenomeNoop                4    3    2    4  4.5    4  3.5    4    4     3
## Program                   1    1    1    2  1.0    1  1.0    1    2     2
##                        [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18]
## none                       6     6     6     6     6     6     6     6
## Genome                     5     4     3     5     4     5     5     5
## GenomeBacktracking         3     1     1     3     1     4     3     3
## GenomeBacktrackingNoop     2     2     2     1     3     2     2     2
## GenomeNoop                 4     3     4     2     5     3     4     4
## Program                    1     5     5     4     2     1     1     1
##                        [,19] [,20] [,21] [,22] [,23] [,24]
## none                       6     6     6     6   6.0     6
## Genome                     5     5     5     4   2.5     5
## GenomeBacktracking         1     4     3     2   2.5     4
## GenomeBacktrackingNoop     3     2     1     3   2.5     2
## GenomeNoop                 4     3     4     5   2.5     3
## Program                    2     1     2     1   5.0     1
# Average, over all rows of sizeMatrix, of the within-row ranks of each column
# (smallest value in a row = rank 1).
rowMeans(
  apply(sizeMatrix, 1, rank)
)
##                   none                 Genome     GenomeBacktracking 
##               6.000000               4.604167               2.791667 
## GenomeBacktrackingNoop             GenomeNoop                Program 
##               2.125000               3.604167               1.875000
# Friedman rank sum test on the matrix: rows act as blocks, columns as groups.
friedman.test(y = sizeMatrix)
## 
##  Friedman rank sum test
## 
## data:  sizeMatrix
## Friedman chi-squared = 88.004, df = 5, p-value < 2.2e-16
# Copy of the size columns under short axis labels, ordered left to right by
# the ascending mean ranks printed above.
sizeDataSorted <- data.frame(
  P = sizeData$Program,
  GBN = sizeData$GenomeBacktrackingNoop,
  GB = sizeData$GenomeBacktracking,
  GN = sizeData$GenomeNoop,
  G = sizeData$Genome,
  n = sizeData$none
)

# Parallel-coordinates plot with one line, and one rainbow colour, per row.
parcoord(sizeDataSorted, col = rainbow(nrow(sizeMatrix)), var.label = TRUE)

# Repeat the plot without the "n" (none) column.
sizeDataSorted <- sizeDataSorted[, c("P", "GBN", "GB", "GN", "G")]

parcoord(sizeDataSorted, col = rainbow(nrow(sizeMatrix)), var.label = TRUE)

# The call below was left disabled because it did not produce a usable plot.
# NOTE(review): likely cause -- parcoord() appears to rescale every column to
# [0, 1] before plotting, so a ylim given in raw size units (3 to 100) lies
# outside the drawn range. Confirm against the MASS source.
#parcoord(sizeDataSorted, col=rainbow(length(sizeMatrix[,1])), var.label=TRUE, ylim=c(3,100))
library(reshape)
library(ggplot2)
# Draw one line per `problem` across the melted `variable` column, with the
# melted `value` on the y axis. The legend is hidden and the x-axis labels are
# tilted by 10 degrees.
#
# longData: a data frame with `variable`, `value` and `problem` columns.
# Returns the ggplot object; it is drawn when printed, which happens
# automatically for a top-level call.
plotSizeProfiles <- function(longData) {
  ggplot(longData) +
    geom_line(aes(x = variable, y = value, group = problem, color = problem)) +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 10, vjust = 0.5)
    )
}

# Long format: one row per (problem, X, column) combination. `id.vars` is
# spelled out in full instead of relying on partial matching of `id`.
sizeLong <- melt(sizeData, id.vars = c("problem", "X"))

plotSizeProfiles(sizeLong)

# Same reshaping and plot with the `none` column removed first.
sizeLongNoNone <- melt(
  subset(sizeData, select = -none),
  id.vars = c("problem", "X")
)

plotSizeProfiles(sizeLongNoNone)