Load Data

I load each open field test (OFT) dataset, add duration and study-name columns, and, within each study, rank the strains by their mean and by their variance.

library(data.table)
library(rCharts)
library(knitr)
## Warning: package 'knitr' was built under R version 3.1.2

Brown1

# Raw per-animal records for the Brown1 study.
Brown1 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Brown1.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Brown1.strains <- Brown1[
    sex == 'm',
    list(study = 'Brown1',
         dur = 5,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
# The result is re-assigned so nothing is printed when the report is knit.
Brown1.strains <- Brown1.strains[, `:=`(mean.rank = rank(mean),
                                        var.rank = rank(var))]

Chesler4

# Raw per-animal records for the Chesler4 study.
Chesler4 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Chesler4.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Chesler4.strains <- Chesler4[
    sex == 'm',
    list(study = 'Chesler4',
         dur = 20,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Chesler4.strains <- Chesler4.strains[, `:=`(mean.rank = rank(mean),
                                            var.rank = rank(var))]

Crowley1.0

# Raw per-animal records for the Crowley1 study, 0-day file.
Crowley1.0 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Crowley1_0days.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Crowley1.0.strains <- Crowley1.0[
    sex == 'm',
    list(study = 'Crowley1.0',
         dur = 60,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Crowley1.0.strains <- Crowley1.0.strains[, `:=`(mean.rank = rank(mean),
                                                var.rank = rank(var))]

Crowley1.30

# Raw per-animal records for the Crowley1 study, 30-day file.
Crowley1.30 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Crowley1_30days.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Crowley1.30.strains <- Crowley1.30[
    sex == 'm',
    list(study = 'Crowley1.30',
         dur = 60,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Crowley1.30.strains <- Crowley1.30.strains[, `:=`(mean.rank = rank(mean),
                                                  var.rank = rank(var))]

Crowley1.60

# Raw per-animal records for the Crowley1 study, 60-day file.
Crowley1.60 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Crowley1_60days.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Crowley1.60.strains <- Crowley1.60[
    sex == 'm',
    list(study = 'Crowley1.60',
         dur = 60,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Crowley1.60.strains <- Crowley1.60.strains[, `:=`(mean.rank = rank(mean),
                                                  var.rank = rank(var))]

Crowley1.120

# Raw per-animal records for the Crowley1 study, 120-day file.
Crowley1.120 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Crowley1_120days.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Crowley1.120.strains <- Crowley1.120[
    sex == 'm',
    list(study = 'Crowley1.120',
         dur = 60,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Crowley1.120.strains <- Crowley1.120.strains[, `:=`(mean.rank = rank(mean),
                                                    var.rank = rank(var))]

Donahue3

This study contains only consomic mice, so I exclude it.

Donahue4

This study contains only consomic mice, so I exclude it.

Gershenfeld1

# Raw per-animal records for the Gershenfeld1 study.
Gershenfeld1 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Gershenfeld1.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Gershenfeld1.strains <- Gershenfeld1[
    sex == 'm',
    list(study = 'Gershenfeld1',
         dur = 5,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Gershenfeld1.strains <- Gershenfeld1.strains[, `:=`(mean.rank = rank(mean),
                                                    var.rank = rank(var))]

Golani1

# Raw per-animal records for the Golani1 study.
Golani1 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Golani1.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
# NOTE(review): the study label is 'Golani' although the object is named
# Golani1 and the other studies use matching labels -- confirm this is intended.
Golani1.strains <- Golani1[
    sex == 'm',
    list(study = 'Golani',
         dur = 30,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Golani1.strains <- Golani1.strains[, `:=`(mean.rank = rank(mean),
                                          var.rank = rank(var))]

Metten1

# Raw per-animal records for the Metten1 study.
Metten1 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Metten1.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Metten1.strains <- Metten1[
    sex == 'm',
    list(study = 'Metten1',
         dur = 0.75,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Metten1.strains <- Metten1.strains[, `:=`(mean.rank = rank(mean),
                                          var.rank = rank(var))]

Metten2

# Raw per-animal records for the Metten2 study.
Metten2 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Metten2.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Metten2.strains <- Metten2[
    sex == 'm',
    list(study = 'Metten2',
         dur = 0.75,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Metten2.strains <- Metten2.strains[, `:=`(mean.rank = rank(mean),
                                          var.rank = rank(var))]

Palmer2

This study contains only consomic mice, so I exclude it.

Pletcher1

# Raw per-animal records for the Pletcher1 study.
Pletcher1 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Pletcher1.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Pletcher1.strains <- Pletcher1[
    sex == 'm',
    list(study = 'Pletcher1',
         dur = 7,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Pletcher1.strains <- Pletcher1.strains[, `:=`(mean.rank = rank(mean),
                                              var.rank = rank(var))]

Schalkwyk1

# The Schalkwyk1 measurements come in two files (a and b) whose
# measured values are added together row by row.
Schalkwyk1a <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Schalkwyk1a.csv'))
Schalkwyk1b <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Schalkwyk1b.csv'))

# The sum below pairs rows purely by position, so fail loudly if the two
# files do not line up instead of silently adding values from different rows.
# NOTE(review): strain and sex are the only identifying columns this script
# uses; if the files carry an animal id column, compare that as well.
stopifnot(nrow(Schalkwyk1a) == nrow(Schalkwyk1b),
          identical(as.character(Schalkwyk1a$strain),
                    as.character(Schalkwyk1b$strain)),
          identical(as.character(Schalkwyk1a$sex),
                    as.character(Schalkwyk1b$sex)))

# Start from file a and replace its measured value with the a + b total.
Schalkwyk1 <- Schalkwyk1a
Schalkwyk1$measured_value <- Schalkwyk1a$measured_value + Schalkwyk1b$measured_value

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the summed measured value.
Schalkwyk1.strains <- Schalkwyk1[sex == 'm',
                                 list(study = 'Schalkwyk1',
                                      dur = 5,
                                      n = .N,
                                      mean = mean(measured_value),
                                      var = var(measured_value)),
                                 by = strain]

# Rank the strains within this study (rank 1 = smallest value).
Schalkwyk1.strains <- Schalkwyk1.strains[, c('mean.rank', 'var.rank') := list(rank(mean), rank(var))]

Wahlsten6

# Raw per-animal records for the Wahlsten6 study.
Wahlsten6 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Wahlsten6.csv'))
# Reciprocal transform of the measured value, left disabled:
#Wahlsten6$measured_value <- 1/Wahlsten6$measured_value

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Wahlsten6.strains <- Wahlsten6[
    sex == 'm',
    list(study = 'Wahlsten6',
         dur = 5,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Wahlsten6.strains <- Wahlsten6.strains[, `:=`(mean.rank = rank(mean),
                                              var.rank = rank(var))]

Wiltshire1

# Raw per-animal records for the Wiltshire1 study.
Wiltshire1 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Wiltshire1.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Wiltshire1.strains <- Wiltshire1[
    sex == 'm',
    list(study = 'Wiltshire1',
         dur = 10,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Wiltshire1.strains <- Wiltshire1.strains[, `:=`(mean.rank = rank(mean),
                                                var.rank = rank(var))]

Wiltshire2

# Raw per-animal records for the Wiltshire2 study.
Wiltshire2 <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Wiltshire2.csv'))

# One row per strain (male mice only): study label, test duration,
# number of animals, and mean / variance of the measured value.
Wiltshire2.strains <- Wiltshire2[
    sex == 'm',
    list(study = 'Wiltshire2',
         dur = 7,
         n = .N,
         mean = mean(measured_value),
         var = var(measured_value)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Wiltshire2.strains <- Wiltshire2.strains[, `:=`(mean.rank = rank(mean),
                                                var.rank = rank(var))]

Tarantino1

# Tarantino1 is read from a saved R object rather than a CSV file; its
# strain column is renamed so it matches the other studies.
Tarantino1 <- readRDS('Tarantino_day1_phenotypes_raw.RDS')
setnames(Tarantino1, old = 'Strain', new = 'strain')

# One row per strain: study label, test duration, number of rows, and
# mean / variance of TotDst with missing values ignored. Unlike the other
# studies, no sex filter is applied here.
# NOTE(review): n counts every row, including rows whose TotDst is missing,
# so it can exceed the number of values behind mean and var -- confirm.
Tarantino1.strains <- Tarantino1[
    ,
    list(study = 'Tarantino1',
         dur = 30,
         n = .N,
         mean = mean(TotDst, na.rm = TRUE),
         var = var(TotDst, na.rm = TRUE)),
    by = strain
]

# Rank the strains within this study (rank 1 = smallest value).
Tarantino1.strains <- Tarantino1.strains[, `:=`(mean.rank = rank(mean),
                                                var.rank = rank(var))]

Combine Data

# Collect the per-study strain summaries into one list.
studies <- list(Brown1.strains, Chesler4.strains,
                Crowley1.0.strains, Crowley1.30.strains,
                Crowley1.60.strains, Crowley1.120.strains, 
                Gershenfeld1.strains, Golani1.strains,
                Metten1.strains, Metten2.strains, 
                Schalkwyk1.strains, Wahlsten6.strains,
                Wiltshire1.strains, Wiltshire2.strains,
                Pletcher1.strains, Tarantino1.strains)

# Histogram of the row count (number of strains) of each study table.
hist(sapply(X = studies, FUN = dim)[1,], main = 'number of strains in each study')

# Stack all studies into one long table.
oft <- rbindlist(studies)

# Harmonise strain spellings. Three strains arrive under two spellings that
# differ only in a capital "I" versus a lowercase "l", which would make the
# same strain show up as two separate strains in every cross-study summary.
# The lowercase-"l" form is kept.
strain.fixes <- c('NZB/BINJ'  = 'NZB/BlNJ',
                  'KK/HIJ'    = 'KK/HlJ',
                  'NZO/HILtJ' = 'NZO/HlLtJ')
canonical.strain <- function(x) {
    x <- as.character(x)
    hit <- x %in% names(strain.fixes)
    x[hit] <- unname(strain.fixes[x[hit]])
    x
}
# Keep the existing level order, minus the levels that were merged away.
strain.levels <- unique(canonical.strain(levels(factor(oft$strain))))
# Re-assigned so nothing is printed when the report is knit.
oft <- oft[, strain := factor(canonical.strain(strain), levels = strain.levels)]

# Sort by test duration, then study; the resulting study order is reused
# later as the column order of the rank matrices.
oft <- oft[order(dur, study),]
studies.in.order <- unique(oft$study)

# Save the combined table both as delimited text and as an R object.
write.table(x = oft, file = 'oft_strain_means_and_var_from_mpd.csv', row.names = FALSE, quote = FALSE, sep = ', ')
saveRDS(object = oft, file = 'oft_strain_means_and_var_from_mpd.RDS')

# Print the structure (column names, types, first values) of the combined table.
str(oft)
## Classes 'data.table' and 'data.frame':   339 obs. of  8 variables:
##  $ strain   : Factor w/ 57 levels "129S1/SvImJ",..: 35 1 2 4 19 6 7 9 10 35 ...
##  $ study    : chr  "Metten1" "Metten1" "Metten1" "Metten1" ...
##  $ dur      : num  0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
##  $ n        : int  2 4 4 6 4 4 7 4 4 3 ...
##  $ mean     : num  12 4.5 2.75 8.33 23.25 ...
##  $ var      : num  0 16.3 12.9 43.9 19.6 ...
##  $ mean.rank: num  8 3 2 5 9 7 6 1 4 7 ...
##  $ var.rank : num  1.5 4 3 9 5.5 5.5 8 1.5 7 2 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Tabulate how many rows fall under each strain, each study and each duration.
lapply(oft[, c('strain', 'study', 'dur'), with = FALSE], table)
## $strain
## 
##       129S1/SvImJ               A/J             AKR/J         BALB/cByJ 
##                15                16                11                12 
##           BALB/cJ           C3H/HeJ          C57BL/6J          CAST/EiJ 
##                 4                15                16                 7 
##            DBA/2J            FVB/NJ          MOLF/EiJ             SJL/J 
##                15                15                 7                12 
##              SM/J         SPRET/EiJ        NOD/ShiLtJ         NZO/HlLtJ 
##                10                 2                10                 7 
##           PWK/PhJ           WSB/EiJ      BTBR T+ tf/J         C57BLKS/J 
##                 6                 6                11                 7 
##             CBA/J DDY/JclSidSeyFrkJ            KK/HlJ            MA/MyJ 
##                 8                 6                 5                 8 
##           MRL/MpJ            MSM/Ms        NON/ShiLtJ           NZL/LtJ 
##                 8                 5                 6                 4 
##          NZW/LacJ     129S6/SvEvTac     Crl:NMRI(Han)              LP/J 
##                 7                 1                 1                 5 
##       SENCARA/PtJ             SWR/J           129P3/J            C57L/J 
##                 1                 6                 2                 2 
##             C58/J          NZB/BlNJ          PERA/EiJ              PL/J 
##                 5                 4                 4                 5 
##           BUB/BnJ         C57BR/cdJ              CE/J       CZECHII/EiJ 
##                 4                 3                 4                 2 
##             I/LnJ              LG/J           NOR/LtJ               P/J 
##                 4                 4                 4                 3 
##           PWD/PhJ           RIIIS/J           SEA/GnJ          NZB/BINJ 
##                 2                 4                 2                 1 
##         SKIVE/EiJ       ZALENDE/EiJ            KK/HIJ      TALLYHO/JngJ 
##                 1                 1                 1                 1 
##         NZO/HILtJ 
##                 1 
## 
## $study
## 
##       Brown1     Chesler4   Crowley1.0 Crowley1.120  Crowley1.30 
##           14            8           27           26           27 
##  Crowley1.60 Gershenfeld1       Golani      Metten1      Metten2 
##           27           12            8            9            9 
##    Pletcher1   Schalkwyk1   Tarantino1    Wahlsten6   Wiltshire1 
##           31            8           45           21           37 
##   Wiltshire2 
##           30 
## 
## $dur
## 
## 0.75    5    7   10   20   30   60 
##   18   55   61   37    8   53  107
# Histogram of n, the number of mice behind each strain-by-study row.
hist(oft$n, main = 'mice per strain*study')

Make Parallel Coordinates Plot of mean and variance

library(reshape2)
## Warning: package 'reshape2' was built under R version 3.1.2
library(MASS)
# Linearly rescale a numeric vector so its minimum maps to 0 and its
# maximum maps to 1.
#
# v      numeric vector.
# na.rm  if TRUE, missing values are ignored when finding the range and stay
#        missing in the result; the default FALSE keeps the previous
#        behaviour, where any missing value makes the whole result missing.
#
# Returns a numeric vector of the same length as v. A vector whose values
# are all equal has no spread to rescale and is returned as all zeros
# (missing values stay missing) rather than as 0/0 = NaN.
scale01 <- function(v, na.rm = FALSE) {
    if (length(v) == 0) {
        return(numeric(0))
    }
    rng <- range(v, na.rm = na.rm)
    span <- rng[2] - rng[1]
    if (!is.na(span) && span == 0) {
        # v - v is 0 for observed values and NA for missing ones.
        return(v - v)
    }
    (v - rng[1]) / span
}

# Cast the within-study mean ranks to a wide table with one row per strain
# and one column per study.
mean.rank.dt <- dcast.data.table(data = oft, formula = strain ~ study, fun.aggregate = mean, value.var = 'mean.rank')
# Strain labels of the cast table, kept under the original variable name.
row.names <- mean.rank.dt$strain
# Put the study columns in the duration-sorted study order.
setcolorder(mean.rank.dt, c('strain', studies.in.order))
# Drop the strain column to get a numeric strain x study matrix.
mean.rank.mtx <- as.matrix(mean.rank.dt[ , -1, with = FALSE])
# Cells that come back as NaN are turned into ordinary missing values.
mean.rank.mtx[is.nan(mean.rank.mtx)] <- NA
rownames(mean.rank.mtx) <- mean.rank.dt$strain


# Same construction for the within-study variance ranks.
var.rank.dt <- dcast.data.table(data = oft, formula = strain ~ study, fun.aggregate = mean, value.var = 'var.rank')
setcolorder(var.rank.dt, c('strain', studies.in.order))
var.rank.mtx <- as.matrix(var.rank.dt[,-1, with = FALSE])
var.rank.mtx[is.nan(var.rank.mtx)] <- NA
# Label rows from this table's own strain column instead of borrowing the
# labels of the mean-rank table, so labels cannot drift from the data.
rownames(var.rank.mtx) <- var.rank.dt$strain
 
# Axis labels perpendicular to the axes, slightly reduced text size.
par(las = 2, cex = 0.8)

# Grey level per strain: the row average of the variance ranks rescaled to
# [0, 1] and inverted, so a high average rank gives a dark line.
var.shade <- gray(1 - scale01(rowMeans(var.rank.mtx, na.rm = TRUE)))
parcoord(x = mean.rank.mtx,
         col = var.shade,
         main = 'mean rank colored by var rank (dark = high var)')

# The reverse view: variance ranks drawn, shaded by the average mean rank.
mean.shade <- gray(1 - scale01(rowMeans(mean.rank.mtx, na.rm = TRUE)))
parcoord(x = var.rank.mtx,
         col = mean.shade,
         main = 'var rank colored by mean rank (dark = high mean)')