I load each OFT dataset, add duration and study-name columns, and rank-order the strains by mean and by variance within each study.
library(data.table)
library(rCharts)
library(knitr)
## Warning: package 'knitr' was built under R version 3.1.2
# Brown1 (dur = 5): per-strain n, mean and variance of measured_value for rows
# with sex == 'm', followed by within-study ranks of those means and variances.
Brown1 <- data.table(read.csv('raw_data_from_MPD/total_distance/Brown1.csv'))
Brown1.strains <- Brown1[
  sex == 'm',
  list(study = 'Brown1', dur = 5, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Chesler4 (dur = 20): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
Chesler4 <- data.table(read.csv('raw_data_from_MPD/total_distance/Chesler4.csv'))
Chesler4.strains <- Chesler4[
  sex == 'm',
  list(study = 'Chesler4', dur = 20, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Crowley1, 0-days file (dur = 60): per-strain n, mean and variance of
# measured_value for rows with sex == 'm', then within-study ranks.
Crowley1.0 <- data.table(read.csv('raw_data_from_MPD/total_distance/Crowley1_0days.csv'))
Crowley1.0.strains <- Crowley1.0[
  sex == 'm',
  list(study = 'Crowley1.0', dur = 60, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Crowley1, 30-days file (dur = 60): per-strain n, mean and variance of
# measured_value for rows with sex == 'm', then within-study ranks.
Crowley1.30 <- data.table(read.csv('raw_data_from_MPD/total_distance/Crowley1_30days.csv'))
Crowley1.30.strains <- Crowley1.30[
  sex == 'm',
  list(study = 'Crowley1.30', dur = 60, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Crowley1, 60-days file (dur = 60): per-strain n, mean and variance of
# measured_value for rows with sex == 'm', then within-study ranks.
Crowley1.60 <- data.table(read.csv('raw_data_from_MPD/total_distance/Crowley1_60days.csv'))
Crowley1.60.strains <- Crowley1.60[
  sex == 'm',
  list(study = 'Crowley1.60', dur = 60, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Crowley1, 120-days file (dur = 60): per-strain n, mean and variance of
# measured_value for rows with sex == 'm', then within-study ranks.
Crowley1.120 <- data.table(read.csv('raw_data_from_MPD/total_distance/Crowley1_120days.csv'))
Crowley1.120.strains <- Crowley1.120[
  sex == 'm',
  list(study = 'Crowley1.120', dur = 60, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
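This study contains only consomic mice, so I exclude it.
This study contains only consomic mice, so I exclude it.
(These notes refer to studies whose code is omitted here; Gershenfeld1, loaded next, is included.)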
# Gershenfeld1 (dur = 5): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
Gershenfeld1 <- data.table(read.csv('raw_data_from_MPD/total_distance/Gershenfeld1.csv'))
Gershenfeld1.strains <- Gershenfeld1[
  sex == 'm',
  list(study = 'Gershenfeld1', dur = 5, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Golani1 (dur = 30): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
# The study label is 'Golani1' so that it matches the object name, the source
# file name and the labelling scheme of every other study in this analysis
# (it was previously written as 'Golani').
Golani1 <- data.table(read.csv('raw_data_from_MPD/total_distance/Golani1.csv'))
Golani1.strains <- Golani1[
  sex == 'm',
  list(study = 'Golani1', dur = 30, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Metten1 (dur = 0.75): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
Metten1 <- data.table(read.csv('raw_data_from_MPD/total_distance/Metten1.csv'))
Metten1.strains <- Metten1[
  sex == 'm',
  list(study = 'Metten1', dur = 0.75, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Metten2 (dur = 0.75): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
Metten2 <- data.table(read.csv('raw_data_from_MPD/total_distance/Metten2.csv'))
Metten2.strains <- Metten2[
  sex == 'm',
  list(study = 'Metten2', dur = 0.75, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
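This study contains only consomic mice, so I exclude it. (The note refers to a study whose code is omitted here; Pletcher1, loaded next, is included.)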
# Pletcher1 (dur = 7): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
Pletcher1 <- data.table(read.csv('raw_data_from_MPD/total_distance/Pletcher1.csv'))
Pletcher1.strains <- Pletcher1[
  sex == 'm',
  list(study = 'Pletcher1', dur = 7, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Schalkwyk1 (dur = 5) arrives as two files whose measured_value columns are
# added together row by row before the usual per-strain summary.
Schalkwyk1a <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Schalkwyk1a.csv'))
Schalkwyk1b <- data.table(read.csv(file = 'raw_data_from_MPD/total_distance/Schalkwyk1b.csv'))

# The sum is purely positional, so stop unless both files describe the same
# rows in the same order, as far as the strain and sex columns can show.
# NOTE(review): if the files carry a per-animal id column, compare that too.
stopifnot(
  nrow(Schalkwyk1a) == nrow(Schalkwyk1b),
  identical(as.character(Schalkwyk1a$strain), as.character(Schalkwyk1b$strain)),
  identical(as.character(Schalkwyk1a$sex), as.character(Schalkwyk1b$sex))
)

# Explicit copy(): a plain `<-` only binds a second name to the same
# data.table, so Schalkwyk1a must not be reachable through Schalkwyk1.
Schalkwyk1 <- copy(Schalkwyk1a)
set(Schalkwyk1, j = 'measured_value',
    value = Schalkwyk1a$measured_value + Schalkwyk1b$measured_value)

# Per-strain n, mean and variance for rows with sex == 'm', then within-study
# ranks of the strain means and variances.
Schalkwyk1.strains <- Schalkwyk1[
  sex == 'm',
  list(study = 'Schalkwyk1', dur = 5, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Wahlsten6 (dur = 5): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
Wahlsten6 <- data.table(read.csv('raw_data_from_MPD/total_distance/Wahlsten6.csv'))
# Disabled reciprocal transform of measured_value, kept for reference:
#Wahlsten6$measured_value <- 1/Wahlsten6$measured_value
Wahlsten6.strains <- Wahlsten6[
  sex == 'm',
  list(study = 'Wahlsten6', dur = 5, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Wiltshire1 (dur = 10): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
Wiltshire1 <- data.table(read.csv('raw_data_from_MPD/total_distance/Wiltshire1.csv'))
Wiltshire1.strains <- Wiltshire1[
  sex == 'm',
  list(study = 'Wiltshire1', dur = 10, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Wiltshire2 (dur = 7): per-strain n, mean and variance of measured_value for
# rows with sex == 'm', followed by within-study ranks of those statistics.
Wiltshire2 <- data.table(read.csv('raw_data_from_MPD/total_distance/Wiltshire2.csv'))
Wiltshire2.strains <- Wiltshire2[
  sex == 'm',
  list(study = 'Wiltshire2', dur = 7, n = .N,
       mean = mean(measured_value), var = var(measured_value)),
  by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Tarantino1 (dur = 30) is read from an RDS file rather than a CSV. Its strain
# column is renamed in place to match the other studies, and the summarised
# value is TotDst with missing values dropped from the mean and variance.
# NOTE(review): unlike the CSV-based studies there is no sex filter here, and
# n = .N also counts rows whose TotDst is NA -- confirm both are intended.
Tarantino1 <- readRDS('Tarantino_day1_phenotypes_raw.RDS')
setnames(Tarantino1, 'Strain', 'strain')
Tarantino1.strains <- Tarantino1[, list(study = 'Tarantino1', dur = 30, n = .N,
                                        mean = mean(TotDst, na.rm = TRUE),
                                        var = var(TotDst, na.rm = TRUE)),
                                 by = strain
][, `:=`(mean.rank = rank(mean), var.rank = rank(var))]
# Collect the per-study strain summaries. The order of this list is the order
# in which the tables are stacked later, so it is kept exactly as before.
studies <- list(
  Brown1.strains, Chesler4.strains,
  Crowley1.0.strains, Crowley1.30.strains,
  Crowley1.60.strains, Crowley1.120.strains,
  Gershenfeld1.strains, Golani1.strains,
  Metten1.strains, Metten2.strains,
  Schalkwyk1.strains, Wahlsten6.strains,
  Wiltshire1.strains, Wiltshire2.strains,
  Pletcher1.strains, Tarantino1.strains
)

# Each summary has one row per strain, so nrow() is the number of strains in
# that study. vapply() pins the result to one integer per study, and an
# explicit xlab avoids hist() labelling the axis with the deparsed R call.
hist(vapply(studies, nrow, integer(1)),
     main = 'number of strains in each study',
     xlab = 'strains per study')
# Map strain names spelled with a capital "I" onto the lowercase-"l" spelling
# used by the other studies. Works on factors (levels are relabelled, and a
# relabelled level that already exists is merged into it) and on character
# vectors; the input type is preserved.
fix.strain.names <- function(strain) {
  fixes <- c('NZB/BINJ' = 'NZB/BlNJ',
             'KK/HIJ' = 'KK/HlJ',
             'NZO/HILtJ' = 'NZO/HlLtJ')
  relabel <- function(x) {
    misspelt <- x %in% names(fixes)
    x[misspelt] <- unname(fixes[x[misspelt]])
    x
  }
  if (is.factor(strain)) {
    levels(strain) <- relabel(levels(strain))
    strain
  } else {
    relabel(as.character(strain))
  }
}

# Stack the per-study strain summaries into one long table.
oft <- rbindlist(studies)

# Without this step the three misspelled names count as separate strains, so
# the same strain occupies two rows when studies are compared by strain.
# Ranks were already computed within each study and are not affected.
set(oft, j = 'strain', value = fix.strain.names(oft$strain))

# Sort by duration, then study; studies.in.order records that study order for
# arranging columns later on.
oft <- oft[order(dur, study), ]
studies.in.order <- unique(oft$study)
# Save the combined table as CSV and as RDS, then show its structure.
# The separator is a bare comma: the previous ', ' put a leading space in
# front of every field after the first, which a CSV reader keeps as part of
# the value (e.g. ' Metten1' instead of 'Metten1').
write.table(x = oft, file = 'oft_strain_means_and_var_from_mpd.csv',
            row.names = FALSE, quote = FALSE, sep = ',')
saveRDS(object = oft, file = 'oft_strain_means_and_var_from_mpd.RDS')
str(oft)
## Classes 'data.table' and 'data.frame': 339 obs. of 8 variables:
## $ strain : Factor w/ 57 levels "129S1/SvImJ",..: 35 1 2 4 19 6 7 9 10 35 ...
## $ study : chr "Metten1" "Metten1" "Metten1" "Metten1" ...
## $ dur : num 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
## $ n : int 2 4 4 6 4 4 7 4 4 3 ...
## $ mean : num 12 4.5 2.75 8.33 23.25 ...
## $ var : num 0 16.3 12.9 43.9 19.6 ...
## $ mean.rank: num 8 3 2 5 9 7 6 1 4 7 ...
## $ var.rank : num 1.5 4 3 9 5.5 5.5 8 1.5 7 2 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Frequency table of each identifying column: rows per strain, per study and
# per duration.
lapply(oft[, c('strain', 'study', 'dur'), with = FALSE], table)
## $strain
##
## 129S1/SvImJ A/J AKR/J BALB/cByJ
## 15 16 11 12
## BALB/cJ C3H/HeJ C57BL/6J CAST/EiJ
## 4 15 16 7
## DBA/2J FVB/NJ MOLF/EiJ SJL/J
## 15 15 7 12
## SM/J SPRET/EiJ NOD/ShiLtJ NZO/HlLtJ
## 10 2 10 7
## PWK/PhJ WSB/EiJ BTBR T+ tf/J C57BLKS/J
## 6 6 11 7
## CBA/J DDY/JclSidSeyFrkJ KK/HlJ MA/MyJ
## 8 6 5 8
## MRL/MpJ MSM/Ms NON/ShiLtJ NZL/LtJ
## 8 5 6 4
## NZW/LacJ 129S6/SvEvTac Crl:NMRI(Han) LP/J
## 7 1 1 5
## SENCARA/PtJ SWR/J 129P3/J C57L/J
## 1 6 2 2
## C58/J NZB/BlNJ PERA/EiJ PL/J
## 5 4 4 5
## BUB/BnJ C57BR/cdJ CE/J CZECHII/EiJ
## 4 3 4 2
## I/LnJ LG/J NOR/LtJ P/J
## 4 4 4 3
## PWD/PhJ RIIIS/J SEA/GnJ NZB/BINJ
## 2 4 2 1
## SKIVE/EiJ ZALENDE/EiJ KK/HIJ TALLYHO/JngJ
## 1 1 1 1
## NZO/HILtJ
## 1
##
## $study
##
## Brown1 Chesler4 Crowley1.0 Crowley1.120 Crowley1.30
## 14 8 27 26 27
## Crowley1.60 Gershenfeld1 Golani Metten1 Metten2
## 27 12 8 9 9
## Pletcher1 Schalkwyk1 Tarantino1 Wahlsten6 Wiltshire1
## 31 8 45 21 37
## Wiltshire2
## 30
##
## $dur
##
## 0.75 5 7 10 20 30 60
## 18 55 61 37 8 53 107
# Distribution of n, the row count behind each strain-by-study summary.
hist(oft$n, main = 'mice per strain*study')
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.1.2
library(MASS)
# Rescale a numeric vector linearly so its smallest value maps to 0 and its
# largest to 1.
#
# v:     numeric vector.
# na.rm: if TRUE (the default), missing values are ignored when finding the
#        range and stay missing in the result; if FALSE, any missing value
#        makes the whole result NA.
#
# Returns a numeric vector the same length as v. A vector whose non-missing
# values are all equal maps to 0 (instead of NaN from 0/0), and an empty or
# all-missing vector maps to NA of the same length.
scale01 <- function(v, na.rm = TRUE) {
  all.missing <- rep(NA_real_, length(v))
  if (length(v) == 0L || all(is.na(v))) {
    return(all.missing)
  }
  lo <- min(v, na.rm = na.rm)
  span <- max(v, na.rm = na.rm) - lo
  if (is.na(span)) {
    # Only reachable with na.rm = FALSE and a missing value present.
    return(all.missing)
  }
  if (span == 0) {
    return(ifelse(is.na(v), NA_real_, 0))
  }
  (v - lo) / span
}
# Reshape the long table into two strain-by-study tables, one holding the
# mean ranks and one the variance ranks.
mean.rank.dt <- dcast.data.table(oft, strain ~ study,
                                 value.var = 'mean.rank', fun.aggregate = mean)
var.rank.dt <- dcast.data.table(oft, strain ~ study,
                                value.var = 'var.rank', fun.aggregate = mean)

# Strain labels for the matrix rows; both matrices reuse the labels taken
# from the mean-rank table.
row.names <- mean.rank.dt$strain

# Put the study columns in studies.in.order, with strain first.
setcolorder(mean.rank.dt, c('strain', studies.in.order))
setcolorder(var.rank.dt, c('strain', studies.in.order))

# Drop the strain column and convert to plain matrices. Any NaN cell becomes
# NA (presumably these are strain/study pairs with no data).
mean.rank.mtx <- as.matrix(mean.rank.dt[, -1L, with = FALSE])
var.rank.mtx <- as.matrix(var.rank.dt[, -1L, with = FALSE])
mean.rank.mtx[is.nan(mean.rank.mtx)] <- NA
var.rank.mtx[is.nan(var.rank.mtx)] <- NA
rownames(mean.rank.mtx) <- row.names
rownames(var.rank.mtx) <- row.names
# Axis labels perpendicular to the axis (las = 2) and text at 80% size.
par(las = 2, cex = 0.8)

# Grey level for each strain: its average rank across studies (missing cells
# ignored), rescaled to [0, 1] and inverted so larger averages plot darker.
rank.shade <- function(rank.mtx) {
  gray(1 - scale01(rowMeans(rank.mtx, na.rm = TRUE)))
}

# One line per strain across the studies, shaded by the other statistic.
parcoord(mean.rank.mtx,
         col = rank.shade(var.rank.mtx),
         main = 'mean rank colored by var rank (dark = high var)')
parcoord(var.rank.mtx,
         col = rank.shade(mean.rank.mtx),
         main = 'var rank colored by mean rank (dark = high mean)')