rm(list = ls(all.names  = TRUE))
library(fpc)
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(DRrequiredAgeing)
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## 
##  >===================================================================================<
##  "DRrequiredAgeing" is developed by International Mouse Phenotyping Consortium (IMPC) 
##  More details  https://www.mousephenotype.org/                                        
##  Contact us hamedhm@ebi.ac.uk                                                         
##  Source code and issues:  https://git.io/fj9WP                                        
##  >===================================================================================<
expList = c('time', 'data_point (for continuous data)','Weight')
df      = read.csv(
    file = UnzipAndfilePath('https://www.ebi.ac.uk/~hamedhm/CC/data/cc_all.zip'),
    as.is = TRUE,
    check.names = FALSE
)
## 2019-11-26 17:05:57. Unziping file ...
## 2019-11-26 17:05:58. Successfully unziped. List of files:
##      1 ==> C:\Users\hamedhm\AppData\Local\Temp\RtmpO2ebAQ/NDPU/cc_all.csv
numVars = sapply(df, function(x) {
    is.numeric(x) &&
        sum(!is.na(x)) >= 0
})
numVars = numVars & !(names(df) %in% expList)
############
a = aggregate(
    x = df[, numVars],
    by = list(Strain = df$strain_original),
    FUN = function(x) {
        if (sum(!is.na(x)) > 0)
            mean(x, na.rm = TRUE)
        else
            NA
    }
)

except.name = function(x, exp = 'Weight') {
    return(x[!x %in% exp])
}

aa=aggregate(x=df,by =  list(Procedure = df$procedure_name),function(x){sum(!is.na(x))})
for(i in 1:nrow(aa)){
    cri = names(a) %in% except.name(names(df[,aa[i,-1]>0,drop=FALSE]))
    names(a)[cri] = paste(aa[i,1],names(a)[cri],sep = '.')
}
a = a[,order(names(a))]

a2 = a[, sapply(a, function(x) {
    sum(is.na(x))
}) == 0]


rownames(a2) = interaction(a2[, 'Strain'])
c = cor(t(a2[, !names(a2) %in% c('Strain')]))

a3 =  a2[, sapply(a2, function(x) {
    if (is.numeric(x)) {
        ifelse(sd(x, na.rm = TRUE) > 0, TRUE, FALSE)
    } else{
        TRUE
    }
    
})]
d  = cor(a3[, !names(a3) %in% c('Strain')])

write.csv(a, file = 'RawDataContinuousParamters - MeanParametersForEachStrain.csv', row.names = FALSE)
write.csv(c, file = 'Correlation of Strains - MeanParametersForEachStrain.csv')
write.csv(d, file = 'Correlation of Parameters - MeanParametersForEachStrain.csv')


heatmap.2(
    d,
    trace = 'none',
    scale = 'none',
    density.info = "none",
    breaks = c(seq(-1,1,length.out = 501)),
    col= colorRampPalette(c("blue3", "white", "blue3"))(n = 500),
    dendrogram = 'none',
    keysize = 1,
    key=FALSE,
    Rowv = FALSE,
    Colv = FALSE,
    lwid=c(1,6),
    lhei=c(1,6),
    margins = c(9,9)
)

# legend(
#   'top',
#   legend = c('More blue = 1/-1', 'More white = 0'),
#   fill = c('blue3', 'white'),
#   horiz = TRUE
# )