data = read.csv("https://data.broadinstitute.org/Trinity/TMP/bhaas/assembly_metrics.csv", header=T, com="#")

examine correlation structure across all data

org_program = paste(data$Organism, data$Program, sep=",")

org_prog_data = data
rownames(org_prog_data) = org_program
org_prog_data = org_prog_data[,-c(1,2)]
org_prog_data_scaled = scale(org_prog_data)
org_prog_data_cor = cor(org_prog_data_scaled, use='complete.obs')
#correlations can be pos or neg, showing all as pos here.
pheatmap(abs(org_prog_data_cor), show_colnames = T, show_rownames = F) 

examine metric correlations by organism

data_split_by_org = split(data, data[,1])

for (organism in names(data_split_by_org)) {
    message(organism)
    org_data = data_split_by_org[[organism]]
    rownames(org_data) = org_data$Program
    org_data = org_data[,-c(1,2)] #remove organism and prog name
    org_data = scale(org_data)
    NA_cols = which(apply(org_data, 2, function(x) {all(is.na(x))}) == TRUE)
    if (length(NA_cols) > 0) {
      org_data = org_data[, -1 * NA_cols] # remove columns with just NA values
    }
    correlations = cor(org_data, use="complete.obs")
    correlations[is.na(correlations)] = 0
    correlations = abs(correlations)
    pheatmap(correlations, main=paste0(c(organism, " metrics")), show_colnames = T, show_rownames = F)
}