library("data.table")
#dat = data.frame(data.table::fread("cat SHARE.csv | sed -e 's/ *//g; s/\"\"//g'", na.strings=c("",NA)))###this allows us to preserve variable types
#nums <- unlist(lapply(dat, is.numeric))
#quant=dat[,nums]
#imputes = mice(quant, m=1, maxit=5, method='cart', seed=500)
library(mice)
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
imputes=readRDS("~/Dropbox/impute.rds")
namat=is.na(complete(imputes))
complete=complete(imputes)[,colSums(namat)==0] ###return only the columns that still don't have navalues
cc=complete[,colSums(complete)!=0]
dd = as.matrix(cc[,-(1:4)])
dd[dd<0] = 0
library("CountClust")
## Loading required package: ggplot2
#f=FitGoM(dd,K = 5)
#saveRDS(f,"~/Dropbox/fk5.rds")
f=readRDS("~/Dropbox/fk5.rds")
omega <- f$fit$omega
cols <- c("blue", "darkgoldenrod1", "cyan", "red","green")
StructureGGplot(omega = omega,
palette = cols,
yaxis_label = "",
order_sample = TRUE,
split_line = list(split_lwd = .4,
split_col = "white"),
axis_tick = list(axis_ticks_length = .1,
axis_ticks_lwd_y = .1,
axis_ticks_lwd_x = .1,
axis_label_size = 7,
axis_label_face = "bold"))
This makes us think that
theta_mat <- f$fit$theta;
top_features <- ExtractTopFeatures(theta_mat, top_features=100,
method="poisson", options="min");
variable_list <- do.call(rbind, lapply(1:dim(top_features$indices)[1],
function(x) colnames(dd)[top_features$indices[x,]]))
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("kableExtra")
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library("flashr")
tmp <- do.call(rbind, lapply(1:5, function(i) toString(variable_list[,i])))
rownames(tmp) <- paste("Cluster", c(1:5))
tmp %>%
kable("html") %>%
kable_styling()
| Cluster 1 | Echo_Z_ParentID, Demographics_Death_Year, Genetics_Nucleotide_Locus, LabValues_NT_proBNP, Genetics_Enrichment |
| Cluster 2 | NA, Demographics_Diagnosis_Year, NA, LabValues_Creatinine, Echo_Z_Septal_E_prime |
| Cluster 3 | NA, Demographics_Birth_Year, NA, MedicalHx_LVEF35, Echo_Z_Lateral_E_prime |
| Cluster 4 | NA, CMRI_LV_Mass, NA, NA, NA |
| Cluster 5 | NA, CMRI_LVEDV, NA, NA, NA |
#flash=flashr::flash(as.matrix(scale(cc,center = T,scale = T)),Kmax = 10,var_type = "by_row",verbose = FALSE)
#saveRDS(flash,file="~/Dropbox/flash10.rds")
f=readRDS("~/Dropbox/flash10.rds")
plot(f,plot_factors = TRUE,factor_kset = 1:4)