Libraries

library(tidyr)
library(readr)
library(ggplot2) # ggplot graphs
library(knitr)
library(readxl)
library(xlsx)
library(openxlsx)
library(reactable) # reactable(df)
library(naniar) # miss_case_summary

library(dplyr)

## KNN imputation
library(caret)
library(RANN)

# CLustering 
library(factoextra)    # Clustering visualization
library(cluster)       # Clustering algorithms
library(dendextend)    # For comparing two dendrograms
library(corrplot)      # Corelation between dendrograms
library(tidyverse)     # Data manupulation
library(NbClust)       # Determine optimal no. of clusters  [not working...]
library(TSclust)
library(mclust)        # Adjusted Rand index

#RandomForest
library(randomForest) # RandomForest Discrete Classification
library(imbalance) # To create a more balanced dataset

Functions

source("../../scripts/useful-functions/get_column_position.R")
# In a normal script it will be:  source("./scripts/useful-functions/get_column_position.R")

Reading Data

Time Series Data: Heart Rate and SatO2

cuantiles_TS_HR_P2 = data.frame(read_xlsx("../../data/clean-data/BoxBasedImputation/cuantiles_TS_HR_valid_patients_input_P2.xlsx", sheet = "FC_valid_patients_input_P2" ))

SatO2_TS_HR_P2 = data.frame(read_xlsx("../../data/clean-data/BoxBasedImputation/SatO2_valid_patients_input_P2.xlsx", sheet = "SatO2_valid_patients_input_P2" ))

FC_TS_HR_P2 = data.frame(read_xlsx("../../data/clean-data/BoxBasedImputation/FC_valid_patients_input_P2.xlsx", sheet = "FC_valid_patients_input_P2" ))

# First patients with OAF
name_patients_DETERIORO_OAF_0 <- data.frame(read_csv("../../data/clean-data/name_patients_DETERIORO_OAF_0.csv"))
name_patients_DETERIORO_OAF_0 <- name_patients_DETERIORO_OAF_0$x
name_patients_DETERIORO_OAF_0_8 <- data.frame(read_csv("../../data/clean-data/name_patients_DETERIORO_OAF_0_8.csv"))
name_patients_DETERIORO_OAF_0_8 <- name_patients_DETERIORO_OAF_0_8$x
## Deterioro and Not deterioro
file_patient_name_NO_DETERIORO <- data.frame(read_csv("../../data/info-patients/file_patient_name_NO_DETERIORO.csv"))
file_patient_name_NO_DETERIORO <- file_patient_name_NO_DETERIORO$x
file_patient_name_DETERIORO <- data.frame(read_csv("../../data/info-patients/file_patient_name_DETERIORO.csv"))
file_patient_name_DETERIORO <- file_patient_name_DETERIORO$x

Descriptive Data for Discriminant analysis

valid_patients_P2 <- data.frame(read_xlsx("../../data/clean-data/valid_patients_P2.xlsx"))
valid_patients_P2 <- valid_patients_P2$x

file_patient_name <- data.frame(read_csv("../../data/clean-data/file_patient_name.csv", show_col_types = FALSE))
file_patient_name <- file_patient_name$x
  
df1 <- data.frame(read_xlsx("../../data/clean-data/descriptive-data/descriptive_data.xlsx"))
rownames(df1) <- file_patient_name
df1 <- df1[valid_patients_P2,]
## Delete first patients
valid_patients_P2 <- valid_patients_P2[! valid_patients_P2 %in% union(name_patients_DETERIORO_OAF_0,name_patients_DETERIORO_OAF_0_8)]

cuantiles_TS_HR_P2 = cuantiles_TS_HR_P2[,valid_patients_P2]
SatO2_TS_HR_P2 = cuantiles_TS_HR_P2[,valid_patients_P2]
FC_TS_HR_P2 = cuantiles_TS_HR_P2[,valid_patients_P2]

Descriptive Data

df_descriptive <- data.frame(read_xlsx("../../data/clean-data/descriptive-data/descriptive_data_imputed.xlsx"), row.names = TRUE)
rownames(df_descriptive) <- file_patient_name
df_descriptive <- df_descriptive %>% select(-c(FR_8_16h, FR_16_24h, FLUJO2_8_16h,FLUJO2_16_24h,SCORE_WOOD_DOWNES_24H,SAPI_16_24h, SAPI_8_16h))
# Class
pos_1 = get_column_position(df_descriptive,"SAPI_0_8h")
pos_2 = get_column_position(df_descriptive,"PAUSAS_APNEA")
df_descriptive[,c(pos_1:pos_2)] <- lapply(df_descriptive[,c(pos_1:pos_2)], as.factor)
#lapply(df_descriptive,class)
df_descriptive <- df_descriptive[valid_patients_P2,]

Create a dataframe with CCF [Heart Rate and SatO2]

dimension_col <- dim(FC_TS_HR_P2)[2]
dimension_row <- 480 #lag.max -1
SatO2_FC_CCF <- data.frame(matrix(nrow = dimension_row * 2 - 1, ncol = dimension_col))
colnames(SatO2_FC_CCF) <- names(FC_TS_HR_P2)[1:dimension_col]
m <- forecast::Ccf(FC_TS_HR_P2[[1]], SatO2_TS_HR_P2[[1]], lag.max = dimension_row - 1, plot = FALSE, drop.lag.0 = FALSE, type = "correlation", ylab = "CCF")
for (i in names(SatO2_FC_CCF)) {
  ccf_result <- forecast::Ccf(FC_TS_HR_P2[[i]], SatO2_TS_HR_P2[[i]], lag.max = dimension_row - 1, plot = FALSE, drop.lag.0 = FALSE, type = "correlation", ylab = "CCF")
  SatO2_FC_CCF[, i] <- ccf_result$acf
}

CCFidean Distance first 100 CCF

distance <- dist(t(SatO2_FC_CCF[c(1:101),]), method = "euclidean")
distance_matrix_CCF <- as.matrix(distance)
DD_CCF <- distance

Agnes study

To find which hierarchical clustering methods that can identify stronger clustering structures. Here we see that Ward’s method identifies the strongest clustering structure of the four methods assessed.

datos = t(SatO2_FC_CCF[c(1:101),])
datos_CCF = data.frame(t(SatO2_FC_CCF[c(1:101),]))
#method to assess
m <- c("average", "single","complete","ward")
names(m) <- c("average", "single","complete","ward.D2")

#function to compute coefficient
ac <- function(x){agnes(datos, method = x)$ac}
map_dbl(m,ac)
##   average    single  complete   ward.D2 
## 0.7992972 0.7091074 0.8910785 0.9437709

NbClust study

This package will help us identify the optimum number of clusters based our criteria in the silhouette index

diss_matrix<- DD_CCF 
res<-NbClust(datos_CCF, diss=diss_matrix, distance = NULL, min.nc=2, max.nc=5, method = "ward.D2", index = "silhouette")

res$All.index
##      2      3      4      5 
## 0.5024 0.3152 0.2589 0.2670
res$Best.nc
## Number_clusters     Value_Index 
##          2.0000          0.5024
#res$Best.partition
hcintper_CCF <- hclust(DD_CCF , "ward.D2")
fviz_dend(hcintper_CCF, palette = "jco",
          rect = TRUE, show_labels = FALSE, k = 2)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

DDclust_CCF  <- cutree( hclust(DD_CCF , "ward.D2"), k = 2)
fviz_cluster(list(data = datos_CCF, cluster = DDclust_CCF ))

fviz_silhouette(silhouette(DDclust_CCF , DD_CCF ))
##   cluster size ave.sil.width
## 1       1   49          0.48
## 2       2    9          0.61

Contingency CCF lag.max = 100

DETERIORO_CLUST <- union(intersect(file_patient_name_DETERIORO,names_1),intersect(file_patient_name_DETERIORO,names_2)) # DET and names_1 + DET and names_2, not all the patients are selected so DETERIORO_CLUST represents the DETERIORO patients that we are working with.

NO_DETERIORO_CLUST <- union(intersect(file_patient_name_NO_DETERIORO,names_1),intersect(file_patient_name_NO_DETERIORO,names_2)) #NO_DET and names_1 + NO_DET and names_2, not all the patients are selected so NO_DETERIORO_CLUST represents the NO_DETERIORO patients that we are working with.
#DETERIORO
DETERIORO_patients = data.frame(t(rep("#4A235A", length(DETERIORO_CLUST))))
colnames(DETERIORO_patients)<- DETERIORO_CLUST # Assign a color to all DETERIORO patients


#NO DETERIORO
NO_DETERIORO_patients = data.frame(t(rep("#117864", length(NO_DETERIORO_CLUST))))
colnames(NO_DETERIORO_patients)<- NO_DETERIORO_CLUST # Assign a color to all NO_DETERIORO patients

COLOR_CCF <- cbind(DETERIORO_patients,NO_DETERIORO_patients)
fviz_dend(hcintper_CCF, k = 2,  
          k_colors = c("blue", "green3"),
          label_cols =   as.vector(COLOR_CCF[names(DD_CCF)[hcintper_CCF$order]]), cex = 0.6) 

n1 = length(intersect(file_patient_name_DETERIORO,names_1))
n2 = length(intersect(file_patient_name_DETERIORO,names_2))
n3 = length(intersect(file_patient_name_NO_DETERIORO,names_1))
n4 = length(intersect(file_patient_name_NO_DETERIORO,names_2))

conttingency_table <- data.frame("CLust1" = c(n1,n3), "Clust2" = c(n2,n4))
rownames(conttingency_table) <- c("DETERIORO","NO DETERIORO")


knitr::kable(conttingency_table, align = "lccrr")
CLust1 Clust2
DETERIORO 5 1
NO DETERIORO 44 8
conttingency_table_prop <- data.frame(c(n1,n3)/(n1+n3),c(n2,n4)/(n2+n4))
rownames(conttingency_table_prop) <- c("DETERIORO","NO DETERIORO")
colnames(conttingency_table_prop) <- c("Clust1","Clust2")

knitr::kable(conttingency_table_prop, align = "lccrr")
Clust1 Clust2
DETERIORO 0.1020408 0.1111111
NO DETERIORO 0.8979592 0.8888889

Random Forest: Discriminant TSCLust CCF

data_frame1_CCF = data.frame("CLUSTER" = DDclust_CCF)
data_frame2 = df_descriptive
data_frame_merge_CCF <-
  merge(data_frame1_CCF, data_frame2,                      by = 'row.names', all = TRUE)
data_frame_merge_CCF <- data_frame_merge_CCF[, 2:dim(data_frame_merge_CCF)[2]]
data_frame_merge_CCF$CLUSTER = factor(data_frame_merge_CCF$CLUSTER)
table(data_frame_merge_CCF$CLUSTER)
## 
##  1  2 
## 49  9
data_frame_merge_CCF[,c(1:dim(data_frame_merge_CCF)[2])]<- lapply(data_frame_merge_CCF[,c(1:dim(data_frame_merge_CCF)[2])], as.numeric)
head(data_frame_merge_CCF)
##   CLUSTER EDAD PESO EG FR_0_8h FLUJO2_0_8H DIAS_GN DIAS_O2_TOTAL DIAS_OAF
## 1       1 10.0 8.20 41      48        2.00       3             3        0
## 2       1 13.0 7.78 40      56        2.00       2             2        0
## 3       1  3.1 5.66 37      44        1.00       4             4        0
## 4       2  5.3 8.44 38      65        0.40       3             3        0
## 5       1 15.0 7.00 34      37        2.00       4             4        0
## 6       1  1.6 3.80 37      42        0.94       4             4        0
##   SAPI_0_8h SCORE_CRUCES_INGRESO SCORE_WOOD_DOWNES_INGRESO SEXO PALIVIZUMAB LM
## 1         3                    3                         6    1           1  2
## 2         4                    4                         8    1           1  1
## 3         3                    3                         7    1           1  2
## 4         4                    3                         6    1           1  2
## 5         1                    3                         6    1           2  1
## 6         2                    4                         7    1           1  2
##   DERMATITIS ALERGIAS TABACO ENFERMEDAD_BASE RADIOGRAFIA ANALITICA SUERO
## 1          1        2      1               1           1         1     1
## 2          1        2      2               2           1         1     2
## 3          1        1      1               1           1         1     1
## 4          1        1      1               1           1         1     1
## 5          1        1      2               2           1         1     2
## 6          1        1      2               2           1         1     1
##   ETIOLOGIA PREMATURIDAD ALIMENTACION SNG GN_INGRESO OAF OAF_AL_INGRESO
## 1         2            1            2   1          2   1              1
## 2         1            1            1   1          2   1              1
## 3         2            1            2   1          2   1              1
## 4         2            1            2   1          1   1              1
## 5         2            2            2   1          2   1              1
## 6         1            1            2   1          1   1              1
##   OAF_TRAS_INGRESO UCIP DETERIORO PAUSAS_APNEA
## 1                1    1         1            1
## 2                1    1         1            1
## 3                1    1         1            1
## 4                1    1         1            1
## 5                1    1         1            1
## 6                1    1         1            1
data_frame_merge_CCF$CLUSTER <- factor(data_frame_merge_CCF$CLUSTER)
newSMOTE_CCF <- oversample(data_frame_merge_CCF, ratio = 0.85, method = "SMOTE", classAttr = "CLUSTER")
newSMOTE_CCF <- data.frame(newSMOTE_CCF)
pos_1 <- get_column_position(newSMOTE_CCF, "SAPI_0_8h")
pos_2 <- get_column_position(newSMOTE_CCF, "PAUSAS_APNEA")
columns_to_round <- c(pos_1:pos_2)
newSMOTE_CCF[, columns_to_round] <- lapply(newSMOTE_CCF[, columns_to_round], function(x) round(x, 1))
table(newSMOTE_CCF$CLUSTER)
## 
##  1  2 
## 49 42
set.seed(123)
pos_1 = get_column_position(newSMOTE_CCF, "SAPI_0_8h")
pos_2 = get_column_position(newSMOTE_CCF, "PAUSAS_APNEA")
col_names_factor <- names(newSMOTE_CCF[pos_1:pos_2])
newSMOTE_CCF[col_names_factor] <- lapply(newSMOTE_CCF[col_names_factor] , factor)

RF_CCF <- randomForest(CLUSTER ~ ., data = newSMOTE_CCF)
print(RF_CCF)
## 
## Call:
##  randomForest(formula = CLUSTER ~ ., data = newSMOTE_CCF) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 10.99%
## Confusion matrix:
##    1  2 class.error
## 1 44  5   0.1020408
## 2  5 37   0.1190476

Importance

kable(RF_CCF$importance[order(RF_CCF$importance, decreasing = TRUE),])
x
SCORE_WOOD_DOWNES_INGRESO 9.3362067
SCORE_CRUCES_INGRESO 6.5509986
SAPI_0_8h 4.3842712
ETIOLOGIA 2.8116091
PESO 2.3407629
RADIOGRAFIA 2.2294075
SEXO 1.6634895
DIAS_GN 1.4541484
DIAS_O2_TOTAL 1.4505291
EDAD 1.4445559
LM 1.4356390
FR_0_8h 1.2697476
GN_INGRESO 1.2292744
TABACO 1.2020252
FLUJO2_0_8H 1.1696037
EG 0.9153303
ALIMENTACION 0.7486863
PREMATURIDAD 0.6391867
ANALITICA 0.4853217
SNG 0.3521633
ENFERMEDAD_BASE 0.2711781
DIAS_OAF 0.2377083
SUERO 0.2108443
ALERGIAS 0.1217301
UCIP 0.1028728
DETERIORO 0.0966294
PALIVIZUMAB 0.0934120
OAF_TRAS_INGRESO 0.0860743
PAUSAS_APNEA 0.0802490
OAF 0.0603296
DERMATITIS 0.0488441
OAF_AL_INGRESO 0.0000000

Importance of first 50 CCF

data_frame1_CCF = data.frame("CLUSTER" = DDclust_CCF)
data_frame2 = data.frame(t(SatO2_FC_CCF[c(1:101),]))
data_frame_merge_CCF <-
  merge(data_frame1_CCF, data_frame2,                      by = 'row.names', all = TRUE)
data_frame_merge_CCF <- data_frame_merge_CCF[, 2:dim(data_frame_merge_CCF)[2]]
set.seed(123)
data_frame_merge_CCF$CLUSTER <- as.factor(data_frame_merge_CCF$CLUSTER)
RF_0 <- randomForest(CLUSTER ~ ., data = data_frame_merge_CCF)
print(RF_0)
## 
## Call:
##  randomForest(formula = CLUSTER ~ ., data = data_frame_merge_CCF) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 10
## 
##         OOB estimate of  error rate: 0%
## Confusion matrix:
##    1 2 class.error
## 1 49 0           0
## 2  0 9           0
plot(RF_0$importance, type = "h")

CCF by clusters

plot_data_CCF <- data.frame(t(SatO2_FC_CCF[c(1:101),]))
cluster_data_CCF <- data.frame(DDclust_CCF)
plotting_CCF <- cbind(plot_data_CCF, cluster_data_CCF)
head(plotting_CCF)
##                          X1            X2            X3            X4
## ACR_11231843   0.0004335791  0.0026800010  0.0039622812  0.0046441295
## ADAO_11159808  0.0001094381 -0.0003372668  0.0003987779  0.0002092462
## AGG_11236448  -0.0013688104 -0.0025258245 -0.0032101939 -0.0042764317
## AHL_11239959  -0.0023989337 -0.0052266590 -0.0054464697 -0.0081471913
## AJGD_11119689 -0.0003576793 -0.0008162598 -0.0005385093 -0.0007230021
## AMP_11228639   0.0002634324  0.0006663009 -0.0005199333  0.0008299215
##                          X5           X6            X7            X8
## ACR_11231843   7.203981e-03  0.007061645  0.0076842907  7.630654e-03
## ADAO_11159808 -1.623079e-04  0.001405437  0.0020338192 -2.433889e-03
## AGG_11236448  -4.098981e-03 -0.003913110 -0.0044643766 -3.812243e-03
## AHL_11239959  -1.182286e-02 -0.015537484 -0.0182401880 -2.248151e-02
## AJGD_11119689  2.780979e-03  0.002467757  0.0009709092 -5.704056e-05
## AMP_11228639   7.965372e-05  0.002581463  0.0009556513 -5.763339e-04
##                          X9           X10           X11           X12
## ACR_11231843   0.0011541477 -0.0014977354  0.0015747985  0.0027412959
## ADAO_11159808  0.0034024126  0.0005693701  0.0004382390  0.0028263403
## AGG_11236448  -0.0034884464 -0.0018366516 -0.0030502209 -0.0030932730
## AHL_11239959  -0.0246426111 -0.0257563218 -0.0303770734 -0.0305383603
## AJGD_11119689  0.0012180638  0.0008889351  0.0002642562 -0.0005845125
## AMP_11228639   0.0006360153 -0.0021070910 -0.0001457457 -0.0006286650
##                         X13           X14           X15           X16
## ACR_11231843  -0.0012563791 -0.0072490300 -0.0061775054 -0.0042857758
## ADAO_11159808  0.0013409788  0.0019777046  0.0012957525  0.0004308224
## AGG_11236448   0.0024329065  0.0022840021  0.0006671297  0.0024455144
## AHL_11239959  -0.0300882314 -0.0399208646 -0.0364802586 -0.0390877414
## AJGD_11119689 -0.0010046334 -0.0003411414  0.0012670173 -0.0019547537
## AMP_11228639  -0.0005184107  0.0011136237  0.0028704016  0.0011150101
##                         X17          X18           X19          X20
## ACR_11231843  -3.636382e-03 -0.003516933 -0.0027122897 -0.001284843
## ADAO_11159808 -3.576277e-03  0.001442159  0.0006848414 -0.000669266
## AGG_11236448  -4.504692e-05  0.003706636  0.0041543671  0.003884811
## AHL_11239959  -4.355986e-02 -0.040192137 -0.0427527366 -0.039106720
## AJGD_11119689  2.037051e-03 -0.001724106 -0.0010821281 -0.005371884
## AMP_11228639   4.760198e-03 -0.001430710  0.0035563337 -0.001026648
##                         X21          X22           X23          X24
## ACR_11231843  -0.0019719796 -0.003068912 -0.0007996714  0.001418854
## ADAO_11159808 -0.0008796252  0.003904162 -0.0037915855  0.003203702
## AGG_11236448   0.0048978034  0.002993458  0.0077458111  0.008945708
## AHL_11239959  -0.0367113603 -0.033335049 -0.0363302775 -0.031059899
## AJGD_11119689 -0.0021673785  0.006887099 -0.0070032804 -0.009363823
## AMP_11228639  -0.0014857750  0.004153272 -0.0021751878  0.003863868
##                         X25          X26          X27          X28          X29
## ACR_11231843   0.0009437696  0.003782053  0.010887237  0.013280146  0.016968351
## ADAO_11159808 -0.0007678796 -0.001109457  0.003944547  0.005774560 -0.009068616
## AGG_11236448   0.0057222462  0.017769404  0.028777302  0.038949199  0.035914986
## AHL_11239959  -0.0352128645 -0.036601861 -0.043029479 -0.048333482 -0.059119791
## AJGD_11119689 -0.0081886603 -0.011067657 -0.008622958 -0.010048992 -0.002165678
## AMP_11228639   0.0003253959  0.008311592  0.006560036  0.002216409  0.009591607
##                         X30          X31          X32          X33          X34
## ACR_11231843   0.0136167903  0.008385831  0.010921905  0.008648812  0.006103761
## ADAO_11159808  0.0003874439 -0.002466101 -0.009799162 -0.008931975 -0.016326710
## AGG_11236448   0.0362769742  0.040192627  0.035510527  0.032862108  0.037695124
## AHL_11239959  -0.0634132755 -0.074786468 -0.068685563 -0.079150534 -0.077537483
## AJGD_11119689 -0.0049804151  0.008837385 -0.005148626 -0.013302486  0.002273300
## AMP_11228639  -0.0004985741  0.002128356  0.008011105 -0.001738909  0.006397814
##                         X35          X36          X37          X38          X39
## ACR_11231843   0.0012823704 -0.001064345  0.005539160  0.001633917  0.002720487
## ADAO_11159808 -0.0122746108 -0.022561360 -0.013583560 -0.027417224 -0.018211320
## AGG_11236448   0.0410813463  0.051974296  0.052692149  0.061916401  0.074867491
## AHL_11239959  -0.0821848400 -0.074948409 -0.074274509 -0.077629529 -0.078331075
## AJGD_11119689 -0.0183615004 -0.018595368  0.002216751  0.004792426  0.003452190
## AMP_11228639   0.0006064979  0.007679668  0.009109502  0.015024574  0.025892060
##                        X40           X41          X42          X43         X44
## ACR_11231843   0.002095688 -0.0007767707  0.007790719  0.008091110  0.01025183
## ADAO_11159808 -0.019288467 -0.0219156396 -0.033166034 -0.026608065 -0.02464164
## AGG_11236448   0.095602206  0.1005956040  0.095352375  0.096222011  0.08995313
## AHL_11239959  -0.083351334 -0.0829452806 -0.088129677 -0.092405923 -0.09412752
## AJGD_11119689 -0.013032020 -0.0104414413 -0.016322045 -0.002454743 -0.01615293
## AMP_11228639   0.014230001  0.0188068271  0.015089930  0.008690895  0.01359409
##                        X45          X46          X47         X48          X49
## ACR_11231843   0.011461143  0.005265604 -0.006500759 -0.01653930 -0.019145954
## ADAO_11159808 -0.029244691 -0.022192961 -0.016925910 -0.01309521 -0.006670672
## AGG_11236448   0.075513576  0.072721718  0.074447261  0.08080160  0.096337513
## AHL_11239959  -0.095718411 -0.101214713 -0.099351766 -0.09540954 -0.097778015
## AJGD_11119689 -0.002605373  0.017402271  0.021764256  0.04339378  0.026945155
## AMP_11228639   0.017587160  0.022144110  0.025556164  0.02227766  0.026539077
##                       X50           X51          X52          X53          X54
## ACR_11231843  -0.01328539  0.0466660069  0.078539004  0.003436640 -0.040578171
## ADAO_11159808 -0.01122056 -0.0192040012 -0.008480597 -0.015677095 -0.015684548
## AGG_11236448   0.11275699  0.1202005447  0.121362273  0.118260219  0.113192804
## AHL_11239959  -0.10259836 -0.0975660015 -0.097635034 -0.099399656 -0.104142381
## AJGD_11119689  0.01736604 -0.0008852263  0.003920554  0.001447258  0.005990889
## AMP_11228639   0.01741680  0.0255593119  0.017152138  0.020785147  0.019921068
##                       X55         X56         X57         X58         X59
## ACR_11231843  -0.03923186 -0.03843734 -0.03185135 -0.03375636 -0.03155462
## ADAO_11159808 -0.02824683 -0.02761742 -0.03231723 -0.03411898 -0.04269926
## AGG_11236448   0.10657797  0.09517184  0.10000647  0.08734770  0.06026680
## AHL_11239959  -0.10927454 -0.10858498 -0.10513668 -0.11227425 -0.11115260
## AJGD_11119689  0.01952905  0.02519347  0.02603430  0.01601270  0.03185822
## AMP_11228639   0.02036761  0.02478283  0.01488865  0.02440095  0.01461187
##                       X60          X61          X62           X63          X64
## ACR_11231843  -0.01614012  0.017230667  0.019998453  0.0220845508  0.035200236
## ADAO_11159808 -0.03476621 -0.022482786 -0.018676508 -0.0114794245  0.001431606
## AGG_11236448   0.04530848  0.063400777  0.069710921  0.0768386681  0.091449601
## AHL_11239959  -0.11393258 -0.116610055 -0.119626833 -0.1171933414 -0.125775259
## AJGD_11119689  0.02309504 -0.004168741  0.001406862 -0.0003629521  0.015114474
## AMP_11228639   0.01527477  0.021681867  0.033218459  0.0403875606  0.042386444
##                        X65          X66         X67          X68         X69
## ACR_11231843  -0.014768194 -0.011308070  0.01165559  0.039247108  0.04067433
## ADAO_11159808  0.021322108  0.016238848  0.02015872  0.028304374  0.02867736
## AGG_11236448   0.088309944  0.084025304  0.08346901  0.043776711  0.01928346
## AHL_11239959  -0.125271045 -0.134881079 -0.13846456 -0.138021332 -0.14516406
## AJGD_11119689 -0.007612185 -0.008576289 -0.01310511  0.001155695  0.02072063
## AMP_11228639   0.046761078  0.041478845  0.03415545  0.038253282  0.04345439
##                        X70         X71         X72         X73          X74
## ACR_11231843   0.054938263  0.05216774  0.02893450  0.04565816  0.016317459
## ADAO_11159808  0.031406913  0.03285366  0.03034807  0.01507817  0.007630274
## AGG_11236448   0.006741361  0.02914898  0.02863798  0.04718177  0.057838065
## AHL_11239959  -0.148491209 -0.15009041 -0.14325691 -0.14896503 -0.148111238
## AJGD_11119689  0.006656645  0.02928379  0.03218864  0.01925793  0.020437607
## AMP_11228639   0.044788032  0.04858658  0.04187590  0.04227944  0.037635781
##                        X75           X76          X77         X78          X79
## ACR_11231843  -0.003875787  0.0256742079  0.018148296  0.04219174  0.027417819
## ADAO_11159808  0.018474492  0.0009056106  0.010227621  0.01560631  0.009562942
## AGG_11236448   0.071557804  0.0639719008  0.076988919  0.05250665  0.053139109
## AHL_11239959  -0.152727955 -0.1479525477 -0.145243487 -0.14707256 -0.136525385
## AJGD_11119689  0.004447869  0.0176834733 -0.005893729  0.01482346 -0.002029224
## AMP_11228639   0.049409292  0.0438721130  0.046904276  0.04721456  0.034240507
##                        X80           X81          X82          X83          X84
## ACR_11231843   0.011569835 -2.315659e-02 -0.026266905  0.002209966  0.006781454
## ADAO_11159808  0.005644969  4.665312e-05  0.009562882  0.028231420  0.035291980
## AGG_11236448   0.053179236  6.266205e-02  0.038755605  0.037632958  0.053041903
## AHL_11239959  -0.145777797 -1.373812e-01 -0.134550696 -0.127767682 -0.123160078
## AJGD_11119689 -0.000442201 -8.141466e-03 -0.028236727 -0.021561516 -0.020813623
## AMP_11228639   0.043992079  4.623287e-02  0.049872969  0.055800867  0.051210087
##                        X85           X86         X87          X88         X89
## ACR_11231843  -0.015863194 -0.0008034249 -0.03327019 -0.030903612  0.01948229
## ADAO_11159808  0.030878793  0.0365097797  0.04058043  0.035934982  0.03357107
## AGG_11236448   0.083892800  0.0716219676  0.08203580  0.081094153  0.07663220
## AHL_11239959  -0.119413282 -0.1126149959 -0.10323170 -0.101827184 -0.10077971
## AJGD_11119689 -0.009651539 -0.0163368826 -0.02784260  0.005658919 -0.01588456
## AMP_11228639   0.057112175  0.0585364164  0.05274634  0.067226000  0.05090611
##                        X90          X91         X92         X93          X94
## ACR_11231843  -0.008660467  0.006913348 -0.04212957 -0.04023577 -0.041013020
## ADAO_11159808  0.039765226  0.047538257  0.02947733  0.04493890  0.021011893
## AGG_11236448   0.092984113  0.091885797  0.07406543  0.04952749  0.055319778
## AHL_11239959  -0.105786144 -0.102096714 -0.10295521 -0.09423088 -0.108538702
## AJGD_11119689 -0.014811634 -0.023097969 -0.02852232 -0.02476067 -0.007509145
## AMP_11228639   0.067204569  0.058703439  0.07059440  0.07205772  0.065932511
##                        X95         X96          X97         X98          X99
## ACR_11231843  -0.049501935 -0.04770504 -0.047457036 -0.04491680 -0.044618932
## ADAO_11159808  0.019975653  0.02109524  0.006412441  0.00139112  0.005480962
## AGG_11236448   0.046182840  0.03734793  0.024458350  0.02264219  0.005794925
## AHL_11239959  -0.101857508 -0.10723867 -0.108969619 -0.10277380 -0.102861916
## AJGD_11119689  0.006218254  0.01048821  0.039346488  0.01535082  0.008367036
## AMP_11228639   0.074911235  0.06127015  0.070413020  0.06243588  0.051809416
##                       X100        X101 DDclust_CCF
## ACR_11231843  -0.045242042 -0.03506496           1
## ADAO_11159808  0.001913931  0.01776448           1
## AGG_11236448   0.027707410  0.01951120           1
## AHL_11239959  -0.111476327 -0.11508806           2
## AJGD_11119689  0.023412887  0.01173931           1
## AMP_11228639   0.056981920  0.05601130           1
## Mean by groups
rp_tbl <- aggregate(plotting_CCF, by = list(plotting_CCF$DDclust_CCF), mean)
row.names(rp_tbl) <- paste0("Group",rp_tbl$DDclust_CCF)
rp_tbl <- rp_tbl %>%
  select(starts_with('X'))
rp_tbl <- data.frame(t(rp_tbl))
head(rp_tbl)
##           Group1       Group2
## X1  2.769181e-04 -0.001403052
## X2  5.712380e-04 -0.002722402
## X3 -8.583775e-05 -0.003634944
## X4  4.847546e-04 -0.004069677
## X5  6.010612e-04 -0.006732902
## X6  3.038989e-04 -0.008340906
# Create plotting data-frame
CCF_values_by_group <- data.frame("value_CCF" = c(rp_tbl$Group1,rp_tbl$Group2), 
                                  "cluster" = c(rep("Group1", times = length(rp_tbl$Group1)),
                                              rep("Group2", times = length(rp_tbl$Group2))),
                                  "index" = c(c(1:length(rp_tbl$Group1)),c(1:length(rp_tbl$Group2))))

p <- ggplot(CCF_values_by_group, aes(x = index, y = value_CCF, group = cluster)) +
  geom_line(aes(color=cluster)) +
  scale_color_brewer(palette="Paired") + theme_minimal()

p

Adjusted Rand index

cluster_study_CCF <- list(DDclust_CCF)
write.csv(cluster_study_CCF, "../../data/clusters/cluster_study_CCF.csv")