Libraries
library(tidyr)
library(readr)
library(ggplot2) # ggplot graphs
library(knitr)
library(readxl)
library(xlsx)
library(openxlsx)
library(reactable) # reactable(df)
library(naniar) # miss_case_summary
library(dplyr)
## KNN imputation
library(caret)
library(RANN)
# CLustering
library(factoextra) # Clustering visualization
library(cluster) # Clustering algorithms
library(dendextend) # For comparing two dendrograms
library(corrplot) # Corelation between dendrograms
library(tidyverse) # Data manupulation
library(NbClust) # Determine optimal no. of clusters [not working...]
library(TSclust)
library(mclust) # Adjusted Rand index
#RandomForest
library(randomForest) # RandomForest Discrete Classification
library(imbalance) # To create a more balanced dataset
Functions
source("../../scripts/useful-functions/get_column_position.R")
# In a normal script it will be: source("./scripts/useful-functions/get_column_position.R")
Reading Data
cuantiles_TS_HR_P2 = data.frame(read_xlsx("../../data/clean-data/BoxBasedImputation/cuantiles_TS_HR_valid_patients_input_P2.xlsx", sheet = "FC_valid_patients_input_P2" ))
SatO2_TS_HR_P2 = data.frame(read_xlsx("../../data/clean-data/BoxBasedImputation/SatO2_valid_patients_input_P2.xlsx", sheet = "SatO2_valid_patients_input_P2" ))
FC_TS_HR_P2 = data.frame(read_xlsx("../../data/clean-data/BoxBasedImputation/FC_valid_patients_input_P2.xlsx", sheet = "FC_valid_patients_input_P2" ))
# First patients with OAF
name_patients_DETERIORO_OAF_0 <- data.frame(read_csv("../../data/clean-data/name_patients_DETERIORO_OAF_0.csv"))
name_patients_DETERIORO_OAF_0 <- name_patients_DETERIORO_OAF_0$x
name_patients_DETERIORO_OAF_0_8 <- data.frame(read_csv("../../data/clean-data/name_patients_DETERIORO_OAF_0_8.csv"))
name_patients_DETERIORO_OAF_0_8 <- name_patients_DETERIORO_OAF_0_8$x
## Deterioro and Not deterioro
file_patient_name_NO_DETERIORO <- data.frame(read_csv("../../data/info-patients/file_patient_name_NO_DETERIORO.csv"))
file_patient_name_NO_DETERIORO <- file_patient_name_NO_DETERIORO$x
file_patient_name_DETERIORO <- data.frame(read_csv("../../data/info-patients/file_patient_name_DETERIORO.csv"))
file_patient_name_DETERIORO <- file_patient_name_DETERIORO$x
valid_patients_P2 <- data.frame(read_xlsx("../../data/clean-data/valid_patients_P2.xlsx"))
valid_patients_P2 <- valid_patients_P2$x
file_patient_name <- data.frame(read_csv("../../data/clean-data/file_patient_name.csv", show_col_types = FALSE))
file_patient_name <- file_patient_name$x
df1 <- data.frame(read_xlsx("../../data/clean-data/descriptive-data/descriptive_data.xlsx"))
rownames(df1) <- file_patient_name
df1 <- df1[valid_patients_P2,]
## Delete first patients
valid_patients_P2 <- valid_patients_P2[! valid_patients_P2 %in% union(name_patients_DETERIORO_OAF_0,name_patients_DETERIORO_OAF_0_8)]
cuantiles_TS_HR_P2 = cuantiles_TS_HR_P2[,valid_patients_P2]
SatO2_TS_HR_P2 = cuantiles_TS_HR_P2[,valid_patients_P2]
FC_TS_HR_P2 = cuantiles_TS_HR_P2[,valid_patients_P2]
Descriptive Data
df_descriptive <- data.frame(read_xlsx("../../data/clean-data/descriptive-data/descriptive_data_imputed.xlsx"), row.names = TRUE)
rownames(df_descriptive) <- file_patient_name
df_descriptive <- df_descriptive %>% select(-c(FR_8_16h, FR_16_24h, FLUJO2_8_16h,FLUJO2_16_24h,SCORE_WOOD_DOWNES_24H,SAPI_16_24h, SAPI_8_16h))
# Class
pos_1 = get_column_position(df_descriptive,"SAPI_0_8h")
pos_2 = get_column_position(df_descriptive,"PAUSAS_APNEA")
df_descriptive[,c(pos_1:pos_2)] <- lapply(df_descriptive[,c(pos_1:pos_2)], as.factor)
#lapply(df_descriptive,class)
df_descriptive <- df_descriptive[valid_patients_P2,]
Create a dataframe with CCF [Heart Rate and SatO2]
dimension_col <- dim(FC_TS_HR_P2)[2]
dimension_row <- 480 #lag.max -1
SatO2_FC_CCF <- data.frame(matrix(nrow = dimension_row * 2 - 1, ncol = dimension_col))
colnames(SatO2_FC_CCF) <- names(FC_TS_HR_P2)[1:dimension_col]
m <- forecast::Ccf(FC_TS_HR_P2[[1]], SatO2_TS_HR_P2[[1]], lag.max = dimension_row - 1, plot = FALSE, drop.lag.0 = FALSE, type = "correlation", ylab = "CCF")
for (i in names(SatO2_FC_CCF)) {
ccf_result <- forecast::Ccf(FC_TS_HR_P2[[i]], SatO2_TS_HR_P2[[i]], lag.max = dimension_row - 1, plot = FALSE, drop.lag.0 = FALSE, type = "correlation", ylab = "CCF")
SatO2_FC_CCF[, i] <- ccf_result$acf
}
distance <- dist(t(SatO2_FC_CCF[c(1:101),]), method = "euclidean")
distance_matrix_CCF <- as.matrix(distance)
DD_CCF <- distance
To find which hierarchical clustering methods that can identify stronger clustering structures. Here we see that Ward’s method identifies the strongest clustering structure of the four methods assessed.
datos = t(SatO2_FC_CCF[c(1:101),])
datos_CCF = data.frame(t(SatO2_FC_CCF[c(1:101),]))
#method to assess
m <- c("average", "single","complete","ward")
names(m) <- c("average", "single","complete","ward.D2")
#function to compute coefficient
ac <- function(x){agnes(datos, method = x)$ac}
map_dbl(m,ac)
## average single complete ward.D2
## 0.7992972 0.7091074 0.8910785 0.9437709
This package will help us identify the optimum number of clusters
based our criteria in the silhouette
index
diss_matrix<- DD_CCF
res<-NbClust(datos_CCF, diss=diss_matrix, distance = NULL, min.nc=2, max.nc=5, method = "ward.D2", index = "silhouette")
res$All.index
## 2 3 4 5
## 0.5024 0.3152 0.2589 0.2670
res$Best.nc
## Number_clusters Value_Index
## 2.0000 0.5024
#res$Best.partition
hcintper_CCF <- hclust(DD_CCF , "ward.D2")
fviz_dend(hcintper_CCF, palette = "jco",
rect = TRUE, show_labels = FALSE, k = 2)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
DDclust_CCF <- cutree( hclust(DD_CCF , "ward.D2"), k = 2)
fviz_cluster(list(data = datos_CCF, cluster = DDclust_CCF ))
fviz_silhouette(silhouette(DDclust_CCF , DD_CCF ))
## cluster size ave.sil.width
## 1 1 49 0.48
## 2 2 9 0.61
DETERIORO_CLUST <- union(intersect(file_patient_name_DETERIORO,names_1),intersect(file_patient_name_DETERIORO,names_2)) # DET and names_1 + DET and names_2, not all the patients are selected so DETERIORO_CLUST represents the DETERIORO patients that we are working with.
NO_DETERIORO_CLUST <- union(intersect(file_patient_name_NO_DETERIORO,names_1),intersect(file_patient_name_NO_DETERIORO,names_2)) #NO_DET and names_1 + NO_DET and names_2, not all the patients are selected so NO_DETERIORO_CLUST represents the NO_DETERIORO patients that we are working with.
#DETERIORO
DETERIORO_patients = data.frame(t(rep("#4A235A", length(DETERIORO_CLUST))))
colnames(DETERIORO_patients)<- DETERIORO_CLUST # Assign a color to all DETERIORO patients
#NO DETERIORO
NO_DETERIORO_patients = data.frame(t(rep("#117864", length(NO_DETERIORO_CLUST))))
colnames(NO_DETERIORO_patients)<- NO_DETERIORO_CLUST # Assign a color to all NO_DETERIORO patients
COLOR_CCF <- cbind(DETERIORO_patients,NO_DETERIORO_patients)
fviz_dend(hcintper_CCF, k = 2,
k_colors = c("blue", "green3"),
label_cols = as.vector(COLOR_CCF[names(DD_CCF)[hcintper_CCF$order]]), cex = 0.6)
n1 = length(intersect(file_patient_name_DETERIORO,names_1))
n2 = length(intersect(file_patient_name_DETERIORO,names_2))
n3 = length(intersect(file_patient_name_NO_DETERIORO,names_1))
n4 = length(intersect(file_patient_name_NO_DETERIORO,names_2))
conttingency_table <- data.frame("CLust1" = c(n1,n3), "Clust2" = c(n2,n4))
rownames(conttingency_table) <- c("DETERIORO","NO DETERIORO")
knitr::kable(conttingency_table, align = "lccrr")
CLust1 | Clust2 | |
---|---|---|
DETERIORO | 5 | 1 |
NO DETERIORO | 44 | 8 |
conttingency_table_prop <- data.frame(c(n1,n3)/(n1+n3),c(n2,n4)/(n2+n4))
rownames(conttingency_table_prop) <- c("DETERIORO","NO DETERIORO")
colnames(conttingency_table_prop) <- c("Clust1","Clust2")
knitr::kable(conttingency_table_prop, align = "lccrr")
Clust1 | Clust2 | |
---|---|---|
DETERIORO | 0.1020408 | 0.1111111 |
NO DETERIORO | 0.8979592 | 0.8888889 |
data_frame1_CCF = data.frame("CLUSTER" = DDclust_CCF)
data_frame2 = df_descriptive
data_frame_merge_CCF <-
merge(data_frame1_CCF, data_frame2, by = 'row.names', all = TRUE)
data_frame_merge_CCF <- data_frame_merge_CCF[, 2:dim(data_frame_merge_CCF)[2]]
data_frame_merge_CCF$CLUSTER = factor(data_frame_merge_CCF$CLUSTER)
table(data_frame_merge_CCF$CLUSTER)
##
## 1 2
## 49 9
data_frame_merge_CCF[,c(1:dim(data_frame_merge_CCF)[2])]<- lapply(data_frame_merge_CCF[,c(1:dim(data_frame_merge_CCF)[2])], as.numeric)
head(data_frame_merge_CCF)
## CLUSTER EDAD PESO EG FR_0_8h FLUJO2_0_8H DIAS_GN DIAS_O2_TOTAL DIAS_OAF
## 1 1 10.0 8.20 41 48 2.00 3 3 0
## 2 1 13.0 7.78 40 56 2.00 2 2 0
## 3 1 3.1 5.66 37 44 1.00 4 4 0
## 4 2 5.3 8.44 38 65 0.40 3 3 0
## 5 1 15.0 7.00 34 37 2.00 4 4 0
## 6 1 1.6 3.80 37 42 0.94 4 4 0
## SAPI_0_8h SCORE_CRUCES_INGRESO SCORE_WOOD_DOWNES_INGRESO SEXO PALIVIZUMAB LM
## 1 3 3 6 1 1 2
## 2 4 4 8 1 1 1
## 3 3 3 7 1 1 2
## 4 4 3 6 1 1 2
## 5 1 3 6 1 2 1
## 6 2 4 7 1 1 2
## DERMATITIS ALERGIAS TABACO ENFERMEDAD_BASE RADIOGRAFIA ANALITICA SUERO
## 1 1 2 1 1 1 1 1
## 2 1 2 2 2 1 1 2
## 3 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 5 1 1 2 2 1 1 2
## 6 1 1 2 2 1 1 1
## ETIOLOGIA PREMATURIDAD ALIMENTACION SNG GN_INGRESO OAF OAF_AL_INGRESO
## 1 2 1 2 1 2 1 1
## 2 1 1 1 1 2 1 1
## 3 2 1 2 1 2 1 1
## 4 2 1 2 1 1 1 1
## 5 2 2 2 1 2 1 1
## 6 1 1 2 1 1 1 1
## OAF_TRAS_INGRESO UCIP DETERIORO PAUSAS_APNEA
## 1 1 1 1 1
## 2 1 1 1 1
## 3 1 1 1 1
## 4 1 1 1 1
## 5 1 1 1 1
## 6 1 1 1 1
data_frame_merge_CCF$CLUSTER <- factor(data_frame_merge_CCF$CLUSTER)
newSMOTE_CCF <- oversample(data_frame_merge_CCF, ratio = 0.85, method = "SMOTE", classAttr = "CLUSTER")
newSMOTE_CCF <- data.frame(newSMOTE_CCF)
pos_1 <- get_column_position(newSMOTE_CCF, "SAPI_0_8h")
pos_2 <- get_column_position(newSMOTE_CCF, "PAUSAS_APNEA")
columns_to_round <- c(pos_1:pos_2)
newSMOTE_CCF[, columns_to_round] <- lapply(newSMOTE_CCF[, columns_to_round], function(x) round(x, 1))
table(newSMOTE_CCF$CLUSTER)
##
## 1 2
## 49 42
set.seed(123)
pos_1 = get_column_position(newSMOTE_CCF, "SAPI_0_8h")
pos_2 = get_column_position(newSMOTE_CCF, "PAUSAS_APNEA")
col_names_factor <- names(newSMOTE_CCF[pos_1:pos_2])
newSMOTE_CCF[col_names_factor] <- lapply(newSMOTE_CCF[col_names_factor] , factor)
RF_CCF <- randomForest(CLUSTER ~ ., data = newSMOTE_CCF)
print(RF_CCF)
##
## Call:
## randomForest(formula = CLUSTER ~ ., data = newSMOTE_CCF)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 10.99%
## Confusion matrix:
## 1 2 class.error
## 1 44 5 0.1020408
## 2 5 37 0.1190476
Importance
kable(RF_CCF$importance[order(RF_CCF$importance, decreasing = TRUE),])
x | |
---|---|
SCORE_WOOD_DOWNES_INGRESO | 9.3362067 |
SCORE_CRUCES_INGRESO | 6.5509986 |
SAPI_0_8h | 4.3842712 |
ETIOLOGIA | 2.8116091 |
PESO | 2.3407629 |
RADIOGRAFIA | 2.2294075 |
SEXO | 1.6634895 |
DIAS_GN | 1.4541484 |
DIAS_O2_TOTAL | 1.4505291 |
EDAD | 1.4445559 |
LM | 1.4356390 |
FR_0_8h | 1.2697476 |
GN_INGRESO | 1.2292744 |
TABACO | 1.2020252 |
FLUJO2_0_8H | 1.1696037 |
EG | 0.9153303 |
ALIMENTACION | 0.7486863 |
PREMATURIDAD | 0.6391867 |
ANALITICA | 0.4853217 |
SNG | 0.3521633 |
ENFERMEDAD_BASE | 0.2711781 |
DIAS_OAF | 0.2377083 |
SUERO | 0.2108443 |
ALERGIAS | 0.1217301 |
UCIP | 0.1028728 |
DETERIORO | 0.0966294 |
PALIVIZUMAB | 0.0934120 |
OAF_TRAS_INGRESO | 0.0860743 |
PAUSAS_APNEA | 0.0802490 |
OAF | 0.0603296 |
DERMATITIS | 0.0488441 |
OAF_AL_INGRESO | 0.0000000 |
data_frame1_CCF = data.frame("CLUSTER" = DDclust_CCF)
data_frame2 = data.frame(t(SatO2_FC_CCF[c(1:101),]))
data_frame_merge_CCF <-
merge(data_frame1_CCF, data_frame2, by = 'row.names', all = TRUE)
data_frame_merge_CCF <- data_frame_merge_CCF[, 2:dim(data_frame_merge_CCF)[2]]
set.seed(123)
data_frame_merge_CCF$CLUSTER <- as.factor(data_frame_merge_CCF$CLUSTER)
RF_0 <- randomForest(CLUSTER ~ ., data = data_frame_merge_CCF)
print(RF_0)
##
## Call:
## randomForest(formula = CLUSTER ~ ., data = data_frame_merge_CCF)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 10
##
## OOB estimate of error rate: 0%
## Confusion matrix:
## 1 2 class.error
## 1 49 0 0
## 2 0 9 0
plot(RF_0$importance, type = "h")
plot_data_CCF <- data.frame(t(SatO2_FC_CCF[c(1:101),]))
cluster_data_CCF <- data.frame(DDclust_CCF)
plotting_CCF <- cbind(plot_data_CCF, cluster_data_CCF)
head(plotting_CCF)
## X1 X2 X3 X4
## ACR_11231843 0.0004335791 0.0026800010 0.0039622812 0.0046441295
## ADAO_11159808 0.0001094381 -0.0003372668 0.0003987779 0.0002092462
## AGG_11236448 -0.0013688104 -0.0025258245 -0.0032101939 -0.0042764317
## AHL_11239959 -0.0023989337 -0.0052266590 -0.0054464697 -0.0081471913
## AJGD_11119689 -0.0003576793 -0.0008162598 -0.0005385093 -0.0007230021
## AMP_11228639 0.0002634324 0.0006663009 -0.0005199333 0.0008299215
## X5 X6 X7 X8
## ACR_11231843 7.203981e-03 0.007061645 0.0076842907 7.630654e-03
## ADAO_11159808 -1.623079e-04 0.001405437 0.0020338192 -2.433889e-03
## AGG_11236448 -4.098981e-03 -0.003913110 -0.0044643766 -3.812243e-03
## AHL_11239959 -1.182286e-02 -0.015537484 -0.0182401880 -2.248151e-02
## AJGD_11119689 2.780979e-03 0.002467757 0.0009709092 -5.704056e-05
## AMP_11228639 7.965372e-05 0.002581463 0.0009556513 -5.763339e-04
## X9 X10 X11 X12
## ACR_11231843 0.0011541477 -0.0014977354 0.0015747985 0.0027412959
## ADAO_11159808 0.0034024126 0.0005693701 0.0004382390 0.0028263403
## AGG_11236448 -0.0034884464 -0.0018366516 -0.0030502209 -0.0030932730
## AHL_11239959 -0.0246426111 -0.0257563218 -0.0303770734 -0.0305383603
## AJGD_11119689 0.0012180638 0.0008889351 0.0002642562 -0.0005845125
## AMP_11228639 0.0006360153 -0.0021070910 -0.0001457457 -0.0006286650
## X13 X14 X15 X16
## ACR_11231843 -0.0012563791 -0.0072490300 -0.0061775054 -0.0042857758
## ADAO_11159808 0.0013409788 0.0019777046 0.0012957525 0.0004308224
## AGG_11236448 0.0024329065 0.0022840021 0.0006671297 0.0024455144
## AHL_11239959 -0.0300882314 -0.0399208646 -0.0364802586 -0.0390877414
## AJGD_11119689 -0.0010046334 -0.0003411414 0.0012670173 -0.0019547537
## AMP_11228639 -0.0005184107 0.0011136237 0.0028704016 0.0011150101
## X17 X18 X19 X20
## ACR_11231843 -3.636382e-03 -0.003516933 -0.0027122897 -0.001284843
## ADAO_11159808 -3.576277e-03 0.001442159 0.0006848414 -0.000669266
## AGG_11236448 -4.504692e-05 0.003706636 0.0041543671 0.003884811
## AHL_11239959 -4.355986e-02 -0.040192137 -0.0427527366 -0.039106720
## AJGD_11119689 2.037051e-03 -0.001724106 -0.0010821281 -0.005371884
## AMP_11228639 4.760198e-03 -0.001430710 0.0035563337 -0.001026648
## X21 X22 X23 X24
## ACR_11231843 -0.0019719796 -0.003068912 -0.0007996714 0.001418854
## ADAO_11159808 -0.0008796252 0.003904162 -0.0037915855 0.003203702
## AGG_11236448 0.0048978034 0.002993458 0.0077458111 0.008945708
## AHL_11239959 -0.0367113603 -0.033335049 -0.0363302775 -0.031059899
## AJGD_11119689 -0.0021673785 0.006887099 -0.0070032804 -0.009363823
## AMP_11228639 -0.0014857750 0.004153272 -0.0021751878 0.003863868
## X25 X26 X27 X28 X29
## ACR_11231843 0.0009437696 0.003782053 0.010887237 0.013280146 0.016968351
## ADAO_11159808 -0.0007678796 -0.001109457 0.003944547 0.005774560 -0.009068616
## AGG_11236448 0.0057222462 0.017769404 0.028777302 0.038949199 0.035914986
## AHL_11239959 -0.0352128645 -0.036601861 -0.043029479 -0.048333482 -0.059119791
## AJGD_11119689 -0.0081886603 -0.011067657 -0.008622958 -0.010048992 -0.002165678
## AMP_11228639 0.0003253959 0.008311592 0.006560036 0.002216409 0.009591607
## X30 X31 X32 X33 X34
## ACR_11231843 0.0136167903 0.008385831 0.010921905 0.008648812 0.006103761
## ADAO_11159808 0.0003874439 -0.002466101 -0.009799162 -0.008931975 -0.016326710
## AGG_11236448 0.0362769742 0.040192627 0.035510527 0.032862108 0.037695124
## AHL_11239959 -0.0634132755 -0.074786468 -0.068685563 -0.079150534 -0.077537483
## AJGD_11119689 -0.0049804151 0.008837385 -0.005148626 -0.013302486 0.002273300
## AMP_11228639 -0.0004985741 0.002128356 0.008011105 -0.001738909 0.006397814
## X35 X36 X37 X38 X39
## ACR_11231843 0.0012823704 -0.001064345 0.005539160 0.001633917 0.002720487
## ADAO_11159808 -0.0122746108 -0.022561360 -0.013583560 -0.027417224 -0.018211320
## AGG_11236448 0.0410813463 0.051974296 0.052692149 0.061916401 0.074867491
## AHL_11239959 -0.0821848400 -0.074948409 -0.074274509 -0.077629529 -0.078331075
## AJGD_11119689 -0.0183615004 -0.018595368 0.002216751 0.004792426 0.003452190
## AMP_11228639 0.0006064979 0.007679668 0.009109502 0.015024574 0.025892060
## X40 X41 X42 X43 X44
## ACR_11231843 0.002095688 -0.0007767707 0.007790719 0.008091110 0.01025183
## ADAO_11159808 -0.019288467 -0.0219156396 -0.033166034 -0.026608065 -0.02464164
## AGG_11236448 0.095602206 0.1005956040 0.095352375 0.096222011 0.08995313
## AHL_11239959 -0.083351334 -0.0829452806 -0.088129677 -0.092405923 -0.09412752
## AJGD_11119689 -0.013032020 -0.0104414413 -0.016322045 -0.002454743 -0.01615293
## AMP_11228639 0.014230001 0.0188068271 0.015089930 0.008690895 0.01359409
## X45 X46 X47 X48 X49
## ACR_11231843 0.011461143 0.005265604 -0.006500759 -0.01653930 -0.019145954
## ADAO_11159808 -0.029244691 -0.022192961 -0.016925910 -0.01309521 -0.006670672
## AGG_11236448 0.075513576 0.072721718 0.074447261 0.08080160 0.096337513
## AHL_11239959 -0.095718411 -0.101214713 -0.099351766 -0.09540954 -0.097778015
## AJGD_11119689 -0.002605373 0.017402271 0.021764256 0.04339378 0.026945155
## AMP_11228639 0.017587160 0.022144110 0.025556164 0.02227766 0.026539077
## X50 X51 X52 X53 X54
## ACR_11231843 -0.01328539 0.0466660069 0.078539004 0.003436640 -0.040578171
## ADAO_11159808 -0.01122056 -0.0192040012 -0.008480597 -0.015677095 -0.015684548
## AGG_11236448 0.11275699 0.1202005447 0.121362273 0.118260219 0.113192804
## AHL_11239959 -0.10259836 -0.0975660015 -0.097635034 -0.099399656 -0.104142381
## AJGD_11119689 0.01736604 -0.0008852263 0.003920554 0.001447258 0.005990889
## AMP_11228639 0.01741680 0.0255593119 0.017152138 0.020785147 0.019921068
## X55 X56 X57 X58 X59
## ACR_11231843 -0.03923186 -0.03843734 -0.03185135 -0.03375636 -0.03155462
## ADAO_11159808 -0.02824683 -0.02761742 -0.03231723 -0.03411898 -0.04269926
## AGG_11236448 0.10657797 0.09517184 0.10000647 0.08734770 0.06026680
## AHL_11239959 -0.10927454 -0.10858498 -0.10513668 -0.11227425 -0.11115260
## AJGD_11119689 0.01952905 0.02519347 0.02603430 0.01601270 0.03185822
## AMP_11228639 0.02036761 0.02478283 0.01488865 0.02440095 0.01461187
## X60 X61 X62 X63 X64
## ACR_11231843 -0.01614012 0.017230667 0.019998453 0.0220845508 0.035200236
## ADAO_11159808 -0.03476621 -0.022482786 -0.018676508 -0.0114794245 0.001431606
## AGG_11236448 0.04530848 0.063400777 0.069710921 0.0768386681 0.091449601
## AHL_11239959 -0.11393258 -0.116610055 -0.119626833 -0.1171933414 -0.125775259
## AJGD_11119689 0.02309504 -0.004168741 0.001406862 -0.0003629521 0.015114474
## AMP_11228639 0.01527477 0.021681867 0.033218459 0.0403875606 0.042386444
## X65 X66 X67 X68 X69
## ACR_11231843 -0.014768194 -0.011308070 0.01165559 0.039247108 0.04067433
## ADAO_11159808 0.021322108 0.016238848 0.02015872 0.028304374 0.02867736
## AGG_11236448 0.088309944 0.084025304 0.08346901 0.043776711 0.01928346
## AHL_11239959 -0.125271045 -0.134881079 -0.13846456 -0.138021332 -0.14516406
## AJGD_11119689 -0.007612185 -0.008576289 -0.01310511 0.001155695 0.02072063
## AMP_11228639 0.046761078 0.041478845 0.03415545 0.038253282 0.04345439
## X70 X71 X72 X73 X74
## ACR_11231843 0.054938263 0.05216774 0.02893450 0.04565816 0.016317459
## ADAO_11159808 0.031406913 0.03285366 0.03034807 0.01507817 0.007630274
## AGG_11236448 0.006741361 0.02914898 0.02863798 0.04718177 0.057838065
## AHL_11239959 -0.148491209 -0.15009041 -0.14325691 -0.14896503 -0.148111238
## AJGD_11119689 0.006656645 0.02928379 0.03218864 0.01925793 0.020437607
## AMP_11228639 0.044788032 0.04858658 0.04187590 0.04227944 0.037635781
## X75 X76 X77 X78 X79
## ACR_11231843 -0.003875787 0.0256742079 0.018148296 0.04219174 0.027417819
## ADAO_11159808 0.018474492 0.0009056106 0.010227621 0.01560631 0.009562942
## AGG_11236448 0.071557804 0.0639719008 0.076988919 0.05250665 0.053139109
## AHL_11239959 -0.152727955 -0.1479525477 -0.145243487 -0.14707256 -0.136525385
## AJGD_11119689 0.004447869 0.0176834733 -0.005893729 0.01482346 -0.002029224
## AMP_11228639 0.049409292 0.0438721130 0.046904276 0.04721456 0.034240507
## X80 X81 X82 X83 X84
## ACR_11231843 0.011569835 -2.315659e-02 -0.026266905 0.002209966 0.006781454
## ADAO_11159808 0.005644969 4.665312e-05 0.009562882 0.028231420 0.035291980
## AGG_11236448 0.053179236 6.266205e-02 0.038755605 0.037632958 0.053041903
## AHL_11239959 -0.145777797 -1.373812e-01 -0.134550696 -0.127767682 -0.123160078
## AJGD_11119689 -0.000442201 -8.141466e-03 -0.028236727 -0.021561516 -0.020813623
## AMP_11228639 0.043992079 4.623287e-02 0.049872969 0.055800867 0.051210087
## X85 X86 X87 X88 X89
## ACR_11231843 -0.015863194 -0.0008034249 -0.03327019 -0.030903612 0.01948229
## ADAO_11159808 0.030878793 0.0365097797 0.04058043 0.035934982 0.03357107
## AGG_11236448 0.083892800 0.0716219676 0.08203580 0.081094153 0.07663220
## AHL_11239959 -0.119413282 -0.1126149959 -0.10323170 -0.101827184 -0.10077971
## AJGD_11119689 -0.009651539 -0.0163368826 -0.02784260 0.005658919 -0.01588456
## AMP_11228639 0.057112175 0.0585364164 0.05274634 0.067226000 0.05090611
## X90 X91 X92 X93 X94
## ACR_11231843 -0.008660467 0.006913348 -0.04212957 -0.04023577 -0.041013020
## ADAO_11159808 0.039765226 0.047538257 0.02947733 0.04493890 0.021011893
## AGG_11236448 0.092984113 0.091885797 0.07406543 0.04952749 0.055319778
## AHL_11239959 -0.105786144 -0.102096714 -0.10295521 -0.09423088 -0.108538702
## AJGD_11119689 -0.014811634 -0.023097969 -0.02852232 -0.02476067 -0.007509145
## AMP_11228639 0.067204569 0.058703439 0.07059440 0.07205772 0.065932511
## X95 X96 X97 X98 X99
## ACR_11231843 -0.049501935 -0.04770504 -0.047457036 -0.04491680 -0.044618932
## ADAO_11159808 0.019975653 0.02109524 0.006412441 0.00139112 0.005480962
## AGG_11236448 0.046182840 0.03734793 0.024458350 0.02264219 0.005794925
## AHL_11239959 -0.101857508 -0.10723867 -0.108969619 -0.10277380 -0.102861916
## AJGD_11119689 0.006218254 0.01048821 0.039346488 0.01535082 0.008367036
## AMP_11228639 0.074911235 0.06127015 0.070413020 0.06243588 0.051809416
## X100 X101 DDclust_CCF
## ACR_11231843 -0.045242042 -0.03506496 1
## ADAO_11159808 0.001913931 0.01776448 1
## AGG_11236448 0.027707410 0.01951120 1
## AHL_11239959 -0.111476327 -0.11508806 2
## AJGD_11119689 0.023412887 0.01173931 1
## AMP_11228639 0.056981920 0.05601130 1
## Mean by groups
rp_tbl <- aggregate(plotting_CCF, by = list(plotting_CCF$DDclust_CCF), mean)
row.names(rp_tbl) <- paste0("Group",rp_tbl$DDclust_CCF)
rp_tbl <- rp_tbl %>%
select(starts_with('X'))
rp_tbl <- data.frame(t(rp_tbl))
head(rp_tbl)
## Group1 Group2
## X1 2.769181e-04 -0.001403052
## X2 5.712380e-04 -0.002722402
## X3 -8.583775e-05 -0.003634944
## X4 4.847546e-04 -0.004069677
## X5 6.010612e-04 -0.006732902
## X6 3.038989e-04 -0.008340906
# Create plotting data-frame
CCF_values_by_group <- data.frame("value_CCF" = c(rp_tbl$Group1,rp_tbl$Group2),
"cluster" = c(rep("Group1", times = length(rp_tbl$Group1)),
rep("Group2", times = length(rp_tbl$Group2))),
"index" = c(c(1:length(rp_tbl$Group1)),c(1:length(rp_tbl$Group2))))
p <- ggplot(CCF_values_by_group, aes(x = index, y = value_CCF, group = cluster)) +
geom_line(aes(color=cluster)) +
scale_color_brewer(palette="Paired") + theme_minimal()
p
cluster_study_CCF <- list(DDclust_CCF)
write.csv(cluster_study_CCF, "../../data/clusters/cluster_study_CCF.csv")