# Clustering
# Petter Storm 2016-04-06
# Function for calculating distance between each patient and cluster-center and returning cluster with minimum distance
clusters <- function(x, centers) {
# compute squared euclidean distance from each sample to each cluster center
tmp <- sapply(seq_len(nrow(x)),
function(i) apply(centers, 1,
function(v) sum((x[i, ]-v)^2)))
max.col(-t(tmp)) # find index of min distance
}
#/////////////////////////////////////////////////////////////////////////////
# Read data
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Read clinical variables from file
clusterData<-read.csv("~/Dropbox/DIREVA.clusterdata.20160406.csv")
head(clusterData,n=30)
## X patid Immigrant diagresult_id sex BMI HbA1c gadab
## 1 1 1 0 T2DRIN 2 17.55587 13.8 2.198335
## 2 2 2 0 T2DRIN 1 39.45209 7.9 2.198335
## 3 3 3 0 2 24.56747 12.3 7.601407
## 4 4 4 0 T1DABS 1 16.25050 12.4 3.806885
## 5 5 5 0 T1DEL 1 29.53704 8.8 7.601407
## 6 6 6 0 SEK 1 24.42659 14.0 7.601407
## 7 7 7 0 T1DABS 1 23.39693 9.4 4.990501
## 8 8 8 0 T1DRIN 2 21.67969 9.0 4.634826
## 9 9 9 0 T1DABS 2 21.87242 14.0 7.601407
## 10 10 10 0 1 17.53469 14.0 4.219655
## 11 11 11 0 T2DRIN 1 26.83016 11.1 2.198335
## 12 13 13 0 T2DRIN 1 25.35693 5.4 2.198335
## 13 14 14 0 T2DCLC 1 37.94832 7.2 2.198335
## 14 15 15 0 T2DCLC 1 30.45776 8.0 2.198335
## 15 16 16 0 T2DCLC 1 33.39506 8.5 2.198335
## 16 18 18 0 T2DCLC 2 30.71740 5.3 2.198335
## 17 19 19 0 T2DCLC 1 26.85121 6.6 2.198335
## 18 20 20 0 T2DCLC 1 30.42185 6.8 1.388791
## 19 21 21 0 T2DCLC 1 22.87640 10.2 2.198335
## 20 22 22 0 T2DCLC 2 31.24524 6.4 2.198335
## 21 23 23 0 T2DCLC 1 32.44712 10.1 2.198335
## 22 24 25 0 T2DRIN 2 20.14568 5.6 2.198335
## 23 25 26 0 T2DCLC 1 34.77551 5.8 2.198335
## 24 26 27 0 T2DCLC 1 32.07161 6.5 2.198335
## 25 27 28 0 T2DCLC 2 22.98169 13.7 2.198335
## 26 28 29 0 LDA 2 29.07331 9.2 7.601407
## 27 29 30 0 LDA 2 35.94415 6.4 3.091497
## 28 30 31 0 T2DRIN 2 33.57377 5.7 2.198335
## 29 31 32 0 T2DCLC 1 29.11999 6.8 2.198335
## 30 32 33 0 T2YNG 2 35.32124 6.5 2.198335
## AgeAtDiagnosis HOMAB HOMAIR
## 1 67 1.9047619 0.04977778
## 2 57 3.4545455 0.09626667
## 3 41 0.5714286 0.05226667
## 4 25 0.2941176 0.09133333
## 5 40 1.8620690 0.07680000
## 6 39 0.6238532 0.21760000
## 7 33 0.3750000 0.10480000
## 8 24 1.2083333 0.10697778
## 9 25 0.7307692 0.07346667
## 10 23 1.5952381 0.35435556
## 11 64 0.6792453 0.46560000
## 12 45 4.5217391 0.13404444
## 13 48 3.1320755 0.32462222
## 14 54 3.0400000 0.28711111
## 15 56 7.5789474 0.17280000
## 16 58 6.2222222 0.23146667
## 17 65 3.5652174 0.29520000
## 18 58 6.1739130 0.18302222
## 19 55 3.0952381 0.22244444
## 20 71 4.4864865 0.26560000
## 21 74 6.0000000 0.36986667
## 22 69 3.0000000 0.13000000
## 23 69 7.6153846 0.26840000
## 24 78 6.8888889 0.25626667
## 25 62 4.7333333 0.20511111
## 26 56 3.3000000 0.22000000
## 27 60 7.1250000 0.33946667
## 28 79 5.4117647 0.10631111
## 29 77 3.8245614 0.44568889
## 30 35 5.2857143 0.20720000
# read clustercenters
clusterCenters <- read.csv("~/ANDIS/ShinyPanel/klustercenters.csv",row.names = 1)
#/////////////////////////////////////////////////////////////////////////////
# Prepare clinical data
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Scale variables
clusterData.scale <- data.frame(scale(clusterData[,colnames(clusterData) %nin% c("patid","Immigrant", "diagresult_id","sex")]))
clusterData.scale$patid <- clusterData$patid
#rearrange columnorder
clusterData.scale<-clusterData.scale[,c("HbA1c","BMI","gadab","AgeAtDiagnosis", "HOMAB", "HOMAIR")]
# take a look
head(clusterData.scale,n=30)
## HbA1c BMI gadab AgeAtDiagnosis HOMAB
## 1 3.106358222 -2.265765e+00 0.07810134 0.55838383 -0.886970814
## 2 0.333550465 1.579389e+00 0.07810134 -0.15389468 -0.403740909
## 3 2.401407097 -1.034470e+00 4.63858433 -1.29354030 -1.302710508
## 4 2.448403839 -2.494999e+00 1.43580355 -2.43318593 -1.389177377
## 5 0.756521140 -1.617746e-01 4.63858433 -1.36476816 -0.900282676
## 6 3.200351705 -1.059211e+00 4.63858433 -1.43599601 -1.286364256
## 7 1.038501590 -1.240027e+00 2.43483892 -1.86336312 -1.363957874
## 8 0.850514623 -1.541589e+00 2.13463052 -2.50441378 -1.104120565
## 9 3.200351705 -1.507743e+00 4.63858433 -2.43318593 -1.253027331
## 10 3.200351705 -2.269483e+00 1.78420364 -2.57564163 -0.983481814
## 11 1.837446198 -6.371234e-01 0.07810134 0.34470028 -1.269092743
## 12 -0.841368075 -8.958353e-01 0.07810134 -1.00862890 -0.070984830
## 13 0.004573274 1.315314e+00 0.07810134 -0.79494535 -0.504288588
## 14 0.380547207 -8.782917e-05 0.07810134 -0.36757824 -0.532998160
## 15 0.615530915 5.157258e-01 0.07810134 -0.22512253 0.882267284
## 16 -0.888364817 4.550665e-02 0.07810134 -0.08266683 0.459233911
## 17 -0.277407176 -6.334275e-01 0.07810134 0.41592813 -0.369232871
## 18 -0.183413693 -6.394373e-03 -0.60519710 -0.08266683 0.444170878
## 19 1.414475523 -1.331436e+00 0.07810134 -0.29635039 -0.515774658
## 20 -0.371400659 1.381991e-01 0.07810134 0.84329524 -0.081976772
## 21 1.367478781 3.492590e-01 0.07810134 1.05697879 0.389943962
## 22 -0.747374592 -1.810973e+00 0.07810134 0.70083953 -0.545470351
## 23 -0.653381109 7.581437e-01 0.07810134 0.70083953 0.893628592
## 24 -0.324403917 2.833165e-01 0.07810134 1.34189020 0.667103758
## 25 3.059361480 -1.312947e+00 0.07810134 0.20224457 -0.005008748
## 26 0.944508107 -2.432085e-01 4.63858433 -0.22512253 -0.451928919
## 27 -0.371400659 9.633655e-01 0.83197786 0.05978887 0.740724329
## 28 -0.700377851 5.471076e-01 0.07810134 1.41311805 0.206529391
## 29 -0.183413693 -2.350114e-01 0.07810134 1.27066234 -0.288368171
## 30 -0.324403917 8.539778e-01 0.07810134 -1.72090741 0.167226268
## HOMAIR
## 1 -0.95592541
## 2 -0.77806744
## 3 -0.94640338
## 4 -0.79694147
## 5 -0.85254334
## 6 -0.31386834
## 7 -0.74542047
## 8 -0.73708869
## 9 -0.86529606
## 10 0.20933337
## 11 0.63493423
## 12 -0.63353658
## 13 0.09557908
## 14 -0.04793156
## 15 -0.48526493
## 16 -0.26081701
## 17 -0.01698495
## 18 -0.44615658
## 19 -0.29533438
## 20 -0.13022913
## 21 0.26867604
## 22 -0.64900989
## 23 -0.11951684
## 24 -0.16593675
## 25 -0.36164854
## 26 -0.30468638
## 27 0.15237121
## 28 -0.73963924
## 29 0.55875797
## 30 -0.35365683
# give same column names as for cluster centers
colnames(clusterData.scale)<-colnames(clusterCenters)
#/////////////////////////////////////////////////////////////////////////////
# Assign clusters to new patients
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Assign clusters to new patients
clusterData.scale$Cluster<-rownames(clusterCenters)[clusters(clusterData.scale,clusterCenters)]
# take a look
head(clusterData.scale,n=30)
## HBA1C BMI GADA AGEATDIAGNOSIS HOMAB
## 1 3.106358222 -2.265765e+00 0.07810134 0.55838383 -0.886970814
## 2 0.333550465 1.579389e+00 0.07810134 -0.15389468 -0.403740909
## 3 2.401407097 -1.034470e+00 4.63858433 -1.29354030 -1.302710508
## 4 2.448403839 -2.494999e+00 1.43580355 -2.43318593 -1.389177377
## 5 0.756521140 -1.617746e-01 4.63858433 -1.36476816 -0.900282676
## 6 3.200351705 -1.059211e+00 4.63858433 -1.43599601 -1.286364256
## 7 1.038501590 -1.240027e+00 2.43483892 -1.86336312 -1.363957874
## 8 0.850514623 -1.541589e+00 2.13463052 -2.50441378 -1.104120565
## 9 3.200351705 -1.507743e+00 4.63858433 -2.43318593 -1.253027331
## 10 3.200351705 -2.269483e+00 1.78420364 -2.57564163 -0.983481814
## 11 1.837446198 -6.371234e-01 0.07810134 0.34470028 -1.269092743
## 12 -0.841368075 -8.958353e-01 0.07810134 -1.00862890 -0.070984830
## 13 0.004573274 1.315314e+00 0.07810134 -0.79494535 -0.504288588
## 14 0.380547207 -8.782917e-05 0.07810134 -0.36757824 -0.532998160
## 15 0.615530915 5.157258e-01 0.07810134 -0.22512253 0.882267284
## 16 -0.888364817 4.550665e-02 0.07810134 -0.08266683 0.459233911
## 17 -0.277407176 -6.334275e-01 0.07810134 0.41592813 -0.369232871
## 18 -0.183413693 -6.394373e-03 -0.60519710 -0.08266683 0.444170878
## 19 1.414475523 -1.331436e+00 0.07810134 -0.29635039 -0.515774658
## 20 -0.371400659 1.381991e-01 0.07810134 0.84329524 -0.081976772
## 21 1.367478781 3.492590e-01 0.07810134 1.05697879 0.389943962
## 22 -0.747374592 -1.810973e+00 0.07810134 0.70083953 -0.545470351
## 23 -0.653381109 7.581437e-01 0.07810134 0.70083953 0.893628592
## 24 -0.324403917 2.833165e-01 0.07810134 1.34189020 0.667103758
## 25 3.059361480 -1.312947e+00 0.07810134 0.20224457 -0.005008748
## 26 0.944508107 -2.432085e-01 4.63858433 -0.22512253 -0.451928919
## 27 -0.371400659 9.633655e-01 0.83197786 0.05978887 0.740724329
## 28 -0.700377851 5.471076e-01 0.07810134 1.41311805 0.206529391
## 29 -0.183413693 -2.350114e-01 0.07810134 1.27066234 -0.288368171
## 30 -0.324403917 8.539778e-01 0.07810134 -1.72090741 0.167226268
## HOMAIR Cluster
## 1 -0.95592541 2_SIDD
## 2 -0.77806744 4_MOD
## 3 -0.94640338 1_SAID
## 4 -0.79694147 1_SAID
## 5 -0.85254334 1_SAID
## 6 -0.31386834 1_SAID
## 7 -0.74542047 1_SAID
## 8 -0.73708869 1_SAID
## 9 -0.86529606 1_SAID
## 10 0.20933337 1_SAID
## 11 0.63493423 2_SIDD
## 12 -0.63353658 5_MARD
## 13 0.09557908 2_SIDD
## 14 -0.04793156 2_SIDD
## 15 -0.48526493 4_MOD
## 16 -0.26081701 5_MARD
## 17 -0.01698495 5_MARD
## 18 -0.44615658 5_MARD
## 19 -0.29533438 2_SIDD
## 20 -0.13022913 5_MARD
## 21 0.26867604 2_SIDD
## 22 -0.64900989 5_MARD
## 23 -0.11951684 4_MOD
## 24 -0.16593675 5_MARD
## 25 -0.36164854 2_SIDD
## 26 -0.30468638 1_SAID
## 27 0.15237121 4_MOD
## 28 -0.73963924 5_MARD
## 29 0.55875797 5_MARD
## 30 -0.35365683 4_MOD
#/////////////////////////////////////////////////////////////////////////////
# Compare with old diagnosis
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
clusterData.scale$patid <-clusterData$patid
clusterData.newold <- merge(clusterData.scale,clusterData,by="patid")
table(Old=clusterData.newold$diagresult_id,New=clusterData.newold$Cluster)
## New
## Old 1_SAID 2_SIDD 3_SIRD 4_MOD 5_MARD
## 21 8 1 0 2
## LDA 19 1 3 16 20
## LDAL 0 1 1 7 13
## SEK 6 11 1 2 13
## T1DABS 52 3 0 0 0
## T1DEL 28 3 0 0 10
## T1DRIN 6 0 0 0 0
## T2DCLC 0 132 60 314 495
## T2DRIN 0 88 0 14 309
## T2YNG 0 5 7 16 3