# Clustering
# Petter Storm 2016-04-06
# Function for calculating distance between each patient and cluster-center and returning cluster with minimum distance
clusters <- function(x, centers) {
# compute squared euclidean distance from each sample to each cluster center
tmp <- sapply(seq_len(nrow(x)),
function(i) apply(centers, 1,
function(v) sum((x[i, ]-v)^2)))
max.col(-t(tmp)) # find index of min distance
}
#/////////////////////////////////////////////////////////////////////////////
# Read data from file
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Read clinical variables from file
clusterData<-read.csv("~/Dropbox/DIREVA.clusterdata.20160406.csv")
# read clustercenters
clusterCenters <- read.csv("https://dl.dropboxusercontent.com/u/178837/clusterCenters.csv",row.names = 1)
clusterCenters
## HBA1C BMI GADA AGEATDIAGNOSIS HOMAB
## 2_SIDD 1.0952958 -0.1192128 -0.1938179 -0.11298639 -0.772066752
## 1_SAID 0.9090041 -1.5881351 2.0272612 -2.25376103 -1.051204710
## 3_SIRD 0.8468572 0.7584651 -0.1549492 0.01730112 -0.381055879
## 4_MOD -0.5186486 0.9900636 -0.1806745 0.07170192 1.218962566
## 5_MARD -0.5413320 -0.2456463 -0.1635800 0.42326741 0.004496165
## HOMAIR
## 2_SIDD 0.2384151
## 1_SAID -0.6339935
## 3_SIRD 2.7455749
## 4_MOD 0.1275066
## 5_MARD -0.4196750
#/////////////////////////////////////////////////////////////////////////////
# Prepare clinical data
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Scale variables
clusterData.scale <- data.frame(scale(clusterData[,colnames(clusterData) %nin% c("patid","Immigrant", "diagresult_id","sex")]))
clusterData.scale$patid <- clusterData$patid
#rearrange columnorder
clusterData.scale<-clusterData.scale[,c("HbA1c","BMI","gadab","AgeAtDiagnosis", "HOMAB", "HOMAIR")]
# take a look
head(clusterData.scale,n=30)
## HbA1c BMI gadab AgeAtDiagnosis HOMAB
## 1 3.106358222 -2.265765e+00 0.07810134 0.55838383 -0.886970814
## 2 0.333550465 1.579389e+00 0.07810134 -0.15389468 -0.403740909
## 3 2.401407097 -1.034470e+00 4.63858433 -1.29354030 -1.302710508
## 4 2.448403839 -2.494999e+00 1.43580355 -2.43318593 -1.389177377
## 5 0.756521140 -1.617746e-01 4.63858433 -1.36476816 -0.900282676
## 6 3.200351705 -1.059211e+00 4.63858433 -1.43599601 -1.286364256
## 7 1.038501590 -1.240027e+00 2.43483892 -1.86336312 -1.363957874
## 8 0.850514623 -1.541589e+00 2.13463052 -2.50441378 -1.104120565
## 9 3.200351705 -1.507743e+00 4.63858433 -2.43318593 -1.253027331
## 10 3.200351705 -2.269483e+00 1.78420364 -2.57564163 -0.983481814
## 11 1.837446198 -6.371234e-01 0.07810134 0.34470028 -1.269092743
## 12 -0.841368075 -8.958353e-01 0.07810134 -1.00862890 -0.070984830
## 13 0.004573274 1.315314e+00 0.07810134 -0.79494535 -0.504288588
## 14 0.380547207 -8.782917e-05 0.07810134 -0.36757824 -0.532998160
## 15 0.615530915 5.157258e-01 0.07810134 -0.22512253 0.882267284
## 16 -0.888364817 4.550665e-02 0.07810134 -0.08266683 0.459233911
## 17 -0.277407176 -6.334275e-01 0.07810134 0.41592813 -0.369232871
## 18 -0.183413693 -6.394373e-03 -0.60519710 -0.08266683 0.444170878
## 19 1.414475523 -1.331436e+00 0.07810134 -0.29635039 -0.515774658
## 20 -0.371400659 1.381991e-01 0.07810134 0.84329524 -0.081976772
## 21 1.367478781 3.492590e-01 0.07810134 1.05697879 0.389943962
## 22 -0.747374592 -1.810973e+00 0.07810134 0.70083953 -0.545470351
## 23 -0.653381109 7.581437e-01 0.07810134 0.70083953 0.893628592
## 24 -0.324403917 2.833165e-01 0.07810134 1.34189020 0.667103758
## 25 3.059361480 -1.312947e+00 0.07810134 0.20224457 -0.005008748
## 26 0.944508107 -2.432085e-01 4.63858433 -0.22512253 -0.451928919
## 27 -0.371400659 9.633655e-01 0.83197786 0.05978887 0.740724329
## 28 -0.700377851 5.471076e-01 0.07810134 1.41311805 0.206529391
## 29 -0.183413693 -2.350114e-01 0.07810134 1.27066234 -0.288368171
## 30 -0.324403917 8.539778e-01 0.07810134 -1.72090741 0.167226268
## HOMAIR
## 1 -0.95592541
## 2 -0.77806744
## 3 -0.94640338
## 4 -0.79694147
## 5 -0.85254334
## 6 -0.31386834
## 7 -0.74542047
## 8 -0.73708869
## 9 -0.86529606
## 10 0.20933337
## 11 0.63493423
## 12 -0.63353658
## 13 0.09557908
## 14 -0.04793156
## 15 -0.48526493
## 16 -0.26081701
## 17 -0.01698495
## 18 -0.44615658
## 19 -0.29533438
## 20 -0.13022913
## 21 0.26867604
## 22 -0.64900989
## 23 -0.11951684
## 24 -0.16593675
## 25 -0.36164854
## 26 -0.30468638
## 27 0.15237121
## 28 -0.73963924
## 29 0.55875797
## 30 -0.35365683
# give same column names as for cluster centers
colnames(clusterData.scale)<-colnames(clusterCenters)
#/////////////////////////////////////////////////////////////////////////////
# Assign clusters to new patients
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Assign clusters to new patients
clusterData.scale$Cluster<-rownames(clusterCenters)[clusters(clusterData.scale,clusterCenters)]
# take a look
head(clusterData.scale,n=30)
## HBA1C BMI GADA AGEATDIAGNOSIS HOMAB
## 1 3.106358222 -2.265765e+00 0.07810134 0.55838383 -0.886970814
## 2 0.333550465 1.579389e+00 0.07810134 -0.15389468 -0.403740909
## 3 2.401407097 -1.034470e+00 4.63858433 -1.29354030 -1.302710508
## 4 2.448403839 -2.494999e+00 1.43580355 -2.43318593 -1.389177377
## 5 0.756521140 -1.617746e-01 4.63858433 -1.36476816 -0.900282676
## 6 3.200351705 -1.059211e+00 4.63858433 -1.43599601 -1.286364256
## 7 1.038501590 -1.240027e+00 2.43483892 -1.86336312 -1.363957874
## 8 0.850514623 -1.541589e+00 2.13463052 -2.50441378 -1.104120565
## 9 3.200351705 -1.507743e+00 4.63858433 -2.43318593 -1.253027331
## 10 3.200351705 -2.269483e+00 1.78420364 -2.57564163 -0.983481814
## 11 1.837446198 -6.371234e-01 0.07810134 0.34470028 -1.269092743
## 12 -0.841368075 -8.958353e-01 0.07810134 -1.00862890 -0.070984830
## 13 0.004573274 1.315314e+00 0.07810134 -0.79494535 -0.504288588
## 14 0.380547207 -8.782917e-05 0.07810134 -0.36757824 -0.532998160
## 15 0.615530915 5.157258e-01 0.07810134 -0.22512253 0.882267284
## 16 -0.888364817 4.550665e-02 0.07810134 -0.08266683 0.459233911
## 17 -0.277407176 -6.334275e-01 0.07810134 0.41592813 -0.369232871
## 18 -0.183413693 -6.394373e-03 -0.60519710 -0.08266683 0.444170878
## 19 1.414475523 -1.331436e+00 0.07810134 -0.29635039 -0.515774658
## 20 -0.371400659 1.381991e-01 0.07810134 0.84329524 -0.081976772
## 21 1.367478781 3.492590e-01 0.07810134 1.05697879 0.389943962
## 22 -0.747374592 -1.810973e+00 0.07810134 0.70083953 -0.545470351
## 23 -0.653381109 7.581437e-01 0.07810134 0.70083953 0.893628592
## 24 -0.324403917 2.833165e-01 0.07810134 1.34189020 0.667103758
## 25 3.059361480 -1.312947e+00 0.07810134 0.20224457 -0.005008748
## 26 0.944508107 -2.432085e-01 4.63858433 -0.22512253 -0.451928919
## 27 -0.371400659 9.633655e-01 0.83197786 0.05978887 0.740724329
## 28 -0.700377851 5.471076e-01 0.07810134 1.41311805 0.206529391
## 29 -0.183413693 -2.350114e-01 0.07810134 1.27066234 -0.288368171
## 30 -0.324403917 8.539778e-01 0.07810134 -1.72090741 0.167226268
## HOMAIR Cluster
## 1 -0.95592541 2_SIDD
## 2 -0.77806744 4_MOD
## 3 -0.94640338 1_SAID
## 4 -0.79694147 1_SAID
## 5 -0.85254334 1_SAID
## 6 -0.31386834 1_SAID
## 7 -0.74542047 1_SAID
## 8 -0.73708869 1_SAID
## 9 -0.86529606 1_SAID
## 10 0.20933337 1_SAID
## 11 0.63493423 2_SIDD
## 12 -0.63353658 5_MARD
## 13 0.09557908 2_SIDD
## 14 -0.04793156 2_SIDD
## 15 -0.48526493 4_MOD
## 16 -0.26081701 5_MARD
## 17 -0.01698495 5_MARD
## 18 -0.44615658 5_MARD
## 19 -0.29533438 2_SIDD
## 20 -0.13022913 5_MARD
## 21 0.26867604 2_SIDD
## 22 -0.64900989 5_MARD
## 23 -0.11951684 4_MOD
## 24 -0.16593675 5_MARD
## 25 -0.36164854 2_SIDD
## 26 -0.30468638 1_SAID
## 27 0.15237121 4_MOD
## 28 -0.73963924 5_MARD
## 29 0.55875797 5_MARD
## 30 -0.35365683 4_MOD
#/////////////////////////////////////////////////////////////////////////////
# Compare with old diagnosis
#\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
clusterData.scale$patid <-clusterData$patid
clusterData.newold <- merge(clusterData.scale,clusterData,by="patid")
table(Old=clusterData.newold$diagresult_id,New=clusterData.newold$Cluster)
## New
## Old 1_SAID 2_SIDD 3_SIRD 4_MOD 5_MARD
## 21 8 1 0 2
## LDA 19 1 3 16 20
## LDAL 0 1 1 7 13
## SEK 6 11 1 2 13
## T1DABS 52 3 0 0 0
## T1DEL 28 3 0 0 10
## T1DRIN 6 0 0 0 0
## T2DCLC 0 132 60 314 495
## T2DRIN 0 88 0 14 309
## T2YNG 0 5 7 16 3