library(data.table)
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:data.table':
## 
##     between, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(NbClust)
library(d3heatmap)
library(corrplot)
library(knitr)
library(imputeMissings)
## 
## Attaching package: 'imputeMissings'
## 
## The following object is masked from 'package:dplyr':
## 
##     compute
library(sparcl)

##=============================================================
## Exploratory analysis of AoV Diagnostics_data
##=============================================================

# Global knitr chunk defaults: cache chunk results, suppress messages, hide the
# source code, and render figures at width 7 by height 5.
opts_chunk$set(
  cache = TRUE,
  message = FALSE,
  echo = FALSE,
  fig.width = 7,
  fig.height = 5
)


## Import data
# Read the diagnostics workbook from the working directory and preview the
# first six rows.

diag <- read_excel(path = "diagnew.xlsx")
head(diag, n = 6L)
## Source: local data frame [6 x 61]
## 
##   PCT10 ACR tests Data ALT tests Data Audiology assessments Data
##   (chr)          (dbl)          (dbl)                      (dbl)
## 1   5A3         72.821          309.7                      11.75
## 2   5A4         13.303          221.5                      18.57
## 3   5A5         17.880          246.5                      21.08
## 4   5A7          0.376          272.3                      13.48
## 5   5A8          0.274          215.7                      15.71
## 6   5A9         46.010          292.5                      14.59
## Variables not shown: BNP tests Data (dbl), Barium Enema Data (dbl), Blood
##   glucose fasting tests Data (dbl), Blood glucose tests (2h) Data (dbl),
##   CA125 tests Data (dbl), COPD with record of FEV Data (dbl), CT
##   Colonoscopy Data (dbl), CT scan Data (dbl), Calcium tests Data (dbl),
##   Calprotectin tests Data (dbl), Capsule endoscopy Data (dbl),
##   Carbamazepine tests Data (dbl), Cholesterol tests Data (dbl),
##   Colonoscopy & FlexiSig Data (dbl), Creatine kinase tests Data (dbl),
##   DEXA scan Data (dbl), Diagnostics electrophysiology Data (dbl), Digoxin
##   tests Data (dbl), EVAR procedures Data (dbl), Echocardiography tests
##   Data (dbl), Endoscopic ultrasound Data (dbl), Ferritin tests Data (dbl),
##   Folate RBC tests Data (dbl), Folate tests Data (dbl), Gastroscopy Data
##   (dbl), Gastroscopy under 55yrs Data (dbl), HBA1c IFCC tests Data (dbl),
##   HDL cholesterol tests Data (dbl), Haemoglobin tests Data (dbl), Lithium
##   tests Data (dbl), MRI Data (dbl), Neurophysiology tests Data (dbl), Non
##   obstetric ultrasound Data (dbl), PET CT Data (dbl), PSA tests Data
##   (dbl), PTH tests Data (dbl), Paediatric endoscopy Data (dbl), Phenytoin
##   tests Data (dbl), Proportion AAA as EVAR Data (dbl), RAST tests Data
##   (dbl), Ratio Colonoscopy to Flexisig Data (dbl), Rheumatoid tests Data
##   (dbl), Serum creatinine tests Data (dbl), Sleep Studies Data (dbl),
##   T3free tests Data (dbl), T4free tests Data (dbl), TSH tests Data (dbl),
##   Thyroid tests Data (dbl), Triglycerides tests Data (dbl), Troponin tests
##   Data (dbl), Urate tests Data (dbl), Urine protein-creatinine tests Data
##   (dbl), Urodynamic tests Data (dbl), Valproate tests Data (dbl), Vitamin
##   B12 tests Data (dbl), Vitamin D tests Data (dbl), eGFR tests Data (dbl)
# Report the size of the raw table as (rows, columns).
dim(diag)
## [1] 151  61
# Per-column summary statistics; columns with missing values also list an
# NA count.
summary(diag)
##     PCT10           ACR tests Data    ALT tests Data 
##  Length:151         Min.   :  0.066   Min.   :  1.9  
##  Class :character   1st Qu.: 28.058   1st Qu.:237.8  
##  Mode  :character   Median : 41.510   Median :288.7  
##                     Mean   : 40.121   Mean   :269.9  
##                     3rd Qu.: 50.967   3rd Qu.:325.4  
##                     Max.   :108.424   Max.   :468.9  
##                     NA's   :2                        
##  Audiology assessments Data BNP tests Data   Barium Enema Data
##  Min.   : 6.48              Min.   : 0.048   Min.   :0.0000   
##  1st Qu.:15.30              1st Qu.: 1.721   1st Qu.:0.1200   
##  Median :21.08              Median : 4.268   Median :0.3600   
##  Mean   :22.62              Mean   : 4.433   Mean   :0.8352   
##  3rd Qu.:27.84              3rd Qu.: 6.331   3rd Qu.:1.1200   
##  Max.   :70.60              Max.   :14.368   Max.   :8.5100   
##                             NA's   :40                        
##  Blood glucose fasting tests Data Blood glucose tests (2h) Data
##  Min.   :  0.050                  Min.   : 0.0380              
##  1st Qu.:  3.882                  1st Qu.: 0.8832              
##  Median : 61.145                  Median : 2.2325              
##  Mean   : 65.740                  Mean   : 3.1163              
##  3rd Qu.:113.480                  3rd Qu.: 4.2770              
##  Max.   :203.680                  Max.   :14.6050              
##  NA's   :13                       NA's   :69                   
##  CA125 tests Data COPD with record of FEV Data CT Colonoscopy Data
##  Min.   :0.113    Min.   :65.13                Min.   : 0.340     
##  1st Qu.:3.012    1st Qu.:76.88                1st Qu.: 2.555     
##  Median :4.038    Median :78.90                Median : 5.280     
##  Mean   :4.371    Mean   :79.01                Mean   : 6.863     
##  3rd Qu.:5.730    3rd Qu.:81.32                3rd Qu.: 9.140     
##  Max.   :9.033    Max.   :87.41                Max.   :24.480     
##  NA's   :10                                    NA's   :13         
##   CT scan Data    Calcium tests Data Calprotectin tests Data
##  Min.   : 37.20   Min.   :  5.77     Min.   :0.01100        
##  1st Qu.: 62.45   1st Qu.:125.66     1st Qu.:0.06775        
##  Median : 72.00   Median :158.34     Median :0.15550        
##  Mean   : 74.58   Mean   :192.57     Mean   :0.56188        
##  3rd Qu.: 85.50   3rd Qu.:221.78     3rd Qu.:0.31925        
##  Max.   :132.10   Max.   :880.20     Max.   :5.11300        
##                                      NA's   :127            
##  Capsule endoscopy Data Carbamazepine tests Data Cholesterol tests Data
##  Min.   :0.180          Min.   :0.0420           Min.   :  4.97        
##  1st Qu.:0.615          1st Qu.:0.1535           1st Qu.:175.60        
##  Median :0.910          Median :0.2270           Median :202.84        
##  Mean   :1.178          Mean   :0.2670           Mean   :196.84        
##  3rd Qu.:1.510          3rd Qu.:0.3410           3rd Qu.:223.69        
##  Max.   :5.310          Max.   :1.2460           Max.   :335.80        
##  NA's   :16             NA's   :12                                     
##  Colonoscopy & FlexiSig Data Creatine kinase tests Data DEXA scan Data  
##  Min.   :119.7               Min.   : 0.570             Min.   : 0.260  
##  1st Qu.:195.1               1st Qu.: 5.527             1st Qu.: 4.720  
##  Median :220.5               Median : 9.110             Median : 6.220  
##  Mean   :222.8               Mean   :10.473             Mean   : 6.303  
##  3rd Qu.:245.7               3rd Qu.:12.883             3rd Qu.: 7.855  
##  Max.   :329.3               Max.   :44.490             Max.   :15.240  
##                              NA's   :1                                  
##  Diagnostics electrophysiology Data Digoxin tests Data
##  Min.   : 0.0000                    Min.   :0.0490    
##  1st Qu.: 0.0415                    1st Qu.:0.3920    
##  Median : 0.1140                    Median :0.6450    
##  Mean   : 0.7334                    Mean   :0.6723    
##  3rd Qu.: 0.3430                    3rd Qu.:0.9225    
##  Max.   :21.4690                    Max.   :1.9230    
##                                     NA's   :8         
##  EVAR procedures Data Echocardiography tests Data
##  Min.   :1.620        Min.   : 1.22              
##  1st Qu.:3.560        1st Qu.:17.36              
##  Median :4.360        Median :21.31              
##  Mean   :4.811        Mean   :21.03              
##  3rd Qu.:5.965        3rd Qu.:25.44              
##  Max.   :9.980        Max.   :42.02              
##                                                  
##  Endoscopic ultrasound Data Ferritin tests Data Folate RBC tests Data
##  Min.   :0.120              Min.   :  3.69      Min.   : 0.022       
##  1st Qu.:1.312              1st Qu.: 41.70      1st Qu.: 0.085       
##  Median :2.005              Median : 60.26      Median : 0.545       
##  Mean   :2.152              Mean   : 61.66      Mean   : 3.997       
##  3rd Qu.:2.565              3rd Qu.: 80.73      3rd Qu.: 1.827       
##  Max.   :6.760              Max.   :139.50      Max.   :52.771       
##  NA's   :1                                      NA's   :77           
##  Folate tests Data Gastroscopy Data Gastroscopy under 55yrs Data
##  Min.   :  0.05    Min.   : 78.2    Min.   :25.20               
##  1st Qu.: 34.05    1st Qu.:114.1    1st Qu.:32.35               
##  Median : 43.34    Median :128.8    Median :35.60               
##  Mean   : 44.42    Mean   :130.9    Mean   :36.48               
##  3rd Qu.: 51.90    3rd Qu.:144.8    3rd Qu.:39.65               
##  Max.   :131.36    Max.   :208.3    Max.   :56.20               
##  NA's   :1                                                      
##  HBA1c IFCC tests Data HDL cholesterol tests Data Haemoglobin tests Data
##  Min.   :  4.56        Min.   :  4.97             Min.   :  7.1         
##  1st Qu.: 69.41        1st Qu.:151.65             1st Qu.:286.6         
##  Median : 82.47        Median :179.60             Median :336.7         
##  Mean   : 84.69        Mean   :177.22             Mean   :329.2         
##  3rd Qu.: 97.08        3rd Qu.:213.97             3rd Qu.:369.4         
##  Max.   :252.41        Max.   :270.41             Max.   :643.5         
##  NA's   :2                                                              
##  Lithium tests Data    MRI Data     Neurophysiology tests Data
##  Min.   :0.053      Min.   :22.80   Min.   :0.070             
##  1st Qu.:1.433      1st Qu.:39.25   1st Qu.:1.982             
##  Median :2.321      Median :45.30   Median :2.785             
##  Mean   :2.282      Mean   :45.65   Mean   :3.220             
##  3rd Qu.:3.074      3rd Qu.:50.40   3rd Qu.:4.088             
##  Max.   :6.198      Max.   :99.00   Max.   :9.260             
##  NA's   :1                          NA's   :1                 
##  Non obstetric ultrasound Data  PET CT Data     PSA tests Data 
##  Min.   : 54.40                Min.   : 0.570   Min.   : 0.64  
##  1st Qu.: 97.85                1st Qu.: 6.160   1st Qu.:17.41  
##  Median :113.50                Median : 7.885   Median :22.24  
##  Mean   :112.89                Mean   : 7.704   Mean   :23.04  
##  3rd Qu.:126.10                3rd Qu.: 9.553   3rd Qu.:29.02  
##  Max.   :161.80                Max.   :13.760   Max.   :46.09  
##                                NA's   :73                      
##  PTH tests Data    Paediatric endoscopy Data Phenytoin tests Data
##  Min.   : 0.0430   Min.   : 32.7             Min.   :0.0430      
##  1st Qu.: 0.8635   1st Qu.: 96.5             1st Qu.:0.1653      
##  Median : 1.6420   Median :113.9             Median :0.2885      
##  Mean   : 2.1860   Mean   :115.8             Mean   :0.3090      
##  3rd Qu.: 2.6710   3rd Qu.:132.8             3rd Qu.:0.4295      
##  Max.   :19.8400   Max.   :237.1             Max.   :0.9500      
##  NA's   :8                                   NA's   :13          
##  Proportion AAA as EVAR Data RAST tests Data  
##  Min.   :29.40               Min.   : 0.0400  
##  1st Qu.:55.00               1st Qu.: 0.5115  
##  Median :63.20               Median : 1.3860  
##  Mean   :63.74               Mean   : 2.1844  
##  3rd Qu.:75.00               3rd Qu.: 2.9695  
##  Max.   :91.70               Max.   :17.2220  
##  NA's   :10                  NA's   :40       
##  Ratio Colonoscopy to Flexisig Data Rheumatoid tests Data
##  Min.   : 0.610                     Min.   : 0.054       
##  1st Qu.: 1.385                     1st Qu.: 3.254       
##  Median : 1.760                     Median : 8.555       
##  Mean   : 2.016                     Mean   : 7.835       
##  3rd Qu.: 2.305                     3rd Qu.:11.192       
##  Max.   :10.470                     Max.   :22.502       
##                                     NA's   :6            
##  Serum creatinine tests Data Sleep Studies Data T3free tests Data
##  Min.   :  8.2               Min.   :0.100      Min.   : 0.050   
##  1st Qu.:308.6               1st Qu.:0.795      1st Qu.: 1.870   
##  Median :366.3               Median :1.540      Median : 3.530   
##  Mean   :367.0               Mean   :1.808      Mean   : 5.254   
##  3rd Qu.:409.9               3rd Qu.:2.415      3rd Qu.: 6.810   
##  Max.   :870.7               Max.   :7.570      Max.   :53.350   
##                                                 NA's   :2        
##  T4free tests Data TSH tests Data  Thyroid tests Data
##  Min.   :  4.90    Min.   :  6.2   Min.   :0.0400    
##  1st Qu.: 33.75    1st Qu.:174.7   1st Qu.:0.9175    
##  Median : 52.80    Median :199.3   Median :1.8600    
##  Mean   : 86.89    Mean   :199.4   Mean   :2.0318    
##  3rd Qu.:142.40    3rd Qu.:224.4   3rd Qu.:2.8725    
##  Max.   :256.80    Max.   :355.8   Max.   :7.0300    
##                                    NA's   :15        
##  Triglycerides tests Data Troponin tests Data Urate tests Data
##  Min.   :  4.86           Min.   :0.0270      Min.   : 0.06   
##  1st Qu.:124.63           1st Qu.:0.2855      1st Qu.: 9.09   
##  Median :164.25           Median :0.6730      Median :11.22   
##  Mean   :161.13           Mean   :1.0558      Mean   :11.86   
##  3rd Qu.:202.46           3rd Qu.:1.4583      3rd Qu.:14.15   
##  Max.   :279.20           Max.   :5.7180      Max.   :52.96   
##                           NA's   :17                          
##  Urine protein-creatinine tests Data Urodynamic tests Data
##  Min.   : 0.0360                     Min.   :0.046        
##  1st Qu.: 0.7738                     1st Qu.:0.759        
##  Median : 2.4685                     Median :1.282        
##  Mean   : 6.9702                     Mean   :1.687        
##  3rd Qu.: 5.5393                     3rd Qu.:2.101        
##  Max.   :67.9560                     Max.   :6.675        
##  NA's   :11                                               
##  Valproate tests Data Vitamin B12 tests Data Vitamin D tests Data
##  Min.   :0.0200       Min.   :  1.82         Min.   :  0.05      
##  1st Qu.:0.1310       1st Qu.: 38.91         1st Qu.:  3.37      
##  Median :0.2640       Median : 47.58         Median :  8.92      
##  Mean   :0.3231       Mean   : 49.96         Mean   : 19.15      
##  3rd Qu.:0.4250       3rd Qu.: 58.06         3rd Qu.: 23.38      
##  Max.   :1.4560       Max.   :131.28         Max.   :193.44      
##  NA's   :32           NA's   :1              NA's   :6           
##  eGFR tests Data 
##  Min.   :  0.31  
##  1st Qu.:248.91  
##  Median :325.98  
##  Mean   :300.41  
##  3rd Qu.:375.40  
##  Max.   :774.29  
## 
## Tidy column names
# Lower-case the headers and convert them to syntactically valid R names, then
# strip the literal ".data" and ".tests" fragments. fixed = TRUE is required:
# as a regular expression "." matches any character, so the pattern ".data"
# would also remove e.g. "xdata" from a name.
names(diag) <- make.names(tolower(colnames(diag)))
names(diag) <- gsub(".data", "", names(diag), fixed = TRUE)
names(diag) <- gsub(".tests", "", names(diag), fixed = TRUE)

# Preview the table with the cleaned names.
head(diag)
## Source: local data frame [6 x 61]
## 
##   pct10    acr   alt audiology.assessments    bnp barium.enema
##   (chr)  (dbl) (dbl)                 (dbl)  (dbl)        (dbl)
## 1   5A3 72.821 309.7                 11.75     NA         2.51
## 2   5A4 13.303 221.5                 18.57     NA         1.59
## 3   5A5 17.880 246.5                 21.08     NA         0.04
## 4   5A7  0.376 272.3                 13.48 13.593         0.04
## 5   5A8  0.274 215.7                 15.71  3.950         0.63
## 6   5A9 46.010 292.5                 14.59  1.313         0.05
## Variables not shown: blood.glucose.fasting (dbl), blood.glucose..2h.
##   (dbl), ca125 (dbl), copd.with.record.of.fev (dbl), ct.colonoscopy (dbl),
##   ct.scan (dbl), calcium (dbl), calprotectin (dbl), capsule.endoscopy
##   (dbl), carbamazepine (dbl), cholesterol (dbl), colonoscopy...flexisig
##   (dbl), creatine.kinase (dbl), dexa.scan (dbl),
##   diagnostics.electrophysiology (dbl), digoxin (dbl), evar.procedures
##   (dbl), echocardiography (dbl), endoscopic.ultrasound (dbl), ferritin
##   (dbl), folate.rbc (dbl), folate (dbl), gastroscopy (dbl),
##   gastroscopy.under.55yrs (dbl), hba1c.ifcc (dbl), hdl.cholesterol (dbl),
##   haemoglobin (dbl), lithium (dbl), mri (dbl), neurophysiology (dbl),
##   non.obstetric.ultrasound (dbl), pet.ct (dbl), psa (dbl), pth (dbl),
##   paediatric.endoscopy (dbl), phenytoin (dbl), proportion.aaa.as.evar
##   (dbl), rast (dbl), ratio.colonoscopy.to.flexisig (dbl), rheumatoid
##   (dbl), serum.creatinine (dbl), sleep.studies (dbl), t3free (dbl), t4free
##   (dbl), tsh (dbl), thyroid (dbl), triglycerides (dbl), troponin (dbl),
##   urate (dbl), urine.protein.creatinine (dbl), urodynamic (dbl), valproate
##   (dbl), vitamin.b12 (dbl), vitamin.d (dbl), egfr (dbl)
# Show each column's type and leading values after the rename.
str(diag)
## Classes 'tbl_df', 'tbl' and 'data.frame':    151 obs. of  61 variables:
##  $ pct10                        : chr  "5A3" "5A4" "5A5" "5A7" ...
##  $ acr                          : num  72.821 13.303 17.88 0.376 0.274 ...
##  $ alt                          : num  310 222 246 272 216 ...
##  $ audiology.assessments        : num  11.8 18.6 21.1 13.5 15.7 ...
##  $ bnp                          : num  NA NA NA 13.59 3.95 ...
##  $ barium.enema                 : num  2.51 1.59 0.04 0.04 0.63 0.05 0.22 0.07 1.17 0.08 ...
##  $ blood.glucose.fasting        : num  56.4 130.1 NA 142.5 91.8 ...
##  $ blood.glucose..2h.           : num  3.5 2.02 NA 2.92 4.28 ...
##  $ ca125                        : num  3.439 4.038 0.238 3.104 4.225 ...
##  $ copd.with.record.of.fev      : num  74.5 78.3 81.9 76.5 85.1 ...
##  $ ct.colonoscopy               : num  4.2 3.67 5.15 3.5 1.49 ...
##  $ ct.scan                      : num  132.1 81.6 72 72.9 54.7 ...
##  $ calcium                      : num  125.4 142.2 133.5 93.8 67.4 ...
##  $ calprotectin                 : num  0.237 NA NA NA NA NA NA NA NA NA ...
##  $ capsule.endoscopy            : num  NA 1.45 1.81 2.7 0.67 2.3 1.42 1.85 1.04 2.43 ...
##  $ carbamazepine                : num  0.415 0.297 0.079 0.282 0.274 0.451 0.277 0.463 0.078 0.108 ...
##  $ cholesterol                  : num  209 210 175 210 169 ...
##  $ colonoscopy...flexisig       : num  192 254 226 266 226 ...
##  $ creatine.kinase              : num  9.61 32.84 11.68 16.56 9.99 ...
##  $ dexa.scan                    : num  5.64 5.71 3.02 8.63 6.29 ...
##  $ diagnostics.electrophysiology: num  0.014 0.797 0.189 0.128 0.621 ...
##  $ digoxin                      : num  0.534 0.534 NA 0.706 0.549 0.657 0.443 0.309 0.156 0.433 ...
##  $ evar.procedures              : num  5.31 5.23 5.45 8.89 9.56 5.92 3.96 5.9 6.76 4.36 ...
##  $ echocardiography             : num  19.3 26.4 26.8 19.7 25.5 ...
##  $ endoscopic.ultrasound        : num  2.44 3.09 2.02 1.02 1.05 1.79 1.65 1.33 2.23 2.18 ...
##  $ ferritin                     : num  83.6 61.1 42.4 73.4 69.7 ...
##  $ folate.rbc                   : num  NA NA 0.079 NA NA NA NA NA NA NA ...
##  $ folate                       : num  43.8 62.2 36 38.1 44 ...
##  $ gastroscopy                  : num  137 139 140 136 136 ...
##  $ gastroscopy.under.55yrs      : num  33.3 35.3 40.8 35.6 44.5 42.1 39 37.5 48.4 54.9 ...
##  $ hba1c.ifcc                   : num  83.7 66.4 34.1 92.8 93.4 ...
##  $ hdl.cholesterol              : num  173 151 169 168 153 ...
##  $ haemoglobin                  : num  376 256 277 295 278 ...
##  $ lithium                      : num  2.609 0.891 2.622 2.352 1.591 ...
##  $ mri                          : num  57.3 62 50.2 50.8 46.1 66.5 41.3 53.2 55.2 45.3 ...
##  $ neurophysiology              : num  2.92 3.92 4.33 2.6 1.1 2.07 2.58 1.73 3.21 1.12 ...
##  $ non.obstetric.ultrasound     : num  126 155 121.8 106.8 97.8 ...
##  $ pet.ct                       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ psa                          : num  33.6 26.4 21 30.6 18.7 ...
##  $ pth                          : num  2.965 NA 0.795 0.47 1.207 ...
##  $ paediatric.endoscopy         : num  73.9 119 113.6 107.1 124.4 ...
##  $ phenytoin                    : num  0.178 0.238 0.079 0.376 0.329 0.534 0.166 0.103 0.391 0.271 ...
##  $ proportion.aaa.as.evar       : num  64.1 68.1 NA 77.6 60 88 60.8 82.5 76.9 76.9 ...
##  $ rast                         : num  1.72 NA 7.788 0.376 0.768 ...
##  $ ratio.colonoscopy.to.flexisig: num  1.31 2.23 2.39 4.15 1.61 1.88 3.25 2.52 2.13 2.46 ...
##  $ rheumatoid                   : num  13.758 NA 0.477 15.663 11.741 ...
##  $ serum.creatinine             : num  401 302 279 659 469 ...
##  $ sleep.studies                : num  4.55 1.48 0.93 1.54 2.64 1.19 1.89 1.78 1.7 1.19 ...
##  $ t3free                       : num  4.63 0.89 4.21 5.74 3.79 4.76 1.77 2.99 0.7 1.14 ...
##  $ t4free                       : num  40.6 31.5 14.6 219 170.7 ...
##  $ tsh                          : num  188 172 180 219 171 ...
##  $ thyroid                      : num  2.79 6.3 1.67 3.81 3.02 2.83 1.88 0.77 5.4 NA ...
##  $ triglycerides                : num  95.1 152.4 168.7 209.4 168.8 ...
##  $ troponin                     : num  2.372 NA 1.192 0.282 0.658 ...
##  $ urate                        : num  10.14 2.38 13.03 16.27 21.45 ...
##  $ urine.protein.creatinine     : num  8.539 1.96 0.079 3.34 0.219 ...
##  $ urodynamic                   : num  2.67 0.69 6.23 2.56 0.99 ...
##  $ valproate                    : num  NA 0.178 NA 0.282 0.165 0.534 0.332 0.309 0.156 0.271 ...
##  $ vitamin.b12                  : num  51.7 62.2 39.7 41.2 47.1 ...
##  $ vitamin.d                    : num  10.85 8.08 0.64 4.28 14.81 ...
##  $ egfr                         : num  384.21 1.25 263.67 323.56 232.36 ...
## Missing data
# Share of missing values in each column, as a numeric vector named by column.
# colMeans(is.na(.)) works on the data frame directly; apply() would first
# coerce the whole table to a character matrix because pct10 is a character
# column.

missData <- colMeans(is.na(diag))
# One point per column, in column order.
plot(missData, pch = 20)

## Exclude columns with > 20% missing data
# miss1 holds the positions (named by column) of every column whose share of
# missing values exceeds 0.2; it is rendered as a one-column table.

miss1 <- which(missData > 0.2)
kable(x = as.data.frame(miss1))
##                    miss1
## bnp                    5
## blood.glucose..2h.     8
## calprotectin          14
## folate.rbc            27
## pet.ct                38
## rast                  44
## valproate             58
# Drop the flagged columns. A logical keep-mask is used instead of negative
# indexing: when miss1 is empty, -miss1 is a zero-length index and
# diag[, -miss1] would select no columns at all, discarding the whole table.
diag <- diag[, !(seq_along(diag) %in% miss1)]
# Size and per-column summary of the reduced table.
dim(diag)
## [1] 151  54
summary(diag)
##     pct10                acr               alt       
##  Length:151         Min.   :  0.066   Min.   :  1.9  
##  Class :character   1st Qu.: 28.058   1st Qu.:237.8  
##  Mode  :character   Median : 41.510   Median :288.7  
##                     Mean   : 40.121   Mean   :269.9  
##                     3rd Qu.: 50.967   3rd Qu.:325.4  
##                     Max.   :108.424   Max.   :468.9  
##                     NA's   :2                        
##  audiology.assessments  barium.enema    blood.glucose.fasting
##  Min.   : 6.48         Min.   :0.0000   Min.   :  0.050      
##  1st Qu.:15.30         1st Qu.:0.1200   1st Qu.:  3.882      
##  Median :21.08         Median :0.3600   Median : 61.145      
##  Mean   :22.62         Mean   :0.8352   Mean   : 65.740      
##  3rd Qu.:27.84         3rd Qu.:1.1200   3rd Qu.:113.480      
##  Max.   :70.60         Max.   :8.5100   Max.   :203.680      
##                                         NA's   :13           
##      ca125       copd.with.record.of.fev ct.colonoscopy      ct.scan      
##  Min.   :0.113   Min.   :65.13           Min.   : 0.340   Min.   : 37.20  
##  1st Qu.:3.012   1st Qu.:76.88           1st Qu.: 2.555   1st Qu.: 62.45  
##  Median :4.038   Median :78.90           Median : 5.280   Median : 72.00  
##  Mean   :4.371   Mean   :79.01           Mean   : 6.863   Mean   : 74.58  
##  3rd Qu.:5.730   3rd Qu.:81.32           3rd Qu.: 9.140   3rd Qu.: 85.50  
##  Max.   :9.033   Max.   :87.41           Max.   :24.480   Max.   :132.10  
##  NA's   :10                              NA's   :13                       
##     calcium       capsule.endoscopy carbamazepine     cholesterol    
##  Min.   :  5.77   Min.   :0.180     Min.   :0.0420   Min.   :  4.97  
##  1st Qu.:125.66   1st Qu.:0.615     1st Qu.:0.1535   1st Qu.:175.60  
##  Median :158.34   Median :0.910     Median :0.2270   Median :202.84  
##  Mean   :192.57   Mean   :1.178     Mean   :0.2670   Mean   :196.84  
##  3rd Qu.:221.78   3rd Qu.:1.510     3rd Qu.:0.3410   3rd Qu.:223.69  
##  Max.   :880.20   Max.   :5.310     Max.   :1.2460   Max.   :335.80  
##                   NA's   :16        NA's   :12                       
##  colonoscopy...flexisig creatine.kinase    dexa.scan     
##  Min.   :119.7          Min.   : 0.570   Min.   : 0.260  
##  1st Qu.:195.1          1st Qu.: 5.527   1st Qu.: 4.720  
##  Median :220.5          Median : 9.110   Median : 6.220  
##  Mean   :222.8          Mean   :10.473   Mean   : 6.303  
##  3rd Qu.:245.7          3rd Qu.:12.883   3rd Qu.: 7.855  
##  Max.   :329.3          Max.   :44.490   Max.   :15.240  
##                         NA's   :1                        
##  diagnostics.electrophysiology    digoxin       evar.procedures
##  Min.   : 0.0000               Min.   :0.0490   Min.   :1.620  
##  1st Qu.: 0.0415               1st Qu.:0.3920   1st Qu.:3.560  
##  Median : 0.1140               Median :0.6450   Median :4.360  
##  Mean   : 0.7334               Mean   :0.6723   Mean   :4.811  
##  3rd Qu.: 0.3430               3rd Qu.:0.9225   3rd Qu.:5.965  
##  Max.   :21.4690               Max.   :1.9230   Max.   :9.980  
##                                NA's   :8                       
##  echocardiography endoscopic.ultrasound    ferritin          folate      
##  Min.   : 1.22    Min.   :0.120         Min.   :  3.69   Min.   :  0.05  
##  1st Qu.:17.36    1st Qu.:1.312         1st Qu.: 41.70   1st Qu.: 34.05  
##  Median :21.31    Median :2.005         Median : 60.26   Median : 43.34  
##  Mean   :21.03    Mean   :2.152         Mean   : 61.66   Mean   : 44.42  
##  3rd Qu.:25.44    3rd Qu.:2.565         3rd Qu.: 80.73   3rd Qu.: 51.90  
##  Max.   :42.02    Max.   :6.760         Max.   :139.50   Max.   :131.36  
##                   NA's   :1                              NA's   :1       
##   gastroscopy    gastroscopy.under.55yrs   hba1c.ifcc     hdl.cholesterol 
##  Min.   : 78.2   Min.   :25.20           Min.   :  4.56   Min.   :  4.97  
##  1st Qu.:114.1   1st Qu.:32.35           1st Qu.: 69.41   1st Qu.:151.65  
##  Median :128.8   Median :35.60           Median : 82.47   Median :179.60  
##  Mean   :130.9   Mean   :36.48           Mean   : 84.69   Mean   :177.22  
##  3rd Qu.:144.8   3rd Qu.:39.65           3rd Qu.: 97.08   3rd Qu.:213.97  
##  Max.   :208.3   Max.   :56.20           Max.   :252.41   Max.   :270.41  
##                                          NA's   :2                        
##   haemoglobin       lithium           mri        neurophysiology
##  Min.   :  7.1   Min.   :0.053   Min.   :22.80   Min.   :0.070  
##  1st Qu.:286.6   1st Qu.:1.433   1st Qu.:39.25   1st Qu.:1.982  
##  Median :336.7   Median :2.321   Median :45.30   Median :2.785  
##  Mean   :329.2   Mean   :2.282   Mean   :45.65   Mean   :3.220  
##  3rd Qu.:369.4   3rd Qu.:3.074   3rd Qu.:50.40   3rd Qu.:4.088  
##  Max.   :643.5   Max.   :6.198   Max.   :99.00   Max.   :9.260  
##                  NA's   :1                       NA's   :1      
##  non.obstetric.ultrasound      psa             pth         
##  Min.   : 54.40           Min.   : 0.64   Min.   : 0.0430  
##  1st Qu.: 97.85           1st Qu.:17.41   1st Qu.: 0.8635  
##  Median :113.50           Median :22.24   Median : 1.6420  
##  Mean   :112.89           Mean   :23.04   Mean   : 2.1860  
##  3rd Qu.:126.10           3rd Qu.:29.02   3rd Qu.: 2.6710  
##  Max.   :161.80           Max.   :46.09   Max.   :19.8400  
##                                           NA's   :8        
##  paediatric.endoscopy   phenytoin      proportion.aaa.as.evar
##  Min.   : 32.7        Min.   :0.0430   Min.   :29.40         
##  1st Qu.: 96.5        1st Qu.:0.1653   1st Qu.:55.00         
##  Median :113.9        Median :0.2885   Median :63.20         
##  Mean   :115.8        Mean   :0.3090   Mean   :63.74         
##  3rd Qu.:132.8        3rd Qu.:0.4295   3rd Qu.:75.00         
##  Max.   :237.1        Max.   :0.9500   Max.   :91.70         
##                       NA's   :13       NA's   :10            
##  ratio.colonoscopy.to.flexisig   rheumatoid     serum.creatinine
##  Min.   : 0.610                Min.   : 0.054   Min.   :  8.2   
##  1st Qu.: 1.385                1st Qu.: 3.254   1st Qu.:308.6   
##  Median : 1.760                Median : 8.555   Median :366.3   
##  Mean   : 2.016                Mean   : 7.835   Mean   :367.0   
##  3rd Qu.: 2.305                3rd Qu.:11.192   3rd Qu.:409.9   
##  Max.   :10.470                Max.   :22.502   Max.   :870.7   
##                                NA's   :6                        
##  sleep.studies       t3free           t4free            tsh       
##  Min.   :0.100   Min.   : 0.050   Min.   :  4.90   Min.   :  6.2  
##  1st Qu.:0.795   1st Qu.: 1.870   1st Qu.: 33.75   1st Qu.:174.7  
##  Median :1.540   Median : 3.530   Median : 52.80   Median :199.3  
##  Mean   :1.808   Mean   : 5.254   Mean   : 86.89   Mean   :199.4  
##  3rd Qu.:2.415   3rd Qu.: 6.810   3rd Qu.:142.40   3rd Qu.:224.4  
##  Max.   :7.570   Max.   :53.350   Max.   :256.80   Max.   :355.8  
##                  NA's   :2                                        
##     thyroid       triglycerides       troponin          urate      
##  Min.   :0.0400   Min.   :  4.86   Min.   :0.0270   Min.   : 0.06  
##  1st Qu.:0.9175   1st Qu.:124.63   1st Qu.:0.2855   1st Qu.: 9.09  
##  Median :1.8600   Median :164.25   Median :0.6730   Median :11.22  
##  Mean   :2.0318   Mean   :161.13   Mean   :1.0558   Mean   :11.86  
##  3rd Qu.:2.8725   3rd Qu.:202.46   3rd Qu.:1.4583   3rd Qu.:14.15  
##  Max.   :7.0300   Max.   :279.20   Max.   :5.7180   Max.   :52.96  
##  NA's   :15                        NA's   :17                      
##  urine.protein.creatinine   urodynamic     vitamin.b12    
##  Min.   : 0.0360          Min.   :0.046   Min.   :  1.82  
##  1st Qu.: 0.7738          1st Qu.:0.759   1st Qu.: 38.91  
##  Median : 2.4685          Median :1.282   Median : 47.58  
##  Mean   : 6.9702          Mean   :1.687   Mean   : 49.96  
##  3rd Qu.: 5.5393          3rd Qu.:2.101   3rd Qu.: 58.06  
##  Max.   :67.9560          Max.   :6.675   Max.   :131.28  
##  NA's   :11                               NA's   :1       
##    vitamin.d           egfr       
##  Min.   :  0.05   Min.   :  0.31  
##  1st Qu.:  3.37   1st Qu.:248.91  
##  Median :  8.92   Median :325.98  
##  Mean   : 19.15   Mean   :300.41  
##  3rd Qu.: 23.38   3rd Qu.:375.40  
##  Max.   :193.44   Max.   :774.29  
##  NA's   :6
## impute missing data using random forest algorithm
# Random-forest imputation draws random numbers, so the seed is fixed first;
# without it the imputed values (and anything derived from diag1) differ from
# run to run.
set.seed(2016)
# The first column (pct10, a character identifier) is left out of the
# imputation; diag1 therefore contains only the numeric measure columns.
diag1 <- impute(diag[,-1], method = "randomForest")
# Summary of the imputed table.
summary(diag1)
##       acr               alt        audiology.assessments  barium.enema   
##  Min.   :  0.066   Min.   :  1.9   Min.   : 6.48         Min.   :0.0000  
##  1st Qu.: 28.509   1st Qu.:237.8   1st Qu.:15.30         1st Qu.:0.1200  
##  Median : 41.752   Median :288.7   Median :21.08         Median :0.3600  
##  Mean   : 40.188   Mean   :269.9   Mean   :22.62         Mean   :0.8352  
##  3rd Qu.: 50.947   3rd Qu.:325.4   3rd Qu.:27.84         3rd Qu.:1.1200  
##  Max.   :108.424   Max.   :468.9   Max.   :70.60         Max.   :8.5100  
##  blood.glucose.fasting     ca125       copd.with.record.of.fev
##  Min.   :  0.05        Min.   :0.113   Min.   :65.13          
##  1st Qu.:  5.77        1st Qu.:3.087   1st Qu.:76.88          
##  Median : 63.84        Median :4.124   Median :78.90          
##  Mean   : 65.50        Mean   :4.354   Mean   :79.01          
##  3rd Qu.:108.37        3rd Qu.:5.548   3rd Qu.:81.32          
##  Max.   :203.68        Max.   :9.033   Max.   :87.41          
##  ct.colonoscopy      ct.scan          calcium       capsule.endoscopy
##  Min.   : 0.340   Min.   : 37.20   Min.   :  5.77   Min.   :0.1800   
##  1st Qu.: 2.935   1st Qu.: 62.45   1st Qu.:125.66   1st Qu.:0.6500   
##  Median : 5.653   Median : 72.00   Median :158.34   Median :0.9228   
##  Mean   : 6.774   Mean   : 74.58   Mean   :192.57   Mean   :1.1540   
##  3rd Qu.: 8.720   3rd Qu.: 85.50   3rd Qu.:221.78   3rd Qu.:1.4350   
##  Max.   :24.480   Max.   :132.10   Max.   :880.20   Max.   :5.3100   
##  carbamazepine     cholesterol     colonoscopy...flexisig creatine.kinase 
##  Min.   :0.0420   Min.   :  4.97   Min.   :119.7          Min.   : 0.570  
##  1st Qu.:0.1625   1st Qu.:175.60   1st Qu.:195.1          1st Qu.: 5.535  
##  Median :0.2370   Median :202.84   Median :220.5          Median : 9.120  
##  Mean   :0.2656   Mean   :196.84   Mean   :222.8          Mean   :10.466  
##  3rd Qu.:0.3305   3rd Qu.:223.69   3rd Qu.:245.7          3rd Qu.:12.735  
##  Max.   :1.2460   Max.   :335.80   Max.   :329.3          Max.   :44.490  
##    dexa.scan      diagnostics.electrophysiology    digoxin      
##  Min.   : 0.260   Min.   : 0.0000               Min.   :0.0490  
##  1st Qu.: 4.720   1st Qu.: 0.0415               1st Qu.:0.3950  
##  Median : 6.220   Median : 0.1140               Median :0.6070  
##  Mean   : 6.303   Mean   : 0.7334               Mean   :0.6683  
##  3rd Qu.: 7.855   3rd Qu.: 0.3430               3rd Qu.:0.8945  
##  Max.   :15.240   Max.   :21.4690               Max.   :1.9230  
##  evar.procedures echocardiography endoscopic.ultrasound    ferritin     
##  Min.   :1.620   Min.   : 1.22    Min.   :0.120         Min.   :  3.69  
##  1st Qu.:3.560   1st Qu.:17.36    1st Qu.:1.315         1st Qu.: 41.70  
##  Median :4.360   Median :21.31    Median :2.000         Median : 60.26  
##  Mean   :4.811   Mean   :21.03    Mean   :2.150         Mean   : 61.66  
##  3rd Qu.:5.965   3rd Qu.:25.44    3rd Qu.:2.560         3rd Qu.: 80.73  
##  Max.   :9.980   Max.   :42.02    Max.   :6.760         Max.   :139.50  
##      folate        gastroscopy    gastroscopy.under.55yrs   hba1c.ifcc    
##  Min.   :  0.05   Min.   : 78.2   Min.   :25.20           Min.   :  4.56  
##  1st Qu.: 34.10   1st Qu.:114.1   1st Qu.:32.35           1st Qu.: 68.83  
##  Median : 43.01   Median :128.8   Median :35.60           Median : 82.33  
##  Mean   : 44.40   Mean   :130.9   Mean   :36.48           Mean   : 84.47  
##  3rd Qu.: 51.73   3rd Qu.:144.8   3rd Qu.:39.65           3rd Qu.: 97.02  
##  Max.   :131.36   Max.   :208.3   Max.   :56.20           Max.   :252.41  
##  hdl.cholesterol   haemoglobin       lithium           mri       
##  Min.   :  4.97   Min.   :  7.1   Min.   :0.053   Min.   :22.80  
##  1st Qu.:151.65   1st Qu.:286.6   1st Qu.:1.438   1st Qu.:39.25  
##  Median :179.60   Median :336.7   Median :2.317   Median :45.30  
##  Mean   :177.22   Mean   :329.2   Mean   :2.279   Mean   :45.65  
##  3rd Qu.:213.97   3rd Qu.:369.4   3rd Qu.:3.073   3rd Qu.:50.40  
##  Max.   :270.41   Max.   :643.5   Max.   :6.198   Max.   :99.00  
##  neurophysiology non.obstetric.ultrasound      psa             pth        
##  Min.   :0.070   Min.   : 54.40           Min.   : 0.64   Min.   : 0.043  
##  1st Qu.:1.995   1st Qu.: 97.85           1st Qu.:17.41   1st Qu.: 0.920  
##  Median :2.790   Median :113.50           Median :22.24   Median : 1.720  
##  Mean   :3.220   Mean   :112.89           Mean   :23.04   Mean   : 2.170  
##  3rd Qu.:4.085   3rd Qu.:126.10           3rd Qu.:29.02   3rd Qu.: 2.647  
##  Max.   :9.260   Max.   :161.80           Max.   :46.09   Max.   :19.840  
##  paediatric.endoscopy   phenytoin      proportion.aaa.as.evar
##  Min.   : 32.7        Min.   :0.0430   Min.   :29.40         
##  1st Qu.: 96.5        1st Qu.:0.1805   1st Qu.:56.15         
##  Median :113.9        Median :0.2909   Median :63.74         
##  Mean   :115.8        Mean   :0.3074   Mean   :63.87         
##  3rd Qu.:132.8        3rd Qu.:0.4045   3rd Qu.:74.40         
##  Max.   :237.1        Max.   :0.9500   Max.   :91.70         
##  ratio.colonoscopy.to.flexisig   rheumatoid     serum.creatinine
##  Min.   : 0.610                Min.   : 0.054   Min.   :  8.2   
##  1st Qu.: 1.385                1st Qu.: 3.321   1st Qu.:308.6   
##  Median : 1.760                Median : 8.339   Median :366.3   
##  Mean   : 2.016                Mean   : 7.835   Mean   :367.0   
##  3rd Qu.: 2.305                3rd Qu.:11.100   3rd Qu.:409.9   
##  Max.   :10.470                Max.   :22.502   Max.   :870.7   
##  sleep.studies       t3free           t4free            tsh       
##  Min.   :0.100   Min.   : 0.050   Min.   :  4.90   Min.   :  6.2  
##  1st Qu.:0.795   1st Qu.: 1.895   1st Qu.: 33.75   1st Qu.:174.7  
##  Median :1.540   Median : 3.570   Median : 52.80   Median :199.3  
##  Mean   :1.808   Mean   : 5.235   Mean   : 86.89   Mean   :199.4  
##  3rd Qu.:2.415   3rd Qu.: 6.765   3rd Qu.:142.40   3rd Qu.:224.4  
##  Max.   :7.570   Max.   :53.350   Max.   :256.80   Max.   :355.8  
##     thyroid      triglycerides       troponin          urate      
##  Min.   :0.040   Min.   :  4.86   Min.   :0.0270   Min.   : 0.06  
##  1st Qu.:1.040   1st Qu.:124.63   1st Qu.:0.3550   1st Qu.: 9.09  
##  Median :1.853   Median :164.25   Median :0.6905   Median :11.22  
##  Mean   :2.013   Mean   :161.13   Mean   :1.0180   Mean   :11.86  
##  3rd Qu.:2.745   3rd Qu.:202.46   3rd Qu.:1.3820   3rd Qu.:14.15  
##  Max.   :7.030   Max.   :279.20   Max.   :5.7180   Max.   :52.96  
##  urine.protein.creatinine   urodynamic     vitamin.b12    
##  Min.   : 0.0360          Min.   :0.046   Min.   :  1.82  
##  1st Qu.: 0.8675          1st Qu.:0.759   1st Qu.: 38.92  
##  Median : 2.9110          Median :1.282   Median : 47.54  
##  Mean   : 6.8444          Mean   :1.687   Mean   : 49.94  
##  3rd Qu.: 6.0172          3rd Qu.:2.101   3rd Qu.: 57.88  
##  Max.   :67.9560          Max.   :6.675   Max.   :131.28  
##    vitamin.d            egfr       
##  Min.   :  0.050   Min.   :  0.31  
##  1st Qu.:  3.415   1st Qu.:248.91  
##  Median :  9.280   Median :325.98  
##  Mean   : 18.859   Mean   :300.41  
##  3rd Qu.: 22.880   3rd Qu.:375.40  
##  Max.   :193.440   Max.   :774.29
## Correlation matrix of the imputed measures (cor() defaults to Pearson).
par(mfrow = c(1, 1))  # single-panel layout for the base-graphics plot
## NOTE: the result is stored under the same name as stats::cor(); later
## code refers to this matrix as `cor`, so the name is kept.
cor <- cor(diag1)

## Static correlation plot: shaded cells, diagonal suppressed, variables
## ordered by complete-linkage hierarchical clustering.
corrplot(
  cor,
  method = "shade",
  tl.col = "black",
  tl.cex = 0.5,
  diag = FALSE,
  order = "hclust",
  hclust.method = "complete"
)

## Interactive heatmap of the correlation matrix.
## The palette argument of d3heatmap() is `colors`; the previous spelling
## `colours` is not a d3heatmap() parameter, so the "Blues" palette was
## never applied.
d3heatmap(cor, colors = "Blues")

## Interactive heatmap of the imputed data itself: values scaled per
## column, row dendrogram coloured into 5 groups, rows labelled with
## diag$pct10.
d3heatmap(
  diag1,
  cexRow = .4,
  cexCol = .2,
  k_row = 5,
  scale = "column",
  colors = "RdBu",
  labRow = diag$pct10
)

## Cluster analysis: estimate the optimal number of clusters.
## Evaluates every NbClust index for 3 to 10 clusters, using complete-linkage
## hierarchical clustering on Euclidean distances. The result is not
## assigned, so it is auto-printed.
## NOTE(review): diag1 is passed unscaled; with Euclidean distance the
## measures with the largest magnitudes will dominate - confirm this is
## intended.
NbClust(
  data = diag1,
  distance = "euclidean",
  method = "complete",
  min.nc = 3,
  max.nc = 10,
  index = "all"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 2 proposed 3 as the best number of clusters 
## * 4 proposed 4 as the best number of clusters 
## * 10 proposed 5 as the best number of clusters 
## * 3 proposed 6 as the best number of clusters 
## * 2 proposed 7 as the best number of clusters 
## * 2 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  5 
##  
##  
## *******************************************************************
## $All.index
##        KL      CH Hartigan     CCC    Scott       Marriot       TrCovW
## 3  1.2929 40.2843  24.1285  9.8931 1650.154 1.800745e+195 172087306267
## 4  1.5535 39.0120  17.1423  9.7509 1815.367 1.071905e+195 108941866508
## 5  4.1124 36.7052   4.9446 10.3586 2116.630 2.277756e+194  86806441841
## 6  1.1289 31.1329   4.3894  9.2692 2202.394 1.858665e+194  77762175967
## 7  0.5287 27.2716   7.6990  8.6820 2330.603 1.082304e+194  73083298293
## 8  1.6936 25.5467   4.8421  8.7944 2476.420 5.382035e+193  62218446947
## 9  0.3868 23.5497  12.2390  8.3120 2621.176 2.611661e+193  58399406374
## 10 2.2594 23.9274   5.8290  9.1722 2840.018 7.568581e+192  48924421115
##     TraceW Friedman   Rubin Cindex     DB Silhouette   Duda Pseudot2
## 3  8356449 4280.670 14.6012 0.3292 1.4784     0.2106 0.6640  29.3478
## 4  7185063 4333.104 16.9817 0.3158 1.2719     0.2069 0.8243  17.4762
## 5  6434686 4540.720 18.9620 0.3503 1.2459     0.2166 1.2700 -15.9435
## 6  6223899 4565.965 19.6041 0.4385 1.1061     0.2223 0.6946   2.1982
## 7  6041025 4615.163 20.1976 0.4957 1.1972     0.2184 0.8963   8.5591
## 8  5734431 4718.605 21.2775 0.4881 1.5964     0.1293 0.9182   4.8112
## 9  5546620 4792.660 21.9979 0.5055 1.5388     0.1313 0.7861  14.1531
## 10 5106491 5040.926 23.8939 0.4908 1.5092     0.1466 0.3043   6.8586
##      Beale Ratkowsky      Ball Ptbiserial    Frey McClain   Dunn Hubert
## 3  18.7695    0.1570 2785483.0     0.3904 -0.4322  0.8056 0.1368      0
## 4   7.9451    0.1677 1796265.7     0.4423 -0.0156  0.8270 0.1381      0
## 5  -7.9159    0.1584 1286937.1     0.4897 -0.1525  0.9432 0.1635      0
## 6  13.8244    0.1483 1037316.5     0.5009 -0.1365  0.9558 0.1781      0
## 7   4.3062    0.1447  863003.6     0.5023  4.9650  0.9575 0.2017      0
## 8   3.3008    0.1449  716803.9     0.4000  0.0623  1.7407 0.1859      0
## 9  10.0763    0.1409  616291.1     0.4069  0.3583  1.7975 0.1960      0
## 10 64.7001    0.1379  510649.1     0.4046 -0.0478  2.1139 0.1986      0
##    SDindex   Dindex   SDbw
## 3   0.0213 218.5671 0.8146
## 4   0.0196 204.9616 0.5798
## 5   0.0195 196.4495 0.5736
## 6   0.0172 193.3958 0.4725
## 7   0.0175 191.3438 0.5147
## 8   0.0250 186.1440 0.4915
## 9   0.0248 183.5621 0.4759
## 10  0.0245 176.3109 0.4414
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 3          0.9084             5.8518            0
## 4          0.9207             7.0642            0
## 5          0.9177             6.7268            1
## 6          0.7548             1.6239            0
## 7          0.9172             6.6776            0
## 8          0.9056             5.6317            0
## 9          0.9040             5.5192            0
## 10         0.7121             1.2127            0
## 
## $Best.nc
##                     KL      CH Hartigan     CCC   Scott       Marriot
## Number_clusters 5.0000  3.0000   5.0000  5.0000   5.000  5.000000e+00
## Value_Index     4.1124 40.2843  12.1977 10.3586 301.263 8.022204e+194
##                      TrCovW   TraceW Friedman   Rubin Cindex     DB
## Number_clusters           4      5.0  10.0000  5.0000 4.0000 6.0000
## Value_Index     63145439759 539590.7 248.2651 -1.3381 0.3158 1.1061
##                 Silhouette Duda PseudoT2   Beale Ratkowsky     Ball
## Number_clusters     6.0000 5.00   5.0000  5.0000    4.0000      4.0
## Value_Index         0.2223 1.27 -15.9435 -7.9159    0.1677 989217.2
##                 PtBiserial Frey McClain   Dunn Hubert SDindex Dindex
## Number_clusters     7.0000    2  3.0000 7.0000      0  6.0000      0
## Value_Index         0.5023   NA  0.8056 0.2017      0  0.0172      0
##                    SDbw
## Number_clusters 10.0000
## Value_Index      0.4414
## 
## $Best.partition
##   [1] 1 2 2 3 2 1 2 2 2 2 2 2 4 1 2 1 1 1 1 1 2 1 1 1 1 1 2 1 2 3 1 5 2 1 2
##  [36] 1 1 1 2 1 3 1 2 4 4 2 4 1 1 1 2 1 1 2 2 2 2 2 2 2 1 2 2 2 3 2 1 1 3 2
##  [71] 2 1 2 2 2 1 1 1 1 1 1 2 1 1 1 5 1 1 1 1 1 1 1 1 1 2 2 1 5 5 1 2 2 2 2
## [106] 1 1 2 1 1 1 1 1 2 1 1 1 2 3 2 2 1 1 1 2 1 1 1 1 1 2 2 2 2 5 2 2 1 1 1
## [141] 3 1 5 5 2 1 2 1 1 1 1
## suggests 5 clusters

## Cluster dendrogram (complete linkage).
## dist(cor) measures distances between the rows of the correlation matrix,
## so the items clustered here are the variables, not the rows of diag1.
## NOTE: the fitted tree is stored under the same name as stats::hclust().
par(mfrow = c(1, 1))
hclust <- hclust(d = dist(cor), method = "complete")

## Coloured dendrogram: cut the tree into 5 groups and pass the group
## membership to ColorDendrogram() as y.
## NOTE(review): despite its name, cut10 holds a 5-group cut.
cut10 <- cutree(hclust, k = 5)
ColorDendrogram(
  hclust,
  y = cut10,
  labels = names(cut10),
  branchlength = 3,
  xlab = NULL,
  cex = 0.5
)

## Interactive heatmap of the correlation matrix.
## The palette argument of d3heatmap() is `colors`; the previous spelling
## `colours` is not a d3heatmap() parameter, so the "Blues" palette was
## never applied.
d3heatmap(cor, colors = "Blues")