library(data.table)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:data.table':
##
## between, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(NbClust)
library(d3heatmap)
library(corrplot)
library(knitr)
library(imputeMissings)
##
## Attaching package: 'imputeMissings'
##
## The following object is masked from 'package:dplyr':
##
## compute
library(sparcl)
##=============================================================
##Exploratory analysis of AoV Diagnostics_data
##=============================================================
## Global knitr chunk defaults for everything rendered after this call.
opts_chunk$set(
  cache = TRUE,
  message = FALSE,
  echo = FALSE,
  fig.width = 7,
  fig.height = 5
)
## Import data: read the workbook (path is relative to the working directory)
## and preview the first rows.
## NOTE(review): the table is stored as `diag`, the same name as base::diag.
diag <- read_excel(path = "diagnew.xlsx")
head(diag)
## Source: local data frame [6 x 61]
##
## PCT10 ACR tests Data ALT tests Data Audiology assessments Data
## (chr) (dbl) (dbl) (dbl)
## 1 5A3 72.821 309.7 11.75
## 2 5A4 13.303 221.5 18.57
## 3 5A5 17.880 246.5 21.08
## 4 5A7 0.376 272.3 13.48
## 5 5A8 0.274 215.7 15.71
## 6 5A9 46.010 292.5 14.59
## Variables not shown: BNP tests Data (dbl), Barium Enema Data (dbl), Blood
## glucose fasting tests Data (dbl), Blood glucose tests (2h) Data (dbl),
## CA125 tests Data (dbl), COPD with record of FEV Data (dbl), CT
## Colonoscopy Data (dbl), CT scan Data (dbl), Calcium tests Data (dbl),
## Calprotectin tests Data (dbl), Capsule endoscopy Data (dbl),
## Carbamazepine tests Data (dbl), Cholesterol tests Data (dbl),
## Colonoscopy & FlexiSig Data (dbl), Creatine kinase tests Data (dbl),
## DEXA scan Data (dbl), Diagnostics electrophysiology Data (dbl), Digoxin
## tests Data (dbl), EVAR procedures Data (dbl), Echocardiography tests
## Data (dbl), Endoscopic ultrasound Data (dbl), Ferritin tests Data (dbl),
## Folate RBC tests Data (dbl), Folate tests Data (dbl), Gastroscopy Data
## (dbl), Gastroscopy under 55yrs Data (dbl), HBA1c IFCC tests Data (dbl),
## HDL cholesterol tests Data (dbl), Haemoglobin tests Data (dbl), Lithium
## tests Data (dbl), MRI Data (dbl), Neurophysiology tests Data (dbl), Non
## obstetric ultrasound Data (dbl), PET CT Data (dbl), PSA tests Data
## (dbl), PTH tests Data (dbl), Paediatric endoscopy Data (dbl), Phenytoin
## tests Data (dbl), Proportion AAA as EVAR Data (dbl), RAST tests Data
## (dbl), Ratio Colonoscopy to Flexisig Data (dbl), Rheumatoid tests Data
## (dbl), Serum creatinine tests Data (dbl), Sleep Studies Data (dbl),
## T3free tests Data (dbl), T4free tests Data (dbl), TSH tests Data (dbl),
## Thyroid tests Data (dbl), Triglycerides tests Data (dbl), Troponin tests
## Data (dbl), Urate tests Data (dbl), Urine protein-creatinine tests Data
## (dbl), Urodynamic tests Data (dbl), Valproate tests Data (dbl), Vitamin
## B12 tests Data (dbl), Vitamin D tests Data (dbl), eGFR tests Data (dbl)
## Dimensions (rows, columns) of the imported table
dim(diag)
## [1] 151 61
## Per-column summary statistics; numeric columns also report their NA count
summary(diag)
## PCT10 ACR tests Data ALT tests Data
## Length:151 Min. : 0.066 Min. : 1.9
## Class :character 1st Qu.: 28.058 1st Qu.:237.8
## Mode :character Median : 41.510 Median :288.7
## Mean : 40.121 Mean :269.9
## 3rd Qu.: 50.967 3rd Qu.:325.4
## Max. :108.424 Max. :468.9
## NA's :2
## Audiology assessments Data BNP tests Data Barium Enema Data
## Min. : 6.48 Min. : 0.048 Min. :0.0000
## 1st Qu.:15.30 1st Qu.: 1.721 1st Qu.:0.1200
## Median :21.08 Median : 4.268 Median :0.3600
## Mean :22.62 Mean : 4.433 Mean :0.8352
## 3rd Qu.:27.84 3rd Qu.: 6.331 3rd Qu.:1.1200
## Max. :70.60 Max. :14.368 Max. :8.5100
## NA's :40
## Blood glucose fasting tests Data Blood glucose tests (2h) Data
## Min. : 0.050 Min. : 0.0380
## 1st Qu.: 3.882 1st Qu.: 0.8832
## Median : 61.145 Median : 2.2325
## Mean : 65.740 Mean : 3.1163
## 3rd Qu.:113.480 3rd Qu.: 4.2770
## Max. :203.680 Max. :14.6050
## NA's :13 NA's :69
## CA125 tests Data COPD with record of FEV Data CT Colonoscopy Data
## Min. :0.113 Min. :65.13 Min. : 0.340
## 1st Qu.:3.012 1st Qu.:76.88 1st Qu.: 2.555
## Median :4.038 Median :78.90 Median : 5.280
## Mean :4.371 Mean :79.01 Mean : 6.863
## 3rd Qu.:5.730 3rd Qu.:81.32 3rd Qu.: 9.140
## Max. :9.033 Max. :87.41 Max. :24.480
## NA's :10 NA's :13
## CT scan Data Calcium tests Data Calprotectin tests Data
## Min. : 37.20 Min. : 5.77 Min. :0.01100
## 1st Qu.: 62.45 1st Qu.:125.66 1st Qu.:0.06775
## Median : 72.00 Median :158.34 Median :0.15550
## Mean : 74.58 Mean :192.57 Mean :0.56188
## 3rd Qu.: 85.50 3rd Qu.:221.78 3rd Qu.:0.31925
## Max. :132.10 Max. :880.20 Max. :5.11300
## NA's :127
## Capsule endoscopy Data Carbamazepine tests Data Cholesterol tests Data
## Min. :0.180 Min. :0.0420 Min. : 4.97
## 1st Qu.:0.615 1st Qu.:0.1535 1st Qu.:175.60
## Median :0.910 Median :0.2270 Median :202.84
## Mean :1.178 Mean :0.2670 Mean :196.84
## 3rd Qu.:1.510 3rd Qu.:0.3410 3rd Qu.:223.69
## Max. :5.310 Max. :1.2460 Max. :335.80
## NA's :16 NA's :12
## Colonoscopy & FlexiSig Data Creatine kinase tests Data DEXA scan Data
## Min. :119.7 Min. : 0.570 Min. : 0.260
## 1st Qu.:195.1 1st Qu.: 5.527 1st Qu.: 4.720
## Median :220.5 Median : 9.110 Median : 6.220
## Mean :222.8 Mean :10.473 Mean : 6.303
## 3rd Qu.:245.7 3rd Qu.:12.883 3rd Qu.: 7.855
## Max. :329.3 Max. :44.490 Max. :15.240
## NA's :1
## Diagnostics electrophysiology Data Digoxin tests Data
## Min. : 0.0000 Min. :0.0490
## 1st Qu.: 0.0415 1st Qu.:0.3920
## Median : 0.1140 Median :0.6450
## Mean : 0.7334 Mean :0.6723
## 3rd Qu.: 0.3430 3rd Qu.:0.9225
## Max. :21.4690 Max. :1.9230
## NA's :8
## EVAR procedures Data Echocardiography tests Data
## Min. :1.620 Min. : 1.22
## 1st Qu.:3.560 1st Qu.:17.36
## Median :4.360 Median :21.31
## Mean :4.811 Mean :21.03
## 3rd Qu.:5.965 3rd Qu.:25.44
## Max. :9.980 Max. :42.02
##
## Endoscopic ultrasound Data Ferritin tests Data Folate RBC tests Data
## Min. :0.120 Min. : 3.69 Min. : 0.022
## 1st Qu.:1.312 1st Qu.: 41.70 1st Qu.: 0.085
## Median :2.005 Median : 60.26 Median : 0.545
## Mean :2.152 Mean : 61.66 Mean : 3.997
## 3rd Qu.:2.565 3rd Qu.: 80.73 3rd Qu.: 1.827
## Max. :6.760 Max. :139.50 Max. :52.771
## NA's :1 NA's :77
## Folate tests Data Gastroscopy Data Gastroscopy under 55yrs Data
## Min. : 0.05 Min. : 78.2 Min. :25.20
## 1st Qu.: 34.05 1st Qu.:114.1 1st Qu.:32.35
## Median : 43.34 Median :128.8 Median :35.60
## Mean : 44.42 Mean :130.9 Mean :36.48
## 3rd Qu.: 51.90 3rd Qu.:144.8 3rd Qu.:39.65
## Max. :131.36 Max. :208.3 Max. :56.20
## NA's :1
## HBA1c IFCC tests Data HDL cholesterol tests Data Haemoglobin tests Data
## Min. : 4.56 Min. : 4.97 Min. : 7.1
## 1st Qu.: 69.41 1st Qu.:151.65 1st Qu.:286.6
## Median : 82.47 Median :179.60 Median :336.7
## Mean : 84.69 Mean :177.22 Mean :329.2
## 3rd Qu.: 97.08 3rd Qu.:213.97 3rd Qu.:369.4
## Max. :252.41 Max. :270.41 Max. :643.5
## NA's :2
## Lithium tests Data MRI Data Neurophysiology tests Data
## Min. :0.053 Min. :22.80 Min. :0.070
## 1st Qu.:1.433 1st Qu.:39.25 1st Qu.:1.982
## Median :2.321 Median :45.30 Median :2.785
## Mean :2.282 Mean :45.65 Mean :3.220
## 3rd Qu.:3.074 3rd Qu.:50.40 3rd Qu.:4.088
## Max. :6.198 Max. :99.00 Max. :9.260
## NA's :1 NA's :1
## Non obstetric ultrasound Data PET CT Data PSA tests Data
## Min. : 54.40 Min. : 0.570 Min. : 0.64
## 1st Qu.: 97.85 1st Qu.: 6.160 1st Qu.:17.41
## Median :113.50 Median : 7.885 Median :22.24
## Mean :112.89 Mean : 7.704 Mean :23.04
## 3rd Qu.:126.10 3rd Qu.: 9.553 3rd Qu.:29.02
## Max. :161.80 Max. :13.760 Max. :46.09
## NA's :73
## PTH tests Data Paediatric endoscopy Data Phenytoin tests Data
## Min. : 0.0430 Min. : 32.7 Min. :0.0430
## 1st Qu.: 0.8635 1st Qu.: 96.5 1st Qu.:0.1653
## Median : 1.6420 Median :113.9 Median :0.2885
## Mean : 2.1860 Mean :115.8 Mean :0.3090
## 3rd Qu.: 2.6710 3rd Qu.:132.8 3rd Qu.:0.4295
## Max. :19.8400 Max. :237.1 Max. :0.9500
## NA's :8 NA's :13
## Proportion AAA as EVAR Data RAST tests Data
## Min. :29.40 Min. : 0.0400
## 1st Qu.:55.00 1st Qu.: 0.5115
## Median :63.20 Median : 1.3860
## Mean :63.74 Mean : 2.1844
## 3rd Qu.:75.00 3rd Qu.: 2.9695
## Max. :91.70 Max. :17.2220
## NA's :10 NA's :40
## Ratio Colonoscopy to Flexisig Data Rheumatoid tests Data
## Min. : 0.610 Min. : 0.054
## 1st Qu.: 1.385 1st Qu.: 3.254
## Median : 1.760 Median : 8.555
## Mean : 2.016 Mean : 7.835
## 3rd Qu.: 2.305 3rd Qu.:11.192
## Max. :10.470 Max. :22.502
## NA's :6
## Serum creatinine tests Data Sleep Studies Data T3free tests Data
## Min. : 8.2 Min. :0.100 Min. : 0.050
## 1st Qu.:308.6 1st Qu.:0.795 1st Qu.: 1.870
## Median :366.3 Median :1.540 Median : 3.530
## Mean :367.0 Mean :1.808 Mean : 5.254
## 3rd Qu.:409.9 3rd Qu.:2.415 3rd Qu.: 6.810
## Max. :870.7 Max. :7.570 Max. :53.350
## NA's :2
## T4free tests Data TSH tests Data Thyroid tests Data
## Min. : 4.90 Min. : 6.2 Min. :0.0400
## 1st Qu.: 33.75 1st Qu.:174.7 1st Qu.:0.9175
## Median : 52.80 Median :199.3 Median :1.8600
## Mean : 86.89 Mean :199.4 Mean :2.0318
## 3rd Qu.:142.40 3rd Qu.:224.4 3rd Qu.:2.8725
## Max. :256.80 Max. :355.8 Max. :7.0300
## NA's :15
## Triglycerides tests Data Troponin tests Data Urate tests Data
## Min. : 4.86 Min. :0.0270 Min. : 0.06
## 1st Qu.:124.63 1st Qu.:0.2855 1st Qu.: 9.09
## Median :164.25 Median :0.6730 Median :11.22
## Mean :161.13 Mean :1.0558 Mean :11.86
## 3rd Qu.:202.46 3rd Qu.:1.4583 3rd Qu.:14.15
## Max. :279.20 Max. :5.7180 Max. :52.96
## NA's :17
## Urine protein-creatinine tests Data Urodynamic tests Data
## Min. : 0.0360 Min. :0.046
## 1st Qu.: 0.7738 1st Qu.:0.759
## Median : 2.4685 Median :1.282
## Mean : 6.9702 Mean :1.687
## 3rd Qu.: 5.5393 3rd Qu.:2.101
## Max. :67.9560 Max. :6.675
## NA's :11
## Valproate tests Data Vitamin B12 tests Data Vitamin D tests Data
## Min. :0.0200 Min. : 1.82 Min. : 0.05
## 1st Qu.:0.1310 1st Qu.: 38.91 1st Qu.: 3.37
## Median :0.2640 Median : 47.58 Median : 8.92
## Mean :0.3231 Mean : 49.96 Mean : 19.15
## 3rd Qu.:0.4250 3rd Qu.: 58.06 3rd Qu.: 23.38
## Max. :1.4560 Max. :131.28 Max. :193.44
## NA's :32 NA's :1 NA's :6
## eGFR tests Data
## Min. : 0.31
## 1st Qu.:248.91
## Median :325.98
## Mean :300.41
## 3rd Qu.:375.40
## Max. :774.29
##
## Tidy column names: lower-case them, make them syntactically valid, then
## strip the literal ".data" and ".tests" fragments.
## fixed = TRUE is required: read as a regular expression, the leading "."
## matches any character, so a name such as "metadata" would lose "adata".
names(diag) <- make.names(tolower(colnames(diag)))
names(diag) <- gsub(".data", "", names(diag), fixed = TRUE)
names(diag) <- gsub(".tests", "", names(diag), fixed = TRUE)
head(diag)
## Source: local data frame [6 x 61]
##
## pct10 acr alt audiology.assessments bnp barium.enema
## (chr) (dbl) (dbl) (dbl) (dbl) (dbl)
## 1 5A3 72.821 309.7 11.75 NA 2.51
## 2 5A4 13.303 221.5 18.57 NA 1.59
## 3 5A5 17.880 246.5 21.08 NA 0.04
## 4 5A7 0.376 272.3 13.48 13.593 0.04
## 5 5A8 0.274 215.7 15.71 3.950 0.63
## 6 5A9 46.010 292.5 14.59 1.313 0.05
## Variables not shown: blood.glucose.fasting (dbl), blood.glucose..2h.
## (dbl), ca125 (dbl), copd.with.record.of.fev (dbl), ct.colonoscopy (dbl),
## ct.scan (dbl), calcium (dbl), calprotectin (dbl), capsule.endoscopy
## (dbl), carbamazepine (dbl), cholesterol (dbl), colonoscopy...flexisig
## (dbl), creatine.kinase (dbl), dexa.scan (dbl),
## diagnostics.electrophysiology (dbl), digoxin (dbl), evar.procedures
## (dbl), echocardiography (dbl), endoscopic.ultrasound (dbl), ferritin
## (dbl), folate.rbc (dbl), folate (dbl), gastroscopy (dbl),
## gastroscopy.under.55yrs (dbl), hba1c.ifcc (dbl), hdl.cholesterol (dbl),
## haemoglobin (dbl), lithium (dbl), mri (dbl), neurophysiology (dbl),
## non.obstetric.ultrasound (dbl), pet.ct (dbl), psa (dbl), pth (dbl),
## paediatric.endoscopy (dbl), phenytoin (dbl), proportion.aaa.as.evar
## (dbl), rast (dbl), ratio.colonoscopy.to.flexisig (dbl), rheumatoid
## (dbl), serum.creatinine (dbl), sleep.studies (dbl), t3free (dbl), t4free
## (dbl), tsh (dbl), thyroid (dbl), triglycerides (dbl), troponin (dbl),
## urate (dbl), urine.protein.creatinine (dbl), urodynamic (dbl), valproate
## (dbl), vitamin.b12 (dbl), vitamin.d (dbl), egfr (dbl)
## Compact structure listing: type and leading values of every column
str(diag)
## Classes 'tbl_df', 'tbl' and 'data.frame': 151 obs. of 61 variables:
## $ pct10 : chr "5A3" "5A4" "5A5" "5A7" ...
## $ acr : num 72.821 13.303 17.88 0.376 0.274 ...
## $ alt : num 310 222 246 272 216 ...
## $ audiology.assessments : num 11.8 18.6 21.1 13.5 15.7 ...
## $ bnp : num NA NA NA 13.59 3.95 ...
## $ barium.enema : num 2.51 1.59 0.04 0.04 0.63 0.05 0.22 0.07 1.17 0.08 ...
## $ blood.glucose.fasting : num 56.4 130.1 NA 142.5 91.8 ...
## $ blood.glucose..2h. : num 3.5 2.02 NA 2.92 4.28 ...
## $ ca125 : num 3.439 4.038 0.238 3.104 4.225 ...
## $ copd.with.record.of.fev : num 74.5 78.3 81.9 76.5 85.1 ...
## $ ct.colonoscopy : num 4.2 3.67 5.15 3.5 1.49 ...
## $ ct.scan : num 132.1 81.6 72 72.9 54.7 ...
## $ calcium : num 125.4 142.2 133.5 93.8 67.4 ...
## $ calprotectin : num 0.237 NA NA NA NA NA NA NA NA NA ...
## $ capsule.endoscopy : num NA 1.45 1.81 2.7 0.67 2.3 1.42 1.85 1.04 2.43 ...
## $ carbamazepine : num 0.415 0.297 0.079 0.282 0.274 0.451 0.277 0.463 0.078 0.108 ...
## $ cholesterol : num 209 210 175 210 169 ...
## $ colonoscopy...flexisig : num 192 254 226 266 226 ...
## $ creatine.kinase : num 9.61 32.84 11.68 16.56 9.99 ...
## $ dexa.scan : num 5.64 5.71 3.02 8.63 6.29 ...
## $ diagnostics.electrophysiology: num 0.014 0.797 0.189 0.128 0.621 ...
## $ digoxin : num 0.534 0.534 NA 0.706 0.549 0.657 0.443 0.309 0.156 0.433 ...
## $ evar.procedures : num 5.31 5.23 5.45 8.89 9.56 5.92 3.96 5.9 6.76 4.36 ...
## $ echocardiography : num 19.3 26.4 26.8 19.7 25.5 ...
## $ endoscopic.ultrasound : num 2.44 3.09 2.02 1.02 1.05 1.79 1.65 1.33 2.23 2.18 ...
## $ ferritin : num 83.6 61.1 42.4 73.4 69.7 ...
## $ folate.rbc : num NA NA 0.079 NA NA NA NA NA NA NA ...
## $ folate : num 43.8 62.2 36 38.1 44 ...
## $ gastroscopy : num 137 139 140 136 136 ...
## $ gastroscopy.under.55yrs : num 33.3 35.3 40.8 35.6 44.5 42.1 39 37.5 48.4 54.9 ...
## $ hba1c.ifcc : num 83.7 66.4 34.1 92.8 93.4 ...
## $ hdl.cholesterol : num 173 151 169 168 153 ...
## $ haemoglobin : num 376 256 277 295 278 ...
## $ lithium : num 2.609 0.891 2.622 2.352 1.591 ...
## $ mri : num 57.3 62 50.2 50.8 46.1 66.5 41.3 53.2 55.2 45.3 ...
## $ neurophysiology : num 2.92 3.92 4.33 2.6 1.1 2.07 2.58 1.73 3.21 1.12 ...
## $ non.obstetric.ultrasound : num 126 155 121.8 106.8 97.8 ...
## $ pet.ct : num NA NA NA NA NA NA NA NA NA NA ...
## $ psa : num 33.6 26.4 21 30.6 18.7 ...
## $ pth : num 2.965 NA 0.795 0.47 1.207 ...
## $ paediatric.endoscopy : num 73.9 119 113.6 107.1 124.4 ...
## $ phenytoin : num 0.178 0.238 0.079 0.376 0.329 0.534 0.166 0.103 0.391 0.271 ...
## $ proportion.aaa.as.evar : num 64.1 68.1 NA 77.6 60 88 60.8 82.5 76.9 76.9 ...
## $ rast : num 1.72 NA 7.788 0.376 0.768 ...
## $ ratio.colonoscopy.to.flexisig: num 1.31 2.23 2.39 4.15 1.61 1.88 3.25 2.52 2.13 2.46 ...
## $ rheumatoid : num 13.758 NA 0.477 15.663 11.741 ...
## $ serum.creatinine : num 401 302 279 659 469 ...
## $ sleep.studies : num 4.55 1.48 0.93 1.54 2.64 1.19 1.89 1.78 1.7 1.19 ...
## $ t3free : num 4.63 0.89 4.21 5.74 3.79 4.76 1.77 2.99 0.7 1.14 ...
## $ t4free : num 40.6 31.5 14.6 219 170.7 ...
## $ tsh : num 188 172 180 219 171 ...
## $ thyroid : num 2.79 6.3 1.67 3.81 3.02 2.83 1.88 0.77 5.4 NA ...
## $ triglycerides : num 95.1 152.4 168.7 209.4 168.8 ...
## $ troponin : num 2.372 NA 1.192 0.282 0.658 ...
## $ urate : num 10.14 2.38 13.03 16.27 21.45 ...
## $ urine.protein.creatinine : num 8.539 1.96 0.079 3.34 0.219 ...
## $ urodynamic : num 2.67 0.69 6.23 2.56 0.99 ...
## $ valproate : num NA 0.178 NA 0.282 0.165 0.534 0.332 0.309 0.156 0.271 ...
## $ vitamin.b12 : num 51.7 62.2 39.7 41.2 47.1 ...
## $ vitamin.d : num 10.85 8.08 0.64 4.28 14.81 ...
## $ egfr : num 384.21 1.25 263.67 323.56 232.36 ...
## Missing data: fraction of NA values in each column (named by column).
## colMeans(is.na(.)) averages the logical NA mask directly; apply() would
## first coerce the whole data frame, including the character pct10 column,
## to a character matrix.
missData <- colMeans(is.na(diag))
plot(missData, pch = 20)

## Columns with > 20% missing data: record their positions and tabulate them
## (row names are the column names, values are the column indices).
miss1 <- which(missData > 0.2)
kable(x = as.data.frame(miss1))
## |                   | miss1|
## |:------------------|-----:|
## |bnp                |     5|
## |blood.glucose..2h. |     8|
## |calprotectin       |    14|
## |folate.rbc         |    27|
## |pet.ct             |    38|
## |rast               |    44|
## |valproate          |    58|
## Drop the columns flagged in miss1.
## Guard the empty case: when nothing is flagged, -miss1 is an empty index
## and diag[, integer(0)] would select zero columns instead of keeping all.
if (length(miss1) > 0) {
  diag <- diag[, -miss1]
}
## Check the reduced table: dimensions, then per-column summaries
dim(diag)
## [1] 151 54
summary(diag)
## pct10 acr alt
## Length:151 Min. : 0.066 Min. : 1.9
## Class :character 1st Qu.: 28.058 1st Qu.:237.8
## Mode :character Median : 41.510 Median :288.7
## Mean : 40.121 Mean :269.9
## 3rd Qu.: 50.967 3rd Qu.:325.4
## Max. :108.424 Max. :468.9
## NA's :2
## audiology.assessments barium.enema blood.glucose.fasting
## Min. : 6.48 Min. :0.0000 Min. : 0.050
## 1st Qu.:15.30 1st Qu.:0.1200 1st Qu.: 3.882
## Median :21.08 Median :0.3600 Median : 61.145
## Mean :22.62 Mean :0.8352 Mean : 65.740
## 3rd Qu.:27.84 3rd Qu.:1.1200 3rd Qu.:113.480
## Max. :70.60 Max. :8.5100 Max. :203.680
## NA's :13
## ca125 copd.with.record.of.fev ct.colonoscopy ct.scan
## Min. :0.113 Min. :65.13 Min. : 0.340 Min. : 37.20
## 1st Qu.:3.012 1st Qu.:76.88 1st Qu.: 2.555 1st Qu.: 62.45
## Median :4.038 Median :78.90 Median : 5.280 Median : 72.00
## Mean :4.371 Mean :79.01 Mean : 6.863 Mean : 74.58
## 3rd Qu.:5.730 3rd Qu.:81.32 3rd Qu.: 9.140 3rd Qu.: 85.50
## Max. :9.033 Max. :87.41 Max. :24.480 Max. :132.10
## NA's :10 NA's :13
## calcium capsule.endoscopy carbamazepine cholesterol
## Min. : 5.77 Min. :0.180 Min. :0.0420 Min. : 4.97
## 1st Qu.:125.66 1st Qu.:0.615 1st Qu.:0.1535 1st Qu.:175.60
## Median :158.34 Median :0.910 Median :0.2270 Median :202.84
## Mean :192.57 Mean :1.178 Mean :0.2670 Mean :196.84
## 3rd Qu.:221.78 3rd Qu.:1.510 3rd Qu.:0.3410 3rd Qu.:223.69
## Max. :880.20 Max. :5.310 Max. :1.2460 Max. :335.80
## NA's :16 NA's :12
## colonoscopy...flexisig creatine.kinase dexa.scan
## Min. :119.7 Min. : 0.570 Min. : 0.260
## 1st Qu.:195.1 1st Qu.: 5.527 1st Qu.: 4.720
## Median :220.5 Median : 9.110 Median : 6.220
## Mean :222.8 Mean :10.473 Mean : 6.303
## 3rd Qu.:245.7 3rd Qu.:12.883 3rd Qu.: 7.855
## Max. :329.3 Max. :44.490 Max. :15.240
## NA's :1
## diagnostics.electrophysiology digoxin evar.procedures
## Min. : 0.0000 Min. :0.0490 Min. :1.620
## 1st Qu.: 0.0415 1st Qu.:0.3920 1st Qu.:3.560
## Median : 0.1140 Median :0.6450 Median :4.360
## Mean : 0.7334 Mean :0.6723 Mean :4.811
## 3rd Qu.: 0.3430 3rd Qu.:0.9225 3rd Qu.:5.965
## Max. :21.4690 Max. :1.9230 Max. :9.980
## NA's :8
## echocardiography endoscopic.ultrasound ferritin folate
## Min. : 1.22 Min. :0.120 Min. : 3.69 Min. : 0.05
## 1st Qu.:17.36 1st Qu.:1.312 1st Qu.: 41.70 1st Qu.: 34.05
## Median :21.31 Median :2.005 Median : 60.26 Median : 43.34
## Mean :21.03 Mean :2.152 Mean : 61.66 Mean : 44.42
## 3rd Qu.:25.44 3rd Qu.:2.565 3rd Qu.: 80.73 3rd Qu.: 51.90
## Max. :42.02 Max. :6.760 Max. :139.50 Max. :131.36
## NA's :1 NA's :1
## gastroscopy gastroscopy.under.55yrs hba1c.ifcc hdl.cholesterol
## Min. : 78.2 Min. :25.20 Min. : 4.56 Min. : 4.97
## 1st Qu.:114.1 1st Qu.:32.35 1st Qu.: 69.41 1st Qu.:151.65
## Median :128.8 Median :35.60 Median : 82.47 Median :179.60
## Mean :130.9 Mean :36.48 Mean : 84.69 Mean :177.22
## 3rd Qu.:144.8 3rd Qu.:39.65 3rd Qu.: 97.08 3rd Qu.:213.97
## Max. :208.3 Max. :56.20 Max. :252.41 Max. :270.41
## NA's :2
## haemoglobin lithium mri neurophysiology
## Min. : 7.1 Min. :0.053 Min. :22.80 Min. :0.070
## 1st Qu.:286.6 1st Qu.:1.433 1st Qu.:39.25 1st Qu.:1.982
## Median :336.7 Median :2.321 Median :45.30 Median :2.785
## Mean :329.2 Mean :2.282 Mean :45.65 Mean :3.220
## 3rd Qu.:369.4 3rd Qu.:3.074 3rd Qu.:50.40 3rd Qu.:4.088
## Max. :643.5 Max. :6.198 Max. :99.00 Max. :9.260
## NA's :1 NA's :1
## non.obstetric.ultrasound psa pth
## Min. : 54.40 Min. : 0.64 Min. : 0.0430
## 1st Qu.: 97.85 1st Qu.:17.41 1st Qu.: 0.8635
## Median :113.50 Median :22.24 Median : 1.6420
## Mean :112.89 Mean :23.04 Mean : 2.1860
## 3rd Qu.:126.10 3rd Qu.:29.02 3rd Qu.: 2.6710
## Max. :161.80 Max. :46.09 Max. :19.8400
## NA's :8
## paediatric.endoscopy phenytoin proportion.aaa.as.evar
## Min. : 32.7 Min. :0.0430 Min. :29.40
## 1st Qu.: 96.5 1st Qu.:0.1653 1st Qu.:55.00
## Median :113.9 Median :0.2885 Median :63.20
## Mean :115.8 Mean :0.3090 Mean :63.74
## 3rd Qu.:132.8 3rd Qu.:0.4295 3rd Qu.:75.00
## Max. :237.1 Max. :0.9500 Max. :91.70
## NA's :13 NA's :10
## ratio.colonoscopy.to.flexisig rheumatoid serum.creatinine
## Min. : 0.610 Min. : 0.054 Min. : 8.2
## 1st Qu.: 1.385 1st Qu.: 3.254 1st Qu.:308.6
## Median : 1.760 Median : 8.555 Median :366.3
## Mean : 2.016 Mean : 7.835 Mean :367.0
## 3rd Qu.: 2.305 3rd Qu.:11.192 3rd Qu.:409.9
## Max. :10.470 Max. :22.502 Max. :870.7
## NA's :6
## sleep.studies t3free t4free tsh
## Min. :0.100 Min. : 0.050 Min. : 4.90 Min. : 6.2
## 1st Qu.:0.795 1st Qu.: 1.870 1st Qu.: 33.75 1st Qu.:174.7
## Median :1.540 Median : 3.530 Median : 52.80 Median :199.3
## Mean :1.808 Mean : 5.254 Mean : 86.89 Mean :199.4
## 3rd Qu.:2.415 3rd Qu.: 6.810 3rd Qu.:142.40 3rd Qu.:224.4
## Max. :7.570 Max. :53.350 Max. :256.80 Max. :355.8
## NA's :2
## thyroid triglycerides troponin urate
## Min. :0.0400 Min. : 4.86 Min. :0.0270 Min. : 0.06
## 1st Qu.:0.9175 1st Qu.:124.63 1st Qu.:0.2855 1st Qu.: 9.09
## Median :1.8600 Median :164.25 Median :0.6730 Median :11.22
## Mean :2.0318 Mean :161.13 Mean :1.0558 Mean :11.86
## 3rd Qu.:2.8725 3rd Qu.:202.46 3rd Qu.:1.4583 3rd Qu.:14.15
## Max. :7.0300 Max. :279.20 Max. :5.7180 Max. :52.96
## NA's :15 NA's :17
## urine.protein.creatinine urodynamic vitamin.b12
## Min. : 0.0360 Min. :0.046 Min. : 1.82
## 1st Qu.: 0.7738 1st Qu.:0.759 1st Qu.: 38.91
## Median : 2.4685 Median :1.282 Median : 47.58
## Mean : 6.9702 Mean :1.687 Mean : 49.96
## 3rd Qu.: 5.5393 3rd Qu.:2.101 3rd Qu.: 58.06
## Max. :67.9560 Max. :6.675 Max. :131.28
## NA's :11 NA's :1
## vitamin.d egfr
## Min. : 0.05 Min. : 0.31
## 1st Qu.: 3.37 1st Qu.:248.91
## Median : 8.92 Median :325.98
## Mean : 19.15 Mean :300.41
## 3rd Qu.: 23.38 3rd Qu.:375.40
## Max. :193.44 Max. :774.29
## NA's :6
## Fill the remaining gaps with the random-forest method of impute().
## The first column (the pct10 identifier) is left out of the input.
## NOTE(review): no RNG seed is set before this call, so the imputed values
## are presumably not reproducible between runs -- confirm, consider set.seed().
diag1 <- impute(data = diag[, -1], method = "randomForest")
summary(diag1)
## acr alt audiology.assessments barium.enema
## Min. : 0.066 Min. : 1.9 Min. : 6.48 Min. :0.0000
## 1st Qu.: 28.509 1st Qu.:237.8 1st Qu.:15.30 1st Qu.:0.1200
## Median : 41.752 Median :288.7 Median :21.08 Median :0.3600
## Mean : 40.188 Mean :269.9 Mean :22.62 Mean :0.8352
## 3rd Qu.: 50.947 3rd Qu.:325.4 3rd Qu.:27.84 3rd Qu.:1.1200
## Max. :108.424 Max. :468.9 Max. :70.60 Max. :8.5100
## blood.glucose.fasting ca125 copd.with.record.of.fev
## Min. : 0.05 Min. :0.113 Min. :65.13
## 1st Qu.: 5.77 1st Qu.:3.087 1st Qu.:76.88
## Median : 63.84 Median :4.124 Median :78.90
## Mean : 65.50 Mean :4.354 Mean :79.01
## 3rd Qu.:108.37 3rd Qu.:5.548 3rd Qu.:81.32
## Max. :203.68 Max. :9.033 Max. :87.41
## ct.colonoscopy ct.scan calcium capsule.endoscopy
## Min. : 0.340 Min. : 37.20 Min. : 5.77 Min. :0.1800
## 1st Qu.: 2.935 1st Qu.: 62.45 1st Qu.:125.66 1st Qu.:0.6500
## Median : 5.653 Median : 72.00 Median :158.34 Median :0.9228
## Mean : 6.774 Mean : 74.58 Mean :192.57 Mean :1.1540
## 3rd Qu.: 8.720 3rd Qu.: 85.50 3rd Qu.:221.78 3rd Qu.:1.4350
## Max. :24.480 Max. :132.10 Max. :880.20 Max. :5.3100
## carbamazepine cholesterol colonoscopy...flexisig creatine.kinase
## Min. :0.0420 Min. : 4.97 Min. :119.7 Min. : 0.570
## 1st Qu.:0.1625 1st Qu.:175.60 1st Qu.:195.1 1st Qu.: 5.535
## Median :0.2370 Median :202.84 Median :220.5 Median : 9.120
## Mean :0.2656 Mean :196.84 Mean :222.8 Mean :10.466
## 3rd Qu.:0.3305 3rd Qu.:223.69 3rd Qu.:245.7 3rd Qu.:12.735
## Max. :1.2460 Max. :335.80 Max. :329.3 Max. :44.490
## dexa.scan diagnostics.electrophysiology digoxin
## Min. : 0.260 Min. : 0.0000 Min. :0.0490
## 1st Qu.: 4.720 1st Qu.: 0.0415 1st Qu.:0.3950
## Median : 6.220 Median : 0.1140 Median :0.6070
## Mean : 6.303 Mean : 0.7334 Mean :0.6683
## 3rd Qu.: 7.855 3rd Qu.: 0.3430 3rd Qu.:0.8945
## Max. :15.240 Max. :21.4690 Max. :1.9230
## evar.procedures echocardiography endoscopic.ultrasound ferritin
## Min. :1.620 Min. : 1.22 Min. :0.120 Min. : 3.69
## 1st Qu.:3.560 1st Qu.:17.36 1st Qu.:1.315 1st Qu.: 41.70
## Median :4.360 Median :21.31 Median :2.000 Median : 60.26
## Mean :4.811 Mean :21.03 Mean :2.150 Mean : 61.66
## 3rd Qu.:5.965 3rd Qu.:25.44 3rd Qu.:2.560 3rd Qu.: 80.73
## Max. :9.980 Max. :42.02 Max. :6.760 Max. :139.50
## folate gastroscopy gastroscopy.under.55yrs hba1c.ifcc
## Min. : 0.05 Min. : 78.2 Min. :25.20 Min. : 4.56
## 1st Qu.: 34.10 1st Qu.:114.1 1st Qu.:32.35 1st Qu.: 68.83
## Median : 43.01 Median :128.8 Median :35.60 Median : 82.33
## Mean : 44.40 Mean :130.9 Mean :36.48 Mean : 84.47
## 3rd Qu.: 51.73 3rd Qu.:144.8 3rd Qu.:39.65 3rd Qu.: 97.02
## Max. :131.36 Max. :208.3 Max. :56.20 Max. :252.41
## hdl.cholesterol haemoglobin lithium mri
## Min. : 4.97 Min. : 7.1 Min. :0.053 Min. :22.80
## 1st Qu.:151.65 1st Qu.:286.6 1st Qu.:1.438 1st Qu.:39.25
## Median :179.60 Median :336.7 Median :2.317 Median :45.30
## Mean :177.22 Mean :329.2 Mean :2.279 Mean :45.65
## 3rd Qu.:213.97 3rd Qu.:369.4 3rd Qu.:3.073 3rd Qu.:50.40
## Max. :270.41 Max. :643.5 Max. :6.198 Max. :99.00
## neurophysiology non.obstetric.ultrasound psa pth
## Min. :0.070 Min. : 54.40 Min. : 0.64 Min. : 0.043
## 1st Qu.:1.995 1st Qu.: 97.85 1st Qu.:17.41 1st Qu.: 0.920
## Median :2.790 Median :113.50 Median :22.24 Median : 1.720
## Mean :3.220 Mean :112.89 Mean :23.04 Mean : 2.170
## 3rd Qu.:4.085 3rd Qu.:126.10 3rd Qu.:29.02 3rd Qu.: 2.647
## Max. :9.260 Max. :161.80 Max. :46.09 Max. :19.840
## paediatric.endoscopy phenytoin proportion.aaa.as.evar
## Min. : 32.7 Min. :0.0430 Min. :29.40
## 1st Qu.: 96.5 1st Qu.:0.1805 1st Qu.:56.15
## Median :113.9 Median :0.2909 Median :63.74
## Mean :115.8 Mean :0.3074 Mean :63.87
## 3rd Qu.:132.8 3rd Qu.:0.4045 3rd Qu.:74.40
## Max. :237.1 Max. :0.9500 Max. :91.70
## ratio.colonoscopy.to.flexisig rheumatoid serum.creatinine
## Min. : 0.610 Min. : 0.054 Min. : 8.2
## 1st Qu.: 1.385 1st Qu.: 3.321 1st Qu.:308.6
## Median : 1.760 Median : 8.339 Median :366.3
## Mean : 2.016 Mean : 7.835 Mean :367.0
## 3rd Qu.: 2.305 3rd Qu.:11.100 3rd Qu.:409.9
## Max. :10.470 Max. :22.502 Max. :870.7
## sleep.studies t3free t4free tsh
## Min. :0.100 Min. : 0.050 Min. : 4.90 Min. : 6.2
## 1st Qu.:0.795 1st Qu.: 1.895 1st Qu.: 33.75 1st Qu.:174.7
## Median :1.540 Median : 3.570 Median : 52.80 Median :199.3
## Mean :1.808 Mean : 5.235 Mean : 86.89 Mean :199.4
## 3rd Qu.:2.415 3rd Qu.: 6.765 3rd Qu.:142.40 3rd Qu.:224.4
## Max. :7.570 Max. :53.350 Max. :256.80 Max. :355.8
## thyroid triglycerides troponin urate
## Min. :0.040 Min. : 4.86 Min. :0.0270 Min. : 0.06
## 1st Qu.:1.040 1st Qu.:124.63 1st Qu.:0.3550 1st Qu.: 9.09
## Median :1.853 Median :164.25 Median :0.6905 Median :11.22
## Mean :2.013 Mean :161.13 Mean :1.0180 Mean :11.86
## 3rd Qu.:2.745 3rd Qu.:202.46 3rd Qu.:1.3820 3rd Qu.:14.15
## Max. :7.030 Max. :279.20 Max. :5.7180 Max. :52.96
## urine.protein.creatinine urodynamic vitamin.b12
## Min. : 0.0360 Min. :0.046 Min. : 1.82
## 1st Qu.: 0.8675 1st Qu.:0.759 1st Qu.: 38.92
## Median : 2.9110 Median :1.282 Median : 47.54
## Mean : 6.8444 Mean :1.687 Mean : 49.94
## 3rd Qu.: 6.0172 3rd Qu.:2.101 3rd Qu.: 57.88
## Max. :67.9560 Max. :6.675 Max. :131.28
## vitamin.d egfr
## Min. : 0.050 Min. : 0.31
## 1st Qu.: 3.415 1st Qu.:248.91
## Median : 9.280 Median :325.98
## Mean : 18.859 Mean :300.41
## 3rd Qu.: 22.880 3rd Qu.:375.40
## Max. :193.440 Max. :774.29
## Correlation matrix of the imputed columns, after resetting the plotting
## layout to a single panel.
## NOTE(review): the matrix is stored as `cor`, the same name as the function
## that computes it; later code refers to the matrix by that name.
par(mfrow = c(1, 1))
cor <- cor(x = diag1)
## Plot the correlation matrix: shaded cells, diagonal omitted, variables
## ordered by complete-linkage hierarchical clustering.
corrplot(
  cor,
  method = "shade",
  tl.col = "black",
  tl.cex = 0.5,
  diag = FALSE,
  order = "hclust",
  hclust.method = "complete"
)

## Interactive heatmap of the correlation matrix.
## d3heatmap() takes its palette as `colors`; the earlier spelling
## `colours = "Blues"` was not matched to that argument, so the requested
## palette was not applied.
d3heatmap(cor, colors = "Blues")
## Interactive heatmap of the imputed data, scaled by column, with rows
## labelled by pct10 and k_row = 5.
d3heatmap(
  diag1,
  cexRow = .4,
  cexCol = .2,
  k_row = 5,
  scale = "column",
  colors = "RdBu",
  labRow = diag$pct10
)
## Cluster analysis: estimate the optimal number of clusters by letting
## NbClust evaluate all its indices for 3 to 10 clusters, using
## complete-linkage clustering on Euclidean distances.
## NOTE(review): diag1 is passed unscaled although its columns span very
## different ranges -- confirm whether scale(diag1) was intended.
NbClust(
  data = diag1,
  distance = "euclidean",
  min.nc = 3,
  max.nc = 10,
  method = "complete",
  index = "all"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 2 proposed 3 as the best number of clusters
## * 4 proposed 4 as the best number of clusters
## * 10 proposed 5 as the best number of clusters
## * 3 proposed 6 as the best number of clusters
## * 2 proposed 7 as the best number of clusters
## * 2 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 5
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW
## 3 1.2929 40.2843 24.1285 9.8931 1650.154 1.800745e+195 172087306267
## 4 1.5535 39.0120 17.1423 9.7509 1815.367 1.071905e+195 108941866508
## 5 4.1124 36.7052 4.9446 10.3586 2116.630 2.277756e+194 86806441841
## 6 1.1289 31.1329 4.3894 9.2692 2202.394 1.858665e+194 77762175967
## 7 0.5287 27.2716 7.6990 8.6820 2330.603 1.082304e+194 73083298293
## 8 1.6936 25.5467 4.8421 8.7944 2476.420 5.382035e+193 62218446947
## 9 0.3868 23.5497 12.2390 8.3120 2621.176 2.611661e+193 58399406374
## 10 2.2594 23.9274 5.8290 9.1722 2840.018 7.568581e+192 48924421115
## TraceW Friedman Rubin Cindex DB Silhouette Duda Pseudot2
## 3 8356449 4280.670 14.6012 0.3292 1.4784 0.2106 0.6640 29.3478
## 4 7185063 4333.104 16.9817 0.3158 1.2719 0.2069 0.8243 17.4762
## 5 6434686 4540.720 18.9620 0.3503 1.2459 0.2166 1.2700 -15.9435
## 6 6223899 4565.965 19.6041 0.4385 1.1061 0.2223 0.6946 2.1982
## 7 6041025 4615.163 20.1976 0.4957 1.1972 0.2184 0.8963 8.5591
## 8 5734431 4718.605 21.2775 0.4881 1.5964 0.1293 0.9182 4.8112
## 9 5546620 4792.660 21.9979 0.5055 1.5388 0.1313 0.7861 14.1531
## 10 5106491 5040.926 23.8939 0.4908 1.5092 0.1466 0.3043 6.8586
## Beale Ratkowsky Ball Ptbiserial Frey McClain Dunn Hubert
## 3 18.7695 0.1570 2785483.0 0.3904 -0.4322 0.8056 0.1368 0
## 4 7.9451 0.1677 1796265.7 0.4423 -0.0156 0.8270 0.1381 0
## 5 -7.9159 0.1584 1286937.1 0.4897 -0.1525 0.9432 0.1635 0
## 6 13.8244 0.1483 1037316.5 0.5009 -0.1365 0.9558 0.1781 0
## 7 4.3062 0.1447 863003.6 0.5023 4.9650 0.9575 0.2017 0
## 8 3.3008 0.1449 716803.9 0.4000 0.0623 1.7407 0.1859 0
## 9 10.0763 0.1409 616291.1 0.4069 0.3583 1.7975 0.1960 0
## 10 64.7001 0.1379 510649.1 0.4046 -0.0478 2.1139 0.1986 0
## SDindex Dindex SDbw
## 3 0.0213 218.5671 0.8146
## 4 0.0196 204.9616 0.5798
## 5 0.0195 196.4495 0.5736
## 6 0.0172 193.3958 0.4725
## 7 0.0175 191.3438 0.5147
## 8 0.0250 186.1440 0.4915
## 9 0.0248 183.5621 0.4759
## 10 0.0245 176.3109 0.4414
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 3 0.9084 5.8518 0
## 4 0.9207 7.0642 0
## 5 0.9177 6.7268 1
## 6 0.7548 1.6239 0
## 7 0.9172 6.6776 0
## 8 0.9056 5.6317 0
## 9 0.9040 5.5192 0
## 10 0.7121 1.2127 0
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot
## Number_clusters 5.0000 3.0000 5.0000 5.0000 5.000 5.000000e+00
## Value_Index 4.1124 40.2843 12.1977 10.3586 301.263 8.022204e+194
## TrCovW TraceW Friedman Rubin Cindex DB
## Number_clusters 4 5.0 10.0000 5.0000 4.0000 6.0000
## Value_Index 63145439759 539590.7 248.2651 -1.3381 0.3158 1.1061
## Silhouette Duda PseudoT2 Beale Ratkowsky Ball
## Number_clusters 6.0000 5.00 5.0000 5.0000 4.0000 4.0
## Value_Index 0.2223 1.27 -15.9435 -7.9159 0.1677 989217.2
## PtBiserial Frey McClain Dunn Hubert SDindex Dindex
## Number_clusters 7.0000 2 3.0000 7.0000 0 6.0000 0
## Value_Index 0.5023 NA 0.8056 0.2017 0 0.0172 0
## SDbw
## Number_clusters 10.0000
## Value_Index 0.4414
##
## $Best.partition
## [1] 1 2 2 3 2 1 2 2 2 2 2 2 4 1 2 1 1 1 1 1 2 1 1 1 1 1 2 1 2 3 1 5 2 1 2
## [36] 1 1 1 2 1 3 1 2 4 4 2 4 1 1 1 2 1 1 2 2 2 2 2 2 2 1 2 2 2 3 2 1 1 3 2
## [71] 2 1 2 2 2 1 1 1 1 1 1 2 1 1 1 5 1 1 1 1 1 1 1 1 1 2 2 1 5 5 1 2 2 2 2
## [106] 1 1 2 1 1 1 1 1 2 1 1 1 2 3 2 2 1 1 1 2 1 1 1 1 1 2 2 2 2 5 2 2 1 1 1
## [141] 3 1 5 5 2 1 2 1 1 1 1
## suggests 5 clusters
## Calculate the cluster dendrogram. dist(cor) measures distances between rows
## of the correlation matrix, so the leaves are the columns of diag1
## (the measures), not the rows of the data.
par(mfrow=c(1, 1))
## NOTE(review): the fitted tree is stored as `hclust`, the same name as the
## function that builds it -- consider a distinct name. The plot's default
## axis label presumably derives from this call expression; check the figure
## before refactoring `dist(cor)` into a temporary.
hclust <- hclust(dist(cor), method = "complete")
## In colour: cut the tree into 5 groups and colour the leaves by group.
## Despite its name, `cut10` holds a 5-group cut, not 10.
cut10<- cutree(hclust, 5)
ColorDendrogram(hclust, y = cut10, xlab = NULL, cex = 0.5, labels = names(cut10), branchlength = 3)

## Interactive heatmap of the correlation matrix in a blue palette.
## d3heatmap() takes the palette as `colors`; `colours` is not matched to it,
## so the earlier spelling left the requested palette unapplied.
d3heatmap(cor, colors = "Blues")