# Preview the first five rows of `data` as a captioned, styled table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
head(data, n = 5) %>%
  kbl(caption = "First five rows of analysed data") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
First five rows of analysed data
|
Age
|
Sex
|
ChestPainType
|
RestingBP
|
Cholesterol
|
FastingBS
|
RestingECG
|
MaxHR
|
ExerciseAngina
|
Oldpeak
|
ST_Slope
|
HeartDisease
|
|
40
|
M
|
ATA
|
140
|
289
|
0
|
Normal
|
172
|
N
|
0.0
|
Up
|
0
|
|
49
|
F
|
NAP
|
160
|
180
|
0
|
Normal
|
156
|
N
|
1.0
|
Flat
|
1
|
|
37
|
M
|
ATA
|
130
|
283
|
0
|
ST
|
98
|
N
|
0.0
|
Up
|
0
|
|
48
|
F
|
ASY
|
138
|
214
|
0
|
Normal
|
108
|
Y
|
1.5
|
Flat
|
1
|
|
54
|
M
|
NAP
|
150
|
195
|
0
|
Normal
|
122
|
N
|
0.0
|
Up
|
0
|
# Preview the last five rows of `data` as a captioned, styled table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
tail(data, n = 5) %>%
  kbl(caption = "Last five rows of analysed data") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")
Last five rows of analysed data
|
Age
|
Sex
|
ChestPainType
|
RestingBP
|
Cholesterol
|
FastingBS
|
RestingECG
|
MaxHR
|
ExerciseAngina
|
Oldpeak
|
ST_Slope
|
HeartDisease
|
|
45
|
M
|
TA
|
110
|
264
|
0
|
Normal
|
132
|
N
|
1.2
|
Flat
|
1
|
|
68
|
M
|
ASY
|
144
|
193
|
1
|
Normal
|
141
|
N
|
3.4
|
Flat
|
1
|
|
57
|
M
|
ASY
|
130
|
131
|
0
|
Normal
|
115
|
Y
|
1.2
|
Flat
|
1
|
|
57
|
F
|
ATA
|
130
|
236
|
0
|
LVH
|
174
|
N
|
0.0
|
Flat
|
1
|
|
38
|
M
|
NAP
|
138
|
175
|
0
|
Normal
|
173
|
N
|
0.0
|
Up
|
0
|
Initial data check:
# Basic statistics ----
# Load the project helper from functions/initial_info.R and run it on `data`.
# NOTE(review): the helper's implementation is not visible here; judging by
# the rendered report it prints dataset-level counts (variables,
# observations, missing values, duplicates, variable types) - confirm.
source(file = "functions/initial_info.R")
initial_info(data)
|
x
|
|
Number of variables: 12
|
|
x
|
|
Number of observations: 918
|
|
x
|
|
Percentage of missings: 0
|
|
x
|
|
Number of duplicated rows: 0
|
|
x
|
|
Number of binary variables: 2
|
|
x
|
|
Number of factors variables: 0
|
|
x
|
|
Number of continuous variables: 5
|
|
x
|
|
Number of character variables: 5
|
# Basic information ----
# Per-column descriptive statistics from psych::describe(), rendered as a
# full-width styled table.
# NOTE(review): rows whose names end in `*` in the output are presumably
# non-numeric columns that describe() converted to numeric codes, so their
# mean/sd/skew are not meaningful - confirm against the psych documentation.
# `TRUE` is spelled out because `T` is a reassignable variable.
psych::describe(data) %>%
  kbl() %>%
  kable_classic(full_width = TRUE, html_font = "Cambria")
|
|
vars
|
n
|
mean
|
sd
|
median
|
trimmed
|
mad
|
min
|
max
|
range
|
skew
|
kurtosis
|
se
|
|
Age
|
1
|
918
|
53.511
|
9.433
|
54.0
|
53.711
|
10.38
|
28.0
|
77.0
|
49.0
|
-0.195
|
-0.396
|
0.311
|
|
Sex*
|
2
|
918
|
1.790
|
0.408
|
2.0
|
1.861
|
0.00
|
1.0
|
2.0
|
1.0
|
-1.420
|
0.016
|
0.013
|
|
ChestPainType*
|
3
|
918
|
1.781
|
0.957
|
1.0
|
1.664
|
0.00
|
1.0
|
4.0
|
3.0
|
0.791
|
-0.725
|
0.032
|
|
RestingBP
|
4
|
918
|
132.397
|
18.514
|
130.0
|
131.501
|
14.83
|
0.0
|
200.0
|
200.0
|
0.179
|
3.233
|
0.611
|
|
Cholesterol
|
5
|
918
|
198.800
|
109.384
|
223.0
|
204.413
|
68.20
|
0.0
|
603.0
|
603.0
|
-0.608
|
0.104
|
3.610
|
|
FastingBS
|
6
|
918
|
0.233
|
0.423
|
0.0
|
0.167
|
0.00
|
0.0
|
1.0
|
1.0
|
1.260
|
-0.412
|
0.014
|
|
RestingECG*
|
7
|
918
|
1.989
|
0.632
|
2.0
|
1.986
|
0.00
|
1.0
|
3.0
|
2.0
|
0.008
|
-0.497
|
0.021
|
|
MaxHR
|
8
|
918
|
136.809
|
25.460
|
138.0
|
137.231
|
26.69
|
60.0
|
202.0
|
142.0
|
-0.144
|
-0.458
|
0.840
|
|
ExerciseAngina*
|
9
|
918
|
1.404
|
0.491
|
1.0
|
1.380
|
0.00
|
1.0
|
2.0
|
1.0
|
0.390
|
-1.850
|
0.016
|
|
Oldpeak
|
10
|
918
|
0.887
|
1.067
|
0.6
|
0.738
|
0.89
|
-2.6
|
6.2
|
8.8
|
1.020
|
1.181
|
0.035
|
|
ST_Slope*
|
11
|
918
|
2.362
|
0.607
|
2.0
|
2.413
|
0.00
|
1.0
|
3.0
|
2.0
|
-0.380
|
-0.674
|
0.020
|
|
HeartDisease
|
12
|
918
|
0.553
|
0.497
|
1.0
|
0.567
|
0.00
|
0.0
|
1.0
|
1.0
|
-0.214
|
-1.956
|
0.016
|
Missing values
# Missing-value plots ----
# Load the project helper from functions/calc_missings.R and run it on `data`.
# NOTE(review): the helper's implementation is not visible here; presumably
# it draws the missing-value plots for the dataset - confirm.
source(file = "functions/calc_missings.R")
calc_missings(data)

Individual observations
Age
|
|
Age
|
|
|
Min. :28.0
|
|
|
1st Qu.:47.0
|
|
|
Median :54.0
|
|
|
Mean :53.5
|
|
|
3rd Qu.:60.0
|
|
|
Max. :77.0
|
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
=============== LINEAR MODEL ===============
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
54.264
|
2.730
|
19.874
|
0.000
|
|
RestingBP
|
0.093
|
0.015
|
6.131
|
0.000
|
|
Cholesterol
|
-0.001
|
0.003
|
-0.259
|
0.796
|
|
FastingBS
|
2.756
|
0.687
|
4.015
|
0.000
|
|
MaxHR
|
-0.111
|
0.012
|
-9.366
|
0.000
|
|
Oldpeak
|
1.383
|
0.285
|
4.850
|
0.000
|
|
HeartDisease
|
0.832
|
0.673
|
1.236
|
0.217
|
Sex
|
|
Sex
|
|
|
Length:918
|
|
|
Class :character
|
|
|
Mode :character
|
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
ChestPainType
|
|
ChestPainType
|
|
|
Length:918
|
|
|
Class :character
|
|
|
Mode :character
|
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
RestingBP
|
|
RestingBP
|
|
|
Min. : 0
|
|
|
1st Qu.:120
|
|
|
Median :130
|
|
|
Mean :132
|
|
|
3rd Qu.:140
|
|
|
Max. :200
|
|
x
|
|
** Too many levels! Histogram hard to read! **
|
NULL
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
=============== LINEAR MODEL ===============
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
106.098
|
6.060
|
17.508
|
0.000
|
|
Age
|
0.427
|
0.070
|
6.131
|
0.000
|
|
Cholesterol
|
0.024
|
0.006
|
4.197
|
0.000
|
|
FastingBS
|
2.218
|
1.482
|
1.496
|
0.135
|
|
MaxHR
|
-0.026
|
0.027
|
-0.967
|
0.334
|
|
Oldpeak
|
1.497
|
0.617
|
2.425
|
0.015
|
|
HeartDisease
|
0.631
|
1.444
|
0.437
|
0.662
|
Cholesterol
|
|
Cholesterol
|
|
|
Min. : 0
|
|
|
1st Qu.:173
|
|
|
Median :223
|
|
|
Mean :199
|
|
|
3rd Qu.:267
|
|
|
Max. :603
|
|
x
|
|
** Too many levels! Histogram hard to read! **
|
NULL
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
=============== LINEAR MODEL ===============
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
20.809
|
39.907
|
0.521
|
0.602
|
|
Age
|
-0.105
|
0.404
|
-0.259
|
0.796
|
|
RestingBP
|
0.785
|
0.187
|
4.197
|
0.000
|
|
FastingBS
|
-53.238
|
8.270
|
-6.437
|
0.000
|
|
MaxHR
|
0.738
|
0.150
|
4.915
|
0.000
|
|
Oldpeak
|
14.436
|
3.494
|
4.131
|
0.000
|
|
HeartDisease
|
-39.046
|
8.126
|
-4.805
|
0.000
|
FastingBS
|
|
FastingBS
|
|
|
Min. :0.000
|
|
|
1st Qu.:0.000
|
|
|
Median :0.000
|
|
|
Mean :0.233
|
|
|
3rd Qu.:0.000
|
|
|
Max. :1.000
|
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
=============== LINEAR MODEL ===============
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
-0.298
|
0.156
|
-1.91
|
0.057
|
|
Age
|
0.006
|
0.002
|
4.01
|
0.000
|
|
RestingBP
|
0.001
|
0.001
|
1.50
|
0.135
|
|
Cholesterol
|
-0.001
|
0.000
|
-6.44
|
0.000
|
|
MaxHR
|
0.001
|
0.001
|
1.56
|
0.119
|
|
Oldpeak
|
-0.024
|
0.014
|
-1.75
|
0.080
|
|
HeartDisease
|
0.187
|
0.032
|
5.92
|
0.000
|
RestingECG
|
|
RestingECG
|
|
|
Length:918
|
|
|
Class :character
|
|
|
Mode :character
|
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
MaxHR
|
|
MaxHR
|
|
|
Min. : 60
|
|
|
1st Qu.:120
|
|
|
Median :138
|
|
|
Mean :137
|
|
|
3rd Qu.:156
|
|
|
Max. :202
|
|
x
|
|
** Too many levels! Histogram hard to read! **
|
NULL
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
=============== LINEAR MODEL ===============
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
184.592
|
6.182
|
29.859
|
0.000
|
|
Age
|
-0.788
|
0.084
|
-9.366
|
0.000
|
|
RestingBP
|
-0.040
|
0.041
|
-0.967
|
0.334
|
|
Cholesterol
|
0.035
|
0.007
|
4.915
|
0.000
|
|
FastingBS
|
2.874
|
1.840
|
1.562
|
0.119
|
|
Oldpeak
|
0.792
|
0.768
|
1.032
|
0.302
|
|
HeartDisease
|
-15.668
|
1.716
|
-9.131
|
0.000
|
ExerciseAngina
|
|
ExerciseAngina
|
|
|
Length:918
|
|
|
Class :character
|
|
|
Mode :character
|
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
Oldpeak
|
|
Oldpeak
|
|
|
Min. :-2.60
|
|
|
1st Qu.: 0.00
|
|
|
Median : 0.60
|
|
|
Mean : 0.89
|
|
|
3rd Qu.: 1.50
|
|
|
Max. : 6.20
|
|
x
|
|
** Too many levels! Histogram hard to read! **
|
NULL
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
=============== LINEAR MODEL ===============
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
-1.562
|
0.371
|
-4.21
|
0.000
|
|
Age
|
0.018
|
0.004
|
4.85
|
0.000
|
|
RestingBP
|
0.004
|
0.002
|
2.42
|
0.015
|
|
Cholesterol
|
0.001
|
0.000
|
4.13
|
0.000
|
|
FastingBS
|
-0.139
|
0.079
|
-1.75
|
0.080
|
|
MaxHR
|
0.001
|
0.001
|
1.03
|
0.302
|
|
HeartDisease
|
0.879
|
0.072
|
12.27
|
0.000
|
ST_Slope
|
|
ST_Slope
|
|
|
Length:918
|
|
|
Class :character
|
|
|
Mode :character
|
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
HeartDisease
|
|
HeartDisease
|
|
|
Min. :0.000
|
|
|
1st Qu.:0.000
|
|
|
Median :1.000
|
|
|
Mean :0.553
|
|
|
3rd Qu.:1.000
|
|
|
Max. :1.000
|
|
x
|
|
Number of missing values: 0
|
|
x
|
|
Percentage of missing values: 0%
|
=============== LINEAR MODEL ===============
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
1.070
|
0.157
|
6.828
|
0.000
|
|
Age
|
0.002
|
0.002
|
1.236
|
0.217
|
|
RestingBP
|
0.000
|
0.001
|
0.437
|
0.662
|
|
Cholesterol
|
-0.001
|
0.000
|
-4.805
|
0.000
|
|
FastingBS
|
0.198
|
0.033
|
5.923
|
0.000
|
|
MaxHR
|
-0.005
|
0.001
|
-9.131
|
0.000
|
|
Oldpeak
|
0.161
|
0.013
|
12.272
|
0.000
|
Histograms
# Histograms of all numeric variables ----
# The numeric columns are reshaped to long form (one row per variable/value
# pair) so that each variable gets its own facet. pivot_longer() is used in
# place of the superseded gather(); the `key`/`value` column names are kept
# so the aesthetics and the facet formula stay the same.
ww <- ggplot(
  pivot_longer(
    dplyr::select_if(data, is.numeric),
    cols = everything(),
    names_to = "key",
    values_to = "value"
  ),
  aes(value)
) +
  geom_histogram(bins = 30, fill = "#464040") +
  # Each variable has its own scale, so the x axis is freed per facet.
  facet_wrap(~key, scales = "free_x") +
  theme(axis.title.x = element_blank(), axis.title.y = element_blank())
# Warnings raised while drawing are suppressed to keep the report clean.
suppressWarnings(print(ww))

Bar plots
# Bar plots ----
# Load the project helper from functions/names_bar.R and display its result
# for `data`.
# all_of() is a tidyselect selection helper intended for use inside selecting
# verbs; outside one it merely returns its argument (and that usage is
# deprecated since tidyselect 1.2.0), so the helper is called directly.
# The surrounding parentheses force the result to be visible, so it is
# auto-printed even if names_bar() returns invisibly.
# NOTE(review): names_bar() is not visible here - presumably it returns or
# draws bar plots for the non-numeric columns; confirm.
source("functions/names_bar.R")
(names_bar(data))

Correlations
Pearson’s
# Pearson correlation ----
# Correlation matrix of the numeric columns (pairwise-complete observations,
# cor()'s default method), drawn with numbers in the lower triangle and pie
# glyphs in the upper triangle. Warnings from the call are suppressed.
suppressWarnings(
  corrplot.mixed(
    cor(
      dplyr::select_if(data, is.numeric),
      use = "pairwise.complete.obs"
    ),
    lower = "number",
    upper = "pie",
    diag = "l",
    bg = "white",
    tl.col = "black",
    tl.pos = "lt",
    tl.cex = 1,
    number.font = 0.5,
    number.cex = 0.55,
    title = "Pearson's correlation",
    mar = c(0, 0, 1, 0)
  )
)

Kendall’s
# Kendall correlation ----
# Kendall's tau is computed on a random sample of at most 750 rows
# (presumably to bound run time - confirm). The sample is not seeded, so the
# plot can differ slightly between runs when `data` has more than 750 rows.
# sample.int() avoids the `1:nrow(data)` pitfall for empty data, the method
# name is spelled in full instead of relying on partial matching, and the
# plot title spelling is corrected.
suppressWarnings(
  corrplot.mixed(
    cor(
      dplyr::select_if(
        data[sample.int(nrow(data), min(750L, nrow(data))), ],
        is.numeric
      ),
      use = "pairwise.complete.obs",
      method = "kendall"
    ),
    lower = "number",
    upper = "pie",
    diag = "l",
    bg = "white",
    tl.col = "black",
    tl.pos = "lt",
    tl.cex = 1,
    number.font = 0.5,
    number.cex = 0.55,
    title = "Kendall's correlation",
    mar = c(0, 0, 1, 0)
  )
)

Spearman’s
# Spearman correlation ----
# Rank-correlation matrix of the numeric columns (pairwise-complete
# observations), drawn with numbers in the lower triangle and pie glyphs in
# the upper triangle. Warnings from the call are suppressed.
suppressWarnings(
  corrplot.mixed(
    cor(
      dplyr::select_if(data, is.numeric),
      use = "pairwise.complete.obs",
      method = "spearman"
    ),
    lower = "number",
    upper = "pie",
    diag = "l",
    bg = "white",
    tl.col = "black",
    tl.pos = "lt",
    tl.cex = 1,
    number.font = 0.5,
    number.cex = 0.55,
    title = "Spearman's correlation",
    mar = c(0, 0, 1, 0)
  )
)
