1. Load Packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(class)
library(ggplot2)
library(pROC)
## Warning: package 'pROC' was built under R version 4.4.3
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## 
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(corrplot)
## corrplot 0.95 loaded
library(e1071)
2. Load Data
data <- read.csv("processed.cleveland.data", header = FALSE)
head(data)
##   V1 V2 V3  V4  V5 V6 V7  V8 V9 V10 V11 V12 V13 V14
## 1 63  1  1 145 233  1  2 150  0 2.3   3 0.0 6.0   0
## 2 67  1  4 160 286  0  2 108  1 1.5   2 3.0 3.0   2
## 3 67  1  4 120 229  0  2 129  1 2.6   2 2.0 7.0   1
## 4 37  1  3 130 250  0  0 187  0 3.5   3 0.0 3.0   0
## 5 41  0  2 130 204  0  2 172  0 1.4   1 0.0 3.0   0
## 6 56  1  2 120 236  0  0 178  0 0.8   1 0.0 3.0   0

(The full printout runs to 303 rows; in columns V12 (ca) and V13 (thal), a few entries are coded "?", which matters in the preprocessing step below.)

3. Add Column Names

colnames(data) <- c("age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "num")
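For reference, these names follow the UCI Cleveland codebook: cp is chest pain type (1-4), trestbps is resting blood pressure (mm Hg), chol is serum cholesterol (mg/dl), fbs indicates fasting blood sugar > 120 mg/dl, restecg is the resting ECG result, thalach is the maximum heart rate achieved, exang is exercise-induced angina, oldpeak is exercise-induced ST depression, slope is the slope of the peak exercise ST segment, ca is the number of major vessels colored by fluoroscopy (0-3), thal is the thallium test result (3 = normal, 6 = fixed defect, 7 = reversible defect), and num is the diagnosis target.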
4. Data Preprocessing
# Check data types
glimpse(data)
## Rows: 303
## Columns: 14
## $ age      <dbl> 63, 67, 67, 37, 41, 56, 62, 57, 63, 53, 57, 56, 56, 44, 52, 5…
## $ sex      <dbl> 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ cp       <dbl> 1, 4, 4, 3, 2, 2, 4, 4, 4, 4, 4, 2, 3, 2, 3, 3, 2, 4, 3, 2, 1…
## $ trestbps <dbl> 145, 160, 120, 130, 130, 120, 140, 120, 130, 140, 140, 140, 1…
## $ chol     <dbl> 233, 286, 229, 250, 204, 236, 268, 354, 254, 203, 192, 294, 2…
## $ fbs      <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0…
## $ restecg  <dbl> 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2…
## $ thalach  <dbl> 150, 108, 129, 187, 172, 178, 160, 163, 147, 155, 148, 153, 1…
## $ exang    <dbl> 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1…
## $ oldpeak  <dbl> 2.3, 1.5, 2.6, 3.5, 1.4, 0.8, 3.6, 0.6, 1.4, 3.1, 0.4, 1.3, 0…
## $ slope    <dbl> 3, 2, 2, 3, 1, 1, 3, 1, 2, 3, 2, 2, 2, 1, 1, 1, 3, 1, 1, 1, 2…
## $ ca       <chr> "0.0", "3.0", "2.0", "0.0", "0.0", "0.0", "2.0", "0.0", "1.0"…
## $ thal     <chr> "6.0", "3.0", "7.0", "3.0", "3.0", "3.0", "3.0", "3.0", "7.0"…
## $ num      <int> 0, 2, 1, 0, 0, 0, 3, 0, 2, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0…
# Check the total NA count
sum(is.na(data))
## [1] 0
# Check missing values per column
colSums(is.na(data))
##      age      sex       cp trestbps     chol      fbs  restecg  thalach 
##        0        0        0        0        0        0        0        0 
##    exang  oldpeak    slope       ca     thal      num 
##        0        0        0        0        0        0
# Convert 'ca' and 'thal' to numeric
data$ca <- as.numeric(data$ca)
## Warning: NAs introduced by coercion
data$thal <- as.numeric(data$thal)
## Warning: NAs introduced by coercion
glimpse(data)
## Rows: 303
## Columns: 14
## $ age      <dbl> 63, 67, 67, 37, 41, 56, 62, 57, 63, 53, 57, 56, 56, 44, 52, 5…
## $ sex      <dbl> 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ cp       <dbl> 1, 4, 4, 3, 2, 2, 4, 4, 4, 4, 4, 2, 3, 2, 3, 3, 2, 4, 3, 2, 1…
## $ trestbps <dbl> 145, 160, 120, 130, 130, 120, 140, 120, 130, 140, 140, 140, 1…
## $ chol     <dbl> 233, 286, 229, 250, 204, 236, 268, 354, 254, 203, 192, 294, 2…
## $ fbs      <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0…
## $ restecg  <dbl> 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2…
## $ thalach  <dbl> 150, 108, 129, 187, 172, 178, 160, 163, 147, 155, 148, 153, 1…
## $ exang    <dbl> 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1…
## $ oldpeak  <dbl> 2.3, 1.5, 2.6, 3.5, 1.4, 0.8, 3.6, 0.6, 1.4, 3.1, 0.4, 1.3, 0…
## $ slope    <dbl> 3, 2, 2, 3, 1, 1, 3, 1, 2, 3, 2, 2, 2, 1, 1, 1, 3, 1, 1, 1, 2…
## $ ca       <dbl> 0, 3, 2, 0, 0, 0, 2, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ thal     <dbl> 6, 3, 7, 3, 3, 3, 3, 3, 7, 7, 6, 3, 6, 7, 7, 3, 7, 3, 3, 3, 3…
## $ num      <int> 0, 2, 1, 0, 0, 0, 3, 0, 2, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0…
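The "NAs introduced by coercion" warnings are expected: missing values in the raw file are coded as "?", which cannot be parsed as numbers. As a hedged alternative (a sketch, not the approach used here), the "?" entries could be declared at read time so that 'ca' and 'thal' are parsed as numeric directly:

# Alternative sketch: treat "?" as NA while reading, avoiding the character
# round-trip and the coercion warnings ('data2' is an illustrative name).
data2 <- read.csv("processed.cleveland.data", header = FALSE, na.strings = "?")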

Convert the target to binary: 0 = no heart disease, 1 = heart disease present.

data$num <- ifelse(data$num == 0, 0, 1)
data$num <- as.factor(data$num)
5. Feature Selection
corrplot(cor(data %>% select(-num)), method = "number", type = "lower", tl.cex = 0.8)

Insights: 1. Most features have low pairwise correlations (mostly r < 0.5), indicating no significant multicollinearity among the variables.

  2. Fairly strong negative correlation: age vs. thalach (maximum heart rate declines with age).
  3. Fairly strong positive correlation: slope vs. oldpeak (the shape of the ST segment tracks ST depression).
  4. Feature with the lowest correlations overall: fbs.

Conclusion: "most features do not show high correlations with one another, indicating minimal multicollinearity and allowing interpretable models such as logistic regression. The negative correlation between age and maximum heart rate, and the relationship between ST segment shape and ST depression, give the feature selection solid medical justification. However, the fbs feature contributes little and should be evaluated further."
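As a hedged follow-up (this ranking is illustrative, not part of the original analysis), one way to quantify how little fbs contributes is to rank the features by their absolute correlation with the binary target:

# Illustrative sketch: rank features by |correlation| with the 0/1 target.
# 'num' is a factor at this point, so map it back to numeric first.
target01 <- as.numeric(as.character(data$num))
cors <- sapply(data %>% select(-num), cor, y = target01, use = "complete.obs")
sort(abs(cors), decreasing = TRUE)  # fbs is expected to rank near the bottom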

6. Data Cleansing & Split Data

# Drop rows with NAs introduced by converting 'ca' and 'thal'
data_clean <- na.omit(data)

# Standardize the numeric features
features <- data_clean %>% select(-num)
features_scaled <- as.data.frame(scale(features))

# Recombine with the target
data_model <- cbind(features_scaled, num = data_clean$num)

# Split the data (70% train / 30% test, stratified on the target)
set.seed(123)
trainIndex <- createDataPartition(data_model$num, p = 0.7, list = FALSE)
train <- data_model[trainIndex, ]
test <- data_model[-trainIndex, ]
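A quick optional check (not part of the original output) that the stratified split kept the class proportions similar in both sets:

# Optional sanity check: class balance in train vs. test
prop.table(table(train$num))
prop.table(table(test$num))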
7. Logistic Regression Model
log_model <- glm(num ~ ., data = train, family = "binomial")
summary(log_model)
## 
## Call:
## glm(formula = num ~ ., family = "binomial", data = train)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.1875     0.2246  -0.835 0.403671    
## age          -0.2156     0.2511  -0.859 0.390556    
## sex           0.6050     0.2721   2.224 0.026163 *  
## cp            0.8320     0.2574   3.232 0.001230 ** 
## trestbps      0.4643     0.2261   2.054 0.040004 *  
## chol          0.1765     0.2564   0.688 0.491268    
## fbs          -0.3572     0.2339  -1.527 0.126789    
## restecg       0.1570     0.2213   0.710 0.477974    
## thalach      -0.1874     0.2690  -0.697 0.485966    
## exang         0.3286     0.2267   1.449 0.147295    
## oldpeak       0.3918     0.2964   1.322 0.186279    
## slope         0.5132     0.2661   1.928 0.053828 .  
## ca            1.0434     0.2780   3.753 0.000174 ***
## thal          0.4837     0.2386   2.028 0.042593 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 287.12  on 207  degrees of freedom
## Residual deviance: 146.13  on 194  degrees of freedom
## AIC: 174.13
## 
## Number of Fisher Scoring iterations: 6
log_pred_prob <- predict(log_model, test, type = "response")
log_pred <- ifelse(log_pred_prob > 0.5, 1, 0)
confusionMatrix(factor(log_pred), test$num)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 45 10
##          1  3 31
##                                           
##                Accuracy : 0.8539          
##                  95% CI : (0.7632, 0.9199)
##     No Information Rate : 0.5393          
##     P-Value [Acc > NIR] : 3.064e-10       
##                                           
##                   Kappa : 0.7023          
##                                           
##  Mcnemar's Test P-Value : 0.09609         
##                                           
##             Sensitivity : 0.9375          
##             Specificity : 0.7561          
##          Pos Pred Value : 0.8182          
##          Neg Pred Value : 0.9118          
##              Prevalence : 0.5393          
##          Detection Rate : 0.5056          
##    Detection Prevalence : 0.6180          
##       Balanced Accuracy : 0.8468          
##                                           
##        'Positive' Class : 0               
## 
# ROC & AUC
log_roc <- roc(as.numeric(test$num), as.numeric(log_pred_prob))
## Setting levels: control = 1, case = 2
## Setting direction: controls < cases
plot(log_roc, main = "ROC Curve - Logistic Regression")

auc(log_roc)
## Area under the curve: 0.9085

Insights: 1) The curve pulls away from the 45-degree grey diagonal: the model performs far better than random guessing and separates the "heart disease present" vs. "absent" classes well.

  2. The area under the curve (AUC) is high: the value computed above, 0.9085, indicates excellent discrimination between the two classes.

  3. High sensitivity is reached early (top-left of the plot): the model detects positive cases (patients with heart disease) well, which matters in a medical context (preventing false negatives).

Conclusions: - The logistic regression model is statistically very strong and medically relevant. - The ROC curve and an AUC above 0.9 show that the model has very strong classification ability.
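The 0.5 cutoff used above is a default rather than a tuned choice. A minimal sketch of threshold selection from the ROC curve, using pROC's coords (which applies Youden's J as its default "best" criterion):

# Sketch: find the cutoff that maximizes sensitivity + specificity - 1
coords(log_roc, "best", ret = c("threshold", "sensitivity", "specificity"))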

8. KNN Model + Optimal K
# Find the best k via 10-fold cross-validation
ctrl <- trainControl(method = "cv", number = 10)
set.seed(123)
knn_fit <- train(num ~ ., data = train, method = "knn", tuneLength = 20, trControl = ctrl)
knn_fit
## k-Nearest Neighbors 
## 
## 208 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 186, 188, 187, 187, 187, 188, ... 
## Resampling results across tuning parameters:
## 
##   k   Accuracy   Kappa    
##    5  0.8263853  0.6491376
##    7  0.8268615  0.6484935
##    9  0.8318615  0.6588693
##   11  0.8461472  0.6880521
##   13  0.8168615  0.6294406
##   15  0.8356926  0.6670878
##   17  0.8311688  0.6567980
##   19  0.8218615  0.6390783
##   21  0.8263853  0.6463357
##   23  0.8168615  0.6274216
##   25  0.8263853  0.6463357
##   27  0.8311472  0.6561943
##   29  0.8311472  0.6562767
##   31  0.8311472  0.6549883
##   33  0.8261472  0.6451965
##   35  0.8356710  0.6639737
##   37  0.8309091  0.6544719
##   39  0.8404329  0.6761643
##   41  0.8356710  0.6643285
##   43  0.8404329  0.6750531
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 11.
plot(knn_fit)

# Predict with the best k
knn_best <- knn_fit$bestTune$k
knn_pred <- knn(train[, -ncol(train)], test[, -ncol(test)], cl = train$num, k = knn_best)
confusionMatrix(knn_pred, test$num)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 45  8
##          1  3 33
##                                           
##                Accuracy : 0.8764          
##                  95% CI : (0.7896, 0.9367)
##     No Information Rate : 0.5393          
##     P-Value [Acc > NIR] : 1.047e-11       
##                                           
##                   Kappa : 0.749           
##                                           
##  Mcnemar's Test P-Value : 0.2278          
##                                           
##             Sensitivity : 0.9375          
##             Specificity : 0.8049          
##          Pos Pred Value : 0.8491          
##          Neg Pred Value : 0.9167          
##              Prevalence : 0.5393          
##          Detection Rate : 0.5056          
##    Detection Prevalence : 0.5955          
##       Balanced Accuracy : 0.8712          
##                                           
##        'Positive' Class : 0               
## 

Insights: 1) The highest cross-validated accuracy (~0.846) occurs at k = 11 - indicating that the KNN model generalizes best at k = 11. - the model reaches its peak performance when it considers neither too few nor too many neighbors.

  2. Accuracy fluctuates noticeably at small k (k < 20)
  3. Accuracy stabilizes and rises slowly for k > 30
  4. There is a trade-off between model complexity and performance

Conclusions: - the optimal value k = 11 should be used for the final KNN model.
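To compare both models on the same footing in the evaluation below, here is a hedged sketch of a KNN ROC curve; it relies on the vote-share attribute that class::knn returns when prob = TRUE (names such as knn_pred_p and knn_roc are illustrative):

# Sketch: recover P(class == "1") from knn()'s winning-class vote share,
# then compute an ROC/AUC comparable to the logistic regression model.
knn_pred_p <- knn(train[, -ncol(train)], test[, -ncol(test)],
                  cl = train$num, k = knn_best, prob = TRUE)
vote_share <- attr(knn_pred_p, "prob")  # proportion of votes for the winning class
knn_prob1 <- ifelse(knn_pred_p == "1", vote_share, 1 - vote_share)
knn_roc <- roc(test$num, knn_prob1)
auc(knn_roc)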

9. Model Evaluation & Discussion

Model Evaluation

Evaluation Metrics:
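One convenient way to assemble these metrics is to collect them from the confusion matrices computed above (a minimal sketch; log_cm and knn_cm are illustrative names):

# Gather headline metrics from both models into a single comparison table
log_cm <- confusionMatrix(factor(log_pred), test$num)
knn_cm <- confusionMatrix(knn_pred, test$num)
data.frame(
  Model       = c("Logistic Regression", paste0("KNN (k = ", knn_best, ")")),
  Accuracy    = c(log_cm$overall["Accuracy"],    knn_cm$overall["Accuracy"]),
  Sensitivity = c(log_cm$byClass["Sensitivity"], knn_cm$byClass["Sensitivity"]),
  Specificity = c(log_cm$byClass["Specificity"], knn_cm$byClass["Specificity"])
)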