IMPORT CANCER DATA

# Load the cancer data set from the working directory.
# stringsAsFactors = TRUE is spelled out because R >= 4.0 no longer converts
# strings to factors by default; without it `diagnosis` would be read as a
# character column instead of the two-level factor the analysis relies on.
cancer <- read.csv("cancer.csv", stringsAsFactors = TRUE)
# Preview the first rows to confirm the import worked.
head(cancer)
##         id diagnosis radius_mean texture_mean perimeter_mean area_mean
## 1   842302         M       17.99        10.38         122.80    1001.0
## 2   842517         M       20.57        17.77         132.90    1326.0
## 3 84300903         M       19.69        21.25         130.00    1203.0
## 4 84348301         M       11.42        20.38          77.58     386.1
## 5 84358402         M       20.29        14.34         135.10    1297.0
## 6   843786         M       12.45        15.70          82.57     477.1
##   smoothness_mean compactness_mean concavity_mean concave.points_mean
## 1         0.11840          0.27760         0.3001             0.14710
## 2         0.08474          0.07864         0.0869             0.07017
## 3         0.10960          0.15990         0.1974             0.12790
## 4         0.14250          0.28390         0.2414             0.10520
## 5         0.10030          0.13280         0.1980             0.10430
## 6         0.12780          0.17000         0.1578             0.08089
##   symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se
## 1        0.2419                0.07871    1.0950     0.9053        8.589
## 2        0.1812                0.05667    0.5435     0.7339        3.398
## 3        0.2069                0.05999    0.7456     0.7869        4.585
## 4        0.2597                0.09744    0.4956     1.1560        3.445
## 5        0.1809                0.05883    0.7572     0.7813        5.438
## 6        0.2087                0.07613    0.3345     0.8902        2.217
##   area_se smoothness_se compactness_se concavity_se concave.points_se
## 1  153.40      0.006399        0.04904      0.05373           0.01587
## 2   74.08      0.005225        0.01308      0.01860           0.01340
## 3   94.03      0.006150        0.04006      0.03832           0.02058
## 4   27.23      0.009110        0.07458      0.05661           0.01867
## 5   94.44      0.011490        0.02461      0.05688           0.01885
## 6   27.19      0.007510        0.03345      0.03672           0.01137
##   symmetry_se fractal_dimension_se radius_worst texture_worst
## 1     0.03003             0.006193        25.38         17.33
## 2     0.01389             0.003532        24.99         23.41
## 3     0.02250             0.004571        23.57         25.53
## 4     0.05963             0.009208        14.91         26.50
## 5     0.01756             0.005115        22.54         16.67
## 6     0.02165             0.005082        15.47         23.75
##   perimeter_worst area_worst smoothness_worst compactness_worst
## 1          184.60     2019.0           0.1622            0.6656
## 2          158.80     1956.0           0.1238            0.1866
## 3          152.50     1709.0           0.1444            0.4245
## 4           98.87      567.7           0.2098            0.8663
## 5          152.20     1575.0           0.1374            0.2050
## 6          103.40      741.6           0.1791            0.5249
##   concavity_worst concave.points_worst symmetry_worst
## 1          0.7119               0.2654         0.4601
## 2          0.2416               0.1860         0.2750
## 3          0.4504               0.2430         0.3613
## 4          0.6869               0.2575         0.6638
## 5          0.4000               0.1625         0.2364
## 6          0.5355               0.1741         0.3985
##   fractal_dimension_worst  X
## 1                 0.11890 NA
## 2                 0.08902 NA
## 3                 0.08758 NA
## 4                 0.17300 NA
## 5                 0.07678 NA
## 6                 0.12440 NA
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
# Compact overview of the data frame: dimensions, column names, column types
# and the first few values of every column.
utils::str(cancer)
## 'data.frame':    569 obs. of  33 variables:
##  $ id                     : int  842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 ...
##  $ diagnosis              : Factor w/ 2 levels "B","M": 2 2 2 2 2 2 2 2 2 2 ...
##  $ radius_mean            : num  18 20.6 19.7 11.4 20.3 ...
##  $ texture_mean           : num  10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean         : num  122.8 132.9 130 77.6 135.1 ...
##  $ area_mean              : num  1001 1326 1203 386 1297 ...
##  $ smoothness_mean        : num  0.1184 0.0847 0.1096 0.1425 0.1003 ...
##  $ compactness_mean       : num  0.2776 0.0786 0.1599 0.2839 0.1328 ...
##  $ concavity_mean         : num  0.3001 0.0869 0.1974 0.2414 0.198 ...
##  $ concave.points_mean    : num  0.1471 0.0702 0.1279 0.1052 0.1043 ...
##  $ symmetry_mean          : num  0.242 0.181 0.207 0.26 0.181 ...
##  $ fractal_dimension_mean : num  0.0787 0.0567 0.06 0.0974 0.0588 ...
##  $ radius_se              : num  1.095 0.543 0.746 0.496 0.757 ...
##  $ texture_se             : num  0.905 0.734 0.787 1.156 0.781 ...
##  $ perimeter_se           : num  8.59 3.4 4.58 3.44 5.44 ...
##  $ area_se                : num  153.4 74.1 94 27.2 94.4 ...
##  $ smoothness_se          : num  0.0064 0.00522 0.00615 0.00911 0.01149 ...
##  $ compactness_se         : num  0.049 0.0131 0.0401 0.0746 0.0246 ...
##  $ concavity_se           : num  0.0537 0.0186 0.0383 0.0566 0.0569 ...
##  $ concave.points_se      : num  0.0159 0.0134 0.0206 0.0187 0.0188 ...
##  $ symmetry_se            : num  0.03 0.0139 0.0225 0.0596 0.0176 ...
##  $ fractal_dimension_se   : num  0.00619 0.00353 0.00457 0.00921 0.00511 ...
##  $ radius_worst           : num  25.4 25 23.6 14.9 22.5 ...
##  $ texture_worst          : num  17.3 23.4 25.5 26.5 16.7 ...
##  $ perimeter_worst        : num  184.6 158.8 152.5 98.9 152.2 ...
##  $ area_worst             : num  2019 1956 1709 568 1575 ...
##  $ smoothness_worst       : num  0.162 0.124 0.144 0.21 0.137 ...
##  $ compactness_worst      : num  0.666 0.187 0.424 0.866 0.205 ...
##  $ concavity_worst        : num  0.712 0.242 0.45 0.687 0.4 ...
##  $ concave.points_worst   : num  0.265 0.186 0.243 0.258 0.163 ...
##  $ symmetry_worst         : num  0.46 0.275 0.361 0.664 0.236 ...
##  $ fractal_dimension_worst: num  0.1189 0.089 0.0876 0.173 0.0768 ...
##  $ X                      : logi  NA NA NA NA NA NA ...

REMOVE REDUNDANT COLUMNS (EMPTY X COLUMN AND ID)

# Drop the columns that carry no predictive information: `X` (all NA) and the
# `id` identifier. List-style indexing keeps the result a data frame.
cancer <- cancer[!(names(cancer) %in% c("X", "id"))]

CREATE PARTITION INDEX TO SPLIT DATA INTO TRAIN AND TEST DATA

# Fix the RNG seed so the random partition, and every result derived from it,
# can be reproduced when the script is re-run.
set.seed(123)
# Draw 70% of the rows for training, sampling within each diagnosis class.
# list = FALSE returns the selected row numbers as a one-column matrix.
index <- createDataPartition(cancer$diagnosis, p = 0.7, list = FALSE)

SEPARATE THE PREDICTORS FROM THE DIAGNOSIS AND STANDARDISE THEM

# Keep every column except the class label, so only predictors remain.
cancer2 <- cancer[setdiff(names(cancer), "diagnosis")]
# Centre each predictor to mean 0 and rescale it to unit standard deviation;
# scale() returns a numeric matrix.
cancer3 <- scale(x = cancer2)
# Preview the standardised predictors.
head(cancer3)
##   radius_mean texture_mean perimeter_mean  area_mean smoothness_mean
## 1   1.0960995   -2.0715123      1.2688173  0.9835095       1.5670875
## 2   1.8282120   -0.3533215      1.6844726  1.9070303      -0.8262354
## 3   1.5784992    0.4557859      1.5651260  1.5575132       0.9413821
## 4  -0.7682333    0.2535091     -0.5921661 -0.7637917       3.2806668
## 5   1.7487579   -1.1508038      1.7750113  1.8246238       0.2801253
## 6  -0.4759559   -0.8346009     -0.3868077 -0.5052059       2.2354545
##   compactness_mean concavity_mean concave.points_mean symmetry_mean
## 1        3.2806281     2.65054179           2.5302489   2.215565542
## 2       -0.4866435    -0.02382489           0.5476623   0.001391139
## 3        1.0519999     1.36227979           2.0354398   0.938858720
## 4        3.3999174     1.91421287           1.4504311   2.864862154
## 5        0.5388663     1.36980615           1.4272370  -0.009552062
## 6        1.2432416     0.86554001           0.8239307   1.004517928
##   fractal_dimension_mean  radius_se texture_se perimeter_se    area_se
## 1              2.2537638  2.4875451 -0.5647681    2.8305403  2.4853907
## 2             -0.8678888  0.4988157 -0.8754733    0.2630955  0.7417493
## 3             -0.3976580  1.2275958 -0.7793976    0.8501802  1.1802975
## 4              4.9066020  0.3260865 -0.1103120    0.2863415 -0.2881246
## 5             -0.5619555  1.2694258 -0.7895490    1.2720701  1.1893103
## 6              1.8883435 -0.2548461 -0.5921406   -0.3210217 -0.2890039
##   smoothness_se compactness_se concavity_se concave.points_se symmetry_se
## 1    -0.2138135     1.31570389    0.7233897        0.66023900   1.1477468
## 2    -0.6048187    -0.69231710   -0.4403926        0.25993335  -0.8047423
## 3    -0.2967439     0.81425704    0.2128891        1.42357487   0.2368272
## 4     0.6890953     2.74186785    0.8187979        1.11402678   4.7285198
## 5     1.4817634    -0.04847723    0.8277425        1.14319885  -0.3607748
## 6     0.1562093     0.44515196    0.1598845       -0.06906279   0.1340009
##   fractal_dimension_se radius_worst texture_worst perimeter_worst
## 1           0.90628565    1.8850310   -1.35809849       2.3015755
## 2          -0.09935632    1.8043398   -0.36887865       1.5337764
## 3           0.29330133    1.5105411   -0.02395331       1.3462906
## 4           2.04571087   -0.2812170    0.13386631      -0.2497196
## 5           0.49888916    1.2974336   -1.46548091       1.3373627
## 6           0.48641784   -0.1653528   -0.31356043      -0.1149083
##   area_worst smoothness_worst compactness_worst concavity_worst
## 1  1.9994782        1.3065367         2.6143647       2.1076718
## 2  1.8888270       -0.3752817        -0.4300658      -0.1466200
## 3  1.4550043        0.5269438         1.0819801       0.8542223
## 4 -0.5495377        3.3912907         3.8899747       1.9878392
## 5  1.2196511        0.2203623        -0.3131190       0.6126397
## 6 -0.2441054        2.0467119         1.7201029       1.2621327
##   concave.points_worst symmetry_worst fractal_dimension_worst
## 1            2.2940576      2.7482041               1.9353117
## 2            1.0861286     -0.2436753               0.2809428
## 3            1.9532817      1.1512420               0.2012142
## 4            2.1738732      6.0407261               4.9306719
## 5            0.7286181     -0.8675896              -0.3967505
## 6            0.9050914      1.7525273               2.2398308

CREATE TRAIN AND TEST DATA

# Standardise using statistics estimated on the training rows only. Scaling
# the full data set before splitting lets the test rows influence the means
# and standard deviations, which leaks test information into the model.
traindata <- scale(cancer2[index, ])
# Apply the training-set centre and scale to the held-out rows so both sets
# share one coordinate system without the test data shaping it.
testdata <- scale(
  cancer2[-index, ],
  center = attr(traindata, "scaled:center"),
  scale = attr(traindata, "scaled:scale")
)

CREATE CLASS VECTORS

# Class labels for the training rows (those selected by `index`) ...
Ytrain <- cancer[["diagnosis"]][index]
# ... and for the remaining, held-out rows.
Ytest <- cancer[["diagnosis"]][-index]

MODELLING

library(class)
# Rule-of-thumb neighbourhood size: the square root of the number of training
# rows. It is forced to be odd so a two-class vote cannot end in a tie; the
# earlier "round(sqrt(n)) - 1" gave an even k whenever sqrt(n) rounded to an
# odd number.
k_neighbours <- max(round(sqrt(nrow(traindata))), 1)
if (k_neighbours %% 2 == 0) {
  k_neighbours <- k_neighbours - 1
}
# Classify every test row by majority vote among its nearest training rows.
knnmodel <- knn(
  train = traindata,
  test = testdata,
  cl = Ytrain,
  k = k_neighbours
)
# Confusion matrix with predictions in rows and true classes in columns.
cMatrix <- t(table(Ytest, knnmodel))
cMatrix
##         Ytest
## knnmodel   B   M
##        B 106   7
##        M   1  56
library(caret)
# caret takes the first row name ("B", benign) as the positive class unless
# told otherwise. For a cancer screen the class of interest is malignant, so
# "M" is named as positive and "B" as negative explicitly.
# Sensitivity: share of truly malignant cases that were predicted malignant.
sensitivity(cMatrix, positive = "M")
## [1] 0.8888889
# Specificity: share of truly benign cases that were predicted benign.
specificity(cMatrix, negative = "B")
## [1] 0.9906542