IMPORT CANCER DATA
# Load the cancer data set from "cancer.csv" in the working directory.
# stringsAsFactors is spelled out because its default changed from TRUE to
# FALSE in R 4.0.0; with TRUE, `diagnosis` is read as a factor on every R
# version, matching the str() output shown further down.
cancer <- read.csv("cancer.csv", stringsAsFactors = TRUE)
# Preview the first six rows.
head(cancer)
## id diagnosis radius_mean texture_mean perimeter_mean area_mean
## 1 842302 M 17.99 10.38 122.80 1001.0
## 2 842517 M 20.57 17.77 132.90 1326.0
## 3 84300903 M 19.69 21.25 130.00 1203.0
## 4 84348301 M 11.42 20.38 77.58 386.1
## 5 84358402 M 20.29 14.34 135.10 1297.0
## 6 843786 M 12.45 15.70 82.57 477.1
## smoothness_mean compactness_mean concavity_mean concave.points_mean
## 1 0.11840 0.27760 0.3001 0.14710
## 2 0.08474 0.07864 0.0869 0.07017
## 3 0.10960 0.15990 0.1974 0.12790
## 4 0.14250 0.28390 0.2414 0.10520
## 5 0.10030 0.13280 0.1980 0.10430
## 6 0.12780 0.17000 0.1578 0.08089
## symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se
## 1 0.2419 0.07871 1.0950 0.9053 8.589
## 2 0.1812 0.05667 0.5435 0.7339 3.398
## 3 0.2069 0.05999 0.7456 0.7869 4.585
## 4 0.2597 0.09744 0.4956 1.1560 3.445
## 5 0.1809 0.05883 0.7572 0.7813 5.438
## 6 0.2087 0.07613 0.3345 0.8902 2.217
## area_se smoothness_se compactness_se concavity_se concave.points_se
## 1 153.40 0.006399 0.04904 0.05373 0.01587
## 2 74.08 0.005225 0.01308 0.01860 0.01340
## 3 94.03 0.006150 0.04006 0.03832 0.02058
## 4 27.23 0.009110 0.07458 0.05661 0.01867
## 5 94.44 0.011490 0.02461 0.05688 0.01885
## 6 27.19 0.007510 0.03345 0.03672 0.01137
## symmetry_se fractal_dimension_se radius_worst texture_worst
## 1 0.03003 0.006193 25.38 17.33
## 2 0.01389 0.003532 24.99 23.41
## 3 0.02250 0.004571 23.57 25.53
## 4 0.05963 0.009208 14.91 26.50
## 5 0.01756 0.005115 22.54 16.67
## 6 0.02165 0.005082 15.47 23.75
## perimeter_worst area_worst smoothness_worst compactness_worst
## 1 184.60 2019.0 0.1622 0.6656
## 2 158.80 1956.0 0.1238 0.1866
## 3 152.50 1709.0 0.1444 0.4245
## 4 98.87 567.7 0.2098 0.8663
## 5 152.20 1575.0 0.1374 0.2050
## 6 103.40 741.6 0.1791 0.5249
## concavity_worst concave.points_worst symmetry_worst
## 1 0.7119 0.2654 0.4601
## 2 0.2416 0.1860 0.2750
## 3 0.4504 0.2430 0.3613
## 4 0.6869 0.2575 0.6638
## 5 0.4000 0.1625 0.2364
## 6 0.5355 0.1741 0.3985
## fractal_dimension_worst X
## 1 0.11890 NA
## 2 0.08902 NA
## 3 0.08758 NA
## 4 0.17300 NA
## 5 0.07678 NA
## 6 0.12440 NA
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
# Compactly display the structure of the imported data frame: number of
# observations, and each column's name, type and first few values.
utils::str(cancer)
## 'data.frame': 569 obs. of 33 variables:
## $ id : int 842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 ...
## $ diagnosis : Factor w/ 2 levels "B","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ radius_mean : num 18 20.6 19.7 11.4 20.3 ...
## $ texture_mean : num 10.4 17.8 21.2 20.4 14.3 ...
## $ perimeter_mean : num 122.8 132.9 130 77.6 135.1 ...
## $ area_mean : num 1001 1326 1203 386 1297 ...
## $ smoothness_mean : num 0.1184 0.0847 0.1096 0.1425 0.1003 ...
## $ compactness_mean : num 0.2776 0.0786 0.1599 0.2839 0.1328 ...
## $ concavity_mean : num 0.3001 0.0869 0.1974 0.2414 0.198 ...
## $ concave.points_mean : num 0.1471 0.0702 0.1279 0.1052 0.1043 ...
## $ symmetry_mean : num 0.242 0.181 0.207 0.26 0.181 ...
## $ fractal_dimension_mean : num 0.0787 0.0567 0.06 0.0974 0.0588 ...
## $ radius_se : num 1.095 0.543 0.746 0.496 0.757 ...
## $ texture_se : num 0.905 0.734 0.787 1.156 0.781 ...
## $ perimeter_se : num 8.59 3.4 4.58 3.44 5.44 ...
## $ area_se : num 153.4 74.1 94 27.2 94.4 ...
## $ smoothness_se : num 0.0064 0.00522 0.00615 0.00911 0.01149 ...
## $ compactness_se : num 0.049 0.0131 0.0401 0.0746 0.0246 ...
## $ concavity_se : num 0.0537 0.0186 0.0383 0.0566 0.0569 ...
## $ concave.points_se : num 0.0159 0.0134 0.0206 0.0187 0.0188 ...
## $ symmetry_se : num 0.03 0.0139 0.0225 0.0596 0.0176 ...
## $ fractal_dimension_se : num 0.00619 0.00353 0.00457 0.00921 0.00511 ...
## $ radius_worst : num 25.4 25 23.6 14.9 22.5 ...
## $ texture_worst : num 17.3 23.4 25.5 26.5 16.7 ...
## $ perimeter_worst : num 184.6 158.8 152.5 98.9 152.2 ...
## $ area_worst : num 2019 1956 1709 568 1575 ...
## $ smoothness_worst : num 0.162 0.124 0.144 0.21 0.137 ...
## $ compactness_worst : num 0.666 0.187 0.424 0.866 0.205 ...
## $ concavity_worst : num 0.712 0.242 0.45 0.687 0.4 ...
## $ concave.points_worst : num 0.265 0.186 0.243 0.258 0.163 ...
## $ symmetry_worst : num 0.46 0.275 0.361 0.664 0.236 ...
## $ fractal_dimension_worst: num 0.1189 0.089 0.0876 0.173 0.0768 ...
## $ X : logi NA NA NA NA NA NA ...
REMOVE REDUNDANT COLUMNS
# Drop the `X` and `id` columns; every other column is kept unchanged.
cancer <- cancer[!(names(cancer) %in% c("X", "id"))]
CREATE PARTITION INDEX TO SPLIT DATA INTO TRAIN AND TEST DATA
# Fix the RNG seed so the random train/test split (and everything derived
# from it) is reproducible between runs. The results printed further down
# came from an unseeded run, so exact figures may differ slightly.
set.seed(123)
# Row indices of the training set: about 70% of the rows, sampled within each
# diagnosis class. list = FALSE returns the indices as a matrix, not a list.
index <- createDataPartition(cancer$diagnosis, p = 0.7, list = FALSE)
SEPARATE THE PREDICTORS FROM THE CLASS LABEL AND STANDARDISE THEM
# Keep only the predictor columns, i.e. everything except the class label.
cancer2 <- cancer[names(cancer) != "diagnosis"]
# Standardise each predictor: subtract the column mean and divide by the
# column standard deviation. The result is a numeric matrix.
cancer3 <- scale(cancer2, center = TRUE, scale = TRUE)
# Preview the first six standardised rows.
head(cancer3, n = 6L)
## radius_mean texture_mean perimeter_mean area_mean smoothness_mean
## 1 1.0960995 -2.0715123 1.2688173 0.9835095 1.5670875
## 2 1.8282120 -0.3533215 1.6844726 1.9070303 -0.8262354
## 3 1.5784992 0.4557859 1.5651260 1.5575132 0.9413821
## 4 -0.7682333 0.2535091 -0.5921661 -0.7637917 3.2806668
## 5 1.7487579 -1.1508038 1.7750113 1.8246238 0.2801253
## 6 -0.4759559 -0.8346009 -0.3868077 -0.5052059 2.2354545
## compactness_mean concavity_mean concave.points_mean symmetry_mean
## 1 3.2806281 2.65054179 2.5302489 2.215565542
## 2 -0.4866435 -0.02382489 0.5476623 0.001391139
## 3 1.0519999 1.36227979 2.0354398 0.938858720
## 4 3.3999174 1.91421287 1.4504311 2.864862154
## 5 0.5388663 1.36980615 1.4272370 -0.009552062
## 6 1.2432416 0.86554001 0.8239307 1.004517928
## fractal_dimension_mean radius_se texture_se perimeter_se area_se
## 1 2.2537638 2.4875451 -0.5647681 2.8305403 2.4853907
## 2 -0.8678888 0.4988157 -0.8754733 0.2630955 0.7417493
## 3 -0.3976580 1.2275958 -0.7793976 0.8501802 1.1802975
## 4 4.9066020 0.3260865 -0.1103120 0.2863415 -0.2881246
## 5 -0.5619555 1.2694258 -0.7895490 1.2720701 1.1893103
## 6 1.8883435 -0.2548461 -0.5921406 -0.3210217 -0.2890039
## smoothness_se compactness_se concavity_se concave.points_se symmetry_se
## 1 -0.2138135 1.31570389 0.7233897 0.66023900 1.1477468
## 2 -0.6048187 -0.69231710 -0.4403926 0.25993335 -0.8047423
## 3 -0.2967439 0.81425704 0.2128891 1.42357487 0.2368272
## 4 0.6890953 2.74186785 0.8187979 1.11402678 4.7285198
## 5 1.4817634 -0.04847723 0.8277425 1.14319885 -0.3607748
## 6 0.1562093 0.44515196 0.1598845 -0.06906279 0.1340009
## fractal_dimension_se radius_worst texture_worst perimeter_worst
## 1 0.90628565 1.8850310 -1.35809849 2.3015755
## 2 -0.09935632 1.8043398 -0.36887865 1.5337764
## 3 0.29330133 1.5105411 -0.02395331 1.3462906
## 4 2.04571087 -0.2812170 0.13386631 -0.2497196
## 5 0.49888916 1.2974336 -1.46548091 1.3373627
## 6 0.48641784 -0.1653528 -0.31356043 -0.1149083
## area_worst smoothness_worst compactness_worst concavity_worst
## 1 1.9994782 1.3065367 2.6143647 2.1076718
## 2 1.8888270 -0.3752817 -0.4300658 -0.1466200
## 3 1.4550043 0.5269438 1.0819801 0.8542223
## 4 -0.5495377 3.3912907 3.8899747 1.9878392
## 5 1.2196511 0.2203623 -0.3131190 0.6126397
## 6 -0.2441054 2.0467119 1.7201029 1.2621327
## concave.points_worst symmetry_worst fractal_dimension_worst
## 1 2.2940576 2.7482041 1.9353117
## 2 1.0861286 -0.2436753 0.2809428
## 3 1.9532817 1.1512420 0.2012142
## 4 2.1738732 6.0407261 4.9306719
## 5 0.7286181 -0.8675896 -0.3967505
## 6 0.9050914 1.7525273 2.2398308
FORM TRAIN AND TEST DATA
# Standardise with training-set statistics only. Slicing the globally scaled
# `cancer3` would let the test rows influence the means and standard
# deviations used for the training features (data leakage), which makes the
# test-set evaluation optimistic.
traindata <- scale(cancer2[index, , drop = FALSE])
# Apply the training means / standard deviations to the held-out rows.
testdata <- scale(
  cancer2[-index, , drop = FALSE],
  center = attr(traindata, "scaled:center"),
  scale = attr(traindata, "scaled:scale")
)
CREATE CLASS VECTORS
# True diagnosis labels for the training rows and for the held-out rows,
# selected with the same partition index used for the feature matrices.
Ytrain <- cancer[index, "diagnosis"]
Ytest <- cancer[-index, "diagnosis"]
MODELLING
library(class)
# Predict the class of every test row from its k nearest training rows.
# k is the rounded square root of the number of training rows, minus one.
knnmodel <- knn(
  train = traindata,
  test = testdata,
  cl = Ytrain,
  k = round(sqrt(nrow(traindata)), 0) - 1
)
# Confusion matrix: predicted class in rows, true class in columns.
cMatrix <- table(knnmodel, Ytest)
cMatrix
## Ytest
## knnmodel B M
## B 106 7
## M 1 56
library(caret)
# caret takes the first row level of the table ("B", benign) as the positive
# class by default, which would report the benign recall as "sensitivity".
# For a cancer screen the event of interest is malignant ("M"), so the
# positive and negative classes are named explicitly.
# Sensitivity: share of truly malignant cases predicted malignant (56 / 63).
sensitivity(cMatrix, positive = "M")
## [1] 0.8888889
# Specificity: share of truly benign cases predicted benign (106 / 107).
specificity(cMatrix, negative = "B")
## [1] 0.9906542