library(readr)
# Import wisc_bc_data.csv from the user's Downloads folder into a tibble.
wisc_bc_data <- read_csv(file = "~/Downloads/wisc_bc_data.csv")
## Rows: 569 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): diagnosis
## dbl (31): id, radius_mean, texture_mean, perimeter_mean, area_mean, smoothne...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style viewer only in interactive sessions; View() is a
# GUI convenience and may fail or do nothing when the script is run
# non-interactively (e.g. via Rscript).
if (interactive()) {
  View(wisc_bc_data)
}

 # Work on a shorter alias of the imported data and check its size.
 WBC <- wisc_bc_data
 dim(WBC)
## [1] 569  32
 # Preview the first six rows.
 head(WBC)
## # A tibble: 6 × 32
##         id diagnosis radius_mean texture_mean perimeter_mean area_mean
##      <dbl> <chr>           <dbl>        <dbl>          <dbl>     <dbl>
## 1   842302 M                18.0         10.4          123.      1001 
## 2   842517 M                20.6         17.8          133.      1326 
## 3 84300903 M                19.7         21.2          130       1203 
## 4 84348301 M                11.4         20.4           77.6      386.
## 5 84358402 M                20.3         14.3          135.      1297 
## 6   843786 M                12.4         15.7           82.6      477.
## # ℹ 26 more variables: smoothness_mean <dbl>, compactness_mean <dbl>,
## #   concavity_mean <dbl>, `concave points_mean` <dbl>, symmetry_mean <dbl>,
## #   fractal_dimension_mean <dbl>, radius_se <dbl>, texture_se <dbl>,
## #   perimeter_se <dbl>, area_se <dbl>, smoothness_se <dbl>,
## #   compactness_se <dbl>, concavity_se <dbl>, `concave points_se` <dbl>,
## #   symmetry_se <dbl>, fractal_dimension_se <dbl>, radius_worst <dbl>,
## #   texture_worst <dbl>, perimeter_worst <dbl>, area_worst <dbl>, …
 # Show the type and first few values of every column.
 utils::str(object = WBC)
## spc_tbl_ [569 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id                     : num [1:569] 842302 842517 84300903 84348301 84358402 ...
##  $ diagnosis              : chr [1:569] "M" "M" "M" "M" ...
##  $ radius_mean            : num [1:569] 18 20.6 19.7 11.4 20.3 ...
##  $ texture_mean           : num [1:569] 10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean         : num [1:569] 122.8 132.9 130 77.6 135.1 ...
##  $ area_mean              : num [1:569] 1001 1326 1203 386 1297 ...
##  $ smoothness_mean        : num [1:569] 0.1184 0.0847 0.1096 0.1425 0.1003 ...
##  $ compactness_mean       : num [1:569] 0.2776 0.0786 0.1599 0.2839 0.1328 ...
##  $ concavity_mean         : num [1:569] 0.3001 0.0869 0.1974 0.2414 0.198 ...
##  $ concave points_mean    : num [1:569] 0.1471 0.0702 0.1279 0.1052 0.1043 ...
##  $ symmetry_mean          : num [1:569] 0.242 0.181 0.207 0.26 0.181 ...
##  $ fractal_dimension_mean : num [1:569] 0.0787 0.0567 0.06 0.0974 0.0588 ...
##  $ radius_se              : num [1:569] 1.095 0.543 0.746 0.496 0.757 ...
##  $ texture_se             : num [1:569] 0.905 0.734 0.787 1.156 0.781 ...
##  $ perimeter_se           : num [1:569] 8.59 3.4 4.58 3.44 5.44 ...
##  $ area_se                : num [1:569] 153.4 74.1 94 27.2 94.4 ...
##  $ smoothness_se          : num [1:569] 0.0064 0.00522 0.00615 0.00911 0.01149 ...
##  $ compactness_se         : num [1:569] 0.049 0.0131 0.0401 0.0746 0.0246 ...
##  $ concavity_se           : num [1:569] 0.0537 0.0186 0.0383 0.0566 0.0569 ...
##  $ concave points_se      : num [1:569] 0.0159 0.0134 0.0206 0.0187 0.0188 ...
##  $ symmetry_se            : num [1:569] 0.03 0.0139 0.0225 0.0596 0.0176 ...
##  $ fractal_dimension_se   : num [1:569] 0.00619 0.00353 0.00457 0.00921 0.00511 ...
##  $ radius_worst           : num [1:569] 25.4 25 23.6 14.9 22.5 ...
##  $ texture_worst          : num [1:569] 17.3 23.4 25.5 26.5 16.7 ...
##  $ perimeter_worst        : num [1:569] 184.6 158.8 152.5 98.9 152.2 ...
##  $ area_worst             : num [1:569] 2019 1956 1709 568 1575 ...
##  $ smoothness_worst       : num [1:569] 0.162 0.124 0.144 0.21 0.137 ...
##  $ compactness_worst      : num [1:569] 0.666 0.187 0.424 0.866 0.205 ...
##  $ concavity_worst        : num [1:569] 0.712 0.242 0.45 0.687 0.4 ...
##  $ concave points_worst   : num [1:569] 0.265 0.186 0.243 0.258 0.163 ...
##  $ symmetry_worst         : num [1:569] 0.46 0.275 0.361 0.664 0.236 ...
##  $ fractal_dimension_worst: num [1:569] 0.1189 0.089 0.0876 0.173 0.0768 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_double(),
##   ..   diagnosis = col_character(),
##   ..   radius_mean = col_double(),
##   ..   texture_mean = col_double(),
##   ..   perimeter_mean = col_double(),
##   ..   area_mean = col_double(),
##   ..   smoothness_mean = col_double(),
##   ..   compactness_mean = col_double(),
##   ..   concavity_mean = col_double(),
##   ..   `concave points_mean` = col_double(),
##   ..   symmetry_mean = col_double(),
##   ..   fractal_dimension_mean = col_double(),
##   ..   radius_se = col_double(),
##   ..   texture_se = col_double(),
##   ..   perimeter_se = col_double(),
##   ..   area_se = col_double(),
##   ..   smoothness_se = col_double(),
##   ..   compactness_se = col_double(),
##   ..   concavity_se = col_double(),
##   ..   `concave points_se` = col_double(),
##   ..   symmetry_se = col_double(),
##   ..   fractal_dimension_se = col_double(),
##   ..   radius_worst = col_double(),
##   ..   texture_worst = col_double(),
##   ..   perimeter_worst = col_double(),
##   ..   area_worst = col_double(),
##   ..   smoothness_worst = col_double(),
##   ..   compactness_worst = col_double(),
##   ..   concavity_worst = col_double(),
##   ..   `concave points_worst` = col_double(),
##   ..   symmetry_worst = col_double(),
##   ..   fractal_dimension_worst = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
 # Drop the first column (id) so only diagnosis and the 30 numeric
 # measurements remain.
 WBC1 <- WBC[, -1]
 dim(WBC1)
## [1] 569  31
 str(WBC1)
## tibble [569 × 31] (S3: tbl_df/tbl/data.frame)
##  $ diagnosis              : chr [1:569] "M" "M" "M" "M" ...
##  $ radius_mean            : num [1:569] 18 20.6 19.7 11.4 20.3 ...
##  $ texture_mean           : num [1:569] 10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean         : num [1:569] 122.8 132.9 130 77.6 135.1 ...
##  $ area_mean              : num [1:569] 1001 1326 1203 386 1297 ...
##  $ smoothness_mean        : num [1:569] 0.1184 0.0847 0.1096 0.1425 0.1003 ...
##  $ compactness_mean       : num [1:569] 0.2776 0.0786 0.1599 0.2839 0.1328 ...
##  $ concavity_mean         : num [1:569] 0.3001 0.0869 0.1974 0.2414 0.198 ...
##  $ concave points_mean    : num [1:569] 0.1471 0.0702 0.1279 0.1052 0.1043 ...
##  $ symmetry_mean          : num [1:569] 0.242 0.181 0.207 0.26 0.181 ...
##  $ fractal_dimension_mean : num [1:569] 0.0787 0.0567 0.06 0.0974 0.0588 ...
##  $ radius_se              : num [1:569] 1.095 0.543 0.746 0.496 0.757 ...
##  $ texture_se             : num [1:569] 0.905 0.734 0.787 1.156 0.781 ...
##  $ perimeter_se           : num [1:569] 8.59 3.4 4.58 3.44 5.44 ...
##  $ area_se                : num [1:569] 153.4 74.1 94 27.2 94.4 ...
##  $ smoothness_se          : num [1:569] 0.0064 0.00522 0.00615 0.00911 0.01149 ...
##  $ compactness_se         : num [1:569] 0.049 0.0131 0.0401 0.0746 0.0246 ...
##  $ concavity_se           : num [1:569] 0.0537 0.0186 0.0383 0.0566 0.0569 ...
##  $ concave points_se      : num [1:569] 0.0159 0.0134 0.0206 0.0187 0.0188 ...
##  $ symmetry_se            : num [1:569] 0.03 0.0139 0.0225 0.0596 0.0176 ...
##  $ fractal_dimension_se   : num [1:569] 0.00619 0.00353 0.00457 0.00921 0.00511 ...
##  $ radius_worst           : num [1:569] 25.4 25 23.6 14.9 22.5 ...
##  $ texture_worst          : num [1:569] 17.3 23.4 25.5 26.5 16.7 ...
##  $ perimeter_worst        : num [1:569] 184.6 158.8 152.5 98.9 152.2 ...
##  $ area_worst             : num [1:569] 2019 1956 1709 568 1575 ...
##  $ smoothness_worst       : num [1:569] 0.162 0.124 0.144 0.21 0.137 ...
##  $ compactness_worst      : num [1:569] 0.666 0.187 0.424 0.866 0.205 ...
##  $ concavity_worst        : num [1:569] 0.712 0.242 0.45 0.687 0.4 ...
##  $ concave points_worst   : num [1:569] 0.265 0.186 0.243 0.258 0.163 ...
##  $ symmetry_worst         : num [1:569] 0.46 0.275 0.361 0.664 0.236 ...
##  $ fractal_dimension_worst: num [1:569] 0.1189 0.089 0.0876 0.173 0.0768 ...
 # Class counts under the raw "B"/"M" codes.
 table(WBC1$diagnosis)
## 
##   B   M 
## 357 212
 # Recode diagnosis as a factor with readable labels
 # (B -> Benign, M -> Malignant).
 WBC1$diagnosis <- factor(
   WBC1$diagnosis,
   levels = c("B", "M"),
   labels = c("Benign", "Malignant")
 )
 table(WBC1$diagnosis)
## 
##    Benign Malignant 
##       357       212
 # Class balance in percent, to one decimal place. The proportions are scaled
 # by 100 before rounding: rounding the raw proportions to whole numbers
 # collapses them to 1 and 0, which says nothing about the balance.
 round(prop.table(table(WBC1$diagnosis)) * 100, digits = 1)
## 
##    Benign Malignant 
##      62.7      37.3
 # Summaries of three features whose value ranges differ by orders of
 # magnitude.
 summary(WBC1[c("radius_mean", "area_mean", "smoothness_mean")])
##   radius_mean       area_mean      smoothness_mean  
##  Min.   : 6.981   Min.   : 143.5   Min.   :0.05263  
##  1st Qu.:11.700   1st Qu.: 420.3   1st Qu.:0.08637  
##  Median :13.370   Median : 551.1   Median :0.09587  
##  Mean   :14.127   Mean   : 654.9   Mean   :0.09636  
##  3rd Qu.:15.780   3rd Qu.: 782.7   3rd Qu.:0.10530  
##  Max.   :28.110   Max.   :2501.0   Max.   :0.16340
 # Min-max rescale a numeric vector to the interval [0, 1].
 #
 # x: numeric vector.
 # Returns a numeric vector of the same length as x in which min(x) maps to 0
 # and max(x) maps to 1. A constant vector maps to all zeros instead of 0 / 0.
 # Any NA in x makes the whole result NA, because the range is computed
 # without na.rm.
 #
 # NOTE: the divisor is the span max(x) - min(x). The earlier version divided
 # by max(x) alone, so results never reached 1; the outputs recorded further
 # down in this file (head(WBC_N), the predictions and the cross table) were
 # produced with that earlier formula and should be regenerated.
 normalize <- function(x) {
   rng <- range(x)
   span <- rng[2] - rng[1]
   if (!is.na(span) && span == 0) {
     # All values equal: x - min(x) is already a vector of zeros.
     return(x - rng[1])
   }
   (x - rng[1]) / span
 }
 # Two inputs with different scales but the same spacing normalize identically.
 normalize(c(1,2,3,4,5))
## [1] 0.00 0.25 0.50 0.75 1.00
 normalize(c(10,20,30,40,50))
## [1] 0.00 0.25 0.50 0.75 1.00
 # Apply normalize() to each of the 30 feature columns (2 through 31) and
 # collect the results in a data frame. as.data.frame() makes the names
 # syntactic, e.g. `concave points_mean` becomes concave.points_mean.
 WBC_N <- as.data.frame(lapply(X = WBC1[2:31], FUN = normalize))
 head(WBC_N)
##   radius_mean texture_mean perimeter_mean area_mean smoothness_mean
## 1   0.3916400   0.01705703      0.4191512 0.3428629       0.4025092
## 2   0.4834223   0.20519348      0.4727321 0.4728109       0.1965116
## 3   0.4521167   0.29378819      0.4573475 0.4236305       0.3486536
## 4   0.1579153   0.27163951      0.1792573 0.0970012       0.5500000
## 5   0.4734614   0.11787169      0.4844032 0.4612155       0.2917381
## 6   0.1945571   0.15249491      0.2057294 0.1333866       0.4600367
##   compactness_mean concavity_mean concave.points_mean symmetry_mean
## 1        0.7475970      0.7031396           0.7311133     0.4470395
## 2        0.1715692      0.2036082           0.3487575     0.2473684
## 3        0.4068327      0.4625117           0.6356859     0.3319079
## 4        0.7658367      0.5656045           0.5228628     0.5055921
## 5        0.3283729      0.4639175           0.5183897     0.2463816
## 6        0.4360741      0.3697282           0.4020378     0.3378289
##   fractal_dimension_mean  radius_se texture_se perimeter_se    area_se
## 1             0.29505337 0.34232510 0.11158649   0.35632393 0.27037624
## 2             0.06886289 0.15036547 0.07649949   0.12015469 0.12408336
## 3             0.10293514 0.22071006 0.08734903   0.17415833 0.16087790
## 4             0.48727422 0.13369300 0.16290686   0.12229299 0.03767613
## 5             0.09103038 0.22474765 0.08620266   0.21296633 0.16163408
## 6             0.26857553 0.07761921 0.10849539   0.06642402 0.03760236
##   smoothness_se compactness_se concavity_se concave.points_se symmetry_se
## 1     0.1505300     0.34555391   0.13568182         0.3006251   0.2805320
## 2     0.1128172     0.07997046   0.04696970         0.2538360   0.0760988
## 3     0.1425313     0.27923191   0.09676768         0.3898466   0.1851552
## 4     0.2376164     0.53418021   0.14295455         0.3536655   0.6554528
## 5     0.3140700     0.16512555   0.14363636         0.3570752   0.1225839
## 6     0.1862191     0.23041359   0.09272727         0.2153817   0.1743889
##   fractal_dimension_se radius_worst texture_worst perimeter_worst area_worst
## 1           0.17755362    0.4841842    0.10718611       0.5341959 0.43107663
## 2           0.08837802    0.4733629    0.22991522       0.4314889 0.41626704
## 3           0.12319705    0.4339623    0.27270892       0.4064092 0.35820404
## 4           0.27859249    0.1936737    0.29228906       0.1929140 0.08991537
## 5           0.14142761    0.4053829    0.09386354       0.4052150 0.32670428
## 6           0.14032172    0.2092120    0.23677836       0.2109475 0.13079455
##   smoothness_worst compactness_worst concavity_worst concave.points_worst
## 1        0.4089398         0.6033176       0.5686102            0.9120275
## 2        0.2364331         0.1505766       0.1929712            0.6391753
## 3        0.3289757         0.3754348       0.3597444            0.8350515
## 4        0.6227763         0.7930151       0.5486422            0.8848797
## 5        0.2975292         0.1679679       0.3194888            0.5584192
## 6        0.4848607         0.4703308       0.4277157            0.5982818
##   symmetry_worst fractal_dimension_worst
## 1      0.4573667               0.3077590
## 2      0.1785176               0.1637590
## 3      0.3085267               0.1568193
## 4      0.7642362               0.5684819
## 5      0.1203676               0.1047711
## 6      0.3645676               0.3342651
 # Features: rows 1-469 are used for training, rows 470-569 for testing.
 WBCD_train <- WBC_N[seq_len(469), ]
 WBCD_test <- WBC_N[seq(470, 569), ]
 
 # Matching labels, kept as one-column tibbles (column 1 of WBC1 is diagnosis).
 WBCD_train_labels <- WBC1[seq_len(469), "diagnosis"]
 WBCD_test_labels <- WBC1[seq(470, 569), "diagnosis"]
 
 library(class)
## Warning: package 'class' was built under R version 4.3.3
 # Classify every test row with k-nearest neighbours (k = 21), using the
 # training features and their diagnosis labels; then print the predictions.
 WBCD_test_pred <- knn(
   train = WBCD_train,
   test = WBCD_test,
   cl = WBCD_train_labels$diagnosis,
   k = 21
 )
 WBCD_test_pred
##   [1] Benign    Benign    Benign    Benign    Benign    Benign    Benign   
##   [8] Benign    Benign    Benign    Malignant Benign    Benign    Benign   
##  [15] Benign    Benign    Benign    Benign    Malignant Benign    Benign   
##  [22] Benign    Benign    Malignant Benign    Benign    Benign    Benign   
##  [29] Benign    Malignant Malignant Benign    Malignant Benign    Malignant
##  [36] Benign    Benign    Benign    Benign    Benign    Malignant Benign   
##  [43] Benign    Malignant Benign    Benign    Benign    Malignant Malignant
##  [50] Benign    Benign    Benign    Malignant Benign    Benign    Benign   
##  [57] Benign    Benign    Benign    Benign    Benign    Benign    Benign   
##  [64] Benign    Malignant Benign    Malignant Malignant Benign    Benign   
##  [71] Benign    Benign    Benign    Benign    Benign    Benign    Benign   
##  [78] Benign    Benign    Benign    Benign    Benign    Benign    Benign   
##  [85] Benign    Benign    Benign    Benign    Benign    Benign    Benign   
##  [92] Benign    Benign    Malignant Malignant Malignant Malignant Malignant
##  [99] Malignant Benign   
## Levels: Benign Malignant
 library(gmodels)
## Warning: package 'gmodels' was built under R version 4.3.2
 # Cross-tabulate actual (rows) against predicted (columns) diagnoses,
 # leaving out the per-cell chi-square contributions.
 CrossTable(
   x = WBCD_test_labels$diagnosis,
   y = WBCD_test_pred,
   prop.chisq = FALSE
 )
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  100 
## 
##  
##                            | WBCD_test_pred 
## WBCD_test_labels$diagnosis |    Benign | Malignant | Row Total | 
## ---------------------------|-----------|-----------|-----------|
##                     Benign |        77 |         0 |        77 | 
##                            |     1.000 |     0.000 |     0.770 | 
##                            |     0.975 |     0.000 |           | 
##                            |     0.770 |     0.000 |           | 
## ---------------------------|-----------|-----------|-----------|
##                  Malignant |         2 |        21 |        23 | 
##                            |     0.087 |     0.913 |     0.230 | 
##                            |     0.025 |     1.000 |           | 
##                            |     0.020 |     0.210 |           | 
## ---------------------------|-----------|-----------|-----------|
##               Column Total |        79 |        21 |       100 | 
##                            |     0.790 |     0.210 |           | 
## ---------------------------|-----------|-----------|-----------|
## 
##