rm(list=ls())
setwd("c:/R")
ls()
## character(0)
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(recipes)
##
## 다음의 패키지를 부착합니다: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(caret)
## 필요한 패키지를 로딩중입니다: ggplot2
## 필요한 패키지를 로딩중입니다: lattice
df<-read.csv("wbc.csv")
df %>% glimpse()
## Rows: 569
## Columns: 32
## $ id <int> 842302, 842517, 84300903, 84348301, 84358402, ~
## $ diagnosis <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "~
## $ radius_mean <dbl> 17.990, 20.570, 19.690, 11.420, 20.290, 12.450~
## $ texture_mean <dbl> 10.38, 17.77, 21.25, 20.38, 14.34, 15.70, 19.9~
## $ perimeter_mean <dbl> 122.80, 132.90, 130.00, 77.58, 135.10, 82.57, ~
## $ area_mean <dbl> 1001.0, 1326.0, 1203.0, 386.1, 1297.0, 477.1, ~
## $ smoothness_mean <dbl> 0.11840, 0.08474, 0.10960, 0.14250, 0.10030, 0~
## $ compactness_mean <dbl> 0.27760, 0.07864, 0.15990, 0.28390, 0.13280, 0~
## $ concavity_mean <dbl> 0.30010, 0.08690, 0.19740, 0.24140, 0.19800, 0~
## $ concave.points_mean <dbl> 0.14710, 0.07017, 0.12790, 0.10520, 0.10430, 0~
## $ symmetry_mean <dbl> 0.2419, 0.1812, 0.2069, 0.2597, 0.1809, 0.2087~
## $ fractal_dimension_mean <dbl> 0.07871, 0.05667, 0.05999, 0.09744, 0.05883, 0~
## $ radius_se <dbl> 1.0950, 0.5435, 0.7456, 0.4956, 0.7572, 0.3345~
## $ texture_se <dbl> 0.9053, 0.7339, 0.7869, 1.1560, 0.7813, 0.8902~
## $ perimeter_se <dbl> 8.589, 3.398, 4.585, 3.445, 5.438, 2.217, 3.18~
## $ area_se <dbl> 153.40, 74.08, 94.03, 27.23, 94.44, 27.19, 53.~
## $ smoothness_se <dbl> 0.006399, 0.005225, 0.006150, 0.009110, 0.0114~
## $ compactness_se <dbl> 0.049040, 0.013080, 0.040060, 0.074580, 0.0246~
## $ concavity_se <dbl> 0.05373, 0.01860, 0.03832, 0.05661, 0.05688, 0~
## $ concave.points_se <dbl> 0.015870, 0.013400, 0.020580, 0.018670, 0.0188~
## $ symmetry_se <dbl> 0.03003, 0.01389, 0.02250, 0.05963, 0.01756, 0~
## $ fractal_dimension_se <dbl> 0.006193, 0.003532, 0.004571, 0.009208, 0.0051~
## $ radius_worst <dbl> 25.38, 24.99, 23.57, 14.91, 22.54, 15.47, 22.8~
## $ texture_worst <dbl> 17.33, 23.41, 25.53, 26.50, 16.67, 23.75, 27.6~
## $ perimeter_worst <dbl> 184.60, 158.80, 152.50, 98.87, 152.20, 103.40,~
## $ area_worst <dbl> 2019.0, 1956.0, 1709.0, 567.7, 1575.0, 741.6, ~
## $ smoothness_worst <dbl> 0.1622, 0.1238, 0.1444, 0.2098, 0.1374, 0.1791~
## $ compactness_worst <dbl> 0.6656, 0.1866, 0.4245, 0.8663, 0.2050, 0.5249~
## $ concavity_worst <dbl> 0.71190, 0.24160, 0.45040, 0.68690, 0.40000, 0~
## $ concave.points_worst <dbl> 0.26540, 0.18600, 0.24300, 0.25750, 0.16250, 0~
## $ symmetry_worst <dbl> 0.4601, 0.2750, 0.3613, 0.6638, 0.2364, 0.3985~
## $ fractal_dimension_worst <dbl> 0.11890, 0.08902, 0.08758, 0.17300, 0.07678, 0~
View(df)
set.seed(1357)
train_list<-createDataPartition(y=df$diagnosis,p=0.7,list = FALSE)
class(train_list)
## [1] "matrix" "array"
train_list
## Resample1
## [1,] 2
## [2,] 3
## [3,] 4
## [4,] 6
## [5,] 7
## [6,] 9
## [7,] 10
## [8,] 11
## [9,] 12
## [10,] 13
## [11,] 14
## [12,] 15
## [13,] 16
## [14,] 18
## [15,] 19
## [16,] 20
## [17,] 23
## [18,] 24
## [19,] 25
## [20,] 27
## [21,] 29
## [22,] 30
## [23,] 32
## [24,] 33
## [25,] 34
## [26,] 35
## [27,] 36
## [28,] 37
## [29,] 38
## [30,] 39
## [31,] 40
## [32,] 44
## [33,] 45
## [34,] 46
## [35,] 47
## [36,] 48
## [37,] 49
## [38,] 55
## [39,] 56
## [40,] 59
## [41,] 60
## [42,] 62
## [43,] 63
## [44,] 64
## [45,] 66
## [46,] 67
## [47,] 68
## [48,] 69
## [49,] 73
## [50,] 74
## [51,] 75
## [52,] 76
## [53,] 77
## [54,] 78
## [55,] 81
## [56,] 82
## [57,] 84
## [58,] 85
## [59,] 87
## [60,] 88
## [61,] 90
## [62,] 92
## [63,] 93
## [64,] 94
## [65,] 95
## [66,] 97
## [67,] 98
## [68,] 99
## [69,] 100
## [70,] 101
## [71,] 102
## [72,] 103
## [73,] 105
## [74,] 106
## [75,] 108
## [76,] 109
## [77,] 110
## [78,] 111
## [79,] 113
## [80,] 114
## [81,] 115
## [82,] 117
## [83,] 120
## [84,] 122
## [85,] 124
## [86,] 126
## [87,] 127
## [88,] 128
## [89,] 129
## [90,] 130
## [91,] 133
## [92,] 134
## [93,] 136
## [94,] 137
## [95,] 139
## [96,] 140
## [97,] 141
## [98,] 143
## [99,] 144
## [100,] 145
## [101,] 146
## [102,] 147
## [103,] 148
## [104,] 149
## [105,] 152
## [106,] 153
## [107,] 154
## [108,] 156
## [109,] 157
## [110,] 158
## [111,] 160
## [112,] 163
## [113,] 164
## [114,] 165
## [115,] 166
## [116,] 168
## [117,] 169
## [118,] 170
## [119,] 171
## [120,] 172
## [121,] 173
## [122,] 174
## [123,] 175
## [124,] 176
## [125,] 177
## [126,] 178
## [127,] 180
## [128,] 182
## [129,] 185
## [130,] 186
## [131,] 187
## [132,] 196
## [133,] 197
## [134,] 198
## [135,] 199
## [136,] 201
## [137,] 203
## [138,] 204
## [139,] 205
## [140,] 206
## [141,] 208
## [142,] 209
## [143,] 210
## [144,] 211
## [145,] 212
## [146,] 215
## [147,] 216
## [148,] 217
## [149,] 218
## [150,] 219
## [151,] 221
## [152,] 222
## [153,] 223
## [154,] 224
## [155,] 228
## [156,] 230
## [157,] 231
## [158,] 233
## [159,] 234
## [160,] 235
## [161,] 236
## [162,] 237
## [163,] 238
## [164,] 239
## [165,] 240
## [166,] 241
## [167,] 242
## [168,] 243
## [169,] 245
## [170,] 246
## [171,] 247
## [172,] 248
## [173,] 249
## [174,] 250
## [175,] 251
## [176,] 252
## [177,] 255
## [178,] 257
## [179,] 258
## [180,] 260
## [181,] 261
## [182,] 262
## [183,] 265
## [184,] 266
## [185,] 267
## [186,] 268
## [187,] 269
## [188,] 270
## [189,] 271
## [190,] 272
## [191,] 274
## [192,] 275
## [193,] 276
## [194,] 281
## [195,] 282
## [196,] 283
## [197,] 285
## [198,] 286
## [199,] 288
## [200,] 289
## [201,] 291
## [202,] 292
## [203,] 293
## [204,] 294
## [205,] 295
## [206,] 296
## [207,] 297
## [208,] 300
## [209,] 302
## [210,] 303
## [211,] 304
## [212,] 305
## [213,] 307
## [214,] 309
## [215,] 310
## [216,] 311
## [217,] 312
## [218,] 313
## [219,] 315
## [220,] 316
## [221,] 318
## [222,] 319
## [223,] 320
## [224,] 321
## [225,] 322
## [226,] 323
## [227,] 324
## [228,] 325
## [229,] 328
## [230,] 329
## [231,] 330
## [232,] 331
## [233,] 333
## [234,] 334
## [235,] 335
## [236,] 336
## [237,] 337
## [238,] 338
## [239,] 342
## [240,] 346
## [241,] 348
## [242,] 350
## [243,] 352
## [244,] 356
## [245,] 357
## [246,] 358
## [247,] 361
## [248,] 363
## [249,] 365
## [250,] 366
## [251,] 367
## [252,] 370
## [253,] 371
## [254,] 372
## [255,] 373
## [256,] 374
## [257,] 375
## [258,] 376
## [259,] 377
## [260,] 378
## [261,] 379
## [262,] 380
## [263,] 383
## [264,] 387
## [265,] 388
## [266,] 389
## [267,] 390
## [268,] 391
## [269,] 394
## [270,] 395
## [271,] 396
## [272,] 398
## [273,] 399
## [274,] 400
## [275,] 401
## [276,] 403
## [277,] 404
## [278,] 406
## [279,] 409
## [280,] 410
## [281,] 411
## [282,] 414
## [283,] 415
## [284,] 416
## [285,] 418
## [286,] 419
## [287,] 420
## [288,] 422
## [289,] 423
## [290,] 424
## [291,] 425
## [292,] 426
## [293,] 427
## [294,] 428
## [295,] 429
## [296,] 430
## [297,] 431
## [298,] 432
## [299,] 433
## [300,] 434
## [301,] 435
## [302,] 436
## [303,] 438
## [304,] 439
## [305,] 440
## [306,] 441
## [307,] 442
## [308,] 444
## [309,] 445
## [310,] 446
## [311,] 447
## [312,] 448
## [313,] 449
## [314,] 450
## [315,] 451
## [316,] 453
## [317,] 454
## [318,] 457
## [319,] 458
## [320,] 459
## [321,] 462
## [322,] 463
## [323,] 464
## [324,] 466
## [325,] 467
## [326,] 469
## [327,] 471
## [328,] 472
## [329,] 473
## [330,] 475
## [331,] 477
## [332,] 478
## [333,] 479
## [334,] 480
## [335,] 481
## [336,] 482
## [337,] 484
## [338,] 485
## [339,] 486
## [340,] 487
## [341,] 488
## [342,] 490
## [343,] 491
## [344,] 492
## [345,] 493
## [346,] 494
## [347,] 497
## [348,] 498
## [349,] 499
## [350,] 501
## [351,] 502
## [352,] 503
## [353,] 504
## [354,] 505
## [355,] 506
## [356,] 507
## [357,] 508
## [358,] 509
## [359,] 511
## [360,] 515
## [361,] 516
## [362,] 517
## [363,] 519
## [364,] 520
## [365,] 521
## [366,] 522
## [367,] 523
## [368,] 524
## [369,] 525
## [370,] 526
## [371,] 528
## [372,] 530
## [373,] 533
## [374,] 536
## [375,] 537
## [376,] 538
## [377,] 539
## [378,] 540
## [379,] 543
## [380,] 545
## [381,] 546
## [382,] 548
## [383,] 549
## [384,] 551
## [385,] 552
## [386,] 553
## [387,] 555
## [388,] 556
## [389,] 557
## [390,] 558
## [391,] 559
## [392,] 560
## [393,] 561
## [394,] 562
## [395,] 563
## [396,] 564
## [397,] 565
## [398,] 567
## [399,] 569
df_train<-df[train_list,]
df_test<-df[-train_list,]
NROW(df_train)
## [1] 399
NROW(df_test)
## [1] 170
df_train %>% glimpse()
## Rows: 399
## Columns: 32
## $ id <int> 842517, 84300903, 84348301, 843786, 844359, 84~
## $ diagnosis <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "~
## $ radius_mean <dbl> 20.57, 19.69, 11.42, 12.45, 18.25, 13.00, 12.4~
## $ texture_mean <dbl> 17.77, 21.25, 20.38, 15.70, 19.98, 21.82, 24.0~
## $ perimeter_mean <dbl> 132.90, 130.00, 77.58, 82.57, 119.60, 87.50, 8~
## $ area_mean <dbl> 1326.0, 1203.0, 386.1, 477.1, 1040.0, 519.8, 4~
## $ smoothness_mean <dbl> 0.08474, 0.10960, 0.14250, 0.12780, 0.09463, 0~
## $ compactness_mean <dbl> 0.07864, 0.15990, 0.28390, 0.17000, 0.10900, 0~
## $ concavity_mean <dbl> 0.08690, 0.19740, 0.24140, 0.15780, 0.11270, 0~
## $ concave.points_mean <dbl> 0.07017, 0.12790, 0.10520, 0.08089, 0.07400, 0~
## $ symmetry_mean <dbl> 0.1812, 0.2069, 0.2597, 0.2087, 0.1794, 0.2350~
## $ fractal_dimension_mean <dbl> 0.05667, 0.05999, 0.09744, 0.07613, 0.05742, 0~
## $ radius_se <dbl> 0.5435, 0.7456, 0.4956, 0.3345, 0.4467, 0.3063~
## $ texture_se <dbl> 0.7339, 0.7869, 1.1560, 0.8902, 0.7732, 1.0020~
## $ perimeter_se <dbl> 3.398, 4.585, 3.445, 2.217, 3.180, 2.406, 2.03~
## $ area_se <dbl> 74.08, 94.03, 27.23, 27.19, 53.91, 24.32, 23.9~
## $ smoothness_se <dbl> 0.005225, 0.006150, 0.009110, 0.007510, 0.0043~
## $ compactness_se <dbl> 0.013080, 0.040060, 0.074580, 0.033450, 0.0138~
## $ concavity_se <dbl> 0.01860, 0.03832, 0.05661, 0.03672, 0.02254, 0~
## $ concave.points_se <dbl> 0.013400, 0.020580, 0.018670, 0.011370, 0.0103~
## $ symmetry_se <dbl> 0.01389, 0.02250, 0.05963, 0.02165, 0.01369, 0~
## $ fractal_dimension_se <dbl> 0.003532, 0.004571, 0.009208, 0.005082, 0.0021~
## $ radius_worst <dbl> 24.99, 23.57, 14.91, 15.47, 22.88, 15.49, 15.0~
## $ texture_worst <dbl> 23.41, 25.53, 26.50, 23.75, 27.66, 30.73, 40.6~
## $ perimeter_worst <dbl> 158.80, 152.50, 98.87, 103.40, 153.20, 106.20,~
## $ area_worst <dbl> 1956.0, 1709.0, 567.7, 741.6, 1606.0, 739.3, 7~
## $ smoothness_worst <dbl> 0.1238, 0.1444, 0.2098, 0.1791, 0.1442, 0.1703~
## $ compactness_worst <dbl> 0.1866, 0.4245, 0.8663, 0.5249, 0.2576, 0.5401~
## $ concavity_worst <dbl> 0.2416, 0.4504, 0.6869, 0.5355, 0.3784, 0.5390~
## $ concave.points_worst <dbl> 0.18600, 0.24300, 0.25750, 0.17410, 0.19320, 0~
## $ symmetry_worst <dbl> 0.2750, 0.3613, 0.6638, 0.3985, 0.3063, 0.4378~
## $ fractal_dimension_worst <dbl> 0.08902, 0.08758, 0.17300, 0.12440, 0.08368, 0~
df_test %>% glimpse()
## Rows: 170
## Columns: 32
## $ id <int> 842302, 84358402, 84458202, 848406, 8510653, 8~
## $ diagnosis <chr> "M", "M", "M", "M", "B", "B", "M", "M", "M", "~
## $ radius_mean <dbl> 17.990, 20.290, 13.710, 14.680, 13.080, 9.504,~
## $ texture_mean <dbl> 10.38, 14.34, 20.83, 20.13, 15.71, 12.44, 16.4~
## $ perimeter_mean <dbl> 122.80, 135.10, 90.20, 94.74, 85.63, 60.34, 11~
## $ area_mean <dbl> 1001.0, 1297.0, 577.9, 684.5, 520.0, 273.9, 91~
## $ smoothness_mean <dbl> 0.11840, 0.10030, 0.11890, 0.09867, 0.10750, 0~
## $ compactness_mean <dbl> 0.27760, 0.13280, 0.16450, 0.07200, 0.12700, 0~
## $ concavity_mean <dbl> 0.30010, 0.19800, 0.09366, 0.07395, 0.04568, 0~
## $ concave.points_mean <dbl> 0.14710, 0.10430, 0.05985, 0.05259, 0.03110, 0~
## $ symmetry_mean <dbl> 0.2419, 0.1809, 0.2196, 0.1586, 0.1967, 0.1815~
## $ fractal_dimension_mean <dbl> 0.07871, 0.05883, 0.07451, 0.05922, 0.06811, 0~
## $ radius_se <dbl> 1.0950, 0.7572, 0.5835, 0.4727, 0.1852, 0.2773~
## $ texture_se <dbl> 0.9053, 0.7813, 1.3770, 1.2400, 0.7477, 0.9768~
## $ perimeter_se <dbl> 8.589, 5.438, 3.856, 3.195, 1.383, 1.909, 7.27~
## $ area_se <dbl> 153.40, 94.44, 50.96, 45.40, 14.67, 15.70, 111~
## $ smoothness_se <dbl> 0.006399, 0.011490, 0.008805, 0.005718, 0.0040~
## $ compactness_se <dbl> 0.049040, 0.024610, 0.030290, 0.011620, 0.0189~
## $ concavity_se <dbl> 0.053730, 0.056880, 0.024880, 0.019980, 0.0169~
## $ concave.points_se <dbl> 0.015870, 0.018850, 0.014480, 0.011090, 0.0064~
## $ symmetry_se <dbl> 0.03003, 0.01756, 0.01486, 0.01410, 0.01678, 0~
## $ fractal_dimension_se <dbl> 0.006193, 0.005115, 0.005412, 0.002085, 0.0024~
## $ radius_worst <dbl> 25.380, 22.540, 17.060, 19.070, 14.500, 10.230~
## $ texture_worst <dbl> 17.33, 16.67, 28.14, 30.88, 20.49, 15.66, 21.4~
## $ perimeter_worst <dbl> 184.60, 152.20, 110.60, 123.40, 96.09, 65.13, ~
## $ area_worst <dbl> 2019.0, 1575.0, 897.0, 1138.0, 630.5, 314.9, 1~
## $ smoothness_worst <dbl> 0.1622, 0.1374, 0.1654, 0.1464, 0.1312, 0.1324~
## $ compactness_worst <dbl> 0.66560, 0.20500, 0.36820, 0.18710, 0.27760, 0~
## $ concavity_worst <dbl> 0.71190, 0.40000, 0.26780, 0.29140, 0.18900, 0~
## $ concave.points_worst <dbl> 0.26540, 0.16250, 0.15560, 0.16090, 0.07283, 0~
## $ symmetry_worst <dbl> 0.4601, 0.2364, 0.3196, 0.3029, 0.3184, 0.2450~
## $ fractal_dimension_worst <dbl> 0.11890, 0.07678, 0.11510, 0.08216, 0.08183, 0~
df_train %>% mutate(index="train")->df_train
df_test %>% mutate(index='test')->df_test
bind_rows(df_train,df_test)->full
full %>% glimpse()
## Rows: 569
## Columns: 33
## $ id <int> 842517, 84300903, 84348301, 843786, 844359, 84~
## $ diagnosis <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "~
## $ radius_mean <dbl> 20.57, 19.69, 11.42, 12.45, 18.25, 13.00, 12.4~
## $ texture_mean <dbl> 17.77, 21.25, 20.38, 15.70, 19.98, 21.82, 24.0~
## $ perimeter_mean <dbl> 132.90, 130.00, 77.58, 82.57, 119.60, 87.50, 8~
## $ area_mean <dbl> 1326.0, 1203.0, 386.1, 477.1, 1040.0, 519.8, 4~
## $ smoothness_mean <dbl> 0.08474, 0.10960, 0.14250, 0.12780, 0.09463, 0~
## $ compactness_mean <dbl> 0.07864, 0.15990, 0.28390, 0.17000, 0.10900, 0~
## $ concavity_mean <dbl> 0.08690, 0.19740, 0.24140, 0.15780, 0.11270, 0~
## $ concave.points_mean <dbl> 0.07017, 0.12790, 0.10520, 0.08089, 0.07400, 0~
## $ symmetry_mean <dbl> 0.1812, 0.2069, 0.2597, 0.2087, 0.1794, 0.2350~
## $ fractal_dimension_mean <dbl> 0.05667, 0.05999, 0.09744, 0.07613, 0.05742, 0~
## $ radius_se <dbl> 0.5435, 0.7456, 0.4956, 0.3345, 0.4467, 0.3063~
## $ texture_se <dbl> 0.7339, 0.7869, 1.1560, 0.8902, 0.7732, 1.0020~
## $ perimeter_se <dbl> 3.398, 4.585, 3.445, 2.217, 3.180, 2.406, 2.03~
## $ area_se <dbl> 74.08, 94.03, 27.23, 27.19, 53.91, 24.32, 23.9~
## $ smoothness_se <dbl> 0.005225, 0.006150, 0.009110, 0.007510, 0.0043~
## $ compactness_se <dbl> 0.013080, 0.040060, 0.074580, 0.033450, 0.0138~
## $ concavity_se <dbl> 0.01860, 0.03832, 0.05661, 0.03672, 0.02254, 0~
## $ concave.points_se <dbl> 0.013400, 0.020580, 0.018670, 0.011370, 0.0103~
## $ symmetry_se <dbl> 0.01389, 0.02250, 0.05963, 0.02165, 0.01369, 0~
## $ fractal_dimension_se <dbl> 0.003532, 0.004571, 0.009208, 0.005082, 0.0021~
## $ radius_worst <dbl> 24.99, 23.57, 14.91, 15.47, 22.88, 15.49, 15.0~
## $ texture_worst <dbl> 23.41, 25.53, 26.50, 23.75, 27.66, 30.73, 40.6~
## $ perimeter_worst <dbl> 158.80, 152.50, 98.87, 103.40, 153.20, 106.20,~
## $ area_worst <dbl> 1956.0, 1709.0, 567.7, 741.6, 1606.0, 739.3, 7~
## $ smoothness_worst <dbl> 0.1238, 0.1444, 0.2098, 0.1791, 0.1442, 0.1703~
## $ compactness_worst <dbl> 0.1866, 0.4245, 0.8663, 0.5249, 0.2576, 0.5401~
## $ concavity_worst <dbl> 0.2416, 0.4504, 0.6869, 0.5355, 0.3784, 0.5390~
## $ concave.points_worst <dbl> 0.18600, 0.24300, 0.25750, 0.17410, 0.19320, 0~
## $ symmetry_worst <dbl> 0.2750, 0.3613, 0.6638, 0.3985, 0.3063, 0.4378~
## $ fractal_dimension_worst <dbl> 0.08902, 0.08758, 0.17300, 0.12440, 0.08368, 0~
## $ index <chr> "train", "train", "train", "train", "train", "~
full$diagnosis<-ifelse(full$diagnosis=="B","양성종양","악성종양")
full$diagnosis<-as.factor(full$diagnosis)
colSums(is.na(full))
## id diagnosis radius_mean
## 0 0 0
## texture_mean perimeter_mean area_mean
## 0 0 0
## smoothness_mean compactness_mean concavity_mean
## 0 0 0
## concave.points_mean symmetry_mean fractal_dimension_mean
## 0 0 0
## radius_se texture_se perimeter_se
## 0 0 0
## area_se smoothness_se compactness_se
## 0 0 0
## concavity_se concave.points_se symmetry_se
## 0 0 0
## fractal_dimension_se radius_worst texture_worst
## 0 0 0
## perimeter_worst area_worst smoothness_worst
## 0 0 0
## compactness_worst concavity_worst concave.points_worst
## 0 0 0
## symmetry_worst fractal_dimension_worst index
## 0 0 0
summary(is.na(full))
## id diagnosis radius_mean texture_mean
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:569 FALSE:569 FALSE:569 FALSE:569
## perimeter_mean area_mean smoothness_mean compactness_mean
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:569 FALSE:569 FALSE:569 FALSE:569
## concavity_mean concave.points_mean symmetry_mean fractal_dimension_mean
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:569 FALSE:569 FALSE:569 FALSE:569
## radius_se texture_se perimeter_se area_se
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:569 FALSE:569 FALSE:569 FALSE:569
## smoothness_se compactness_se concavity_se concave.points_se
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:569 FALSE:569 FALSE:569 FALSE:569
## symmetry_se fractal_dimension_se radius_worst texture_worst
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:569 FALSE:569 FALSE:569 FALSE:569
## perimeter_worst area_worst smoothness_worst compactness_worst
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:569 FALSE:569 FALSE:569 FALSE:569
## concavity_worst concave.points_worst symmetry_worst fractal_dimension_worst
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:569 FALSE:569 FALSE:569 FALSE:569
## index
## Mode :logical
## FALSE:569
recipe(diagnosis~.,data=full) %>%
step_YeoJohnson(radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,
compactness_mean,concavity_mean,concave.points_mean,symmetry_mean,
fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,
smoothness_se,compactness_se,concavity_se,concave.points_se,symmetry_se,
fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,
area_worst,smoothness_worst,compactness_worst,concavity_worst,
concave.points_worst,symmetry_worst,fractal_dimension_worst) %>%
step_center(radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,
compactness_mean,concavity_mean,concave.points_mean,symmetry_mean,
fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,
smoothness_se,compactness_se,concavity_se,concave.points_se,symmetry_se,
fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,
area_worst,smoothness_worst,compactness_worst,concavity_worst,
concave.points_worst,symmetry_worst,fractal_dimension_worst) %>%
step_scale(radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,
compactness_mean,concavity_mean,concave.points_mean,symmetry_mean,
fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,
smoothness_se,compactness_se,concavity_se,concave.points_se,symmetry_se,
fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,
area_worst,smoothness_worst,compactness_worst,concavity_worst,
concave.points_worst,symmetry_worst,fractal_dimension_worst) %>%
prep() %>% juice()->data
data %>% glimpse()
## Rows: 569
## Columns: 33
## $ id <int> 842517, 84300903, 84348301, 843786, 844359, 84~
## $ radius_mean <dbl> 1.617924232, 1.463509328, -0.758594408, -0.367~
## $ texture_mean <dbl> -0.26414506, 0.54732445, 0.35740629, -0.824107~
## $ perimeter_mean <dbl> 1.52738014, 1.45338613, -0.51443375, -0.251015~
## $ area_mean <dbl> 1.6325123335, 1.4603611261, -0.8355030989, -0.~
## $ smoothness_mean <dbl> -0.82623545, 0.94138212, 3.28066684, 2.2354545~
## $ compactness_mean <dbl> -0.48664348, 1.05199990, 3.39991742, 1.2432415~
## $ concavity_mean <dbl> -0.02382489, 1.36227979, 1.91421287, 0.8655400~
## $ concave.points_mean <dbl> 0.54766227, 2.03543978, 1.45043113, 0.82393067~
## $ symmetry_mean <dbl> 0.001391139, 0.938858720, 2.864862154, 1.00451~
## $ fractal_dimension_mean <dbl> -0.86788881, -0.39765801, 4.90660199, 1.888343~
## $ radius_se <dbl> 0.957773674, 1.456535070, 0.790980012, 0.00581~
## $ texture_se <dbl> -0.99743439, -0.83324887, 0.09693632, -0.53833~
## $ perimeter_se <dbl> 0.73176211, 1.21196297, 0.75549828, -0.0841302~
## $ area_se <dbl> 1.2757387, 1.4947914, 0.0454752, 0.0432361, 0.~
## $ smoothness_se <dbl> -0.604818670, -0.296743909, 0.689095329, 0.156~
## $ compactness_se <dbl> -0.69231710, 0.81425704, 2.74186785, 0.4451519~
## $ concavity_se <dbl> -0.4403925588, 0.2128891146, 0.8187979283, 0.1~
## $ concave.points_se <dbl> 0.25993335, 1.42357487, 1.11402678, -0.0690627~
## $ symmetry_se <dbl> -0.80474229, 0.23682715, 4.72851977, 0.1340009~
## $ fractal_dimension_se <dbl> -0.099356317, 0.293301330, 2.045710868, 0.4864~
## $ radius_worst <dbl> 1.57730024, 1.41850851, -0.08361851, 0.0560057~
## $ texture_worst <dbl> -0.28812729, 0.07134428, 0.22788904, -0.228767~
## $ perimeter_worst <dbl> 1.429358435, 1.320779419, -0.039445038, 0.1195~
## $ area_worst <dbl> 1.608609957, 1.424055444, -0.436477155, 0.0789~
## $ smoothness_worst <dbl> -0.3752817, 0.5269438, 3.3912907, 2.0467119, 0~
## $ compactness_worst <dbl> -0.2963198, 1.2086378, 2.2802712, 1.5744314, 0~
## $ concavity_worst <dbl> 0.07068341, 1.00462771, 1.67361473, 1.28100735~
## $ concave.points_worst <dbl> 1.10062517, 1.72122879, 1.86074045, 0.95470737~
## $ symmetry_worst <dbl> -0.2436753, 1.1512420, 6.0407261, 1.7525273, 0~
## $ fractal_dimension_worst <dbl> 0.28094279, 0.20121416, 4.93067187, 2.23983079~
## $ index <fct> train, train, train, train, train, train, trai~
## $ diagnosis <fct> 악성종양, 악성종양, 악성종양, 악성종양, 악성종~
View(data)
data %>% filter(index=="train") %>% select(-index)->train
data %>% filter(index=='test') %>% select(-index)->test
ctrl<-trainControl(method="cv",summaryFunction = twoClassSummary,classProbs = TRUE)
train(diagnosis~.,data=train,method='rpart',metric="ROC",trControl=ctrl)->rpfit
rpfit
## CART
##
## 399 samples
## 31 predictor
## 2 classes: '악성종양', '양성종양'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 359, 359, 359, 359, 359, 359, ...
## Resampling results across tuning parameters:
##
## cp ROC Sens Spec
## 0.02013423 0.9242952 0.8785714 0.944
## 0.09395973 0.8608857 0.7785714 0.940
## 0.77852349 0.6633333 0.3666667 0.960
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02013423.
confusionMatrix(rpfit)
## Cross-Validated (10 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction 악성종양 양성종양
## 악성종양 32.8 3.5
## 양성종양 4.5 59.1
##
## Accuracy (average) : 0.9198
predict(rpfit,test,type='prob')->rffit1
predict(rpfit,test,type="raw")->rffit2
head(rffit1)
## 악성종양 양성종양
## 1 0.94615385 0.05384615
## 2 0.94615385 0.05384615
## 3 0.94615385 0.05384615
## 4 0.94615385 0.05384615
## 5 0.03984064 0.96015936
## 6 0.03984064 0.96015936
head(rffit2)
## [1] 악성종양 악성종양 악성종양 악성종양 양성종양 양성종양
## Levels: 악성종양 양성종양
confusionMatrix(rffit2,test$diagnosis)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 악성종양 양성종양
## 악성종양 57 3
## 양성종양 6 104
##
## Accuracy : 0.9471
## 95% CI : (0.9019, 0.9755)
## No Information Rate : 0.6294
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8854
##
## Mcnemar's Test P-Value : 0.505
##
## Sensitivity : 0.9048
## Specificity : 0.9720
## Pos Pred Value : 0.9500
## Neg Pred Value : 0.9455
## Prevalence : 0.3706
## Detection Rate : 0.3353
## Detection Prevalence : 0.3529
## Balanced Accuracy : 0.9384
##
## 'Positive' Class : 악성종양
##
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## 다음의 패키지를 부착합니다: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
rffit2_num<-as.numeric(rffit2)
rffit2_num
## [1] 1 1 1 1 2 2 1 1 1 2 2 1 2 2 2 2 1 1 1 2 1 2 1 2 1 2 1 1 2 2 1 2 2 2 2 1 1
## [38] 2 1 2 2 1 1 2 1 2 2 2 2 2 1 2 2 1 1 2 2 2 2 1 2 2 2 1 1 1 2 1 1 1 2 1 2 2
## [75] 2 2 1 1 2 1 1 1 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 1 1
## [112] 2 2 2 2 1 2 1 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2
## [149] 2 2 1 1 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 1 1
result<-roc(test$diagnosis,rffit2_num)
## Setting levels: control = 악성종양, case = 양성종양
## Setting direction: controls < cases
result
##
## Call:
## roc.default(response = test$diagnosis, predictor = rffit2_num)
##
## Data: rffit2_num in 63 controls (test$diagnosis 악성종양) < 107 cases (test$diagnosis 양성종양).
## Area under the curve: 0.9384
result$auc
## Area under the curve: 0.9384