Untitled

rm(list=ls())
setwd("c:/R")
ls()

## character(0)

library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(recipes)

## 
## 다음의 패키지를 부착합니다: 'recipes'

## The following object is masked from 'package:stats':
## 
##     step

library(caret)

## 필요한 패키지를 로딩중입니다: ggplot2

## 필요한 패키지를 로딩중입니다: lattice

df<-read.csv("wbc.csv")
df %>% glimpse()

## Rows: 569
## Columns: 32
## $ id                      <int> 842302, 842517, 84300903, 84348301, 84358402, ~
## $ diagnosis               <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "~
## $ radius_mean             <dbl> 17.990, 20.570, 19.690, 11.420, 20.290, 12.450~
## $ texture_mean            <dbl> 10.38, 17.77, 21.25, 20.38, 14.34, 15.70, 19.9~
## $ perimeter_mean          <dbl> 122.80, 132.90, 130.00, 77.58, 135.10, 82.57, ~
## $ area_mean               <dbl> 1001.0, 1326.0, 1203.0, 386.1, 1297.0, 477.1, ~
## $ smoothness_mean         <dbl> 0.11840, 0.08474, 0.10960, 0.14250, 0.10030, 0~
## $ compactness_mean        <dbl> 0.27760, 0.07864, 0.15990, 0.28390, 0.13280, 0~
## $ concavity_mean          <dbl> 0.30010, 0.08690, 0.19740, 0.24140, 0.19800, 0~
## $ concave.points_mean     <dbl> 0.14710, 0.07017, 0.12790, 0.10520, 0.10430, 0~
## $ symmetry_mean           <dbl> 0.2419, 0.1812, 0.2069, 0.2597, 0.1809, 0.2087~
## $ fractal_dimension_mean  <dbl> 0.07871, 0.05667, 0.05999, 0.09744, 0.05883, 0~
## $ radius_se               <dbl> 1.0950, 0.5435, 0.7456, 0.4956, 0.7572, 0.3345~
## $ texture_se              <dbl> 0.9053, 0.7339, 0.7869, 1.1560, 0.7813, 0.8902~
## $ perimeter_se            <dbl> 8.589, 3.398, 4.585, 3.445, 5.438, 2.217, 3.18~
## $ area_se                 <dbl> 153.40, 74.08, 94.03, 27.23, 94.44, 27.19, 53.~
## $ smoothness_se           <dbl> 0.006399, 0.005225, 0.006150, 0.009110, 0.0114~
## $ compactness_se          <dbl> 0.049040, 0.013080, 0.040060, 0.074580, 0.0246~
## $ concavity_se            <dbl> 0.05373, 0.01860, 0.03832, 0.05661, 0.05688, 0~
## $ concave.points_se       <dbl> 0.015870, 0.013400, 0.020580, 0.018670, 0.0188~
## $ symmetry_se             <dbl> 0.03003, 0.01389, 0.02250, 0.05963, 0.01756, 0~
## $ fractal_dimension_se    <dbl> 0.006193, 0.003532, 0.004571, 0.009208, 0.0051~
## $ radius_worst            <dbl> 25.38, 24.99, 23.57, 14.91, 22.54, 15.47, 22.8~
## $ texture_worst           <dbl> 17.33, 23.41, 25.53, 26.50, 16.67, 23.75, 27.6~
## $ perimeter_worst         <dbl> 184.60, 158.80, 152.50, 98.87, 152.20, 103.40,~
## $ area_worst              <dbl> 2019.0, 1956.0, 1709.0, 567.7, 1575.0, 741.6, ~
## $ smoothness_worst        <dbl> 0.1622, 0.1238, 0.1444, 0.2098, 0.1374, 0.1791~
## $ compactness_worst       <dbl> 0.6656, 0.1866, 0.4245, 0.8663, 0.2050, 0.5249~
## $ concavity_worst         <dbl> 0.71190, 0.24160, 0.45040, 0.68690, 0.40000, 0~
## $ concave.points_worst    <dbl> 0.26540, 0.18600, 0.24300, 0.25750, 0.16250, 0~
## $ symmetry_worst          <dbl> 0.4601, 0.2750, 0.3613, 0.6638, 0.2364, 0.3985~
## $ fractal_dimension_worst <dbl> 0.11890, 0.08902, 0.08758, 0.17300, 0.07678, 0~

View(df)


set.seed(1357)

train_list<-createDataPartition(y=df$diagnosis,p=0.7,list = FALSE)
class(train_list)

## [1] "matrix" "array"

train_list

##        Resample1
##   [1,]         2
##   [2,]         3
##   [3,]         4
##   [4,]         6
##   [5,]         7
##   [6,]         9
##   [7,]        10
##   [8,]        11
##   [9,]        12
##  [10,]        13
##  [11,]        14
##  [12,]        15
##  [13,]        16
##  [14,]        18
##  [15,]        19
##  [16,]        20
##  [17,]        23
##  [18,]        24
##  [19,]        25
##  [20,]        27
##  [21,]        29
##  [22,]        30
##  [23,]        32
##  [24,]        33
##  [25,]        34
##  [26,]        35
##  [27,]        36
##  [28,]        37
##  [29,]        38
##  [30,]        39
##  [31,]        40
##  [32,]        44
##  [33,]        45
##  [34,]        46
##  [35,]        47
##  [36,]        48
##  [37,]        49
##  [38,]        55
##  [39,]        56
##  [40,]        59
##  [41,]        60
##  [42,]        62
##  [43,]        63
##  [44,]        64
##  [45,]        66
##  [46,]        67
##  [47,]        68
##  [48,]        69
##  [49,]        73
##  [50,]        74
##  [51,]        75
##  [52,]        76
##  [53,]        77
##  [54,]        78
##  [55,]        81
##  [56,]        82
##  [57,]        84
##  [58,]        85
##  [59,]        87
##  [60,]        88
##  [61,]        90
##  [62,]        92
##  [63,]        93
##  [64,]        94
##  [65,]        95
##  [66,]        97
##  [67,]        98
##  [68,]        99
##  [69,]       100
##  [70,]       101
##  [71,]       102
##  [72,]       103
##  [73,]       105
##  [74,]       106
##  [75,]       108
##  [76,]       109
##  [77,]       110
##  [78,]       111
##  [79,]       113
##  [80,]       114
##  [81,]       115
##  [82,]       117
##  [83,]       120
##  [84,]       122
##  [85,]       124
##  [86,]       126
##  [87,]       127
##  [88,]       128
##  [89,]       129
##  [90,]       130
##  [91,]       133
##  [92,]       134
##  [93,]       136
##  [94,]       137
##  [95,]       139
##  [96,]       140
##  [97,]       141
##  [98,]       143
##  [99,]       144
## [100,]       145
## [101,]       146
## [102,]       147
## [103,]       148
## [104,]       149
## [105,]       152
## [106,]       153
## [107,]       154
## [108,]       156
## [109,]       157
## [110,]       158
## [111,]       160
## [112,]       163
## [113,]       164
## [114,]       165
## [115,]       166
## [116,]       168
## [117,]       169
## [118,]       170
## [119,]       171
## [120,]       172
## [121,]       173
## [122,]       174
## [123,]       175
## [124,]       176
## [125,]       177
## [126,]       178
## [127,]       180
## [128,]       182
## [129,]       185
## [130,]       186
## [131,]       187
## [132,]       196
## [133,]       197
## [134,]       198
## [135,]       199
## [136,]       201
## [137,]       203
## [138,]       204
## [139,]       205
## [140,]       206
## [141,]       208
## [142,]       209
## [143,]       210
## [144,]       211
## [145,]       212
## [146,]       215
## [147,]       216
## [148,]       217
## [149,]       218
## [150,]       219
## [151,]       221
## [152,]       222
## [153,]       223
## [154,]       224
## [155,]       228
## [156,]       230
## [157,]       231
## [158,]       233
## [159,]       234
## [160,]       235
## [161,]       236
## [162,]       237
## [163,]       238
## [164,]       239
## [165,]       240
## [166,]       241
## [167,]       242
## [168,]       243
## [169,]       245
## [170,]       246
## [171,]       247
## [172,]       248
## [173,]       249
## [174,]       250
## [175,]       251
## [176,]       252
## [177,]       255
## [178,]       257
## [179,]       258
## [180,]       260
## [181,]       261
## [182,]       262
## [183,]       265
## [184,]       266
## [185,]       267
## [186,]       268
## [187,]       269
## [188,]       270
## [189,]       271
## [190,]       272
## [191,]       274
## [192,]       275
## [193,]       276
## [194,]       281
## [195,]       282
## [196,]       283
## [197,]       285
## [198,]       286
## [199,]       288
## [200,]       289
## [201,]       291
## [202,]       292
## [203,]       293
## [204,]       294
## [205,]       295
## [206,]       296
## [207,]       297
## [208,]       300
## [209,]       302
## [210,]       303
## [211,]       304
## [212,]       305
## [213,]       307
## [214,]       309
## [215,]       310
## [216,]       311
## [217,]       312
## [218,]       313
## [219,]       315
## [220,]       316
## [221,]       318
## [222,]       319
## [223,]       320
## [224,]       321
## [225,]       322
## [226,]       323
## [227,]       324
## [228,]       325
## [229,]       328
## [230,]       329
## [231,]       330
## [232,]       331
## [233,]       333
## [234,]       334
## [235,]       335
## [236,]       336
## [237,]       337
## [238,]       338
## [239,]       342
## [240,]       346
## [241,]       348
## [242,]       350
## [243,]       352
## [244,]       356
## [245,]       357
## [246,]       358
## [247,]       361
## [248,]       363
## [249,]       365
## [250,]       366
## [251,]       367
## [252,]       370
## [253,]       371
## [254,]       372
## [255,]       373
## [256,]       374
## [257,]       375
## [258,]       376
## [259,]       377
## [260,]       378
## [261,]       379
## [262,]       380
## [263,]       383
## [264,]       387
## [265,]       388
## [266,]       389
## [267,]       390
## [268,]       391
## [269,]       394
## [270,]       395
## [271,]       396
## [272,]       398
## [273,]       399
## [274,]       400
## [275,]       401
## [276,]       403
## [277,]       404
## [278,]       406
## [279,]       409
## [280,]       410
## [281,]       411
## [282,]       414
## [283,]       415
## [284,]       416
## [285,]       418
## [286,]       419
## [287,]       420
## [288,]       422
## [289,]       423
## [290,]       424
## [291,]       425
## [292,]       426
## [293,]       427
## [294,]       428
## [295,]       429
## [296,]       430
## [297,]       431
## [298,]       432
## [299,]       433
## [300,]       434
## [301,]       435
## [302,]       436
## [303,]       438
## [304,]       439
## [305,]       440
## [306,]       441
## [307,]       442
## [308,]       444
## [309,]       445
## [310,]       446
## [311,]       447
## [312,]       448
## [313,]       449
## [314,]       450
## [315,]       451
## [316,]       453
## [317,]       454
## [318,]       457
## [319,]       458
## [320,]       459
## [321,]       462
## [322,]       463
## [323,]       464
## [324,]       466
## [325,]       467
## [326,]       469
## [327,]       471
## [328,]       472
## [329,]       473
## [330,]       475
## [331,]       477
## [332,]       478
## [333,]       479
## [334,]       480
## [335,]       481
## [336,]       482
## [337,]       484
## [338,]       485
## [339,]       486
## [340,]       487
## [341,]       488
## [342,]       490
## [343,]       491
## [344,]       492
## [345,]       493
## [346,]       494
## [347,]       497
## [348,]       498
## [349,]       499
## [350,]       501
## [351,]       502
## [352,]       503
## [353,]       504
## [354,]       505
## [355,]       506
## [356,]       507
## [357,]       508
## [358,]       509
## [359,]       511
## [360,]       515
## [361,]       516
## [362,]       517
## [363,]       519
## [364,]       520
## [365,]       521
## [366,]       522
## [367,]       523
## [368,]       524
## [369,]       525
## [370,]       526
## [371,]       528
## [372,]       530
## [373,]       533
## [374,]       536
## [375,]       537
## [376,]       538
## [377,]       539
## [378,]       540
## [379,]       543
## [380,]       545
## [381,]       546
## [382,]       548
## [383,]       549
## [384,]       551
## [385,]       552
## [386,]       553
## [387,]       555
## [388,]       556
## [389,]       557
## [390,]       558
## [391,]       559
## [392,]       560
## [393,]       561
## [394,]       562
## [395,]       563
## [396,]       564
## [397,]       565
## [398,]       567
## [399,]       569

df_train<-df[train_list,]
df_test<-df[-train_list,]

NROW(df_train)

## [1] 399

NROW(df_test)

## [1] 170

df_train %>% glimpse()

## Rows: 399
## Columns: 32
## $ id                      <int> 842517, 84300903, 84348301, 843786, 844359, 84~
## $ diagnosis               <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "~
## $ radius_mean             <dbl> 20.57, 19.69, 11.42, 12.45, 18.25, 13.00, 12.4~
## $ texture_mean            <dbl> 17.77, 21.25, 20.38, 15.70, 19.98, 21.82, 24.0~
## $ perimeter_mean          <dbl> 132.90, 130.00, 77.58, 82.57, 119.60, 87.50, 8~
## $ area_mean               <dbl> 1326.0, 1203.0, 386.1, 477.1, 1040.0, 519.8, 4~
## $ smoothness_mean         <dbl> 0.08474, 0.10960, 0.14250, 0.12780, 0.09463, 0~
## $ compactness_mean        <dbl> 0.07864, 0.15990, 0.28390, 0.17000, 0.10900, 0~
## $ concavity_mean          <dbl> 0.08690, 0.19740, 0.24140, 0.15780, 0.11270, 0~
## $ concave.points_mean     <dbl> 0.07017, 0.12790, 0.10520, 0.08089, 0.07400, 0~
## $ symmetry_mean           <dbl> 0.1812, 0.2069, 0.2597, 0.2087, 0.1794, 0.2350~
## $ fractal_dimension_mean  <dbl> 0.05667, 0.05999, 0.09744, 0.07613, 0.05742, 0~
## $ radius_se               <dbl> 0.5435, 0.7456, 0.4956, 0.3345, 0.4467, 0.3063~
## $ texture_se              <dbl> 0.7339, 0.7869, 1.1560, 0.8902, 0.7732, 1.0020~
## $ perimeter_se            <dbl> 3.398, 4.585, 3.445, 2.217, 3.180, 2.406, 2.03~
## $ area_se                 <dbl> 74.08, 94.03, 27.23, 27.19, 53.91, 24.32, 23.9~
## $ smoothness_se           <dbl> 0.005225, 0.006150, 0.009110, 0.007510, 0.0043~
## $ compactness_se          <dbl> 0.013080, 0.040060, 0.074580, 0.033450, 0.0138~
## $ concavity_se            <dbl> 0.01860, 0.03832, 0.05661, 0.03672, 0.02254, 0~
## $ concave.points_se       <dbl> 0.013400, 0.020580, 0.018670, 0.011370, 0.0103~
## $ symmetry_se             <dbl> 0.01389, 0.02250, 0.05963, 0.02165, 0.01369, 0~
## $ fractal_dimension_se    <dbl> 0.003532, 0.004571, 0.009208, 0.005082, 0.0021~
## $ radius_worst            <dbl> 24.99, 23.57, 14.91, 15.47, 22.88, 15.49, 15.0~
## $ texture_worst           <dbl> 23.41, 25.53, 26.50, 23.75, 27.66, 30.73, 40.6~
## $ perimeter_worst         <dbl> 158.80, 152.50, 98.87, 103.40, 153.20, 106.20,~
## $ area_worst              <dbl> 1956.0, 1709.0, 567.7, 741.6, 1606.0, 739.3, 7~
## $ smoothness_worst        <dbl> 0.1238, 0.1444, 0.2098, 0.1791, 0.1442, 0.1703~
## $ compactness_worst       <dbl> 0.1866, 0.4245, 0.8663, 0.5249, 0.2576, 0.5401~
## $ concavity_worst         <dbl> 0.2416, 0.4504, 0.6869, 0.5355, 0.3784, 0.5390~
## $ concave.points_worst    <dbl> 0.18600, 0.24300, 0.25750, 0.17410, 0.19320, 0~
## $ symmetry_worst          <dbl> 0.2750, 0.3613, 0.6638, 0.3985, 0.3063, 0.4378~
## $ fractal_dimension_worst <dbl> 0.08902, 0.08758, 0.17300, 0.12440, 0.08368, 0~

df_test %>% glimpse()

## Rows: 170
## Columns: 32
## $ id                      <int> 842302, 84358402, 84458202, 848406, 8510653, 8~
## $ diagnosis               <chr> "M", "M", "M", "M", "B", "B", "M", "M", "M", "~
## $ radius_mean             <dbl> 17.990, 20.290, 13.710, 14.680, 13.080, 9.504,~
## $ texture_mean            <dbl> 10.38, 14.34, 20.83, 20.13, 15.71, 12.44, 16.4~
## $ perimeter_mean          <dbl> 122.80, 135.10, 90.20, 94.74, 85.63, 60.34, 11~
## $ area_mean               <dbl> 1001.0, 1297.0, 577.9, 684.5, 520.0, 273.9, 91~
## $ smoothness_mean         <dbl> 0.11840, 0.10030, 0.11890, 0.09867, 0.10750, 0~
## $ compactness_mean        <dbl> 0.27760, 0.13280, 0.16450, 0.07200, 0.12700, 0~
## $ concavity_mean          <dbl> 0.30010, 0.19800, 0.09366, 0.07395, 0.04568, 0~
## $ concave.points_mean     <dbl> 0.14710, 0.10430, 0.05985, 0.05259, 0.03110, 0~
## $ symmetry_mean           <dbl> 0.2419, 0.1809, 0.2196, 0.1586, 0.1967, 0.1815~
## $ fractal_dimension_mean  <dbl> 0.07871, 0.05883, 0.07451, 0.05922, 0.06811, 0~
## $ radius_se               <dbl> 1.0950, 0.7572, 0.5835, 0.4727, 0.1852, 0.2773~
## $ texture_se              <dbl> 0.9053, 0.7813, 1.3770, 1.2400, 0.7477, 0.9768~
## $ perimeter_se            <dbl> 8.589, 5.438, 3.856, 3.195, 1.383, 1.909, 7.27~
## $ area_se                 <dbl> 153.40, 94.44, 50.96, 45.40, 14.67, 15.70, 111~
## $ smoothness_se           <dbl> 0.006399, 0.011490, 0.008805, 0.005718, 0.0040~
## $ compactness_se          <dbl> 0.049040, 0.024610, 0.030290, 0.011620, 0.0189~
## $ concavity_se            <dbl> 0.053730, 0.056880, 0.024880, 0.019980, 0.0169~
## $ concave.points_se       <dbl> 0.015870, 0.018850, 0.014480, 0.011090, 0.0064~
## $ symmetry_se             <dbl> 0.03003, 0.01756, 0.01486, 0.01410, 0.01678, 0~
## $ fractal_dimension_se    <dbl> 0.006193, 0.005115, 0.005412, 0.002085, 0.0024~
## $ radius_worst            <dbl> 25.380, 22.540, 17.060, 19.070, 14.500, 10.230~
## $ texture_worst           <dbl> 17.33, 16.67, 28.14, 30.88, 20.49, 15.66, 21.4~
## $ perimeter_worst         <dbl> 184.60, 152.20, 110.60, 123.40, 96.09, 65.13, ~
## $ area_worst              <dbl> 2019.0, 1575.0, 897.0, 1138.0, 630.5, 314.9, 1~
## $ smoothness_worst        <dbl> 0.1622, 0.1374, 0.1654, 0.1464, 0.1312, 0.1324~
## $ compactness_worst       <dbl> 0.66560, 0.20500, 0.36820, 0.18710, 0.27760, 0~
## $ concavity_worst         <dbl> 0.71190, 0.40000, 0.26780, 0.29140, 0.18900, 0~
## $ concave.points_worst    <dbl> 0.26540, 0.16250, 0.15560, 0.16090, 0.07283, 0~
## $ symmetry_worst          <dbl> 0.4601, 0.2364, 0.3196, 0.3029, 0.3184, 0.2450~
## $ fractal_dimension_worst <dbl> 0.11890, 0.07678, 0.11510, 0.08216, 0.08183, 0~

df_train %>% mutate(index="train")->df_train
df_test %>% mutate(index='test')->df_test
bind_rows(df_train,df_test)->full
full %>% glimpse()

## Rows: 569
## Columns: 33
## $ id                      <int> 842517, 84300903, 84348301, 843786, 844359, 84~
## $ diagnosis               <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "~
## $ radius_mean             <dbl> 20.57, 19.69, 11.42, 12.45, 18.25, 13.00, 12.4~
## $ texture_mean            <dbl> 17.77, 21.25, 20.38, 15.70, 19.98, 21.82, 24.0~
## $ perimeter_mean          <dbl> 132.90, 130.00, 77.58, 82.57, 119.60, 87.50, 8~
## $ area_mean               <dbl> 1326.0, 1203.0, 386.1, 477.1, 1040.0, 519.8, 4~
## $ smoothness_mean         <dbl> 0.08474, 0.10960, 0.14250, 0.12780, 0.09463, 0~
## $ compactness_mean        <dbl> 0.07864, 0.15990, 0.28390, 0.17000, 0.10900, 0~
## $ concavity_mean          <dbl> 0.08690, 0.19740, 0.24140, 0.15780, 0.11270, 0~
## $ concave.points_mean     <dbl> 0.07017, 0.12790, 0.10520, 0.08089, 0.07400, 0~
## $ symmetry_mean           <dbl> 0.1812, 0.2069, 0.2597, 0.2087, 0.1794, 0.2350~
## $ fractal_dimension_mean  <dbl> 0.05667, 0.05999, 0.09744, 0.07613, 0.05742, 0~
## $ radius_se               <dbl> 0.5435, 0.7456, 0.4956, 0.3345, 0.4467, 0.3063~
## $ texture_se              <dbl> 0.7339, 0.7869, 1.1560, 0.8902, 0.7732, 1.0020~
## $ perimeter_se            <dbl> 3.398, 4.585, 3.445, 2.217, 3.180, 2.406, 2.03~
## $ area_se                 <dbl> 74.08, 94.03, 27.23, 27.19, 53.91, 24.32, 23.9~
## $ smoothness_se           <dbl> 0.005225, 0.006150, 0.009110, 0.007510, 0.0043~
## $ compactness_se          <dbl> 0.013080, 0.040060, 0.074580, 0.033450, 0.0138~
## $ concavity_se            <dbl> 0.01860, 0.03832, 0.05661, 0.03672, 0.02254, 0~
## $ concave.points_se       <dbl> 0.013400, 0.020580, 0.018670, 0.011370, 0.0103~
## $ symmetry_se             <dbl> 0.01389, 0.02250, 0.05963, 0.02165, 0.01369, 0~
## $ fractal_dimension_se    <dbl> 0.003532, 0.004571, 0.009208, 0.005082, 0.0021~
## $ radius_worst            <dbl> 24.99, 23.57, 14.91, 15.47, 22.88, 15.49, 15.0~
## $ texture_worst           <dbl> 23.41, 25.53, 26.50, 23.75, 27.66, 30.73, 40.6~
## $ perimeter_worst         <dbl> 158.80, 152.50, 98.87, 103.40, 153.20, 106.20,~
## $ area_worst              <dbl> 1956.0, 1709.0, 567.7, 741.6, 1606.0, 739.3, 7~
## $ smoothness_worst        <dbl> 0.1238, 0.1444, 0.2098, 0.1791, 0.1442, 0.1703~
## $ compactness_worst       <dbl> 0.1866, 0.4245, 0.8663, 0.5249, 0.2576, 0.5401~
## $ concavity_worst         <dbl> 0.2416, 0.4504, 0.6869, 0.5355, 0.3784, 0.5390~
## $ concave.points_worst    <dbl> 0.18600, 0.24300, 0.25750, 0.17410, 0.19320, 0~
## $ symmetry_worst          <dbl> 0.2750, 0.3613, 0.6638, 0.3985, 0.3063, 0.4378~
## $ fractal_dimension_worst <dbl> 0.08902, 0.08758, 0.17300, 0.12440, 0.08368, 0~
## $ index                   <chr> "train", "train", "train", "train", "train", "~

full$diagnosis<-ifelse(full$diagnosis=="B","양성종양","악성종양")
full$diagnosis<-as.factor(full$diagnosis)

colSums(is.na(full))

##                      id               diagnosis             radius_mean 
##                       0                       0                       0 
##            texture_mean          perimeter_mean               area_mean 
##                       0                       0                       0 
##         smoothness_mean        compactness_mean          concavity_mean 
##                       0                       0                       0 
##     concave.points_mean           symmetry_mean  fractal_dimension_mean 
##                       0                       0                       0 
##               radius_se              texture_se            perimeter_se 
##                       0                       0                       0 
##                 area_se           smoothness_se          compactness_se 
##                       0                       0                       0 
##            concavity_se       concave.points_se             symmetry_se 
##                       0                       0                       0 
##    fractal_dimension_se            radius_worst           texture_worst 
##                       0                       0                       0 
##         perimeter_worst              area_worst        smoothness_worst 
##                       0                       0                       0 
##       compactness_worst         concavity_worst    concave.points_worst 
##                       0                       0                       0 
##          symmetry_worst fractal_dimension_worst                   index 
##                       0                       0                       0

summary(is.na(full))

##      id          diagnosis       radius_mean     texture_mean   
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:569       FALSE:569       FALSE:569       FALSE:569      
##  perimeter_mean  area_mean       smoothness_mean compactness_mean
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical   
##  FALSE:569       FALSE:569       FALSE:569       FALSE:569       
##  concavity_mean  concave.points_mean symmetry_mean   fractal_dimension_mean
##  Mode :logical   Mode :logical       Mode :logical   Mode :logical         
##  FALSE:569       FALSE:569           FALSE:569       FALSE:569             
##  radius_se       texture_se      perimeter_se     area_se       
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:569       FALSE:569       FALSE:569       FALSE:569      
##  smoothness_se   compactness_se  concavity_se    concave.points_se
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical    
##  FALSE:569       FALSE:569       FALSE:569       FALSE:569        
##  symmetry_se     fractal_dimension_se radius_worst    texture_worst  
##  Mode :logical   Mode :logical        Mode :logical   Mode :logical  
##  FALSE:569       FALSE:569            FALSE:569       FALSE:569      
##  perimeter_worst area_worst      smoothness_worst compactness_worst
##  Mode :logical   Mode :logical   Mode :logical    Mode :logical    
##  FALSE:569       FALSE:569       FALSE:569        FALSE:569        
##  concavity_worst concave.points_worst symmetry_worst  fractal_dimension_worst
##  Mode :logical   Mode :logical        Mode :logical   Mode :logical          
##  FALSE:569       FALSE:569            FALSE:569       FALSE:569              
##    index        
##  Mode :logical  
##  FALSE:569

recipe(diagnosis~.,data=full) %>% 
  step_YeoJohnson(radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,
                  compactness_mean,concavity_mean,concave.points_mean,symmetry_mean,
                  fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,
                  smoothness_se,compactness_se,concavity_se,concave.points_se,symmetry_se,
                  fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,
                  area_worst,smoothness_worst,compactness_worst,concavity_worst,
                  concave.points_worst,symmetry_worst,fractal_dimension_worst) %>%
  step_center(radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,
              compactness_mean,concavity_mean,concave.points_mean,symmetry_mean,
              fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,
              smoothness_se,compactness_se,concavity_se,concave.points_se,symmetry_se,
              fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,
              area_worst,smoothness_worst,compactness_worst,concavity_worst,
              concave.points_worst,symmetry_worst,fractal_dimension_worst) %>%
  step_scale(radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,
             compactness_mean,concavity_mean,concave.points_mean,symmetry_mean,
             fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,
             smoothness_se,compactness_se,concavity_se,concave.points_se,symmetry_se,
             fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,
             area_worst,smoothness_worst,compactness_worst,concavity_worst,
             concave.points_worst,symmetry_worst,fractal_dimension_worst) %>% 
  prep() %>% juice()->data
data %>% glimpse()

## Rows: 569
## Columns: 33
## $ id                      <int> 842517, 84300903, 84348301, 843786, 844359, 84~
## $ radius_mean             <dbl> 1.617924232, 1.463509328, -0.758594408, -0.367~
## $ texture_mean            <dbl> -0.26414506, 0.54732445, 0.35740629, -0.824107~
## $ perimeter_mean          <dbl> 1.52738014, 1.45338613, -0.51443375, -0.251015~
## $ area_mean               <dbl> 1.6325123335, 1.4603611261, -0.8355030989, -0.~
## $ smoothness_mean         <dbl> -0.82623545, 0.94138212, 3.28066684, 2.2354545~
## $ compactness_mean        <dbl> -0.48664348, 1.05199990, 3.39991742, 1.2432415~
## $ concavity_mean          <dbl> -0.02382489, 1.36227979, 1.91421287, 0.8655400~
## $ concave.points_mean     <dbl> 0.54766227, 2.03543978, 1.45043113, 0.82393067~
## $ symmetry_mean           <dbl> 0.001391139, 0.938858720, 2.864862154, 1.00451~
## $ fractal_dimension_mean  <dbl> -0.86788881, -0.39765801, 4.90660199, 1.888343~
## $ radius_se               <dbl> 0.957773674, 1.456535070, 0.790980012, 0.00581~
## $ texture_se              <dbl> -0.99743439, -0.83324887, 0.09693632, -0.53833~
## $ perimeter_se            <dbl> 0.73176211, 1.21196297, 0.75549828, -0.0841302~
## $ area_se                 <dbl> 1.2757387, 1.4947914, 0.0454752, 0.0432361, 0.~
## $ smoothness_se           <dbl> -0.604818670, -0.296743909, 0.689095329, 0.156~
## $ compactness_se          <dbl> -0.69231710, 0.81425704, 2.74186785, 0.4451519~
## $ concavity_se            <dbl> -0.4403925588, 0.2128891146, 0.8187979283, 0.1~
## $ concave.points_se       <dbl> 0.25993335, 1.42357487, 1.11402678, -0.0690627~
## $ symmetry_se             <dbl> -0.80474229, 0.23682715, 4.72851977, 0.1340009~
## $ fractal_dimension_se    <dbl> -0.099356317, 0.293301330, 2.045710868, 0.4864~
## $ radius_worst            <dbl> 1.57730024, 1.41850851, -0.08361851, 0.0560057~
## $ texture_worst           <dbl> -0.28812729, 0.07134428, 0.22788904, -0.228767~
## $ perimeter_worst         <dbl> 1.429358435, 1.320779419, -0.039445038, 0.1195~
## $ area_worst              <dbl> 1.608609957, 1.424055444, -0.436477155, 0.0789~
## $ smoothness_worst        <dbl> -0.3752817, 0.5269438, 3.3912907, 2.0467119, 0~
## $ compactness_worst       <dbl> -0.2963198, 1.2086378, 2.2802712, 1.5744314, 0~
## $ concavity_worst         <dbl> 0.07068341, 1.00462771, 1.67361473, 1.28100735~
## $ concave.points_worst    <dbl> 1.10062517, 1.72122879, 1.86074045, 0.95470737~
## $ symmetry_worst          <dbl> -0.2436753, 1.1512420, 6.0407261, 1.7525273, 0~
## $ fractal_dimension_worst <dbl> 0.28094279, 0.20121416, 4.93067187, 2.23983079~
## $ index                   <fct> train, train, train, train, train, train, trai~
## $ diagnosis               <fct> 악성종양, 악성종양, 악성종양, 악성종양, 악성종~

View(data)                  
data %>% filter(index=="train") %>% select(-index)->train                  
data %>% filter(index=='test') %>% select(-index)->test                

ctrl<-trainControl(method="cv",summaryFunction = twoClassSummary,classProbs = TRUE)
train(diagnosis~.,data=train,method='rpart',metric="ROC",trControl=ctrl)->rpfit
rpfit

## CART 
## 
## 399 samples
##  31 predictor
##   2 classes: '악성종양', '양성종양' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 359, 359, 359, 359, 359, 359, ... 
## Resampling results across tuning parameters:
## 
##   cp          ROC        Sens       Spec 
##   0.02013423  0.9242952  0.8785714  0.944
##   0.09395973  0.8608857  0.7785714  0.940
##   0.77852349  0.6633333  0.3666667  0.960
## 
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02013423.

confusionMatrix(rpfit)

## Cross-Validated (10 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction 악성종양 양성종양
##   악성종양     32.8      3.5
##   양성종양      4.5     59.1
##                             
##  Accuracy (average) : 0.9198

predict(rpfit,test,type='prob')->rffit1
predict(rpfit,test,type="raw")->rffit2
head(rffit1)

##     악성종양   양성종양
## 1 0.94615385 0.05384615
## 2 0.94615385 0.05384615
## 3 0.94615385 0.05384615
## 4 0.94615385 0.05384615
## 5 0.03984064 0.96015936
## 6 0.03984064 0.96015936

head(rffit2)

## [1] 악성종양 악성종양 악성종양 악성종양 양성종양 양성종양
## Levels: 악성종양 양성종양

confusionMatrix(rffit2,test$diagnosis)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction 악성종양 양성종양
##   악성종양       57        3
##   양성종양        6      104
##                                           
##                Accuracy : 0.9471          
##                  95% CI : (0.9019, 0.9755)
##     No Information Rate : 0.6294          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.8854          
##                                           
##  Mcnemar's Test P-Value : 0.505           
##                                           
##             Sensitivity : 0.9048          
##             Specificity : 0.9720          
##          Pos Pred Value : 0.9500          
##          Neg Pred Value : 0.9455          
##              Prevalence : 0.3706          
##          Detection Rate : 0.3353          
##    Detection Prevalence : 0.3529          
##       Balanced Accuracy : 0.9384          
##                                           
##        'Positive' Class : 악성종양        
##

library(pROC)

## Type 'citation("pROC")' for a citation.

## 
## 다음의 패키지를 부착합니다: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

rffit2_num<-as.numeric(rffit2)
rffit2_num

##   [1] 1 1 1 1 2 2 1 1 1 2 2 1 2 2 2 2 1 1 1 2 1 2 1 2 1 2 1 1 2 2 1 2 2 2 2 1 1
##  [38] 2 1 2 2 1 1 2 1 2 2 2 2 2 1 2 2 1 1 2 2 2 2 1 2 2 2 1 1 1 2 1 1 1 2 1 2 2
##  [75] 2 2 1 1 2 1 1 1 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 1 1
## [112] 2 2 2 2 1 2 1 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2
## [149] 2 2 1 1 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 1 1

result<-roc(test$diagnosis,rffit2_num)

## Setting levels: control = 악성종양, case = 양성종양

## Setting direction: controls < cases

result

## 
## Call:
## roc.default(response = test$diagnosis, predictor = rffit2_num)
## 
## Data: rffit2_num in 63 controls (test$diagnosis 악성종양) < 107 cases (test$diagnosis 양성종양).
## Area under the curve: 0.9384

result$auc

## Area under the curve: 0.9384

Untitled

0416_1

2022-06-18