# Q2, p.259
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(recipes)
## 
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
## 
##     step
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
df<-read.csv("travel_data.csv")
set.seed(1357)
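# createDataPartition samples within the levels of y, so the 0/1 ratio of
# TravelInsurance should stay roughly the same in both splits.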
train_list<-createDataPartition(y=df$TravelInsurance,p=0.75,list=FALSE)
df_train<-df[train_list,]
df_test<-df[-train_list,]
NROW(df_train)
## [1] 1491
NROW(df_test)
## [1] 496
df_train %>% glimpse
## Rows: 1,491
## Columns: 10
## $ INDEX               <int> 2, 3, 4, 7, 8, 9, 10, 11, 13, 16, 17, 18, 19, 20, …
## $ Age                 <int> 34, 28, 28, 31, 28, 33, 31, 26, 31, 28, 28, 29, 34…
## $ Employment.Type     <chr> "Private Sector/Self Employed", "Private Sector/Se…
## $ GraduateOrNot       <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "…
## $ AnnualIncome        <int> 500000, 700000, 700000, 1350000, 1450000, 800000, …
## $ FamilyMembers       <int> 4, 3, 8, 3, 6, 3, 9, 5, 6, 4, 7, 5, 2, 6, 3, 4, 9,…
## $ ChronicDiseases     <int> 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,…
## $ FrequentFlyer       <chr> "No", "No", "Yes", "Yes", "Yes", "Yes", "No", "Yes…
## $ EverTravelledAbroad <chr> "No", "No", "No", "Yes", "Yes", "No", "No", "Yes",…
## $ TravelInsurance     <int> 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,…
df_train %>% mutate(index="train")->df_train
df_test %>% mutate(index='test')->df_test
bind_rows(df_train,df_test)->full
full$TravelInsurance<-ifelse(full$TravelInsurance==0,"미가입","가입")
full$TravelInsurance<-as.factor(full$TravelInsurance)
full$GraduateOrNot<-as.factor(full$GraduateOrNot)
full$FrequentFlyer<-as.factor(full$FrequentFlyer)
full$EverTravelledAbroad<-as.factor(full$EverTravelledAbroad)
colSums(is.na(full))
##               INDEX                 Age     Employment.Type       GraduateOrNot 
##                   0                   0                   0                   0 
##        AnnualIncome       FamilyMembers     ChronicDiseases       FrequentFlyer 
##                   0                   0                   0                   0 
## EverTravelledAbroad     TravelInsurance               index 
##                   0                   0                   0
recipe(TravelInsurance~.,data=full) %>% 
  step_YeoJohnson(Age,AnnualIncome,FamilyMembers) %>% 
  step_center(Age,AnnualIncome,FamilyMembers) %>% 
  step_scale(Age,AnnualIncome,FamilyMembers) %>% prep() %>% juice()->data
data %>% filter(index=="train") %>% select(-index)->train
data %>% filter(index=="test") %>% select(-index)->test

ctrl<-trainControl(method="cv",summaryFunction = twoClassSummary,
                   classProbs=TRUE)
train(TravelInsurance~.,data=train,
      method='rpart',metric="ROC",
      trControl=ctrl)->rpfit
rpfit
## CART 
## 
## 1491 samples
##    9 predictor
##    2 classes: '가입', '미가입' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 1342, 1342, 1342, 1342, 1341, 1342, ... 
## Resampling results across tuning parameters:
## 
##   cp           ROC        Sens       Spec     
##   0.002358491  0.7800814  0.5754717  0.9593965
##   0.056603774  0.7331815  0.4830189  0.9718857
##   0.401886792  0.5834763  0.1773585  0.9895941
## 
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.002358491.
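# caret tried only three cp values by default; a finer search can be requested
# with tuneLength (sketch):
# train(TravelInsurance~.,data=train,method='rpart',metric="ROC",
#       trControl=ctrl,tuneLength=10)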
confusionMatrix(rpfit)
## Cross-Validated (10 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction 가입 미가입
##     가입   20.5    2.6
##     미가입 15.1   61.8
##                             
##  Accuracy (average) : 0.8229
predict(rpfit,test,type='prob')->rffit1
head(rffit1)
##       가입   미가입
## 1 0.190326 0.809674
## 2 0.190326 0.809674
## 3 0.190326 0.809674
## 4 0.190326 0.809674
## 5 0.190326 0.809674
## 6 0.190326 0.809674
predict(rpfit,test,type="raw")->rffit2
head(rffit2)
## [1] 미가입 미가입 미가입 미가입 미가입 미가입
## Levels: 가입 미가입
confusionMatrix(rffit2,test$TravelInsurance)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction 가입 미가입
##     가입    116     14
##     미가입   64    302
##                                           
##                Accuracy : 0.8427          
##                  95% CI : (0.8077, 0.8737)
##     No Information Rate : 0.6371          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6383          
##                                           
##  Mcnemar's Test P-Value : 2.887e-08       
##                                           
##             Sensitivity : 0.6444          
##             Specificity : 0.9557          
##          Pos Pred Value : 0.8923          
##          Neg Pred Value : 0.8251          
##              Prevalence : 0.3629          
##          Detection Rate : 0.2339          
##    Detection Prevalence : 0.2621          
##       Balanced Accuracy : 0.8001          
##                                           
##        'Positive' Class : 가입            
## 
importance<-varImp(rpfit,scale=FALSE)
importance
## rpart variable importance
## 
##                                                Overall
## AnnualIncome                                  200.4404
## EverTravelledAbroadYes                        122.4265
## FamilyMembers                                  59.1438
## Age                                            39.4285
## FrequentFlyerYes                               35.6629
## Employment.TypePrivate Sector/Self Employed    11.7596
## INDEX                                           3.2728
## ChronicDiseases                                 0.9696
## GraduateOrNotYes                                0.0000
## `Employment.TypePrivate Sector/Self Employed`   0.0000
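# INDEX is only a row identifier, so its nonzero importance is an artifact;
# it is arguably better dropped from the formula (sketch, results will differ):
# train(TravelInsurance~.-INDEX,data=train,method='rpart',metric="ROC",
#       trControl=ctrl)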
library(pROC)
rffit2_num<-as.numeric(rffit2)
head(rffit2_num)
## [1] 2 2 2 2 2 2
result<-roc(test$TravelInsurance,rffit2_num)
## Setting levels: control = 가입, case = 미가입
## Setting direction: controls < cases
result$auc
## Area under the curve: 0.8001
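# Hard class labels give only a single point on the ROC curve; the AUC is more
# informative when computed from the predicted probabilities (sketch):
# roc(test$TravelInsurance,rffit1$가입)$auc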
names(rffit1)[1]<-"y_pred"
bind_cols(df_test,rffit1) %>% select(INDEX,y_pred)->df
head(df)
##   INDEX   y_pred
## 1     0 0.190326
## 2     1 0.190326
## 3     5 0.190326
## 4     6 0.190326
## 5    12 0.190326
## 6    14 0.190326
#write.csv(df,"0000.csv",row.names=F)
read.csv("0000.csv")
##     INDEX    y_pred
## 1       0 0.1903260
## 2       1 0.1903260
## 3       5 0.1903260
## 4       6 0.1903260
## 5      12 0.1903260
## 6      14 0.1903260
## 7      15 0.8658537
## 8      27 0.1903260
## 9      33 0.1903260
## 10     37 0.2195122
## 11     38 0.2195122
## 12     39 0.9209486
## 13     43 0.1903260
## 14     46 0.9209486
## 15     48 0.1903260
## 16     56 0.1903260
## 17     64 0.9209486
## 18     65 0.1903260
## 19     72 0.1903260
## 20     74 0.1903260
## 21     83 0.2195122
## 22     85 0.9209486
## 23     92 0.1903260
## 24     94 0.2195122
## 25     97 0.1903260
## 26     99 0.1903260
## 27    101 0.9209486
## 28    105 0.1903260
## 29    106 0.2195122
## 30    110 0.1903260
## 31    111 0.1903260
## 32    112 0.1903260
## 33    125 0.1903260
## 34    127 0.1903260
## 35    128 0.9209486
## 36    133 0.9209486
## 37    135 0.1903260
## 38    140 0.1903260
## 39    154 0.1903260
## 40    155 0.8658537
## 41    158 0.1903260
## 42    161 0.9209486
## 43    162 0.8658537
## 44    171 0.1903260
## 45    176 0.8658537
## 46    178 0.1903260
## 47    183 0.9209486
## 48    184 0.1903260
## 49    187 0.1903260
## 50    188 0.1903260
## 51    195 0.9209486
## 52    198 0.9209486
## 53    200 0.1903260
## 54    210 0.1903260
## 55    212 0.1903260
## 56    213 0.9209486
## 57    222 0.1903260
## 58    223 0.1903260
## 59    224 0.1903260
## 60    233 0.1903260
## 61    234 0.2195122
## 62    238 0.1903260
## 63    242 0.2195122
## 64    243 0.1903260
## 65    249 0.1903260
## 66    250 0.8658537
## 67    257 0.2195122
## 68    266 0.1903260
## 69    267 0.2195122
## 70    269 0.1903260
## 71    270 0.8658537
## 72    272 0.1903260
## 73    273 0.8658537
## 74    283 0.1903260
## 75    286 0.1903260
## 76    298 0.9209486
## 77    300 0.1903260
## 78    301 0.1903260
## 79    306 0.2195122
## 80    309 0.2195122
## 81    312 0.1903260
## 82    320 0.2195122
## 83    321 0.1903260
## 84    323 0.9209486
## 85    324 0.1903260
## 86    334 0.1903260
## 87    337 0.1903260
## 88    339 0.1903260
## 89    343 0.1903260
## 90    347 0.8658537
## 91    352 0.9209486
## 92    356 0.2195122
## 93    359 0.1903260
## 94    361 0.9209486
## 95    362 0.1903260
## 96    365 0.9209486
## 97    366 0.1903260
## 98    369 0.1903260
## 99    372 0.1903260
## 100   378 0.9209486
## 101   392 0.1903260
## 102   394 0.1903260
## 103   403 0.1903260
## 104   404 0.2195122
## 105   414 0.9209486
## 106   422 0.9209486
## 107   423 0.9209486
## 108   425 0.1903260
## 109   428 0.1903260
## 110   429 0.1903260
## 111   430 0.1903260
## 112   431 0.1903260
## 113   443 0.1903260
## 114   449 0.1903260
## 115   456 0.9209486
## 116   463 0.2195122
## 117   464 0.9209486
## 118   468 0.1903260
## 119   469 0.9209486
## 120   470 0.1903260
## 121   472 0.1903260
## 122   473 0.9209486
## 123   479 0.2195122
## 124   482 0.1903260
## 125   484 0.8658537
## 126   486 0.2195122
## 127   490 0.1903260
## 128   499 0.1903260
## 129   505 0.1903260
## 130   512 0.1903260
## 131   515 0.1903260
## 132   518 0.9209486
## 133   528 0.1903260
## 134   529 0.1903260
## 135   530 0.1903260
## 136   532 0.1903260
## 137   533 0.1903260
## 138   534 0.1903260
## 139   536 0.1903260
## 140   538 0.1903260
## 141   547 0.9209486
## 142   549 0.1903260
## 143   550 0.2195122
## 144   551 0.9209486
## 145   553 0.9209486
## 146   554 0.1903260
## 147   559 0.1903260
## 148   561 0.1903260
## 149   562 0.2195122
## 150   563 0.2195122
## 151   565 0.1903260
## 152   570 0.1903260
## 153   584 0.1903260
## 154   586 0.1903260
## 155   593 0.1903260
## 156   594 0.2195122
## 157   598 0.8658537
## 158   600 0.9209486
## 159   606 0.1903260
## 160   610 0.1903260
## 161   611 0.1903260
## 162   614 0.2195122
## 163   615 0.1903260
## 164   616 0.1903260
## 165   617 0.1903260
## 166   619 0.8658537
## 167   623 0.1903260
## 168   625 0.9209486
## 169   628 0.1903260
## 170   633 0.1903260
## 171   642 0.1903260
## 172   654 0.9209486
## 173   655 0.1903260
## 174   659 0.2195122
## 175   662 0.1903260
## 176   667 0.1903260
## 177   675 0.1903260
## 178   676 0.1903260
## 179   677 0.1903260
## 180   681 0.1903260
## 181   682 0.9209486
## 182   688 0.2195122
## 183   689 0.1903260
## 184   690 0.9209486
## 185   698 0.8658537
## 186   699 0.1903260
## 187   703 0.1903260
## 188   708 0.2195122
## 189   713 0.1903260
## 190   717 0.1903260
## 191   719 0.2195122
## 192   724 0.1903260
## 193   731 0.1903260
## 194   738 0.1903260
## 195   741 0.8658537
## 196   742 0.8658537
## 197   744 0.1903260
## 198   746 0.1903260
## 199   749 0.8658537
## 200   750 0.2195122
## 201   752 0.1903260
## 202   754 0.1903260
## 203   756 0.1903260
## 204   758 0.9209486
## 205   759 0.8658537
## 206   761 0.9209486
## 207   771 0.1903260
## 208   775 0.1903260
## 209   778 0.1903260
## 210   779 0.9209486
## 211   781 0.1903260
## 212   784 0.9209486
## 213   791 0.8658537
## 214   795 0.1903260
## 215   801 0.1903260
## 216   804 0.1903260
## 217   805 0.1903260
## 218   807 0.1903260
## 219   819 0.1903260
## 220   822 0.1903260
## 221   823 0.1903260
## 222   830 0.8658537
## 223   833 0.9209486
## 224   837 0.1903260
## 225   853 0.1903260
## 226   856 0.8658537
## 227   859 0.1903260
## 228   863 0.1903260
## 229   865 0.9209486
## 230   876 0.1903260
## 231   879 0.1903260
## 232   885 0.1903260
## 233   886 0.9209486
## 234   889 0.2195122
## 235   892 0.1903260
## 236   893 0.1903260
## 237   894 0.1903260
## 238   896 0.2195122
## 239   902 0.8658537
## 240   903 0.1903260
## 241   904 0.1903260
## 242   909 0.9209486
## 243   913 0.9209486
## 244   924 0.1903260
## 245   926 0.2195122
## 246   927 0.9209486
## 247   938 0.1903260
## 248   939 0.1903260
## 249   947 0.2195122
## 250   948 0.1903260
## 251   952 0.9209486
## 252   959 0.1903260
## 253   965 0.1903260
## 254   968 0.1903260
## 255   973 0.9209486
## 256   975 0.9209486
## 257   978 0.1903260
## 258   981 0.1903260
## 259   982 0.1903260
## 260   983 0.8658537
## 261   988 0.9209486
## 262   994 0.1903260
## 263   995 0.1903260
## 264   999 0.8658537
## 265  1000 0.1903260
## 266  1003 0.1903260
## 267  1004 0.1903260
## 268  1005 0.1903260
## 269  1006 0.1903260
## 270  1011 0.8658537
## 271  1013 0.1903260
## 272  1017 0.1903260
## 273  1019 0.1903260
## 274  1022 0.1903260
## 275  1031 0.1903260
## 276  1037 0.1903260
## 277  1044 0.9209486
## 278  1046 0.2195122
## 279  1050 0.2195122
## 280  1053 0.2195122
## 281  1055 0.9209486
## 282  1057 0.1903260
## 283  1063 0.1903260
## 284  1067 0.2195122
## 285  1079 0.2195122
## 286  1080 0.1903260
## 287  1082 0.1903260
## 288  1087 0.1903260
## 289  1090 0.1903260
## 290  1092 0.9209486
## 291  1095 0.8658537
## 292  1098 0.1903260
## 293  1106 0.2195122
## 294  1110 0.1903260
## 295  1111 0.2195122
## 296  1112 0.2195122
## 297  1135 0.1903260
## 298  1140 0.1903260
## 299  1146 0.1903260
## 300  1147 0.9209486
## 301  1150 0.1903260
## 302  1151 0.8658537
## 303  1153 0.1903260
## 304  1156 0.1903260
## 305  1158 0.2195122
## 306  1162 0.9209486
## 307  1169 0.9209486
## 308  1172 0.1903260
## 309  1173 0.1903260
## 310  1175 0.2195122
## 311  1182 0.1903260
## 312  1186 0.1903260
## 313  1188 0.1903260
## 314  1190 0.1903260
## 315  1193 0.2195122
## 316  1195 0.1903260
## 317  1198 0.2195122
## 318  1202 0.9209486
## 319  1206 0.1903260
## 320  1207 0.1903260
## 321  1209 0.9209486
## 322  1210 0.1903260
## 323  1212 0.2195122
## 324  1213 0.1903260
## 325  1219 0.8658537
## 326  1230 0.9209486
## 327  1235 0.1903260
## 328  1245 0.9209486
## 329  1247 0.1903260
## 330  1257 0.1903260
## 331  1263 0.1903260
## 332  1264 0.1903260
## 333  1271 0.1903260
## 334  1272 0.2195122
## 335  1274 0.2195122
## 336  1275 0.1903260
## 337  1282 0.9209486
## 338  1284 0.2195122
## 339  1297 0.1903260
## 340  1302 0.1903260
## 341  1308 0.1903260
## 342  1309 0.9209486
## 343  1319 0.1903260
## 344  1335 0.1903260
## 345  1337 0.1903260
## 346  1346 0.1903260
## 347  1349 0.9209486
## 348  1352 0.1903260
## 349  1354 0.9209486
## 350  1358 0.9209486
## 351  1366 0.9209486
## 352  1369 0.2195122
## 353  1374 0.1903260
## 354  1379 0.2195122
## 355  1381 0.9209486
## 356  1383 0.9209486
## 357  1385 0.1903260
## 358  1389 0.1903260
## 359  1391 0.9209486
## 360  1400 0.1903260
## 361  1401 0.1903260
## 362  1407 0.1903260
## 363  1414 0.2195122
## 364  1416 0.1903260
## 365  1418 0.1903260
## 366  1420 0.2195122
## 367  1421 0.9209486
## 368  1427 0.1903260
## 369  1428 0.8658537
## 370  1431 0.1903260
## 371  1440 0.2195122
## 372  1449 0.1903260
## 373  1459 0.9209486
## 374  1460 0.1903260
## 375  1462 0.1903260
## 376  1464 0.1903260
## 377  1466 0.1903260
## 378  1469 0.1903260
## 379  1472 0.1903260
## 380  1479 0.9209486
## 381  1487 0.1903260
## 382  1491 0.9209486
## 383  1493 0.1903260
## 384  1496 0.1903260
## 385  1501 0.1903260
## 386  1506 0.1903260
## 387  1510 0.1903260
## 388  1511 0.1903260
## 389  1516 0.2195122
## 390  1523 0.1903260
## 391  1527 0.1903260
## 392  1537 0.1903260
## 393  1538 0.1903260
## 394  1540 0.2195122
## 395  1541 0.1903260
## 396  1545 0.9209486
## 397  1549 0.1903260
## 398  1555 0.9209486
## 399  1556 0.9209486
## 400  1559 0.2195122
## 401  1566 0.9209486
## 402  1568 0.1903260
## 403  1572 0.1903260
## 404  1585 0.9209486
## 405  1588 0.1903260
## 406  1589 0.9209486
## 407  1607 0.1903260
## 408  1615 0.1903260
## 409  1617 0.1903260
## 410  1618 0.1903260
## 411  1620 0.1903260
## 412  1630 0.1903260
## 413  1640 0.1903260
## 414  1647 0.2195122
## 415  1648 0.1903260
## 416  1650 0.9209486
## 417  1651 0.9209486
## 418  1656 0.1903260
## 419  1658 0.8658537
## 420  1661 0.1903260
## 421  1662 0.1903260
## 422  1670 0.8658537
## 423  1671 0.9209486
## 424  1675 0.8658537
## 425  1678 0.9209486
## 426  1689 0.1903260
## 427  1691 0.9209486
## 428  1692 0.1903260
## 429  1714 0.1903260
## 430  1722 0.9209486
## 431  1727 0.1903260
## 432  1732 0.1903260
## 433  1734 0.2195122
## 434  1736 0.8658537
## 435  1738 0.1903260
## 436  1742 0.1903260
## 437  1747 0.9209486
## 438  1750 0.1903260
## 439  1754 0.9209486
## 440  1756 0.1903260
## 441  1764 0.1903260
## 442  1765 0.8658537
## 443  1767 0.2195122
## 444  1769 0.1903260
## 445  1770 0.2195122
## 446  1773 0.1903260
## 447  1774 0.1903260
## 448  1785 0.1903260
## 449  1791 0.1903260
## 450  1793 0.9209486
## 451  1798 0.9209486
## 452  1804 0.1903260
## 453  1806 0.1903260
## 454  1809 0.1903260
## 455  1812 0.1903260
## 456  1813 0.9209486
## 457  1818 0.1903260
## 458  1827 0.1903260
## 459  1828 0.1903260
## 460  1831 0.1903260
## 461  1834 0.1903260
## 462  1835 0.1903260
## 463  1848 0.1903260
## 464  1858 0.1903260
## 465  1863 0.2195122
## 466  1865 0.9209486
## 467  1873 0.2195122
## 468  1882 0.9209486
## 469  1886 0.1903260
## 470  1887 0.1903260
## 471  1893 0.1903260
## 472  1896 0.9209486
## 473  1899 0.1903260
## 474  1901 0.1903260
## 475  1909 0.1903260
## 476  1912 0.2195122
## 477  1918 0.9209486
## 478  1920 0.1903260
## 479  1921 0.1903260
## 480  1922 0.2195122
## 481  1930 0.1903260
## 482  1937 0.2195122
## 483  1938 0.9209486
## 484  1940 0.9209486
## 485  1942 0.1903260
## 486  1949 0.8658537
## 487  1951 0.1903260
## 488  1953 0.8658537
## 489  1957 0.1903260
## 490  1958 0.1903260
## 491  1959 0.1903260
## 492  1975 0.9209486
## 493  1976 0.1903260
## 494  1980 0.1903260
## 495  1983 0.9209486
## 496  1986 0.2195122
# Q3, p.267
train<-read.csv("insurance_train_10.csv")
test<-read.csv("insurance_test_10.csv")
train %>% glimpse
## Rows: 6,969
## Columns: 9
## $ Gender          <chr> "Male", "Female", "Male", "Male", "Male", "Female", "F…
## $ Ever_Married    <chr> "No", "Yes", "Yes", "Yes", "No", "No", "Yes", "Yes", "…
## $ Age             <int> 22, 67, 67, 56, 32, 33, 61, 55, 26, 19, 58, 41, 32, 31…
## $ Graduated       <chr> "No", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession      <chr> "Healthcare", "Engineer", "Lawyer", "Artist", "Healthc…
## $ Work_Experience <int> 1, 1, 0, 0, 1, 1, 0, 1, 1, 4, 0, 1, 9, 1, 1, 0, 12, 3,…
## $ Spending_Score  <chr> "Low", "Low", "High", "Average", "Low", "Low", "Low", …
## $ Family_Size     <int> 4, 1, 2, 2, 3, 3, 3, 4, 3, 4, 1, 2, 5, 6, 4, 1, 1, 4, …
## $ Segmentation    <int> 4, 2, 2, 3, 3, 4, 4, 3, 1, 4, 2, 3, 4, 2, 2, 3, 1, 4, …
colSums(is.na(train))
##          Gender    Ever_Married             Age       Graduated      Profession 
##               0               0               0               0               0 
## Work_Experience  Spending_Score     Family_Size    Segmentation 
##               0               0               0               0
train$Segmentation<-as.factor(train$Segmentation)

library(caret)
ctrl<-trainControl(method="cv",number=10)
train(Segmentation~.,data=train,
      method='knn',trControl=ctrl,
      preProcess=c("center","scale"))->knn_fit
knn_fit
## k-Nearest Neighbors 
## 
## 6969 samples
##    8 predictor
##    4 classes: '1', '2', '3', '4' 
## 
## Pre-processing: centered (19), scaled (19) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 6273, 6272, 6272, 6271, 6272, 6274, ... 
## Resampling results across tuning parameters:
## 
##   k  Accuracy   Kappa    
##   5  0.4811274  0.3070117
##   7  0.4936085  0.3234522
##   9  0.4898745  0.3182787
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 7.
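# The 19 centered/scaled columns arise because the formula interface
# dummy-encodes the character predictors. Only k = 5, 7, 9 were tried; a wider
# grid can be supplied (sketch):
# train(Segmentation~.,data=train,method='knn',trControl=ctrl,
#       preProcess=c("center","scale"),tuneGrid=expand.grid(k=seq(3,25,2)))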
confusionMatrix(knn_fit)
## Cross-Validated (10 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction    1    2    3    4
##          1  9.8  5.5  2.9  5.1
##          2  5.6  7.6  5.7  2.1
##          3  3.7  7.6 13.9  1.5
##          4  5.2  2.7  3.1 18.0
##                             
##  Accuracy (average) : 0.4936
predict(knn_fit,test)->pred_fit
head(pred_fit)
## [1] 2 3 1 3 3 1
## Levels: 1 2 3 4
NROW(pred_fit)
## [1] 2267
test %>% glimpse
## Rows: 2,267
## Columns: 9
## $ X               <int> 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ Gender          <chr> "Female", "Male", "Female", "Male", "Male", "Male", "F…
## $ Ever_Married    <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"…
## $ Age             <int> 36, 37, 69, 59, 47, 61, 47, 50, 19, 22, 22, 50, 27, 18…
## $ Graduated       <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession      <chr> "Engineer", "Healthcare", "", "Executive", "Doctor", "…
## $ Work_Experience <int> 0, 8, 0, 11, 0, 5, 1, 2, 0, 0, 0, 1, 8, 0, 0, 1, 1, 8,…
## $ Spending_Score  <chr> "Low", "Average", "Low", "High", "High", "Low", "Avera…
## $ Family_Size     <int> 1, 4, 1, 2, 5, 3, 3, 4, 4, 3, 6, 5, 3, 3, 1, 3, 2, 1, …
bind_cols(test,pred_fit)->df
## New names:
## • `` -> `...10`
names(df)[9]<-"Segmentaton_pred"
df %>% select(9)->df1
write.csv(df1,"2022.csv",row.names = FALSE)
set.seed(12345)
IDX<-createDataPartition(train$Segmentation,p=0.7,list=FALSE)
train_t<-train[IDX,]
test_v<-train[-IDX,]
train_t$Segmentation<-as.factor(train_t$Segmentation)
test_v$Segmentation<-as.factor(test_v$Segmentation)
ctrl<-trainControl(method="cv",number=10)
train(Segmentation~.,data=train_t,
      method='knn',trControl=ctrl,
      preProcess=c("center","scale"))->knn_fit1
predict(knn_fit1,newdata=test_v)->test_pred
confusionMatrix(test_pred,test_v$Segmentation,mode="prec_recall")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3   4
##          1 207 114  62 115
##          2 120 152 123  53
##          3  71 163 284  33
##          4 109  60  64 358
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4794          
##                  95% CI : (0.4578, 0.5011)
##     No Information Rate : 0.2677          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3047          
##                                           
##  Mcnemar's Test P-Value : 0.009816        
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3 Class: 4
## Precision             0.41566   0.3393   0.5154   0.6058
## Recall                0.40828   0.3108   0.5328   0.6404
## F1                    0.41194   0.3244   0.5240   0.6226
## Prevalence            0.24282   0.2342   0.2553   0.2677
## Detection Rate        0.09914   0.0728   0.1360   0.1715
## Detection Prevalence  0.23851   0.2146   0.2639   0.2830
## Balanced Accuracy     0.61211   0.5629   0.6806   0.7440
# Q2: Titanic data, p.277
rm(list=ls())
ls()
## character(0)
library(dplyr)
library(recipes)
library(caret)
read.delim("titanic3.txt",header=TRUE,sep=",")->full
full %>% glimpse
## Rows: 1,309
## Columns: 14
## $ pclass    <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ survived  <int> 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, …
## $ name      <chr> "Allen, Miss. Elisabeth Walton", "Allison, Master. Hudson Tr…
## $ sex       <chr> "female", "male", "female", "male", "female", "male", "femal…
## $ age       <dbl> 29.00, 0.92, 2.00, 30.00, 25.00, 48.00, 63.00, 39.00, 53.00,…
## $ sibsp     <int> 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ parch     <int> 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, …
## $ ticket    <chr> "24160", "113781", "113781", "113781", "113781", "19952", "1…
## $ fare      <dbl> 211.3375, 151.5500, 151.5500, 151.5500, 151.5500, 26.5500, 7…
## $ cabin     <chr> "B5", "C22 C26", "C22 C26", "C22 C26", "C22 C26", "E12", "D7…
## $ embarked  <chr> "S", "S", "S", "S", "S", "S", "S", "S", "S", "C", "C", "C", …
## $ boat      <chr> "2", "11", "", "", "", "3", "10", "", "D", "", "", "4", "9",…
## $ body      <int> NA, NA, NA, 135, NA, NA, NA, NA, NA, 22, 124, NA, NA, NA, NA…
## $ home.dest <chr> "St Louis, MO", "Montreal, PQ / Chesterville, ON", "Montreal…
set.seed(1357)
train_list<-createDataPartition(full$survived,p=0.7,list=FALSE)
full_train<-full[train_list,]
full_test<-full[-train_list,]
NROW(full_train)
## [1] 917
NROW(full_test)
## [1] 392
train<-full_train
test<-full_test
train %>% mutate(index='train')->train
test %>% mutate(index='test')->test
bind_rows(train,test)->full
full %>% select(-boat,-body,-home.dest)->full
# Caution: this labels survived==0 (did not survive) as "생존" and survived==1
# as "사망", inverting the usual meaning of the coding; every class label
# below follows this inverted mapping.
full$survived<-ifelse(full$survived==0,"생존","사망")
full$survived<-as.factor(full$survived)
full$pclass<-as.factor(full$pclass)
full$sex<-as.factor(full$sex)
full$embarked<-as.factor(full$embarked)
colSums(is.na(full))
##   pclass survived     name      sex      age    sibsp    parch   ticket 
##        0        0        0        0      263        0        0        0 
##     fare    cabin embarked    index 
##        1        0        0        0
table(full$embarked)
## 
##       C   Q   S 
##   2 270 123 914
levels(full$embarked)[1]<-NA   # recode the empty-string level ("") to NA
table(full$embarked,useNA="always")
## 
##    C    Q    S <NA> 
##  270  123  914    2
full %>% filter(!is.na(age)&!is.na(fare)&!is.na(embarked))->full
colSums(is.na(full))
##   pclass survived     name      sex      age    sibsp    parch   ticket 
##        0        0        0        0        0        0        0        0 
##     fare    cabin embarked    index 
##        0        0        0        0
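# Filtering dropped every row with a missing age, fare or embarked; an
# alternative is to impute inside the recipe rather than discard rows
# (sketch, applied before any filtering):
# recipe(survived~.,data=full) %>%
#   step_impute_median(age,fare) %>%
#   step_impute_mode(embarked) %>%
#   step_YeoJohnson(age,sibsp,parch,fare) %>%
#   step_normalize(age,sibsp,parch,fare) %>%
#   prep() %>% juice()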
recipe(survived~.,data=full) %>% step_YeoJohnson(age,sibsp,parch,fare) %>% 
  step_center(age,sibsp,parch,fare) %>% 
  step_scale(age,sibsp,parch,fare) %>% 
  prep() %>% juice()->data
data %>% filter(index=="train") %>% select(-index,-name,-ticket,-cabin)->train
data %>% filter(index=='test') %>% select(-index,-name,-ticket,-cabin)->test
ctrl<-trainControl(method="cv",summaryFunction = twoClassSummary,
                   classProbs = TRUE)
train(survived~.,data=train,
      method="rpart",metric='ROC',
      trControl=ctrl)->rffit
rffit
## CART 
## 
## 731 samples
##   7 predictor
##   2 classes: '사망', '생존' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 659, 659, 657, 658, 658, 657, ... 
## Resampling results across tuning parameters:
## 
##   cp          ROC        Sens       Spec     
##   0.02542373  0.7882034  0.5548276  0.9239429
##   0.03728814  0.7730226  0.5442529  0.9191860
##   0.44406780  0.6106002  0.3158621  0.9053383
## 
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02542373.
confusionMatrix(rffit)
## Cross-Validated (10 fold) Confusion Matrix 
## 
## (entries are percentual average cell counts across resamples)
##  
##           Reference
## Prediction 사망 생존
##       사망 22.4  4.5
##       생존 17.9 55.1
##                             
##  Accuracy (average) : 0.7756
predict(rffit,test,type="prob")->rffit1
predict(rffit,test,type="raw")->rffit2
confusionMatrix(rffit2,test$survived)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction 사망 생존
##       사망   66    6
##       생존   64  176
##                                           
##                Accuracy : 0.7756          
##                  95% CI : (0.7252, 0.8207)
##     No Information Rate : 0.5833          
##     P-Value [Acc > NIR] : 6.712e-13       
##                                           
##                   Kappa : 0.507           
##                                           
##  Mcnemar's Test P-Value : 9.572e-12       
##                                           
##             Sensitivity : 0.5077          
##             Specificity : 0.9670          
##          Pos Pred Value : 0.9167          
##          Neg Pred Value : 0.7333          
##              Prevalence : 0.4167          
##          Detection Rate : 0.2115          
##    Detection Prevalence : 0.2308          
##       Balanced Accuracy : 0.7374          
##                                           
##        'Positive' Class : 사망            
## 
library(pROC)
rffit2_num<-as.numeric(rffit2)
rffit2_num
##   [1] 1 2 2 1 1 2 1 2 1 2 1 2 1 1 1 2 1 1 2 2 1 1 1 1 1 1 1 2 1 2 2 1 2 2 2 1 2
##  [38] 1 1 2 1 1 1 1 1 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 1 2 1 2 2 1 2 1 2 2 2 2 1 2
##  [75] 2 1 2 2 2 1 2 1 2 2 1 2 1 1 2 2 1 1 1 2 2 1 1 1 1 2 2 2 2 1 2 1 2 2 1 2 2
## [112] 2 1 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 2 1 2 1 2 1 2 2 2 2 2 2 1 2 1 2 1 2 2 2
## [149] 1 1 2 2 2 1 1 2 1 1 2 1 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [297] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
result<-roc(test$survived,rffit2_num)
## Setting levels: control = 사망, case = 생존
## Setting direction: controls < cases
result$auc
## Area under the curve: 0.7374
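# As above, AUC computed from the predicted probabilities is more informative
# than AUC from hard class labels (sketch):
# roc(test$survived,rffit1$생존)$auc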
names(rffit1)[2]<-"survived"
rffit1 %>% select(survived)->df
head(df)
##    survived
## 1 0.0617284
## 2 0.7905983
## 3 0.7905983
## 4 0.0617284
## 5 0.0617284
## 6 0.7905983
write.csv(df,"00001.csv",row.names=F)
read.csv("00001.csv")
##      survived
## 1   0.0617284
## 2   0.7905983
## 3   0.7905983
## 4   0.0617284
## 5   0.0617284
## 6   0.7905983
## 7   0.0617284
## 8   0.7905983
## 9   0.0617284
## 10  0.7905983
## 11  0.0617284
## 12  0.7905983
## 13  0.0617284
## 14  0.0617284
## 15  0.0617284
## 16  0.7905983
## 17  0.0617284
## 18  0.0617284
## 19  0.7905983
## 20  0.7905983
## 21  0.0617284
## 22  0.0617284
## 23  0.0617284
## 24  0.0617284
## 25  0.0617284
## 26  0.0617284
## 27  0.0617284
## 28  0.7905983
## 29  0.0617284
## 30  0.7905983
## 31  0.7905983
## 32  0.0617284
## 33  0.7905983
## 34  0.7905983
## 35  0.7905983
## 36  0.0617284
## 37  0.7905983
## 38  0.0617284
## 39  0.0617284
## 40  0.7905983
## 41  0.0617284
## 42  0.0617284
## 43  0.0617284
## 44  0.0617284
## 45  0.0617284
## 46  0.7905983
## 47  0.7905983
## 48  0.7905983
## 49  0.0617284
## 50  0.0617284
## 51  0.7905983
## 52  0.7905983
## 53  0.7905983
## 54  0.7905983
## 55  0.0617284
## 56  0.7905983
## 57  0.7905983
## 58  0.7905983
## 59  0.7905983
## 60  0.7905983
## 61  0.0617284
## 62  0.7905983
## 63  0.0617284
## 64  0.7905983
## 65  0.7905983
## 66  0.0617284
## 67  0.7905983
## 68  0.0617284
## 69  0.7905983
## 70  0.7905983
## 71  0.7905983
## 72  0.7905983
## 73  0.0617284
## 74  0.7905983
## 75  0.7905983
## 76  0.0617284
## 77  0.7905983
## 78  0.7905983
## 79  0.7905983
## 80  0.0617284
## 81  0.7905983
## 82  0.0617284
## 83  0.7905983
## 84  0.7905983
## 85  0.0617284
## 86  0.7905983
## 87  0.0617284
## 88  0.0617284
## 89  0.7905983
## 90  0.7905983
## 91  0.0617284
## 92  0.0617284
## 93  0.0617284
## 94  0.7905983
## 95  0.7905983
## 96  0.0617284
## 97  0.0617284
## 98  0.0617284
## 99  0.0617284
## 100 0.7905983
## 101 0.7905983
## 102 0.7905983
## 103 0.7905983
## 104 0.0617284
## 105 0.7905983
## 106 0.0617284
## 107 0.7905983
## 108 0.7905983
## 109 0.0617284
## 110 0.7905983
## 111 0.7905983
## 112 0.7905983
## 113 0.0617284
## 114 0.7905983
## 115 0.7905983
## 116 0.7905983
## 117 0.0617284
## 118 0.7905983
## 119 0.7905983
## 120 0.7905983
## 121 0.7905983
## 122 0.0617284
## 123 0.0617284
## 124 0.7905983
## 125 0.7905983
## 126 0.7905983
## 127 0.7905983
## 128 0.7905983
## 129 0.7905983
## 130 0.0617284
## 131 0.7905983
## 132 0.0617284
## 133 0.7905983
## 134 0.0617284
## 135 0.7905983
## 136 0.7905983
## 137 0.7905983
## 138 0.7905983
## 139 0.7905983
## 140 0.7905983
## 141 0.0617284
## 142 0.7905983
## 143 0.0617284
## 144 0.7905983
## 145 0.0617284
## 146 0.7905983
## 147 0.7905983
## 148 0.7905983
## 149 0.0617284
## 150 0.0617284
## 151 0.7905983
## 152 0.7905983
## 153 0.7905983
## 154 0.0617284
## 155 0.0617284
## 156 0.7905983
## 157 0.0617284
## 158 0.0617284
## 159 0.7905983
## 160 0.0617284
## 161 0.0617284
## 162 0.0617284
## 163 0.7905983
## 164 0.7905983
## 165 0.7905983
## 166 0.0617284
## 167 0.7905983
## 168 0.7905983
## 169 0.7905983
## 170 0.7905983
## 171 0.7905983
## 172 0.7905983
## 173 0.7905983
## 174 0.7905983
## 175 0.5544554
## 176 0.5544554
## 177 0.7905983
## 178 0.7905983
## 179 0.7905983
## 180 0.5544554
## 181 0.5544554
## 182 0.7905983
## 183 0.5544554
## 184 0.5544554
## 185 0.5544554
## 186 0.5544554
## 187 0.7905983
## 188 0.7905983
## 189 0.7905983
## 190 0.5544554
## 191 0.5544554
## 192 0.7905983
## 193 0.7905983
## 194 0.5544554
## 195 0.7905983
## 196 0.5544554
## 197 0.7905983
## 198 0.7905983
## 199 0.7905983
## 200 0.7905983
## 201 0.7905983
## 202 0.7905983
## 203 0.7905983
## 204 0.7905983
## 205 0.7905983
## 206 0.7905983
## 207 0.7905983
## 208 0.5544554
## 209 0.7905983
## 210 0.7905983
## 211 0.7905983
## 212 0.7905983
## 213 0.7905983
## 214 0.7905983
## 215 0.7905983
## 216 0.5544554
## 217 0.7905983
## 218 0.7905983
## 219 0.5544554
## 220 0.7905983
## 221 0.5544554
## 222 0.7905983
## 223 0.7905983
## 224 0.7905983
## 225 0.7905983
## 226 0.5544554
## 227 0.5544554
## 228 0.5544554
## 229 0.7905983
## 230 0.5544554
## 231 0.7905983
## 232 0.5544554
## 233 0.7905983
## 234 0.7905983
## 235 0.7905983
## 236 0.5544554
## 237 0.5544554
## 238 0.7905983
## 239 0.5544554
## 240 0.5544554
## 241 0.7905983
## 242 0.7905983
## 243 0.7905983
## 244 0.5544554
## 245 0.7905983
## 246 0.7905983
## 247 0.7905983
## 248 0.5544554
## 249 0.7905983
## 250 0.7905983
## 251 0.5544554
## 252 0.5544554
## 253 0.7905983
## 254 0.7905983
## 255 0.7905983
## 256 0.7905983
## 257 0.5544554
## 258 0.5544554
## 259 0.7905983
## 260 0.7905983
## 261 0.7905983
## 262 0.5544554
## 263 0.5544554
## 264 0.7905983
## 265 0.5544554
## 266 0.7905983
## 267 0.7905983
## 268 0.5544554
## 269 0.7905983
## 270 0.5544554
## 271 0.7905983
## 272 0.5544554
## 273 0.7905983
## 274 0.7905983
## 275 0.5544554
## 276 0.7905983
## 277 0.7905983
## 278 0.5544554
## 279 0.7905983
## 280 0.5544554
## 281 0.7905983
## 282 0.7905983
## 283 0.7905983
## 284 0.7905983
## 285 0.5544554
## 286 0.5544554
## 287 0.7905983
## 288 0.7905983
## 289 0.7905983
## 290 0.5544554
## 291 0.7905983
## 292 0.7905983
## 293 0.7905983
## 294 0.5544554
## 295 0.7905983
## 296 0.7905983
## 297 0.5544554
## 298 0.7905983
## 299 0.7905983
## 300 0.7905983
## 301 0.5544554
## 302 0.7905983
## 303 0.7905983
## 304 0.5544554
## 305 0.5544554
## 306 0.7905983
## 307 0.5544554
## 308 0.7905983
## 309 0.7905983
## 310 0.5544554
## 311 0.7905983
## 312 0.5544554
# Q2, p.285
library(dplyr)
library(recipes)
library(caret)
df<-read.csv("nyc.csv")
df %>% glimpse
## Rows: 165
## Columns: 9
## $ Case       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ Restaurant <chr> "Daniella Ristorante", "Tello's Ristorante", "Biricchino", …
## $ Price      <int> 43, 32, 34, 41, 54, 52, 34, 34, 39, 44, 45, 47, 52, 35, 47,…
## $ Food       <int> 22, 20, 21, 20, 24, 22, 22, 20, 22, 21, 19, 21, 21, 19, 20,…
## $ Decor      <int> 18, 19, 13, 20, 19, 22, 16, 18, 19, 17, 17, 19, 19, 17, 18,…
## $ Service    <int> 20, 19, 18, 17, 21, 21, 21, 21, 22, 19, 20, 21, 20, 19, 21,…
## $ East       <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ latitude   <dbl> 40.74683, 40.74342, 40.74886, 40.74848, 40.73958, 40.74069,…
## $ longitude  <dbl> -73.99676, -73.99954, -73.99552, -74.00331, -73.99591, -73.…
nyc<-df %>% select(3:7)
summary(nyc)
##      Price            Food           Decor          Service     
##  Min.   :19.00   Min.   :16.00   Min.   : 6.00   Min.   :14.00  
##  1st Qu.:36.00   1st Qu.:19.00   1st Qu.:16.00   1st Qu.:18.00  
##  Median :43.00   Median :21.00   Median :18.00   Median :20.00  
##  Mean   :42.67   Mean   :20.59   Mean   :17.68   Mean   :19.39  
##  3rd Qu.:50.00   3rd Qu.:22.00   3rd Qu.:19.00   3rd Qu.:21.00  
##  Max.   :65.00   Max.   :25.00   Max.   :25.00   Max.   :24.00  
##       East       
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :1.0000  
##  Mean   :0.6303  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
nyc$East<-as.factor(nyc$East)
set.seed(1357)
nyc %>% glimpse
## Rows: 165
## Columns: 5
## $ Price   <int> 43, 32, 34, 41, 54, 52, 34, 34, 39, 44, 45, 47, 52, 35, 47, 37…
## $ Food    <int> 22, 20, 21, 20, 24, 22, 22, 20, 22, 21, 19, 21, 21, 19, 20, 21…
## $ Decor   <int> 18, 19, 13, 20, 19, 22, 16, 18, 19, 17, 17, 19, 19, 17, 18, 19…
## $ Service <int> 20, 19, 18, 17, 21, 21, 21, 21, 22, 19, 20, 21, 20, 19, 21, 21…
## $ East    <fct> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
train_list<-createDataPartition(nyc$Price,p=0.7,list=FALSE)
train<-nyc[train_list,]
test<-nyc[-train_list,]
NROW(train)
## [1] 118
NROW(test)
## [1] 47
train %>% mutate(index='train')->train
test %>% mutate(index='test')->test
bind_rows(train,test)->full
full %>% glimpse
## Rows: 165
## Columns: 6
## $ Price   <int> 43, 32, 34, 41, 52, 34, 39, 44, 45, 47, 52, 35, 37, 45, 38, 51…
## $ Food    <int> 22, 20, 21, 20, 22, 20, 22, 21, 19, 21, 21, 19, 21, 22, 19, 22…
## $ Decor   <int> 18, 19, 13, 20, 22, 18, 19, 17, 17, 19, 19, 17, 19, 18, 17, 20…
## $ Service <int> 20, 19, 18, 17, 21, 21, 22, 19, 20, 21, 20, 19, 21, 23, 18, 22…
## $ East    <fct> 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ index   <chr> "train", "train", "train", "train", "train", "train", "train",…
recipe(Price~.,data=full) %>% step_YeoJohnson(Food,Decor,Service) %>% 
  step_center(Food,Decor,Service) %>% 
  step_scale(Food,Decor,Service) %>% 
  prep() %>% juice()->data
data %>% glimpse
## Rows: 165
## Columns: 6
## $ Food    <dbl> 0.7123134, -0.2891237, 0.2132304, -0.2891237, 0.7123134, -0.28…
## $ Decor   <dbl> 0.07529626, 0.45931463, -1.65771532, 0.85519632, 1.68166534, 0…
## $ Service <dbl> 0.2626569, -0.2101456, -0.6716854, -1.1216590, 0.7464411, 0.74…
## $ East    <fct> 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ index   <fct> train, train, train, train, train, train, train, train, train,…
## $ Price   <int> 43, 32, 34, 41, 52, 34, 39, 44, 45, 47, 52, 35, 37, 45, 38, 51…
data %>% filter(index=="train") %>% select(-index)->train
data %>% filter(index=="test") %>% select(-index)->test
tc<-trainControl(method="cv",number=10)
model_2<-train(Price~.,train,method="lm",trControl=tc)
model_2
## Linear Regression 
## 
## 118 samples
##   4 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 106, 107, 106, 105, 105, 108, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   5.810832  0.6599844  4.538747
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
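# The underlying lm fit is stored in the caret object; its coefficients can be
# inspected with (sketch):
# summary(model_2$finalModel)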
predict(model_2,newdata=test)->lmfit
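# Price is known for the hold-out rows, so test error can be checked before
# writing the file (sketch; returns RMSE, Rsquared and MAE):
# postResample(pred=lmfit,obs=test$Price)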
write.csv(lmfit,"0002.csv",row.names=F)
read.csv("0002.csv")
##           x
## 1  48.34718
## 2  39.77896
## 3  43.15862
## 4  54.90123
## 5  45.16370
## 6  42.79491
## 7  52.14283
## 8  50.10081
## 9  47.99391
## 10 38.90202
## 11 47.74301
## 12 47.99391
## 13 42.39190
## 14 48.45489
## 15 43.03455
## 16 47.18521
## 17 35.86358
## 18 43.71990
## 19 35.59448
## 20 52.01875
## 21 41.93043
## 22 44.24999
## 23 39.02040
## 24 63.33453
## 25 32.46550
## 26 34.01732
## 27 41.81274
## 28 54.77440
## 29 35.97279
## 30 36.20055
## 31 51.29371
## 32 24.45540
## 33 22.54081
## 34 37.43658
## 35 33.55841
## 36 33.44301
## 37 37.31821
## 38 35.40467
## 39 53.66757
## 40 52.09694
## 41 42.92683
## 42 40.24043
## 43 38.32688
## 44 36.35937
## 45 35.40467
## 46 33.93367
## 47 41.69250