rm(list=ls())
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.2에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Warning: 패키지 'caret'는 R 버전 4.2.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: ggplot2
## 필요한 패키지를 로딩중입니다: lattice
library(recipes)
## Warning: 패키지 'recipes'는 R 버전 4.2.2에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(pROC)
## Warning: 패키지 'pROC'는 R 버전 4.2.2에서 작성되었습니다
## Type 'citation("pROC")' for a citation.
##
## 다음의 패키지를 부착합니다: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
df<-read.csv("travel_data.csv")
set.seed(1357)
train_list<-createDataPartition(y=df$TravelInsurance, p=0.75, list=FALSE)
df_train<-df[train_list,]
df_test<-df[-train_list,]
NROW(df_train)
## [1] 1491
NROW(df_test)
## [1] 496
glimpse(df_train)
## Rows: 1,491
## Columns: 10
## $ INDEX <int> 2, 3, 4, 7, 8, 9, 10, 11, 13, 16, 17, 18, 19, 20, …
## $ Age <int> 34, 28, 28, 31, 28, 33, 31, 26, 31, 28, 28, 29, 34…
## $ Employment.Type <chr> "Private Sector/Self Employed", "Private Sector/Se…
## $ GraduateOrNot <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "…
## $ AnnualIncome <int> 500000, 700000, 700000, 1350000, 1450000, 800000, …
## $ FamilyMembers <int> 4, 3, 8, 3, 6, 3, 9, 5, 6, 4, 7, 5, 2, 6, 3, 4, 9,…
## $ ChronicDiseases <int> 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,…
## $ FrequentFlyer <chr> "No", "No", "Yes", "Yes", "Yes", "Yes", "No", "Yes…
## $ EverTravelledAbroad <chr> "No", "No", "No", "Yes", "Yes", "No", "No", "Yes",…
## $ TravelInsurance <int> 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,…
df_train %>% mutate(index="train") ->df_train
df_test %>% mutate(index="test")-> df_test
bind_rows(df_train, df_test)->full
full$TravelInsurance<-ifelse(full$TravelInsurance==0, "미가입", "가입")
full$TravelInsurance<-as.factor(full$TravelInsurance)
full$GraduateOrNot<-as.factor(full$GraduateOrNot)
full$FrequentFlyer<-as.factor(full$FrequentFlyer)
full$EverTravelledAbroad<-as.factor(full$EverTravelledAbroad)
colSums(is.na(full))
## INDEX Age Employment.Type GraduateOrNot
## 0 0 0 0
## AnnualIncome FamilyMembers ChronicDiseases FrequentFlyer
## 0 0 0 0
## EverTravelledAbroad TravelInsurance index
## 0 0 0
recipe(TravelInsurance~., data=full) %>%
step_YeoJohnson(Age, AnnualIncome,FamilyMembers) %>%
step_center(Age, AnnualIncome, FamilyMembers) %>%
step_scale(Age,AnnualIncome, FamilyMembers) %>% prep() %>% juice() -> data
data %>% filter(index=="train") %>% select(-index)->train
data %>% filter(index=="test") %>% select(-index)->test
ctrl<-trainControl(method="cv",
summaryFunction = twoClassSummary,
classProbs = TRUE)
train(TravelInsurance~., data=train,
method="rpart", metric="ROC",
trControl=ctrl)->rpfit
rpfit
## CART
##
## 1491 samples
## 9 predictor
## 2 classes: '가입', '미가입'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 1342, 1342, 1342, 1342, 1341, 1342, ...
## Resampling results across tuning parameters:
##
## cp ROC Sens Spec
## 0.002358491 0.7800814 0.5754717 0.9593965
## 0.056603774 0.7331815 0.4830189 0.9718857
## 0.401886792 0.5834763 0.1773585 0.9895941
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.002358491.
confusionMatrix(rpfit)
## Cross-Validated (10 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction 가입 미가입
## 가입 20.5 2.6
## 미가입 15.1 61.8
##
## Accuracy (average) : 0.8229
predict(rpfit, test, type="prob")->rffit1 #type을 써서 팩터값과 확률값을 확인
head(rffit1)
## 가입 미가입
## 1 0.190326 0.809674
## 2 0.190326 0.809674
## 3 0.190326 0.809674
## 4 0.190326 0.809674
## 5 0.190326 0.809674
## 6 0.190326 0.809674
predict(rpfit,test,type="raw")->rffit2
head(rffit2)
## [1] 미가입 미가입 미가입 미가입 미가입 미가입
## Levels: 가입 미가입
confusionMatrix(rffit2, test$TravelInsurance)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 가입 미가입
## 가입 116 14
## 미가입 64 302
##
## Accuracy : 0.8427
## 95% CI : (0.8077, 0.8737)
## No Information Rate : 0.6371
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6383
##
## Mcnemar's Test P-Value : 2.887e-08
##
## Sensitivity : 0.6444
## Specificity : 0.9557
## Pos Pred Value : 0.8923
## Neg Pred Value : 0.8251
## Prevalence : 0.3629
## Detection Rate : 0.2339
## Detection Prevalence : 0.2621
## Balanced Accuracy : 0.8001
##
## 'Positive' Class : 가입
##
rffit2_num<-as.numeric(rffit2)
head(rffit2_num)
## [1] 2 2 2 2 2 2
result<-roc(test$TravelInsurance, rffit2_num)
## Setting levels: control = 가입, case = 미가입
## Setting direction: controls < cases
result$auc
## Area under the curve: 0.8001
names(rffit1)[1]<-"y_pred"
bind_cols(df_test,rffit1) %>% select(INDEX,y_pred)->df
head(df)
## INDEX y_pred
## 1 0 0.190326
## 2 1 0.190326
## 3 5 0.190326
## 4 6 0.190326
## 5 12 0.190326
## 6 14 0.190326
write.csv(df,"22027601.csv",row.names=F)
read.csv("22027601.csv")
## INDEX y_pred
## 1 0 0.1903260
## 2 1 0.1903260
## 3 5 0.1903260
## 4 6 0.1903260
## 5 12 0.1903260
## 6 14 0.1903260
## 7 15 0.8658537
## 8 27 0.1903260
## 9 33 0.1903260
## 10 37 0.2195122
## 11 38 0.2195122
## 12 39 0.9209486
## 13 43 0.1903260
## 14 46 0.9209486
## 15 48 0.1903260
## 16 56 0.1903260
## 17 64 0.9209486
## 18 65 0.1903260
## 19 72 0.1903260
## 20 74 0.1903260
## 21 83 0.2195122
## 22 85 0.9209486
## 23 92 0.1903260
## 24 94 0.2195122
## 25 97 0.1903260
## 26 99 0.1903260
## 27 101 0.9209486
## 28 105 0.1903260
## 29 106 0.2195122
## 30 110 0.1903260
## 31 111 0.1903260
## 32 112 0.1903260
## 33 125 0.1903260
## 34 127 0.1903260
## 35 128 0.9209486
## 36 133 0.9209486
## 37 135 0.1903260
## 38 140 0.1903260
## 39 154 0.1903260
## 40 155 0.8658537
## 41 158 0.1903260
## 42 161 0.9209486
## 43 162 0.8658537
## 44 171 0.1903260
## 45 176 0.8658537
## 46 178 0.1903260
## 47 183 0.9209486
## 48 184 0.1903260
## 49 187 0.1903260
## 50 188 0.1903260
## 51 195 0.9209486
## 52 198 0.9209486
## 53 200 0.1903260
## 54 210 0.1903260
## 55 212 0.1903260
## 56 213 0.9209486
## 57 222 0.1903260
## 58 223 0.1903260
## 59 224 0.1903260
## 60 233 0.1903260
## 61 234 0.2195122
## 62 238 0.1903260
## 63 242 0.2195122
## 64 243 0.1903260
## 65 249 0.1903260
## 66 250 0.8658537
## 67 257 0.2195122
## 68 266 0.1903260
## 69 267 0.2195122
## 70 269 0.1903260
## 71 270 0.8658537
## 72 272 0.1903260
## 73 273 0.8658537
## 74 283 0.1903260
## 75 286 0.1903260
## 76 298 0.9209486
## 77 300 0.1903260
## 78 301 0.1903260
## 79 306 0.2195122
## 80 309 0.2195122
## 81 312 0.1903260
## 82 320 0.2195122
## 83 321 0.1903260
## 84 323 0.9209486
## 85 324 0.1903260
## 86 334 0.1903260
## 87 337 0.1903260
## 88 339 0.1903260
## 89 343 0.1903260
## 90 347 0.8658537
## 91 352 0.9209486
## 92 356 0.2195122
## 93 359 0.1903260
## 94 361 0.9209486
## 95 362 0.1903260
## 96 365 0.9209486
## 97 366 0.1903260
## 98 369 0.1903260
## 99 372 0.1903260
## 100 378 0.9209486
## 101 392 0.1903260
## 102 394 0.1903260
## 103 403 0.1903260
## 104 404 0.2195122
## 105 414 0.9209486
## 106 422 0.9209486
## 107 423 0.9209486
## 108 425 0.1903260
## 109 428 0.1903260
## 110 429 0.1903260
## 111 430 0.1903260
## 112 431 0.1903260
## 113 443 0.1903260
## 114 449 0.1903260
## 115 456 0.9209486
## 116 463 0.2195122
## 117 464 0.9209486
## 118 468 0.1903260
## 119 469 0.9209486
## 120 470 0.1903260
## 121 472 0.1903260
## 122 473 0.9209486
## 123 479 0.2195122
## 124 482 0.1903260
## 125 484 0.8658537
## 126 486 0.2195122
## 127 490 0.1903260
## 128 499 0.1903260
## 129 505 0.1903260
## 130 512 0.1903260
## 131 515 0.1903260
## 132 518 0.9209486
## 133 528 0.1903260
## 134 529 0.1903260
## 135 530 0.1903260
## 136 532 0.1903260
## 137 533 0.1903260
## 138 534 0.1903260
## 139 536 0.1903260
## 140 538 0.1903260
## 141 547 0.9209486
## 142 549 0.1903260
## 143 550 0.2195122
## 144 551 0.9209486
## 145 553 0.9209486
## 146 554 0.1903260
## 147 559 0.1903260
## 148 561 0.1903260
## 149 562 0.2195122
## 150 563 0.2195122
## 151 565 0.1903260
## 152 570 0.1903260
## 153 584 0.1903260
## 154 586 0.1903260
## 155 593 0.1903260
## 156 594 0.2195122
## 157 598 0.8658537
## 158 600 0.9209486
## 159 606 0.1903260
## 160 610 0.1903260
## 161 611 0.1903260
## 162 614 0.2195122
## 163 615 0.1903260
## 164 616 0.1903260
## 165 617 0.1903260
## 166 619 0.8658537
## 167 623 0.1903260
## 168 625 0.9209486
## 169 628 0.1903260
## 170 633 0.1903260
## 171 642 0.1903260
## 172 654 0.9209486
## 173 655 0.1903260
## 174 659 0.2195122
## 175 662 0.1903260
## 176 667 0.1903260
## 177 675 0.1903260
## 178 676 0.1903260
## 179 677 0.1903260
## 180 681 0.1903260
## 181 682 0.9209486
## 182 688 0.2195122
## 183 689 0.1903260
## 184 690 0.9209486
## 185 698 0.8658537
## 186 699 0.1903260
## 187 703 0.1903260
## 188 708 0.2195122
## 189 713 0.1903260
## 190 717 0.1903260
## 191 719 0.2195122
## 192 724 0.1903260
## 193 731 0.1903260
## 194 738 0.1903260
## 195 741 0.8658537
## 196 742 0.8658537
## 197 744 0.1903260
## 198 746 0.1903260
## 199 749 0.8658537
## 200 750 0.2195122
## 201 752 0.1903260
## 202 754 0.1903260
## 203 756 0.1903260
## 204 758 0.9209486
## 205 759 0.8658537
## 206 761 0.9209486
## 207 771 0.1903260
## 208 775 0.1903260
## 209 778 0.1903260
## 210 779 0.9209486
## 211 781 0.1903260
## 212 784 0.9209486
## 213 791 0.8658537
## 214 795 0.1903260
## 215 801 0.1903260
## 216 804 0.1903260
## 217 805 0.1903260
## 218 807 0.1903260
## 219 819 0.1903260
## 220 822 0.1903260
## 221 823 0.1903260
## 222 830 0.8658537
## 223 833 0.9209486
## 224 837 0.1903260
## 225 853 0.1903260
## 226 856 0.8658537
## 227 859 0.1903260
## 228 863 0.1903260
## 229 865 0.9209486
## 230 876 0.1903260
## 231 879 0.1903260
## 232 885 0.1903260
## 233 886 0.9209486
## 234 889 0.2195122
## 235 892 0.1903260
## 236 893 0.1903260
## 237 894 0.1903260
## 238 896 0.2195122
## 239 902 0.8658537
## 240 903 0.1903260
## 241 904 0.1903260
## 242 909 0.9209486
## 243 913 0.9209486
## 244 924 0.1903260
## 245 926 0.2195122
## 246 927 0.9209486
## 247 938 0.1903260
## 248 939 0.1903260
## 249 947 0.2195122
## 250 948 0.1903260
## 251 952 0.9209486
## 252 959 0.1903260
## 253 965 0.1903260
## 254 968 0.1903260
## 255 973 0.9209486
## 256 975 0.9209486
## 257 978 0.1903260
## 258 981 0.1903260
## 259 982 0.1903260
## 260 983 0.8658537
## 261 988 0.9209486
## 262 994 0.1903260
## 263 995 0.1903260
## 264 999 0.8658537
## 265 1000 0.1903260
## 266 1003 0.1903260
## 267 1004 0.1903260
## 268 1005 0.1903260
## 269 1006 0.1903260
## 270 1011 0.8658537
## 271 1013 0.1903260
## 272 1017 0.1903260
## 273 1019 0.1903260
## 274 1022 0.1903260
## 275 1031 0.1903260
## 276 1037 0.1903260
## 277 1044 0.9209486
## 278 1046 0.2195122
## 279 1050 0.2195122
## 280 1053 0.2195122
## 281 1055 0.9209486
## 282 1057 0.1903260
## 283 1063 0.1903260
## 284 1067 0.2195122
## 285 1079 0.2195122
## 286 1080 0.1903260
## 287 1082 0.1903260
## 288 1087 0.1903260
## 289 1090 0.1903260
## 290 1092 0.9209486
## 291 1095 0.8658537
## 292 1098 0.1903260
## 293 1106 0.2195122
## 294 1110 0.1903260
## 295 1111 0.2195122
## 296 1112 0.2195122
## 297 1135 0.1903260
## 298 1140 0.1903260
## 299 1146 0.1903260
## 300 1147 0.9209486
## 301 1150 0.1903260
## 302 1151 0.8658537
## 303 1153 0.1903260
## 304 1156 0.1903260
## 305 1158 0.2195122
## 306 1162 0.9209486
## 307 1169 0.9209486
## 308 1172 0.1903260
## 309 1173 0.1903260
## 310 1175 0.2195122
## 311 1182 0.1903260
## 312 1186 0.1903260
## 313 1188 0.1903260
## 314 1190 0.1903260
## 315 1193 0.2195122
## 316 1195 0.1903260
## 317 1198 0.2195122
## 318 1202 0.9209486
## 319 1206 0.1903260
## 320 1207 0.1903260
## 321 1209 0.9209486
## 322 1210 0.1903260
## 323 1212 0.2195122
## 324 1213 0.1903260
## 325 1219 0.8658537
## 326 1230 0.9209486
## 327 1235 0.1903260
## 328 1245 0.9209486
## 329 1247 0.1903260
## 330 1257 0.1903260
## 331 1263 0.1903260
## 332 1264 0.1903260
## 333 1271 0.1903260
## 334 1272 0.2195122
## 335 1274 0.2195122
## 336 1275 0.1903260
## 337 1282 0.9209486
## 338 1284 0.2195122
## 339 1297 0.1903260
## 340 1302 0.1903260
## 341 1308 0.1903260
## 342 1309 0.9209486
## 343 1319 0.1903260
## 344 1335 0.1903260
## 345 1337 0.1903260
## 346 1346 0.1903260
## 347 1349 0.9209486
## 348 1352 0.1903260
## 349 1354 0.9209486
## 350 1358 0.9209486
## 351 1366 0.9209486
## 352 1369 0.2195122
## 353 1374 0.1903260
## 354 1379 0.2195122
## 355 1381 0.9209486
## 356 1383 0.9209486
## 357 1385 0.1903260
## 358 1389 0.1903260
## 359 1391 0.9209486
## 360 1400 0.1903260
## 361 1401 0.1903260
## 362 1407 0.1903260
## 363 1414 0.2195122
## 364 1416 0.1903260
## 365 1418 0.1903260
## 366 1420 0.2195122
## 367 1421 0.9209486
## 368 1427 0.1903260
## 369 1428 0.8658537
## 370 1431 0.1903260
## 371 1440 0.2195122
## 372 1449 0.1903260
## 373 1459 0.9209486
## 374 1460 0.1903260
## 375 1462 0.1903260
## 376 1464 0.1903260
## 377 1466 0.1903260
## 378 1469 0.1903260
## 379 1472 0.1903260
## 380 1479 0.9209486
## 381 1487 0.1903260
## 382 1491 0.9209486
## 383 1493 0.1903260
## 384 1496 0.1903260
## 385 1501 0.1903260
## 386 1506 0.1903260
## 387 1510 0.1903260
## 388 1511 0.1903260
## 389 1516 0.2195122
## 390 1523 0.1903260
## 391 1527 0.1903260
## 392 1537 0.1903260
## 393 1538 0.1903260
## 394 1540 0.2195122
## 395 1541 0.1903260
## 396 1545 0.9209486
## 397 1549 0.1903260
## 398 1555 0.9209486
## 399 1556 0.9209486
## 400 1559 0.2195122
## 401 1566 0.9209486
## 402 1568 0.1903260
## 403 1572 0.1903260
## 404 1585 0.9209486
## 405 1588 0.1903260
## 406 1589 0.9209486
## 407 1607 0.1903260
## 408 1615 0.1903260
## 409 1617 0.1903260
## 410 1618 0.1903260
## 411 1620 0.1903260
## 412 1630 0.1903260
## 413 1640 0.1903260
## 414 1647 0.2195122
## 415 1648 0.1903260
## 416 1650 0.9209486
## 417 1651 0.9209486
## 418 1656 0.1903260
## 419 1658 0.8658537
## 420 1661 0.1903260
## 421 1662 0.1903260
## 422 1670 0.8658537
## 423 1671 0.9209486
## 424 1675 0.8658537
## 425 1678 0.9209486
## 426 1689 0.1903260
## 427 1691 0.9209486
## 428 1692 0.1903260
## 429 1714 0.1903260
## 430 1722 0.9209486
## 431 1727 0.1903260
## 432 1732 0.1903260
## 433 1734 0.2195122
## 434 1736 0.8658537
## 435 1738 0.1903260
## 436 1742 0.1903260
## 437 1747 0.9209486
## 438 1750 0.1903260
## 439 1754 0.9209486
## 440 1756 0.1903260
## 441 1764 0.1903260
## 442 1765 0.8658537
## 443 1767 0.2195122
## 444 1769 0.1903260
## 445 1770 0.2195122
## 446 1773 0.1903260
## 447 1774 0.1903260
## 448 1785 0.1903260
## 449 1791 0.1903260
## 450 1793 0.9209486
## 451 1798 0.9209486
## 452 1804 0.1903260
## 453 1806 0.1903260
## 454 1809 0.1903260
## 455 1812 0.1903260
## 456 1813 0.9209486
## 457 1818 0.1903260
## 458 1827 0.1903260
## 459 1828 0.1903260
## 460 1831 0.1903260
## 461 1834 0.1903260
## 462 1835 0.1903260
## 463 1848 0.1903260
## 464 1858 0.1903260
## 465 1863 0.2195122
## 466 1865 0.9209486
## 467 1873 0.2195122
## 468 1882 0.9209486
## 469 1886 0.1903260
## 470 1887 0.1903260
## 471 1893 0.1903260
## 472 1896 0.9209486
## 473 1899 0.1903260
## 474 1901 0.1903260
## 475 1909 0.1903260
## 476 1912 0.2195122
## 477 1918 0.9209486
## 478 1920 0.1903260
## 479 1921 0.1903260
## 480 1922 0.2195122
## 481 1930 0.1903260
## 482 1937 0.2195122
## 483 1938 0.9209486
## 484 1940 0.9209486
## 485 1942 0.1903260
## 486 1949 0.8658537
## 487 1951 0.1903260
## 488 1953 0.8658537
## 489 1957 0.1903260
## 490 1958 0.1903260
## 491 1959 0.1903260
## 492 1975 0.9209486
## 493 1976 0.1903260
## 494 1980 0.1903260
## 495 1983 0.9209486
## 496 1986 0.2195122
rm(list=ls())
# 예제 3번
train<-read.csv("insurance_train_10.csv")
test<-read.csv("insurance_test_10.csv")
glimpse(train)
## Rows: 6,969
## Columns: 9
## $ Gender <chr> "Male", "Female", "Male", "Male", "Male", "Female", "F…
## $ Ever_Married <chr> "No", "Yes", "Yes", "Yes", "No", "No", "Yes", "Yes", "…
## $ Age <int> 22, 67, 67, 56, 32, 33, 61, 55, 26, 19, 58, 41, 32, 31…
## $ Graduated <chr> "No", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession <chr> "Healthcare", "Engineer", "Lawyer", "Artist", "Healthc…
## $ Work_Experience <int> 1, 1, 0, 0, 1, 1, 0, 1, 1, 4, 0, 1, 9, 1, 1, 0, 12, 3,…
## $ Spending_Score <chr> "Low", "Low", "High", "Average", "Low", "Low", "Low", …
## $ Family_Size <int> 4, 1, 2, 2, 3, 3, 3, 4, 3, 4, 1, 2, 5, 6, 4, 1, 1, 4, …
## $ Segmentation <int> 4, 2, 2, 3, 3, 4, 4, 3, 1, 4, 2, 3, 4, 2, 2, 3, 1, 4, …
colSums(is.na(train))
## Gender Ever_Married Age Graduated Profession
## 0 0 0 0 0
## Work_Experience Spending_Score Family_Size Segmentation
## 0 0 0 0
train$Segmentation<-as.factor(train$Segmentation)
library(caret)
ctrl<-trainControl(method="cv", number=10)
train(Segmentation~., data=train, method="knn",
trControl=ctrl, preProcess=c("center","scale"))->knn_fit
knn_fit
## k-Nearest Neighbors
##
## 6969 samples
## 8 predictor
## 4 classes: '1', '2', '3', '4'
##
## Pre-processing: centered (19), scaled (19)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 6273, 6272, 6271, 6271, 6272, 6271, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.4878727 0.3160686
## 7 0.4933174 0.3230107
## 9 0.4936085 0.3232908
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
confusionMatrix(knn_fit)
## Cross-Validated (10 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction 1 2 3 4
## 1 9.8 5.6 2.8 5.4
## 2 5.7 7.3 5.3 2.0
## 3 3.7 7.7 14.3 1.4
## 4 5.1 2.9 3.1 18.0
##
## Accuracy (average) : 0.4936
predict(knn_fit, test)->pred_fit
head(pred_fit)
## [1] 2 1 2 3 3 1
## Levels: 1 2 3 4
NROW(pred_fit)
## [1] 2267
glimpse(test)
## Rows: 2,267
## Columns: 9
## $ X <int> 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ Gender <chr> "Female", "Male", "Female", "Male", "Male", "Male", "F…
## $ Ever_Married <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"…
## $ Age <int> 36, 37, 69, 59, 47, 61, 47, 50, 19, 22, 22, 50, 27, 18…
## $ Graduated <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession <chr> "Engineer", "Healthcare", "", "Executive", "Doctor", "…
## $ Work_Experience <int> 0, 8, 0, 11, 0, 5, 1, 2, 0, 0, 0, 1, 8, 0, 0, 1, 1, 8,…
## $ Spending_Score <chr> "Low", "Average", "Low", "High", "High", "Low", "Avera…
## $ Family_Size <int> 1, 4, 1, 2, 5, 3, 3, 4, 4, 3, 6, 5, 3, 3, 1, 3, 2, 1, …
bind_cols(test, pred_fit)->df
## New names:
## • `` -> `...10`
names(df)[10]<-"Segmentation_pred"
names(df)[10]
## [1] "Segmentation_pred"
df %>% select(10)->df1
write.csv(df1,"22027601_1.csv",row.names=FALSE)
read.csv("22027601_1.csv")
## Segmentation_pred
## 1 2
## 2 1
## 3 2
## 4 3
## 5 3
## 6 1
## 7 3
## 8 3
## 9 4
## 10 4
## 11 4
## 12 3
## 13 4
## 14 4
## 15 2
## 16 2
## 17 3
## 18 2
## 19 3
## 20 1
## 21 4
## 22 1
## 23 3
## 24 2
## 25 1
## 26 3
## 27 1
## 28 3
## 29 3
## 30 2
## 31 1
## 32 3
## 33 3
## 34 2
## 35 3
## 36 4
## 37 3
## 38 3
## 39 2
## 40 4
## 41 3
## 42 4
## 43 2
## 44 1
## 45 3
## 46 2
## 47 3
## 48 1
## 49 3
## 50 1
## 51 3
## 52 4
## 53 3
## 54 2
## 55 3
## 56 1
## 57 1
## 58 2
## 59 3
## 60 4
## 61 3
## 62 4
## 63 3
## 64 3
## 65 1
## 66 3
## 67 2
## 68 3
## 69 3
## 70 3
## 71 1
## 72 1
## 73 1
## 74 3
## 75 1
## 76 3
## 77 4
## 78 3
## 79 4
## 80 2
## 81 2
## 82 1
## 83 3
## 84 1
## 85 3
## 86 2
## 87 3
## 88 1
## 89 1
## 90 4
## 91 4
## 92 2
## 93 4
## 94 1
## 95 3
## 96 1
## 97 3
## 98 2
## 99 3
## 100 4
## 101 2
## 102 4
## 103 1
## 104 3
## 105 1
## 106 3
## 107 3
## 108 2
## 109 4
## 110 4
## 111 3
## 112 3
## 113 4
## 114 1
## 115 3
## 116 3
## 117 1
## 118 4
## 119 1
## 120 3
## 121 3
## 122 4
## 123 4
## 124 4
## 125 1
## 126 2
## 127 3
## 128 3
## 129 2
## 130 3
## 131 1
## 132 2
## 133 3
## 134 3
## 135 1
## 136 4
## 137 4
## 138 3
## 139 2
## 140 4
## 141 2
## 142 4
## 143 3
## 144 4
## 145 4
## 146 3
## 147 4
## 148 1
## 149 4
## 150 4
## 151 3
## 152 3
## 153 3
## 154 3
## 155 2
## 156 3
## 157 1
## 158 2
## 159 3
## 160 4
## 161 1
## 162 3
## 163 2
## 164 3
## 165 3
## 166 3
## 167 1
## 168 4
## 169 1
## 170 4
## 171 3
## 172 2
## 173 2
## 174 3
## 175 4
## 176 4
## 177 2
## 178 3
## 179 3
## 180 3
## 181 1
## 182 2
## 183 4
## 184 2
## 185 3
## 186 4
## 187 2
## 188 1
## 189 3
## 190 3
## 191 2
## 192 4
## 193 3
## 194 4
## 195 2
## 196 1
## 197 1
## 198 2
## 199 1
## 200 2
## 201 2
## 202 1
## 203 1
## 204 4
## 205 2
## 206 4
## 207 2
## 208 4
## 209 3
## 210 1
## 211 2
## 212 3
## 213 4
## 214 2
## 215 1
## 216 3
## 217 3
## 218 3
## 219 1
## 220 1
## 221 4
## 222 2
## 223 4
## 224 1
## 225 2
## 226 1
## 227 3
## 228 3
## 229 1
## 230 3
## 231 3
## 232 2
## 233 1
## 234 1
## 235 2
## 236 3
## 237 1
## 238 3
## 239 2
## 240 1
## 241 1
## 242 1
## 243 2
## 244 2
## 245 4
## 246 1
## 247 1
## 248 1
## 249 4
## 250 2
## 251 2
## 252 3
## 253 1
## 254 1
## 255 4
## 256 1
## 257 1
## 258 4
## 259 2
## 260 1
## 261 1
## 262 3
## 263 4
## 264 1
## 265 3
## 266 2
## 267 2
## 268 4
## 269 1
## 270 2
## 271 2
## 272 3
## 273 3
## 274 3
## 275 1
## 276 4
## 277 3
## 278 4
## 279 2
## 280 1
## 281 2
## 282 4
## 283 2
## 284 1
## 285 3
## 286 3
## 287 1
## 288 1
## 289 4
## 290 4
## 291 4
## 292 3
## 293 3
## 294 1
## 295 1
## 296 2
## 297 4
## 298 4
## 299 4
## 300 2
## 301 4
## 302 4
## 303 4
## 304 4
## 305 1
## 306 1
## 307 1
## 308 3
## 309 1
## 310 4
## 311 4
## 312 1
## 313 4
## 314 2
## 315 2
## 316 2
## 317 2
## 318 4
## 319 2
## 320 3
## 321 1
## 322 1
## 323 1
## 324 1
## 325 3
## 326 1
## 327 1
## 328 2
## 329 4
## 330 4
## 331 4
## 332 4
## 333 1
## 334 1
## 335 2
## 336 4
## 337 1
## 338 2
## 339 4
## 340 2
## 341 3
## 342 3
## 343 3
## 344 4
## 345 4
## 346 2
## 347 4
## 348 2
## 349 3
## 350 1
## 351 1
## 352 1
## 353 1
## 354 1
## 355 1
## 356 1
## 357 2
## 358 4
## 359 1
## 360 4
## 361 4
## 362 1
## 363 4
## 364 1
## 365 1
## 366 1
## 367 4
## 368 1
## 369 1
## 370 1
## 371 1
## 372 3
## 373 1
## 374 1
## 375 1
## 376 1
## 377 4
## 378 3
## 379 2
## 380 4
## 381 1
## 382 4
## 383 4
## 384 4
## 385 4
## 386 4
## 387 1
## 388 4
## 389 2
## 390 3
## 391 2
## 392 1
## 393 3
## 394 1
## 395 3
## 396 3
## 397 3
## 398 4
## 399 1
## 400 2
## 401 4
## 402 4
## 403 1
## 404 1
## 405 4
## 406 4
## 407 3
## 408 4
## 409 4
## 410 3
## 411 1
## 412 1
## 413 3
## 414 3
## 415 4
## 416 4
## 417 1
## 418 4
## 419 4
## 420 1
## 421 1
## 422 1
## 423 4
## 424 4
## 425 4
## 426 2
## 427 4
## 428 2
## 429 3
## 430 1
## 431 3
## 432 2
## 433 3
## 434 1
## 435 3
## 436 2
## 437 4
## 438 4
## 439 3
## 440 3
## 441 4
## 442 2
## 443 3
## 444 4
## 445 1
## 446 4
## 447 2
## 448 1
## 449 4
## 450 2
## 451 1
## 452 2
## 453 2
## 454 2
## 455 4
## 456 1
## 457 2
## 458 4
## 459 1
## 460 1
## 461 4
## 462 1
## 463 2
## 464 1
## 465 3
## 466 4
## 467 4
## 468 3
## 469 3
## 470 2
## 471 4
## 472 1
## 473 3
## 474 4
## 475 3
## 476 4
## 477 4
## 478 2
## 479 2
## 480 1
## 481 3
## 482 4
## 483 3
## 484 4
## 485 1
## 486 4
## 487 1
## 488 4
## 489 3
## 490 1
## 491 1
## 492 2
## 493 4
## 494 3
## 495 4
## 496 4
## 497 3
## 498 3
## 499 4
## 500 4
## 501 3
## 502 4
## 503 1
## 504 3
## 505 4
## 506 2
## 507 1
## 508 4
## 509 4
## 510 4
## 511 2
## 512 4
## 513 4
## 514 4
## 515 2
## 516 2
## 517 1
## 518 3
## 519 2
## 520 4
## 521 4
## 522 1
## 523 3
## 524 1
## 525 3
## 526 3
## 527 4
## 528 4
## 529 4
## 530 4
## 531 4
## 532 4
## 533 2
## 534 2
## 535 4
## 536 4
## 537 2
## 538 1
## 539 4
## 540 1
## 541 1
## 542 1
## 543 4
## 544 4
## 545 4
## 546 1
## 547 3
## 548 1
## 549 4
## 550 4
## 551 4
## 552 4
## 553 4
## 554 4
## 555 4
## 556 4
## 557 4
## 558 4
## 559 4
## 560 2
## 561 4
## 562 3
## 563 3
## 564 1
## 565 2
## 566 4
## 567 4
## 568 4
## 569 4
## 570 4
## 571 4
## 572 4
## 573 2
## 574 3
## 575 4
## 576 4
## 577 4
## 578 3
## 579 3
## 580 1
## 581 3
## 582 4
## 583 3
## 584 1
## 585 3
## 586 3
## 587 3
## 588 4
## 589 4
## 590 4
## 591 1
## 592 4
## 593 2
## 594 4
## 595 4
## 596 2
## 597 2
## 598 4
## 599 2
## 600 4
## 601 4
## 602 3
## 603 2
## 604 3
## 605 3
## 606 3
## 607 3
## 608 3
## 609 2
## 610 4
## 611 1
## 612 1
## 613 4
## 614 2
## 615 2
## 616 3
## 617 3
## 618 3
## 619 3
## 620 2
## 621 2
## 622 2
## 623 2
## 624 3
## 625 3
## 626 1
## 627 3
## 628 2
## 629 2
## 630 1
## 631 4
## 632 1
## 633 1
## 634 1
## 635 4
## 636 4
## 637 1
## 638 3
## 639 3
## 640 3
## 641 3
## 642 3
## 643 4
## 644 3
## 645 1
## 646 4
## 647 1
## 648 1
## 649 3
## 650 3
## 651 3
## 652 4
## 653 3
## 654 2
## 655 3
## 656 3
## 657 3
## 658 4
## 659 3
## 660 4
## 661 3
## 662 1
## 663 4
## 664 4
## 665 2
## 666 4
## 667 1
## 668 1
## 669 4
## 670 3
## 671 3
## 672 2
## 673 3
## 674 1
## 675 1
## 676 3
## 677 3
## 678 2
## 679 4
## 680 3
## 681 1
## 682 3
## 683 3
## 684 3
## 685 3
## 686 4
## 687 3
## 688 2
## 689 3
## 690 2
## 691 1
## 692 2
## 693 3
## 694 1
## 695 3
## 696 2
## 697 4
## 698 4
## 699 2
## 700 4
## 701 1
## 702 1
## 703 4
## 704 2
## 705 4
## 706 4
## 707 3
## 708 4
## 709 3
## 710 3
## 711 2
## 712 4
## 713 4
## 714 3
## 715 2
## 716 2
## 717 4
## 718 2
## 719 3
## 720 3
## 721 4
## 722 4
## 723 2
## 724 3
## 725 3
## 726 1
## 727 1
## 728 2
## 729 3
## 730 1
## 731 1
## 732 4
## 733 2
## 734 3
## 735 4
## 736 2
## 737 2
## 738 4
## 739 4
## 740 1
## 741 4
## 742 2
## 743 2
## 744 3
## 745 1
## 746 3
## 747 4
## 748 3
## 749 4
## 750 2
## 751 1
## 752 3
## 753 4
## 754 1
## 755 1
## 756 1
## 757 1
## 758 2
## 759 3
## 760 4
## 761 3
## 762 1
## 763 3
## 764 3
## 765 3
## 766 1
## 767 3
## 768 2
## 769 4
## 770 4
## 771 2
## 772 4
## 773 3
## 774 2
## 775 4
## 776 2
## 777 1
## 778 4
## 779 2
## 780 4
## 781 3
## 782 4
## 783 1
## 784 3
## 785 3
## 786 4
## 787 2
## 788 1
## 789 2
## 790 3
## 791 2
## 792 1
## 793 3
## 794 3
## 795 2
## 796 3
## 797 2
## 798 4
## 799 3
## 800 2
## 801 4
## 802 1
## 803 1
## 804 1
## 805 1
## 806 4
## 807 4
## 808 2
## 809 2
## 810 4
## 811 1
## 812 1
## 813 2
## 814 4
## 815 4
## 816 4
## 817 1
## 818 4
## 819 1
## 820 1
## 821 4
## 822 2
## 823 4
## 824 2
## 825 1
## 826 4
## 827 3
## 828 3
## 829 4
## 830 4
## 831 4
## 832 2
## 833 2
## 834 2
## 835 1
## 836 2
## 837 3
## 838 3
## 839 1
## 840 3
## 841 3
## 842 1
## 843 3
## 844 2
## 845 3
## 846 3
## 847 1
## 848 3
## 849 4
## 850 1
## 851 1
## 852 3
## 853 3
## 854 4
## 855 3
## 856 4
## 857 2
## 858 2
## 859 2
## 860 2
## 861 4
## 862 1
## 863 1
## 864 4
## 865 4
## 866 2
## 867 3
## 868 3
## 869 3
## 870 1
## 871 4
## 872 4
## 873 4
## 874 4
## 875 4
## 876 4
## 877 4
## 878 3
## 879 1
## 880 1
## 881 4
## 882 3
## 883 3
## 884 3
## 885 2
## 886 3
## 887 4
## 888 2
## 889 4
## 890 4
## 891 4
## 892 2
## 893 3
## 894 3
## 895 1
## 896 1
## 897 2
## 898 2
## 899 3
## 900 2
## 901 4
## 902 4
## 903 3
## 904 3
## 905 1
## 906 3
## 907 1
## 908 1
## 909 2
## 910 4
## 911 3
## 912 1
## 913 2
## 914 1
## 915 4
## 916 2
## 917 4
## 918 4
## 919 4
## 920 3
## 921 2
## 922 2
## 923 1
## 924 3
## 925 1
## 926 4
## 927 2
## 928 2
## 929 2
## 930 3
## 931 2
## 932 4
## 933 2
## 934 2
## 935 4
## 936 2
## 937 1
## 938 1
## 939 2
## 940 3
## 941 4
## 942 4
## 943 4
## 944 4
## 945 2
## 946 4
## 947 3
## 948 3
## 949 3
## 950 1
## 951 1
## 952 1
## 953 1
## 954 3
## 955 3
## 956 4
## 957 4
## 958 2
## 959 2
## 960 4
## 961 2
## 962 2
## 963 4
## 964 4
## 965 4
## 966 4
## 967 4
## 968 1
## 969 3
## 970 4
## 971 3
## 972 1
## 973 3
## 974 1
## 975 4
## 976 4
## 977 3
## 978 3
## 979 3
## 980 4
## 981 2
## 982 1
## 983 4
## 984 3
## 985 4
## 986 2
## 987 4
## 988 2
## 989 3
## 990 1
## 991 2
## 992 3
## 993 4
## 994 1
## 995 3
## 996 3
## 997 4
## 998 1
## 999 3
## 1000 2
## 1001 1
## 1002 4
## 1003 3
## 1004 4
## 1005 2
## 1006 2
## 1007 1
## 1008 4
## 1009 4
## 1010 4
## 1011 1
## 1012 2
## 1013 4
## 1014 2
## 1015 1
## 1016 1
## 1017 3
## 1018 2
## 1019 1
## 1020 3
## 1021 4
## 1022 1
## 1023 4
## 1024 3
## 1025 3
## 1026 1
## 1027 3
## 1028 3
## 1029 2
## 1030 1
## 1031 3
## 1032 3
## 1033 4
## 1034 2
## 1035 3
## 1036 1
## 1037 3
## 1038 4
## 1039 3
## 1040 3
## 1041 1
## 1042 3
## 1043 4
## 1044 2
## 1045 2
## 1046 1
## 1047 3
## 1048 3
## 1049 4
## 1050 2
## 1051 3
## 1052 4
## 1053 4
## 1054 1
## 1055 2
## 1056 2
## 1057 1
## 1058 3
## 1059 3
## 1060 2
## 1061 1
## 1062 1
## 1063 2
## 1064 1
## 1065 4
## 1066 4
## 1067 3
## 1068 4
## 1069 3
## 1070 3
## 1071 1
## 1072 2
## 1073 4
## 1074 2
## 1075 1
## 1076 2
## 1077 3
## 1078 1
## 1079 4
## 1080 2
## 1081 4
## 1082 4
## 1083 4
## 1084 2
## 1085 4
## 1086 1
## 1087 4
## 1088 2
## 1089 1
## 1090 4
## 1091 4
## 1092 1
## 1093 1
## 1094 3
## 1095 4
## 1096 2
## 1097 3
## 1098 1
## 1099 4
## 1100 1
## 1101 4
## 1102 1
## 1103 4
## 1104 2
## 1105 3
## 1106 2
## 1107 2
## 1108 1
## 1109 3
## 1110 3
## 1111 3
## 1112 3
## 1113 1
## 1114 2
## 1115 2
## 1116 4
## 1117 4
## 1118 1
## 1119 4
## 1120 2
## 1121 1
## 1122 3
## 1123 1
## 1124 1
## 1125 3
## 1126 1
## 1127 3
## 1128 4
## 1129 2
## 1130 4
## 1131 3
## 1132 1
## 1133 1
## 1134 4
## 1135 4
## 1136 1
## 1137 3
## 1138 1
## 1139 4
## 1140 4
## 1141 1
## 1142 4
## 1143 4
## 1144 4
## 1145 3
## 1146 4
## 1147 2
## 1148 1
## 1149 4
## 1150 4
## 1151 2
## 1152 1
## 1153 3
## 1154 4
## 1155 4
## 1156 4
## 1157 3
## 1158 1
## 1159 3
## 1160 3
## 1161 3
## 1162 1
## 1163 2
## 1164 3
## 1165 4
## 1166 2
## 1167 2
## 1168 2
## 1169 4
## 1170 2
## 1171 4
## 1172 4
## 1173 3
## 1174 3
## 1175 4
## 1176 4
## 1177 1
## 1178 4
## 1179 4
## 1180 2
## 1181 3
## 1182 3
## 1183 4
## 1184 4
## 1185 2
## 1186 3
## 1187 2
## 1188 3
## 1189 2
## 1190 1
## 1191 1
## 1192 3
## 1193 1
## 1194 4
## 1195 3
## 1196 4
## 1197 1
## 1198 4
## 1199 2
## 1200 4
## 1201 2
## 1202 4
## 1203 2
## 1204 1
## 1205 1
## 1206 3
## 1207 2
## 1208 2
## 1209 4
## 1210 2
## 1211 4
## 1212 2
## 1213 3
## 1214 3
## 1215 2
## 1216 3
## 1217 3
## 1218 1
## 1219 1
## 1220 2
## 1221 1
## 1222 1
## 1223 2
## 1224 4
## 1225 4
## 1226 1
## 1227 3
## 1228 2
## 1229 3
## 1230 1
## 1231 1
## 1232 4
## 1233 2
## 1234 2
## 1235 3
## 1236 3
## 1237 4
## 1238 4
## 1239 1
## 1240 3
## 1241 3
## 1242 3
## 1243 1
## 1244 3
## 1245 1
## 1246 3
## 1247 4
## 1248 3
## 1249 3
## 1250 3
## 1251 4
## 1252 1
## 1253 4
## 1254 4
## 1255 4
## 1256 4
## 1257 4
## 1258 4
## 1259 4
## 1260 2
## 1261 2
## 1262 4
## 1263 4
## 1264 2
## 1265 1
## 1266 3
## 1267 2
## 1268 1
## 1269 4
## 1270 1
## 1271 2
## 1272 1
## 1273 1
## 1274 1
## 1275 3
## 1276 3
## 1277 3
## 1278 3
## 1279 3
## 1280 3
## 1281 1
## 1282 4
## 1283 1
## 1284 2
## 1285 1
## 1286 4
## 1287 1
## 1288 3
## 1289 4
## 1290 3
## 1291 2
## 1292 3
## 1293 1
## 1294 2
## 1295 3
## 1296 2
## 1297 3
## 1298 2
## 1299 3
## 1300 1
## 1301 3
## 1302 2
## 1303 4
## 1304 3
## 1305 3
## 1306 2
## 1307 1
## 1308 3
## 1309 4
## 1310 4
## 1311 3
## 1312 3
## 1313 3
## 1314 1
## 1315 3
## 1316 3
## 1317 3
## 1318 4
## 1319 1
## 1320 3
## 1321 3
## 1322 3
## 1323 1
## 1324 1
## 1325 3
## 1326 4
## 1327 3
## 1328 1
## 1329 4
## 1330 1
## 1331 2
## 1332 1
## 1333 1
## 1334 2
## 1335 1
## 1336 2
## 1337 4
## 1338 3
## 1339 2
## 1340 3
## 1341 3
## 1342 3
## 1343 3
## 1344 3
## 1345 2
## 1346 1
## 1347 2
## 1348 3
## 1349 3
## 1350 4
## 1351 4
## 1352 1
## 1353 4
## 1354 3
## 1355 2
## 1356 2
## 1357 3
## 1358 1
## 1359 3
## 1360 3
## 1361 2
## 1362 1
## 1363 3
## 1364 2
## 1365 3
## 1366 1
## 1367 4
## 1368 3
## 1369 3
## 1370 2
## 1371 4
## 1372 4
## 1373 4
## 1374 1
## 1375 3
## 1376 2
## 1377 2
## 1378 2
## 1379 2
## 1380 2
## 1381 3
## 1382 2
## 1383 1
## 1384 4
## 1385 1
## 1386 2
## 1387 4
## 1388 1
## 1389 4
## 1390 4
## 1391 4
## 1392 1
## 1393 2
## 1394 1
## 1395 1
## 1396 1
## 1397 4
## 1398 3
## 1399 3
## 1400 3
## 1401 3
## 1402 2
## 1403 3
## 1404 2
## 1405 3
## 1406 1
## 1407 2
## 1408 2
## 1409 2
## 1410 1
## 1411 2
## 1412 2
## 1413 1
## 1414 4
## 1415 2
## 1416 4
## 1417 4
## 1418 2
## 1419 1
## 1420 4
## 1421 1
## 1422 4
## 1423 1
## 1424 3
## 1425 4
## 1426 4
## 1427 1
## 1428 1
## 1429 2
## 1430 1
## 1431 3
## 1432 4
## 1433 3
## 1434 2
## 1435 2
## 1436 2
## 1437 2
## 1438 1
## 1439 2
## 1440 2
## 1441 2
## 1442 2
## 1443 2
## 1444 1
## 1445 2
## 1446 1
## 1447 1
## 1448 4
## 1449 1
## 1450 1
## 1451 1
## 1452 2
## 1453 2
## 1454 2
## 1455 4
## 1456 2
## 1457 4
## 1458 4
## 1459 4
## 1460 1
## 1461 2
## 1462 2
## 1463 4
## 1464 4
## 1465 1
## 1466 4
## 1467 4
## 1468 4
## 1469 1
## 1470 1
## 1471 3
## 1472 3
## 1473 2
## 1474 1
## 1475 2
## 1476 4
## 1477 3
## 1478 3
## 1479 3
## 1480 1
## 1481 2
## 1482 3
## 1483 1
## 1484 1
## 1485 3
## 1486 2
## 1487 1
## 1488 1
## 1489 1
## 1490 2
## 1491 4
## 1492 4
## 1493 4
## 1494 4
## 1495 2
## 1496 2
## 1497 4
## 1498 2
## 1499 2
## 1500 4
## 1501 1
## 1502 2
## 1503 1
## 1504 3
## 1505 2
## 1506 1
## 1507 3
## 1508 3
## 1509 4
## 1510 3
## 1511 3
## 1512 2
## 1513 2
## 1514 4
## 1515 2
## 1516 1
## 1517 4
## 1518 3
## 1519 1
## 1520 1
## 1521 3
## 1522 3
## 1523 1
## 1524 3
## 1525 1
## 1526 2
## 1527 3
## 1528 1
## 1529 2
## 1530 2
## 1531 2
## 1532 3
## 1533 3
## 1534 2
## 1535 3
## 1536 4
## 1537 3
## 1538 2
## 1539 3
## 1540 4
## 1541 3
## 1542 1
## 1543 3
## 1544 4
## 1545 3
## 1546 4
## 1547 2
## 1548 3
## 1549 4
## 1550 3
## 1551 4
## 1552 4
## 1553 3
## 1554 2
## 1555 3
## 1556 1
## 1557 4
## 1558 3
## 1559 3
## 1560 3
## 1561 4
## 1562 4
## 1563 4
## 1564 1
## 1565 4
## 1566 3
## 1567 1
## 1568 3
## 1569 3
## 1570 2
## 1571 4
## 1572 4
## 1573 4
## 1574 3
## 1575 4
## 1576 4
## 1577 2
## 1578 1
## 1579 1
## 1580 3
## 1581 2
## 1582 4
## 1583 3
## 1584 1
## 1585 3
## 1586 2
## 1587 4
## 1588 4
## 1589 3
## 1590 2
## 1591 4
## 1592 3
## 1593 2
## 1594 2
## 1595 3
## 1596 4
## 1597 3
## 1598 3
## 1599 2
## 1600 1
## 1601 2
## 1602 1
## 1603 1
## 1604 2
## 1605 3
## 1606 3
## 1607 3
## 1608 2
## 1609 1
## 1610 2
## 1611 3
## 1612 4
## 1613 2
## 1614 1
## 1615 3
## 1616 3
## 1617 1
## 1618 1
## 1619 1
## 1620 3
## 1621 2
## 1622 4
## 1623 2
## 1624 3
## 1625 4
## 1626 4
## 1627 4
## 1628 3
## 1629 3
## 1630 4
## 1631 4
## 1632 1
## 1633 3
## 1634 1
## 1635 4
## 1636 2
## 1637 2
## 1638 3
## 1639 3
## 1640 4
## 1641 4
## 1642 2
## 1643 3
## 1644 3
## 1645 1
## 1646 1
## 1647 4
## 1648 4
## 1649 4
## 1650 3
## 1651 1
## 1652 3
## 1653 3
## 1654 2
## 1655 2
## 1656 4
## 1657 4
## 1658 2
## 1659 1
## 1660 2
## 1661 1
## 1662 3
## 1663 1
## 1664 2
## 1665 1
## 1666 4
## 1667 2
## 1668 4
## 1669 3
## 1670 4
## 1671 2
## 1672 4
## 1673 4
## 1674 3
## 1675 2
## 1676 3
## 1677 2
## 1678 4
## 1679 4
## 1680 1
## 1681 4
## 1682 1
## 1683 4
## 1684 4
## 1685 3
## 1686 4
## 1687 2
## 1688 1
## 1689 2
## 1690 3
## 1691 3
## 1692 1
## 1693 1
## 1694 1
## 1695 4
## 1696 2
## 1697 1
## 1698 1
## 1699 2
## 1700 4
## 1701 1
## 1702 3
## 1703 2
## 1704 3
## 1705 1
## 1706 1
## 1707 4
## 1708 2
## 1709 4
## 1710 4
## 1711 1
## 1712 4
## 1713 4
## 1714 4
## 1715 3
## 1716 2
## 1717 4
## 1718 3
## 1719 4
## 1720 4
## 1721 3
## 1722 4
## 1723 4
## 1724 3
## 1725 1
## 1726 2
## 1727 2
## 1728 1
## 1729 4
## 1730 1
## 1731 3
## 1732 1
## 1733 2
## 1734 4
## 1735 4
## 1736 1
## 1737 3
## 1738 4
## 1739 2
## 1740 3
## 1741 3
## 1742 1
## 1743 3
## 1744 4
## 1745 4
## 1746 4
## 1747 3
## 1748 1
## 1749 4
## 1750 4
## 1751 4
## 1752 3
## 1753 2
## 1754 4
## 1755 4
## 1756 2
## 1757 3
## 1758 4
## 1759 4
## 1760 3
## 1761 1
## 1762 1
## 1763 2
## 1764 3
## 1765 4
## 1766 4
## 1767 4
## 1768 1
## 1769 2
## 1770 2
## 1771 3
## 1772 1
## 1773 4
## 1774 1
## 1775 3
## 1776 2
## 1777 3
## 1778 2
## 1779 1
## 1780 4
## 1781 4
## 1782 1
## 1783 4
## 1784 4
## 1785 3
## 1786 1
## 1787 1
## 1788 2
## 1789 2
## 1790 3
## 1791 1
## 1792 1
## 1793 2
## 1794 1
## 1795 3
## 1796 4
## 1797 1
## 1798 2
## 1799 1
## 1800 2
## 1801 1
## 1802 2
## 1803 1
## 1804 1
## 1805 4
## 1806 3
## 1807 3
## 1808 3
## 1809 2
## 1810 3
## 1811 3
## 1812 3
## 1813 4
## 1814 3
## 1815 2
## 1816 2
## 1817 3
## 1818 3
## 1819 1
## 1820 3
## 1821 4
## 1822 4
## 1823 2
## 1824 4
## 1825 3
## 1826 2
## 1827 3
## 1828 2
## 1829 4
## 1830 4
## 1831 2
## 1832 4
## 1833 4
## 1834 4
## 1835 3
## 1836 4
## 1837 4
## 1838 4
## 1839 4
## 1840 3
## 1841 3
## 1842 4
## 1843 2
## 1844 4
## 1845 3
## 1846 3
## 1847 4
## 1848 2
## 1849 1
## 1850 1
## 1851 4
## 1852 4
## 1853 1
## 1854 1
## 1855 1
## 1856 4
## 1857 4
## 1858 1
## 1859 3
## 1860 4
## 1861 2
## 1862 4
## 1863 2
## 1864 3
## 1865 3
## 1866 3
## 1867 1
## 1868 3
## 1869 1
## 1870 3
## 1871 2
## 1872 2
## 1873 3
## 1874 1
## 1875 3
## 1876 4
## 1877 3
## 1878 2
## 1879 3
## 1880 1
## 1881 1
## 1882 1
## 1883 3
## 1884 1
## 1885 1
## 1886 2
## 1887 1
## 1888 4
## 1889 4
## 1890 3
## 1891 4
## 1892 1
## 1893 1
## 1894 1
## 1895 4
## 1896 2
## 1897 4
## 1898 3
## 1899 3
## 1900 1
## 1901 3
## 1902 1
## 1903 3
## 1904 2
## 1905 1
## 1906 3
## 1907 3
## 1908 4
## 1909 4
## 1910 4
## 1911 4
## 1912 4
## 1913 1
## 1914 1
## 1915 4
## 1916 1
## 1917 4
## 1918 4
## 1919 4
## 1920 4
## 1921 1
## 1922 4
## 1923 4
## 1924 4
## 1925 4
## 1926 3
## 1927 4
## 1928 4
## 1929 4
## 1930 3
## 1931 3
## 1932 2
## 1933 4
## 1934 3
## 1935 4
## 1936 3
## 1937 4
## 1938 3
## 1939 4
## 1940 3
## 1941 1
## 1942 3
## 1943 4
## 1944 3
## 1945 1
## 1946 4
## 1947 4
## 1948 4
## 1949 4
## 1950 1
## 1951 3
## 1952 3
## 1953 4
## 1954 2
## 1955 3
## 1956 3
## 1957 4
## 1958 3
## 1959 2
## 1960 1
## 1961 2
## 1962 3
## 1963 4
## 1964 4
## 1965 4
## 1966 4
## 1967 3
## 1968 3
## 1969 3
## 1970 3
## 1971 1
## 1972 2
## 1973 4
## 1974 3
## 1975 1
## 1976 3
## 1977 3
## 1978 4
## 1979 1
## 1980 4
## 1981 1
## 1982 4
## 1983 4
## 1984 1
## 1985 4
## 1986 4
## 1987 3
## 1988 1
## 1989 2
## 1990 1
## 1991 3
## 1992 2
## 1993 4
## 1994 4
## 1995 4
## 1996 3
## 1997 1
## 1998 4
## 1999 4
## 2000 2
## 2001 2
## 2002 1
## 2003 2
## 2004 1
## 2005 3
## 2006 4
## 2007 4
## 2008 4
## 2009 2
## 2010 2
## 2011 1
## 2012 3
## 2013 1
## 2014 1
## 2015 2
## 2016 3
## 2017 3
## 2018 2
## 2019 1
## 2020 4
## 2021 3
## 2022 2
## 2023 2
## 2024 4
## 2025 3
## 2026 3
## 2027 4
## 2028 1
## 2029 1
## 2030 4
## 2031 4
## 2032 4
## 2033 4
## 2034 4
## 2035 4
## 2036 4
## 2037 1
## 2038 1
## 2039 2
## 2040 3
## 2041 1
## 2042 2
## 2043 4
## 2044 2
## 2045 4
## 2046 1
## 2047 2
## 2048 1
## 2049 1
## 2050 4
## 2051 3
## 2052 1
## 2053 2
## 2054 2
## 2055 2
## 2056 1
## 2057 2
## 2058 4
## 2059 1
## 2060 4
## 2061 1
## 2062 4
## 2063 1
## 2064 3
## 2065 1
## 2066 2
## 2067 3
## 2068 1
## 2069 4
## 2070 3
## 2071 4
## 2072 3
## 2073 2
## 2074 1
## 2075 2
## 2076 1
## 2077 1
## 2078 2
## 2079 1
## 2080 4
## 2081 4
## 2082 2
## 2083 3
## 2084 4
## 2085 4
## 2086 1
## 2087 4
## 2088 4
## 2089 3
## 2090 4
## 2091 1
## 2092 3
## 2093 3
## 2094 1
## 2095 3
## 2096 1
## 2097 1
## 2098 3
## 2099 3
## 2100 3
## 2101 1
## 2102 3
## 2103 2
## 2104 1
## 2105 4
## 2106 1
## 2107 2
## 2108 2
## 2109 2
## 2110 2
## 2111 4
## 2112 4
## 2113 3
## 2114 3
## 2115 2
## 2116 3
## 2117 2
## 2118 2
## 2119 3
## 2120 3
## 2121 1
## 2122 4
## 2123 2
## 2124 3
## 2125 4
## 2126 2
## 2127 2
## 2128 3
## 2129 2
## 2130 1
## 2131 3
## 2132 4
## 2133 1
## 2134 3
## 2135 2
## 2136 2
## 2137 1
## 2138 2
## 2139 2
## 2140 3
## 2141 4
## 2142 2
## 2143 3
## 2144 2
## 2145 2
## 2146 2
## 2147 2
## 2148 3
## 2149 3
## 2150 4
## 2151 1
## 2152 1
## 2153 2
## 2154 1
## 2155 1
## 2156 4
## 2157 3
## 2158 3
## 2159 3
## 2160 2
## 2161 3
## 2162 1
## 2163 1
## 2164 4
## 2165 1
## 2166 4
## 2167 4
## 2168 4
## 2169 3
## 2170 3
## 2171 1
## 2172 1
## 2173 3
## 2174 1
## 2175 2
## 2176 3
## 2177 2
## 2178 4
## 2179 4
## 2180 3
## 2181 2
## 2182 3
## 2183 1
## 2184 1
## 2185 2
## 2186 4
## 2187 4
## 2188 4
## 2189 1
## 2190 2
## 2191 3
## 2192 3
## 2193 1
## 2194 1
## 2195 3
## 2196 1
## 2197 4
## 2198 2
## 2199 3
## 2200 1
## 2201 3
## 2202 3
## 2203 1
## 2204 1
## 2205 2
## 2206 3
## 2207 4
## 2208 4
## 2209 4
## 2210 2
## 2211 3
## 2212 3
## 2213 1
## 2214 4
## 2215 2
## 2216 4
## 2217 3
## 2218 2
## 2219 2
## 2220 3
## 2221 2
## 2222 1
## 2223 3
## 2224 4
## 2225 2
## 2226 3
## 2227 1
## 2228 2
## 2229 1
## 2230 2
## 2231 1
## 2232 3
## 2233 1
## 2234 2
## 2235 3
## 2236 3
## 2237 1
## 2238 3
## 2239 4
## 2240 4
## 2241 4
## 2242 3
## 2243 2
## 2244 2
## 2245 2
## 2246 2
## 2247 3
## 2248 4
## 2249 2
## 2250 4
## 2251 2
## 2252 4
## 2253 3
## 2254 2
## 2255 4
## 2256 4
## 2257 1
## 2258 1
## 2259 4
## 2260 3
## 2261 4
## 2262 4
## 2263 1
## 2264 4
## 2265 2
## 2266 3
## 2267 4
set.seed(12345)
IDX<-createDataPartition(train$Segmentation, p=0.7,list=FALSE)
train_t<-train[IDX,]
train_v<-train[-IDX,]
train_t$Segmentation<-as.factor(train_t$Segmentation)
train_v$Segmentation<-as.factor(train_v$Segmentation)
ctrl<-trainControl(method="cv", number=10)
train(Segmentation~., data=train_t, method='knn', trControl=ctrl,
preProcess=c("center", "scale"))->knn_fit1
predict(knn_fit1, newdata=train_v)->test_pred
confusionMatrix(test_pred,train_v$Segmentation, mode="prec_recall")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 207 114 62 115
## 2 120 152 123 53
## 3 71 163 284 33
## 4 109 60 64 358
##
## Overall Statistics
##
## Accuracy : 0.4794
## 95% CI : (0.4578, 0.5011)
## No Information Rate : 0.2677
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3047
##
## Mcnemar's Test P-Value : 0.009816
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Precision 0.41566 0.3393 0.5154 0.6058
## Recall 0.40828 0.3108 0.5328 0.6404
## F1 0.41194 0.3244 0.5240 0.6226
## Prevalence 0.24282 0.2342 0.2553 0.2677
## Detection Rate 0.09914 0.0728 0.1360 0.1715
## Detection Prevalence 0.23851 0.2146 0.2639 0.2830
## Balanced Accuracy 0.61211 0.5629 0.6806 0.7440
#예제 279p
rm(list=ls())
library(dplyr)
library(caret)
library(recipes)
read.delim("titanic3.txt", header=TRUE, sep=",")->full
glimpse(full)
## Rows: 1,309
## Columns: 14
## $ pclass <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ survived <int> 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, …
## $ name <chr> "Allen, Miss. Elisabeth Walton", "Allison, Master. Hudson Tr…
## $ sex <chr> "female", "male", "female", "male", "female", "male", "femal…
## $ age <dbl> 29.00, 0.92, 2.00, 30.00, 25.00, 48.00, 63.00, 39.00, 53.00,…
## $ sibsp <int> 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ parch <int> 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, …
## $ ticket <chr> "24160", "113781", "113781", "113781", "113781", "19952", "1…
## $ fare <dbl> 211.3375, 151.5500, 151.5500, 151.5500, 151.5500, 26.5500, 7…
## $ cabin <chr> "B5", "C22 C26", "C22 C26", "C22 C26", "C22 C26", "E12", "D7…
## $ embarked <chr> "S", "S", "S", "S", "S", "S", "S", "S", "S", "C", "C", "C", …
## $ boat <chr> "2", "11", "", "", "", "3", "10", "", "D", "", "", "4", "9",…
## $ body <int> NA, NA, NA, 135, NA, NA, NA, NA, NA, 22, 124, NA, NA, NA, NA…
## $ home.dest <chr> "St Louis, MO", "Montreal, PQ / Chesterville, ON", "Montreal…
set.seed(1357)
train_list<-createDataPartition(full$survived, p=0.7, list=FALSE)
full_train<-full[train_list,]
full_test<-full[-train_list,]
NROW(full_train)
## [1] 917
NROW(full_test)
## [1] 392
train<-full_train
test<-full_test
train %>% mutate(index="train")->train
test %>% mutate(index="test")->test
bind_rows(train,test)->full
full %>% select(-boat,-body,-home.dest)->full
full$survived<-ifelse(full$survived==0, "생존", "사망")
full$survived<-as.factor(full$survived)
full$pclcass<-as.factor(full$pclass)
full$sex<-as.factor(full$sex)
full$embarked<-as.factor(full$embarked)
colSums(is.na(full))
## pclass survived name sex age sibsp parch ticket
## 0 0 0 0 263 0 0 0
## fare cabin embarked index pclcass
## 1 0 0 0 0
table(full$embarked)
##
## C Q S
## 2 270 123 914
levels(full$embarked)[1]<-NA
table(full$embarked, useNA="always")
##
## C Q S <NA>
## 270 123 914 2
full %>% filter(!is.na(age)&!is.na(fare)&!is.na(embarked))->full
colSums(is.na(full))
## pclass survived name sex age sibsp parch ticket
## 0 0 0 0 0 0 0 0
## fare cabin embarked index pclcass
## 0 0 0 0 0
recipe(survived~., data=full) %>% step_YeoJohnson(age, sibsp, parch, fare) %>%
step_center(age,sibsp,parch,fare) %>% step_scale(age,sibsp,parch,fare) %>%
prep() %>% juice()->data
data %>% filter(index=="train") %>% select(-index,-name,-ticket,-cabin)->train
data %>% filter(index=="test") %>% select(-index, -name, -ticket, -cabin)-> test
ctrl<-trainControl(method="cv", summaryFunction=twoClassSummary,
classProbs=TRUE)
train(survived~., data=train, method="rpart", metric="ROC", trControl=ctrl)-> rffit
rffit
## CART
##
## 731 samples
## 8 predictor
## 2 classes: '사망', '생존'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 659, 659, 657, 658, 658, 657, ...
## Resampling results across tuning parameters:
##
## cp ROC Sens Spec
## 0.02542373 0.7882034 0.5548276 0.9239429
## 0.03728814 0.7730226 0.5442529 0.9191860
## 0.44406780 0.6106002 0.3158621 0.9053383
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02542373.
confusionMatrix(rffit)
## Cross-Validated (10 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction 사망 생존
## 사망 22.4 4.5
## 생존 17.9 55.1
##
## Accuracy (average) : 0.7756
predict(rffit, test, type="prob")->rffit1
predict(rffit, test, type="raw")->rffit2
confusionMatrix(rffit2, test$survived)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 사망 생존
## 사망 66 6
## 생존 64 176
##
## Accuracy : 0.7756
## 95% CI : (0.7252, 0.8207)
## No Information Rate : 0.5833
## P-Value [Acc > NIR] : 6.712e-13
##
## Kappa : 0.507
##
## Mcnemar's Test P-Value : 9.572e-12
##
## Sensitivity : 0.5077
## Specificity : 0.9670
## Pos Pred Value : 0.9167
## Neg Pred Value : 0.7333
## Prevalence : 0.4167
## Detection Rate : 0.2115
## Detection Prevalence : 0.2308
## Balanced Accuracy : 0.7374
##
## 'Positive' Class : 사망
##
library(pROC)
rffit2_num<-as.numeric(rffit2)
rffit2_num
## [1] 1 2 2 1 1 2 1 2 1 2 1 2 1 1 1 2 1 1 2 2 1 1 1 1 1 1 1 2 1 2 2 1 2 2 2 1 2
## [38] 1 1 2 1 1 1 1 1 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 1 2 1 2 2 1 2 1 2 2 2 2 1 2
## [75] 2 1 2 2 2 1 2 1 2 2 1 2 1 1 2 2 1 1 1 2 2 1 1 1 1 2 2 2 2 1 2 1 2 2 1 2 2
## [112] 2 1 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 2 1 2 1 2 1 2 2 2 2 2 2 1 2 1 2 1 2 2 2
## [149] 1 1 2 2 2 1 1 2 1 1 2 1 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [297] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
result<-roc(test$survived, rffit2_num)
## Setting levels: control = 사망, case = 생존
## Setting direction: controls < cases
result$auc
## Area under the curve: 0.7374
names(rffit1)[2]<-"survived"
rffit1 %>% select(survived)->df
head(df)
## survived
## 1 0.0617284
## 2 0.7905983
## 3 0.7905983
## 4 0.0617284
## 5 0.0617284
## 6 0.7905983
write.csv(df,"22027601_2.csv", row.names=F)
read.csv("22027601_2.csv")
## survived
## 1 0.0617284
## 2 0.7905983
## 3 0.7905983
## 4 0.0617284
## 5 0.0617284
## 6 0.7905983
## 7 0.0617284
## 8 0.7905983
## 9 0.0617284
## 10 0.7905983
## 11 0.0617284
## 12 0.7905983
## 13 0.0617284
## 14 0.0617284
## 15 0.0617284
## 16 0.7905983
## 17 0.0617284
## 18 0.0617284
## 19 0.7905983
## 20 0.7905983
## 21 0.0617284
## 22 0.0617284
## 23 0.0617284
## 24 0.0617284
## 25 0.0617284
## 26 0.0617284
## 27 0.0617284
## 28 0.7905983
## 29 0.0617284
## 30 0.7905983
## 31 0.7905983
## 32 0.0617284
## 33 0.7905983
## 34 0.7905983
## 35 0.7905983
## 36 0.0617284
## 37 0.7905983
## 38 0.0617284
## 39 0.0617284
## 40 0.7905983
## 41 0.0617284
## 42 0.0617284
## 43 0.0617284
## 44 0.0617284
## 45 0.0617284
## 46 0.7905983
## 47 0.7905983
## 48 0.7905983
## 49 0.0617284
## 50 0.0617284
## 51 0.7905983
## 52 0.7905983
## 53 0.7905983
## 54 0.7905983
## 55 0.0617284
## 56 0.7905983
## 57 0.7905983
## 58 0.7905983
## 59 0.7905983
## 60 0.7905983
## 61 0.0617284
## 62 0.7905983
## 63 0.0617284
## 64 0.7905983
## 65 0.7905983
## 66 0.0617284
## 67 0.7905983
## 68 0.0617284
## 69 0.7905983
## 70 0.7905983
## 71 0.7905983
## 72 0.7905983
## 73 0.0617284
## 74 0.7905983
## 75 0.7905983
## 76 0.0617284
## 77 0.7905983
## 78 0.7905983
## 79 0.7905983
## 80 0.0617284
## 81 0.7905983
## 82 0.0617284
## 83 0.7905983
## 84 0.7905983
## 85 0.0617284
## 86 0.7905983
## 87 0.0617284
## 88 0.0617284
## 89 0.7905983
## 90 0.7905983
## 91 0.0617284
## 92 0.0617284
## 93 0.0617284
## 94 0.7905983
## 95 0.7905983
## 96 0.0617284
## 97 0.0617284
## 98 0.0617284
## 99 0.0617284
## 100 0.7905983
## 101 0.7905983
## 102 0.7905983
## 103 0.7905983
## 104 0.0617284
## 105 0.7905983
## 106 0.0617284
## 107 0.7905983
## 108 0.7905983
## 109 0.0617284
## 110 0.7905983
## 111 0.7905983
## 112 0.7905983
## 113 0.0617284
## 114 0.7905983
## 115 0.7905983
## 116 0.7905983
## 117 0.0617284
## 118 0.7905983
## 119 0.7905983
## 120 0.7905983
## 121 0.7905983
## 122 0.0617284
## 123 0.0617284
## 124 0.7905983
## 125 0.7905983
## 126 0.7905983
## 127 0.7905983
## 128 0.7905983
## 129 0.7905983
## 130 0.0617284
## 131 0.7905983
## 132 0.0617284
## 133 0.7905983
## 134 0.0617284
## 135 0.7905983
## 136 0.7905983
## 137 0.7905983
## 138 0.7905983
## 139 0.7905983
## 140 0.7905983
## 141 0.0617284
## 142 0.7905983
## 143 0.0617284
## 144 0.7905983
## 145 0.0617284
## 146 0.7905983
## 147 0.7905983
## 148 0.7905983
## 149 0.0617284
## 150 0.0617284
## 151 0.7905983
## 152 0.7905983
## 153 0.7905983
## 154 0.0617284
## 155 0.0617284
## 156 0.7905983
## 157 0.0617284
## 158 0.0617284
## 159 0.7905983
## 160 0.0617284
## 161 0.0617284
## 162 0.0617284
## 163 0.7905983
## 164 0.7905983
## 165 0.7905983
## 166 0.0617284
## 167 0.7905983
## 168 0.7905983
## 169 0.7905983
## 170 0.7905983
## 171 0.7905983
## 172 0.7905983
## 173 0.7905983
## 174 0.7905983
## 175 0.5544554
## 176 0.5544554
## 177 0.7905983
## 178 0.7905983
## 179 0.7905983
## 180 0.5544554
## 181 0.5544554
## 182 0.7905983
## 183 0.5544554
## 184 0.5544554
## 185 0.5544554
## 186 0.5544554
## 187 0.7905983
## 188 0.7905983
## 189 0.7905983
## 190 0.5544554
## 191 0.5544554
## 192 0.7905983
## 193 0.7905983
## 194 0.5544554
## 195 0.7905983
## 196 0.5544554
## 197 0.7905983
## 198 0.7905983
## 199 0.7905983
## 200 0.7905983
## 201 0.7905983
## 202 0.7905983
## 203 0.7905983
## 204 0.7905983
## 205 0.7905983
## 206 0.7905983
## 207 0.7905983
## 208 0.5544554
## 209 0.7905983
## 210 0.7905983
## 211 0.7905983
## 212 0.7905983
## 213 0.7905983
## 214 0.7905983
## 215 0.7905983
## 216 0.5544554
## 217 0.7905983
## 218 0.7905983
## 219 0.5544554
## 220 0.7905983
## 221 0.5544554
## 222 0.7905983
## 223 0.7905983
## 224 0.7905983
## 225 0.7905983
## 226 0.5544554
## 227 0.5544554
## 228 0.5544554
## 229 0.7905983
## 230 0.5544554
## 231 0.7905983
## 232 0.5544554
## 233 0.7905983
## 234 0.7905983
## 235 0.7905983
## 236 0.5544554
## 237 0.5544554
## 238 0.7905983
## 239 0.5544554
## 240 0.5544554
## 241 0.7905983
## 242 0.7905983
## 243 0.7905983
## 244 0.5544554
## 245 0.7905983
## 246 0.7905983
## 247 0.7905983
## 248 0.5544554
## 249 0.7905983
## 250 0.7905983
## 251 0.5544554
## 252 0.5544554
## 253 0.7905983
## 254 0.7905983
## 255 0.7905983
## 256 0.7905983
## 257 0.5544554
## 258 0.5544554
## 259 0.7905983
## 260 0.7905983
## 261 0.7905983
## 262 0.5544554
## 263 0.5544554
## 264 0.7905983
## 265 0.5544554
## 266 0.7905983
## 267 0.7905983
## 268 0.5544554
## 269 0.7905983
## 270 0.5544554
## 271 0.7905983
## 272 0.5544554
## 273 0.7905983
## 274 0.7905983
## 275 0.5544554
## 276 0.7905983
## 277 0.7905983
## 278 0.5544554
## 279 0.7905983
## 280 0.5544554
## 281 0.7905983
## 282 0.7905983
## 283 0.7905983
## 284 0.7905983
## 285 0.5544554
## 286 0.5544554
## 287 0.7905983
## 288 0.7905983
## 289 0.7905983
## 290 0.5544554
## 291 0.7905983
## 292 0.7905983
## 293 0.7905983
## 294 0.5544554
## 295 0.7905983
## 296 0.7905983
## 297 0.5544554
## 298 0.7905983
## 299 0.7905983
## 300 0.7905983
## 301 0.5544554
## 302 0.7905983
## 303 0.7905983
## 304 0.5544554
## 305 0.5544554
## 306 0.7905983
## 307 0.5544554
## 308 0.7905983
## 309 0.7905983
## 310 0.5544554
## 311 0.7905983
## 312 0.5544554
#예제 284p
rm(list=ls())
library(dplyr)
library(recipes)
library(caret)
df<-read.csv("nyc.csv")
glimpse(df)
## Rows: 165
## Columns: 9
## $ Case <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ Restaurant <chr> "Daniella Ristorante", "Tello's Ristorante", "Biricchino", …
## $ Price <int> 43, 32, 34, 41, 54, 52, 34, 34, 39, 44, 45, 47, 52, 35, 47,…
## $ Food <int> 22, 20, 21, 20, 24, 22, 22, 20, 22, 21, 19, 21, 21, 19, 20,…
## $ Decor <int> 18, 19, 13, 20, 19, 22, 16, 18, 19, 17, 17, 19, 19, 17, 18,…
## $ Service <int> 20, 19, 18, 17, 21, 21, 21, 21, 22, 19, 20, 21, 20, 19, 21,…
## $ East <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ latitude <dbl> 40.74683, 40.74342, 40.74886, 40.74848, 40.73958, 40.74069,…
## $ longitude <dbl> -73.99676, -73.99954, -73.99552, -74.00331, -73.99591, -73.…
nyc<-df %>% select(3:7)
glimpse(nyc)
## Rows: 165
## Columns: 5
## $ Price <int> 43, 32, 34, 41, 54, 52, 34, 34, 39, 44, 45, 47, 52, 35, 47, 37…
## $ Food <int> 22, 20, 21, 20, 24, 22, 22, 20, 22, 21, 19, 21, 21, 19, 20, 21…
## $ Decor <int> 18, 19, 13, 20, 19, 22, 16, 18, 19, 17, 17, 19, 19, 17, 18, 19…
## $ Service <int> 20, 19, 18, 17, 21, 21, 21, 21, 22, 19, 20, 21, 20, 19, 21, 21…
## $ East <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
summary(nyc)
## Price Food Decor Service
## Min. :19.00 Min. :16.00 Min. : 6.00 Min. :14.00
## 1st Qu.:36.00 1st Qu.:19.00 1st Qu.:16.00 1st Qu.:18.00
## Median :43.00 Median :21.00 Median :18.00 Median :20.00
## Mean :42.67 Mean :20.59 Mean :17.68 Mean :19.39
## 3rd Qu.:50.00 3rd Qu.:22.00 3rd Qu.:19.00 3rd Qu.:21.00
## Max. :65.00 Max. :25.00 Max. :25.00 Max. :24.00
## East
## Min. :0.0000
## 1st Qu.:0.0000
## Median :1.0000
## Mean :0.6303
## 3rd Qu.:1.0000
## Max. :1.0000
nyc$East<-as.factor(nyc$East)
set.seed(1357)
glimpse(nyc)
## Rows: 165
## Columns: 5
## $ Price <int> 43, 32, 34, 41, 54, 52, 34, 34, 39, 44, 45, 47, 52, 35, 47, 37…
## $ Food <int> 22, 20, 21, 20, 24, 22, 22, 20, 22, 21, 19, 21, 21, 19, 20, 21…
## $ Decor <int> 18, 19, 13, 20, 19, 22, 16, 18, 19, 17, 17, 19, 19, 17, 18, 19…
## $ Service <int> 20, 19, 18, 17, 21, 21, 21, 21, 22, 19, 20, 21, 20, 19, 21, 21…
## $ East <fct> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
train_list<-createDataPartition(nyc$Price, p=0.7, list=FALSE)
train<-nyc[train_list,]
test<-nyc[-train_list,]
View(train)
NROW(train)
## [1] 118
NROW(test)
## [1] 47
model_2<-train(Price~., data=train, method="knn", preProcess=c("BoxCox", "center", "scale"),
trControl=trainControl(method="cv",number=10))
model_2
## k-Nearest Neighbors
##
## 118 samples
## 4 predictor
##
## Pre-processing: Box-Cox transformation (3), centered (4), scaled (4)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 106, 106, 107, 106, 106, 106, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 6.219351 0.5526612 4.878930
## 7 6.093182 0.5691867 4.758693
## 9 6.106331 0.5676038 4.754216
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 7.
predict(model_2, newdata=test)->lmfit
lmfit
## [1] 48.28571 43.85714 43.28571 54.85714 49.14286 38.57143 50.85714 49.71429
## [9] 48.28571 38.00000 47.77778 48.28571 46.42857 50.14286 43.57143 46.00000
## [17] 36.14286 47.42857 35.85714 52.28571 48.14286 43.57143 37.00000 55.42857
## [25] 35.71429 34.14286 40.71429 51.14286 36.50000 35.42857 51.14286 33.00000
## [33] 30.28571 39.71429 33.71429 30.42857 38.85714 34.77778 52.12500 49.00000
## [41] 38.62500 40.14286 38.00000 34.71429 34.77778 36.00000 46.28571
bind_cols(lmfit,test) ->result
## New names:
## • `` -> `...1`
names(result)[1]<-"pred"
head(result)
## pred Price Food Decor Service East
## 1 48.28571 54 24 19 21 0
## 2 43.85714 34 22 16 21 0
## 3 43.28571 47 20 18 21 1
## 4 54.85714 57 24 21 22 1
## 5 49.14286 51 23 17 21 1
## 6 38.57143 38 20 18 18 1
result %>% select(1:2)
## pred Price
## 1 48.28571 54
## 2 43.85714 34
## 3 43.28571 47
## 4 54.85714 57
## 5 49.14286 51
## 6 38.57143 38
## 7 50.85714 49
## 8 49.71429 45
## 9 48.28571 50
## 10 38.00000 43
## 11 47.77778 49
## 12 48.28571 52
## 13 46.42857 56
## 14 50.14286 58
## 15 43.57143 46
## 16 46.00000 40
## 17 36.14286 54
## 18 47.42857 40
## 19 35.85714 38
## 20 52.28571 55
## 21 48.14286 47
## 22 43.57143 48
## 23 37.00000 33
## 24 55.42857 54
## 25 35.71429 30
## 26 34.14286 25
## 27 40.71429 43
## 28 51.14286 51
## 29 36.50000 36
## 30 35.42857 37
## 31 51.14286 43
## 32 33.00000 19
## 33 30.28571 22
## 34 39.71429 41
## 35 33.71429 33
## 36 30.42857 29
## 37 38.85714 33
## 38 34.77778 45
## 39 52.12500 65
## 40 49.00000 46
## 41 38.62500 44
## 42 40.14286 38
## 43 38.00000 42
## 44 34.71429 42
## 45 34.77778 31
## 46 36.00000 31
## 47 46.28571 38