# Problem 02 (p. 259)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(recipes)
##
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
df<-read.csv("travel_data.csv")
set.seed(1357)
train_list<-createDataPartition(y=df$TravelInsurance,p=0.75,list=FALSE)
df_train<-df[train_list,]
df_test<-df[-train_list,]
NROW(df_train)
## [1] 1491
NROW(df_test)
## [1] 496
df_train %>% glimpse
## Rows: 1,491
## Columns: 10
## $ INDEX <int> 2, 3, 4, 7, 8, 9, 10, 11, 13, 16, 17, 18, 19, 20, …
## $ Age <int> 34, 28, 28, 31, 28, 33, 31, 26, 31, 28, 28, 29, 34…
## $ Employment.Type <chr> "Private Sector/Self Employed", "Private Sector/Se…
## $ GraduateOrNot <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "…
## $ AnnualIncome <int> 500000, 700000, 700000, 1350000, 1450000, 800000, …
## $ FamilyMembers <int> 4, 3, 8, 3, 6, 3, 9, 5, 6, 4, 7, 5, 2, 6, 3, 4, 9,…
## $ ChronicDiseases <int> 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,…
## $ FrequentFlyer <chr> "No", "No", "Yes", "Yes", "Yes", "Yes", "No", "Yes…
## $ EverTravelledAbroad <chr> "No", "No", "No", "Yes", "Yes", "No", "No", "Yes",…
## $ TravelInsurance <int> 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,…
df_train %>% mutate(index="train")->df_train
df_test %>% mutate(index='test')->df_test
bind_rows(df_train,df_test)->full
full$TravelInsurance<-ifelse(full$TravelInsurance==0,"미가입","가입")
full$TravelInsurance<-as.factor(full$TravelInsurance)
full$GraduateOrNot<-as.factor(full$GraduateOrNot)
full$FrequentFlyer<-as.factor(full$FrequentFlyer)
full$EverTravelledAbroad<-as.factor(full$EverTravelledAbroad)
colSums(is.na(full))
## INDEX Age Employment.Type GraduateOrNot
## 0 0 0 0
## AnnualIncome FamilyMembers ChronicDiseases FrequentFlyer
## 0 0 0 0
## EverTravelledAbroad TravelInsurance index
## 0 0 0
recipe(TravelInsurance~.,data=full) %>%
step_YeoJohnson(Age,AnnualIncome,FamilyMembers) %>%
step_center(Age,AnnualIncome,FamilyMembers) %>%
step_scale(Age,AnnualIncome,FamilyMembers) %>% prep() %>% juice()->data
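# Optional sketch (an addition, not part of the original solution): the row
# identifier INDEX is carried through the recipe as an ordinary predictor above
# and later shows up in the variable importance table; it could be dropped
# inside the recipe with step_rm().
recipe(TravelInsurance~.,data=full) %>%
  step_rm(INDEX) %>%
  step_YeoJohnson(Age,AnnualIncome,FamilyMembers) %>%
  step_center(Age,AnnualIncome,FamilyMembers) %>%
  step_scale(Age,AnnualIncome,FamilyMembers) %>% prep() %>% juice()->data_no_id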
data %>% filter(index=="train") %>% select(-index)->train
data %>% filter(index=="test") %>% select(-index)->test
ctrl<-trainControl(method="cv",summaryFunction = twoClassSummary,
classProbs=TRUE)
train(TravelInsurance~.,data=train,
method='rpart',metric="ROC",
trControl=ctrl)->rpfit
rpfit
## CART
##
## 1491 samples
## 9 predictor
## 2 classes: '가입', '미가입'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 1342, 1342, 1342, 1342, 1341, 1342, ...
## Resampling results across tuning parameters:
##
## cp ROC Sens Spec
## 0.002358491 0.7800814 0.5754717 0.9593965
## 0.056603774 0.7331815 0.4830189 0.9718857
## 0.401886792 0.5834763 0.1773585 0.9895941
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.002358491.
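# Optional sketch (an addition, not in the original run): caret's default grid
# tried only three cp values; a finer search can be requested with tuneLength.
train(TravelInsurance~.,data=train,
      method='rpart',metric="ROC",
      trControl=ctrl,tuneLength=10)->rpfit_long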
confusionMatrix(rpfit)
## Cross-Validated (10 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction 가입 미가입
## 가입 20.5 2.6
## 미가입 15.1 61.8
##
## Accuracy (average) : 0.8229
predict(rpfit,test,type='prob')->rffit1
head(rffit1)
## 가입 미가입
## 1 0.190326 0.809674
## 2 0.190326 0.809674
## 3 0.190326 0.809674
## 4 0.190326 0.809674
## 5 0.190326 0.809674
## 6 0.190326 0.809674
predict(rpfit,test,type="raw")->rffit2
head(rffit2)
## [1] 미가입 미가입 미가입 미가입 미가입 미가입
## Levels: 가입 미가입
confusionMatrix(rffit2,test$TravelInsurance)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 가입 미가입
## 가입 116 14
## 미가입 64 302
##
## Accuracy : 0.8427
## 95% CI : (0.8077, 0.8737)
## No Information Rate : 0.6371
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6383
##
## Mcnemar's Test P-Value : 2.887e-08
##
## Sensitivity : 0.6444
## Specificity : 0.9557
## Pos Pred Value : 0.8923
## Neg Pred Value : 0.8251
## Prevalence : 0.3629
## Detection Rate : 0.2339
## Detection Prevalence : 0.2621
## Balanced Accuracy : 0.8001
##
## 'Positive' Class : 가입
##
importance<-varImp(rpfit,scale=FALSE)
importance
## rpart variable importance
##
## Overall
## AnnualIncome 200.4404
## EverTravelledAbroadYes 122.4265
## FamilyMembers 59.1438
## Age 39.4285
## FrequentFlyerYes 35.6629
## Employment.TypePrivate Sector/Self Employed 11.7596
## INDEX 3.2728
## ChronicDiseases 0.9696
## GraduateOrNotYes 0.0000
## `Employment.TypePrivate Sector/Self Employed` 0.0000
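# Optional sketch: caret provides a plot method for varImp objects, giving a
# quick visual ranking of the predictors.
plot(importance,top=10)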
library(pROC)
rffit2_num<-as.numeric(rffit2)
head(rffit2_num)
## [1] 2 2 2 2 2 2
result<-roc(test$TravelInsurance,rffit2_num)
## Setting levels: control = 가입, case = 미가입
## Setting direction: controls < cases
result$auc
## Area under the curve: 0.8001
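# Optional sketch (an addition, not part of the original solution): the AUC
# above is computed from hard class labels; using the predicted probability of
# the '가입' class from rffit1 (before it is renamed below) usually gives a
# more informative ROC.
result_prob<-roc(test$TravelInsurance,rffit1[["가입"]])
result_prob$auc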
names(rffit1)[1]<-"y_pred"
bind_cols(df_test,rffit1) %>% select(INDEX,y_pred)->df
head(df)
## INDEX y_pred
## 1 0 0.190326
## 2 1 0.190326
## 3 5 0.190326
## 4 6 0.190326
## 5 12 0.190326
## 6 14 0.190326
#write.csv(df,"0000.csv",row.names=F)
read.csv("0000.csv")
## INDEX y_pred
## 1 0 0.1903260
## 2 1 0.1903260
## 3 5 0.1903260
## 4 6 0.1903260
## 5 12 0.1903260
## 6 14 0.1903260
## 7 15 0.8658537
## 8 27 0.1903260
## 9 33 0.1903260
## 10 37 0.2195122
## 11 38 0.2195122
## 12 39 0.9209486
## 13 43 0.1903260
## 14 46 0.9209486
## 15 48 0.1903260
## 16 56 0.1903260
## 17 64 0.9209486
## 18 65 0.1903260
## 19 72 0.1903260
## 20 74 0.1903260
## 21 83 0.2195122
## 22 85 0.9209486
## 23 92 0.1903260
## 24 94 0.2195122
## 25 97 0.1903260
## 26 99 0.1903260
## 27 101 0.9209486
## 28 105 0.1903260
## 29 106 0.2195122
## 30 110 0.1903260
## 31 111 0.1903260
## 32 112 0.1903260
## 33 125 0.1903260
## 34 127 0.1903260
## 35 128 0.9209486
## 36 133 0.9209486
## 37 135 0.1903260
## 38 140 0.1903260
## 39 154 0.1903260
## 40 155 0.8658537
## 41 158 0.1903260
## 42 161 0.9209486
## 43 162 0.8658537
## 44 171 0.1903260
## 45 176 0.8658537
## 46 178 0.1903260
## 47 183 0.9209486
## 48 184 0.1903260
## 49 187 0.1903260
## 50 188 0.1903260
## 51 195 0.9209486
## 52 198 0.9209486
## 53 200 0.1903260
## 54 210 0.1903260
## 55 212 0.1903260
## 56 213 0.9209486
## 57 222 0.1903260
## 58 223 0.1903260
## 59 224 0.1903260
## 60 233 0.1903260
## 61 234 0.2195122
## 62 238 0.1903260
## 63 242 0.2195122
## 64 243 0.1903260
## 65 249 0.1903260
## 66 250 0.8658537
## 67 257 0.2195122
## 68 266 0.1903260
## 69 267 0.2195122
## 70 269 0.1903260
## 71 270 0.8658537
## 72 272 0.1903260
## 73 273 0.8658537
## 74 283 0.1903260
## 75 286 0.1903260
## 76 298 0.9209486
## 77 300 0.1903260
## 78 301 0.1903260
## 79 306 0.2195122
## 80 309 0.2195122
## 81 312 0.1903260
## 82 320 0.2195122
## 83 321 0.1903260
## 84 323 0.9209486
## 85 324 0.1903260
## 86 334 0.1903260
## 87 337 0.1903260
## 88 339 0.1903260
## 89 343 0.1903260
## 90 347 0.8658537
## 91 352 0.9209486
## 92 356 0.2195122
## 93 359 0.1903260
## 94 361 0.9209486
## 95 362 0.1903260
## 96 365 0.9209486
## 97 366 0.1903260
## 98 369 0.1903260
## 99 372 0.1903260
## 100 378 0.9209486
## 101 392 0.1903260
## 102 394 0.1903260
## 103 403 0.1903260
## 104 404 0.2195122
## 105 414 0.9209486
## 106 422 0.9209486
## 107 423 0.9209486
## 108 425 0.1903260
## 109 428 0.1903260
## 110 429 0.1903260
## 111 430 0.1903260
## 112 431 0.1903260
## 113 443 0.1903260
## 114 449 0.1903260
## 115 456 0.9209486
## 116 463 0.2195122
## 117 464 0.9209486
## 118 468 0.1903260
## 119 469 0.9209486
## 120 470 0.1903260
## 121 472 0.1903260
## 122 473 0.9209486
## 123 479 0.2195122
## 124 482 0.1903260
## 125 484 0.8658537
## 126 486 0.2195122
## 127 490 0.1903260
## 128 499 0.1903260
## 129 505 0.1903260
## 130 512 0.1903260
## 131 515 0.1903260
## 132 518 0.9209486
## 133 528 0.1903260
## 134 529 0.1903260
## 135 530 0.1903260
## 136 532 0.1903260
## 137 533 0.1903260
## 138 534 0.1903260
## 139 536 0.1903260
## 140 538 0.1903260
## 141 547 0.9209486
## 142 549 0.1903260
## 143 550 0.2195122
## 144 551 0.9209486
## 145 553 0.9209486
## 146 554 0.1903260
## 147 559 0.1903260
## 148 561 0.1903260
## 149 562 0.2195122
## 150 563 0.2195122
## 151 565 0.1903260
## 152 570 0.1903260
## 153 584 0.1903260
## 154 586 0.1903260
## 155 593 0.1903260
## 156 594 0.2195122
## 157 598 0.8658537
## 158 600 0.9209486
## 159 606 0.1903260
## 160 610 0.1903260
## 161 611 0.1903260
## 162 614 0.2195122
## 163 615 0.1903260
## 164 616 0.1903260
## 165 617 0.1903260
## 166 619 0.8658537
## 167 623 0.1903260
## 168 625 0.9209486
## 169 628 0.1903260
## 170 633 0.1903260
## 171 642 0.1903260
## 172 654 0.9209486
## 173 655 0.1903260
## 174 659 0.2195122
## 175 662 0.1903260
## 176 667 0.1903260
## 177 675 0.1903260
## 178 676 0.1903260
## 179 677 0.1903260
## 180 681 0.1903260
## 181 682 0.9209486
## 182 688 0.2195122
## 183 689 0.1903260
## 184 690 0.9209486
## 185 698 0.8658537
## 186 699 0.1903260
## 187 703 0.1903260
## 188 708 0.2195122
## 189 713 0.1903260
## 190 717 0.1903260
## 191 719 0.2195122
## 192 724 0.1903260
## 193 731 0.1903260
## 194 738 0.1903260
## 195 741 0.8658537
## 196 742 0.8658537
## 197 744 0.1903260
## 198 746 0.1903260
## 199 749 0.8658537
## 200 750 0.2195122
## 201 752 0.1903260
## 202 754 0.1903260
## 203 756 0.1903260
## 204 758 0.9209486
## 205 759 0.8658537
## 206 761 0.9209486
## 207 771 0.1903260
## 208 775 0.1903260
## 209 778 0.1903260
## 210 779 0.9209486
## 211 781 0.1903260
## 212 784 0.9209486
## 213 791 0.8658537
## 214 795 0.1903260
## 215 801 0.1903260
## 216 804 0.1903260
## 217 805 0.1903260
## 218 807 0.1903260
## 219 819 0.1903260
## 220 822 0.1903260
## 221 823 0.1903260
## 222 830 0.8658537
## 223 833 0.9209486
## 224 837 0.1903260
## 225 853 0.1903260
## 226 856 0.8658537
## 227 859 0.1903260
## 228 863 0.1903260
## 229 865 0.9209486
## 230 876 0.1903260
## 231 879 0.1903260
## 232 885 0.1903260
## 233 886 0.9209486
## 234 889 0.2195122
## 235 892 0.1903260
## 236 893 0.1903260
## 237 894 0.1903260
## 238 896 0.2195122
## 239 902 0.8658537
## 240 903 0.1903260
## 241 904 0.1903260
## 242 909 0.9209486
## 243 913 0.9209486
## 244 924 0.1903260
## 245 926 0.2195122
## 246 927 0.9209486
## 247 938 0.1903260
## 248 939 0.1903260
## 249 947 0.2195122
## 250 948 0.1903260
## 251 952 0.9209486
## 252 959 0.1903260
## 253 965 0.1903260
## 254 968 0.1903260
## 255 973 0.9209486
## 256 975 0.9209486
## 257 978 0.1903260
## 258 981 0.1903260
## 259 982 0.1903260
## 260 983 0.8658537
## 261 988 0.9209486
## 262 994 0.1903260
## 263 995 0.1903260
## 264 999 0.8658537
## 265 1000 0.1903260
## 266 1003 0.1903260
## 267 1004 0.1903260
## 268 1005 0.1903260
## 269 1006 0.1903260
## 270 1011 0.8658537
## 271 1013 0.1903260
## 272 1017 0.1903260
## 273 1019 0.1903260
## 274 1022 0.1903260
## 275 1031 0.1903260
## 276 1037 0.1903260
## 277 1044 0.9209486
## 278 1046 0.2195122
## 279 1050 0.2195122
## 280 1053 0.2195122
## 281 1055 0.9209486
## 282 1057 0.1903260
## 283 1063 0.1903260
## 284 1067 0.2195122
## 285 1079 0.2195122
## 286 1080 0.1903260
## 287 1082 0.1903260
## 288 1087 0.1903260
## 289 1090 0.1903260
## 290 1092 0.9209486
## 291 1095 0.8658537
## 292 1098 0.1903260
## 293 1106 0.2195122
## 294 1110 0.1903260
## 295 1111 0.2195122
## 296 1112 0.2195122
## 297 1135 0.1903260
## 298 1140 0.1903260
## 299 1146 0.1903260
## 300 1147 0.9209486
## 301 1150 0.1903260
## 302 1151 0.8658537
## 303 1153 0.1903260
## 304 1156 0.1903260
## 305 1158 0.2195122
## 306 1162 0.9209486
## 307 1169 0.9209486
## 308 1172 0.1903260
## 309 1173 0.1903260
## 310 1175 0.2195122
## 311 1182 0.1903260
## 312 1186 0.1903260
## 313 1188 0.1903260
## 314 1190 0.1903260
## 315 1193 0.2195122
## 316 1195 0.1903260
## 317 1198 0.2195122
## 318 1202 0.9209486
## 319 1206 0.1903260
## 320 1207 0.1903260
## 321 1209 0.9209486
## 322 1210 0.1903260
## 323 1212 0.2195122
## 324 1213 0.1903260
## 325 1219 0.8658537
## 326 1230 0.9209486
## 327 1235 0.1903260
## 328 1245 0.9209486
## 329 1247 0.1903260
## 330 1257 0.1903260
## 331 1263 0.1903260
## 332 1264 0.1903260
## 333 1271 0.1903260
## 334 1272 0.2195122
## 335 1274 0.2195122
## 336 1275 0.1903260
## 337 1282 0.9209486
## 338 1284 0.2195122
## 339 1297 0.1903260
## 340 1302 0.1903260
## 341 1308 0.1903260
## 342 1309 0.9209486
## 343 1319 0.1903260
## 344 1335 0.1903260
## 345 1337 0.1903260
## 346 1346 0.1903260
## 347 1349 0.9209486
## 348 1352 0.1903260
## 349 1354 0.9209486
## 350 1358 0.9209486
## 351 1366 0.9209486
## 352 1369 0.2195122
## 353 1374 0.1903260
## 354 1379 0.2195122
## 355 1381 0.9209486
## 356 1383 0.9209486
## 357 1385 0.1903260
## 358 1389 0.1903260
## 359 1391 0.9209486
## 360 1400 0.1903260
## 361 1401 0.1903260
## 362 1407 0.1903260
## 363 1414 0.2195122
## 364 1416 0.1903260
## 365 1418 0.1903260
## 366 1420 0.2195122
## 367 1421 0.9209486
## 368 1427 0.1903260
## 369 1428 0.8658537
## 370 1431 0.1903260
## 371 1440 0.2195122
## 372 1449 0.1903260
## 373 1459 0.9209486
## 374 1460 0.1903260
## 375 1462 0.1903260
## 376 1464 0.1903260
## 377 1466 0.1903260
## 378 1469 0.1903260
## 379 1472 0.1903260
## 380 1479 0.9209486
## 381 1487 0.1903260
## 382 1491 0.9209486
## 383 1493 0.1903260
## 384 1496 0.1903260
## 385 1501 0.1903260
## 386 1506 0.1903260
## 387 1510 0.1903260
## 388 1511 0.1903260
## 389 1516 0.2195122
## 390 1523 0.1903260
## 391 1527 0.1903260
## 392 1537 0.1903260
## 393 1538 0.1903260
## 394 1540 0.2195122
## 395 1541 0.1903260
## 396 1545 0.9209486
## 397 1549 0.1903260
## 398 1555 0.9209486
## 399 1556 0.9209486
## 400 1559 0.2195122
## 401 1566 0.9209486
## 402 1568 0.1903260
## 403 1572 0.1903260
## 404 1585 0.9209486
## 405 1588 0.1903260
## 406 1589 0.9209486
## 407 1607 0.1903260
## 408 1615 0.1903260
## 409 1617 0.1903260
## 410 1618 0.1903260
## 411 1620 0.1903260
## 412 1630 0.1903260
## 413 1640 0.1903260
## 414 1647 0.2195122
## 415 1648 0.1903260
## 416 1650 0.9209486
## 417 1651 0.9209486
## 418 1656 0.1903260
## 419 1658 0.8658537
## 420 1661 0.1903260
## 421 1662 0.1903260
## 422 1670 0.8658537
## 423 1671 0.9209486
## 424 1675 0.8658537
## 425 1678 0.9209486
## 426 1689 0.1903260
## 427 1691 0.9209486
## 428 1692 0.1903260
## 429 1714 0.1903260
## 430 1722 0.9209486
## 431 1727 0.1903260
## 432 1732 0.1903260
## 433 1734 0.2195122
## 434 1736 0.8658537
## 435 1738 0.1903260
## 436 1742 0.1903260
## 437 1747 0.9209486
## 438 1750 0.1903260
## 439 1754 0.9209486
## 440 1756 0.1903260
## 441 1764 0.1903260
## 442 1765 0.8658537
## 443 1767 0.2195122
## 444 1769 0.1903260
## 445 1770 0.2195122
## 446 1773 0.1903260
## 447 1774 0.1903260
## 448 1785 0.1903260
## 449 1791 0.1903260
## 450 1793 0.9209486
## 451 1798 0.9209486
## 452 1804 0.1903260
## 453 1806 0.1903260
## 454 1809 0.1903260
## 455 1812 0.1903260
## 456 1813 0.9209486
## 457 1818 0.1903260
## 458 1827 0.1903260
## 459 1828 0.1903260
## 460 1831 0.1903260
## 461 1834 0.1903260
## 462 1835 0.1903260
## 463 1848 0.1903260
## 464 1858 0.1903260
## 465 1863 0.2195122
## 466 1865 0.9209486
## 467 1873 0.2195122
## 468 1882 0.9209486
## 469 1886 0.1903260
## 470 1887 0.1903260
## 471 1893 0.1903260
## 472 1896 0.9209486
## 473 1899 0.1903260
## 474 1901 0.1903260
## 475 1909 0.1903260
## 476 1912 0.2195122
## 477 1918 0.9209486
## 478 1920 0.1903260
## 479 1921 0.1903260
## 480 1922 0.2195122
## 481 1930 0.1903260
## 482 1937 0.2195122
## 483 1938 0.9209486
## 484 1940 0.9209486
## 485 1942 0.1903260
## 486 1949 0.8658537
## 487 1951 0.1903260
## 488 1953 0.8658537
## 489 1957 0.1903260
## 490 1958 0.1903260
## 491 1959 0.1903260
## 492 1975 0.9209486
## 493 1976 0.1903260
## 494 1980 0.1903260
## 495 1983 0.9209486
## 496 1986 0.2195122
# Problem 03 (p. 267)
train<-read.csv("insurance_train_10.csv")
test<-read.csv("insurance_test_10.csv")
train %>% glimpse
## Rows: 6,969
## Columns: 9
## $ Gender <chr> "Male", "Female", "Male", "Male", "Male", "Female", "F…
## $ Ever_Married <chr> "No", "Yes", "Yes", "Yes", "No", "No", "Yes", "Yes", "…
## $ Age <int> 22, 67, 67, 56, 32, 33, 61, 55, 26, 19, 58, 41, 32, 31…
## $ Graduated <chr> "No", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession <chr> "Healthcare", "Engineer", "Lawyer", "Artist", "Healthc…
## $ Work_Experience <int> 1, 1, 0, 0, 1, 1, 0, 1, 1, 4, 0, 1, 9, 1, 1, 0, 12, 3,…
## $ Spending_Score <chr> "Low", "Low", "High", "Average", "Low", "Low", "Low", …
## $ Family_Size <int> 4, 1, 2, 2, 3, 3, 3, 4, 3, 4, 1, 2, 5, 6, 4, 1, 1, 4, …
## $ Segmentation <int> 4, 2, 2, 3, 3, 4, 4, 3, 1, 4, 2, 3, 4, 2, 2, 3, 1, 4, …
colSums(is.na(train))
## Gender Ever_Married Age Graduated Profession
## 0 0 0 0 0
## Work_Experience Spending_Score Family_Size Segmentation
## 0 0 0 0
train$Segmentation<-as.factor(train$Segmentation)
library(caret)
ctrl<-trainControl(method="cv",number=10)
train(Segmentation~.,data=train,
method='knn',trControl=ctrl,
preProcess=c("center","scale"))->knn_fit
knn_fit
## k-Nearest Neighbors
##
## 6969 samples
## 8 predictor
## 4 classes: '1', '2', '3', '4'
##
## Pre-processing: centered (19), scaled (19)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 6273, 6272, 6272, 6271, 6272, 6274, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.4811274 0.3070117
## 7 0.4936085 0.3234522
## 9 0.4898745 0.3182787
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 7.
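# Optional sketch (an addition, not in the original run): only k = 5, 7, 9 were
# tried above; a wider grid of odd k values can be supplied via tuneGrid.
train(Segmentation~.,data=train,
      method='knn',trControl=ctrl,
      preProcess=c("center","scale"),
      tuneGrid=expand.grid(k=seq(5,25,by=2)))->knn_fit_wide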
confusionMatrix(knn_fit)
## Cross-Validated (10 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction 1 2 3 4
## 1 9.8 5.5 2.9 5.1
## 2 5.6 7.6 5.7 2.1
## 3 3.7 7.6 13.9 1.5
## 4 5.2 2.7 3.1 18.0
##
## Accuracy (average) : 0.4936
predict(knn_fit,test)->pred_fit
head(pred_fit)
## [1] 2 3 1 3 3 1
## Levels: 1 2 3 4
NROW(pred_fit)
## [1] 2267
test %>% glimpse
## Rows: 2,267
## Columns: 9
## $ X <int> 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ Gender <chr> "Female", "Male", "Female", "Male", "Male", "Male", "F…
## $ Ever_Married <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"…
## $ Age <int> 36, 37, 69, 59, 47, 61, 47, 50, 19, 22, 22, 50, 27, 18…
## $ Graduated <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Yes", …
## $ Profession <chr> "Engineer", "Healthcare", "", "Executive", "Doctor", "…
## $ Work_Experience <int> 0, 8, 0, 11, 0, 5, 1, 2, 0, 0, 0, 1, 8, 0, 0, 1, 1, 8,…
## $ Spending_Score <chr> "Low", "Average", "Low", "High", "High", "Low", "Avera…
## $ Family_Size <int> 1, 4, 1, 2, 5, 3, 3, 4, 4, 3, 6, 5, 3, 3, 1, 3, 2, 1, …
bind_cols(test,pred_fit)->df
## New names:
## • `` -> `...10`
names(df)[9]<-"Segmentaton_pred"
df %>% select(9)->df1
write.csv(df1,"2022.csv",row.names = FALSE)
set.seed(12345)
IDX<-createDataPartition(train$Segmentation,p=0.7,list=FALSE)
train_t<-train[IDX,]
test_v<-train[-IDX,]
train_t$Segmentation<-as.factor(train_t$Segmentation)
test_v$Segmentation<-as.factor(test_v$Segmentation)
ctrl<-trainControl(method="cv",number=10)
train(Segmentation~.,data=train_t,
method='knn',trControl=ctrl,
preProcess=c("center","scale"))->knn_fit1
predict(knn_fit1,newdata=test_v)->test_pred
confusionMatrix(test_pred,test_v$Segmentation,mode="prec_recall")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3 4
## 1 207 114 62 115
## 2 120 152 123 53
## 3 71 163 284 33
## 4 109 60 64 358
##
## Overall Statistics
##
## Accuracy : 0.4794
## 95% CI : (0.4578, 0.5011)
## No Information Rate : 0.2677
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3047
##
## Mcnemar's Test P-Value : 0.009816
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3 Class: 4
## Precision 0.41566 0.3393 0.5154 0.6058
## Recall 0.40828 0.3108 0.5328 0.6404
## F1 0.41194 0.3244 0.5240 0.6226
## Prevalence 0.24282 0.2342 0.2553 0.2677
## Detection Rate 0.09914 0.0728 0.1360 0.1715
## Detection Prevalence 0.23851 0.2146 0.2639 0.2830
## Balanced Accuracy 0.61211 0.5629 0.6806 0.7440
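# Optional sketch: the per-class precision/recall/F1 above can be aggregated
# into a macro-averaged F1 from the byClass matrix of the confusionMatrix object.
cm<-confusionMatrix(test_pred,test_v$Segmentation,mode="prec_recall")
mean(cm$byClass[,"F1"],na.rm=TRUE)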
# Problem 02: Titanic data (p. 277)
rm(list=ls())
ls()
## character(0)
library(dplyr)
library(recipes)
library(caret)
read.delim("titanic3.txt",header=TRUE,sep=",")->full
full %>% glimpse
## Rows: 1,309
## Columns: 14
## $ pclass <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ survived <int> 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, …
## $ name <chr> "Allen, Miss. Elisabeth Walton", "Allison, Master. Hudson Tr…
## $ sex <chr> "female", "male", "female", "male", "female", "male", "femal…
## $ age <dbl> 29.00, 0.92, 2.00, 30.00, 25.00, 48.00, 63.00, 39.00, 53.00,…
## $ sibsp <int> 0, 1, 1, 1, 1, 0, 1, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ parch <int> 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, …
## $ ticket <chr> "24160", "113781", "113781", "113781", "113781", "19952", "1…
## $ fare <dbl> 211.3375, 151.5500, 151.5500, 151.5500, 151.5500, 26.5500, 7…
## $ cabin <chr> "B5", "C22 C26", "C22 C26", "C22 C26", "C22 C26", "E12", "D7…
## $ embarked <chr> "S", "S", "S", "S", "S", "S", "S", "S", "S", "C", "C", "C", …
## $ boat <chr> "2", "11", "", "", "", "3", "10", "", "D", "", "", "4", "9",…
## $ body <int> NA, NA, NA, 135, NA, NA, NA, NA, NA, 22, 124, NA, NA, NA, NA…
## $ home.dest <chr> "St Louis, MO", "Montreal, PQ / Chesterville, ON", "Montreal…
set.seed(1357)
train_list<-createDataPartition(full$survived,p=0.7,list=FALSE)
full_train<-full[train_list,]
full_test<-full[-train_list,]
NROW(full_train)
## [1] 917
NROW(full_test)
## [1] 392
train<-full_train
test<-full_test
train %>% mutate(index='train')->train
test %>% mutate(index='test')->test
bind_rows(train,test)->full
full %>% select(-boat,-body,-home.dest)->full
full$survived<-ifelse(full$survived==0,"생존","사망")
full$survived<-as.factor(full$survived)
full$pclass<-as.factor(full$pclass)
full$sex<-as.factor(full$sex)
full$embarked<-as.factor(full$embarked)
colSums(is.na(full))
## pclass survived name sex age sibsp parch ticket
## 0 0 0 0 263 0 0 0
## fare cabin embarked index
## 1 0 0 0
table(full$embarked)
##
## C Q S
## 2 270 123 914
levels(full$embarked)[1]<-NA
table(full$embarked,useNA="always")
##
## C Q S <NA>
## 270 123 914 2
full %>% filter(!is.na(age)&!is.na(fare)&!is.na(embarked))->full
colSums(is.na(full))
## pclass survived name sex age sibsp parch ticket
## 0 0 0 0 0 0 0 0
## fare cabin embarked index
## 0 0 0 0
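# Optional sketch (an assumption, not the approach used above): instead of
# dropping rows with missing age, fare, or embarked, they could be imputed
# inside the recipe, assuming a recipes version that provides
# step_impute_median() and step_impute_mode(); this would be applied to the
# data before the filter above.
recipe(survived~.,data=full) %>%
  step_impute_median(age,fare) %>%
  step_impute_mode(embarked) %>%
  step_YeoJohnson(age,sibsp,parch,fare) %>%
  step_center(age,sibsp,parch,fare) %>%
  step_scale(age,sibsp,parch,fare) %>%
  prep() %>% juice()->data_imputed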
recipe(survived~.,data=full) %>% step_YeoJohnson(age,sibsp,parch,fare) %>%
step_center(age,sibsp,parch,fare) %>%
step_scale(age,sibsp,parch,fare) %>%
prep() %>% juice()->data
data %>% filter(index=="train") %>% select(-index,-name,-ticket,-cabin)->train
data %>% filter(index=='test') %>% select(-index,-name,-ticket,-cabin)->test
ctrl<-trainControl(method="cv",summaryFunction = twoClassSummary,
classProbs = TRUE)
train(survived~.,data=train,
method="rpart",metric='ROC',
trControl=ctrl)->rffit
rffit
## CART
##
## 731 samples
## 7 predictor
## 2 classes: '사망', '생존'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 659, 659, 657, 658, 658, 657, ...
## Resampling results across tuning parameters:
##
## cp ROC Sens Spec
## 0.02542373 0.7882034 0.5548276 0.9239429
## 0.03728814 0.7730226 0.5442529 0.9191860
## 0.44406780 0.6106002 0.3158621 0.9053383
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02542373.
confusionMatrix(rffit)
## Cross-Validated (10 fold) Confusion Matrix
##
## (entries are percentual average cell counts across resamples)
##
## Reference
## Prediction 사망 생존
## 사망 22.4 4.5
## 생존 17.9 55.1
##
## Accuracy (average) : 0.7756
predict(rffit,test,type="prob")->rffit1
predict(rffit,test,type="raw")->rffit2
confusionMatrix(rffit2,test$survived)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 사망 생존
## 사망 66 6
## 생존 64 176
##
## Accuracy : 0.7756
## 95% CI : (0.7252, 0.8207)
## No Information Rate : 0.5833
## P-Value [Acc > NIR] : 6.712e-13
##
## Kappa : 0.507
##
## Mcnemar's Test P-Value : 9.572e-12
##
## Sensitivity : 0.5077
## Specificity : 0.9670
## Pos Pred Value : 0.9167
## Neg Pred Value : 0.7333
## Prevalence : 0.4167
## Detection Rate : 0.2115
## Detection Prevalence : 0.2308
## Balanced Accuracy : 0.7374
##
## 'Positive' Class : 사망
##
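# Optional sketch (assumes the rpart.plot package is installed): the splits of
# the selected tree can be inspected directly from the caret fit.
library(rpart.plot)
rpart.plot(rffit$finalModel)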
library(pROC)
rffit2_num<-as.numeric(rffit2)
rffit2_num
## [1] 1 2 2 1 1 2 1 2 1 2 1 2 1 1 1 2 1 1 2 2 1 1 1 1 1 1 1 2 1 2 2 1 2 2 2 1 2
## [38] 1 1 2 1 1 1 1 1 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 1 2 1 2 2 1 2 1 2 2 2 2 1 2
## [75] 2 1 2 2 2 1 2 1 2 2 1 2 1 1 2 2 1 1 1 2 2 1 1 1 1 2 2 2 2 1 2 1 2 2 1 2 2
## [112] 2 1 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 2 1 2 1 2 1 2 2 2 2 2 2 1 2 1 2 1 2 2 2
## [149] 1 1 2 2 2 1 1 2 1 1 2 1 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [297] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
result<-roc(test$survived,rffit2_num)
## Setting levels: control = 사망, case = 생존
## Setting direction: controls < cases
result$auc
## Area under the curve: 0.7374
names(rffit1)[2]<-"survived"
rffit1 %>% select(survived)->df
head(df)
## survived
## 1 0.0617284
## 2 0.7905983
## 3 0.7905983
## 4 0.0617284
## 5 0.0617284
## 6 0.7905983
write.csv(df,"00001.csv",row.names=F)
read.csv("00001.csv")
## survived
## 1 0.0617284
## 2 0.7905983
## 3 0.7905983
## 4 0.0617284
## 5 0.0617284
## 6 0.7905983
## 7 0.0617284
## 8 0.7905983
## 9 0.0617284
## 10 0.7905983
## 11 0.0617284
## 12 0.7905983
## 13 0.0617284
## 14 0.0617284
## 15 0.0617284
## 16 0.7905983
## 17 0.0617284
## 18 0.0617284
## 19 0.7905983
## 20 0.7905983
## 21 0.0617284
## 22 0.0617284
## 23 0.0617284
## 24 0.0617284
## 25 0.0617284
## 26 0.0617284
## 27 0.0617284
## 28 0.7905983
## 29 0.0617284
## 30 0.7905983
## 31 0.7905983
## 32 0.0617284
## 33 0.7905983
## 34 0.7905983
## 35 0.7905983
## 36 0.0617284
## 37 0.7905983
## 38 0.0617284
## 39 0.0617284
## 40 0.7905983
## 41 0.0617284
## 42 0.0617284
## 43 0.0617284
## 44 0.0617284
## 45 0.0617284
## 46 0.7905983
## 47 0.7905983
## 48 0.7905983
## 49 0.0617284
## 50 0.0617284
## 51 0.7905983
## 52 0.7905983
## 53 0.7905983
## 54 0.7905983
## 55 0.0617284
## 56 0.7905983
## 57 0.7905983
## 58 0.7905983
## 59 0.7905983
## 60 0.7905983
## 61 0.0617284
## 62 0.7905983
## 63 0.0617284
## 64 0.7905983
## 65 0.7905983
## 66 0.0617284
## 67 0.7905983
## 68 0.0617284
## 69 0.7905983
## 70 0.7905983
## 71 0.7905983
## 72 0.7905983
## 73 0.0617284
## 74 0.7905983
## 75 0.7905983
## 76 0.0617284
## 77 0.7905983
## 78 0.7905983
## 79 0.7905983
## 80 0.0617284
## 81 0.7905983
## 82 0.0617284
## 83 0.7905983
## 84 0.7905983
## 85 0.0617284
## 86 0.7905983
## 87 0.0617284
## 88 0.0617284
## 89 0.7905983
## 90 0.7905983
## 91 0.0617284
## 92 0.0617284
## 93 0.0617284
## 94 0.7905983
## 95 0.7905983
## 96 0.0617284
## 97 0.0617284
## 98 0.0617284
## 99 0.0617284
## 100 0.7905983
## 101 0.7905983
## 102 0.7905983
## 103 0.7905983
## 104 0.0617284
## 105 0.7905983
## 106 0.0617284
## 107 0.7905983
## 108 0.7905983
## 109 0.0617284
## 110 0.7905983
## 111 0.7905983
## 112 0.7905983
## 113 0.0617284
## 114 0.7905983
## 115 0.7905983
## 116 0.7905983
## 117 0.0617284
## 118 0.7905983
## 119 0.7905983
## 120 0.7905983
## 121 0.7905983
## 122 0.0617284
## 123 0.0617284
## 124 0.7905983
## 125 0.7905983
## 126 0.7905983
## 127 0.7905983
## 128 0.7905983
## 129 0.7905983
## 130 0.0617284
## 131 0.7905983
## 132 0.0617284
## 133 0.7905983
## 134 0.0617284
## 135 0.7905983
## 136 0.7905983
## 137 0.7905983
## 138 0.7905983
## 139 0.7905983
## 140 0.7905983
## 141 0.0617284
## 142 0.7905983
## 143 0.0617284
## 144 0.7905983
## 145 0.0617284
## 146 0.7905983
## 147 0.7905983
## 148 0.7905983
## 149 0.0617284
## 150 0.0617284
## 151 0.7905983
## 152 0.7905983
## 153 0.7905983
## 154 0.0617284
## 155 0.0617284
## 156 0.7905983
## 157 0.0617284
## 158 0.0617284
## 159 0.7905983
## 160 0.0617284
## 161 0.0617284
## 162 0.0617284
## 163 0.7905983
## 164 0.7905983
## 165 0.7905983
## 166 0.0617284
## 167 0.7905983
## 168 0.7905983
## 169 0.7905983
## 170 0.7905983
## 171 0.7905983
## 172 0.7905983
## 173 0.7905983
## 174 0.7905983
## 175 0.5544554
## 176 0.5544554
## 177 0.7905983
## 178 0.7905983
## 179 0.7905983
## 180 0.5544554
## 181 0.5544554
## 182 0.7905983
## 183 0.5544554
## 184 0.5544554
## 185 0.5544554
## 186 0.5544554
## 187 0.7905983
## 188 0.7905983
## 189 0.7905983
## 190 0.5544554
## 191 0.5544554
## 192 0.7905983
## 193 0.7905983
## 194 0.5544554
## 195 0.7905983
## 196 0.5544554
## 197 0.7905983
## 198 0.7905983
## 199 0.7905983
## 200 0.7905983
## 201 0.7905983
## 202 0.7905983
## 203 0.7905983
## 204 0.7905983
## 205 0.7905983
## 206 0.7905983
## 207 0.7905983
## 208 0.5544554
## 209 0.7905983
## 210 0.7905983
## 211 0.7905983
## 212 0.7905983
## 213 0.7905983
## 214 0.7905983
## 215 0.7905983
## 216 0.5544554
## 217 0.7905983
## 218 0.7905983
## 219 0.5544554
## 220 0.7905983
## 221 0.5544554
## 222 0.7905983
## 223 0.7905983
## 224 0.7905983
## 225 0.7905983
## 226 0.5544554
## 227 0.5544554
## 228 0.5544554
## 229 0.7905983
## 230 0.5544554
## 231 0.7905983
## 232 0.5544554
## 233 0.7905983
## 234 0.7905983
## 235 0.7905983
## 236 0.5544554
## 237 0.5544554
## 238 0.7905983
## 239 0.5544554
## 240 0.5544554
## 241 0.7905983
## 242 0.7905983
## 243 0.7905983
## 244 0.5544554
## 245 0.7905983
## 246 0.7905983
## 247 0.7905983
## 248 0.5544554
## 249 0.7905983
## 250 0.7905983
## 251 0.5544554
## 252 0.5544554
## 253 0.7905983
## 254 0.7905983
## 255 0.7905983
## 256 0.7905983
## 257 0.5544554
## 258 0.5544554
## 259 0.7905983
## 260 0.7905983
## 261 0.7905983
## 262 0.5544554
## 263 0.5544554
## 264 0.7905983
## 265 0.5544554
## 266 0.7905983
## 267 0.7905983
## 268 0.5544554
## 269 0.7905983
## 270 0.5544554
## 271 0.7905983
## 272 0.5544554
## 273 0.7905983
## 274 0.7905983
## 275 0.5544554
## 276 0.7905983
## 277 0.7905983
## 278 0.5544554
## 279 0.7905983
## 280 0.5544554
## 281 0.7905983
## 282 0.7905983
## 283 0.7905983
## 284 0.7905983
## 285 0.5544554
## 286 0.5544554
## 287 0.7905983
## 288 0.7905983
## 289 0.7905983
## 290 0.5544554
## 291 0.7905983
## 292 0.7905983
## 293 0.7905983
## 294 0.5544554
## 295 0.7905983
## 296 0.7905983
## 297 0.5544554
## 298 0.7905983
## 299 0.7905983
## 300 0.7905983
## 301 0.5544554
## 302 0.7905983
## 303 0.7905983
## 304 0.5544554
## 305 0.5544554
## 306 0.7905983
## 307 0.5544554
## 308 0.7905983
## 309 0.7905983
## 310 0.5544554
## 311 0.7905983
## 312 0.5544554
# Problem 02 (p. 285)
library(dplyr)
library(recipes)
library(caret)
df<-read.csv("nyc.csv")
df %>% glimpse
## Rows: 165
## Columns: 9
## $ Case <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ Restaurant <chr> "Daniella Ristorante", "Tello's Ristorante", "Biricchino", …
## $ Price <int> 43, 32, 34, 41, 54, 52, 34, 34, 39, 44, 45, 47, 52, 35, 47,…
## $ Food <int> 22, 20, 21, 20, 24, 22, 22, 20, 22, 21, 19, 21, 21, 19, 20,…
## $ Decor <int> 18, 19, 13, 20, 19, 22, 16, 18, 19, 17, 17, 19, 19, 17, 18,…
## $ Service <int> 20, 19, 18, 17, 21, 21, 21, 21, 22, 19, 20, 21, 20, 19, 21,…
## $ East <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ latitude <dbl> 40.74683, 40.74342, 40.74886, 40.74848, 40.73958, 40.74069,…
## $ longitude <dbl> -73.99676, -73.99954, -73.99552, -74.00331, -73.99591, -73.…
nyc<-df %>% select(3:7)
summary(nyc)
## Price Food Decor Service
## Min. :19.00 Min. :16.00 Min. : 6.00 Min. :14.00
## 1st Qu.:36.00 1st Qu.:19.00 1st Qu.:16.00 1st Qu.:18.00
## Median :43.00 Median :21.00 Median :18.00 Median :20.00
## Mean :42.67 Mean :20.59 Mean :17.68 Mean :19.39
## 3rd Qu.:50.00 3rd Qu.:22.00 3rd Qu.:19.00 3rd Qu.:21.00
## Max. :65.00 Max. :25.00 Max. :25.00 Max. :24.00
## East
## Min. :0.0000
## 1st Qu.:0.0000
## Median :1.0000
## Mean :0.6303
## 3rd Qu.:1.0000
## Max. :1.0000
nyc$East<-as.factor(nyc$East)
set.seed(1357)
nyc %>% glimpse
## Rows: 165
## Columns: 5
## $ Price <int> 43, 32, 34, 41, 54, 52, 34, 34, 39, 44, 45, 47, 52, 35, 47, 37…
## $ Food <int> 22, 20, 21, 20, 24, 22, 22, 20, 22, 21, 19, 21, 21, 19, 20, 21…
## $ Decor <int> 18, 19, 13, 20, 19, 22, 16, 18, 19, 17, 17, 19, 19, 17, 18, 19…
## $ Service <int> 20, 19, 18, 17, 21, 21, 21, 21, 22, 19, 20, 21, 20, 19, 21, 21…
## $ East <fct> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
train_list<-createDataPartition(nyc$Price,p=0.7,list=FALSE)
train<-nyc[train_list,]
test<-nyc[-train_list,]
NROW(train)
## [1] 118
NROW(test)
## [1] 47
train %>% mutate(index='train')->train
test %>% mutate(index='test')->test
bind_rows(train,test)->full
full %>% glimpse
## Rows: 165
## Columns: 6
## $ Price <int> 43, 32, 34, 41, 52, 34, 39, 44, 45, 47, 52, 35, 37, 45, 38, 51…
## $ Food <int> 22, 20, 21, 20, 22, 20, 22, 21, 19, 21, 21, 19, 21, 22, 19, 22…
## $ Decor <int> 18, 19, 13, 20, 22, 18, 19, 17, 17, 19, 19, 17, 19, 18, 17, 20…
## $ Service <int> 20, 19, 18, 17, 21, 21, 22, 19, 20, 21, 20, 19, 21, 23, 18, 22…
## $ East <fct> 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ index <chr> "train", "train", "train", "train", "train", "train", "train",…
recipe(Price~.,data=full) %>% step_YeoJohnson(Food,Decor,Service) %>%
step_center(Food,Decor,Service) %>%
step_scale(Food,Decor,Service) %>%
prep() %>% juice()->data
data %>% glimpse
## Rows: 165
## Columns: 6
## $ Food <dbl> 0.7123134, -0.2891237, 0.2132304, -0.2891237, 0.7123134, -0.28…
## $ Decor <dbl> 0.07529626, 0.45931463, -1.65771532, 0.85519632, 1.68166534, 0…
## $ Service <dbl> 0.2626569, -0.2101456, -0.6716854, -1.1216590, 0.7464411, 0.74…
## $ East <fct> 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ index <fct> train, train, train, train, train, train, train, train, train,…
## $ Price <int> 43, 32, 34, 41, 52, 34, 39, 44, 45, 47, 52, 35, 37, 45, 38, 51…
data %>% filter(index=="train") %>% select(-index)->train
data %>% filter(index=="test") %>% select(-index)->test
tc<-trainControl(method="cv",number=10)
model_2<-train(Price~.,train,method="lm",trControl=tc)
model_2
## Linear Regression
##
## 118 samples
## 4 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 106, 107, 106, 105, 105, 108, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 5.810832 0.6599844 4.538747
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
predict(model_2,newdata=test)->lmfit
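# Optional sketch: hold-out performance of the predictions can be checked
# against the true prices with caret::postResample (RMSE, R-squared, MAE).
postResample(pred=lmfit,obs=test$Price)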
write.csv(lmfit,"0002.csv",row.names=F)
read.csv("0002.csv")
## x
## 1 48.34718
## 2 39.77896
## 3 43.15862
## 4 54.90123
## 5 45.16370
## 6 42.79491
## 7 52.14283
## 8 50.10081
## 9 47.99391
## 10 38.90202
## 11 47.74301
## 12 47.99391
## 13 42.39190
## 14 48.45489
## 15 43.03455
## 16 47.18521
## 17 35.86358
## 18 43.71990
## 19 35.59448
## 20 52.01875
## 21 41.93043
## 22 44.24999
## 23 39.02040
## 24 63.33453
## 25 32.46550
## 26 34.01732
## 27 41.81274
## 28 54.77440
## 29 35.97279
## 30 36.20055
## 31 51.29371
## 32 24.45540
## 33 22.54081
## 34 37.43658
## 35 33.55841
## 36 33.44301
## 37 37.31821
## 38 35.40467
## 39 53.66757
## 40 52.09694
## 41 42.92683
## 42 40.24043
## 43 38.32688
## 44 36.35937
## 45 35.40467
## 46 33.93367
## 47 41.69250