setwd("~/Desktop/Titanic Dataset")
train <- read_csv("~/Desktop/Titanic Dataset/train.csv")
test <- read_csv("~/Desktop/Titanic Dataset/test.csv")
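The calls in this analysis assume the required packages were loaded in an earlier setup chunk; a minimal sketch of that chunk (the exact package list is an assumption inferred from the functions used below) would be:
library(readr)   # read_csv()
library(GGally)  # ggpairs()
library(ggplot2) # ggplot()
library(caret)   # createDataPartition(), createFolds()
library(e1071)   # svm()
library(class)   # knn()
library(rpart)   # rpart()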
names(train)
## [1] "PassengerId" "Survived" "Pclass" "Name" "Sex"
## [6] "Age" "SibSp" "Parch" "Ticket" "Fare"
## [11] "Cabin" "Embarked"
sum(is.na(train$PassengerId)) # PassengerId should be discarded
## [1] 0
sum(is.na(train$Survived))
## [1] 0
sum(is.na(train$Pclass))
## [1] 0
sum(is.na(train$Name)) # Name should be discarded
## [1] 0
sum(is.na(train$Sex))
## [1] 0
sum(is.na(train$Age)) # There are 177 missing ages
## [1] 177
sum(is.na(train$SibSp))
## [1] 0
sum(is.na(train$Parch))
## [1] 0
sum(is.na(train$Ticket))
## [1] 0
sum(is.na(train$Fare))
## [1] 0
sum(is.na(train$Cabin)) # There are 687 missing observations
## [1] 687
sum(is.na(train$Embarked)) # There are 2 missing observations
## [1] 2
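The column-by-column checks above can also be collapsed into a single call; a brief equivalent sketch:
colSums(is.na(train)) # NA count for every column at once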
train<-data.frame(train)
train<-data.frame(train[,c("Survived", "Pclass", "Sex", "Age", "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked")])
train$Survived<-factor(train$Survived)
train$Pclass<-factor(train$Pclass)
train$Sex<-factor(train$Sex)
train$SibSp<-factor(train$SibSp)
train$Parch<-factor(train$Parch)
train$Embarked<-factor(train$Embarked)
summary(train)
## Survived Pclass Sex Age SibSp Parch
## 0:549 1:216 female:314 Min. : 0.42 0:608 0:678
## 1:342 2:184 male :577 1st Qu.:20.12 1:209 1:118
## 3:491 Median :28.00 2: 28 2: 80
## Mean :29.70 3: 16 3: 5
## 3rd Qu.:38.00 4: 18 4: 4
## Max. :80.00 5: 5 5: 5
## NA's :177 8: 7 6: 1
## Ticket Fare Cabin Embarked
## Length:891 Min. : 0.00 Length:891 C :168
## Class :character 1st Qu.: 7.91 Class :character Q : 77
## Mode :character Median : 14.45 Mode :character S :644
## Mean : 32.20 NA's: 2
## 3rd Qu.: 31.00
## Max. :512.33
##
train<-data.frame(train[,c("Survived", "Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked")]) # Some variables have been removed to get a better picture
ggpairs(train, aes(colour=Survived))
# Plot each variable independently
ggpairs(train[, c("Survived", "Sex")], aes(colour=Survived))
ggpairs(train[, c("Survived", "Pclass")], aes(colour=Survived))
ggpairs(train[, c("Survived", "Age")], aes(colour=Survived))
ggpairs(train[, c("Survived", "SibSp")], aes(colour=Survived))
ggpairs(train[, c("Survived", "Parch")], aes(colour=Survived))
ggpairs(train[, c("Survived", "Fare")], aes(colour=Survived))
ggpairs(train[, c("Survived", "Embarked")], aes(colour=Survived))
Data1<-data.frame(train[, c("Survived", "Sex", "Pclass")])
# One hot encode the Sex
cat.variable<-subset(Data1, select = -c(Survived, Pclass))
ohe.Data1<-data.frame(model.matrix(~Sex-1, cat.variable), Survived=Data1$Survived, Pclass=Data1$Pclass)
head(ohe.Data1) # Show the first rows of the encoded data (the full 891-row print is omitted)
## Sexfemale Sexmale Survived Pclass
## 1 0 1 0 3
## 2 1 0 1 1
## 3 1 0 1 3
## 4 1 0 1 1
## 5 0 1 0 3
## 6 0 1 0 3
# One hot encode the Pclass
cat.variable<-subset(ohe.Data1, select = -c(Survived, Sexfemale, Sexmale))
ohe.Data2<-data.frame(model.matrix(~Pclass-1, cat.variable), Sexfemale=ohe.Data1$Sexfemale, Sexmale=ohe.Data1$Sexmale, Survived=ohe.Data1$Survived)
head(ohe.Data2) # Show the first rows of the fully encoded data (the full 891-row print is omitted)
## Pclass1 Pclass2 Pclass3 Sexfemale Sexmale Survived
## 1 0 0 1 0 1 0
## 2 1 0 0 1 0 1
## 3 0 0 1 1 0 1
## 4 1 0 0 1 0 1
## 5 0 0 1 0 1 0
## 6 0 0 1 0 1 0
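For reference, both indicator sets can be produced in a single step with caret's dummyVars; a hedged sketch (the column names it generates, e.g. Sex.female, may differ slightly from the manual model.matrix version):
dv <- dummyVars(~ Sex + Pclass, data = Data1) # full one-hot encoding for both factors
ohe.alt <- data.frame(predict(dv, newdata = Data1), Survived = Data1$Survived)
head(ohe.alt)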
set.seed(1)
train.index<-createDataPartition(ohe.Data2$Survived, p=0.8, list = FALSE)
train.set<-ohe.Data2[train.index, ]
test.set<-ohe.Data2[-train.index, ]
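As a quick sanity check (not part of the original output), createDataPartition stratifies on the outcome, so the survival rate should be nearly identical in the two partitions:
prop.table(table(train.set$Survived)) # class balance in the training partition
prop.table(table(test.set$Survived))  # class balance in the held-out partition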
Check the stability of the CV estimates by repeating 5-fold cross-validation over 150 different seeds and drawing boxplots of the resulting error rates for the best model in each category.
SVM.L.All<-NULL
SVM.P.All<-NULL
SVM.R.All<-NULL
KNN.All<-NULL
TREE.All<-NULL
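The five blocks below all share the same cross-validation skeleton and differ only in the classifier being fitted. A helper along these lines would remove the copy-pasting (a sketch; fit.fun is a hypothetical function that returns predictions for the validation fold), but the explicit loops are kept for transparency:
cv.error.for.seed <- function(seed, data, fit.fun) {
  set.seed(seed)
  data$fold <- createFolds(y = factor(data$Survived), k = 5, list = FALSE)
  fold.errors <- sapply(1:5, function(i) {
    valid.data <- subset(data, fold == i)
    train.data <- subset(data, fold != i)
    pred <- fit.fun(train.data, valid.data)
    (nrow(valid.data) / nrow(data)) * mean(valid.data$Survived != pred)
  })
  sum(fold.errors) # weighted 5-fold CV error for this seed
}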
for(j in 1:150){
set.seed(j)
folds <- createFolds(y=factor(train.set$Survived), k = 5, list = FALSE)
train.set$fold <- folds
# SVM with a linear kernel - the best linear-kernel model considered (tuned cost reported as 11173.88; cost = 100 is used here).
CV.error<-NULL
for (i in 1:5) {
valid.data <- subset(train.set, fold == i)
train.data <- subset(train.set, fold != i)
svmfit<-svm(Survived ~ Pclass1 + Pclass2 + Pclass3 + Sexfemale + Sexmale ,data = train.data, kernel="linear", cost=100 ,scale=FALSE)
svm.y<-valid.data$Survived
svm.predy<-predict(svmfit, valid.data)
ith.test.error<- mean(svm.y!=svm.predy)
CV.error<-c(CV.error,(nrow(valid.data)/nrow(train.set))*ith.test.error)
}
SVM.L.All<-c(SVM.L.All, sum(CV.error))
}
SVM.L.All
## [1] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [8] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [15] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [22] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [29] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [36] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [43] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [50] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [57] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [64] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [71] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [78] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [85] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [92] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [99] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [106] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [113] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [120] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [127] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [134] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [141] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [148] 0.2086835 0.2086835 0.2086835
for(j in 1:150){
set.seed(j)
folds <- createFolds(y=factor(train.set$Survived), k = 5, list = FALSE)
train.set$fold <- folds
# SVM with a degree-2 polynomial kernel (cost = exp(17.1810), gamma = 1) - the best polynomial-kernel model considered.
CV.error<-NULL
for (i in 1:5) {
valid.data <- subset(train.set, fold == i)
train.data <- subset(train.set, fold != i)
svmfit<-svm(Survived ~ Pclass1 + Pclass2 + Pclass3 + Sexfemale + Sexmale ,data = train.data, kernel="polynomial", cost=exp(17.1810) , gamma = 1, degree = 2)
svm.y<-valid.data$Survived
svm.predy<-predict(svmfit, valid.data)
ith.test.error<- mean(svm.y!=svm.predy)
CV.error<-c(CV.error,(nrow(valid.data)/nrow(train.set))*ith.test.error)
}
SVM.P.All<-c(SVM.P.All, sum(CV.error))
}
SVM.P.All
## [1] 0.2240896 0.2184874 0.2086835 0.2212885 0.2142857 0.2240896 0.2156863
## [8] 0.2240896 0.2170868 0.2142857 0.2142857 0.2212885 0.2156863 0.2156863
## [15] 0.2184874 0.2184874 0.2254902 0.2156863 0.2086835 0.2240896 0.2226891
## [22] 0.2296919 0.2296919 0.2156863 0.2282913 0.2226891 0.2142857 0.2226891
## [29] 0.2086835 0.2086835 0.2268908 0.2156863 0.2086835 0.2170868 0.2170868
## [36] 0.2184874 0.2226891 0.2184874 0.2142857 0.2142857 0.2268908 0.2170868
## [43] 0.2170868 0.2198880 0.2226891 0.2156863 0.2086835 0.2212885 0.2170868
## [50] 0.2240896 0.2170868 0.2156863 0.2184874 0.2086835 0.2296919 0.2170868
## [57] 0.2296919 0.2142857 0.2198880 0.2086835 0.2324930 0.2268908 0.2198880
## [64] 0.2156863 0.2086835 0.2338936 0.2212885 0.2184874 0.2198880 0.2086835
## [71] 0.2212885 0.2226891 0.2268908 0.2338936 0.2156863 0.2226891 0.2254902
## [78] 0.2184874 0.2268908 0.2282913 0.2142857 0.2254902 0.2170868 0.2170868
## [85] 0.2240896 0.2240896 0.2142857 0.2156863 0.2170868 0.2212885 0.2212885
## [92] 0.2198880 0.2086835 0.2212885 0.2268908 0.2170868 0.2086835 0.2156863
## [99] 0.2170868 0.2156863 0.2198880 0.2142857 0.2226891 0.2086835 0.2226891
## [106] 0.2170868 0.2142857 0.2156863 0.2170868 0.2198880 0.2170868 0.2254902
## [113] 0.2184874 0.2156863 0.2282913 0.2156863 0.2086835 0.2212885 0.2240896
## [120] 0.2170868 0.2296919 0.2296919 0.2212885 0.2226891 0.2240896 0.2226891
## [127] 0.2086835 0.2142857 0.2198880 0.2086835 0.2184874 0.2156863 0.2156863
## [134] 0.2142857 0.2282913 0.2170868 0.2156863 0.2184874 0.2156863 0.2142857
## [141] 0.2086835 0.2268908 0.2268908 0.2142857 0.2086835 0.2170868 0.2240896
## [148] 0.2086835 0.2240896 0.2086835
The polynomial degree is fixed at 2 so that tuning cannot reduce it to the degree-1 (linear) case.
for(j in 1:150){
set.seed(j)
folds <- createFolds(y=factor(train.set$Survived), k = 5, list = FALSE)
train.set$fold <- folds
# SVM with a radial kernel (cost = exp(1.6377), gamma = exp(-7.3007)) - the best radial-kernel model considered.
CV.error<-NULL
for (i in 1:5) {
valid.data <- subset(train.set, fold == i)
train.data <- subset(train.set, fold != i)
svmfit<-svm(Survived ~ Pclass1 + Pclass2 + Pclass3 + Sexfemale + Sexmale, data = train.data, kernel="radial", cost=exp(1.6377), gamma=exp(-7.3007))
svm.y<-valid.data$Survived
svm.predy<-predict(svmfit, valid.data)
ith.test.error<- mean(svm.y!=svm.predy)
CV.error<-c(CV.error,(nrow(valid.data)/nrow(train.set))*ith.test.error)
}
SVM.R.All<-c(SVM.R.All, sum(CV.error))
}
SVM.R.All
## [1] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [8] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [15] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [22] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [29] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [36] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [43] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [50] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [57] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [64] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [71] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [78] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [85] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [92] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [99] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [106] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [113] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [120] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [127] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [134] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [141] 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835 0.2086835
## [148] 0.2086835 0.2086835 0.2086835
for(j in 1:150){
set.seed(j)
folds <- createFolds(y=factor(train.set$Survived), k = 5, list = FALSE)
train.set$fold <- folds
# KNN with k = 82 - the best KNN model considered.
CV.error<-NULL
for (i in 1:5) {
valid.data <- subset(train.set, fold == i)
train.data <- subset(train.set, fold != i)
knn.y<-valid.data$Survived
knn.predy<-knn(train = train.data[, c("Pclass1", "Pclass2", "Pclass3", "Sexfemale", "Sexmale")], test = valid.data[, c("Pclass1", "Pclass2", "Pclass3", "Sexfemale", "Sexmale")], cl = train.data$Survived, k = 82)
ith.test.error<- mean(knn.y!=knn.predy)
CV.error<-c(CV.error,(nrow(valid.data)/nrow(train.set))*ith.test.error)
}
KNN.All<-c(KNN.All, sum(CV.error))
}
KNN.All
## [1] 0.2058824 0.2184874 0.2086835 0.2212885 0.2114846 0.2212885 0.2156863
## [8] 0.2240896 0.2128852 0.2100840 0.2114846 0.2212885 0.2156863 0.2142857
## [15] 0.2184874 0.2184874 0.2254902 0.2156863 0.2086835 0.2240896 0.2226891
## [22] 0.2296919 0.2296919 0.2156863 0.2240896 0.2212885 0.2142857 0.2226891
## [29] 0.2086835 0.2086835 0.2268908 0.2156863 0.2086835 0.2170868 0.2170868
## [36] 0.2184874 0.2226891 0.2184874 0.2086835 0.2072829 0.2268908 0.2170868
## [43] 0.2170868 0.2170868 0.2226891 0.2114846 0.2086835 0.2212885 0.2170868
## [50] 0.2240896 0.2170868 0.2156863 0.2184874 0.2086835 0.2296919 0.2170868
## [57] 0.2240896 0.2184874 0.2198880 0.2086835 0.2324930 0.2310924 0.2198880
## [64] 0.2156863 0.2086835 0.2338936 0.2212885 0.2184874 0.2198880 0.2086835
## [71] 0.2170868 0.2226891 0.2296919 0.2338936 0.2156863 0.2226891 0.2254902
## [78] 0.2184874 0.2268908 0.2282913 0.2156863 0.2254902 0.2212885 0.2170868
## [85] 0.2184874 0.2240896 0.2156863 0.2156863 0.2170868 0.2240896 0.2198880
## [92] 0.2100840 0.2086835 0.2212885 0.2240896 0.2184874 0.2086835 0.2184874
## [99] 0.2170868 0.2156863 0.2198880 0.2114846 0.2226891 0.2086835 0.2226891
## [106] 0.2170868 0.2114846 0.2156863 0.2170868 0.2198880 0.2170868 0.2254902
## [113] 0.2184874 0.2156863 0.2282913 0.2198880 0.2086835 0.2212885 0.2184874
## [120] 0.2170868 0.2296919 0.2296919 0.2212885 0.2226891 0.2226891 0.2282913
## [127] 0.2086835 0.2170868 0.2198880 0.2128852 0.2184874 0.2198880 0.2156863
## [134] 0.2114846 0.2282913 0.2170868 0.2156863 0.2184874 0.2156863 0.2100840
## [141] 0.2086835 0.2268908 0.2212885 0.2072829 0.2086835 0.2170868 0.2198880
## [148] 0.2086835 0.2240896 0.2086835
for(j in 1:150){
set.seed(j)
folds <- createFolds(y=factor(train.set$Survived), k = 5, list = FALSE)
train.set$fold <- folds
# Classification tree (rpart) with minsplit = 1 and cp = 0.004 - the best tree model considered.
CV.error<-NULL
for (i in 1:5) {
valid.data <- subset(train.set, fold == i)
train.data <- subset(train.set, fold != i)
treefit<-rpart(Survived ~ Pclass1 + Pclass2 + Pclass3 + Sexfemale + Sexmale, data = train.data, method = "class", control = rpart.control(minsplit = 1, cp = 0.004))
tree.y<-valid.data$Survived
tree.predy<-predict(treefit, newdata = valid.data[, c("Pclass1", "Pclass2", "Pclass3", "Sexfemale", "Sexmale")], type ="class")
ith.test.error<- mean(tree.y!=tree.predy)
CV.error<-c(CV.error,(nrow(valid.data)/nrow(train.set))*ith.test.error)
}
TREE.All<-c(TREE.All, sum(CV.error))
}
TREE.All
## [1] 0.2240896 0.2184874 0.2086835 0.2212885 0.2086835 0.2184874 0.2156863
## [8] 0.2240896 0.2170868 0.2086835 0.2086835 0.2212885 0.2156863 0.2156863
## [15] 0.2184874 0.2184874 0.2254902 0.2156863 0.2086835 0.2184874 0.2226891
## [22] 0.2296919 0.2296919 0.2156863 0.2226891 0.2170868 0.2086835 0.2226891
## [29] 0.2086835 0.2086835 0.2268908 0.2156863 0.2086835 0.2170868 0.2170868
## [36] 0.2184874 0.2226891 0.2184874 0.2086835 0.2086835 0.2212885 0.2170868
## [43] 0.2170868 0.2086835 0.2226891 0.2156863 0.2086835 0.2156863 0.2170868
## [50] 0.2240896 0.2170868 0.2156863 0.2184874 0.2086835 0.2296919 0.2170868
## [57] 0.2240896 0.2086835 0.2198880 0.2086835 0.2324930 0.2212885 0.2198880
## [64] 0.2156863 0.2086835 0.2338936 0.2212885 0.2184874 0.2198880 0.2086835
## [71] 0.2156863 0.2226891 0.2212885 0.2338936 0.2156863 0.2226891 0.2254902
## [78] 0.2184874 0.2268908 0.2282913 0.2086835 0.2254902 0.2170868 0.2170868
## [85] 0.2184874 0.2184874 0.2086835 0.2156863 0.2170868 0.2156863 0.2212885
## [92] 0.2086835 0.2086835 0.2212885 0.2212885 0.2170868 0.2086835 0.2156863
## [99] 0.2170868 0.2156863 0.2198880 0.2086835 0.2226891 0.2086835 0.2226891
## [106] 0.2170868 0.2086835 0.2156863 0.2170868 0.2198880 0.2170868 0.2254902
## [113] 0.2184874 0.2156863 0.2282913 0.2156863 0.2086835 0.2156863 0.2184874
## [120] 0.2170868 0.2296919 0.2296919 0.2156863 0.2226891 0.2184874 0.2170868
## [127] 0.2086835 0.2086835 0.2086835 0.2086835 0.2184874 0.2156863 0.2156863
## [134] 0.2086835 0.2282913 0.2170868 0.2156863 0.2184874 0.2156863 0.2086835
## [141] 0.2086835 0.2268908 0.2212885 0.2086835 0.2086835 0.2170868 0.2184874
## [148] 0.2086835 0.2240896 0.2086835
SVM.L<-data.frame(Classifier="SVM.L", CV.Error=SVM.L.All)
SVM.P<-data.frame(Classifier="SVM.P", CV.Error=SVM.P.All)
SVM.R<-data.frame(Classifier="SVM.R", CV.Error=SVM.R.All)
KNN<-data.frame(Classifier="KNN", CV.Error=KNN.All)
TREE<-data.frame(Classifier="TREE", CV.Error=TREE.All)
df<-rbind(SVM.L,SVM.P, SVM.R, KNN, TREE)
ggplot(df, aes(x=Classifier, y=CV.Error)) + geom_boxplot(color="green") + geom_jitter(alpha=0.1) + ylab("CV Error Rate")
summary(SVM.L)
## Classifier CV.Error
## SVM.L:150 Min. :0.2087
## 1st Qu.:0.2087
## Median :0.2087
## Mean :0.2087
## 3rd Qu.:0.2087
## Max. :0.2087
summary(SVM.P)
## Classifier CV.Error
## SVM.P:150 Min. :0.2087
## 1st Qu.:0.2157
## Median :0.2185
## Mean :0.2188
## 3rd Qu.:0.2227
## Max. :0.2339
summary(SVM.R)
## Classifier CV.Error
## SVM.R:150 Min. :0.2087
## 1st Qu.:0.2087
## Median :0.2087
## Mean :0.2087
## 3rd Qu.:0.2087
## Max. :0.2087
summary(KNN)
## Classifier CV.Error
## KNN:150 Min. :0.2059
## 1st Qu.:0.2157
## Median :0.2185
## Mean :0.2183
## 3rd Qu.:0.2227
## Max. :0.2339
summary(TREE)
## Classifier CV.Error
## TREE:150 Min. :0.2087
## 1st Qu.:0.2104
## Median :0.2171
## Mean :0.2173
## 3rd Qu.:0.2213
## Max. :0.2339
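The same comparison can be condensed into one table of means and standard deviations per classifier (a brief sketch using the df built above):
aggregate(CV.Error ~ Classifier, data = df, FUN = function(x) c(mean = mean(x), sd = sd(x)))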