library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
library(readr)
library(dplyr)
library(funModeling)
## Warning: package 'funModeling' was built under R version 4.2.2
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following object is masked from 'package:plotly':
##
## subplot
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
## / Now in Spanish: librovivodecienciadedatos.ai
##
## Attaching package: 'funModeling'
##
## The following object is masked from 'package:GGally':
##
## range01
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.2.2
##
## Attaching package: 'MLmetrics'
##
## The following object is masked from 'package:base':
##
## Recall
library(e1071)
## Warning: package 'e1071' was built under R version 4.2.2
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:Hmisc':
##
## impute
library(rsample)
## Warning: package 'rsample' was built under R version 4.2.2
##
## Attaching package: 'rsample'
##
## The following object is masked from 'package:e1071':
##
## permutations
library(caret)
## Warning: package 'caret' was built under R version 4.2.2
##
## Attaching package: 'caret'
##
## The following objects are masked from 'package:MLmetrics':
##
## MAE, RMSE
##
## The following object is masked from 'package:survival':
##
## cluster
##
## The following object is masked from 'package:purrr':
##
## lift
library(ROCR)
## Warning: package 'ROCR' was built under R version 4.2.2
library(partykit)
## Warning: package 'partykit' was built under R version 4.2.2
## Loading required package: grid
## Loading required package: libcoin
## Warning: package 'libcoin' was built under R version 4.2.2
## Loading required package: mvtnorm
titanic <- read.csv("train.csv")
str(titanic)
## 'data.frame': 891 obs. of 12 variables:
## $ PassengerId: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Survived : int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Name : chr "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
## $ Sex : chr "male" "female" "female" "female" ...
## $ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Ticket : chr "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Cabin : chr "" "C85" "" "C123" ...
## $ Embarked : chr "S" "C" "S" "S" ...
The name of variables are as follows: 1. PassengerId : Id number 2. Survived : Survival (0 = No, 1 = Yes) 3. Pclass : Ticket class (1 = 1st, 2 = 2nd, 3 = 3rd) 4. Name : Name 5. Sex : Sex 6. Age : Age in years 7. SibSp : # of siblings/spouses aboard the Titanic 8. Parch : # of parents/children aboard the Titanic 9. Ticket : Ticket number 10.Fare : Passenger fare 11.Cabin : Cabin number 12.Embarked : Port of Embarkation C = Cherbourg, Q = Queenstown, S = Southampton
titanic <- titanic %>%
select(-PassengerId, -Name, -Ticket, -Fare) %>%
mutate(Pclass = as.factor(Pclass),
SibSp = as.factor(SibSp),
Parch = as.factor(Parch))
str(titanic)
## 'data.frame': 891 obs. of 8 variables:
## $ Survived: int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : Factor w/ 3 levels "1","2","3": 3 1 3 1 3 3 1 3 3 2 ...
## $ Sex : chr "male" "female" "female" "female" ...
## $ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ SibSp : Factor w/ 7 levels "0","1","2","3",..: 2 2 1 2 1 1 1 4 1 2 ...
## $ Parch : Factor w/ 7 levels "0","1","2","3",..: 1 1 1 1 1 1 1 2 3 1 ...
## $ Cabin : chr "" "C85" "" "C123" ...
## $ Embarked: chr "S" "C" "S" "S" ...
titanic %>%
is.na() %>%
colSums(is.na(titanic))
## Survived Pclass Sex Age SibSp Parch Cabin Embarked
## 0 0 0 177 0 0 0 0
mean_impute<-function(x){
ifelse(is.na(x),mean(x,na.rm = T),x)
}
Age_1 <- titanic[,4]
Age_1 <- mean_impute(Age_1)
Age_1 <- as.data.frame(Age_1)
titanic_new <- c(titanic, Age_1)
titanic_new <- as.data.frame(titanic_new)
str(titanic_new)
## 'data.frame': 891 obs. of 9 variables:
## $ Survived: int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : Factor w/ 3 levels "1","2","3": 3 1 3 1 3 3 1 3 3 2 ...
## $ Sex : chr "male" "female" "female" "female" ...
## $ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ SibSp : Factor w/ 7 levels "0","1","2","3",..: 2 2 1 2 1 1 1 4 1 2 ...
## $ Parch : Factor w/ 7 levels "0","1","2","3",..: 1 1 1 1 1 1 1 2 3 1 ...
## $ Cabin : chr "" "C85" "" "C123" ...
## $ Embarked: chr "S" "C" "S" "S" ...
## $ Age_1 : num 22 38 26 35 35 ...
titanic_new %>%
is.na() %>%
colSums(is.na(titanic_new))
## Survived Pclass Sex Age SibSp Parch Cabin Embarked
## 0 0 0 177 0 0 0 0
## Age_1
## 0
titanic_new1 <- titanic_new %>%
select(-Age)
titanic_new1 %>%
is.na() %>%
colSums(is.na(titanic_new1))
## Survived Pclass Sex SibSp Parch Cabin Embarked Age_1
## 0 0 0 0 0 0 0 0
summary(titanic_new1)
## Survived Pclass Sex SibSp Parch Cabin
## Min. :0.0000 1:216 Length:891 0:608 0:678 Length:891
## 1st Qu.:0.0000 2:184 Class :character 1:209 1:118 Class :character
## Median :0.0000 3:491 Mode :character 2: 28 2: 80 Mode :character
## Mean :0.3838 3: 16 3: 5
## 3rd Qu.:1.0000 4: 18 4: 4
## Max. :1.0000 5: 5 5: 5
## 8: 7 6: 1
## Embarked Age_1
## Length:891 Min. : 0.42
## Class :character 1st Qu.:22.00
## Mode :character Median :29.70
## Mean :29.70
## 3rd Qu.:35.00
## Max. :80.00
##
round(prop.table(table(titanic_new1$Survived, titanic_new1$Pclass)),2)
##
## 1 2 3
## 0 0.09 0.11 0.42
## 1 0.15 0.10 0.13
round(prop.table(table(titanic_new1$Survived, titanic_new1$Sex)),2)
##
## female male
## 0 0.09 0.53
## 1 0.26 0.12
round(prop.table(table(titanic_new1$Survived, titanic_new1$SibSp)),2)
##
## 0 1 2 3 4 5 8
## 0 0.45 0.11 0.02 0.01 0.02 0.01 0.01
## 1 0.24 0.13 0.01 0.00 0.00 0.00 0.00
round(prop.table(table(titanic_new1$Survived, titanic_new1$Parch)),2)
##
## 0 1 2 3 4 5 6
## 0 0.50 0.06 0.04 0.00 0.00 0.00 0.00
## 1 0.26 0.07 0.04 0.00 0.00 0.00 0.00
round(prop.table(table(titanic_new1$Survived)),2)
##
## 0 1
## 0.62 0.38
Preparing Train and Test Dataset (Cross Validation)
set.seed(100)
split_nb <- initial_split(data = titanic_new1, prop = 0.8, strata = Survived)
train_nb <- training(split_nb)
test_nb <- testing(split_nb)
prop.table(table(titanic_new1$Survived))
##
## 0 1
## 0.6161616 0.3838384
prop.table(table(titanic_new1$Survived))
##
## 0 1
## 0.6161616 0.3838384
prop.table(table(titanic_new1$Survived))
##
## 0 1
## 0.6161616 0.3838384
For Decision Tree
set.seed(100)
split_dt <- initial_split(data = titanic_new1, prop = 0.8, strata = Survived)
train_dt <- training(split_dt)
test_dt <- testing(split_dt)
prop.table(table(titanic_new1$Survived))
##
## 0 1
## 0.6161616 0.3838384
prop.table(table(train_dt$Survived))
##
## 0 1
## 0.616573 0.383427
prop.table(table(test_dt$Survived))
##
## 0 1
## 0.6145251 0.3854749
Developing Model For Naive Bayes
model_nb <- naiveBayes(formula = Survived ~., data = titanic_new1, laplace = 1)
model_nb
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## 0 1
## 0.6161616 0.3838384
##
## Conditional probabilities:
## Pclass
## Y 1 2 3
## 0 0.1467391 0.1775362 0.6757246
## 1 0.3971014 0.2550725 0.3478261
##
## Sex
## Y female male
## 0 0.1493625 0.8542805
## 1 0.6842105 0.3216374
##
## SibSp
## Y 0 1 2 3 4 5
## 0 0.71762590 0.17625899 0.02877698 0.02338129 0.02877698 0.01079137
## 1 0.60458453 0.32378223 0.04011461 0.01432665 0.01146132 0.00286533
## SibSp
## Y 8
## 0 0.01438849
## 1 0.00286533
##
## Parch
## Y 0 1 2 3 4 5
## 0 0.802158273 0.097122302 0.073741007 0.005395683 0.008992806 0.008992806
## 1 0.670487106 0.189111748 0.117478510 0.011461318 0.002865330 0.005730659
## Parch
## Y 6
## 0 0.003597122
## 1 0.002865330
##
## Cabin
## Y A10 A14 A16 A19 A20
## 0 0.877959927 0.003642987 0.003642987 0.001821494 0.003642987 0.001821494
## 1 0.605263158 0.002923977 0.002923977 0.005847953 0.002923977 0.005847953
## Cabin
## Y A23 A24 A26 A31 A32 A34
## 0 0.001821494 0.003642987 0.001821494 0.001821494 0.003642987 0.001821494
## 1 0.005847953 0.002923977 0.005847953 0.005847953 0.002923977 0.005847953
## Cabin
## Y A36 A5 A6 A7 B101 B102
## 0 0.003642987 0.003642987 0.001821494 0.003642987 0.001821494 0.003642987
## 1 0.002923977 0.002923977 0.005847953 0.002923977 0.005847953 0.002923977
## Cabin
## Y B18 B19 B20 B22 B28 B3
## 0 0.001821494 0.003642987 0.001821494 0.003642987 0.001821494 0.001821494
## 1 0.008771930 0.002923977 0.008771930 0.005847953 0.008771930 0.005847953
## Cabin
## Y B30 B35 B37 B38 B39 B4
## 0 0.003642987 0.001821494 0.003642987 0.003642987 0.001821494 0.001821494
## 1 0.002923977 0.008771930 0.002923977 0.002923977 0.005847953 0.005847953
## Cabin
## Y B41 B42 B49 B5 B50 B51 B53 B55
## 0 0.001821494 0.001821494 0.001821494 0.001821494 0.001821494 0.003642987
## 1 0.005847953 0.005847953 0.008771930 0.008771930 0.005847953 0.005847953
## Cabin
## Y B57 B59 B63 B66 B58 B60 B69 B71 B73 B77
## 0 0.001821494 0.003642987 0.001821494 0.003642987 0.001821494 0.001821494
## 1 0.008771930 0.005847953 0.005847953 0.002923977 0.005847953 0.008771930
## Cabin
## Y B78 B79 B80 B82 B84 B86 B94
## 0 0.001821494 0.001821494 0.001821494 0.003642987 0.003642987 0.003642987
## 1 0.005847953 0.005847953 0.005847953 0.002923977 0.002923977 0.002923977
## Cabin
## Y B96 B98 C101 C103 C104 C106 C110
## 0 0.001821494 0.001821494 0.001821494 0.001821494 0.001821494 0.003642987
## 1 0.014619883 0.005847953 0.005847953 0.005847953 0.005847953 0.002923977
## Cabin
## Y C111 C118 C123 C124 C125 C126
## 0 0.003642987 0.003642987 0.003642987 0.005464481 0.001821494 0.001821494
## 1 0.002923977 0.002923977 0.005847953 0.002923977 0.008771930 0.008771930
## Cabin
## Y C128 C148 C2 C22 C26 C23 C25 C27 C30
## 0 0.003642987 0.001821494 0.003642987 0.005464481 0.005464481 0.003642987
## 1 0.002923977 0.005847953 0.005847953 0.005847953 0.008771930 0.002923977
## Cabin
## Y C32 C45 C46 C47 C49 C50
## 0 0.001821494 0.001821494 0.003642987 0.001821494 0.003642987 0.001821494
## 1 0.005847953 0.005847953 0.002923977 0.005847953 0.002923977 0.005847953
## Cabin
## Y C52 C54 C62 C64 C65 C68 C7
## 0 0.001821494 0.001821494 0.001821494 0.003642987 0.003642987 0.001821494
## 1 0.008771930 0.005847953 0.005847953 0.005847953 0.005847953 0.005847953
## Cabin
## Y C70 C78 C82 C83 C85 C86
## 0 0.001821494 0.003642987 0.003642987 0.003642987 0.001821494 0.003642987
## 1 0.005847953 0.005847953 0.002923977 0.005847953 0.005847953 0.002923977
## Cabin
## Y C87 C90 C91 C92 C93 C95
## 0 0.003642987 0.001821494 0.003642987 0.001821494 0.001821494 0.003642987
## 1 0.002923977 0.005847953 0.002923977 0.008771930 0.008771930 0.002923977
## Cabin
## Y C99 D D10 D12 D11 D15 D17
## 0 0.001821494 0.003642987 0.001821494 0.001821494 0.001821494 0.001821494
## 1 0.005847953 0.008771930 0.005847953 0.005847953 0.005847953 0.008771930
## Cabin
## Y D19 D20 D21 D26 D28 D30
## 0 0.001821494 0.001821494 0.001821494 0.005464481 0.001821494 0.003642987
## 1 0.005847953 0.008771930 0.005847953 0.002923977 0.005847953 0.002923977
## Cabin
## Y D33 D35 D36 D37 D45 D46
## 0 0.001821494 0.001821494 0.001821494 0.001821494 0.001821494 0.003642987
## 1 0.008771930 0.008771930 0.008771930 0.005847953 0.005847953 0.002923977
## Cabin
## Y D47 D48 D49 D50 D56 D6
## 0 0.001821494 0.003642987 0.001821494 0.003642987 0.001821494 0.003642987
## 1 0.005847953 0.002923977 0.005847953 0.002923977 0.005847953 0.002923977
## Cabin
## Y D7 D9 E10 E101 E12 E121
## 0 0.001821494 0.001821494 0.001821494 0.001821494 0.001821494 0.001821494
## 1 0.005847953 0.005847953 0.005847953 0.011695906 0.005847953 0.008771930
## Cabin
## Y E17 E24 E25 E31 E33 E34
## 0 0.001821494 0.001821494 0.001821494 0.003642987 0.001821494 0.001821494
## 1 0.005847953 0.008771930 0.008771930 0.002923977 0.008771930 0.005847953
## Cabin
## Y E36 E38 E40 E44 E46 E49
## 0 0.001821494 0.003642987 0.001821494 0.003642987 0.003642987 0.001821494
## 1 0.005847953 0.002923977 0.005847953 0.005847953 0.002923977 0.005847953
## Cabin
## Y E50 E58 E63 E67 E68 E77
## 0 0.001821494 0.003642987 0.003642987 0.003642987 0.001821494 0.003642987
## 1 0.005847953 0.002923977 0.002923977 0.005847953 0.005847953 0.002923977
## Cabin
## Y E8 F E69 F G63 F G73 F2 F33
## 0 0.001821494 0.001821494 0.003642987 0.005464481 0.003642987 0.001821494
## 1 0.008771930 0.005847953 0.002923977 0.002923977 0.008771930 0.011695906
## Cabin
## Y F38 F4 G6 T
## 0 0.003642987 0.001821494 0.005464481 0.003642987
## 1 0.002923977 0.008771930 0.008771930 0.002923977
##
## Embarked
## Y C Q S
## 0 0.001821494 0.138433515 0.087431694 0.779599271
## 1 0.008771930 0.274853801 0.090643275 0.637426901
##
## Age_1
## Y [,1] [,2]
## 0 30.41510 12.45737
## 1 28.54978 13.77250
For Random Forest
model_rf <- train(form = Survived ~., data = titanic_new1)
## Warning in train.default(x, y, weights = w, ...): You are trying to do
## regression and your outcome only has two possible values Are you trying to do
## classification? If so, use a 2 level factor as your outcome column.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): The response has
## five or fewer unique values. Are you sure you want to do regression?
model_rf
## Random Forest
##
## 891 samples
## 7 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 891, 891, 891, 891, 891, 891, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 0.4721908 0.3353219 0.4570699
## 84 0.3886766 0.3829849 0.2481347
## 166 0.4027872 0.3584555 0.2423302
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 84.
pred_nb <- predict(model_nb, test_nb)
head(pred_nb)
## [1] 0 0 1 0 1 0
## Levels: 0 1