This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.6 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(rpart)
library(rsample)
## Warning: package 'rsample' was built under R version 4.2.1
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.2.1
library(ranger)
## Warning: package 'ranger' was built under R version 4.2.1
library(mice)
## Warning: package 'mice' was built under R version 4.2.1
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(e1071)
## Warning: package 'e1071' was built under R version 4.2.1
##
## Attaching package: 'e1071'
## The following object is masked from 'package:rsample':
##
## permutations
library(xgboost)
## Warning: package 'xgboost' was built under R version 4.2.1
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
##
## slice
# Silence all warnings for the rest of the session.
# NOTE(review): this setting is global, so it also hides warnings raised by
# the data loading and modelling code below; consider removing it or wrapping
# only the known-noisy calls in suppressWarnings().
options(warn = -1)

# Load the training and test sets from the working directory. Both reads use
# the same na.strings so that empty fields and the literal string "NA" are
# treated as missing in exactly the same way in the two data frames.
# (sep = "," and header = TRUE are already the read.csv() defaults.)
Train <- read.csv("train.csv", na.strings = c("", "NA"))
Test <- read.csv("test.csv", na.strings = c("", "NA"))
# Preview the first six rows of the raw training data.
head(Train, n = 6L)
## PassengerId HomePlanet CryoSleep Cabin Destination Age VIP RoomService
## 1 0001_01 Europa False B/0/P TRAPPIST-1e 39 False 0
## 2 0002_01 Earth False F/0/S TRAPPIST-1e 24 False 109
## 3 0003_01 Europa False A/0/S TRAPPIST-1e 58 True 43
## 4 0003_02 Europa False A/0/S TRAPPIST-1e 33 False 0
## 5 0004_01 Earth False F/1/S TRAPPIST-1e 16 False 303
## 6 0005_01 Earth False F/0/P PSO J318.5-22 44 False 0
## FoodCourt ShoppingMall Spa VRDeck Name Transported
## 1 0 0 0 0 Maham Ofracculy False
## 2 9 25 549 44 Juanna Vines True
## 3 3576 0 6715 49 Altark Susent False
## 4 1283 371 3329 193 Solam Susent False
## 5 70 151 565 2 Willy Santantines True
## 6 483 0 291 0 Sandie Hinetthews True
# Per-column summary of the training data (types, quartiles, NA counts).
Train %>% summary()
## PassengerId HomePlanet CryoSleep Cabin
## Length:8693 Length:8693 Length:8693 Length:8693
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Destination Age VIP RoomService
## Length:8693 Min. : 0.00 Length:8693 Min. : 0.0
## Class :character 1st Qu.:19.00 Class :character 1st Qu.: 0.0
## Mode :character Median :27.00 Mode :character Median : 0.0
## Mean :28.83 Mean : 224.7
## 3rd Qu.:38.00 3rd Qu.: 47.0
## Max. :79.00 Max. :14327.0
## NA's :179 NA's :181
## FoodCourt ShoppingMall Spa VRDeck
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 0.0 Median : 0.0 Median : 0.0 Median : 0.0
## Mean : 458.1 Mean : 173.7 Mean : 311.1 Mean : 304.9
## 3rd Qu.: 76.0 3rd Qu.: 27.0 3rd Qu.: 59.0 3rd Qu.: 46.0
## Max. :29813.0 Max. :23492.0 Max. :22408.0 Max. :24133.0
## NA's :183 NA's :208 NA's :183 NA's :188
## Name Transported
## Length:8693 Length:8693
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Split Cabin into its three "/"-delimited parts (deck, num, side). The
# delimiter is given explicitly instead of relying on separate()'s default
# "any non-alphanumeric run" pattern.
Train <- separate(Train, Cabin, into = c("deck", "num", "side"), sep = "/")

# separate() yields character columns; the cabin number is used as a numeric.
Train$num <- as.numeric(Train$num)

# Rebuild the data frame so that every character column becomes a factor.
Train <- as.data.frame(unclass(Train), stringsAsFactors = TRUE)

# Recode the "False"/"True" columns as 0/1 factors, drop the identifier
# columns, then fill in missing values with mice's random-forest method and
# extract a completed data set.
# A fixed seed makes the imputation reproducible from one knit to the next;
# without it the imputed values change on every run.
Train <- Train %>%
  mutate(
    CryoSleep = factor(case_when(
      CryoSleep == "False" ~ 0,
      CryoSleep == "True" ~ 1
    )),
    Transported = factor(case_when(
      Transported == "False" ~ 0,
      Transported == "True" ~ 1
    )),
    VIP = factor(case_when(
      VIP == "False" ~ 0,
      VIP == "True" ~ 1
    ))
  ) %>%
  select(-c(PassengerId, Name)) %>%
  mice(method = "rf", seed = 123) %>%
  complete()
##
## iter imp variable
## 1 1 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 1 2 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 1 3 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 1 4 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 1 5 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 2 1 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 2 2 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 2 3 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 2 4 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 2 5 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 3 1 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 3 2 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 3 3 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 3 4 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 3 5 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 4 1 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 4 2 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 4 3 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 4 4 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 4 5 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 5 1 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 5 2 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 5 3 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 5 4 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
## 5 5 HomePlanet CryoSleep deck num side Destination Age VIP RoomService FoodCourt ShoppingMall Spa VRDeck
# Count the missing cells left in Train after imputation.
Train %>%
  is.na() %>%
  sum()
## [1] 0
# Show the column types and first few values of the processed data.
str(object = Train)
## 'data.frame': 8693 obs. of 14 variables:
## $ HomePlanet : Factor w/ 3 levels "Earth","Europa",..: 2 1 2 2 1 1 1 1 1 2 ...
## $ CryoSleep : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 2 ...
## $ deck : Factor w/ 8 levels "A","B","C","D",..: 2 6 1 1 6 6 6 7 6 2 ...
## $ num : num 0 0 0 0 1 0 2 0 3 1 ...
## $ side : Factor w/ 2 levels "P","S": 1 2 2 2 2 1 2 2 2 1 ...
## $ Destination : Factor w/ 3 levels "55 Cancri e",..: 3 3 3 3 3 2 3 3 3 1 ...
## $ Age : num 39 24 58 33 16 44 26 28 35 14 ...
## $ VIP : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ RoomService : num 0 109 43 0 303 0 42 0 0 0 ...
## $ FoodCourt : num 0 9 3576 1283 70 ...
## $ ShoppingMall: num 0 25 0 371 151 0 3 0 17 0 ...
## $ Spa : num 0 549 6715 3329 565 ...
## $ VRDeck : num 0 44 49 193 2 0 0 0 0 0 ...
## $ Transported : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 2 2 2 2 ...
# Keep only the rows whose CryoSleep value is 0.
TrainnoSleep <- Train %>%
  filter(CryoSleep == 0)

# Notched box plots of Age against Transported, coloured by VIP, with
# outliers drawn as small red open circles; coord_flip() swaps the axes.
ggplot(TrainnoSleep, aes(x = Age, y = Transported, color = VIP)) +
  geom_boxplot(
    notch = TRUE,
    outlier.colour = "red",
    outlier.shape = 1,
    outlier.size = 1
  ) +
  coord_flip()