# Explore the built-in Titanic contingency table as a data frame.
# The table is converted once and the result is reused by every check
# below instead of being rebuilt for each call.
titanic_df <- data.frame(Titanic)
titanic_df
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
## 7 3rd Female Child No 17
## 8 Crew Female Child No 0
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 13 1st Female Adult No 4
## 14 2nd Female Adult No 13
## 15 3rd Female Adult No 89
## 16 Crew Female Adult No 3
## 17 1st Male Child Yes 5
## 18 2nd Male Child Yes 11
## 19 3rd Male Child Yes 13
## 20 Crew Male Child Yes 0
## 21 1st Female Child Yes 1
## 22 2nd Female Child Yes 13
## 23 3rd Female Child Yes 14
## 24 Crew Female Child Yes 0
## 25 1st Male Adult Yes 57
## 26 2nd Male Adult Yes 14
## 27 3rd Male Adult Yes 75
## 28 Crew Male Adult Yes 192
## 29 1st Female Adult Yes 140
## 30 2nd Female Adult Yes 80
## 31 3rd Female Adult Yes 76
## 32 Crew Female Adult Yes 20

# Count missing values per column.
colSums(is.na(titanic_df))
## Class Sex Age Survived Freq
## 0 0 0 0 0
# There are no missing values in any variable.

# Outliers in the Freq column: the points boxplot.stats() reports as lying
# beyond the whiskers (default coef = 1.5). Computed once, then reused.
freq_outliers <- boxplot.stats(titanic_df$Freq)$out
freq_outliers
## [1] 387 670
length(freq_outliers)
## [1] 2

# Number of duplicated rows.
sum(duplicated(titanic_df))
## [1] 0
# Mean, median, and standard deviation of a vector of 30 scores.
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)
mean(nilai) # arithmetic mean
## [1] 80.4
median(nilai) # median
## [1] 80
sd(nilai) # sample standard deviation
## [1] 11.48792
# Load the BreastCancer data set (mlbench) and build an 80/20 train/test
# split with caTools. Each package is attached once, up front.
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
library(caTools)
## Warning: package 'caTools' was built under R version 4.4.3
data("BreastCancer")
nrow(BreastCancer)
## [1] 699
# Total number of missing cells across the whole data set.
sum(is.na(BreastCancer))
## [1] 16
# Drop every row that contains at least one missing value.
bc <- na.omit(BreastCancer)
nrow(bc)
## [1] 683
# Seed the RNG once, directly before the only random step, so the split
# is reproducible.
set.seed(110)
# sample.split() returns a logical vector with TRUE marking training rows;
# per the caTools documentation it keeps the class ratio of bc$Class in
# both partitions.
split <- sample.split(bc$Class, SplitRatio = 0.8)
train <- subset(bc, split)
test <- subset(bc, !split)
# Check the dimensions of both partitions.
dim(train)
## [1] 546 11
dim(test)
## [1] 137 11