# Flatten the `Titanic` contingency table into a data frame once and reuse it:
# one row per Class/Sex/Age/Survived combination, with its count in `Freq`.
titanic_df <- data.frame(Titanic)
titanic_df
##    Class    Sex   Age Survived Freq
## 1    1st   Male Child       No    0
## 2    2nd   Male Child       No    0
## 3    3rd   Male Child       No   35
## 4   Crew   Male Child       No    0
## 5    1st Female Child       No    0
## 6    2nd Female Child       No    0
## 7    3rd Female Child       No   17
## 8   Crew Female Child       No    0
## 9    1st   Male Adult       No  118
## 10   2nd   Male Adult       No  154
## 11   3rd   Male Adult       No  387
## 12  Crew   Male Adult       No  670
## 13   1st Female Adult       No    4
## 14   2nd Female Adult       No   13
## 15   3rd Female Adult       No   89
## 16  Crew Female Adult       No    3
## 17   1st   Male Child      Yes    5
## 18   2nd   Male Child      Yes   11
## 19   3rd   Male Child      Yes   13
## 20  Crew   Male Child      Yes    0
## 21   1st Female Child      Yes    1
## 22   2nd Female Child      Yes   13
## 23   3rd Female Child      Yes   14
## 24  Crew Female Child      Yes    0
## 25   1st   Male Adult      Yes   57
## 26   2nd   Male Adult      Yes   14
## 27   3rd   Male Adult      Yes   75
## 28  Crew   Male Adult      Yes  192
## 29   1st Female Adult      Yes  140
## 30   2nd Female Adult      Yes   80
## 31   3rd Female Adult      Yes   76
## 32  Crew Female Adult      Yes   20

# Number of missing values in each column.
colSums(is.na(titanic_df))
##    Class      Sex      Age Survived     Freq 
##        0        0        0        0        0

# No missing values in any variable.
# Check the number of outliers in the Freq column.

# Values of Freq that boxplot.stats() flags as outliers (points beyond the
# whiskers). Computed once and reused for both the listing and the count.
freq_outliers <- boxplot.stats(data.frame(Titanic)[["Freq"]])$out
freq_outliers
## [1] 387 670
length(freq_outliers)
## [1] 2

# Number of duplicated rows

# Count rows that exactly repeat an earlier row of the Titanic data frame.
data.frame(Titanic) |>
  duplicated() |>
  sum()
## [1] 0

# Mean, median, and standard deviation

# Sample of 30 scores used for the descriptive statistics below.
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)

# Arithmetic mean
mean(nilai)
## [1] 80.4
# Median
median(nilai)
## [1] 80
# Sample standard deviation
sd(nilai)
## [1] 11.48792
# Packages: mlbench supplies the BreastCancer data set, caTools supplies
# sample.split(). Each is loaded exactly once.
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
library(caTools)
## Warning: package 'caTools' was built under R version 4.4.3
data("BreastCancer")

# Row count before cleaning, and total number of missing cells.
nrow(BreastCancer)
## [1] 699
sum(is.na(BreastCancer))
## [1] 16

# Drop every row that contains at least one missing value.
bc <- na.omit(BreastCancer)
nrow(bc)
## [1] 683

# Seed the RNG immediately before the only random step so that the split is
# reproducible (an earlier, separate seed call would have no effect on it).
set.seed(110)

# 80/20 train/test split on the Class labels. `split` is a logical mask with
# one entry per row of `bc`.
# NOTE: the variable name `split` is the same as base::split(); it is kept
# unchanged in case later code refers to it.
split <- sample.split(bc$Class, SplitRatio = 0.8)
train <- subset(bc, split)
test <- subset(bc, !split)

# Check the dimensions of both partitions.

dim(train)
## [1] 546  11
dim(test)
## [1] 137  11