library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
library(caTools)
## Warning: package 'caTools' was built under R version 4.4.3

# BREAST CANCER: TRAIN / TEST SPLIT

# Load the BreastCancer data set provided by mlbench.
data("BreastCancer")

# Drop rows containing missing values BEFORE splitting, so the split is
# computed once on the data that is actually used afterwards.
BreastCancer <- na.omit(BreastCancer)

# Seed immediately before the one and only random split so the partition is
# reproducible and does not depend on earlier random-number draws.
set.seed(110)

# Logical mask, TRUE for rows assigned to the training set (80 %), built from
# the Class labels.
split <- sample.split(BreastCancer$Class, SplitRatio = 0.8)
train <- BreastCancer[split, ]
test <- BreastCancer[!split, ]

nrow(train)
## [1] 546
nrow(test)
## [1] 137

# AVERAGE (descriptive statistics of the scores)

# Thirty scores, laid out ten per row for readability.
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)

# Arithmetic mean.
mean(nilai)
## [1] 80.4

# Median (middle value of the sorted scores).
median(nilai)
## [1] 80

# Sample standard deviation.
sd(nilai)
## [1] 11.48792

# TITANIC: duplicate-row check

# Convert the built-in Titanic table into a data frame.
data1 <- data.frame(Titanic)

# Number of rows that exactly repeat an earlier row.
length(which(duplicated(data1)))
## [1] 0

# TITANIC: outlier detection on Freq (1.5 * IQR rule)

# Convert the built-in Titanic table into a data frame; the code below reads
# its Freq column.
data1 <- data.frame(Titanic)

# First and third quartiles of Freq.
Q1 <- quantile(data1$Freq, probs = 0.25)
Q3 <- quantile(data1$Freq, probs = 0.75)

# Interquartile range. NOTE: this variable has the same name as the
# stats::IQR() function.
IQR <- Q3 - Q1

# Fences: values further than 1.5 * IQR outside the quartiles are outliers.
lower_bound <- Q1 - IQR * 1.5
upper_bound <- Q3 + IQR * 1.5

# Freq values lying outside the [lower_bound, upper_bound] interval.
outliers <- with(data1, Freq[Freq < lower_bound | Freq > upper_bound])
data1 <- data.frame(Titanic)
Q1 <- quantile(data1$Freq, 0.25)
Q3 <- quantile(data1$Freq, 0.75)
IQR <- Q3 - Q1