# Flatten the built-in Titanic contingency table into a data frame with the
# factor columns Class, Sex, Age, Survived and the cell count in Freq.
data1 <- data.frame(Titanic)

# Count missing values per column of the data frame. Running the check on
# `data1` (rather than on the 4-D table) yields one NA count per variable
# instead of an array of sums over the Class dimension.
na_counts <- colSums(is.na(data1))
na_counts
## Class Sex Age Survived Freq
## 0 0 0 0 0

# summary() on the table object reports the total number of cases and a
# chi-squared test for independence of all factors.
summary(Titanic)
## Number of cases in table: 2201
## Number of factors: 4
## Test for independence of all factors:
## Chisq = 1637.4, df = 25, p-value = 0
## Chi-squared approximation may be incorrect
# Outlier screening of the cell counts using the 1.5 * IQR rule: any value
# below Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR is treated as an outlier.
freq_data <- data1[["Freq"]]
Q1 <- quantile(freq_data, probs = 0.25)
Q3 <- quantile(freq_data, probs = 0.75)
# NOTE(review): this numeric `IQR` shares its name with stats::IQR(); the name
# is kept unchanged in case later code reads it.
IQR <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR
# Keep only the counts that fall outside the [lower_bound, upper_bound] fences.
outliers <- subset(
  freq_data,
  freq_data < lower_bound | freq_data > upper_bound
)
length(outliers)
## [1] 3
# Number of rows that are exact duplicates of an earlier row.
length(which(duplicated(data1)))
## [1] 0
# Thirty numeric values used for a quick descriptive-statistics example.
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)
# Minimum, quartiles, median, mean and maximum.
summary(nilai)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.0 75.0 80.0 80.4 89.5 100.0
# Standard deviation of the same values.
sd(nilai)
## [1] 11.48792
# Load the BreastCancer data set shipped with mlbench and the caTools package
# that provides sample.split().
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
data("BreastCancer")
library(caTools)
## Warning: package 'caTools' was built under R version 4.4.3

# Fix the RNG seed so the random split is reproducible.
set.seed(110)
# sample.split() expects the vector of outcome labels. Given a whole data
# frame it returns one flag per *column*, and subset() then silently recycles
# that short mask across the rows. Passing the Class column produces one flag
# per row and keeps the class proportions similar in both subsets.
# NOTE(review): SplitRatio = 0.2 puts about 20% of the rows in training_set
# and the rest in test_set -- confirm this direction is intended.
split <- sample.split(BreastCancer$Class, SplitRatio = 0.2)
training_set <- subset(BreastCancer, split == TRUE)
test_set <- subset(BreastCancer, split == FALSE)
dim(training_set)
## roughly 20% of the 699 rows (about 140), 11 columns -- re-run to refresh
dim(test_set)
## the remaining rows (about 559), 11 columns -- re-run to refresh
# Keep only the first four columns of the test set.
# NOTE(review): presumably the inputs handed to a predictor; confirm that
# columns 1:4 (and not rows 1:4 or all non-target columns) are what is wanted.
topredict_set <- test_set[1:4]