library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
# Flatten the 4-way Titanic contingency table into a data frame; the columns
# are Class, Sex, Age, Survived and the cell count Freq.
data1 <- data.frame(Titanic)
# Preview the data frame that the rest of the script works on. Calling head()
# on the raw `Titanic` table truncates nothing (no dimension has more than
# four levels), so it would print all 32 cells instead of a short preview.
head(data1)
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
# Count the missing values in each column of data1.
vapply(data1, function(column) sum(is.na(column)), numeric(1))
## Class Sex Age Survived Freq
## 0 0 0 0 0
# Flag outliers among the Titanic cell counts with the 1.5 * IQR fence rule.
freq_data <- data1[["Freq"]]
quartiles <- quantile(freq_data, probs = c(0.25, 0.75))
Q1 <- quartiles[1]
Q3 <- quartiles[2]
# NOTE: this variable shares its name with stats::IQR(); the name is kept so
# that any later code reading `IQR` keeps working.
IQR <- Q3 - Q1
fence_width <- 1.5 * IQR
lower_bound <- Q1 - fence_width
upper_bound <- Q3 + fence_width
# A value is an outlier when it falls outside [lower_bound, upper_bound].
is_outlier <- !(freq_data >= lower_bound & freq_data <= upper_bound)
outliers <- freq_data[is_outlier]
length(outliers)
## [1] 3
# Number of rows in data1 that exactly repeat an earlier row.
length(which(duplicated(data1)))
## [1] 0
# Thirty scores ("nilai") summarised by mean, median and standard deviation.
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)
mean(nilai)
## [1] 80.4
median(nilai)
## [1] 80
# Sample standard deviation, written as the square root of the variance.
sqrt(var(nilai))
## [1] 11.48792
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
data("BreastCancer")
library(caTools)
## Warning: package 'caTools' was built under R version 4.4.3
# Split BreastCancer into training and test sets with a fixed seed.
set.seed(110)
# sample.split() takes the vector of outcome labels and returns one logical
# flag per element. Handing it the whole data frame yields one flag per
# *column* (11 of them), which subset() then silently recycles across the
# rows, so the result is neither an 80/20 nor a stratified split.
split <- sample.split(BreastCancer[["Class"]], SplitRatio = 0.8)
training_set <- subset(BreastCancer, split)
test_set <- subset(BreastCancer, !split)
dim(training_set)
dim(test_set)
# NOTE(review): re-knit to refresh the printed dimensions; the two row counts
# should add up to nrow(BreastCancer), with roughly 80% in training_set.