# Data-quality checks on the built-in Titanic data set: missing values,
# structure, outliers and duplicated rows.
# Lines starting with "##" are console output recorded from an earlier run.

# Flatten the Titanic contingency table into a data frame (built once and
# reused by every check below)
data1 <- data.frame(Titanic)

# Number of missing values in each column
colSums(is.na(data1))
## Class Sex Age Survived Freq
## 0 0 0 0 0

# Column types and a preview of the values
str(data1)
## 'data.frame': 32 obs. of 5 variables:
## $ Class : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
## $ Sex : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
## $ Age : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
## $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Freq : num 0 0 35 0 0 0 17 0 118 154 ...

# Freq values that boxplot.stats() reports as outliers; computed once and
# reused for both the values and their count
freq_outliers <- boxplot.stats(data1$Freq)$out
freq_outliers
## [1] 387 670
length(freq_outliers)
## [1] 2

# Number of rows that are exact duplicates of an earlier row
sum(duplicated(data1))
## [1] 0
# Student exam scores (30 observations)
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)

# Histogram of the score distribution
hist(
  nilai,
  breaks = 10,
  col = "skyblue",
  main = "Distribusi Nilai Siswa",
  xlab = "Nilai"
)

# Horizontal boxplot of the same scores
boxplot(
  nilai,
  col = "orange",
  main = "Boxplot Nilai Siswa",
  horizontal = TRUE
)

# Student score data, summary statistics and plots.
# Lines starting with "##" are console output recorded from an earlier run.
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100,
  60, 75, 77, 85, 90, 98, 68, 92,
  85, 66, 75, 80, 72, 84, 50, 69,
  76, 80, 90, 95, 88, 77
)

# Arithmetic mean
mean(nilai)
## [1] 80.4

# Median
median(nilai)
## [1] 80

# Sample standard deviation
sd(nilai)
## [1] 11.48792

# Histogram with labelled axes and white bar borders
hist(nilai, breaks = 10, col = "skyblue", border = "white",
     main = "Distribusi Nilai Siswa", xlab = "Nilai", ylab = "Frekuensi")

# Horizontal boxplot with a labelled value axis
boxplot(nilai, col = "orange", horizontal = TRUE,
        main = "Boxplot Nilai Siswa", xlab = "Nilai")

# Logistic-regression classifier for the mlbench BreastCancer data set:
# clean the data, split 80:20, fit the model and report accuracy.
# Lines starting with "##" are console output recorded from an earlier run.

# Load libraries
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
library(caTools)
## Warning: package 'caTools' was built under R version 4.4.3

# Load data
data("BreastCancer")

# Drop rows that contain missing values
bc <- na.omit(BreastCancer)

# Drop the ID column (not useful for prediction)
bc$Id <- NULL

# Convert every column except Class to numeric.
# mlbench documents these predictors as factors, and as.numeric() on a factor
# returns the internal level codes rather than the labels. The two differ as
# soon as a level is absent (e.g. a column whose levels skip "9" would map the
# label "10" to 9), so convert through as.character() to keep the real values.
# Selecting by name also avoids depending on the column positions 1:9.
predictor_cols <- setdiff(names(bc), "Class")
bc[predictor_cols] <- lapply(
  bc[predictor_cols],
  function(column) as.numeric(as.character(column))
)

# Fix the seed so the split is reproducible, then split 80:20 on Class
set.seed(110)
split <- sample.split(bc$Class, SplitRatio = 0.8)
train <- subset(bc, split == TRUE)
test <- subset(bc, split == FALSE)

# Fit the logistic regression model on the training rows
model <- glm(Class ~ ., data = train, family = binomial)

# Predict: type = "response" yields probabilities, which are thresholded at
# 0.5 and turned into a factor with the same level order as Class
pred_prob <- predict(model, newdata = test, type = "response")
pred_class <- ifelse(pred_prob > 0.5, "malignant", "benign")
pred_class <- factor(pred_class, levels = c("benign", "malignant"))

# Evaluation: confusion matrix and accuracy.
# NOTE(review): the output recorded below predates the label-based numeric
# conversion above; re-run to refresh it, the figures may shift slightly.
conf_matrix <- table(Predicted = pred_class, Actual = test$Class)
print(conf_matrix)
## Actual
## Predicted benign malignant
## benign 88 1
## malignant 1 47
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Akurasi Model:", round(accuracy * 100, 2), "%\n")
## Akurasi Model: 98.54 %
# Prepare the mlbench BreastCancer data and report the train/test dimensions.
# Lines starting with "##" are console output recorded from an earlier run.

# Load libraries
library(mlbench)
library(caTools)

# Load and clean the data
data("BreastCancer")
bc <- na.omit(BreastCancer) # drop rows with missing values
bc$Id <- NULL # drop the ID column

# Convert every column except Class to numeric.
# mlbench documents these predictors as factors; as.numeric() on a factor
# returns level codes, not labels, so go through as.character() to keep the
# real values. Selecting by name avoids depending on column positions 1:9.
predictor_cols <- setdiff(names(bc), "Class")
bc[predictor_cols] <- lapply(
  bc[predictor_cols],
  function(column) as.numeric(as.character(column))
)

# Reproducible 80:20 split on Class
set.seed(110)
split <- sample.split(bc$Class, SplitRatio = 0.8)
train <- subset(bc, split == TRUE)
test <- subset(bc, split == FALSE)

# Show the dimensions (rows x columns) of each partition
cat("Dimensi data training:", nrow(train), "x", ncol(train), "\n")
## Dimensi data training: 546 x 10
cat("Dimensi data testing:", nrow(test), "x", ncol(test), "\n")
## Dimensi data testing: 137 x 10
# Missing-value checks on the airquality and BreastCancer data sets.
# Lines starting with "##" are console output recorded from an earlier run.

# Missing values per column in the built-in airquality data set
colSums(is.na(airquality))
## Ozone Solar.R Wind Temp Month Day
## 37 7 0 0 0 0

# Load the library and data before BreastCancer is first used, so this
# section also runs in a fresh session
library(mlbench)
data("BreastCancer")

# Count missing values per column
colSums(is.na(BreastCancer))
## Id Cl.thickness Cell.size Cell.shape Marg.adhesion
## 0 0 0 0 0
## Epith.c.size Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses
## 0 16 0 0 0
## Class
## 0

# Total number of missing values across the whole data set
sum(is.na(BreastCancer))
## [1] 16

# After inspecting the gaps, keep only the complete rows
bc <- na.omit(BreastCancer)