# Data-quality checks on the built-in Titanic contingency table ----
# Flattening the table gives one row per Class/Sex/Age/Survived combination,
# with the cell count stored in `Freq`. The frame is built once and reused
# by every check below.
data1 <- data.frame(Titanic)

# Missing values per column
colSums(is.na(data1))
##    Class      Sex      Age Survived     Freq 
##        0        0        0        0        0

# Column types and a preview of the values
str(data1)
## 'data.frame':    32 obs. of  5 variables:
##  $ Class   : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Sex     : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq    : num  0 0 35 0 0 0 17 0 118 154 ...

# Outlying counts according to the boxplot rule. The outlier vector is
# computed once and reused for both the values and how many there are.
freq_outliers <- boxplot.stats(data1$Freq)$out
freq_outliers
## [1] 387 670
length(freq_outliers)
## [1] 2

# Number of fully duplicated rows
sum(duplicated(data1))
## [1] 0
# Student scores (30 observations) used for the two quick-look plots
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)

# Histogram of the score distribution
hist(
  nilai,
  breaks = 10,
  col = "skyblue",
  main = "Distribusi Nilai Siswa",
  xlab = "Nilai"
)

# Horizontal boxplot of the same scores
boxplot(
  nilai,
  col = "orange",
  main = "Boxplot Nilai Siswa",
  horizontal = TRUE
)

# Student score data (30 observations)
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)

# Central tendency and spread: mean, median, standard deviation
mean(nilai)
## [1] 80.4
median(nilai)
## [1] 80
sd(nilai)
## [1] 11.48792

# Histogram with labelled axes and white bar borders
hist(
  nilai,
  breaks = 10,
  col = "skyblue", border = "white",
  main = "Distribusi Nilai Siswa",
  xlab = "Nilai", ylab = "Frekuensi"
)

# Horizontal boxplot with a labelled score axis
boxplot(
  nilai,
  col = "orange",
  main = "Boxplot Nilai Siswa",
  xlab = "Nilai",
  horizontal = TRUE
)

# Logistic regression on the mlbench BreastCancer data ----
# Fits a binomial GLM predicting `Class` from the nine remaining feature
# columns and evaluates it on a held-out 20% test split.

# Load libraries
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
library(caTools)
## Warning: package 'caTools' was built under R version 4.4.3

# Load the data
data("BreastCancer")

# Drop rows that contain missing values
bc <- na.omit(BreastCancer)

# Drop the Id column (an identifier, not useful for prediction)
bc$Id <- NULL

# Convert every column except Class to numeric.
# The feature columns are factors, and as.numeric() applied directly to a
# factor returns the internal level codes, not the labelled values. The two
# differ whenever a column's labels skip a value, so convert through
# as.character() to recover the labelled scores.
feature_cols <- setdiff(names(bc), "Class")
bc[feature_cols] <- lapply(
  bc[feature_cols],
  function(col) as.numeric(as.character(col))
)

# Reproducible 80/20 split on the class labels
set.seed(110)
split <- sample.split(bc$Class, SplitRatio = 0.8)
train <- bc[split, ]
test <- bc[!split, ]

# Fit the logistic regression. With a factor response, glm() treats the
# first level ("benign") as failure, so the fitted probabilities are the
# probability of "malignant".
model <- glm(Class ~ ., data = train, family = binomial)

# Predict on the test set and threshold the probabilities at 0.5
pred_prob <- predict(model, newdata = test, type = "response")
pred_class <- factor(
  ifelse(pred_prob > 0.5, "malignant", "benign"),
  levels = c("benign", "malignant")
)

# Evaluation: confusion matrix and accuracy
# NOTE(review): the outputs recorded below were captured before the
# label-preserving conversion above was introduced; re-run to refresh them.
conf_matrix <- table(Predicted = pred_class, Actual = test$Class)
print(conf_matrix)
##            Actual
## Predicted   benign malignant
##   benign        88         1
##   malignant      1        47
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Akurasi Model:", round(accuracy * 100, 2), "%\n")
## Akurasi Model: 98.54 %
# Train/test split of the BreastCancer data and the resulting dimensions ----

# Load libraries
library(mlbench)
library(caTools)

# Load and clean the data
data("BreastCancer")
bc <- na.omit(BreastCancer)  # drop rows with missing values
bc$Id <- NULL                # drop the identifier column

# Convert every column except Class to numeric. The feature columns are
# factors, and as.numeric() on a factor returns level codes rather than the
# labelled values, so convert through as.character() first.
feature_cols <- setdiff(names(bc), "Class")
bc[feature_cols] <- lapply(
  bc[feature_cols],
  function(col) as.numeric(as.character(col))
)

# Reproducible 80:20 split on the class labels
set.seed(110)
split <- sample.split(bc$Class, SplitRatio = 0.8)
train <- bc[split, ]
test <- bc[!split, ]

# Show the dimensions (rows x columns) of each partition
cat("Dimensi data training:", nrow(train), "x", ncol(train), "\n")
## Dimensi data training: 546 x 10
cat("Dimensi data testing:", nrow(test), "x", ncol(test), "\n")
## Dimensi data testing: 137 x 10
# Missing-value checks ----

# Missing values per column in the built-in airquality data
colSums(is.na(airquality))
##   Ozone Solar.R    Wind    Temp   Month     Day 
##      37       7       0       0       0       0

# Load the library and the data before BreastCancer is used, so this section
# does not depend on an earlier part of the script having loaded it.
library(mlbench)
data("BreastCancer")

# Complete-case copy of the data (rows with any missing value removed)
bc <- na.omit(BreastCancer)

# Number of missing values per column
colSums(is.na(BreastCancer))
##              Id    Cl.thickness       Cell.size      Cell.shape   Marg.adhesion 
##               0               0               0               0               0 
##    Epith.c.size     Bare.nuclei     Bl.cromatin Normal.nucleoli         Mitoses 
##               0              16               0               0               0 
##           Class 
##               0
# Total number of missing values across the whole data set
sum(is.na(BreastCancer))
## [1] 16