library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Titanic data exploration ----
# Flatten the built-in Titanic contingency table into a data frame: one row
# per Class x Sex x Age x Survived combination, with the cell count in Freq.
data1 <- data.frame(Titanic)
str(data1)
## 'data.frame':    32 obs. of  5 variables:
##  $ Class   : Factor w/ 4 levels "1st","2nd","3rd",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Sex     : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 1 1 ...
##  $ Age     : Factor w/ 2 levels "Child","Adult": 1 1 1 1 1 1 1 1 2 2 ...
##  $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Freq    : num  0 0 35 0 0 0 17 0 118 154 ...

# Number of missing values in each column.
vapply(data1, function(column) sum(is.na(column)), numeric(1))
##    Class      Sex      Age Survived     Freq 
##        0        0        0        0        0

# Outlier screening on Freq with the 1.5 * IQR rule: anything below
# Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR is flagged.
Q1 <- quantile(data1$Freq, probs = 0.25)
Q3 <- quantile(data1$Freq, probs = 0.75)
IQR_value <- IQR(data1$Freq)
lower_bound <- Q1 - IQR_value * 1.5
upper_bound <- Q3 + IQR_value * 1.5
outliers <- with(data1, Freq[Freq < lower_bound | Freq > upper_bound])
length(outliers)
## [1] 3

# Rows that repeat an earlier row in full (none are expected here).
subset(data1, duplicated(data1))
## [1] Class    Sex      Age      Survived Freq    
## <0 rows> (or 0-length row.names)

# Math exam score analysis ----

# Exam scores of the 30 students.
nilai <- c(
  70, 75, 80, 85, 85, 90, 95, 100, 60, 75,
  77, 85, 90, 98, 68, 92, 85, 66, 75, 80,
  72, 84, 50, 69, 76, 80, 90, 95, 88, 77
)

# Central tendency (mean, median) and spread (sample standard deviation).
mean(nilai)
## [1] 80.4
median(nilai)
## [1] 80
sd(nilai)
## [1] 11.48792

# BreastCancer data: train/test split ----

library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
data("BreastCancer")
library(caTools)
## Warning: package 'caTools' was built under R version 4.4.3

# Work only with rows that have no missing values.
bc <- na.omit(BreastCancer)

# Seed the RNG right before the only random step so the partition is
# reproducible; `split` is a logical mask over the rows of `bc`
# (TRUE = training row), drawn on the Class labels with an 80/20 ratio.
set.seed(110)
split <- sample.split(bc$Class, SplitRatio = 0.8)
train_data <- bc[split, ]
test_data <- bc[!split, ]

# Resulting partition sizes (rows, columns).
dim(train_data)
## [1] 546  11
dim(test_data)
## [1] 137  11

# Play-golf data: Naive Bayes prediction ----

# 14 observed days: weather conditions and whether golf was played.
data_golf <- data.frame(
  Outlook = c(
    "Rainy", "Rainy", "Overcast", "Sunny", "Sunny", "Sunny", "Overcast",
    "Rainy", "Rainy", "Sunny", "Rainy", "Overcast", "Overcast", "Sunny"
  ),
  Temperature = c(
    "Hot", "Hot", "Hot", "Mild", "Cool", "Cool", "Cool",
    "Mild", "Cool", "Mild", "Mild", "Mild", "Hot", "Mild"
  ),
  Humidity = c(
    "High", "High", "High", "High", "Normal", "Normal", "Normal",
    "High", "Normal", "Normal", "Normal", "High", "Normal", "High"
  ),
  Windy = c(
    FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE,
    FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE
  ),
  PlayGolf = c(
    "No", "No", "Yes", "Yes", "Yes", "No", "Yes",
    "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No"
  )
)

# Every column (including the logical Windy) becomes a factor.
data_golf[] <- lapply(data_golf, factor)

library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3

# Fit the classifier with PlayGolf as the response and all other columns
# as predictors.
model <- naiveBayes(PlayGolf ~ ., data = data_golf)

# New observation to score:
# Outlook = Overcast, Temperature = Mild, Humidity = Normal, Windy = FALSE.
data_baru <- data.frame(
  Outlook = "Overcast",
  Temperature = "Mild",
  Humidity = "Normal",
  Windy = "FALSE"
)
# Re-encode each value as a factor that carries the full level set of the
# matching training column, so the new row lines up with the fitted model.
data_baru[] <- Map(
  function(value, reference) factor(value, levels = levels(reference)),
  data_baru,
  data_golf[names(data_baru)]
)

# Predicted class probabilities for the new observation.
predict(model, newdata = data_baru, type = "raw")
##               No       Yes
## [1,] 0.000202459 0.9997975

# New customer data ----

# 200 customers reconstructed from segment counts. Each column is a run of
# labels repeated `times` times, nested as Income > Gender > Married > Buy.
# Labels are Indonesian: ">=5jt" / "<5jt" = income at least / below 5 million,
# "Perempuan" = female, "Laki-laki" = male, "Ya" = yes, "Tidak" = no.
data <- data.frame(
  Income = rep(c(">=5jt", "<5jt"), times = c(80, 120)),
  Gender = rep(
    c("Perempuan", "Laki-laki", "Perempuan", "Laki-laki"),
    times = c(65, 15, 55, 65)
  ),
  # Ya/Tidak counts within each of the four Income x Gender groups.
  Married = rep(
    rep(c("Ya", "Tidak"), 4),
    times = c(53, 12, 0, 15, 30, 25, 10, 55)
  ),
  # Yes/No counts within each of the eight Income x Gender x Married groups.
  Buy = rep(
    rep(c("Yes", "No"), 8),
    times = c(
      51, 2, 1, 11,
      0, 0, 5, 10,
      20, 10, 10, 15,
      2, 8, 8, 47
    )
  )
)

# Segment of interest: high income, female, married.
target_node <- data[
  data$Income == ">=5jt" & data$Gender == "Perempuan" & data$Married == "Ya",
]

# Share of customers in that segment who bought.
prob <- with(target_node, sum(Buy == "Yes") / length(Buy))
prob 
## [1] 0.9622642