library(dplyr)   # Untuk manipulasi data
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)   # Untuk menangani missing values
## Warning: package 'tidyr' was built under R version 4.4.3
library(ggplot2) # Untuk visualisasi data
library(VIM)     # Untuk visualisasi missing values
## Warning: package 'VIM' was built under R version 4.4.3
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
# Load both example datasets explicitly from the `datasets` package, so the
# script neither relies on lazy-loading nor picks up a same-named dataset
# from another attached package.
data("volcano", package = "datasets")
data("airquality", package = "datasets")
head(airquality)  # Show the first 6 rows (the default of head())
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
summary(object = airquality)  # Per-column summary statistics, incl. NA counts
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 
# Number of missing values in each column of airquality
local({
  na_flags <- is.na(airquality)  # logical matrix, TRUE where a value is NA
  colSums(na_flags)
})
##   Ozone Solar.R    Wind    Temp   Month     Day 
##      37       7       0       0       0       0
# Missing-value aggregation plot for airquality, annotated with absolute
# counts (prop = FALSE) instead of proportions
aggr(
  x = airquality,
  prop = FALSE,
  numbers = TRUE
)

# Reshape the volcano matrix into a long data frame with one row per matrix
# cell; the cell values end up in the `Freq` column
volcano_df <- as.data.frame(
  as.table(volcano),
  responseName = "Freq"
)

# Total number of missing entries across the whole data frame
length(which(is.na(volcano_df)))
## [1] 0
# Missing-value aggregation plot for the volcano data frame.
# `ylabs` is spelled out in full (the previous `ylab` only worked through
# partial argument matching). Its first element labels the per-variable
# bar plot and its second the missingness-pattern panel, hence this order.
aggr(volcano_df, col = c("navyblue", "red"), numbers = TRUE, sortVars = TRUE,
     labels = c("X", "Y", "Elevation"), cex.axis = 0.7, gap = 3,
     ylabs = c("Percentage of Missing Values", "Missing Data Pattern"))

## 
##  Variables sorted by number of missings: 
##   Variable Count
##          X     0
##          Y     0
##  Elevation     0
# Outlier screening of the elevation values using the 1.5 * IQR rule.
elevation <- volcano_df$Freq

# Quartiles. names = FALSE drops the "25%"/"75%" labels, which would
# otherwise carry over (misleadingly) to every value derived below.
Q1 <- quantile(elevation, 0.25, names = FALSE)
Q3 <- quantile(elevation, 0.75, names = FALSE)

# Interquartile range. Deliberately not named `IQR`, so that the
# stats::IQR() function is not shadowed by a global variable.
iqr_value <- Q3 - Q1

# Outlier fences
lower_bound <- Q1 - 1.5 * iqr_value
upper_bound <- Q3 + 1.5 * iqr_value

# Flag every value that lies outside the fences
outliers <- elevation < lower_bound | elevation > upper_bound

# Number of outliers
sum(outliers)
## [1] 0
# Box plot of the elevation values stored in volcano_df$Freq
boxplot(
  x = volcano_df$Freq,
  col = "lightblue",
  main = "Boxplot Elevasi Volcano",
  ylab = "Elevation"
)