4101422146_ProjekDATA MINING

# 1. Memuat Library
library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.4.3

## Warning: package 'ggplot2' was built under R version 4.4.3

## Warning: package 'readr' was built under R version 4.4.3

## Warning: package 'forcats' was built under R version 4.4.3

## Warning: package 'lubridate' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(randomForest)

## Warning: package 'randomForest' was built under R version 4.4.3

## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## The following object is masked from 'package:ggplot2':
## 
##     margin

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift

# Read the weather history CSV into a data frame, then print a compact
# column-by-column overview (name, type, first values) of what was loaded.
# NOTE(review): the path is absolute and specific to one machine — confirm
# whether it should be made relative to the project before sharing.
data <- read.csv(
  file = "D:/SEMESTER ^6^ DPEN/Data Mining/Projek/weatherHistory.csv"
)
glimpse(x = data)

## Rows: 96,453
## Columns: 12
## $ Formatted.Date           <chr> "2006-04-01 00:00:00.000 +0200", "2006-04-01 …
## $ Summary                  <chr> "Partly Cloudy", "Partly Cloudy", "Mostly Clo…
## $ Precip.Type              <chr> "rain", "rain", "rain", "rain", "rain", "rain…
## $ Temperature..C.          <dbl> 9.472222, 9.355556, 9.377778, 8.288889, 8.755…
## $ Apparent.Temperature..C. <dbl> 7.388889, 7.227778, 9.377778, 5.944444, 6.977…
## $ Humidity                 <dbl> 0.89, 0.86, 0.89, 0.83, 0.83, 0.85, 0.95, 0.8…
## $ Wind.Speed..km.h.        <dbl> 14.1197, 14.2646, 3.9284, 14.1036, 11.0446, 1…
## $ Wind.Bearing..degrees.   <dbl> 251, 259, 204, 269, 259, 258, 259, 260, 259, …
## $ Visibility..km.          <dbl> 15.8263, 15.8263, 14.9569, 15.8263, 15.8263, …
## $ Loud.Cover               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Pressure..millibars.     <dbl> 1015.13, 1015.63, 1015.94, 1016.41, 1016.51, …
## $ Daily.Summary            <chr> "Partly cloudy throughout the day.", "Partly …

# Per-column descriptive statistics of the raw import (quartiles for numeric
# columns, length/class for character columns).
summary(object = data)

##  Formatted.Date       Summary          Precip.Type        Temperature..C.  
##  Length:96453       Length:96453       Length:96453       Min.   :-21.822  
##  Class :character   Class :character   Class :character   1st Qu.:  4.689  
##  Mode  :character   Mode  :character   Mode  :character   Median : 12.000  
##                                                           Mean   : 11.933  
##                                                           3rd Qu.: 18.839  
##                                                           Max.   : 39.906  
##  Apparent.Temperature..C.    Humidity      Wind.Speed..km.h.
##  Min.   :-27.717          Min.   :0.0000   Min.   : 0.000   
##  1st Qu.:  2.311          1st Qu.:0.6000   1st Qu.: 5.828   
##  Median : 12.000          Median :0.7800   Median : 9.966   
##  Mean   : 10.855          Mean   :0.7349   Mean   :10.811   
##  3rd Qu.: 18.839          3rd Qu.:0.8900   3rd Qu.:14.136   
##  Max.   : 39.344          Max.   :1.0000   Max.   :63.853   
##  Wind.Bearing..degrees. Visibility..km.   Loud.Cover Pressure..millibars.
##  Min.   :  0.0          Min.   : 0.00   Min.   :0    Min.   :   0        
##  1st Qu.:116.0          1st Qu.: 8.34   1st Qu.:0    1st Qu.:1012        
##  Median :180.0          Median :10.05   Median :0    Median :1016        
##  Mean   :187.5          Mean   :10.35   Mean   :0    Mean   :1003        
##  3rd Qu.:290.0          3rd Qu.:14.81   3rd Qu.:0    3rd Qu.:1021        
##  Max.   :359.0          Max.   :16.10   Max.   :0    Max.   :1046        
##  Daily.Summary     
##  Length:96453      
##  Class :character  
##  Mode  :character  
##                    
##                    
##

# Work on a copy so the raw import in `data` stays untouched.
data_clean <- data

# Build the binary target from the "Summary" text: a row whose summary
# contains the case-sensitive substring "Rain" is labelled "Rain", every
# other row "NotRain".
# The levels are pinned explicitly (instead of being inferred from whatever
# values happen to occur) so both classes always exist, in a fixed order,
# even when one of them has no rows. Code that later names the "Rain" class
# therefore does not depend on the data containing a rainy observation.
data_clean$RainLabel <- factor(
  ifelse(grepl("Rain", data_clean$Summary), "Rain", "NotRain"),
  levels = c("NotRain", "Rain")
)

# Check the class distribution of the new label.
table(data_clean$RainLabel)

## 
## NotRain    Rain 
##   96380      73

# Fix the RNG seed so the random split below is reproducible, then draw the
# row indices for a 70% training partition based on the target label.
set.seed(123)
index <- createDataPartition(
  y = data_clean$RainLabel,
  p = 0.7,
  list = FALSE
)

# Rows not selected by `index` are held out for testing; the selected rows
# form the training set.
test_data <- data_clean[-index, ]
train_data <- data_clean[index, ]

# Fit a 100-tree random forest that predicts RainLabel from the training
# set, using every column except the three text columns excluded in the
# formula, and keep the variable-importance measures.
# NOTE(review): Precip.Type is still a character column at this point (see
# the glimpse output above) and remains a predictor — confirm that leaving
# it un-encoded is intended.
rf_model <- randomForest(
  RainLabel ~ . - Summary - Formatted.Date - Daily.Summary,
  data = train_data,
  importance = TRUE,
  ntree = 100
)

# Predict the class of every held-out row with the fitted forest.
pred_rf <- predict(object = rf_model, newdata = test_data)

# Confusion matrix: compare the predictions with the true labels and report
# the class-wise statistics with "Rain" treated as the positive class.
confusionMatrix(pred_rf, test_data$RainLabel, positive = "Rain")

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction NotRain  Rain
##    NotRain   28914    17
##    Rain          0     4
##                                           
##                Accuracy : 0.9994          
##                  95% CI : (0.9991, 0.9997)
##     No Information Rate : 0.9993          
##     P-Value [Acc > NIR] : 0.2268698       
##                                           
##                   Kappa : 0.3198          
##                                           
##  Mcnemar's Test P-Value : 0.0001042       
##                                           
##             Sensitivity : 0.1904762       
##             Specificity : 1.0000000       
##          Pos Pred Value : 1.0000000       
##          Neg Pred Value : 0.9994124       
##              Prevalence : 0.0007258       
##          Detection Rate : 0.0001382       
##    Detection Prevalence : 0.0001382       
##       Balanced Accuracy : 0.5952381       
##                                           
##        'Positive' Class : Rain            
##

4101422146_ProjekDATA MINING

Dhafa Purbaya N

2025-05-13