IMPORT LIBRARY

library("psych")
## Warning: package 'psych' was built under R version 4.5.3
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.5.3
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library("dplyr")
## Warning: package 'dplyr' was built under R version 4.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("GPArotation")
## Warning: package 'GPArotation' was built under R version 4.5.3
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library("clValid")
## Warning: package 'clValid' was built under R version 4.5.3
## Loading required package: cluster
## Warning: package 'cluster' was built under R version 4.5.3
library("factoextra")
## Warning: package 'factoextra' was built under R version 4.5.3
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library("cluster")
library("car")
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:psych':
## 
##     logit
library("openxlsx")
## Warning: package 'openxlsx' was built under R version 4.5.3

LOAD DATASET

# Read the dataset from the Excel workbook.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where that exact file location exists.
data_mentah <- read.xlsx(
  xlsxFile = "C:/Users/msi09/Documents/DATA MINING PUBLIKASI/dataset kalbar.xlsx"
)

# Display the imported rows.
data_mentah
##           Kab_Kota   RLS   HLS   APS
## 1           Sambas  6.76 12.76 70.58
## 2       Bengkayang  7.41 12.22 74.14
## 3           Landak  7.46 12.54 76.48
## 4         Mempawah  7.21 12.90 71.93
## 5          Sanggau  7.47 11.88 52.88
## 6         Ketapang  7.68 11.97 67.35
## 7          Sintang  7.65 12.32 64.11
## 8      Kapuas Hulu  8.03 12.23 65.91
## 9          Sekadau  7.23 11.93 66.57
## 10          Melawi  7.66 11.37 57.79
## 11    Kayong Utara  6.54 12.20 65.09
## 12       Kubu Raya  7.05 13.90 69.19
## 13  Kota Pontianak 10.47 15.06 80.29
## 14 Kota Singkawang  8.22 12.95 74.49

DESKRIPSI DATA

# Per-column summary of the raw table.
summary(data_mentah)
##    Kab_Kota              RLS              HLS             APS       
##  Length:14          Min.   : 6.540   Min.   :11.37   Min.   :52.88  
##  Class :character   1st Qu.: 7.215   1st Qu.:12.03   1st Qu.:65.30  
##  Mode  :character   Median : 7.465   Median :12.28   Median :68.27  
##                     Mean   : 7.631   Mean   :12.59   Mean   :68.34  
##                     3rd Qu.: 7.675   3rd Qu.:12.87   3rd Qu.:73.59  
##                     Max.   :10.470   Max.   :15.06   Max.   :80.29

PEMBERSIHAN DATA

# Coerce the three indicator columns to numeric in one assignment, then
# inspect the resulting column types.
data_mentah[c("APS", "HLS", "RLS")] <- lapply(
  data_mentah[c("APS", "HLS", "RLS")],
  as.numeric
)

str(data_mentah)
## 'data.frame':    14 obs. of  4 variables:
##  $ Kab_Kota: chr  "Sambas" "Bengkayang" "Landak" "Mempawah" ...
##  $ RLS     : num  6.76 7.41 7.46 7.21 7.47 7.68 7.65 8.03 7.23 7.66 ...
##  $ HLS     : num  12.8 12.2 12.5 12.9 11.9 ...
##  $ APS     : num  70.6 74.1 76.5 71.9 52.9 ...
# Show the table after the numeric conversion.
data_mentah
##           Kab_Kota   RLS   HLS   APS
## 1           Sambas  6.76 12.76 70.58
## 2       Bengkayang  7.41 12.22 74.14
## 3           Landak  7.46 12.54 76.48
## 4         Mempawah  7.21 12.90 71.93
## 5          Sanggau  7.47 11.88 52.88
## 6         Ketapang  7.68 11.97 67.35
## 7          Sintang  7.65 12.32 64.11
## 8      Kapuas Hulu  8.03 12.23 65.91
## 9          Sekadau  7.23 11.93 66.57
## 10          Melawi  7.66 11.37 57.79
## 11    Kayong Utara  6.54 12.20 65.09
## 12       Kubu Raya  7.05 13.90 69.19
## 13  Kota Pontianak 10.47 15.06 80.29
## 14 Kota Singkawang  8.22 12.95 74.49
# Cell-by-cell missing-value check (TRUE marks an NA).
is.na(data_mentah)
##    Kab_Kota   RLS   HLS   APS
## 1     FALSE FALSE FALSE FALSE
## 2     FALSE FALSE FALSE FALSE
## 3     FALSE FALSE FALSE FALSE
## 4     FALSE FALSE FALSE FALSE
## 5     FALSE FALSE FALSE FALSE
## 6     FALSE FALSE FALSE FALSE
## 7     FALSE FALSE FALSE FALSE
## 8     FALSE FALSE FALSE FALSE
## 9     FALSE FALSE FALSE FALSE
## 10    FALSE FALSE FALSE FALSE
## 11    FALSE FALSE FALSE FALSE
## 12    FALSE FALSE FALSE FALSE
## 13    FALSE FALSE FALSE FALSE
## 14    FALSE FALSE FALSE FALSE
# Show the distinct rows; the result is only printed, not stored.
unique(data_mentah)
##           Kab_Kota   RLS   HLS   APS
## 1           Sambas  6.76 12.76 70.58
## 2       Bengkayang  7.41 12.22 74.14
## 3           Landak  7.46 12.54 76.48
## 4         Mempawah  7.21 12.90 71.93
## 5          Sanggau  7.47 11.88 52.88
## 6         Ketapang  7.68 11.97 67.35
## 7          Sintang  7.65 12.32 64.11
## 8      Kapuas Hulu  8.03 12.23 65.91
## 9          Sekadau  7.23 11.93 66.57
## 10          Melawi  7.66 11.37 57.79
## 11    Kayong Utara  6.54 12.20 65.09
## 12       Kubu Raya  7.05 13.90 69.19
## 13  Kota Pontianak 10.47 15.06 80.29
## 14 Kota Singkawang  8.22 12.95 74.49
# Flag rows that exactly repeat an earlier row.
duplicate_rows <- duplicated(data_mentah)
duplicate_rows
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE
# Count the flagged rows (each TRUE counts as 1).
duplicate_rows_count <- sum(duplicate_rows)
duplicate_rows_count
## [1] 0

VISUALISASI DATA

# Draw one horizontal bar chart per indicator (APS, HLS, RLS), each with the
# value printed at the end of every bar and a dashed red line at the mean.

# Left margin (in lines) sized from the longest Kab/Kota name, never below 10.
max_label_len <- max(nchar(as.character(data_mentah$Kab_Kota)))
left_margin   <- max(max_label_len * 0.35, 10)

#' Horizontal bar chart of one indicator with value labels and a mean line.
#'
#' @param values Numeric vector, one value per bar.
#' @param labels Bar labels, same length as `values`.
#' @param title Plot title.
#' @param fill Bar colour.
#' @param x_max Upper limit of the value axis (the axis runs from 0 to `x_max`).
#' @param cex_names Text size of the bar labels.
#' @param left_margin Left margin in lines, used in `par(mar = ...)`.
#' @return The bar midpoints returned by `barplot()`, invisibly.
plot_indicator_bar <- function(values, labels, title, fill, x_max,
                               cex_names = 0.75, left_margin = 10) {
  # Widen the left margin for this plot only; put the old setting back on exit.
  old_par <- par(mar = c(4, left_margin, 2, 2))
  on.exit(par(old_par), add = TRUE)

  bar_mid <- barplot(
    values,
    col       = fill,
    main      = title,
    horiz     = TRUE,
    las       = 1,
    xlim      = c(0, x_max),
    names.arg = labels,   # column is Kab_Kota, not Kab/Kota
    cex.names = cex_names
  )

  # Value label at the end of each bar.
  text(x = values, y = bar_mid, labels = values, pos = 4, cex = 0.6)

  # Mean line, computed from the same values that were plotted.
  abline(v = mean(values, na.rm = TRUE), col = "red", lty = 2, lwd = 2)

  invisible(bar_mid)
}

plot_indicator_bar(
  data_mentah$APS, data_mentah$Kab_Kota,
  title = "BARPLOT APS", fill = "purple", x_max = 100,
  cex_names = 0.85, left_margin = left_margin
)

plot_indicator_bar(
  data_mentah$HLS, data_mentah$Kab_Kota,
  title = "BARPLOT HLS", fill = "green", x_max = 20,
  cex_names = 0.75, left_margin = left_margin
)

plot_indicator_bar(
  data_mentah$RLS, data_mentah$Kab_Kota,
  title = "BARPLOT RLS", fill = "yellow", x_max = 15,
  cex_names = 0.75, left_margin = left_margin
)

STANDARISASI DATA

# Standard deviation of each indicator before standardization.
sd(data_mentah$APS, na.rm = TRUE)
## [1] 7.274508
sd(data_mentah$HLS, na.rm = TRUE)
## [1] 0.9355392
sd(data_mentah$RLS, na.rm = TRUE)
## [1] 0.93225
## [1] 0.93225
# Z-score columns 2 to 4 (the indicator columns) and round to 4 decimals.
# NOTE(review): the rounded values, not the exact z-scores, feed the later
# steps that use `data_standardized` — confirm this is intended.
data_standardized <- round(scale(data_mentah[, 2:4]), digits = 4)
data_standardized
##        RLS     HLS     APS
## 1  -0.9348  0.1840  0.3075
## 2  -0.2375 -0.3932  0.7969
## 3  -0.1839 -0.0512  1.1186
## 4  -0.4521  0.3337  0.4931
## 5  -0.1732 -0.7566 -2.1256
## 6   0.0521 -0.6604 -0.1365
## 7   0.0199 -0.2863 -0.5819
## 8   0.4275 -0.3825 -0.3344
## 9  -0.4306 -0.7032 -0.2437
## 10  0.0306 -1.3018 -1.4507
## 11 -1.1707 -0.4146 -0.4472
## 12 -0.6237  1.4026  0.1165
## 13  3.0449  2.6425  1.6423
## 14  0.6313  0.3871  0.8450
## attr(,"scaled:center")
##       RLS       HLS       APS 
##  7.631429 12.587857 68.342857 
## attr(,"scaled:scale")
##       RLS       HLS       APS 
## 0.9322500 0.9355392 7.2745081

UJI ASUMSI DAN MULTIKOLINEARITAS

# Use the standardized matrix for the assumption checks, then compute the
# Kaiser-Meyer-Olkin measure of sampling adequacy on it.
data_ujiasumsi <- data_standardized
KMO(r = data_ujiasumsi)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_ujiasumsi)
## Overall MSA =  0.63
## MSA for each item = 
##  RLS  HLS  APS 
## 0.69 0.58 0.64
# Pearson correlation matrix of the standardized indicators.
cor(data_ujiasumsi, method = "pearson")
##          RLS       HLS       APS
## RLS 1.000000 0.6054820 0.4112800
## HLS 0.605482 1.0000000 0.6860695
## APS 0.411280 0.6860695 1.0000000

MENENTUKAN JUMLAH KLUSTER

# Compare candidate cluster counts for k-means and PAM using the elbow (WSS),
# average silhouette, and gap-statistic criteria.
# Seed the RNG first: k-means starts from random centres and the gap statistic
# draws bootstrap reference sets, so unseeded runs can produce different plots.
set.seed(123)

fviz_nbclust(data_ujiasumsi, FUNcluster = kmeans, method = "wss")

fviz_nbclust(data_ujiasumsi, FUNcluster = kmeans, method = "silhouette")

fviz_nbclust(data_ujiasumsi, FUNcluster = kmeans, method = "gap_stat")

fviz_nbclust(data_ujiasumsi, pam, method = "wss")

fviz_nbclust(data_ujiasumsi, pam, method = "silhouette")

fviz_nbclust(data_ujiasumsi, pam, method = "gap_stat")

K MEANS 2 KLUSTER

# Fit the final 2-cluster k-means model on the standardized indicators.
# nstart = 25 keeps the best of 25 random starts; seeding the RNG makes those
# starts, and therefore the cluster numbering, reproducible across runs.
set.seed(123)
final2 <- kmeans(data_ujiasumsi, centers = 2, nstart = 25)
final2
## K-means clustering with 2 clusters of sizes 13, 1
## 
## Cluster means:
##          RLS        HLS        APS
## 1 -0.2342385 -0.2032615 -0.1263385
## 2  3.0449000  2.6425000  1.6423000
## 
## Clustering vector:
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 
##  1  1  1  1  1  1  1  1  1  1  1  1  2  1 
## 
## Within cluster sum of squares by cluster:
## [1] 18.59114  0.00000
##  (between_SS / total_SS =  52.3 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Plot the observations as points grouped by their k-means cluster, asking
# for a "norm" ellipse around each cluster.
hasil_cluster <- fviz_cluster(
  object       = final2,
  data         = data_ujiasumsi,
  geom         = "point",
  ellipse.type = "norm",
  ggtheme      = theme_minimal()
)
hasil_cluster
## Too few points to calculate an ellipse

# Append each row's k-means cluster label to the raw table.
data_with_clusters <- data.frame(data_mentah, Cluster = final2$cluster)
data_with_clusters
##           Kab_Kota   RLS   HLS   APS Cluster
## 1           Sambas  6.76 12.76 70.58       1
## 2       Bengkayang  7.41 12.22 74.14       1
## 3           Landak  7.46 12.54 76.48       1
## 4         Mempawah  7.21 12.90 71.93       1
## 5          Sanggau  7.47 11.88 52.88       1
## 6         Ketapang  7.68 11.97 67.35       1
## 7          Sintang  7.65 12.32 64.11       1
## 8      Kapuas Hulu  8.03 12.23 65.91       1
## 9          Sekadau  7.23 11.93 66.57       1
## 10          Melawi  7.66 11.37 57.79       1
## 11    Kayong Utara  6.54 12.20 65.09       1
## 12       Kubu Raya  7.05 13.90 69.19       1
## 13  Kota Pontianak 10.47 15.06 80.29       2
## 14 Kota Singkawang  8.22 12.95 74.49       1

SINGLE LINKAGE

# Pairwise Euclidean distances between the standardized observations.
jarak <- dist(x = data_standardized, method = "euclidean")
jarak
##            1         2         3         4         5         6         7
## 2  1.0290284                                                            
## 3  1.1300677 0.4725758                                                  
## 4  0.5383835 0.8165361 0.7818756                                        
## 5  2.7174875 2.9457088 3.3200207 2.8502858                              
## 6  1.3726321 1.0131631 1.4149546 1.2801752 2.0041291                    
## 7  1.3869631 1.4066882 1.7287298 1.3277082 1.6252629 0.5825537          
## 8  1.6089808 1.3123182 1.6108317 1.4040637 1.9259254 0.5072652 0.4864653
## 9  1.1598116 1.1028309 1.5303026 1.2722021 1.9001721 0.4963093 0.7008103
## 10 2.4961703 2.4390853 2.8655389 2.5857722 0.8912169 1.4625245 1.3364760
## 11 0.9917373 1.5553472 1.8861504 1.4001796 1.9821697 1.2853762 1.2050450
## 12 1.2721046 1.9588242 1.8196601 1.1462205 3.1451721 2.1855625 1.9376188
## 13 4.8645581 4.5502006 4.2373849 4.3451383 6.0089199 4.7989717 4.7618862
## 14 1.6681802 1.1687579 0.9651502 1.1403687 3.2832506 1.5479248 1.6921351
##            8         9        10        11        12        13
## 2                                                             
## 3                                                             
## 4                                                             
## 5                                                             
## 6                                                             
## 7                                                             
## 8                                                             
## 9  0.9205491                                                  
## 10 1.4995892 1.4240353                                        
## 11 1.6024973 0.8200306 1.7992381                              
## 12 2.1201213 2.1450933 3.1934313 1.9796925                    
## 13 4.4619223 5.1797495 5.8489449 5.6109851 4.1622162          
## 14 1.4229557 1.8712705 2.9126384 2.3579032 1.7711515 3.3982321
# Single-linkage hierarchical clustering of the standardized indicators.
# NOTE(review): the distances are recomputed here from an unrounded scale()
# call instead of reusing `jarak`, which was built from the rounded
# `data_standardized` — confirm which input is intended.
single_linkage = hclust(dist(scale(data_mentah[,2:4])), method = "single")
single_linkage
## 
## Call:
## hclust(d = dist(scale(data_mentah[, 2:4])), method = "single")
## 
## Cluster method   : single 
## Distance         : euclidean 
## Number of objects: 14
# Dendrogram of the single-linkage tree, labelled with the Kab/Kota names.
# The left margin scales with the longest name, with a floor of 5 lines.
# The longest-name length is recomputed here so this plot does not depend on a
# variable set in the bar-chart section.
max_label_len <- max(nchar(as.character(data_mentah$Kab_Kota)))
left_margin   <- max(max_label_len * 0.25, 5)

par(mar = c(5, left_margin, 2, 2))

# The name column is `Kab_Kota`. The previous `Kab/Kota` lookup returned NULL,
# so the leaves were drawn with the tree's default labels instead of the names.
# NOTE(review): hang = 1 is unusual; a negative value makes the labels hang
# down from 0 — confirm the intended value.
plot(single_linkage,
     labels = data_mentah$Kab_Kota,
     hang   = 1,
     col    = "red",
     main   = "cluster dendogram",
     xlab   = "KABUPATEN/KOTA")

# Internal validation (connectivity, Dunn, silhouette) of single-linkage
# hierarchical clustering for 2 to 5 clusters on the standardized data.
inval <- clValid(
  obj        = data_standardized,
  nClust     = 2:5,
  clMethods  = "hierarchical",
  validation = "internal",
  metric     = "euclidean",
  method     = "single"
)
summary(inval)
## 
## Clustering Methods:
##  hierarchical 
## 
## Cluster sizes:
##  2 3 4 5 
## 
## Validation Measures:
##                                  2       3       4       5
##                                                           
## hierarchical Connectivity   2.9290  7.5687 11.3004 19.3877
##              Dunn           1.0236  0.5668  0.4861  0.5945
##              Silhouette     0.6099  0.4010  0.2591  0.3250
## 
## Optimal Scores:
## 
##              Score  Method       Clusters
## Connectivity 2.9290 hierarchical 2       
## Dunn         1.0236 hierarchical 2       
## Silhouette   0.6099 hierarchical 2