IMPORT LIBRARY
library("psych")
## Warning: package 'psych' was built under R version 4.5.3
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.5.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library("dplyr")
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("GPArotation")
## Warning: package 'GPArotation' was built under R version 4.5.3
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library("clValid")
## Warning: package 'clValid' was built under R version 4.5.3
## Loading required package: cluster
## Warning: package 'cluster' was built under R version 4.5.3
library("factoextra")
## Warning: package 'factoextra' was built under R version 4.5.3
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library("cluster")
library("car")
## Warning: package 'car' was built under R version 4.5.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
library("openxlsx")
## Warning: package 'openxlsx' was built under R version 4.5.3
LOAD DATASET
# Read the raw indicator table from the Excel workbook and show it.
# NOTE(review): the path is absolute and machine-specific; the workbook must exist at this location.
(data_mentah <- read.xlsx(
  xlsxFile = "C:/Users/msi09/Documents/DATA MINING PUBLIKASI/dataset kalbar.xlsx"
))
## Kab_Kota RLS HLS APS
## 1 Sambas 6.76 12.76 70.58
## 2 Bengkayang 7.41 12.22 74.14
## 3 Landak 7.46 12.54 76.48
## 4 Mempawah 7.21 12.90 71.93
## 5 Sanggau 7.47 11.88 52.88
## 6 Ketapang 7.68 11.97 67.35
## 7 Sintang 7.65 12.32 64.11
## 8 Kapuas Hulu 8.03 12.23 65.91
## 9 Sekadau 7.23 11.93 66.57
## 10 Melawi 7.66 11.37 57.79
## 11 Kayong Utara 6.54 12.20 65.09
## 12 Kubu Raya 7.05 13.90 69.19
## 13 Kota Pontianak 10.47 15.06 80.29
## 14 Kota Singkawang 8.22 12.95 74.49
DESKRIPSI DATA
# Per-column descriptive summary of the raw table.
data_mentah |> summary()
## Kab_Kota RLS HLS APS
## Length:14 Min. : 6.540 Min. :11.37 Min. :52.88
## Class :character 1st Qu.: 7.215 1st Qu.:12.03 1st Qu.:65.30
## Mode :character Median : 7.465 Median :12.28 Median :68.27
## Mean : 7.631 Mean :12.59 Mean :68.34
## 3rd Qu.: 7.675 3rd Qu.:12.87 3rd Qu.:73.59
## Max. :10.470 Max. :15.06 Max. :80.29
PEMBERSIHAN DATA
# Force the three indicator columns to numeric, then inspect the resulting structure.
for (col_name in c("APS", "HLS", "RLS")) {
  data_mentah[[col_name]] <- as.numeric(data_mentah[[col_name]])
}
str(data_mentah)
## 'data.frame': 14 obs. of 4 variables:
## $ Kab_Kota: chr "Sambas" "Bengkayang" "Landak" "Mempawah" ...
## $ RLS : num 6.76 7.41 7.46 7.21 7.47 7.68 7.65 8.03 7.23 7.66 ...
## $ HLS : num 12.8 12.2 12.5 12.9 11.9 ...
## $ APS : num 70.6 74.1 76.5 71.9 52.9 ...
# Show the table again after the numeric coercion above.
data_mentah
## Kab_Kota RLS HLS APS
## 1 Sambas 6.76 12.76 70.58
## 2 Bengkayang 7.41 12.22 74.14
## 3 Landak 7.46 12.54 76.48
## 4 Mempawah 7.21 12.90 71.93
## 5 Sanggau 7.47 11.88 52.88
## 6 Ketapang 7.68 11.97 67.35
## 7 Sintang 7.65 12.32 64.11
## 8 Kapuas Hulu 8.03 12.23 65.91
## 9 Sekadau 7.23 11.93 66.57
## 10 Melawi 7.66 11.37 57.79
## 11 Kayong Utara 6.54 12.20 65.09
## 12 Kubu Raya 7.05 13.90 69.19
## 13 Kota Pontianak 10.47 15.06 80.29
## 14 Kota Singkawang 8.22 12.95 74.49
# Cell-by-cell missing-value check (TRUE marks an NA cell).
data_mentah |> is.na()
## Kab_Kota RLS HLS APS
## 1 FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE
## 3 FALSE FALSE FALSE FALSE
## 4 FALSE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE FALSE
## 6 FALSE FALSE FALSE FALSE
## 7 FALSE FALSE FALSE FALSE
## 8 FALSE FALSE FALSE FALSE
## 9 FALSE FALSE FALSE FALSE
## 10 FALSE FALSE FALSE FALSE
## 11 FALSE FALSE FALSE FALSE
## 12 FALSE FALSE FALSE FALSE
## 13 FALSE FALSE FALSE FALSE
## 14 FALSE FALSE FALSE FALSE
# Show the table with duplicated rows removed.
data_mentah |> unique()
## Kab_Kota RLS HLS APS
## 1 Sambas 6.76 12.76 70.58
## 2 Bengkayang 7.41 12.22 74.14
## 3 Landak 7.46 12.54 76.48
## 4 Mempawah 7.21 12.90 71.93
## 5 Sanggau 7.47 11.88 52.88
## 6 Ketapang 7.68 11.97 67.35
## 7 Sintang 7.65 12.32 64.11
## 8 Kapuas Hulu 8.03 12.23 65.91
## 9 Sekadau 7.23 11.93 66.57
## 10 Melawi 7.66 11.37 57.79
## 11 Kayong Utara 6.54 12.20 65.09
## 12 Kubu Raya 7.05 13.90 69.19
## 13 Kota Pontianak 10.47 15.06 80.29
## 14 Kota Singkawang 8.22 12.95 74.49
# Flag every row that repeats an earlier row, and display the flags.
(duplicate_rows <- data_mentah |> duplicated())
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE
# Number of duplicated rows (count of TRUE flags), displayed immediately.
(duplicate_rows_count <- sum(duplicate_rows))
## [1] 0
VISUALISASI DATA
# Horizontal bar chart of APS per regency/city (column Kab_Kota).
# The left margin scales with the longest name (0.35 per character) and is never below 10.
max_label_len <- max(nchar(as.character(data_mentah$Kab_Kota)))
left_margin <- max(10, 0.35 * max_label_len)
par(mar = c(4, left_margin, 2, 2))

bars <- barplot(
  height = data_mentah$APS,
  names.arg = data_mentah$Kab_Kota, # the column is Kab_Kota, not Kab/Kota
  horiz = TRUE,
  las = 1,
  xlim = c(0, 100),
  main = "BARPLOT APS",
  col = "purple",
  cex.names = 0.85
)

# Write each value to the right of its bar; `bars` supplies the y positions.
text(
  x = data_mentah$APS,
  y = bars,
  labels = data_mentah$APS,
  cex = 0.6,
  pos = 4
)

# Dashed red reference line at the mean APS.
avg_aps <- mean(data_mentah$APS, na.rm = TRUE)
abline(v = avg_aps, lwd = 2, lty = 2, col = "red")

# Horizontal bar chart of HLS per regency/city (column Kab_Kota).
# The left margin scales with the longest name (0.35 per character) and is never below 10.
max_label_len <- max(nchar(as.character(data_mentah$Kab_Kota)))
left_margin <- max(10, 0.35 * max_label_len)
par(mar = c(4, left_margin, 2, 2))

bars <- barplot(
  height = data_mentah$HLS,
  names.arg = data_mentah$Kab_Kota, # the column is Kab_Kota, not Kab/Kota
  horiz = TRUE,
  las = 1,
  xlim = c(0, 20),
  main = "BARPLOT HLS",
  col = "green",
  cex.names = 0.75
)

# Write each value to the right of its bar; `bars` supplies the y positions.
text(
  x = data_mentah$HLS,
  y = bars,
  labels = data_mentah$HLS,
  cex = 0.6,
  pos = 4
)

# Dashed red reference line at the mean HLS.
# Despite its name, `avg_aps` holds the HLS mean from this point on.
avg_aps <- mean(data_mentah$HLS, na.rm = TRUE)
abline(v = avg_aps, lwd = 2, lty = 2, col = "red")

# Horizontal bar chart of RLS per regency/city (column Kab_Kota).
# The left margin scales with the longest name (0.35 per character) and is never below 10.
max_label_len <- max(nchar(as.character(data_mentah$Kab_Kota)))
left_margin <- max(10, 0.35 * max_label_len)
par(mar = c(4, left_margin, 2, 2))

bars <- barplot(
  height = data_mentah$RLS,
  names.arg = data_mentah$Kab_Kota, # the column is Kab_Kota, not Kab/Kota
  horiz = TRUE,
  las = 1,
  xlim = c(0, 15),
  main = "BARPLOT RLS",
  col = "yellow",
  cex.names = 0.75
)

# Write each value to the right of its bar; `bars` supplies the y positions.
text(
  x = data_mentah$RLS,
  y = bars,
  labels = data_mentah$RLS,
  cex = 0.6,
  pos = 4
)

# Dashed red reference line at the mean RLS.
# Despite its name, `avg_aps` holds the RLS mean from this point on.
avg_aps <- mean(data_mentah$RLS, na.rm = TRUE)
abline(v = avg_aps, lwd = 2, lty = 2, col = "red")

STANDARISASI DATA
# Sample standard deviation of APS, ignoring missing values.
data_mentah$APS |> sd(na.rm = TRUE)
## [1] 7.274508
# Sample standard deviation of HLS, ignoring missing values.
data_mentah$HLS |> sd(na.rm = TRUE)
## [1] 0.9355392
# Sample standard deviation of RLS, ignoring missing values.
data_mentah$RLS |> sd(na.rm = TRUE)
## [1] 0.93225
# Standardise the indicator columns (columns 2 to 4) to z-scores with scale().
# The stored matrix keeps full precision so that every later step works on the
# same values as a direct scale() of the raw data; rounding to 4 decimals is
# applied only to what is displayed.
data_standardized <- scale(data_mentah[, 2:4])
round(data_standardized, 4)
## RLS HLS APS
## 1 -0.9348 0.1840 0.3075
## 2 -0.2375 -0.3932 0.7969
## 3 -0.1839 -0.0512 1.1186
## 4 -0.4521 0.3337 0.4931
## 5 -0.1732 -0.7566 -2.1256
## 6 0.0521 -0.6604 -0.1365
## 7 0.0199 -0.2863 -0.5819
## 8 0.4275 -0.3825 -0.3344
## 9 -0.4306 -0.7032 -0.2437
## 10 0.0306 -1.3018 -1.4507
## 11 -1.1707 -0.4146 -0.4472
## 12 -0.6237 1.4026 0.1165
## 13 3.0449 2.6425 1.6423
## 14 0.6313 0.3871 0.8450
## attr(,"scaled:center")
## RLS HLS APS
## 7.631429 12.587857 68.342857
## attr(,"scaled:scale")
## RLS HLS APS
## 0.9322500 0.9355392 7.2745081
UJI ASUMSI DAN MULTIKOLINEARITAS
# Work on a copy of the standardised matrix for the assumption checks,
# then compute the Kaiser-Meyer-Olkin sampling-adequacy measure on it.
data_ujiasumsi <- data_standardized
data_ujiasumsi |> KMO()
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_ujiasumsi)
## Overall MSA = 0.63
## MSA for each item =
## RLS HLS APS
## 0.69 0.58 0.64
# Pearson correlation matrix of the standardised indicators (multicollinearity check).
data_ujiasumsi |> cor(method = "pearson")
## RLS HLS APS
## RLS 1.000000 0.6054820 0.4112800
## HLS 0.605482 1.0000000 0.6860695
## APS 0.411280 0.6860695 1.0000000
MENENTUKAN JUMLAH KLUSTER
# Estimate the number of clusters with three criteria (within-cluster sum of
# squares, average silhouette, gap statistic) for both k-means and PAM.
# k-means starts from random centres and the gap statistic uses bootstrap
# samples, so the seed is fixed to make the plots reproducible.
set.seed(123)

# k-means: nstart = 25 is forwarded to kmeans() so each k uses the same
# multi-start setting as the final k-means model.
fviz_nbclust(data_ujiasumsi, FUNcluster = kmeans, method = "wss", nstart = 25)

fviz_nbclust(data_ujiasumsi, FUNcluster = kmeans, method = "silhouette", nstart = 25)

fviz_nbclust(data_ujiasumsi, FUNcluster = kmeans, method = "gap_stat", nstart = 25)

# PAM (k-medoids)
fviz_nbclust(data_ujiasumsi, FUNcluster = pam, method = "wss")

fviz_nbclust(data_ujiasumsi, FUNcluster = pam, method = "silhouette")

fviz_nbclust(data_ujiasumsi, FUNcluster = pam, method = "gap_stat")

K MEANS 2 KLUSTER
# Final k-means model with 2 clusters, best of 25 random starts.
# The seed is fixed because the starting centres are random; without it the
# cluster numbering (1 vs 2) can change from one run to the next.
set.seed(123)
final2 <- kmeans(data_ujiasumsi, centers = 2, nstart = 25)
final2
## K-means clustering with 2 clusters of sizes 13, 1
##
## Cluster means:
## RLS HLS APS
## 1 -0.2342385 -0.2032615 -0.1263385
## 2 3.0449000 2.6425000 1.6423000
##
## Clustering vector:
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1 1 1 1 1 1 1 1 1 1 1 1 2 1
##
## Within cluster sum of squares by cluster:
## [1] 18.59114 0.00000
## (between_SS / total_SS = 52.3 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Scatter plot of the 2-cluster k-means solution: points only, with
# normal-type ellipses requested per cluster, on a minimal ggplot theme.
hasil_cluster <- fviz_cluster(
  object = final2,
  data = data_ujiasumsi,
  ggtheme = theme_minimal(),
  ellipse.type = "norm",
  geom = "point"
)
hasil_cluster
## Too few points to calculate an ellipse

# Attach each row's k-means cluster label to the raw table and display it.
(data_with_clusters <- data.frame(data_mentah, Cluster = final2[["cluster"]]))
## Kab_Kota RLS HLS APS Cluster
## 1 Sambas 6.76 12.76 70.58 1
## 2 Bengkayang 7.41 12.22 74.14 1
## 3 Landak 7.46 12.54 76.48 1
## 4 Mempawah 7.21 12.90 71.93 1
## 5 Sanggau 7.47 11.88 52.88 1
## 6 Ketapang 7.68 11.97 67.35 1
## 7 Sintang 7.65 12.32 64.11 1
## 8 Kapuas Hulu 8.03 12.23 65.91 1
## 9 Sekadau 7.23 11.93 66.57 1
## 10 Melawi 7.66 11.37 57.79 1
## 11 Kayong Utara 6.54 12.20 65.09 1
## 12 Kubu Raya 7.05 13.90 69.19 1
## 13 Kota Pontianak 10.47 15.06 80.29 2
## 14 Kota Singkawang 8.22 12.95 74.49 1
SINGLE LINKAGE
# Pairwise Euclidean distances between the standardised observations.
(jarak <- data_standardized |> dist(method = "euclidean"))
## 1 2 3 4 5 6 7
## 2 1.0290284
## 3 1.1300677 0.4725758
## 4 0.5383835 0.8165361 0.7818756
## 5 2.7174875 2.9457088 3.3200207 2.8502858
## 6 1.3726321 1.0131631 1.4149546 1.2801752 2.0041291
## 7 1.3869631 1.4066882 1.7287298 1.3277082 1.6252629 0.5825537
## 8 1.6089808 1.3123182 1.6108317 1.4040637 1.9259254 0.5072652 0.4864653
## 9 1.1598116 1.1028309 1.5303026 1.2722021 1.9001721 0.4963093 0.7008103
## 10 2.4961703 2.4390853 2.8655389 2.5857722 0.8912169 1.4625245 1.3364760
## 11 0.9917373 1.5553472 1.8861504 1.4001796 1.9821697 1.2853762 1.2050450
## 12 1.2721046 1.9588242 1.8196601 1.1462205 3.1451721 2.1855625 1.9376188
## 13 4.8645581 4.5502006 4.2373849 4.3451383 6.0089199 4.7989717 4.7618862
## 14 1.6681802 1.1687579 0.9651502 1.1403687 3.2832506 1.5479248 1.6921351
## 8 9 10 11 12 13
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9 0.9205491
## 10 1.4995892 1.4240353
## 11 1.6024973 0.8200306 1.7992381
## 12 2.1201213 2.1450933 3.1934313 1.9796925
## 13 4.4619223 5.1797495 5.8489449 5.6109851 4.1622162
## 14 1.4229557 1.8712705 2.9126384 2.3579032 1.7711515 3.3982321
# Single-linkage hierarchical clustering on Euclidean distances between the
# standardised indicator columns (columns 2 to 4 of the raw table).
single_linkage <- data_mentah[, 2:4] |>
  scale() |>
  dist() |>
  hclust(method = "single")
single_linkage
##
## Call:
## hclust(d = dist(scale(data_mentah[, 2:4])), method = "single")
##
## Cluster method : single
## Distance : euclidean
## Number of objects: 14
# Dendrogram of the single-linkage solution, labelled with regency/city names.
# Left margin: 0.25 per character of the longest name, with a minimum of 5.
left_margin <- max(max_label_len * 0.25, 5)
par(mar = c(5, left_margin, 2, 2))
# The name column is `Kab_Kota`. Referring to `Kab/Kota` (which does not exist)
# yields NULL, and the leaves then fall back to row numbers instead of names.
# NOTE(review): hang = 1 is kept as written; a negative value such as -1 is the
# usual way to align all labels at the baseline -- confirm the intended look.
plot(
  single_linkage,
  labels = data_mentah$Kab_Kota,
  hang = 1,
  col = "red",
  main = "cluster dendogram",
  xlab = "KABUPATEN/KOTA"
)

# Internal validation (connectivity, Dunn, silhouette) of single-linkage
# hierarchical clustering for 2 to 5 clusters on Euclidean distances.
inval <- clValid(
  obj = data_standardized,
  nClust = 2:5,
  clMethods = "hierarchical",
  validation = "internal",
  metric = "euclidean",
  method = "single"
)
inval |> summary()
##
## Clustering Methods:
## hierarchical
##
## Cluster sizes:
## 2 3 4 5
##
## Validation Measures:
## 2 3 4 5
##
## hierarchical Connectivity 2.9290 7.5687 11.3004 19.3877
## Dunn 1.0236 0.5668 0.4861 0.5945
## Silhouette 0.6099 0.4010 0.2591 0.3250
##
## Optimal Scores:
##
## Score Method Clusters
## Connectivity 2.9290 hierarchical 2
## Dunn 1.0236 hierarchical 2
## Silhouette 0.6099 hierarchical 2