---
title: "Evaluasi 3 - Klaster Hirarki dan PCA"
output:
  html_document:
    toc: true
    toc_depth: 3
    toc_float: true
    theme: flatly
    highlight: tango
    code_folding: hide
---

STUDI KASUS 1: Klaster Hirarki Produksi Sayuran

1. Pendahuluan

Analisis klaster hirarki digunakan untuk mengelompokkan provinsi berdasarkan kemiripan pola produksi enam jenis sayuran.

2. Load Package

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(cluster)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.5.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode

3. Import Data

# Read the vegetable production table, replace the header with seven short
# column names (province plus six commodities) and inspect the structure.
path_data_1 <- "C:/Users/ASUS/OneDrive/Documents/smt 5/PRAK SMT/evaluasi 3/Data No 1.csv"
nama_kolom <- c(
  "Provinsi", "BawangMerah", "BawangPutih", "Kentang",
  "KembangKol", "Bayam", "CabaiBesar"
)
data_sayur <- read.csv(path_data_1)
colnames(data_sayur) <- nama_kolom
str(data_sayur)
## 'data.frame':    38 obs. of  7 variables:
##  $ Provinsi   : chr  "Aceh" "Sumatera Utara" "Sumatera Barat" "Riau" ...
##  $ BawangMerah: num  131844 580700 2307184 2455 122877 ...
##  $ BawangPutih: num  0 366 8678 0 2260 ...
##  $ Kentang    : num  110241 1366082 174808 0 1379840 ...
##  $ KembangKol : num  4171 633798.4 105884.5 36.5 51665 ...
##  $ Bayam      : num  49610 175810 69060 93013 49968 ...
##  $ CabaiBesar : num  34266 29294.19 2.26 11396.1 11156 ...
# Five-number summary and mean of every production column.
summary(object = data_sayur)
##    Provinsi          BawangMerah       BawangPutih           Kentang       
##  Length:38          Min.   :      0   Min.   :0.000e+00   Min.   :      0  
##  Class :character   1st Qu.:   1916   1st Qu.:0.000e+00   1st Qu.:      0  
##  Mode  :character   Median :   7522   Median :0.000e+00   Median :    116  
##                     Mean   : 548874   Mean   :1.038e+04   Mean   : 334329  
##                     3rd Qu.: 129602   3rd Qu.:4.938e+00   3rd Qu.:  31236  
##                     Max.   :6078967   Max.   :2.778e+05   Max.   :3018745  
##    KembangKol            Bayam          CabaiBesar     
##  Min.   :     0.00   Min.   :     0   Min.   :      0  
##  1st Qu.:    57.12   1st Qu.: 10484   1st Qu.:   2567  
##  Median :   611.25   Median : 24413   Median :  14326  
##  Mean   : 47996.57   Mean   : 44316   Mean   : 101054  
##  3rd Qu.: 11147.36   3rd Qu.: 49271   3rd Qu.:  58826  
##  Max.   :633798.40   Max.   :322040   Max.   :1441340

4. Pengecekan Outlier

# Draw one boxplot per commodity in a 2 x 3 grid to screen for outliers.
# Names are the data_sayur columns, values are the panel titles.
judul_boxplot <- c(
  BawangMerah = "Bawang Merah",
  BawangPutih = "Bawang Putih",
  Kentang     = "Kentang",
  KembangKol  = "Kembang Kol",
  Bayam       = "Bayam",
  CabaiBesar  = "Cabai Besar"
)

# par() returns the previous settings; they are restored after plotting so the
# multi-panel layout does not leak into later figures (e.g. the dendrogram).
old_par <- par(mfrow = c(2, 3))
for (kolom in names(judul_boxplot)) {
  boxplot(data_sayur[[kolom]], main = judul_boxplot[[kolom]])
}
par(old_par)

5. Uji Multikolinieritas (VIF)

# Multicollinearity check for the six clustering variables.
# The auxiliary regression below uses BawangMerah as the response, so
# car::vif() reports only the five predictors: BawangMerah itself gets no VIF
# and the other five are computed without it. The call and its recorded output
# are kept for reference.
vif_model <- lm(BawangMerah ~ BawangPutih + Kentang + KembangKol + Bayam + CabaiBesar, data=data_sayur)
vif(vif_model)
## BawangPutih     Kentang  KembangKol       Bayam  CabaiBesar 
##    1.610504    5.276521    3.293747    4.931705    4.206096

# VIF of every variable, BawangMerah included: the j-th diagonal element of the
# inverse correlation matrix equals 1 / (1 - R_j^2), where R_j^2 comes from
# regressing variable j on the remaining five. solve() stops with an error if
# the correlation matrix is singular (perfect collinearity).
vif_all <- diag(solve(cor(data_sayur[, 2:7])))
round(vif_all, 3)

6. Standarisasi Data

# Standardise the six production columns (columns 2 to 7) to z-scores;
# centring and scaling are spelled out explicitly (both are the defaults).
data_scaled <- scale(data_sayur[, 2:7], center = TRUE, scale = TRUE)

7. Klaster Hirarki (Ward’s Method)

# Agglomerative clustering: Euclidean distances between the standardised rows,
# merged with the "ward.D2" linkage, then drawn as a dendrogram labelled by
# province name.
dist_matrix <- dist(x = data_scaled, method = "euclidean")
hc <- hclust(d = dist_matrix, method = "ward.D2")
plot(x = hc, labels = data_sayur[["Provinsi"]], main = "Dendrogram Ward's")

8. Penentuan dan Visualisasi Klaster

# Cut the dendrogram into three groups and store each province's membership
# as a new column of data_sayur.
clusters <- cutree(tree = hc, k = 3)
data_sayur[["Cluster"]] <- clusters

# Plot the partition: points only, each cluster outlined by its convex hull.
fviz_cluster(
  object = list(data = data_scaled, cluster = clusters),
  geom = "point",
  ellipse.type = "convex",
  main = "Cluster Provinsi"
)

9. Profilisasi Klaster

# Cluster profile: mean production of each commodity within every cluster
# (one row per cluster, original units).
profil_cluster <- data_sayur %>%
  group_by(Cluster) %>%
  summarise(across(.cols = BawangMerah:CabaiBesar, .fns = mean))
profil_cluster
## # A tibble: 3 × 7
##   Cluster BawangMerah BawangPutih  Kentang KembangKol   Bayam CabaiBesar
##     <int>       <dbl>       <dbl>    <dbl>      <dbl>   <dbl>      <dbl>
## 1       1     218587.       3174.   84246.     10544.  27388.     33748.
## 2       2    2448758.       2875. 2293787.    402515. 209824.    761633.
## 3       3    6078967.     277838. 2958773     257821. 123339.    407700.

Kesimpulan Studi Kasus 1

Praktikan dapat menentukan klaster provinsi berdasarkan karakter produksi, memahami profil tiap klaster, dan menggunakan hasil klaster untuk perencanaan pertanian.

STUDI KASUS 2: Analisis PCA pada Data Produksi Sayuran

1. Pendahuluan

PCA digunakan untuk mereduksi dimensi data agar pola utama lebih terlihat.

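2. Load Package

Package yang digunakan sama dengan Studi Kasus 1 (dplyr dan factoextra) dan sudah dimuat pada bagian sebelumnya.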

3. Baca Data

# Read the second data set, keep the province names aside as labels and
# retain every remaining column as the numeric input for the PCA.
path_data_2 <- "C:/Users/ASUS/OneDrive/Documents/smt 5/PRAK SMT/evaluasi 3/DATA no 2.csv"
data <- read.csv(path_data_2, stringsAsFactors = FALSE)
provinsi <- data[["Provinsi"]]
data_num <- select(data, -Provinsi)

4. Deteksi Outlier

# One boxplot per variable in a 3 x 4 grid to screen for outliers.
# par() returns the previous settings; they are restored after plotting so the
# multi-panel layout does not leak into later figures.
old_par <- par(mfrow = c(3, 4))
# seq_along() yields an empty sequence for a zero-column data frame, whereas
# 1:ncol() would iterate over c(1, 0) and fail.
for (i in seq_along(data_num)) {
  boxplot(data_num[[i]], main = colnames(data_num)[i])
}
par(old_par)

5. Transformasi dan Standarisasi

# log1p(x) = log(1 + x) is defined at x = 0, so zero values stay finite.
data_num_log <- log1p(data_num)
# Z-score every log-transformed column (centring and scaling are the defaults).
# NOTE: this reuses the name data_scaled from Studi Kasus 1 and overwrites it.
data_scaled <- scale(data_num_log, center = TRUE, scale = TRUE)

6. PCA

# PCA on the correlation structure of the log-transformed variables.
# prcomp() centres and scales the data itself (center / scale.), so the
# log-transformed table is passed in rather than the already standardised
# data_scaled: standardising twice is redundant and would leave pca$center and
# pca$scale holding ~0 and 1 instead of the real column means and SDs.
# The components are numerically the same as with the pre-scaled input.
pca <- prcomp(data_num_log, center = TRUE, scale. = TRUE)
summary(pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.7542 1.1863 0.86831 0.70555 0.52141 0.40453 0.34284
## Proportion of Variance 0.6896 0.1279 0.06854 0.04525 0.02472 0.01488 0.01069
## Cumulative Proportion  0.6896 0.8175 0.88606 0.93131 0.95603 0.97090 0.98159
##                            PC8    PC9    PC10    PC11
## Standard deviation     0.28898 0.2438 0.20775 0.12809
## Proportion of Variance 0.00759 0.0054 0.00392 0.00149
## Cumulative Proportion  0.98918 0.9946 0.99851 1.00000

7. Eigenvalue dan Variansi

# Eigenvalues are the squared standard deviations of the components; each
# eigenvalue divided by their total is that component's share of the variance.
eig_values <- pca[["sdev"]]^2
prop_var <- eig_values / sum(eig_values)
cbind(
  Eigenvalues = eig_values,
  Proportion  = prop_var,
  Cumulative  = cumsum(prop_var)
)
##       Eigenvalues  Proportion Cumulative
##  [1,]  7.58545940 0.689587218  0.6895872
##  [2,]  1.40721721 0.127928837  0.8175161
##  [3,]  0.75395488 0.068541353  0.8860574
##  [4,]  0.49779931 0.045254483  0.9313119
##  [5,]  0.27187033 0.024715484  0.9560274
##  [6,]  0.16364390 0.014876718  0.9709041
##  [7,]  0.11754028 0.010685480  0.9815896
##  [8,]  0.08350835 0.007591669  0.9891812
##  [9,]  0.05944020 0.005403655  0.9945849
## [10,]  0.04315892 0.003923538  0.9985084
## [11,]  0.01640721 0.001491565  1.0000000

8. Loading (Persamaan PC)

# Rotation matrix of the PCA: one column of variable coefficients per
# principal component (the coefficients of each PC equation).
loadings <- pca[["rotation"]]
loadings
##                                               PC1         PC2         PC3
## Produksi.Bayam..kuintal...Kw.          -0.2059623 -0.63829160  0.17311025
## Produksi.Cabai.Rawit..kuintal...Kw.    -0.3295685  0.19069185 -0.01896695
## Produksi.Kacang.Panjang..kuintal...Kw. -0.3265279  0.17611413  0.28773006
## Produksi.Kangkung..kuintal...Kw.       -0.2735204 -0.49045883 -0.08131981
## Produksi.Ketimun..kuintal...Kw.        -0.3361205  0.09396967  0.26397027
## Produksi.Labu.Siam..kuintal...Kw.      -0.2883397  0.15741172 -0.58605268
## Produksi.Petsai.Sawi..kuintal...Kw.    -0.2865279 -0.32334085 -0.42449739
## Produksi.Semangka..kuintal...Kw.       -0.3042503  0.20076382  0.33197207
## Produksi.Terung..kuintal...Kw.         -0.3464314  0.10897949 -0.01692132
## Produksi.Tomat..kuintal...Kw.          -0.3169130  0.28826386 -0.26575354
## Produksi.Cabai.Keriting.kuintal...Kw.  -0.2762219 -0.10591346  0.32529799
##                                                 PC4         PC5         PC6
## Produksi.Bayam..kuintal...Kw.          -0.214147922 -0.25167718 -0.18880906
## Produksi.Cabai.Rawit..kuintal...Kw.     0.087258453  0.56995151 -0.07731570
## Produksi.Kacang.Panjang..kuintal...Kw. -0.279999690 -0.29431429 -0.03082428
## Produksi.Kangkung..kuintal...Kw.       -0.054033914  0.31377001 -0.35142181
## Produksi.Ketimun..kuintal...Kw.        -0.183366125 -0.29531292  0.22226988
## Produksi.Labu.Siam..kuintal...Kw.      -0.108911972 -0.23513280 -0.45193951
## Produksi.Petsai.Sawi..kuintal...Kw.    -0.005574158  0.12271715  0.72385483
## Produksi.Semangka..kuintal...Kw.       -0.366962845  0.43523765  0.02506549
## Produksi.Terung..kuintal...Kw.          0.187991214 -0.27087286  0.19380770
## Produksi.Tomat..kuintal...Kw.           0.136594735 -0.07186442 -0.08791731
## Produksi.Cabai.Keriting.kuintal...Kw.   0.794374325 -0.03487521 -0.10192056
##                                                PC7         PC8         PC9
## Produksi.Bayam..kuintal...Kw.           0.48098011 -0.19749674  0.30127026
## Produksi.Cabai.Rawit..kuintal...Kw.    -0.10694438 -0.24321831  0.62489344
## Produksi.Kacang.Panjang..kuintal...Kw. -0.23362323 -0.12671986  0.05428230
## Produksi.Kangkung..kuintal...Kw.       -0.48665860 -0.05049410 -0.45430413
## Produksi.Ketimun..kuintal...Kw.        -0.42166219 -0.01124555  0.11174735
## Produksi.Labu.Siam..kuintal...Kw.       0.03335596  0.46631788  0.19466501
## Produksi.Petsai.Sawi..kuintal...Kw.     0.02895394  0.08884252  0.04041902
## Produksi.Semangka..kuintal...Kw.        0.43417536  0.37384182 -0.29541144
## Produksi.Terung..kuintal...Kw.          0.07253891  0.14436494 -0.20651136
## Produksi.Tomat..kuintal...Kw.           0.29656975 -0.66303786 -0.35599940
## Produksi.Cabai.Keriting.kuintal...Kw.   0.06580974  0.23987984  0.02719452
##                                               PC10        PC11
## Produksi.Bayam..kuintal...Kw.           0.11328977  0.03743571
## Produksi.Cabai.Rawit..kuintal...Kw.     0.20967001 -0.10573289
## Produksi.Kacang.Panjang..kuintal...Kw. -0.45937672 -0.57357879
## Produksi.Kangkung..kuintal...Kw.        0.07371769 -0.04407336
## Produksi.Ketimun..kuintal...Kw.         0.13996534  0.65442525
## Produksi.Labu.Siam..kuintal...Kw.      -0.10293388  0.10084961
## Produksi.Petsai.Sawi..kuintal...Kw.    -0.28549026 -0.04720533
## Produksi.Semangka..kuintal...Kw.       -0.05832843  0.11516213
## Produksi.Terung..kuintal...Kw.          0.70809566 -0.38909983
## Produksi.Tomat..kuintal...Kw.          -0.13064730  0.20716170
## Produksi.Cabai.Keriting.kuintal...Kw.  -0.30525709  0.09075057

9. Nilai Komponen Utama

# Component scores per province: the PCA score matrix converted to a data
# frame with the province name prepended as the first column.
Muthia_PCA <- cbind(Provinsi = provinsi, as.data.frame(pca[["x"]]))
head(Muthia_PCA, n = 6L)
##           Provinsi        PC1         PC2        PC3         PC4         PC5
## 1             Aceh -1.6517783  0.08765886  0.9258896  0.36275293 -0.03654865
## 2   Sumatera Utara -4.4344646 -0.86393947 -0.2107896  0.28291071 -0.01480195
## 3   Sumatera Barat -3.1279632 -0.09410228 -0.3427372  0.56662586 -0.69911099
## 4             Riau -0.4939537 -1.24092590  1.5940780 -0.70253223  0.04751922
## 5            Jambi -2.4671791 -0.11137788 -0.3008030  0.90627664 -0.08311172
## 6 Sumatera Selatan -0.5189068  0.26381668  0.4059626  0.07714545 -0.33955344
##           PC6        PC7         PC8          PC9        PC10        PC11
## 1 -0.67317140 -0.3112623 -0.22794159  0.572172561  0.01405522  0.08772386
## 2  0.11450885  0.3159455 -0.32080938 -0.251687126 -0.03852426 -0.10391759
## 3  0.07563802  0.3464716  0.13104759 -0.105785992  0.16268662 -0.00586412
## 4 -0.18268736 -0.3155946  0.82436483 -0.068192822  0.19139907 -0.10155575
## 5  0.24440945  0.5366365  0.01808147  0.134003376 -0.13529333 -0.17605316
## 6 -0.25187278  0.2850433  0.33163867 -0.002704643 -0.03274722  0.07023665

10. Visualisasi PCA

# Scree plot with value labels on the bars; the y axis is fixed to 0-100.
fviz_eig(
  pca,
  addlabels = TRUE,
  ylim = c(0, 100)
)
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

# Biplot of observations (red) and variables (blue); repel = TRUE keeps the
# text labels from overlapping.
fviz_pca_biplot(
  pca,
  repel = TRUE,
  col.var = "blue",
  col.ind = "red"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Kesimpulan Studi Kasus 2

Praktikan dapat memahami kontribusi variabel terhadap komponen utama dan melihat pola produksi antarprovinsi melalui biplot.

Kesimpulan Akhir

Penggabungan analisis Klaster Hirarki dan PCA memberikan pemahaman komprehensif terkait pola produksi sayuran di Indonesia. Praktikan dapat membuat visualisasi klaster dan PCA, memahami struktur data, serta menggunakan hasil untuk perencanaan pertanian berbasis data.