library("dplyr")
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.2.3
library("ggfortify")
library("gridExtra")
## Warning: package 'gridExtra' was built under R version 4.2.2
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library("carData")
library("car")
## Warning: package 'car' was built under R version 4.2.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library("factoextra")
## Warning: package 'factoextra' was built under R version 4.2.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("corrplot")
## Warning: package 'corrplot' was built under R version 4.2.2
## corrplot 0.92 loaded
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
# Load the laboratory results workbook and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where the file exists at exactly this location.
lab_result_path <- "D:/Lily/(10_25) Lab Result.xlsx"
Data <- readxl::read_excel(path = lab_result_path)
head(x = Data)
## # A tibble: 6 × 8
##   Stasiun `Nitrat (mg/L)` `Fosfat (mg/L)`    DO Salinitas Temperature    pH
##     <dbl>           <dbl>           <dbl> <dbl>     <dbl>       <dbl> <dbl>
## 1       1         0.126             0.212  7.87      1.56        30.5  4.31
## 2       2         0.0747            0.182  4.51      9.92        29.6  8.06
## 3       3         0.0349            0.185  3.66     19.4         31.7  7.93
## 4       4         0.0199            0.189  7.74     18           31.2  8.62
## 5       5         0.0134            0.187  8.45     26           31.4  8.33
## 6       6         0.00737           0.183  8.58     26           30.6  8.22
## # ℹ 1 more variable: Kecerahan <dbl>
# Build the analysis table: drop the station identifier column, coerce every
# remaining column to numeric, then inspect the resulting structure.
data <- mutate(
  select(Data, -Stasiun),
  across(everything(), as.numeric)
)
str(object = data)
## tibble [33 × 7] (S3: tbl_df/tbl/data.frame)
##  $ Nitrat (mg/L): num [1:33] 0.1261 0.0747 0.0349 0.0199 0.0134 ...
##  $ Fosfat (mg/L): num [1:33] 0.212 0.182 0.185 0.189 0.187 0.183 0.184 0.19 0.183 0.192 ...
##  $ DO           : num [1:33] 7.87 4.51 3.66 7.74 8.45 ...
##  $ Salinitas    : num [1:33] 1.56 9.92 19.39 18 26 ...
##  $ Temperature  : num [1:33] 30.5 29.6 31.7 31.2 31.4 ...
##  $ pH           : num [1:33] 4.31 8.06 7.93 8.62 8.33 ...
##  $ Kecerahan    : num [1:33] 25 34.5 28.5 27.5 18.8 ...
#View(data)
# Principal component analysis of the numeric table, with every variable
# scaled to unit variance before the decomposition.
# `scale.` (with the trailing dot) is the actual argument name of prcomp();
# writing `scale` only works through partial argument matching, so the full
# name is used here.
pca_result <- prcomp(data, scale. = TRUE)
print(pca_result)
## Standard deviations (1, .., p=7):
## [1] 1.7146614 1.1504144 1.1119227 0.7918597 0.6081847 0.5491408 0.4490265
## 
## Rotation (n x k) = (7 x 7):
##                       PC1         PC2         PC3        PC4         PC5
## Nitrat (mg/L) -0.49451372  0.15532225 -0.04928073  0.1913844 -0.65543039
## Fosfat (mg/L) -0.36583331 -0.54343058  0.12018123 -0.3743179  0.38422114
## DO            -0.24120447  0.60865967  0.36539539  0.2585531  0.55546102
## Salinitas      0.49897890  0.06408111 -0.11359697 -0.2805945  0.01573817
## Temperature    0.18339329 -0.52003611  0.45487109  0.6567573 -0.03121462
## pH             0.52356319  0.15076909  0.03061512  0.1497428 -0.04666553
## Kecerahan      0.08192147  0.11320932  0.79300998 -0.4732041 -0.33293912
##                       PC6         PC7
## Nitrat (mg/L)  0.33572532 -0.38726491
## Fosfat (mg/L)  0.10920693 -0.50669493
## DO             0.22247952 -0.11382557
## Salinitas      0.80689795 -0.06280237
## Temperature    0.22661971  0.07321612
## pH            -0.33690534 -0.75107070
## Kecerahan     -0.09959524  0.08317414
# Eigenvalue table of the PCA (eigenvalue, percent of variance and cumulative
# percent of variance per dimension); displayed right after it is computed.
eig.val <- factoextra::get_eigenvalue(pca_result)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.9400637        42.000910                    42.00091
## Dim.2  1.3234534        18.906477                    60.90739
## Dim.3  1.2363721        17.662458                    78.56985
## Dim.4  0.6270418         8.957740                    87.52759
## Dim.5  0.3698886         5.284123                    92.81171
## Dim.6  0.3015556         4.307937                    97.11965
## Dim.7  0.2016248         2.880355                   100.00000
#?!eig.val
# Scree plot: variance explained by each principal component
fviz_eig(
  pca_result,
  title = "",
  ylim = c(0, 60),
  addlabels = TRUE
)

# Plot of the individuals (observations)
fviz_pca_ind(
  pca_result,
  title = "",
  col.ind = "steelblue",
  pointsize = 3,
  repel = TRUE
)

# Plot of the contribution of each variable
fviz_pca_var(
  pca_result,
  title = "",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib", # colour = contribution
  repel = TRUE
)

# Combined biplot (individuals + variables)
fviz_pca_biplot(
  pca_result,
  title = "",
  repel = TRUE
)

library(factoextra)

# Extract the PCA scores (one row per observation, one column per component)
pca_scores <- pca_result[["x"]]

# Cluster the observations into two groups using only PC1 and PC2.
# The seed is fixed so the random initialisation is reproducible.
# NOTE(review): kmeans() is run with a single random start here; consider
# whether `nstart` should be raised, and re-check the cluster labels if so.
set.seed(123)
kmeans_result <- kmeans(x = pca_scores[, c("PC1", "PC2")], centers = 2)

# Pair the first two component scores with the assigned cluster label
cluster_pca <- data.frame(
  pca_scores[, c("PC1", "PC2")],
  Cluster = as.factor(kmeans_result[["cluster"]])
)

# Individuals plot of the PCA, coloured by k-means cluster, with a 95%
# ellipse drawn around each cluster
fviz_pca_ind(
  pca_result,
  title = "",
  geom.ind = "point",
  col.ind = as.factor(kmeans_result[["cluster"]]),
  palette = "jco",
  legend.title = "Cluster",
  addEllipses = TRUE,
  ellipse.level = 0.95,
  repel = TRUE
) +
  theme_minimal()

# Biplot of observations and variables, with observations coloured by their
# k-means cluster. The cluster vector is first wrapped in a one-column data
# frame and then read back from that single column.
kmeanss <- as.data.frame(kmeans_result$cluster)
fviz_pca_biplot(
  pca_result,
  geom.ind = c("point", "text"),        # show points + station labels
  label = "all",                        # label both stations and variables
  col.ind = as.factor(kmeanss[[1]]),    # colour per cluster
  palette = "jco",                      # colour palette
  legend.title = "Cluster",
  addEllipses = TRUE,                   # draw the cluster areas
  ellipse.level = 0.95,
  col.var = "red",                      # colour of the variable arrows
  arrowsize = 0.8,                      # arrow size
  repel = TRUE                          # avoid overlapping text
) +
  theme_minimal() +
  ggtitle("") +
  theme(plot.title = element_text(hjust = 0.5))

head(Data[["Stasiun"]])
## [1] 1 2 3 4 5 6
# Convert the score matrix to a data frame so that the station id and the
# cluster label can be stored as extra columns next to the component scores
pca_scores <- as.data.frame(pca_result[["x"]])
pca_scores[["Stasiun"]] <- Data[["Stasiun"]]
pca_scores[["Cluster"]] <- as.factor(kmeans_result[["cluster"]])

# Show each station together with its assigned cluster
print(pca_scores[c("Stasiun", "Cluster")])
##    Stasiun Cluster
## 1        1       2
## 2        2       2
## 3        3       2
## 4        4       2
## 5        5       2
## 6        6       2
## 7        7       2
## 8        8       2
## 9        9       2
## 10      10       2
## 11      11       2
## 12      12       2
## 13      13       1
## 14      14       2
## 15      15       2
## 16      16       2
## 17      17       2
## 18      18       2
## 19      19       1
## 20      20       1
## 21      21       1
## 22      22       1
## 23      23       1
## 24      24       1
## 25      25       1
## 26      26       1
## 27      27       1
## 28      28       1
## 29      29       1
## 30      30       1
## 31      31       1
## 32      32       1
## 33      33       1
# Append the cluster label as a new column of the originally loaded table
cluster_label <- pca_scores[["Cluster"]]
data_clustered <- cbind(Data, Cluster = cluster_label)
print(data_clustered)
##    Stasiun Nitrat (mg/L) Fosfat (mg/L)          DO Salinitas Temperature
## 1        1    0.12607798         0.212  7.87333333   1.56100    30.50000
## 2        2    0.07473914         0.182  4.51333333   9.92390    29.56667
## 3        3    0.03488772         0.185  3.66000000  19.38500    31.70000
## 4        4    0.01985480         0.189  7.74000000  18.00000    31.20000
## 5        5    0.01340199         0.187  8.45000000  26.00000    31.40000
## 6        6    0.00737464         0.183  8.58000000  26.00000    30.60000
## 7        7    0.09324665         0.184  9.39000000  25.00000    29.80000
## 8        8    0.04956609         0.190  9.08000000  25.00000    30.70000
## 9        9    0.03176768         0.183  8.91000000  26.00000    31.20000
## 10      10    0.02829309         0.192 10.04000000  26.00000    30.20000
## 11      11    0.04212054         0.184  9.60000000  27.00000    31.60000
## 12      12    0.04488603         0.189 10.20000000  20.00000    31.90000
## 13      13    0.03389498         0.186  7.44000000  19.00000    36.60000
## 14      14    0.03736957         0.193  8.91000000  18.00000    34.30000
## 15      15    0.03779503         0.188  9.86000000  20.00000    33.60000
## 16      16    0.02098936         0.186 10.50000000  19.00000    32.30000
## 17      17    0.01467837         0.183 10.38000000  24.00000    32.10000
## 18      18    0.01808205         0.186  9.77000000  23.00000    34.50000
## 19      19   -0.00085092         0.189  1.82041636  18.66660    31.13333
## 20      20    0.00368732         0.186  0.09484366  30.00000    30.53333
## 21      21    0.00361641         0.184  0.09380820  33.33333    31.60000
## 22      22    0.00631099         0.187  0.09665549  34.66667    31.90000
## 23      23    0.00794192         0.186  2.16628064  33.33333    32.43333
## 24      24    0.00390005         0.186  0.09495002  31.73333    35.31667
## 25      25    0.01872024         0.184  0.10136012  29.33333    35.06667
## 26      26    0.04552422         0.189  2.94260807  20.33333    36.28333
## 27      27    0.03212223         0.197  0.11456112  20.66667    33.01667
## 28      28    0.01850751         0.194  0.10625376  16.66667    33.33333
## 29      29    0.03304406         0.192  5.92468135  25.66667    33.40000
## 30      30    0.02914401         0.196  0.11257201  21.66667    32.98333
## 31      31   -0.00482188         0.198  7.10005937  20.66667    34.33333
## 32      32    0.00503461         0.188  0.09651730  30.70000    32.65000
## 33      33   -0.00078001         0.196  0.09761000  30.00000    33.91667
##          pH Kecerahan Cluster
## 1  4.313333     25.00       2
## 2  8.060000     34.50       2
## 3  7.926667     28.50       2
## 4  8.620000     27.50       2
## 5  8.330000     18.75       2
## 6  8.220000     47.50       2
## 7  8.240000     50.00       2
## 8  8.140000     47.50       2
## 9  8.240000     85.00       2
## 10 8.150000     60.00       2
## 11 8.100000     45.00       2
## 12 8.120000     90.00       2
## 13 7.940000     45.00       1
## 14 8.000000     75.00       2
## 15 8.120000     57.50       2
## 16 8.130000     50.00       2
## 17 8.140000     40.00       2
## 18 8.020000    115.00       2
## 19 7.966667     43.10       1
## 20 8.163333     58.40       1
## 21 8.390000     54.20       1
## 22 8.466667     36.90       1
## 23 8.610000     21.80       1
## 24 8.416667     48.00       1
## 25 8.606667     34.40       1
## 26 8.423333     43.60       1
## 27 8.193333     51.75       1
## 28 8.203333     50.30       1
## 29 8.343333     46.10       1
## 30 8.266667     52.50       1
## 31 8.236667     66.00       1
## 32 8.383333     44.40       1
## 33 8.590000     74.00       1
# --- Take the first two principal components as the two main factors ---
faktor_df <- as.data.frame(pca_result$x[, c("PC1", "PC2")])
faktor_df <- rename(faktor_df, Faktor1 = PC1, Faktor2 = PC2)
faktor_df <- mutate(faktor_df, Stasiun = Data$Stasiun)

# --- Join the factor scores onto data_clustered, matched on Stasiun ---
data_final <- left_join(data_clustered, faktor_df, by = "Stasiun")

# --- Show the final result ---
print(data_final)
##    Stasiun Nitrat (mg/L) Fosfat (mg/L)          DO Salinitas Temperature
## 1        1    0.12607798         0.212  7.87333333   1.56100    30.50000
## 2        2    0.07473914         0.182  4.51333333   9.92390    29.56667
## 3        3    0.03488772         0.185  3.66000000  19.38500    31.70000
## 4        4    0.01985480         0.189  7.74000000  18.00000    31.20000
## 5        5    0.01340199         0.187  8.45000000  26.00000    31.40000
## 6        6    0.00737464         0.183  8.58000000  26.00000    30.60000
## 7        7    0.09324665         0.184  9.39000000  25.00000    29.80000
## 8        8    0.04956609         0.190  9.08000000  25.00000    30.70000
## 9        9    0.03176768         0.183  8.91000000  26.00000    31.20000
## 10      10    0.02829309         0.192 10.04000000  26.00000    30.20000
## 11      11    0.04212054         0.184  9.60000000  27.00000    31.60000
## 12      12    0.04488603         0.189 10.20000000  20.00000    31.90000
## 13      13    0.03389498         0.186  7.44000000  19.00000    36.60000
## 14      14    0.03736957         0.193  8.91000000  18.00000    34.30000
## 15      15    0.03779503         0.188  9.86000000  20.00000    33.60000
## 16      16    0.02098936         0.186 10.50000000  19.00000    32.30000
## 17      17    0.01467837         0.183 10.38000000  24.00000    32.10000
## 18      18    0.01808205         0.186  9.77000000  23.00000    34.50000
## 19      19   -0.00085092         0.189  1.82041636  18.66660    31.13333
## 20      20    0.00368732         0.186  0.09484366  30.00000    30.53333
## 21      21    0.00361641         0.184  0.09380820  33.33333    31.60000
## 22      22    0.00631099         0.187  0.09665549  34.66667    31.90000
## 23      23    0.00794192         0.186  2.16628064  33.33333    32.43333
## 24      24    0.00390005         0.186  0.09495002  31.73333    35.31667
## 25      25    0.01872024         0.184  0.10136012  29.33333    35.06667
## 26      26    0.04552422         0.189  2.94260807  20.33333    36.28333
## 27      27    0.03212223         0.197  0.11456112  20.66667    33.01667
## 28      28    0.01850751         0.194  0.10625376  16.66667    33.33333
## 29      29    0.03304406         0.192  5.92468135  25.66667    33.40000
## 30      30    0.02914401         0.196  0.11257201  21.66667    32.98333
## 31      31   -0.00482188         0.198  7.10005937  20.66667    34.33333
## 32      32    0.00503461         0.188  0.09651730  30.70000    32.65000
## 33      33   -0.00078001         0.196  0.09761000  30.00000    33.91667
##          pH Kecerahan Cluster     Faktor1     Faktor2
## 1  4.313333     25.00       2 -7.97380766 -1.75112608
## 2  8.060000     34.50       2 -1.74034905  1.36102952
## 3  7.926667     28.50       2 -0.38534339  0.16407286
## 4  8.620000     27.50       2 -0.24020583  0.58565833
## 5  8.330000     18.75       2  0.30682887  0.74020370
## 6  8.220000     47.50       2  0.60394620  1.45037526
## 7  8.240000     50.00       2 -1.15826972  2.19448953
## 8  8.140000     47.50       2 -0.72038019  1.07469933
## 9  8.240000     85.00       2  0.37803464  1.67834489
## 10 8.150000     60.00       2 -0.43811437  1.13959759
## 11 8.100000     45.00       2 -0.05885641  1.38866519
## 12 8.120000     90.00       2 -0.72664554  1.14760557
## 13 7.940000     45.00       1 -0.10640522 -0.68230846
## 14 8.000000     75.00       2 -0.81436190 -0.25265467
## 15 8.120000     57.50       2 -0.48162513  0.48291529
## 16 8.130000     50.00       2 -0.32474291  0.98263830
## 17 8.140000     40.00       2  0.28598376  1.24929883
## 18 8.020000    115.00       2  0.46276281  0.61244742
## 19 7.966667     43.10       1  0.09383190 -0.41859696
## 20 8.163333     58.40       1  1.26427488  0.02877779
## 21 8.390000     54.20       1  1.88561077 -0.03963415
## 22 8.466667     36.90       1  1.76919412 -0.44744944
## 23 8.610000     21.80       1  1.68215368 -0.26510244
## 24 8.416667     48.00       1  2.01043862 -1.31864386
## 25 8.606667     34.40       1  1.75296124 -1.04228781
## 26 8.423333     43.60       1  0.18017604 -1.34542382
## 27 8.193333     51.75       1 -0.34214511 -1.62282583
## 28 8.203333     50.30       1 -0.17529929 -1.56367679
## 29 8.343333     46.10       1  0.09776281 -0.38356832
## 30 8.266667     52.50       1 -0.10221790 -1.51127289
## 31 8.236667     66.00       1  0.07249781 -1.18649711
## 32 8.383333     44.40       1  1.48748685 -0.77046288
## 33 8.590000     74.00       1  1.45482463 -1.67928791