library("dplyr")
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.2.3
library("ggfortify")
library("gridExtra")
## Warning: package 'gridExtra' was built under R version 4.2.2
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library("carData")
library("car")
## Warning: package 'car' was built under R version 4.2.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library("factoextra")
## Warning: package 'factoextra' was built under R version 4.2.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("corrplot")
## Warning: package 'corrplot' was built under R version 4.2.2
## corrplot 0.92 loaded
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
# Load the lab-result workbook and preview its first six rows.
# The path is absolute and specific to the machine the script was written on.
Data <- read_excel(path = "D:/Lily/(10_25) Lab Result.xlsx")
head(Data, n = 6)
## # A tibble: 6 × 8
##   Stasiun `Nitrat (mg/L)` `Fosfat (mg/L)`    DO Salinitas Temperature    pH
##     <dbl>           <dbl>           <dbl> <dbl>     <dbl>       <dbl> <dbl>
## 1       1         0.126             0.212  7.87      1.56        30.5  4.31
## 2       2         0.0747            0.182  4.51      9.92        29.6  8.06
## 3       3         0.0349            0.185  3.66     19.4         31.7  7.93
## 4       4         0.0199            0.189  7.74     18           31.2  8.62
## 5       5         0.0134            0.187  8.45     26           31.4  8.33
## 6       6         0.00737           0.183  8.58     26           30.6  8.22
## # ℹ 1 more variable: Kecerahan <dbl>
# Build the analysis table: drop the station identifier, coerce every
# remaining column to numeric, then inspect the resulting structure.
data <- mutate(
  select(Data, -Stasiun),
  across(everything(), as.numeric)
)
str(data)
## tibble [33 × 7] (S3: tbl_df/tbl/data.frame)
##  $ Nitrat (mg/L): num [1:33] 0.1261 0.0747 0.0349 0.0199 0.0134 ...
##  $ Fosfat (mg/L): num [1:33] 0.212 0.182 0.185 0.189 0.187 0.183 0.184 0.19 0.183 0.192 ...
##  $ DO           : num [1:33] 7.87 4.51 3.66 7.74 8.45 ...
##  $ Salinitas    : num [1:33] 1.56 9.92 19.39 18 26 ...
##  $ Temperature  : num [1:33] 30.5 29.6 31.7 31.2 31.4 ...
##  $ pH           : num [1:33] 4.31 8.06 7.93 8.62 8.33 ...
##  $ Kecerahan    : num [1:33] 25 34.5 28.5 27.5 18.8 ...
#View(data)
# Principal component analysis of the measurement table. Each variable is
# standardised to unit variance before the decomposition (scale. = TRUE).
# The argument is spelled out in full as `scale.`: the shorter `scale` only
# worked through partial argument matching.
pca_result <- prcomp(data, scale. = TRUE)
print(pca_result)
## Standard deviations (1, .., p=7):
## [1] 1.7146614 1.1504144 1.1119227 0.7918597 0.6081847 0.5491408 0.4490265
## 
## Rotation (n x k) = (7 x 7):
##                       PC1         PC2         PC3        PC4         PC5
## Nitrat (mg/L) -0.49451372  0.15532225 -0.04928073  0.1913844 -0.65543039
## Fosfat (mg/L) -0.36583331 -0.54343058  0.12018123 -0.3743179  0.38422114
## DO            -0.24120447  0.60865967  0.36539539  0.2585531  0.55546102
## Salinitas      0.49897890  0.06408111 -0.11359697 -0.2805945  0.01573817
## Temperature    0.18339329 -0.52003611  0.45487109  0.6567573 -0.03121462
## pH             0.52356319  0.15076909  0.03061512  0.1497428 -0.04666553
## Kecerahan      0.08192147  0.11320932  0.79300998 -0.4732041 -0.33293912
##                       PC6         PC7
## Nitrat (mg/L)  0.33572532 -0.38726491
## Fosfat (mg/L)  0.10920693 -0.50669493
## DO             0.22247952 -0.11382557
## Salinitas      0.80689795 -0.06280237
## Temperature    0.22661971  0.07321612
## pH            -0.33690534 -0.75107070
## Kecerahan     -0.09959524  0.08317414
# Eigenvalue table of the PCA (eigenvalue, percentage of variance and
# cumulative percentage per dimension), stored and then displayed.
eig.val <- pca_result %>% get_eigenvalue()
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.9400637        42.000910                    42.00091
## Dim.2  1.3234534        18.906477                    60.90739
## Dim.3  1.2363721        17.662458                    78.56985
## Dim.4  0.6270418         8.957740                    87.52759
## Dim.5  0.3698886         5.284123                    92.81171
## Dim.6  0.3015556         4.307937                    97.11965
## Dim.7  0.2016248         2.880355                   100.00000
# Scree plot: variance explained by each component
fviz_eig(
  pca_result,
  title = "",
  ylim = c(0, 60),
  addlabels = TRUE
)

# Plot of the individuals (observations)
fviz_pca_ind(
  pca_result,
  title = "",
  col.ind = "steelblue",
  pointsize = 3,
  repel = TRUE
)

# Plot of the variables, coloured by their contribution
fviz_pca_var(
  pca_result,
  title = "",
  col.var = "contrib", # colour = contribution
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Combined biplot (individuals + variables)
fviz_pca_biplot(pca_result, title = "", repel = TRUE)

library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Scores of the individuals on the principal components, as a data frame
pca_ind <- as.data.frame(pca_result$x)

# Interactive 3D scatter plot of PC1, PC2 and PC3
plot_ly(
  data = pca_ind,
  x = ~PC1,
  y = ~PC2,
  z = ~PC3,
  type = "scatter3d",
  mode = "markers",
  marker = list(size = 4, color = "steelblue")
) %>%
  layout(
    scene = list(
      xaxis = list(title = "PC1"),
      yaxis = list(title = "PC2"),
      zaxis = list(title = "PC3")
    )
  )
library(factoextra)

# Matrix of PCA scores (one column per principal component)
pca_scores <- pca_result$x

# K-means with 2 clusters on the first three components (PC1, PC2, PC3).
# The seed is fixed so the random initialisation is reproducible.
set.seed(123)
kmeans_result <- kmeans(pca_scores[, c("PC1", "PC2", "PC3")], centers = 2)

# Individuals plot coloured by k-means cluster, with 95% ellipses
fviz_pca_ind(
  pca_result,
  title = "",
  geom.ind = "point",
  col.ind = factor(kmeans_result$cluster),
  palette = "jco",
  legend.title = "Cluster",
  addEllipses = TRUE,
  ellipse.level = 0.95,
  repel = TRUE
) +
  theme_minimal()

library(factoextra)

# Take the PCA scores
pca_scores <- pca_result$x

# Fit k-means with 2 clusters on PC1, PC2 and PC3 (fixed seed)
set.seed(123)
kmeans_result <- kmeans(pca_scores[, c("PC1", "PC2", "PC3")], centers = 2)

# Table of the first three component scores plus the cluster label
cluster_pca <- data.frame(
  pca_scores[, c("PC1", "PC2", "PC3")],
  Cluster = factor(kmeans_result$cluster)
)

# Individuals plot coloured by cluster, with 95% ellipses
fviz_pca_ind(
  pca_result,
  title = "",
  geom.ind = "point",
  col.ind = factor(kmeans_result$cluster),
  palette = "jco",
  legend.title = "Cluster",
  addEllipses = TRUE,
  ellipse.level = 0.95,
  repel = TRUE
) +
  theme_minimal()

# Biplot of individuals and variable arrows, coloured by k-means cluster
fviz_pca_biplot(
  pca_result,
  label = "all",                           # label individuals and variables
  geom.ind = c("point", "text"),           # points plus their labels
  col.ind = factor(kmeans_result$cluster), # one colour per cluster
  palette = "jco",                         # colour palette
  addEllipses = TRUE,                      # draw the cluster regions
  ellipse.level = 0.95,
  col.var = "red",                         # colour of the variable arrows
  arrowsize = 0.8,                         # arrow size
  repel = TRUE,                            # avoid overlapping text
  legend.title = "Cluster"
) +
  ggtitle("") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# --- Step 1: make sure the 'Stasiun' column is present in the data ---
# (adjust the column name here if the workbook uses a different one)
head(Data$Stasiun)
## [1] 1 2 3 4 5 6
# --- Step 2: keep only the measured numeric variables for the PCA ---
# Stasiun is stored as a number, so where(is.numeric) on its own would pass
# the station identifier into the PCA as if it were a measurement. It is
# dropped explicitly first and re-attached to the scores in step 4.
data_num <- Data %>%
  select(-Stasiun) %>%
  select(where(is.numeric))

# --- Step 3: run the PCA on the standardised numeric variables ---
pca_result <- prcomp(data_num, scale. = TRUE)
pca_scores <- as.data.frame(pca_result$x)  # data frame so columns can be added

# --- Step 4: attach the station identifier to the scores ---
pca_scores$Stasiun <- Data$Stasiun

# --- Step 5: k-means with 2 clusters on PC1 and PC2 (fixed seed) ---
set.seed(123)
kmeans_result <- kmeans(pca_scores[, c("PC1", "PC2")], centers = 2)

# --- Step 6: store the cluster label next to the PCA scores ---
pca_scores$Cluster <- as.factor(kmeans_result$cluster)

# --- Step 7: preview the station / cluster table ---
head(pca_scores[, c("Stasiun", "Cluster")])
# NOTE: the listing below was recorded while Stasiun was still part of the
# PCA input. Re-run the script to refresh it; the same applies to any later
# recorded output that shows Cluster or component scores.
##   Stasiun Cluster
## 1       1       2
## 2       2       2
## 3       3       2
## 4       4       2
## 5       5       2
## 6       6       2
# Attach the cluster label to the original data
data_clustered <- cbind(Data, Cluster = pca_scores$Cluster)

# Mean of every numeric column per cluster.
# na.rm = TRUE is supplied through an explicit function: forwarding extra
# arguments through across()'s `...` is deprecated as of dplyr 1.1.0 and
# raised a warning on every run.
# NOTE(review): Stasiun is numeric, so its per-cluster mean is computed as
# well; presumably not meaningful -- confirm before reporting it.
cluster_summary <- data_clustered %>%
  group_by(Cluster) %>%
  summarise(across(where(is.numeric), function(x) mean(x, na.rm = TRUE)))

# Open the data viewer only in interactive sessions; it is a GUI side effect
# that has no use when the script is run non-interactively.
if (interactive()) {
  View(data_clustered)
}
# --- Take the first two principal components as the two main factors ---
faktor_df <- data.frame(
  Faktor1 = pca_result$x[, "PC1"],
  Faktor2 = pca_result$x[, "PC2"],
  Stasiun = Data$Stasiun
)

# --- Join the factor scores onto data_clustered by station ---
data_final <- left_join(data_clustered, faktor_df, by = "Stasiun")

# --- Show the final table ---
print(data_final)
##    Stasiun Nitrat (mg/L) Fosfat (mg/L)          DO Salinitas Temperature
## 1        1    0.12607798         0.212  7.87333333   1.56100    30.50000
## 2        2    0.07473914         0.182  4.51333333   9.92390    29.56667
## 3        3    0.03488772         0.185  3.66000000  19.38500    31.70000
## 4        4    0.01985480         0.189  7.74000000  18.00000    31.20000
## 5        5    0.01340199         0.187  8.45000000  26.00000    31.40000
## 6        6    0.00737464         0.183  8.58000000  26.00000    30.60000
## 7        7    0.09324665         0.184  9.39000000  25.00000    29.80000
## 8        8    0.04956609         0.190  9.08000000  25.00000    30.70000
## 9        9    0.03176768         0.183  8.91000000  26.00000    31.20000
## 10      10    0.02829309         0.192 10.04000000  26.00000    30.20000
## 11      11    0.04212054         0.184  9.60000000  27.00000    31.60000
## 12      12    0.04488603         0.189 10.20000000  20.00000    31.90000
## 13      13    0.03389498         0.186  7.44000000  19.00000    36.60000
## 14      14    0.03736957         0.193  8.91000000  18.00000    34.30000
## 15      15    0.03779503         0.188  9.86000000  20.00000    33.60000
## 16      16    0.02098936         0.186 10.50000000  19.00000    32.30000
## 17      17    0.01467837         0.183 10.38000000  24.00000    32.10000
## 18      18    0.01808205         0.186  9.77000000  23.00000    34.50000
## 19      19   -0.00085092         0.189  1.82041636  18.66660    31.13333
## 20      20    0.00368732         0.186  0.09484366  30.00000    30.53333
## 21      21    0.00361641         0.184  0.09380820  33.33333    31.60000
## 22      22    0.00631099         0.187  0.09665549  34.66667    31.90000
## 23      23    0.00794192         0.186  2.16628064  33.33333    32.43333
## 24      24    0.00390005         0.186  0.09495002  31.73333    35.31667
## 25      25    0.01872024         0.184  0.10136012  29.33333    35.06667
## 26      26    0.04552422         0.189  2.94260807  20.33333    36.28333
## 27      27    0.03212223         0.197  0.11456112  20.66667    33.01667
## 28      28    0.01850751         0.194  0.10625376  16.66667    33.33333
## 29      29    0.03304406         0.192  5.92468135  25.66667    33.40000
## 30      30    0.02914401         0.196  0.11257201  21.66667    32.98333
## 31      31   -0.00482188         0.198  7.10005937  20.66667    34.33333
## 32      32    0.00503461         0.188  0.09651730  30.70000    32.65000
## 33      33   -0.00078001         0.196  0.09761000  30.00000    33.91667
##          pH Kecerahan Cluster     Faktor1     Faktor2
## 1  4.313333     25.00       2 -7.60699518 -3.30328887
## 2  8.060000     34.50       2 -2.47021278  1.46772674
## 3  7.926667     28.50       2 -1.01851160  0.90989852
## 4  8.620000     27.50       2 -0.89070036  1.17252083
## 5  8.330000     18.75       2 -0.39787789  1.51657401
## 6  8.220000     47.50       2 -0.15637996  1.96294916
## 7  8.240000     50.00       2 -1.80949074  1.89263682
## 8  8.140000     47.50       2 -1.18930262  1.05050595
## 9  8.240000     85.00       2 -0.19373272  1.60472783
## 10 8.150000     60.00       2 -0.83218991  0.98952710
## 11 8.100000     45.00       2 -0.53069487  1.42459279
## 12 8.120000     90.00       2 -0.95429898  0.53245643
## 13 7.940000     45.00       1 -0.15503562 -0.47417891
## 14 8.000000     75.00       2 -0.74816558 -0.67357327
## 15 8.120000     57.50       2 -0.56495321  0.16912305
## 16 8.130000     50.00       2 -0.48284141  0.67871937
## 17 8.140000     40.00       2  0.04210964  1.16927975
## 18 8.020000    115.00       1  0.50047658 -0.01188109
## 19 7.966667     43.10       1  0.19984455 -0.25260187
## 20 8.163333     58.40       1  1.24804383  0.40714075
## 21 8.390000     54.20       1  1.85441850  0.53712768
## 22 8.466667     36.90       1  1.83054405  0.23037708
## 23 8.610000     21.80       1  1.74233203  0.38197243
## 24 8.416667     48.00       1  2.29734396 -0.65756642
## 25 8.606667     34.40       1  2.03289498 -0.44301498
## 26 8.423333     43.60       1  0.74270614 -1.46262902
## 27 8.193333     51.75       1  0.36899265 -1.90479233
## 28 8.203333     50.30       1  0.54116121 -1.82374455
## 29 8.343333     46.10       1  0.65448481 -0.86155991
## 30 8.266667     52.50       1  0.69266553 -1.87151962
## 31 8.236667     66.00       1  0.89548966 -1.86064895
## 32 8.383333     44.40       1  2.06123046 -0.68511851
## 33 8.590000     74.00       1  2.29664485 -1.81173798