# PCA followed by k-means clustering on the rows of `df_cc` selected by
# `idx_cc`. Both objects are expected to exist already (defined elsewhere).
#
# Produces:
#   pc_cc     - the `prcomp` fit on the selected rows
#   scores_cc - data frame holding the scores on PC1 and PC2
#   km4_cc    - k-means fit (4 clusters) on the scores of PC1-PC6

# Fix the RNG state so the random starts used by kmeans() are reproducible.
set.seed(123)

# 1. PCA on the 20k subset (this already works in the cluster comparison
#    code). `scale.` is spelled out in full rather than relying on partial
#    argument matching of `scale`; variables are standardised to unit
#    variance before the decomposition.
pc_cc <- prcomp(df_cc[idx_cc, ], scale. = TRUE)

# 2. PCA scores (first two components) for these same rows.
scores_cc <- as.data.frame(pc_cc$x[, 1:2])

# 3. K-means clustering on PCs 1-6: 4 centres, 20 random starts
#    (kmeans() keeps the best of the `nstart` runs).
km4_cc <- kmeans(pc_cc$x[, 1:6], centers = 4, nstart = 20)