LPA_kmeans

import data

LAP

#LPA with 4 class
burnout.ana <- burnout.item %>% estimate_profiles(n_profiles = 4)#(n_profiles =1:5)
burnout.ana

## tidyLPA analysis using mclust: 
## 
##  Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
##  1     4       6125.02 6349.50 0.95    0.93     0.99     0.16  0.49  0.01

burnout.data <- get_data(burnout.ana)
burnout.data

## # A tibble: 160 × 21
##    model_number classes_number  EHU1  EHU2  EHU3  EHU4  RLS1  RLS2  RLS3  RLS4
##           <dbl>          <dbl> <int> <int> <int> <int> <int> <int> <int> <int>
##  1            1              4     3     2     2     3     3     4     4     1
##  2            1              4     3     4     3     4     3     4     3     3
##  3            1              4     1     1     1     1     1     5     1     1
##  4            1              4     3     2     3     3     4     4     3     2
##  5            1              4     4     5     4     3     3     4     3     3
##  6            1              4     1     1     1     1     1     1     1     1
##  7            1              4     4     4     4     4     4     4     1     1
##  8            1              4     3     1     1     1     3     3     1     1
##  9            1              4     4     4     4     4     4     5     5     5
## 10            1              4     1     1     1     1     2     2     1     2
## # ℹ 150 more rows
## # ℹ 11 more variables: UNS1 <int>, UNS2 <int>, UNS3 <int>, UNS4 <int>,
## #   UNS5 <int>, UNS6 <int>, CPROB1 <dbl>, CPROB2 <dbl>, CPROB3 <dbl>,
## #   CPROB4 <dbl>, Class <dbl>

burnout.item.fit <- get_fit(burnout.ana)
burnout.item.fit

## # A tibble: 1 × 18
##   Model Classes LogLik   AIC   AWE   BIC  CAIC   CLC   KIC SABIC    ICL Entropy
##   <dbl>   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>   <dbl>
## 1     1       4 -2990. 6125. 6937. 6350. 6423. 5981. 6201. 6118. -6358.   0.950
## # ℹ 6 more variables: prob_min <dbl>, prob_max <dbl>, n_min <dbl>, n_max <dbl>,
## #   BLRT_val <dbl>, BLRT_p <dbl>

index.burnout <- burnout.item.fit$BLRT_p
index.burnout

## [1] 0.00990099

#import Class to dat file
new_data_lpa <- cbind(dat, cluster_lpa = burnout.data$Class)
names(new_data_lpa)

##  [1] "WSTM"        "WSEM"        "WSBH"        "EHU"         "RLS"        
##  [6] "UNS"         "WSTM1"       "WSTM2"       "WSTM3"       "WSTM4"      
## [11] "WSEM1"       "WSEM2"       "WSEM3"       "WSEM4"       "WSBH1"      
## [16] "WSBH2"       "WSBH3"       "WSBH4"       "WSBH5"       "EHU1"       
## [21] "EHU2"        "EHU3"        "EHU4"        "RLS1"        "RLS2"       
## [26] "RLS3"        "RLS4"        "UNS1"        "UNS2"        "UNS3"       
## [31] "UNS4"        "UNS5"        "UNS6"        "BOTc2"       "BOTc3"      
## [36] "BOTc4"       "BOTc5"       "WSCc2"       "WSCc3"       "WSCc4"      
## [41] "WSCc5"       "WSC"         "BOT"         "cluster_lpa"

#saving data for LPA
write.csv(burnout.data,
          "S:\\PHD_Res\\Thesis\\survey data\\clusterana\\LPA\\burnout_data_prob.csv", 
          row.names = FALSE)

kmeans

#scale data 
burnout_scaled <- scale(burnout.item)

#silhouette for choose the number of clusters
sil_plot<- fviz_nbclust(burnout_scaled, kmeans, method = "silhouette")
sil_plot

elbow_plot<- fviz_nbclust(burnout_scaled, kmeans, method = "wss")
elbow_plot

set.seed(123)
#kmeans
cluster.km <- kmeans(burnout_scaled, 4, nstart = 20)
print(cluster.km)

## K-means clustering with 4 clusters of sizes 35, 72, 27, 26
## 
## Cluster means:
##          EHU1        EHU2       EHU3        EHU4         RLS1        RLS2
## 1  0.63191486  0.39822793  0.8479890  0.83744875  0.594926990  0.48103936
## 2 -0.78071969 -0.74022610 -0.7747782 -0.72293776 -0.726529995 -0.59020238
## 3  0.08562297 -0.01600034 -0.1447129 -0.07574667  0.001582854 -0.01205843
## 4  1.22242220  1.53039658  1.1542947  0.95330664  1.209422229  0.99937581
##         RLS3       RLS4       UNS1       UNS2       UNS3        UNS4       UNS5
## 1 -0.1993078 -0.1717821 -0.1096164 -0.4014070 -0.3295561 -0.03355486 -0.3331239
## 2 -0.5629571 -0.5086291 -0.3852130 -0.3187233 -0.5104889 -0.44656958 -0.3933270
## 3  0.2917742  0.1183170  0.9050536  1.1847296  1.1791743  1.00816466  1.2247245
## 4  1.5242607  1.5168890  0.2744410  0.1926780  0.6327676  0.23488400  0.2658199
##         UNS6
## 1 -0.2347112
## 2 -0.5295336
## 3  1.1147394
## 4  0.6247440
## 
## Clustering vector:
##   [1] 3 4 2 3 4 2 1 2 4 2 4 1 4 1 1 2 2 1 1 3 4 1 1 3 4 1 2 2 2 3 1 4 2 2 2 1 2
##  [38] 2 3 1 3 1 2 2 2 3 3 3 1 4 2 2 1 2 2 2 1 2 2 2 1 3 3 2 3 1 4 4 3 1 2 1 1 2
##  [75] 3 2 1 2 2 3 3 2 3 2 3 2 2 2 1 3 2 2 3 4 1 4 2 3 1 3 2 2 1 1 1 1 2 2 3 2 2
## [112] 2 2 2 1 4 2 4 2 4 2 3 4 3 4 4 2 4 4 1 2 2 2 2 1 3 2 2 2 2 2 2 2 2 4 2 2 4
## [149] 2 1 4 2 1 2 2 4 4 2 2 1
## 
## Within cluster sum of squares by cluster:
## [1] 267.7148 348.7290 286.0130 292.8591
##  (between_SS / total_SS =  46.3 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

new_data_km <- cbind(dat, cluster_km = cluster.km$cluster)
names(new_data_km)

##  [1] "WSTM"       "WSEM"       "WSBH"       "EHU"        "RLS"       
##  [6] "UNS"        "WSTM1"      "WSTM2"      "WSTM3"      "WSTM4"     
## [11] "WSEM1"      "WSEM2"      "WSEM3"      "WSEM4"      "WSBH1"     
## [16] "WSBH2"      "WSBH3"      "WSBH4"      "WSBH5"      "EHU1"      
## [21] "EHU2"       "EHU3"       "EHU4"       "RLS1"       "RLS2"      
## [26] "RLS3"       "RLS4"       "UNS1"       "UNS2"       "UNS3"      
## [31] "UNS4"       "UNS5"       "UNS6"       "BOTc2"      "BOTc3"     
## [36] "BOTc4"      "BOTc5"      "WSCc2"      "WSCc3"      "WSCc4"     
## [41] "WSCc5"      "WSC"        "BOT"        "cluster_km"

fviz_cluster(cluster.km, data = new_data_km, ggtheme = theme_minimal())

Kmedoids with Manhattan distance

sil_plot <- fviz_nbclust(x=burnout_scaled, pam, 
                           method="silhouette", 
                           diss=dist(dat,method="manhattan"))
sil_plot

elbow_plot <- fviz_nbclust(x=burnout_scaled, pam, 
                           method="wss", 
                           diss=dist(dat,method="manhattan"))
elbow_plot

library(ClusterR)

## Warning: package 'ClusterR' was built under R version 4.4.2

set.seed(123)
#cluster with manhattan distance
clus.mandis <- pam(burnout_scaled, metric="manhattan", k=4)
print(clus.mandis)

## Medoids:
##       ID       EHU1       EHU2       EHU3       EHU4       RLS1        RLS2
## [1,]  41 -0.3458629 -0.2046359 -0.4412142 -0.2782199 -0.2516738  0.09365932
## [2,] 125  1.1104018  1.2505526  1.0143378  1.2837162  1.2678661  0.80725413
## [3,]  34 -1.0739952 -0.9322301 -1.1689902 -1.0591879 -1.0114437 -1.33353031
## [4,]  84 -0.3458629 -0.2046359 -0.4412142 -0.2782199 -0.2516738  0.09365932
##            RLS3        RLS4       UNS1       UNS2       UNS3       UNS4
## [1,]  0.1363685  0.08773423  1.1705747  0.6980627  0.9111258  1.2788129
## [2,]  1.8147498  1.73920200 -0.6216928 -0.2405090 -0.1227758  0.2348840
## [3,] -0.7028222 -0.73799966 -0.6216928 -1.1790808 -1.1566774 -0.8090449
## [4,] -0.7028222 -0.73799966 -0.6216928 -0.2405090 -0.1227758 -0.8090449
##            UNS5       UNS6
## [1,]  1.4328340  0.8040106
## [2,]  0.3090426 -0.1281756
## [3,] -0.8147488 -1.0603618
## [4,] -0.8147488 -0.1281756
## Clustering vector:
##   [1] 1 2 3 1 2 3 2 3 2 4 2 4 2 4 4 3 3 4 2 4 2 4 2 1 2 4 4 3 3 1 4 2 4 3 3 2 3
##  [38] 4 1 2 1 4 3 3 3 1 4 1 2 2 4 3 4 3 4 4 4 3 3 4 3 3 1 4 1 2 2 2 1 4 3 4 2 3
##  [75] 1 1 3 3 4 1 1 4 1 4 1 1 3 4 2 1 3 3 1 2 4 2 4 1 2 4 4 3 2 2 4 4 3 3 1 4 3
## [112] 4 3 3 4 2 4 2 3 2 3 1 2 1 2 2 3 4 2 4 3 3 4 3 2 1 3 3 3 3 4 3 4 3 2 3 3 2
## [149] 3 2 2 4 4 4 4 2 2 3 4 4
## Objective function:
##    build     swap 
## 7.265251 7.025756 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"

new_data_mandis <- cbind(dat, cluster_mandis = clus.mandis$cluster)
head(new_data_mandis)

##   WSTM WSEM WSBH  EHU  RLS      UNS WSTM1 WSTM2 WSTM3 WSTM4 WSEM1 WSEM2 WSEM3
## 1 3.50 4.25  3.4 2.50 3.00 3.000000     5     4     2     3     3     4     5
## 2 3.50 3.25  3.2 3.50 3.25 2.500000     3     4     3     4     4     4     3
## 3 2.75 2.50  1.4 1.00 2.00 1.666667     5     1     3     2     3     4     2
## 4 3.25 4.00  3.8 2.75 3.25 3.166667     2     2     4     5     5     5     3
## 5 4.25 4.00  3.6 4.00 3.25 2.000000     5     4     4     4     5     4     4
## 6 1.00 1.00  1.0 1.00 1.00 1.833333     1     1     1     1     1     1     1
##   WSEM4 WSBH1 WSBH2 WSBH3 WSBH4 WSBH5 EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4
## 1     5     4     2     3     3     5    3    2    2    3    3    4    4    1
## 2     2     4     3     3     3     3    3    4    3    4    3    4    3    3
## 3     1     1     1     1     1     3    1    1    1    1    1    5    1    1
## 4     3     4     4     3     3     5    3    2    3    3    4    4    3    2
## 5     3     4     4     4     4     2    4    5    4    3    3    4    3    3
## 6     1     1     1     1     1     1    1    1    1    1    1    1    1    1
##   UNS1 UNS2 UNS3 UNS4 UNS5 UNS6 BOTc2 BOTc3 BOTc4 BOTc5 WSCc2 WSCc3 WSCc4 WSCc5
## 1    3    4    3    2    2    4     1     2     3     3     1     1     1     5
## 2    2    2    3    3    2    3     1     1     1     4     1     1     1     5
## 3    5    1    1    1    1    1     2     3     4     5     2     2     2     2
## 4    4    4    3    2    2    4     1     2     3     3     1     1     1     5
## 5    3    2    2    1    2    2     1     1     1     4     1     1     1     1
## 6    2    2    2    2    2    1     2     3     4     5     2     3     3     3
##        WSC      BOT cluster_mandis
## 1 3.531250 2.857143              1
## 2 3.265625 3.000000              2
## 3 2.041667 1.571429              3
## 4 3.572917 3.071429              1
## 5 3.765625 2.928571              2
## 6 1.052083 1.357143              3

fviz_cluster(clus.mandis, data = new_data_mandis, ggtheme = theme_minimal())

kmode

turn rating scale to dummy variable

#convert burnout item to dummy 
burnout.dummy <- data.frame(
  lapply(burnout.item, function(x) ifelse(x < 4, 0, 1))
)
#head(burnout.item)
head(burnout.dummy)

##   EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4 UNS1 UNS2 UNS3 UNS4 UNS5 UNS6
## 1    0    0    0    0    0    1    1    0    0    1    0    0    0    1
## 2    0    1    0    1    0    1    0    0    0    0    0    0    0    0
## 3    0    0    0    0    0    1    0    0    1    0    0    0    0    0
## 4    0    0    0    0    1    1    0    0    1    1    0    0    0    1
## 5    1    1    1    0    0    1    0    0    0    0    0    0    0    0
## 6    0    0    0    0    0    0    0    0    0    0    0    0    0    0

burnout.dummy <- data.frame(lapply(burnout.dummy, as.factor))
str(burnout.dummy)

## 'data.frame':    160 obs. of  14 variables:
##  $ EHU1: Factor w/ 2 levels "0","1": 1 1 1 1 2 1 2 1 2 1 ...
##  $ EHU2: Factor w/ 2 levels "0","1": 1 2 1 1 2 1 2 1 2 1 ...
##  $ EHU3: Factor w/ 2 levels "0","1": 1 1 1 1 2 1 2 1 2 1 ...
##  $ EHU4: Factor w/ 2 levels "0","1": 1 2 1 1 1 1 2 1 2 1 ...
##  $ RLS1: Factor w/ 2 levels "0","1": 1 1 1 2 1 1 2 1 2 1 ...
##  $ RLS2: Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 1 2 1 ...
##  $ RLS3: Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
##  $ RLS4: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ UNS1: Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 2 1 1 ...
##  $ UNS2: Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 1 1 1 ...
##  $ UNS3: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ UNS4: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ UNS5: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ UNS6: Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 1 1 1 ...

library(klaR)

## Warning: package 'klaR' was built under R version 4.4.2

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

set.seed(123)

clus.mode <- kmodes(burnout.dummy, modes = 4, iter.max = 10)
print(clus.mode)

## K-modes clustering with 4 clusters of sizes 17, 10, 18, 115
## 
## Cluster modes:
##   EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4 UNS1 UNS2 UNS3 UNS4 UNS5 UNS6
## 1    1    1    1    0    1    1    1    1    0    0    0    0    0    0
## 2    1    1    1    1    1    1    0    0    1    1    0    0    0    0
## 3    1    1    1    1    0    1    0    0    0    0    0    0    0    0
## 4    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 
## Clustering vector:
##   [1] 4 3 4 4 3 4 3 4 1 4 1 4 2 3 4 4 4 3 1 4 1 4 4 4 2 3 4 4 4 4 4 1 4 4 4 4 4
##  [38] 4 2 3 4 4 4 4 4 4 4 2 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 2 1 3 4 4 4 3 3 4
##  [75] 4 4 1 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 2 4 1 4 4 4 4 4 4 3 4 3 4 4 4 2 4 4
## [112] 4 4 4 4 1 4 1 4 3 4 3 1 4 1 4 4 1 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 4 4 1
## [149] 4 3 1 4 3 4 4 1 2 4 4 4
## 
## Within cluster simple-matching distance by cluster:
## [1]  37  32  34 112
## 
## Available components:
## [1] "cluster"    "size"       "modes"      "withindiff" "iterations"
## [6] "weighted"

new_data_mode <- cbind(dat, cluster_mo = clus.mode$cluster)
head(new_data_mode)

##   WSTM WSEM WSBH  EHU  RLS      UNS WSTM1 WSTM2 WSTM3 WSTM4 WSEM1 WSEM2 WSEM3
## 1 3.50 4.25  3.4 2.50 3.00 3.000000     5     4     2     3     3     4     5
## 2 3.50 3.25  3.2 3.50 3.25 2.500000     3     4     3     4     4     4     3
## 3 2.75 2.50  1.4 1.00 2.00 1.666667     5     1     3     2     3     4     2
## 4 3.25 4.00  3.8 2.75 3.25 3.166667     2     2     4     5     5     5     3
## 5 4.25 4.00  3.6 4.00 3.25 2.000000     5     4     4     4     5     4     4
## 6 1.00 1.00  1.0 1.00 1.00 1.833333     1     1     1     1     1     1     1
##   WSEM4 WSBH1 WSBH2 WSBH3 WSBH4 WSBH5 EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4
## 1     5     4     2     3     3     5    3    2    2    3    3    4    4    1
## 2     2     4     3     3     3     3    3    4    3    4    3    4    3    3
## 3     1     1     1     1     1     3    1    1    1    1    1    5    1    1
## 4     3     4     4     3     3     5    3    2    3    3    4    4    3    2
## 5     3     4     4     4     4     2    4    5    4    3    3    4    3    3
## 6     1     1     1     1     1     1    1    1    1    1    1    1    1    1
##   UNS1 UNS2 UNS3 UNS4 UNS5 UNS6 BOTc2 BOTc3 BOTc4 BOTc5 WSCc2 WSCc3 WSCc4 WSCc5
## 1    3    4    3    2    2    4     1     2     3     3     1     1     1     5
## 2    2    2    3    3    2    3     1     1     1     4     1     1     1     5
## 3    5    1    1    1    1    1     2     3     4     5     2     2     2     2
## 4    4    4    3    2    2    4     1     2     3     3     1     1     1     5
## 5    3    2    2    1    2    2     1     1     1     4     1     1     1     1
## 6    2    2    2    2    2    1     2     3     4     5     2     3     3     3
##        WSC      BOT cluster_mo
## 1 3.531250 2.857143          4
## 2 3.265625 3.000000          3
## 3 2.041667 1.571429          4
## 4 3.572917 3.071429          4
## 5 3.765625 2.928571          3
## 6 1.052083 1.357143          4

fviz_cluster(
  list(data = new_data_mode, cluster = clus.mode$cluster),
  ggtheme = theme_minimal(),
  geom = "point")

เปรียบเทียบ LPA vs kmeans vs kmedoids

##LPA
#mean
new_data_lpa %>% 
  group_by(cluster_lpa) %>% 
  summarise(mean = mean(BOT, na.rm = TRUE))

## # A tibble: 4 × 2
##   cluster_lpa  mean
##         <dbl> <dbl>
## 1           1  3.36
## 2           2  2.55
## 3           3  2.77
## 4           4  1.59

#member and percentage 
new_data_lpa %>% 
  group_by(cluster_lpa) %>% 
  summarise(n = n()) %>% 
  mutate(percentage = n/sum(n))

## # A tibble: 4 × 3
##   cluster_lpa     n percentage
##         <dbl> <int>      <dbl>
## 1           1    25      0.156
## 2           2    28      0.175
## 3           3    28      0.175
## 4           4    79      0.494

##kmeans
new_data_km %>% 
  group_by(cluster_km) %>% 
  summarise(mean = mean(BOT, na.rm = TRUE))

## # A tibble: 4 × 2
##   cluster_km  mean
##        <int> <dbl>
## 1          1  2.47
## 2          2  1.54
## 3          3  2.74
## 4          4  3.35

#member and percentage 
new_data_km %>% 
  group_by(cluster_km) %>% 
  summarise(n = n()) %>% 
  mutate(percentage = n/sum(n))

## # A tibble: 4 × 3
##   cluster_km     n percentage
##        <int> <int>      <dbl>
## 1          1    35      0.219
## 2          2    72      0.45 
## 3          3    27      0.169
## 4          4    26      0.162

##kmedoids
#mean
new_data_mandis %>% 
  group_by(cluster_mandis) %>% 
  summarise(mean = mean(BOT, na.rm = TRUE))

## # A tibble: 4 × 2
##   cluster_mandis  mean
##            <int> <dbl>
## 1              1  2.75
## 2              2  3.15
## 3              3  1.41
## 4              4  2.07

#member and percentage 
new_data_mandis %>% 
  group_by(cluster_mandis) %>% 
  summarise(n = n()) %>% 
  mutate(percentage = n/sum(n))

## # A tibble: 4 × 3
##   cluster_mandis     n percentage
##            <int> <int>      <dbl>
## 1              1    25      0.156
## 2              2    39      0.244
## 3              3    48      0.3  
## 4              4    48      0.3

##kmode
#mean
new_data_mode %>% 
  group_by(cluster_mo) %>% 
  summarise(mean = mean(BOT, na.rm = TRUE))

## # A tibble: 4 × 2
##   cluster_mo  mean
##        <int> <dbl>
## 1          1  3.18
## 2          2  3.49
## 3          3  2.81
## 4          4  1.90

#member and percentage 
new_data_mode %>% 
  group_by(cluster_mo) %>% 
  summarise(n = n()) %>% 
  mutate(percentage = n/sum(n))

## # A tibble: 4 × 3
##   cluster_mo     n percentage
##        <int> <int>      <dbl>
## 1          1    17     0.106 
## 2          2    10     0.0625
## 3          3    18     0.112 
## 4          4   115     0.719

LPA_kmeans

2024-11-30

import data

LAP

kmeans

Kmedoids with Manhattan distance

kmode

เปรียบเทียบ LPA vs kmeans vs kmedoids