#LPA with 4 class
burnout.ana <- burnout.item %>% estimate_profiles(n_profiles = 4)#(n_profiles =1:5)
burnout.ana
## tidyLPA analysis using mclust:
##
## Model Classes AIC BIC Entropy prob_min prob_max n_min n_max BLRT_p
## 1 4 6125.02 6349.50 0.95 0.93 0.99 0.16 0.49 0.01
burnout.data <- get_data(burnout.ana)
burnout.data
## # A tibble: 160 × 21
## model_number classes_number EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4
## <dbl> <dbl> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 1 4 3 2 2 3 3 4 4 1
## 2 1 4 3 4 3 4 3 4 3 3
## 3 1 4 1 1 1 1 1 5 1 1
## 4 1 4 3 2 3 3 4 4 3 2
## 5 1 4 4 5 4 3 3 4 3 3
## 6 1 4 1 1 1 1 1 1 1 1
## 7 1 4 4 4 4 4 4 4 1 1
## 8 1 4 3 1 1 1 3 3 1 1
## 9 1 4 4 4 4 4 4 5 5 5
## 10 1 4 1 1 1 1 2 2 1 2
## # ℹ 150 more rows
## # ℹ 11 more variables: UNS1 <int>, UNS2 <int>, UNS3 <int>, UNS4 <int>,
## # UNS5 <int>, UNS6 <int>, CPROB1 <dbl>, CPROB2 <dbl>, CPROB3 <dbl>,
## # CPROB4 <dbl>, Class <dbl>
burnout.item.fit <- get_fit(burnout.ana)
burnout.item.fit
## # A tibble: 1 × 18
## Model Classes LogLik AIC AWE BIC CAIC CLC KIC SABIC ICL Entropy
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 4 -2990. 6125. 6937. 6350. 6423. 5981. 6201. 6118. -6358. 0.950
## # ℹ 6 more variables: prob_min <dbl>, prob_max <dbl>, n_min <dbl>, n_max <dbl>,
## # BLRT_val <dbl>, BLRT_p <dbl>
index.burnout <- burnout.item.fit$BLRT_p
index.burnout
## [1] 0.00990099
#import Class to dat file
new_data_lpa <- cbind(dat, cluster_lpa = burnout.data$Class)
names(new_data_lpa)
## [1] "WSTM" "WSEM" "WSBH" "EHU" "RLS"
## [6] "UNS" "WSTM1" "WSTM2" "WSTM3" "WSTM4"
## [11] "WSEM1" "WSEM2" "WSEM3" "WSEM4" "WSBH1"
## [16] "WSBH2" "WSBH3" "WSBH4" "WSBH5" "EHU1"
## [21] "EHU2" "EHU3" "EHU4" "RLS1" "RLS2"
## [26] "RLS3" "RLS4" "UNS1" "UNS2" "UNS3"
## [31] "UNS4" "UNS5" "UNS6" "BOTc2" "BOTc3"
## [36] "BOTc4" "BOTc5" "WSCc2" "WSCc3" "WSCc4"
## [41] "WSCc5" "WSC" "BOT" "cluster_lpa"
#saving data for LPA
write.csv(burnout.data,
"S:\\PHD_Res\\Thesis\\survey data\\clusterana\\LPA\\burnout_data_prob.csv",
row.names = FALSE)
#scale data
burnout_scaled <- scale(burnout.item)
#silhouette for choose the number of clusters
sil_plot<- fviz_nbclust(burnout_scaled, kmeans, method = "silhouette")
sil_plot
elbow_plot<- fviz_nbclust(burnout_scaled, kmeans, method = "wss")
elbow_plot
set.seed(123)
#kmeans
cluster.km <- kmeans(burnout_scaled, 4, nstart = 20)
print(cluster.km)
## K-means clustering with 4 clusters of sizes 35, 72, 27, 26
##
## Cluster means:
## EHU1 EHU2 EHU3 EHU4 RLS1 RLS2
## 1 0.63191486 0.39822793 0.8479890 0.83744875 0.594926990 0.48103936
## 2 -0.78071969 -0.74022610 -0.7747782 -0.72293776 -0.726529995 -0.59020238
## 3 0.08562297 -0.01600034 -0.1447129 -0.07574667 0.001582854 -0.01205843
## 4 1.22242220 1.53039658 1.1542947 0.95330664 1.209422229 0.99937581
## RLS3 RLS4 UNS1 UNS2 UNS3 UNS4 UNS5
## 1 -0.1993078 -0.1717821 -0.1096164 -0.4014070 -0.3295561 -0.03355486 -0.3331239
## 2 -0.5629571 -0.5086291 -0.3852130 -0.3187233 -0.5104889 -0.44656958 -0.3933270
## 3 0.2917742 0.1183170 0.9050536 1.1847296 1.1791743 1.00816466 1.2247245
## 4 1.5242607 1.5168890 0.2744410 0.1926780 0.6327676 0.23488400 0.2658199
## UNS6
## 1 -0.2347112
## 2 -0.5295336
## 3 1.1147394
## 4 0.6247440
##
## Clustering vector:
## [1] 3 4 2 3 4 2 1 2 4 2 4 1 4 1 1 2 2 1 1 3 4 1 1 3 4 1 2 2 2 3 1 4 2 2 2 1 2
## [38] 2 3 1 3 1 2 2 2 3 3 3 1 4 2 2 1 2 2 2 1 2 2 2 1 3 3 2 3 1 4 4 3 1 2 1 1 2
## [75] 3 2 1 2 2 3 3 2 3 2 3 2 2 2 1 3 2 2 3 4 1 4 2 3 1 3 2 2 1 1 1 1 2 2 3 2 2
## [112] 2 2 2 1 4 2 4 2 4 2 3 4 3 4 4 2 4 4 1 2 2 2 2 1 3 2 2 2 2 2 2 2 2 4 2 2 4
## [149] 2 1 4 2 1 2 2 4 4 2 2 1
##
## Within cluster sum of squares by cluster:
## [1] 267.7148 348.7290 286.0130 292.8591
## (between_SS / total_SS = 46.3 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
new_data_km <- cbind(dat, cluster_km = cluster.km$cluster)
names(new_data_km)
## [1] "WSTM" "WSEM" "WSBH" "EHU" "RLS"
## [6] "UNS" "WSTM1" "WSTM2" "WSTM3" "WSTM4"
## [11] "WSEM1" "WSEM2" "WSEM3" "WSEM4" "WSBH1"
## [16] "WSBH2" "WSBH3" "WSBH4" "WSBH5" "EHU1"
## [21] "EHU2" "EHU3" "EHU4" "RLS1" "RLS2"
## [26] "RLS3" "RLS4" "UNS1" "UNS2" "UNS3"
## [31] "UNS4" "UNS5" "UNS6" "BOTc2" "BOTc3"
## [36] "BOTc4" "BOTc5" "WSCc2" "WSCc3" "WSCc4"
## [41] "WSCc5" "WSC" "BOT" "cluster_km"
fviz_cluster(cluster.km, data = new_data_km, ggtheme = theme_minimal())
sil_plot <- fviz_nbclust(x=burnout_scaled, pam,
method="silhouette",
diss=dist(dat,method="manhattan"))
sil_plot
elbow_plot <- fviz_nbclust(x=burnout_scaled, pam,
method="wss",
diss=dist(dat,method="manhattan"))
elbow_plot
library(ClusterR)
## Warning: package 'ClusterR' was built under R version 4.4.2
set.seed(123)
#cluster with manhattan distance
clus.mandis <- pam(burnout_scaled, metric="manhattan", k=4)
print(clus.mandis)
## Medoids:
## ID EHU1 EHU2 EHU3 EHU4 RLS1 RLS2
## [1,] 41 -0.3458629 -0.2046359 -0.4412142 -0.2782199 -0.2516738 0.09365932
## [2,] 125 1.1104018 1.2505526 1.0143378 1.2837162 1.2678661 0.80725413
## [3,] 34 -1.0739952 -0.9322301 -1.1689902 -1.0591879 -1.0114437 -1.33353031
## [4,] 84 -0.3458629 -0.2046359 -0.4412142 -0.2782199 -0.2516738 0.09365932
## RLS3 RLS4 UNS1 UNS2 UNS3 UNS4
## [1,] 0.1363685 0.08773423 1.1705747 0.6980627 0.9111258 1.2788129
## [2,] 1.8147498 1.73920200 -0.6216928 -0.2405090 -0.1227758 0.2348840
## [3,] -0.7028222 -0.73799966 -0.6216928 -1.1790808 -1.1566774 -0.8090449
## [4,] -0.7028222 -0.73799966 -0.6216928 -0.2405090 -0.1227758 -0.8090449
## UNS5 UNS6
## [1,] 1.4328340 0.8040106
## [2,] 0.3090426 -0.1281756
## [3,] -0.8147488 -1.0603618
## [4,] -0.8147488 -0.1281756
## Clustering vector:
## [1] 1 2 3 1 2 3 2 3 2 4 2 4 2 4 4 3 3 4 2 4 2 4 2 1 2 4 4 3 3 1 4 2 4 3 3 2 3
## [38] 4 1 2 1 4 3 3 3 1 4 1 2 2 4 3 4 3 4 4 4 3 3 4 3 3 1 4 1 2 2 2 1 4 3 4 2 3
## [75] 1 1 3 3 4 1 1 4 1 4 1 1 3 4 2 1 3 3 1 2 4 2 4 1 2 4 4 3 2 2 4 4 3 3 1 4 3
## [112] 4 3 3 4 2 4 2 3 2 3 1 2 1 2 2 3 4 2 4 3 3 4 3 2 1 3 3 3 3 4 3 4 3 2 3 3 2
## [149] 3 2 2 4 4 4 4 2 2 3 4 4
## Objective function:
## build swap
## 7.265251 7.025756
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
new_data_mandis <- cbind(dat, cluster_mandis = clus.mandis$cluster)
head(new_data_mandis)
## WSTM WSEM WSBH EHU RLS UNS WSTM1 WSTM2 WSTM3 WSTM4 WSEM1 WSEM2 WSEM3
## 1 3.50 4.25 3.4 2.50 3.00 3.000000 5 4 2 3 3 4 5
## 2 3.50 3.25 3.2 3.50 3.25 2.500000 3 4 3 4 4 4 3
## 3 2.75 2.50 1.4 1.00 2.00 1.666667 5 1 3 2 3 4 2
## 4 3.25 4.00 3.8 2.75 3.25 3.166667 2 2 4 5 5 5 3
## 5 4.25 4.00 3.6 4.00 3.25 2.000000 5 4 4 4 5 4 4
## 6 1.00 1.00 1.0 1.00 1.00 1.833333 1 1 1 1 1 1 1
## WSEM4 WSBH1 WSBH2 WSBH3 WSBH4 WSBH5 EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4
## 1 5 4 2 3 3 5 3 2 2 3 3 4 4 1
## 2 2 4 3 3 3 3 3 4 3 4 3 4 3 3
## 3 1 1 1 1 1 3 1 1 1 1 1 5 1 1
## 4 3 4 4 3 3 5 3 2 3 3 4 4 3 2
## 5 3 4 4 4 4 2 4 5 4 3 3 4 3 3
## 6 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## UNS1 UNS2 UNS3 UNS4 UNS5 UNS6 BOTc2 BOTc3 BOTc4 BOTc5 WSCc2 WSCc3 WSCc4 WSCc5
## 1 3 4 3 2 2 4 1 2 3 3 1 1 1 5
## 2 2 2 3 3 2 3 1 1 1 4 1 1 1 5
## 3 5 1 1 1 1 1 2 3 4 5 2 2 2 2
## 4 4 4 3 2 2 4 1 2 3 3 1 1 1 5
## 5 3 2 2 1 2 2 1 1 1 4 1 1 1 1
## 6 2 2 2 2 2 1 2 3 4 5 2 3 3 3
## WSC BOT cluster_mandis
## 1 3.531250 2.857143 1
## 2 3.265625 3.000000 2
## 3 2.041667 1.571429 3
## 4 3.572917 3.071429 1
## 5 3.765625 2.928571 2
## 6 1.052083 1.357143 3
fviz_cluster(clus.mandis, data = new_data_mandis, ggtheme = theme_minimal())
turn rating scale to dummy variable
#convert burnout item to dummy
burnout.dummy <- data.frame(
lapply(burnout.item, function(x) ifelse(x < 4, 0, 1))
)
#head(burnout.item)
head(burnout.dummy)
## EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4 UNS1 UNS2 UNS3 UNS4 UNS5 UNS6
## 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1
## 2 0 1 0 1 0 1 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 1 0 0 1 0 0 0 0 0
## 4 0 0 0 0 1 1 0 0 1 1 0 0 0 1
## 5 1 1 1 0 0 1 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0
burnout.dummy <- data.frame(lapply(burnout.dummy, as.factor))
str(burnout.dummy)
## 'data.frame': 160 obs. of 14 variables:
## $ EHU1: Factor w/ 2 levels "0","1": 1 1 1 1 2 1 2 1 2 1 ...
## $ EHU2: Factor w/ 2 levels "0","1": 1 2 1 1 2 1 2 1 2 1 ...
## $ EHU3: Factor w/ 2 levels "0","1": 1 1 1 1 2 1 2 1 2 1 ...
## $ EHU4: Factor w/ 2 levels "0","1": 1 2 1 1 1 1 2 1 2 1 ...
## $ RLS1: Factor w/ 2 levels "0","1": 1 1 1 2 1 1 2 1 2 1 ...
## $ RLS2: Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 1 2 1 ...
## $ RLS3: Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
## $ RLS4: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ UNS1: Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 2 1 1 ...
## $ UNS2: Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 1 1 1 ...
## $ UNS3: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ UNS4: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ UNS5: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ UNS6: Factor w/ 2 levels "0","1": 2 1 1 2 1 1 1 1 1 1 ...
library(klaR)
## Warning: package 'klaR' was built under R version 4.4.2
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
set.seed(123)
clus.mode <- kmodes(burnout.dummy, modes = 4, iter.max = 10)
print(clus.mode)
## K-modes clustering with 4 clusters of sizes 17, 10, 18, 115
##
## Cluster modes:
## EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4 UNS1 UNS2 UNS3 UNS4 UNS5 UNS6
## 1 1 1 1 0 1 1 1 1 0 0 0 0 0 0
## 2 1 1 1 1 1 1 0 0 1 1 0 0 0 0
## 3 1 1 1 1 0 1 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##
## Clustering vector:
## [1] 4 3 4 4 3 4 3 4 1 4 1 4 2 3 4 4 4 3 1 4 1 4 4 4 2 3 4 4 4 4 4 1 4 4 4 4 4
## [38] 4 2 3 4 4 4 4 4 4 4 2 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 2 1 3 4 4 4 3 3 4
## [75] 4 4 1 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 2 4 1 4 4 4 4 4 4 3 4 3 4 4 4 2 4 4
## [112] 4 4 4 4 1 4 1 4 3 4 3 1 4 1 4 4 1 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 4 4 1
## [149] 4 3 1 4 3 4 4 1 2 4 4 4
##
## Within cluster simple-matching distance by cluster:
## [1] 37 32 34 112
##
## Available components:
## [1] "cluster" "size" "modes" "withindiff" "iterations"
## [6] "weighted"
new_data_mode <- cbind(dat, cluster_mo = clus.mode$cluster)
head(new_data_mode)
## WSTM WSEM WSBH EHU RLS UNS WSTM1 WSTM2 WSTM3 WSTM4 WSEM1 WSEM2 WSEM3
## 1 3.50 4.25 3.4 2.50 3.00 3.000000 5 4 2 3 3 4 5
## 2 3.50 3.25 3.2 3.50 3.25 2.500000 3 4 3 4 4 4 3
## 3 2.75 2.50 1.4 1.00 2.00 1.666667 5 1 3 2 3 4 2
## 4 3.25 4.00 3.8 2.75 3.25 3.166667 2 2 4 5 5 5 3
## 5 4.25 4.00 3.6 4.00 3.25 2.000000 5 4 4 4 5 4 4
## 6 1.00 1.00 1.0 1.00 1.00 1.833333 1 1 1 1 1 1 1
## WSEM4 WSBH1 WSBH2 WSBH3 WSBH4 WSBH5 EHU1 EHU2 EHU3 EHU4 RLS1 RLS2 RLS3 RLS4
## 1 5 4 2 3 3 5 3 2 2 3 3 4 4 1
## 2 2 4 3 3 3 3 3 4 3 4 3 4 3 3
## 3 1 1 1 1 1 3 1 1 1 1 1 5 1 1
## 4 3 4 4 3 3 5 3 2 3 3 4 4 3 2
## 5 3 4 4 4 4 2 4 5 4 3 3 4 3 3
## 6 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## UNS1 UNS2 UNS3 UNS4 UNS5 UNS6 BOTc2 BOTc3 BOTc4 BOTc5 WSCc2 WSCc3 WSCc4 WSCc5
## 1 3 4 3 2 2 4 1 2 3 3 1 1 1 5
## 2 2 2 3 3 2 3 1 1 1 4 1 1 1 5
## 3 5 1 1 1 1 1 2 3 4 5 2 2 2 2
## 4 4 4 3 2 2 4 1 2 3 3 1 1 1 5
## 5 3 2 2 1 2 2 1 1 1 4 1 1 1 1
## 6 2 2 2 2 2 1 2 3 4 5 2 3 3 3
## WSC BOT cluster_mo
## 1 3.531250 2.857143 4
## 2 3.265625 3.000000 3
## 3 2.041667 1.571429 4
## 4 3.572917 3.071429 4
## 5 3.765625 2.928571 3
## 6 1.052083 1.357143 4
fviz_cluster(
list(data = new_data_mode, cluster = clus.mode$cluster),
ggtheme = theme_minimal(),
geom = "point")
##LPA
#mean
new_data_lpa %>%
group_by(cluster_lpa) %>%
summarise(mean = mean(BOT, na.rm = TRUE))
## # A tibble: 4 × 2
## cluster_lpa mean
## <dbl> <dbl>
## 1 1 3.36
## 2 2 2.55
## 3 3 2.77
## 4 4 1.59
#member and percentage
new_data_lpa %>%
group_by(cluster_lpa) %>%
summarise(n = n()) %>%
mutate(percentage = n/sum(n))
## # A tibble: 4 × 3
## cluster_lpa n percentage
## <dbl> <int> <dbl>
## 1 1 25 0.156
## 2 2 28 0.175
## 3 3 28 0.175
## 4 4 79 0.494
##kmeans
new_data_km %>%
group_by(cluster_km) %>%
summarise(mean = mean(BOT, na.rm = TRUE))
## # A tibble: 4 × 2
## cluster_km mean
## <int> <dbl>
## 1 1 2.47
## 2 2 1.54
## 3 3 2.74
## 4 4 3.35
#member and percentage
new_data_km %>%
group_by(cluster_km) %>%
summarise(n = n()) %>%
mutate(percentage = n/sum(n))
## # A tibble: 4 × 3
## cluster_km n percentage
## <int> <int> <dbl>
## 1 1 35 0.219
## 2 2 72 0.45
## 3 3 27 0.169
## 4 4 26 0.162
##kmedoids
#mean
new_data_mandis %>%
group_by(cluster_mandis) %>%
summarise(mean = mean(BOT, na.rm = TRUE))
## # A tibble: 4 × 2
## cluster_mandis mean
## <int> <dbl>
## 1 1 2.75
## 2 2 3.15
## 3 3 1.41
## 4 4 2.07
#member and percentage
new_data_mandis %>%
group_by(cluster_mandis) %>%
summarise(n = n()) %>%
mutate(percentage = n/sum(n))
## # A tibble: 4 × 3
## cluster_mandis n percentage
## <int> <int> <dbl>
## 1 1 25 0.156
## 2 2 39 0.244
## 3 3 48 0.3
## 4 4 48 0.3
##kmode
#mean
new_data_mode %>%
group_by(cluster_mo) %>%
summarise(mean = mean(BOT, na.rm = TRUE))
## # A tibble: 4 × 2
## cluster_mo mean
## <int> <dbl>
## 1 1 3.18
## 2 2 3.49
## 3 3 2.81
## 4 4 1.90
#member and percentage
new_data_mode %>%
group_by(cluster_mo) %>%
summarise(n = n()) %>%
mutate(percentage = n/sum(n))
## # A tibble: 4 × 3
## cluster_mo n percentage
## <int> <int> <dbl>
## 1 1 17 0.106
## 2 2 10 0.0625
## 3 3 18 0.112
## 4 4 115 0.719