#install.packages("ggplot2")
library(ggplot2)
#install.packages("dplyr")
library(dplyr)
#install.packages("Hmisc")
library(Hmisc)
#install.packages("factoextra")
library(factoextra)
#install.packages("cluster")
library(cluster)
#install.packages("magrittr")
library(magrittr)
#install.packages("NbClust")
library(NbClust)
#install.packages("tidyr")
library(tidyr)
#install.packages("rstatix")
library(rstatix)
# Load the survey export (header row, comma-separated, "." as decimal mark)
# and preview the first six rows.
# NOTE(review): the preview recorded below shows -2 in many columns; it looks
# like a missing / skipped-answer code -- confirm before using those columns
# as numeric scores.
data <- read.table(
  file = "./anketa_final_6.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)
head(data, n = 6L)
## ID Q47a Q47b Q47c Q47d Q21 Q50a Q50b Q50c Q50d Q51 Q52a Q52b Q52c Q52d Q52e
## 1 1 1 1 0 0 4 2 2 6 6 4 2 4 2 1 1
## 2 2 1 0 0 0 7 2 6 6 6 5 1 1 1 1 1
## 3 3 1 1 0 0 7 1 2 6 6 4 1 1 1 1 1
## 4 4 1 1 1 0 7 1 2 4 6 1 1 3 1 1 1
## 5 5 1 0 1 0 7 1 6 2 6 5 1 3 1 1 1
## 6 6 1 1 0 1 6 2 1 6 4 3 2 4 2 2 2
## Q52f Q52g Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q54a Q54b Q54c Q54d Q55a Q55b Q55c Q55d
## 1 2 2 4 4 5 3 1 1 0 0 1 0 1 0
## 2 1 1 1 7 7 7 0 0 0 1 1 1 1 1
## 3 1 1 1 7 7 7 0 1 0 0 1 1 1 0
## 4 1 1 3 6 7 7 0 1 0 0 1 1 0 0
## 5 1 1 1 7 7 7 1 1 0 0 1 0 0 0
## 6 1 2 6 5 2 2 1 1 0 0 0 1 1 0
## Q55e Q55f Q55g Q56 Q57 Q58 Q59 Q60a Q60b Q60c Q60d Q60e Q60f Q60g Q60g_text
## 1 0 0 0 5 6 3 5 -2 -2 -2 -2 -2 -2 -2 -2
## 2 1 1 0 7 1 1 1 0 0 0 1 1 -2 0 -2
## 3 1 1 0 6 3 5 1 1 1 1 1 0 -2 0 -2
## 4 1 0 0 5 6 6 4 0 1 1 0 0 -2 0 -2
## 5 0 0 0 5 1 5 6 -2 -2 -2 -2 -2 -2 -2 -2
## 6 0 0 0 2 5 6 6 -2 -2 -2 -2 -2 -2 -2 -2
## Q61a Q61b Q61c Q61d Q61e Q61f Q61f_text Q62 Q63a_1 Q63b_1 Q63c_1 Q63d_1
## 1 1 0 0 -2 -2 0 -2 2 -2 -2 -2 -2
## 2 0 0 0 -2 1 0 -2 2 -2 -2 -2 -2
## 3 1 0 0 -2 -2 0 -2 2 -2 -2 -2 -2
## 4 1 0 0 -2 -2 0 -2 1 6 4 6 7
## 5 0 0 1 -2 -2 0 -2 2 -2 -2 -2 -2
## 6 1 0 1 -2 -2 0 -2 2 -2 -2 -2 -2
## Q63e_1 Q63f_1 Q64 Q65 Q46 Q1a_1 Q1b_1 Q1c_1 Q1d_1 Q1e_1 Q1f_1 Q2a_1 Q2b_1
## 1 -2 -2 3 1 66 4 6 6 6 5 6 6 6
## 2 -2 -2 1 1 0 7 7 7 7 7 7 7 1
## 3 -2 -2 5 3 16 7 7 7 7 7 7 7 5
## 4 6 7 5 4 70 5 7 7 7 3 3 7 6
## 5 -2 -2 1 1 49 7 7 7 7 7 7 7 1
## 6 -2 -2 6 6 85 6 6 6 5 5 6 7 6
## Q2c_1 Q3a_1 Q3b_1 Q3c_1 Q4a_1 Q4b_1 Q4c_1 Q5a_1 Q5b_1 Q5c_1 Q6a_1 Q6b_1 Q6c_1
## 1 6 5 6 4 5 5 4 5 5 4 7 5 5
## 2 1 7 1 1 7 1 1 7 1 1 7 1 1
## 3 3 7 7 5 6 7 1 7 7 1 7 5 3
## 4 6 6 7 7 7 6 5 7 6 5 6 6 6
## 5 7 7 1 7 7 1 7 7 1 7 7 1 7
## 6 5 7 6 6 7 6 6 7 6 6 7 6 6
## Q7a_1 Q7b_1 Q7c_1 Q39 Q40 Q37 Q38 Q41 Q42 Q42_5_text Q43 Q44 Q45 Q45_10_text
## 1 6 4 4 1 1958 1 3 4 3 -2 -2 2 6 -2
## 2 7 1 1 2 1942 1 1 2 3 -2 -2 3 7 -2
## 3 7 3 3 2 1953 2 -2 4 3 -2 -2 3 1 -2
## 4 3 5 6 1 1948 5 -2 3 3 -2 -2 3 1 -2
## 5 7 1 7 1 1953 5 -2 5 3 -2 -2 3 4 -2
## 6 7 6 6 1 1955 1 4 5 3 -2 -2 3 1 -2
# Working copy for segmentation: respondent ID plus the seven items used as
# clustering variables.
data_seg <- as.data.frame(
  data[, c("ID", "Q51", "Q53a_1", "Q53b_1", "Q53c_1", "Q53d_1", "Q56", "Q58"),
       drop = FALSE]
)
# Descriptive statistics of the clustering items (ID is the first column and
# is left out).
summary(data_seg[setdiff(names(data_seg), "ID")])
## Q51 Q53a_1 Q53b_1 Q53c_1
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:1.000
## Median :3.000 Median :5.000 Median :3.000 Median :2.000
## Mean :3.481 Mean :4.877 Mean :3.642 Mean :2.877
## 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:4.000
## Max. :5.000 Max. :7.000 Max. :7.000 Max. :7.000
## Q53d_1 Q56 Q58
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:3.000 1st Qu.:5.000
## Median :2.000 Median :5.000 Median :6.000
## Mean :3.113 Mean :4.575 Mean :5.594
## 3rd Qu.:5.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000
# z-standardise the seven clustering items (centre on the column mean, divide
# by the column standard deviation) and preview the result.
data_seg_std <- data_seg[, c("Q51", "Q53a_1", "Q53b_1", "Q53c_1", "Q53d_1",
                             "Q56", "Q58")] %>%
  scale(center = TRUE, scale = TRUE) %>%
  as.data.frame()
head(data_seg_std, n = 6L)
## Q51 Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q56 Q58
## 1 0.5358171 -0.4348621 0.1583175 1.0352670 -0.05133481 0.2325582 -1.9729472
## 2 1.5684828 -1.9218101 1.4831852 2.0107185 1.76249520 1.3281658 -3.4939101
## 3 0.5358171 -1.9218101 1.4831852 2.0107185 1.76249520 0.7803620 -0.4519843
## 4 -2.5621799 -0.9305115 1.0415627 2.0107185 1.76249520 0.2325582 0.3084972
## 5 1.5684828 -1.9218101 1.4831852 2.0107185 1.76249520 0.2325582 -0.4519843
## 6 -0.4968486 0.5564365 0.5999401 -0.4279103 -0.50479231 -1.4108531 0.3084972
# Outlier screening: Euclidean norm of each respondent's row of z-scores,
# i.e. the distance from the centroid of the standardised data.
# data_seg_std holds exactly the seven clustering items, so rowSums() over the
# squared values replaces the hand-written seven-term sum.
data_seg$Dissimilarity <- sqrt(unname(rowSums(data_seg_std^2)))
# Show the 15 respondents with the largest dissimilarity.
head(
  data_seg[order(-data_seg$Dissimilarity), c("ID", "Dissimilarity")],
  15
)
## ID Dissimilarity
## 2 2 5.429009
## 92 92 4.746060
## 63 63 4.599919
## 27 27 4.294490
## 78 78 4.276532
## 4 4 3.976695
## 106 106 3.974940
## 5 5 3.970023
## 89 89 3.937557
## 3 3 3.760708
## 83 83 3.687591
## 60 60 3.677529
## 90 90 3.415806
## 88 88 3.415193
## 47 47 3.339925
# Drop respondent 2 (top of the dissimilarity ranking recorded above) and
# renumber the remaining rows consecutively.
# NOTE(review): after renumbering, ID no longer matches the ID in the source
# file; later removal lists refer to the renumbered IDs.
# ID is numeric, so it is compared against a number rather than a string.
data <- data %>%
  filter(!(ID %in% 2)) %>%
  mutate(ID = row_number())
# Rebuild the segmentation subset and its standardised version on the reduced
# sample; the items are selected by name (everything except ID) instead of by
# column position.
data_seg <- as.data.frame(
  data[c("ID", "Q51", "Q53a_1", "Q53b_1", "Q53c_1", "Q53d_1", "Q56", "Q58")]
)
data_seg_std <- as.data.frame(
  scale(data_seg[setdiff(names(data_seg), "ID")])
)
head(data_seg_std)
## Q51 Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q56 Q58
## 1 0.5547258 -0.4592293 0.1734642 1.0703936 -0.03490949 0.2461331 -2.1252565
## 2 0.5547258 -1.9660753 1.5061769 2.0606256 1.79783885 0.7960050 -0.5140512
## 3 -2.5656070 -0.9615113 1.0619393 2.0606256 1.79783885 0.2461331 0.2915514
## 4 1.5948368 -1.9660753 1.5061769 2.0606256 1.79783885 0.2461331 -0.5140512
## 5 -0.4853851 0.5453348 0.6177018 -0.4149544 -0.49309658 -1.4034825 0.2915514
## 6 -1.5254961 -1.4637933 1.5061769 0.5752776 1.79783885 0.7960050 0.2915514
# Clustering tendency of the standardised items (Hopkins statistic), sampling
# one point fewer than the number of respondents; the plot is suppressed.
data_seg_std %>%
  get_clust_tendency(n = nrow(.) - 1, graph = FALSE)
## $hopkins_stat
## [1] 0.6028032
##
## $plot
## NULL
# Pairwise Euclidean distances between respondents on the standardised items.
# "euclidean" is the documented method name and the spelling already used in
# the NbClust() call of this script.
Distance <- get_dist(data_seg_std, method = "euclidean")
# Heat map of the distance matrix (dark red = small distance, white = large).
fviz_dist(
  Distance,
  gradient = list(low = "darkred", mid = "grey95", high = "white")
)

# Number-of-clusters diagnostics based on k-means.
# kmeans() starts from randomly chosen centres, so the RNG seed is fixed
# before each diagnostic to make the curves reproducible between runs.
set.seed(123)
fviz_nbclust(data_seg_std, kmeans, method = "wss") +
  labs(subtitle = "Elbow method")

set.seed(123)
fviz_nbclust(data_seg_std, kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette analysis")

# Compare all NbClust indices for 2 to 10 k-means clusters on Euclidean
# distances; the printed result includes the majority-rule recommendation.
NbClust(
  data_seg_std,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "kmeans",
  index = "all"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 10 proposed 2 as the best number of clusters
## * 5 proposed 3 as the best number of clusters
## * 2 proposed 7 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 6 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## 2 5.8231 50.4280 17.2340 -1.1697 144.2888 1.250154e+13 6064.080 488.7244
## 3 1.2767 37.6804 12.2854 -1.5673 263.6807 9.022453e+12 4354.009 418.6722
## 4 0.6503 31.9249 12.2267 -2.0020 333.3147 8.263921e+12 3759.051 373.6658
## 5 2.2571 29.6029 7.8224 -1.7001 374.4693 8.725395e+12 2598.760 333.3159
## 6 0.5725 26.8283 8.8850 -1.7292 444.5704 6.444654e+12 2290.061 309.1341
## 7 14.2482 25.5832 4.1178 -1.3979 483.4065 6.059851e+12 1890.003 283.6750
## 8 0.0684 23.1989 7.9438 -1.9053 525.8342 5.283955e+12 1761.571 272.2360
## 9 0.5696 22.7178 11.6443 -1.3978 577.7530 4.078689e+12 1460.604 251.6290
## 10 1.7603 23.6874 7.7622 0.3559 708.3057 1.452285e+12 1303.030 224.4093
## Friedman Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky
## 2 5.5742 1.4896 0.3767 1.4160 0.2968 1.3614 -18.0518 -1.1818 0.3848
## 3 8.0354 1.7388 0.4052 1.7158 0.2097 1.3050 -13.5572 -1.0419 0.3631
## 4 8.9837 1.9483 0.3892 1.5854 0.2153 0.9076 4.7877 0.4529 0.3440
## 5 9.5475 2.1841 0.4175 1.5594 0.2047 1.4236 -9.2247 -1.2969 0.3289
## 6 11.7865 2.3550 0.4013 1.5719 0.1785 1.3557 -8.1340 -1.1350 0.3089
## 7 12.3394 2.5663 0.3800 1.5390 0.1832 2.0520 -16.9181 -2.1848 0.2949
## 8 13.8453 2.6742 0.3770 1.4809 0.1872 1.5993 -5.2459 -1.4970 0.2793
## 9 15.2710 2.8931 0.3812 1.4448 0.1870 0.7786 4.2659 1.2302 0.2694
## 10 21.6004 3.2441 0.3369 1.3719 0.2084 1.0064 -0.0765 -0.0259 0.2623
## Ball Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 244.3622 0.5361 1.6236 0.6234 0.1274 0.0029 1.4482 2.0363 0.9433
## 3 139.5574 0.4672 0.1422 1.3240 0.1245 0.0034 1.6879 1.8644 0.7460
## 4 93.4165 0.4950 0.7674 1.5628 0.1280 0.0038 1.5572 1.7664 0.5824
## 5 66.6632 0.4596 0.4319 2.2372 0.1074 0.0040 1.5684 1.6678 0.5271
## 6 51.5223 0.4442 0.3317 2.7789 0.1266 0.0042 1.6926 1.6087 0.5075
## 7 40.5250 0.4326 0.0876 3.2993 0.1091 0.0044 1.6455 1.5273 0.4767
## 8 34.0295 0.4338 0.1301 3.3316 0.1993 0.0045 1.5314 1.4963 0.4557
## 9 27.9588 0.4360 0.0768 3.5227 0.2084 0.0048 1.4508 1.4449 0.4199
## 10 22.4409 0.4496 0.1131 3.9167 0.2073 0.0049 1.5063 1.3651 0.3964
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.6579 35.3646 1.0000
## 3 0.6638 29.3788 1.0000
## 4 0.6515 25.1406 0.8678
## 5 0.5737 23.0370 1.0000
## 6 0.5494 25.4245 1.0000
## 7 0.5070 32.0944 1.0000
## 8 0.3729 23.5412 1.0000
## 9 0.5494 12.3022 0.2911
## 10 0.4004 17.9674 1.0000
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 7.0000 2.000 3.0000 10.0000 10.0000 3.000000e+00 3.000
## Value_Index 14.2482 50.428 4.9485 0.3559 130.5526 2.720555e+12 1710.071
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 3.0000 10.0000 7.0000 10.0000 10.0000 2.0000 2.0000
## Value_Index 25.0458 6.3295 -0.1035 0.3369 1.3719 0.2968 1.3614
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain
## Number_clusters 2.0000 2.0000 2.0000 3.0000 2.0000 2.0000 2.0000
## Value_Index -18.0518 -1.1818 0.3848 104.8048 0.5361 1.6236 0.6234
## Dunn Hubert SDindex Dindex SDbw
## Number_clusters 9.0000 0 2.0000 0 10.0000
## Value_Index 0.2084 0 1.4482 0 0.3964
##
## $Best.partition
## [1] 1 1 1 1 2 1 2 2 2 2 2 2 1 2 1 2 2 2 1 2 1 2 2 1 2 2 2 1 2 2 2 1 1 1 2 2 2
## [38] 2 2 2 2 2 1 1 2 1 1 2 2 2 1 2 2 2 1 2 1 2 1 2 1 1 2 2 2 2 1 2 1 2 2 2 2 1
## [75] 2 2 1 2 1 2 2 1 1 1 1 2 2 1 2 2 1 2 2 2 1 2 2 1 1 1 2 2 1 2 1
# k-means with 3 clusters and 25 random starts.
# NOTE(review): the NbClust majority rule recorded above favours 2 clusters
# while 3 are requested here -- confirm the choice is intentional.
# The seed is fixed because k-means initialises from random centres; without
# it the cluster numbering can change from run to run. The output recorded
# below comes from an unseeded run, so its cluster numbers may be permuted.
set.seed(123)
Clustering <- kmeans(data_seg_std, centers = 3, nstart = 25)
Clustering
## K-means clustering with 3 clusters of sizes 17, 33, 55
##
## Cluster means:
## Q51 Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q56
## 1 -0.2406531 -1.70016130 1.3232556 1.5363852 1.66307794 0.5048963
## 2 -0.1386815 0.07349408 0.6311635 0.3202179 0.09005062 0.4627493
## 3 0.1575926 0.48140795 -0.7877044 -0.6670134 -0.56807265 -0.4337085
## Q58
## 1 -0.4192744
## 2 -0.4408146
## 3 0.3940827
##
## Clustering vector:
## [1] 2 1 1 1 3 1 3 3 2 3 3 3 2 3 2 3 3 2 1 3 2 3 3 1 3 2 3 2 2 3 2 2 2 2 3 3 3
## [38] 3 3 3 3 3 2 1 3 1 2 3 3 3 2 3 3 3 1 3 2 3 1 2 2 1 3 3 3 3 2 2 1 2 3 3 3 2
## [75] 3 3 1 3 2 3 3 1 2 2 2 2 3 1 3 3 1 3 3 3 2 3 3 2 2 2 3 3 2 3 1
##
## Within cluster sum of squares by cluster:
## [1] 68.30014 159.23512 185.67036
## (between_SS / total_SS = 43.2 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot the first 3-cluster solution; points are labelled (non-overlapping
# labels via repel) so that outlying respondents can be identified.
fviz_cluster(
  object = Clustering,
  data = data_seg_std,
  palette = "Set1",
  ggtheme = theme_bw(),
  repel = TRUE,
  labelsize = 8
)

# Second outlier round: drop six respondents and renumber the rows again.
# The IDs refer to the numbering assigned after the previous removal.
# NOTE(review): presumably these IDs were picked from the cluster plot above
# -- confirm.
# ID is numeric, so it is compared against numbers rather than strings.
data <- data %>%
  filter(!(ID %in% c(68, 8, 101, 10, 100, 105))) %>%
  mutate(ID = row_number())
# Rebuild the segmentation subset and its standardised version; items are
# selected by name (everything except ID) instead of by column position.
data_seg <- as.data.frame(
  data[c("ID", "Q51", "Q53a_1", "Q53b_1", "Q53c_1", "Q53d_1", "Q56", "Q58")]
)
data_seg_std <- as.data.frame(
  scale(data_seg[setdiff(names(data_seg), "ID")])
)
head(data_seg_std)
## Q51 Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q56 Q58
## 1 0.5460205 -0.4931529 0.1930030 1.0378422 -0.01397306 0.2435151 -2.1412135
## 2 0.5460205 -2.0188446 1.5260700 2.0117414 1.83047059 0.7914240 -0.5353034
## 3 -2.5725968 -1.0017168 1.0817143 2.0117414 1.83047059 0.2435151 0.2676517
## 4 1.5855597 -2.0188446 1.5260700 2.0117414 1.83047059 0.2435151 -0.5353034
## 5 -0.4935186 0.5239749 0.6373587 -0.4230068 -0.47508397 -1.4002118 0.2676517
## 6 -1.5330577 -1.5102807 1.5260700 0.5508925 1.83047059 0.7914240 0.2676517
# Re-run k-means (3 clusters, 25 random starts) on the reduced sample.
# The seed is fixed because k-means initialises from random centres; without
# it the cluster numbering can change from run to run. The output recorded
# below comes from an unseeded run, so its cluster numbers may be permuted.
set.seed(123)
Clustering <- kmeans(data_seg_std, centers = 3, nstart = 25)
Clustering
## K-means clustering with 3 clusters of sizes 31, 16, 52
##
## Cluster means:
## Q51 Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q56
## 1 -0.09111633 0.06462689 0.6230246 0.3152717 0.1347724 0.4732834
## 2 -0.36357618 -1.73277736 1.3316644 1.5552262 1.6863734 0.6544468
## 3 0.16618895 0.49463470 -0.7811614 -0.6664816 -0.5992292 -0.4835179
## Q58
## 1 -0.4316963
## 2 -0.4349340
## 3 0.3911832
##
## Clustering vector:
## [1] 1 2 2 2 3 2 3 1 3 3 1 3 1 3 3 1 2 3 1 3 3 2 3 1 3 1 1 3 1 1 1 1 3 3 3 3 3 3
## [39] 3 3 1 2 3 2 1 3 3 3 1 3 3 3 2 3 1 3 2 1 1 2 3 3 3 3 1 2 1 3 3 3 1 3 3 2 3 1
## [77] 3 3 2 1 1 1 1 3 2 3 3 2 3 3 3 1 3 3 1 1 3 1 3
##
## Within cluster sum of squares by cluster:
## [1] 149.83868 56.85868 170.13367
## (between_SS / total_SS = 45.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot the cluster solution obtained after the second outlier round, with
# non-overlapping point labels.
fviz_cluster(
  object = Clustering,
  data = data_seg_std,
  palette = "Set1",
  ggtheme = theme_bw(),
  repel = TRUE,
  labelsize = 8
)

# Third outlier round: drop nine respondents and renumber the rows again.
# The IDs refer to the numbering assigned after the previous removal.
# NOTE(review): presumably these IDs were picked from the cluster plot above
# -- confirm.
# ID is numeric, so it is compared against numbers rather than strings.
data <- data %>%
  filter(!(ID %in% c(54, 90, 73, 26, 24, 8, 60, 22, 3))) %>%
  mutate(ID = row_number())
# Rebuild the segmentation subset and its standardised version; items are
# selected by name (everything except ID) instead of by column position.
data_seg <- as.data.frame(
  data[c("ID", "Q51", "Q53a_1", "Q53b_1", "Q53c_1", "Q53d_1", "Q56", "Q58")]
)
data_seg_std <- as.data.frame(
  scale(data_seg[setdiff(names(data_seg), "ID")])
)
head(data_seg_std)
## Q51 Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q56 Q58
## 1 0.5076327 -0.57132494 0.2268151 1.1388323 0.0000000 0.2634498 -2.3561954
## 2 0.5076327 -2.14538344 1.5877057 2.1486344 1.8636445 0.8023243 -0.5890489
## 3 1.6498061 -2.14538344 1.5877057 2.1486344 1.8636445 0.2634498 -0.5890489
## 4 -0.6345408 0.47804740 0.6804453 -0.3758708 -0.4659111 -1.3531738 0.2945244
## 5 -1.7767143 -1.62069727 1.5877057 0.6339313 1.8636445 0.8023243 0.2945244
## 6 1.6498061 -0.04663877 -0.6804453 -0.3758708 -0.4659111 0.2634498 -0.5890489
# Final k-means solution (3 clusters, 25 random starts) on the cleaned sample.
# The seed is fixed because k-means initialises from random centres; the
# group numbers assigned here are reused by the profile plot and the group
# comparisons further down, so they should be stable between runs. The output
# recorded below comes from an unseeded run, so its cluster numbers may be
# permuted.
set.seed(123)
Clustering <- kmeans(data_seg_std, centers = 3, nstart = 25)
Clustering
## K-means clustering with 3 clusters of sizes 49, 28, 13
##
## Cluster means:
## Q51 Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q56 Q58
## 1 0.2046070 0.55300256 -0.7637651 -0.6746897 -0.5895202 -0.4623812 0.4027171
## 2 -0.1858298 -0.06537756 0.6642442 0.4355773 0.2163159 0.4943960 -0.4628241
## 3 -0.3709623 -1.94358106 1.4481271 1.6048948 1.7561265 0.6779686 -0.5210817
##
## Clustering vector:
## [1] 2 3 3 1 3 1 1 1 2 1 2 1 1 2 3 1 2 1 1 1 1 2 1 2 2 2 2 1 1 1 1 1 1 1 1 2 3 1
## [39] 3 2 1 1 1 2 1 1 1 3 2 1 3 2 2 1 1 1 1 2 3 2 1 1 1 2 1 3 1 2 1 1 3 2 2 2 2 1
## [77] 3 1 1 3 1 1 2 1 1 2 2 1 2 1
##
## Within cluster sum of squares by cluster:
## [1] 157.75180 125.41395 42.31302
## (between_SS / total_SS = 47.8 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot the final cluster solution with non-overlapping point labels.
fviz_cluster(
  object = Clustering,
  data = data_seg_std,
  palette = "Set1",
  ggtheme = theme_bw(),
  repel = TRUE,
  labelsize = 8
)

# Cluster centroids of the final solution: one row per cluster, one column
# per standardised clustering item.
Averages <- Clustering[["centers"]]
print(Averages)
## Q51 Q53a_1 Q53b_1 Q53c_1 Q53d_1 Q56 Q58
## 1 0.2046070 0.55300256 -0.7637651 -0.6746897 -0.5895202 -0.4623812 0.4027171
## 2 -0.1858298 -0.06537756 0.6642442 0.4355773 0.2163159 0.4943960 -0.4628241
## 3 -0.3709623 -1.94358106 1.4481271 1.6048948 1.7561265 0.6779686 -0.5210817
# Profile plot of the cluster centroids: one line per cluster across the
# clustering items (values are z-scores, so 0 is the overall mean).
Figure <- as.data.frame(Averages)
# Cluster number; seq_len() is safe even for an empty centroid table.
Figure$id <- seq_len(nrow(Figure))
# Long format: one row per (cluster, item); creates columns `name` and `value`.
Figure <- pivot_longer(Figure, cols = -id)
# Cluster label as a factor; levels follow the clusters actually present
# instead of a hard-coded 1..3.
Figure$Group <- factor(Figure$id)
# Item as a factor in the column order of the centroid table, so the x axis
# keeps that order.
Figure$ImeF <- factor(Figure$name, levels = colnames(Averages))
# ggplot2 is already attached at the top of the script, so the duplicate
# library() call that used to sit here is dropped.
ggplot(Figure, aes(x = ImeF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, col = Group), size = 3) +
  geom_line(aes(group = id), linewidth = 1) +
  ylab("Averages") +
  xlab("Cluster variables") +
  scale_color_brewer(palette = "Set1") +
  ylim(-2, 2) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 10))

# Store each respondent's cluster membership in both the segmentation subset
# and the full data set.
data$Group <- data_seg$Group <- Clustering$cluster
# ANOVA of every clustering item on cluster membership; summary() prints one
# table per response variable.
fit <- aov(
  cbind(Q51, Q53a_1, Q53b_1, Q53c_1, Q53d_1, Q56, Q58) ~ as.factor(Group),
  data = data
)
summary(fit)
## Response Q51 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 3.685 1.84247 2.4838 0.08933 .
## Residuals 87 64.537 0.74181
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Q53a_1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 233.248 116.624 112.68 < 2.2e-16 ***
## Residuals 87 90.041 1.035
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Q53b_1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 331.42 165.710 142.63 < 2.2e-16 ***
## Residuals 87 101.08 1.162
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Q53c_1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 239.68 119.842 95.27 < 2.2e-16 ***
## Residuals 87 109.44 1.258
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Q53d_1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 269.18 134.589 83.148 < 2.2e-16 ***
## Residuals 87 140.82 1.619
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Q56 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 80.222 40.111 15.423 1.849e-06 ***
## Residuals 87 226.267 2.601
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Q58 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 22.383 11.1915 10.627 7.423e-05 ***
## Residuals 87 91.617 1.0531
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Q50a by cluster.
# Shapiro-Wilk normality test within each cluster (the recorded p-values are
# all below 0.05, presumably why a rank-based test follows).
shapiro_test(group_by(data, Group), Q50a)
## # A tibble: 3 × 4
## Group variable statistic p
## <int> <chr> <dbl> <dbl>
## 1 1 Q50a 0.878 0.000113
## 2 2 Q50a 0.868 0.00217
## 3 3 Q50a 0.646 0.000167
# Kruskal-Wallis rank sum test of Q50a across the clusters.
kruskal.test(Q50a ~ Group, data = data)
##
## Kruskal-Wallis rank sum test
##
## data: Q50a by Group
## Kruskal-Wallis chi-squared = 30.354, df = 2, p-value = 2.563e-07
# Effect size (eta squared based on the H statistic) for the test above.
data %>%
  kruskal_effsize(Q50a ~ Group)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Q50a 90 0.326 eta2[H] large
# Q44 by cluster.
# Shapiro-Wilk normality test within each cluster (the recorded p-values are
# all below 0.05, presumably why a rank-based test follows).
shapiro_test(group_by(data, Group), Q44)
## # A tibble: 3 × 4
## Group variable statistic p
## <int> <chr> <dbl> <dbl>
## 1 1 Q44 0.759 0.000000138
## 2 2 Q44 0.724 0.00000629
## 3 3 Q44 0.785 0.00454
# Kruskal-Wallis rank sum test of Q44 across the clusters.
kruskal.test(Q44 ~ Group, data = data)
##
## Kruskal-Wallis rank sum test
##
## data: Q44 by Group
## Kruskal-Wallis chi-squared = 6.224, df = 2, p-value = 0.04451
# Effect size (eta squared based on the H statistic) for the test above.
data %>%
  kruskal_effsize(Q44 ~ Group)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Q44 90 0.0486 eta2[H] small
# Q46 by cluster.
# Shapiro-Wilk normality test within each cluster (the recorded p-values are
# all below 0.05, presumably why a rank-based test follows).
shapiro_test(group_by(data, Group), Q46)
## # A tibble: 3 × 4
## Group variable statistic p
## <int> <chr> <dbl> <dbl>
## 1 1 Q46 0.792 0.000000705
## 2 2 Q46 0.922 0.0385
## 3 3 Q46 0.814 0.00991
# Kruskal-Wallis rank sum test of Q46 across the clusters.
kruskal.test(Q46 ~ Group, data = data)
##
## Kruskal-Wallis rank sum test
##
## data: Q46 by Group
## Kruskal-Wallis chi-squared = 29.014, df = 2, p-value = 5.008e-07
# Effect size (eta squared based on the H statistic) for the test above.
data %>%
  kruskal_effsize(Q46 ~ Group)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Q46 90 0.311 eta2[H] large