# Load dataset from Excel file
library(readxl)
mydata <- read_excel("C:/Users/Tamara/Desktop/R data/DataPerception.xlsx")

# Display first few rows
head(mydata)
## # A tibble: 6 × 22
##      ID Q2a_1 Q2b_1 Q2c_1 Q3a_1 Q3b_1 Q3c_1 Q4a_1 Q4b_1 Q4c_1 Q5a_1 Q5b_1 Q5c_1
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1     5     6     5     7     7     5     7     7     5     7     7     6
## 2     2     7     7     6     2     6     6     4     7     7     3     6     6
## 3     3     7     6     6     6     6     6     7     7     5     6     7     5
## 4     4     7     3     5     6     6     3     3     6     5     3     6     6
## 5     5     6     5     5     5     5     6     6     6     7     4     6     7
## 6     6     6     6     6     5     7     6     6     7     5     7     7     5
## # ℹ 9 more variables: Q6a_1 <dbl>, Q6b_1 <dbl>, Q6c_1 <dbl>, Q7a_1 <dbl>,
## #   Q7b_1 <dbl>, Q7c_1 <dbl>, Age <dbl>, Gender <dbl>, Location <dbl>
colnames(mydata)
##  [1] "ID"       "Q2a_1"    "Q2b_1"    "Q2c_1"    "Q3a_1"    "Q3b_1"   
##  [7] "Q3c_1"    "Q4a_1"    "Q4b_1"    "Q4c_1"    "Q5a_1"    "Q5b_1"   
## [13] "Q5c_1"    "Q6a_1"    "Q6b_1"    "Q6c_1"    "Q7a_1"    "Q7b_1"   
## [19] "Q7c_1"    "Age"      "Gender"   "Location"
mydata$GenderFactor <- factor(mydata$Gender, 
                             levels = c(1, 2), 
                             labels = c("Male", "Female"))

mydata$LocationFactor <- factor(mydata$Location, 
                             levels = c(1, 2, 3), 
                             labels = c("Urban", "Suburban", "Rural"))

For the purpose of clustering, I chose 6 cluster variables:!!!

#Saving standardized cluster variables into new data frame

mydata_clu_new <- as.data.frame(scale(mydata[c("Q2a_1", "Q3a_1", "Q4a_1", "Q5a_1", "Q6a_1", "Q7a_1")])) 
#Finding outliers

mydata$Dissimilarity <- sqrt(mydata_clu_new$Q2a_1^2 + mydata_clu_new$Q3a_1^2 + mydata_clu_new$Q4a_1^2 + mydata_clu_new$Q5a_1 + mydata_clu_new$Q6a_1^2 + mydata_clu_new$Q7a_1^2) 
## Warning in sqrt(mydata_clu_new$Q2a_1^2 + mydata_clu_new$Q3a_1^2 +
## mydata_clu_new$Q4a_1^2 + : NaNs produced
#Finding units with highest value of dissimilarity

head(mydata[order(-mydata$Dissimilarity), c("ID", "Dissimilarity")]) 
## # A tibble: 6 × 2
##      ID Dissimilarity
##   <dbl>         <dbl>
## 1    14          4.28
## 2    40          4.21
## 3   120          4.14
## 4    71          3.84
## 5   138          3.83
## 6    34          3.39

There is a relatively big jump between third and fourth unit, so I will check first three units.

#Showing units ID14, 40, 120

print(mydata[c(14,40,120), ])
## # A tibble: 3 × 25
##      ID Q2a_1 Q2b_1 Q2c_1 Q3a_1 Q3b_1 Q3c_1 Q4a_1 Q4b_1 Q4c_1 Q5a_1 Q5b_1 Q5c_1
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1    14     7     7     5     3     6     7     6     6     7     3     6     7
## 2    40     5     3     3     2     7     7     2     7     7     2     7     7
## 3   120     1     6     5     6     6     7     7     5     6     6     6     6
## # ℹ 12 more variables: Q6a_1 <dbl>, Q6b_1 <dbl>, Q6c_1 <dbl>, Q7a_1 <dbl>,
## #   Q7b_1 <dbl>, Q7c_1 <dbl>, Age <dbl>, Gender <dbl>, Location <dbl>,
## #   GenderFactor <fct>, LocationFactor <fct>, Dissimilarity <dbl>

They don’t seem unusual.

#Removing ...
library(factoextra) 
## Warning: package 'factoextra' was built under R version 4.4.2
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#Finding Euclidean distances based on 6 Cluster variables, then saving them into object Distances

Distances <- get_dist(mydata_clu_new, 
                      method = "euclidian")

#Showing matrix of distances

fviz_dist(Distances, 
          gradient = list(low = "slateblue4",
                          mid = "skyblue3",
                          high = "skyblue"))

There are three or four groups of homogeneous objects forming, but they are not very evident.

#Hopkins statistics

library(factoextra) 
get_clust_tendency(mydata_clu_new, 
                   n = nrow(mydata_clu_new) - 1,
                   graph = FALSE)
## $hopkins_stat
## [1] 0.6418886
## 
## $plot
## NULL

Hopkins statistics is above 0.5 - data is clusterable.

#Determining number of clusters for K-means clustering

library(factoextra)
library(NbClust)

fviz_nbclust(mydata_clu_new, kmeans, method = "wss") +
  labs(subtitle = "Elbow method")

It seems that the biggest break is at 5, indicating that we should form 5 clusters based on Elbow method.

#Determining number of clusters for K-means clustering

fviz_nbclust(mydata_clu_new, kmeans, method = "silhouette")+
  labs(subtitle = "Silhouette analysis")

Since we want average Silhouette to be as high as possible, according to this index, it is the best option to form 2 clusters, but 4 is also almost as good.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(factoextra)
WARD <- mydata_clu_new %>%
  get_dist(method = "euclidean") %>%  
  hclust(method = "ward.D2")          

WARD
## 
## Call:
## hclust(d = ., method = "ward.D2")
## 
## Cluster method   : ward.D2 
## Distance         : euclidean 
## Number of objects: 152
library(factoextra)
fviz_dend(WARD)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

library(NbClust)
NbClust(mydata_clu_new, 
        distance = "euclidean", 
        min.nc = 2, max.nc = 10,
        method = "kmeans", 
        index = "all")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 8 proposed 2 as the best number of clusters 
## * 5 proposed 3 as the best number of clusters 
## * 3 proposed 5 as the best number of clusters 
## * 2 proposed 6 as the best number of clusters 
## * 2 proposed 9 as the best number of clusters 
## * 3 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************
## $All.index
##         KL      CH Hartigan     CCC    Scott      Marriot    TrCovW   TraceW
## 2  16.5370 59.5648  22.8451 -1.2199 190.7707 4.307472e+12 12926.431 648.4867
## 3   0.1354 45.4359  23.4358 -1.8840 367.1832 3.036401e+12  9223.823 562.7755
## 4   0.7123 42.5792  23.8440 -1.6591 474.6207 2.662356e+12  6927.557 486.2886
## 5   2.2046 42.7495  14.9309  0.1108 564.7277 2.299495e+12  4648.256 418.8142
## 6   2.5760 40.3828   9.9144  0.8422 663.8276 1.725228e+12  3722.664 380.1973
## 7   0.8891 37.3325   9.0992  0.7345 727.1115 1.548550e+12  3098.369 356.0210
## 8   0.4194 35.0637  11.8946  0.7330 782.5853 1.404137e+12  2947.080 334.9989
## 9   9.5096 34.4609   5.7515  1.5349 847.1365 1.162195e+12  2492.866 309.4387
## 10  0.0898 32.2757  11.9974  1.1503 877.7749 1.172881e+12  2235.064 297.4742
##    Friedman  Rubin Cindex     DB Silhouette   Duda Pseudot2   Beale Ratkowsky
## 2    3.2372 1.3971 0.4534 1.5833     0.2516 1.2250 -18.5509 -0.6976    0.3433
## 3    5.4346 1.6099 0.4247 1.7077     0.2047 1.5054 -24.1722 -1.2658    0.3359
## 4    6.8884 1.8631 0.4612 1.5760     0.2104 1.1178  -5.7980 -0.3969    0.3241
## 5    7.5885 2.1633 0.4244 1.3974     0.2328 1.2897 -11.6811 -0.8421    0.3275
## 6    9.3895 2.3830 0.3998 1.3793     0.2277 1.4435 -10.4458 -1.1283    0.3105
## 7   10.5507 2.5448 0.4490 1.4012     0.2230 1.0142  -0.5887 -0.0525    0.2941
## 8   11.6282 2.7045 0.4165 1.4359     0.2077 1.6961 -10.2607 -1.4862    0.2804
## 9   12.8642 2.9279 0.4229 1.4137     0.2152 1.1606  -2.4907 -0.4969    0.2704
## 10  13.4092 3.0456 0.4162 1.3749     0.2092 1.8949 -12.7511 -1.6655    0.2591
##        Ball Ptbiserial    Frey McClain   Dunn Hubert SDindex Dindex   SDbw
## 2  324.2434     0.4586  0.8104  0.7394 0.1312 0.0023  1.6254 1.9756 1.2828
## 3  187.5918     0.4468  0.1855  1.4472 0.0831 0.0026  1.6291 1.8218 1.0393
## 4  121.5722     0.4777  0.0745  1.9274 0.0977 0.0028  1.4802 1.6897 0.6214
## 5   83.7628     0.5135  0.2775  2.2588 0.1090 0.0032  1.3569 1.5742 0.5371
## 6   63.3662     0.5105  0.7706  2.6311 0.1078 0.0033  1.3411 1.5061 0.4869
## 7   50.8601     0.4811  1.1917  3.1800 0.1112 0.0034  1.5588 1.4607 0.4626
## 8   41.8749     0.4367  0.0850  4.0759 0.1051 0.0036  1.6493 1.4036 0.4293
## 9   34.3821     0.4416  0.4428  4.3256 0.1112 0.0038  1.5593 1.3558 0.3930
## 10  29.7474     0.4290 -0.1904  4.7313 0.1480 0.0039  1.5085 1.3214 0.3768
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.7006            43.1654            1
## 3          0.6533            38.2096            1
## 4          0.6459            30.1530            1
## 5          0.6222            31.5710            1
## 6          0.5356            29.4770            1
## 7          0.6188            25.8772            1
## 8          0.4889            26.1338            1
## 9          0.4643            20.7641            1
## 10         0.4174            37.6933            1
## 
## $Best.nc
##                     KL      CH Hartigan    CCC    Scott      Marriot   TrCovW
## Number_clusters  2.000  2.0000   5.0000 9.0000   3.0000            3    3.000
## Value_Index     16.537 59.5648   8.9131 1.5349 176.4125 897026528841 3702.608
##                  TraceW Friedman   Rubin Cindex      DB Silhouette  Duda
## Number_clusters  5.0000   3.0000  9.0000 6.0000 10.0000     2.0000 2.000
## Value_Index     28.8575   2.1974 -0.1056 0.3998  1.3749     0.2516 1.225
##                 PseudoT2   Beale Ratkowsky     Ball PtBiserial Frey McClain
## Number_clusters   2.0000  2.0000    2.0000   3.0000     5.0000    1  2.0000
## Value_Index     -18.5509 -0.6976    0.3433 136.6515     0.5135   NA  0.7394
##                   Dunn Hubert SDindex Dindex    SDbw
## Number_clusters 10.000      0  6.0000      0 10.0000
## Value_Index      0.148      0  1.3411      0  0.3768
## 
## $Best.partition
##   [1] 1 2 1 2 1 1 2 2 2 1 2 1 1 2 2 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 2 1 2 2 2 2 1
##  [38] 1 1 2 2 2 1 2 2 2 1 1 1 2 1 1 2 1 1 2 2 2 2 2 1 2 2 1 1 1 1 1 2 2 1 1 2 1
##  [75] 2 1 2 2 1 2 2 1 2 2 1 2 2 2 2 2 1 2 1 1 1 1 2 2 2 1 2 2 1 2 1 1 1 1 1 1 2
## [112] 1 2 1 2 1 1 1 1 1 1 1 2 1 1 2 2 1 2 2 2 1 1 2 1 1 2 2 1 2 2 1 2 1 2 2 2 2
## [149] 1 2 1 1

According to the majority rule, the best number of clusters is 5.

Clustering <- kmeans(mydata_clu_new, 
                     centers = 4, #Number of groups
                     nstart = 25) #Number of attempts at different starting leader positions

Clustering
## K-means clustering with 4 clusters of sizes 46, 26, 32, 48
## 
## Cluster means:
##         Q2a_1       Q3a_1      Q4a_1      Q5a_1      Q6a_1       Q7a_1
## 1  0.10210822 -0.05550022  0.4076716  0.3829262  0.5054298 -0.74807794
## 2  0.27318259 -0.29962981 -0.4237942 -0.2482213 -1.7159132 -0.09898356
## 3  0.06563359 -1.09898469 -1.1577087 -1.2082286  0.2610379 -0.48917780
## 4 -0.28958334  0.94814365  0.6106757  0.5729680  0.2710575  1.09664265
## 
## Clustering vector:
##   [1] 4 3 1 3 4 1 2 3 2 1 3 1 1 2 3 4 1 3 1 4 3 1 4 4 1 4 4 4 4 1 3 4 2 3 4 2 4
##  [38] 1 1 2 1 3 1 3 3 3 4 1 1 2 4 4 2 4 4 3 2 1 3 1 4 1 2 4 4 4 1 4 1 2 4 4 2 4
##  [75] 2 1 3 3 4 1 3 1 3 3 1 1 3 1 3 3 1 2 1 4 4 2 3 1 3 1 1 3 4 1 4 4 4 4 4 1 2
## [112] 4 3 4 1 1 1 1 1 4 4 1 3 1 4 1 2 4 2 2 2 1 4 2 4 1 3 2 2 1 2 4 3 4 1 3 2 2
## [149] 4 3 4 4
## 
## Within cluster sum of squares by cluster:
## [1] 131.9942  95.0562 119.3242 139.4512
##  (between_SS / total_SS =  46.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
library(factoextra)
fviz_cluster(Clustering, 
             palette = "Set1", 
             repel = FALSE,
             ggtheme = theme_bw(),
             data = mydata_clu_new)

Units ID40, ID84 and ID120 seem to be far away from the center, so I will remove them.

mydata <- mydata %>%
  filter(!ID %in% c(40, 84, 120))

mydata$ID <- seq(1, nrow(mydata))

mydata_clu_new <- as.data.frame(scale(mydata[c("Q2a_1", "Q3a_1", "Q4a_1", "Q5a_1", "Q6a_1", "Q7a_1")])) 
Clustering <- kmeans(mydata_clu_new, 
                     centers = 4, #Number of groups
                     nstart = 25) #Number of attempts at different starting leader positions

Clustering
## K-means clustering with 4 clusters of sizes 31, 41, 31, 46
## 
## Cluster means:
##         Q2a_1       Q3a_1      Q4a_1      Q5a_1      Q6a_1       Q7a_1
## 1 -0.03128611 -0.06424249 -0.2892342 -0.1214947 -1.6084816  0.08157507
## 2 -0.13282415  0.97382376  0.6853525  0.6053649  0.4642818  1.13824513
## 3  0.11828490 -1.13229077 -1.2108368 -1.2313090  0.2424502 -0.47239185
## 4  0.05975712 -0.06161399  0.4000597  0.3721077  0.5067699 -0.75114631
## 
## Clustering vector:
##   [1] 2 3 4 3 2 4 1 3 1 4 3 4 4 1 3 2 4 3 4 2 3 4 2 2 4 2 2 2 2 4 3 2 1 3 1 1 2
##  [38] 4 4 4 3 4 3 3 3 2 4 4 1 1 2 1 1 2 3 1 4 3 4 2 4 1 2 2 2 4 2 4 1 2 2 1 2 1
##  [75] 4 3 3 2 4 3 4 3 4 4 3 4 3 3 4 1 4 2 2 1 3 4 3 4 4 3 2 4 2 2 2 2 2 4 1 2 3
## [112] 2 4 4 4 4 4 2 4 3 4 2 4 1 2 1 1 1 4 2 1 1 4 3 1 1 4 1 2 3 1 4 3 1 1 1 3 2
## [149] 2
## 
## Within cluster sum of squares by cluster:
## [1] 127.79269  95.91834 114.09601 137.52207
##  (between_SS / total_SS =  46.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
library(factoextra)
fviz_cluster(Clustering, 
             palette = "Set1", 
             repel = FALSE,
             ggtheme = theme_bw(),
             data = mydata_clu_new)

#Average values of cluster variables to describe groups

Averages <- Clustering$centers
Averages 
##         Q2a_1       Q3a_1      Q4a_1      Q5a_1      Q6a_1       Q7a_1
## 1 -0.03128611 -0.06424249 -0.2892342 -0.1214947 -1.6084816  0.08157507
## 2 -0.13282415  0.97382376  0.6853525  0.6053649  0.4642818  1.13824513
## 3  0.11828490 -1.13229077 -1.2108368 -1.2313090  0.2424502 -0.47239185
## 4  0.05975712 -0.06161399  0.4000597  0.3721077  0.5067699 -0.75114631
Figure <- as.data.frame(Averages)
Figure$ID <- 1:nrow(Figure)

library(tidyr)
Figure <- pivot_longer(Figure, cols = c("Q2a_1", "Q3a_1", "Q4a_1", "Q5a_1", "Q6a_1", "Q7a_1"))

Figure$Group <- factor(Figure$ID, 
                       levels = c(1, 2, 3, 4), 
                       labels = c("1", "2", "3", "4"))

Figure$NameF <- factor(Figure$name, 
                       levels = c("Q2a_1", "Q3a_1", "Q4a_1", "Q5a_1", "Q6a_1", "Q7a_1"), 
                       labels = c("Cash_Safety", "Cash_Speed", "Cash_Ease of Use", "Cash_Convenience", "Cash_Privacy",  "Cash_Tracking Expenses"))

library(ggplot2)
ggplot(Figure, aes(x = NameF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, col = Group), size = 4) +
  geom_line(aes(group = ID), linewidth = 1) +
  ylab("Averages") +
  xlab("Cluster variables")+
  ylim(-2.5, 2.5) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 10))

#Saving where each unit belongs

mydata$Group <- Clustering$cluster
#Checking if clustering variables successfully differentiate between groups

fit <- aov(cbind(Q2a_1, Q3a_1, Q4a_1, Q5a_1, Q6a_1, Q7a_1) ~ as.factor(Group), 
           data = mydata)

summary(fit)
##  Response Q2a_1 :
##                   Df  Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group)   3   2.263 0.75445  0.4455 0.7209
## Residuals        145 245.562 1.69353               
## 
##  Response Q3a_1 :
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   3 202.25  67.416  55.231 < 2.2e-16 ***
## Residuals        145 176.99   1.221                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Q4a_1 :
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   3 161.36  53.787  49.208 < 2.2e-16 ***
## Residuals        145 158.49   1.093                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Q5a_1 :
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   3 216.45  72.149  42.046 < 2.2e-16 ***
## Residuals        145 248.81   1.716                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Q6a_1 :
##                   Df  Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   3 104.912  34.971   109.5 < 2.2e-16 ***
## Residuals        145  46.309   0.319                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Q7a_1 :
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   3 337.88 112.627  67.412 < 2.2e-16 ***
## Residuals        145 242.25   1.671                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

It differs for at least one of the groups for all variables.

#Additional variables

aggregate(mydata$Age, 
          by = list(mydata$Group), 
          FUN = mean)
##   Group.1        x
## 1       1 37.90323
## 2       2 44.41463
## 3       3 37.22581
## 4       4 36.45652
#Checking normal distribution of variables

library(dplyr)
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.4.2
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
mydata %>%
  group_by(as.factor(mydata$Group)) %>%
  shapiro_test(Age)
## # A tibble: 4 × 4
##   `as.factor(mydata$Group)` variable statistic        p
##   <fct>                     <chr>        <dbl>    <dbl>
## 1 1                         Age          0.863 0.000990
## 2 2                         Age          0.896 0.00128 
## 3 3                         Age          0.896 0.00570 
## 4 4                         Age          0.928 0.00710

None of the variables are normally distributed. -> Kruskal Walis

kruskal.test(Age ~ as.factor(Group), 
             data = mydata)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Age by as.factor(Group)
## Kruskal-Wallis chi-squared = 6.971, df = 3, p-value = 0.07283

Not significant.

#Checking the association between the location and classification into 4 groups
chi_square <- chisq.test(mydata$LocationFactor, as.factor(mydata$Group))
chi_square
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$LocationFactor and as.factor(mydata$Group)
## X-squared = 7.1807, df = 6, p-value = 0.3045

Not significant.

#Checking the association between the gender and classification into 4 groups
chi_square <- chisq.test(mydata$GenderFactor, as.factor(mydata$Group))
chi_square
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$GenderFactor and as.factor(mydata$Group)
## X-squared = 4.6171, df = 3, p-value = 0.2021

Not significant.