#install.packages("readxl")
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the raw survey export into a plain data.frame.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this file exists; consider a project-relative path.
survey_path <- "C:/Users/Pino/Desktop/IMB/MVA/Survey.xlsx"
mydata <- as.data.frame(read_excel(survey_path))

# Drop the first data row via a temporary row ID (presumably a question-text
# row rather than a respondent -- TODO confirm against the spreadsheet).
# seq_len() is used instead of seq(1, n): for n == 0 the latter yields c(1, 0).
mydata$ID <- seq_len(nrow(mydata))
mydata <- mydata %>%
  filter(!ID %in% 1)

# Renumber so that IDs are contiguous again after the removal.
mydata$ID <- seq_len(nrow(mydata))
head(mydata)
##   Q1 Q21 Q22a Q23a Q23b Q23c Q23d Q23e Q23f Q24 Q25 Q26 Q27a Q27b Q27c Q28 Q29a
## 1  1   6    3    1    1    1    1    1    1   2   3   1    1    1    0   2    3
## 2  1   5    4    1    1    1    1    1    1   1   4   1    1    0    0   2    4
## 3  1   6    4    2    1    1    1    2    2   2   3   2    0    0    1   1    4
## 4  1   6    5    1    1    1    1    1    1   1   4   1    0    0    1   2    5
## 5  1   6    5    1    1    1    1    1    1   1   4   1    0    0    1   2    5
## 6  1   6    5    2    1    1    1    1    1   4   3   1    0    1    0   1    4
##   Q29b Q29c Q29d Q30a Q30b Q30c Q30d Q30e Q31a Q31b Q33a Q33b Q33c Q33d Q33e
## 1    3    4    5    3    3    3    3    3    4    4    3    4    5    3    4
## 2    2    2    1    3    3    3    3    3    5    2    2    4    4    5    4
## 3    4    2    5    4    4    4    4    1    2    4    5    4    4    5    5
## 4    2    1    1    2    2    4    1    3    5    1    4    4    5    3    4
## 5    2    2    2    5    5    5    5    5    5    2    3    4    4    5    5
## 6    1    1    4    3    5    5    2    4    4    5    5    4    5    4    5
##   Q33f Poslovalnica_podpora_in_usmerjanje
## 1    4                                  4
## 2    5                                  4
## 3    5                                  5
## 4    5                                  5
## 5    5                                  4
## 6    4                                  4
##   Mobilna aplikacija_podpora_in_usmerjanje Poslovalnica_brezkrbnost
## 1                                        4                        3
## 2                                        3                        4
## 3                                        2                        5
## 4                                        3                        5
## 5                                        3                        4
## 6                                        2                        4
##   Mobilna aplikacija_brezkrbnost Poslovalnica_varnost
## 1                              3                    4
## 2                              4                    4
## 3                              2                    5
## 4                              4                    5
## 5                              4                    4
## 6                              3                    4
##   Mobilna aplikacija_varnost Poslovalnica_dostopnost
## 1                          3                       3
## 2                          4                       3
## 3                          3                       5
## 4                          4                       5
## 5                          3                       4
## 6                          3                       5
##   Mobilna aplikacija_dostopnost Poslovalnica_jasnost Mobilna aplikacija_jasnost
## 1                             3                    4                          4
## 2                             5                    5                          4
## 3                             2                    5                          2
## 4                             3                    5                          4
## 5                             5                    4                          4
## 6                             2                    4                          2
##   Poslovalnica_hitrost Mobilna aplikacija_hitrost                    Q40 Q41
## 1                    3                          4                     -1   2
## 2                    3                          5                     -1   2
## 3                    3                          4                     -1   2
## 4                    2                          5                     -1   2
## 5                    2                          4                     -1   2
## 6                    1                          5 Stay humble, only cash   2
##    Q42 Q43a Q43b Q43c Q43d Q43e Q43f Q43g Q43h Q44 Q45 Q45_13_text Q46 Q47 Q48
## 1 2000    1    0    0    0    0    0    0    0   2   3          -2   2   2   2
## 2 1998    0    0    0    1    0    0    0    0   6   3          -2   2   3   4
## 3 2001    1    0    0    0    0    0    0    0   2   1          -2   2   2   3
## 4 1994    0    0    0    1    0    0    0    0   6  12          -2   5   6   5
## 5 2000    1    1    0    0    0    0    0    0   2   1          -2   2   3   6
## 6 2004    1    0    0    0    0    0    0    0   3   1          -2   1   8   4
##   ID
## 1  1
## 2  2
## 3  3
## 4  4
## 5  5
## 6  6
# Convert the selected columns to numeric, using ONE index vector for both the
# selection and the assignment. Previously the right-hand side selected 39
# columns (it additionally included column 48) while only 38 were assigned, so
# R warned "provided 39 variables to replace 38 variables" and discarded the
# converted column 48.
# NOTE(review): column 48 appears to be Q42 in the head() output -- confirm it
# is meant to be numeric; drop 48 from the vector if not.
numeric_cols <- c(2:9, 11, 17:45, 48)
mydata[numeric_cols] <- lapply(mydata[numeric_cols], as.numeric)
# Recode Q26-Q28 as labelled factors. Codes that are not listed in `levels`
# become NA.
mydata[["Q26"]] <- factor(
  mydata[["Q26"]],
  levels = 1:2,
  labels = c("Da", "Ne")
)

# Q27a-c are 1/0 indicator columns; 1 is mapped to the option text, 0 to "Ne".
mydata[["Q27a"]] <- factor(
  mydata[["Q27a"]],
  levels = c(1, 0),
  labels = c("V mobilni aplikaciji", "Ne")
)
mydata[["Q27b"]] <- factor(
  mydata[["Q27b"]],
  levels = c(1, 0),
  labels = c("V poslovalnici", "Ne")
)
# NOTE(review): "upiorabljal" looks like a typo for "uporabljal"; the label is
# left untouched here because later code may match on it.
mydata[["Q27c"]] <- factor(
  mydata[["Q27c"]],
  levels = c(1, 0),
  labels = c("Nisem upiorabljal/a", "Ne")
)

mydata[["Q28"]] <- factor(
  mydata[["Q28"]],
  levels = 1:2,
  labels = c("V poslovalnici", "V mobilni aplikaciji")
)

# Per-column summary of columns 2 to 45.
summary(mydata[2:45])
##       Q21             Q22a            Q23a            Q23b      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :5.000   Median :5.000   Median :1.000   Median :1.000  
##  Mean   :4.502   Mean   :4.325   Mean   :1.381   Mean   :1.192  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :6.000   Max.   :5.000   Max.   :2.000   Max.   :2.000  
##       Q23c            Q23d            Q23e            Q23f      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :1.000   Median :1.000   Median :1.000   Median :2.000  
##  Mean   :1.283   Mean   :1.181   Mean   :1.355   Mean   :1.543  
##  3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##      Q24                 Q25        Q26                        Q27a    
##  Length:265         Min.   :1.000   Da:141   V mobilni aplikaciji:106  
##  Class :character   1st Qu.:3.000   Ne:124   Ne                  :159  
##  Mode  :character   Median :4.000                                      
##                     Mean   :3.498                                      
##                     3rd Qu.:4.000                                      
##                     Max.   :5.000                                      
##              Q27b                      Q27c                       Q28     
##  V poslovalnici: 67   Nisem upiorabljal/a:114   V poslovalnici      : 96  
##  Ne            :198   Ne                 :151   V mobilni aplikaciji:169  
##                                                                           
##                                                                           
##                                                                           
##                                                                           
##       Q29a            Q29b            Q29c            Q29d           Q30a      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:2.00   1st Qu.:2.000  
##  Median :4.000   Median :3.000   Median :2.000   Median :3.00   Median :3.000  
##  Mean   :3.438   Mean   :2.728   Mean   :2.596   Mean   :2.97   Mean   :3.072  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000  
##       Q30b            Q30c            Q30d            Q30e      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :4.000   Median :4.000   Median :3.000   Median :3.000  
##  Mean   :3.377   Mean   :3.426   Mean   :3.117   Mean   :2.932  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##       Q31a            Q31b            Q33a           Q33b            Q33c      
##  Min.   :1.000   Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:2.000   1st Qu.:3.00   1st Qu.:4.000   1st Qu.:4.000  
##  Median :4.000   Median :2.000   Median :4.00   Median :4.000   Median :5.000  
##  Mean   :4.128   Mean   :2.506   Mean   :3.83   Mean   :4.075   Mean   :4.264  
##  3rd Qu.:5.000   3rd Qu.:3.000   3rd Qu.:5.00   3rd Qu.:5.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.000  
##       Q33d            Q33e            Q33f      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:4.000   1st Qu.:4.000   1st Qu.:4.000  
##  Median :4.000   Median :4.000   Median :4.000  
##  Mean   :4.042   Mean   :4.023   Mean   :4.219  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  Poslovalnica_podpora_in_usmerjanje Mobilna aplikacija_podpora_in_usmerjanje
##  Min.   :1.000                      Min.   :1.000                           
##  1st Qu.:3.000                      1st Qu.:3.000                           
##  Median :4.000                      Median :3.000                           
##  Mean   :3.811                      Mean   :3.125                           
##  3rd Qu.:4.000                      3rd Qu.:4.000                           
##  Max.   :5.000                      Max.   :5.000                           
##  Poslovalnica_brezkrbnost Mobilna aplikacija_brezkrbnost Poslovalnica_varnost
##  Min.   :1.000            Min.   :1.000                  Min.   :1.000       
##  1st Qu.:3.000            1st Qu.:3.000                  1st Qu.:4.000       
##  Median :4.000            Median :4.000                  Median :4.000       
##  Mean   :3.921            Mean   :3.551                  Mean   :4.211       
##  3rd Qu.:5.000            3rd Qu.:4.000                  3rd Qu.:5.000       
##  Max.   :5.000            Max.   :5.000                  Max.   :5.000       
##  Mobilna aplikacija_varnost Poslovalnica_dostopnost
##  Min.   :1.000              Min.   :1.000          
##  1st Qu.:3.000              1st Qu.:3.000          
##  Median :4.000              Median :4.000          
##  Mean   :3.698              Mean   :3.785          
##  3rd Qu.:4.000              3rd Qu.:5.000          
##  Max.   :5.000              Max.   :5.000          
##  Mobilna aplikacija_dostopnost Poslovalnica_jasnost Mobilna aplikacija_jasnost
##  Min.   :1.000                 Min.   :1.000        Min.   :1.000             
##  1st Qu.:3.000                 1st Qu.:3.000        1st Qu.:3.000             
##  Median :4.000                 Median :4.000        Median :3.000             
##  Mean   :3.792                 Mean   :3.913        Mean   :3.347             
##  3rd Qu.:4.000                 3rd Qu.:5.000        3rd Qu.:4.000             
##  Max.   :5.000                 Max.   :5.000        Max.   :5.000             
##  Poslovalnica_hitrost Mobilna aplikacija_hitrost
##  Min.   :1.000        Min.   :1.0               
##  1st Qu.:2.000        1st Qu.:4.0               
##  Median :3.000        Median :4.0               
##  Mean   :2.891        Mean   :4.2               
##  3rd Qu.:4.000        3rd Qu.:5.0               
##  Max.   :5.000        Max.   :5.0
# Q41: three-category factor with English labels.
mydata[["Q41"]] <- factor(
  mydata[["Q41"]],
  levels = 1:3,
  labels = c("Female", "Male", "I don't want to answer")
)

# Q43a-Q43h are 1/0 indicator columns that all share the same coding, so they
# are recoded in one pass instead of eight separate factor() calls.
mydata[paste0("Q43", letters[1:8])] <- lapply(
  mydata[paste0("Q43", letters[1:8])],
  factor,
  levels = c(1, 0),
  labels = c("Selected", "Not selected")
)

# Recode Q44-Q48 as labelled factors. In every call the i-th label belongs to
# the i-th level; codes not listed in `levels` become NA.

# Q44: six ordered size bands.
mydata[["Q44"]] <- factor(
  mydata[["Q44"]],
  levels = 1:6,
  labels = c(
    "Less than 1.000 habitants",
    "1.000 – 5.000 habitants",
    "5.001 – 20.000 habitants",
    "20.001 – 50.000 habitants",
    "50.001 – 100.000 habitants",
    "More than 100.000 habitants"
  )
)

# Q45: bank. NOTE(review): codes 2, 8 and 13 are not listed, so any such
# answers turn into NA -- confirm this is intended.
mydata[["Q45"]] <- factor(
  mydata[["Q45"]],
  levels = c(3, 5, 1, 9, 12, 7, 10, 4, 11, 6),
  labels = c(
    "OTP banka d.d.",
    "Banka Intesa Sanpaolo d.d.",
    "Nova Ljubljanska Banka d.d. (NLB)",
    "Gorenjska Banka d.d.",
    "Delavska Hranilnica d.d.",
    "Revolut",
    "Deželna Banka Slovenije d.d.",
    "Banka Sparkasse d.d.",
    "Addiko Bank d.d.",
    "UniCredit Banka Slovenija d.d."
  )
)

# Q46: note the level order 1, 2, 3, 5, 6, 4 -- it fixes the order of the
# factor levels.
mydata[["Q46"]] <- factor(
  mydata[["Q46"]],
  levels = c(1, 2, 3, 5, 6, 4),
  labels = c(
    "Študent/-ka",
    "Redno zaposlen/-a",
    "Upokojen/-a",
    "Samozaposlen/-a",
    "Delno zaposlen/-a",
    "Brezposeln/-a"
  )
)

# Q47: eight categories, the last one being a refusal option.
mydata[["Q47"]] <- factor(
  mydata[["Q47"]],
  levels = 1:8,
  labels = c(
    "Pod 1.000€",
    "1.000€ - 1.500€",
    "1.501€ - 2.000€",
    "2.001€ - 3.000€",
    "3.001€ - 5.000€",
    "5.001€ - 10.000€",
    "Above 10.000€",
    "I don't want to answer"
  )
)

# Q48: NOTE(review): levels start at 2, so a code of 1 (if it occurs) becomes
# NA -- confirm.
mydata[["Q48"]] <- factor(
  mydata[["Q48"]],
  levels = 2:7,
  labels = c(
    "Dokončana osnovna šola",
    "Dokončana nižja ali srednja poklicna izobrazba",
    "Dokončana srednja strokovna ali splošna izobrazba",
    "Dokončana višješolska strokovna ali visokošolska strokovna izobrazba (tudi 1. bolonjska stopnja)",
    "Dokončana visokošolska strokovna univerzitetna izobrazba (tudi 2. bolonjska stopnja)",
    "Dokončana specializacija, znanstveni magisterij, doktorat"
  )
)
library(psych)
# Descriptive statistics for columns 2 to 45. describe() is called directly:
# describe.by() is deprecated, and no grouping variable is supplied here, so
# the grouped variant only added two warnings.
describe(mydata[2:45])
##                                          vars   n mean   sd median trimmed  mad
## Q21                                         1 265 4.50 1.28      5    4.66 1.48
## Q22a                                        2 265 4.32 0.87      5    4.49 0.00
## Q23a                                        3 265 1.38 0.49      1    1.35 0.00
## Q23b                                        4 265 1.19 0.39      1    1.12 0.00
## Q23c                                        5 265 1.28 0.45      1    1.23 0.00
## Q23d                                        6 265 1.18 0.39      1    1.10 0.00
## Q23e                                        7 265 1.35 0.48      1    1.32 0.00
## Q23f                                        8 265 1.54 0.50      2    1.55 0.00
## Q24*                                        9 265 2.16 1.74      1    1.83 0.00
## Q25                                        10 265 3.50 0.86      4    3.52 1.48
## Q26*                                       11 265 1.47 0.50      1    1.46 0.00
## Q27a*                                      12 265 1.60 0.49      2    1.62 0.00
## Q27b*                                      13 265 1.75 0.44      2    1.81 0.00
## Q27c*                                      14 265 1.57 0.50      2    1.59 0.00
## Q28*                                       15 265 1.64 0.48      2    1.67 0.00
## Q29a                                       16 265 3.44 1.26      4    3.54 1.48
## Q29b                                       17 265 2.73 1.13      3    2.74 1.48
## Q29c                                       18 265 2.60 1.31      2    2.50 1.48
## Q29d                                       19 265 2.97 1.32      3    2.96 1.48
## Q30a                                       20 265 3.07 1.20      3    3.09 1.48
## Q30b                                       21 265 3.38 1.18      4    3.47 1.48
## Q30c                                       22 265 3.43 1.24      4    3.52 1.48
## Q30d                                       23 265 3.12 1.25      3    3.15 1.48
## Q30e                                       24 265 2.93 1.24      3    2.92 1.48
## Q31a                                       25 265 4.13 0.86      4    4.25 1.48
## Q31b                                       26 265 2.51 1.22      2    2.42 1.48
## Q33a                                       27 265 3.83 1.06      4    3.97 1.48
## Q33b                                       28 265 4.08 0.95      4    4.22 1.48
## Q33c                                       29 265 4.26 1.02      5    4.48 0.00
## Q33d                                       30 265 4.04 0.91      4    4.16 1.48
## Q33e                                       31 265 4.02 0.92      4    4.13 1.48
## Q33f                                       32 265 4.22 0.89      4    4.35 1.48
## Poslovalnica_podpora_in_usmerjanje         33 265 3.81 0.88      4    3.86 1.48
## Mobilna aplikacija_podpora_in_usmerjanje   34 265 3.12 0.88      3    3.13 1.48
## Poslovalnica_brezkrbnost                   35 265 3.92 0.87      4    3.97 1.48
## Mobilna aplikacija_brezkrbnost             36 265 3.55 0.87      4    3.57 1.48
## Poslovalnica_varnost                       37 265 4.21 0.81      4    4.31 1.48
## Mobilna aplikacija_varnost                 38 265 3.70 0.85      4    3.73 1.48
## Poslovalnica_dostopnost                    39 265 3.78 1.00      4    3.88 1.48
## Mobilna aplikacija_dostopnost              40 265 3.79 0.89      4    3.84 1.48
## Poslovalnica_jasnost                       41 265 3.91 0.91      4    4.00 1.48
## Mobilna aplikacija_jasnost                 42 265 3.35 0.85      3    3.34 1.48
## Poslovalnica_hitrost                       43 265 2.89 1.02      3    2.90 1.48
## Mobilna aplikacija_hitrost                 44 265 4.20 0.76      4    4.28 1.48
##                                          min max range  skew kurtosis   se
## Q21                                        1   6     5 -1.10     0.76 0.08
## Q22a                                       1   5     4 -1.54     2.46 0.05
## Q23a                                       1   2     1  0.49    -1.77 0.03
## Q23b                                       1   2     1  1.55     0.41 0.02
## Q23c                                       1   2     1  0.96    -1.09 0.03
## Q23d                                       1   2     1  1.65     0.71 0.02
## Q23e                                       1   2     1  0.60    -1.64 0.03
## Q23f                                       1   2     1 -0.17    -1.98 0.03
## Q24*                                       1   6     5  1.34     0.23 0.11
## Q25                                        1   5     4 -0.33     0.31 0.05
## Q26*                                       1   2     1  0.13    -1.99 0.03
## Q27a*                                      1   2     1 -0.41    -1.84 0.03
## Q27b*                                      1   2     1 -1.13    -0.72 0.03
## Q27c*                                      1   2     1 -0.28    -1.93 0.03
## Q28*                                       1   2     1 -0.57    -1.68 0.03
## Q29a                                       1   5     4 -0.45    -0.73 0.08
## Q29b                                       1   5     4 -0.05    -0.97 0.07
## Q29c                                       1   5     4  0.35    -1.06 0.08
## Q29d                                       1   5     4 -0.09    -1.18 0.08
## Q30a                                       1   5     4 -0.24    -0.84 0.07
## Q30b                                       1   5     4 -0.57    -0.55 0.07
## Q30c                                       1   5     4 -0.43    -0.79 0.08
## Q30d                                       1   5     4 -0.31    -0.97 0.08
## Q30e                                       1   5     4 -0.01    -1.02 0.08
## Q31a                                       1   5     4 -1.28     2.22 0.05
## Q31b                                       1   5     4  0.43    -0.85 0.07
## Q33a                                       1   5     4 -0.96     0.54 0.06
## Q33b                                       1   5     4 -1.19     1.39 0.06
## Q33c                                       1   5     4 -1.60     2.10 0.06
## Q33d                                       1   5     4 -1.27     2.05 0.06
## Q33e                                       1   5     4 -1.12     1.62 0.06
## Q33f                                       1   5     4 -1.25     1.64 0.05
## Poslovalnica_podpora_in_usmerjanje         1   5     4 -0.62     0.55 0.05
## Mobilna aplikacija_podpora_in_usmerjanje   1   5     4 -0.08    -0.19 0.05
## Poslovalnica_brezkrbnost                   1   5     4 -0.59     0.39 0.05
## Mobilna aplikacija_brezkrbnost             1   5     4 -0.12    -0.22 0.05
## Poslovalnica_varnost                       1   5     4 -1.13     1.85 0.05
## Mobilna aplikacija_varnost                 1   5     4 -0.44     0.15 0.05
## Poslovalnica_dostopnost                    1   5     4 -0.63     0.10 0.06
## Mobilna aplikacija_dostopnost              1   5     4 -0.42    -0.09 0.05
## Poslovalnica_jasnost                       1   5     4 -0.72     0.33 0.06
## Mobilna aplikacija_jasnost                 1   5     4 -0.03    -0.06 0.05
## Poslovalnica_hitrost                       1   5     4  0.07    -0.59 0.06
## Mobilna aplikacija_hitrost                 1   5     4 -1.02     2.08 0.05
# Standardise the four clustering variables (columns 3, 11, 26 and 27) and
# keep the result as a data.frame.
mydata_clu_std <- mydata[c(3, 11, 26, 27)] %>%
  scale() %>%
  as.data.frame()
library(factoextra) 
## Warning: package 'factoextra' was built under R version 4.4.2
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Clustering tendency (Hopkins statistic) of the standardised data, with `n`
# set to one fewer than the number of rows; the plot is suppressed.
mydata_clu_std %>%
  get_clust_tendency(graph = FALSE, n = nrow(.) - 1)
## $hopkins_stat
## [1] 0.5586369
## 
## $plot
## NULL
# Give the four clustering columns descriptive names in a single assignment.
# Only `mydata` is renamed here; an already-built `mydata_clu_std` keeps its
# old column names until it is rebuilt from `mydata`.
names(mydata)[c(3, 11, 26, 27)] <- c("Awareness", "Ease", "Value", "Trust")

I renamed the variables. The clusters are formed on four clustering variables: “Awareness”, “Ease”, “Value”, and “Trust”.

library(factoextra)
library(NbClust)

# Diagnostics for choosing the number of clusters. kmeans() starts from
# randomly chosen centres, so the seed is fixed before each call to make the
# curves reproducible between runs.
set.seed(1)
fviz_nbclust(mydata_clu_std, kmeans, method = "wss") +
  labs(subtitle = "Elbow method")

set.seed(1)
fviz_nbclust(mydata_clu_std, kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette analysis")

library(dplyr)
library(factoextra)
# Hierarchical clustering of the standardised data: Euclidean distances are
# passed to hclust() using the "ward.D2" agglomeration method. The pipe form is
# kept as is because the printed `Call:` of the result reflects it.
WARD <- mydata_clu_std %>%
  get_dist(method = "euclidean") %>%  
  hclust(method = "ward.D2")          

# Print the hclust summary (method, distance, number of objects).
WARD
## 
## Call:
## hclust(d = ., method = "ward.D2")
## 
## Cluster method   : ward.D2 
## Distance         : euclidean 
## Number of objects: 265
library(factoextra)
# Plain dendrogram of the Ward solution, without cutting it into groups.
WARD %>%
  fviz_dend()
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Dendrogram cut into five groups, with coloured labels and a rectangle around
# each group.
fviz_dend(WARD,
          k = 5,
          cex = 0.5,
          palette = "jama",
          color_labels_by_k = TRUE,
          rect = TRUE)

library(factoextra)
library(ggplot2)

# K-means with five centres and 25 random starts. The seed is fixed because
# kmeans() picks its starting centres at random: without it the cluster
# numbering (and possibly the partition) changes from run to run.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)

Clustering
## K-means clustering with 5 clusters of sizes 27, 74, 76, 32, 56
## 
## Cluster means:
##         Q22a        Q25       Q31a       Q31b
## 1 -2.2987082 -0.6636458 -0.1920661  0.4967113
## 2  0.1713700 -0.7658139  0.1806157 -0.3039786
## 3  0.5063700  0.9025907  0.5693474 -0.7926587
## 4 -0.0860382 -0.5414867 -1.9278200  0.2004560
## 5  0.2438008  0.4164170  0.1828583  1.1234050
## 
## Clustering vector:
##   [1] 1 3 4 3 3 5 2 5 3 1 4 4 5 2 4 1 2 2 5 5 3 5 4 3 3 1 5 2 2 2 5 2 3 3 3 5 3
##  [38] 4 3 2 3 5 3 3 2 3 3 3 3 3 2 3 5 5 2 2 5 3 5 5 5 3 3 2 3 2 2 2 5 5 5 5 3 2
##  [75] 4 2 5 4 3 5 5 1 3 2 3 2 1 5 3 2 2 1 1 1 2 2 5 3 2 1 3 1 4 3 2 3 1 2 1 3 3
## [112] 5 2 2 1 2 5 1 3 2 5 2 2 4 5 1 2 2 1 2 5 2 5 3 5 1 5 4 4 2 3 1 1 4 2 4 2 3
## [149] 3 4 3 4 2 3 2 5 1 4 1 3 3 3 3 2 2 3 2 3 2 5 3 4 4 3 2 2 3 2 3 3 3 5 3 2 2
## [186] 4 2 2 2 3 1 3 5 4 4 2 2 3 3 5 4 4 2 5 2 4 3 3 5 5 2 1 3 2 5 2 5 2 5 5 4 5
## [223] 3 2 3 2 3 5 3 2 5 3 5 1 1 4 1 5 3 2 4 2 5 2 3 2 3 2 5 5 2 3 3 5 5 2 2 3 4
## [260] 4 3 3 5 4 4
## 
## Within cluster sum of squares by cluster:
## [1]  95.65988  92.49211  79.70569 116.86933  85.02220
##  (between_SS / total_SS =  55.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
library(factoextra)
# Plot the k-means solution for the standardised data.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Euclidean length of each row of the standardised data, i.e. its distance
# from the overall centroid (scale() centres every column at zero). Computed
# over all columns of `mydata_clu_std` instead of hard-coded column names, so
# it keeps working whichever names the standardised columns carry.
mydata$Dissimilarity <- unname(sqrt(rowSums(mydata_clu_std^2)))

# Ten units with the largest dissimilarity (candidate outliers).
head(mydata[order(-mydata$Dissimilarity), c("ID", "Dissimilarity")], 10) 
##      ID Dissimilarity
## 191 191      6.365406
## 11   11      4.823598
## 202 202      4.716886
## 93   93      4.390156
## 241 241      4.281977
## 38   38      4.229806
## 234 234      4.116562
## 115 115      3.993249
## 103 103      3.861159
## 10   10      3.798713
# Remove the unit with ID 191 (the largest dissimilarity in the ranking
# above).
mydata <- mydata %>%
  filter(!ID %in% c(191))

# Renumber the IDs so they are contiguous again; seq_len() avoids the
# c(1, 0) result that seq(1, n) produces for n == 0.
mydata$ID <- seq_len(nrow(mydata))

# Re-standardise the four clustering variables on the reduced data.
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra) 
# Hopkins statistic for the reduced, re-standardised data.
get_clust_tendency(mydata_clu_std,
                   n = nrow(mydata_clu_std) - 1,
                   graph = FALSE) 
## $hopkins_stat
## [1] 0.5736077
## 
## $plot
## NULL
# Rebuild the Ward clustering before plotting: `WARD` was computed from the
# standardised data before the unit was removed, so plotting it unchanged
# would show the old dendrogram rather than one for the current data.
WARD <- mydata_clu_std %>%
  get_dist(method = "euclidean") %>%
  hclust(method = "ward.D2")

fviz_dend(WARD,
          k = 5,
          cex = 0.5,
          palette = "jama",
          color_labels_by_k = TRUE,
          rect = TRUE)

library(factoextra)
library(ggplot2)

# K-means with five centres and 25 random starts; the seed is fixed so the
# cluster numbering is reproducible between runs.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)

Clustering
## K-means clustering with 5 clusters of sizes 56, 74, 76, 32, 26
## 
## Cluster means:
##    Awareness       Ease      Value      Trust
## 1  0.2355188  0.4112907  0.1731763  1.1381089
## 2  0.1611116 -0.7879947  0.1708796 -0.2980480
## 3  0.5052525  0.9044778  0.5689982 -0.7897317
## 4 -0.1033202 -0.5604314 -1.9884698  0.2094871
## 5 -2.3155481 -0.5971993 -0.0752230  0.4475952
## 
## Clustering vector:
##   [1] 5 3 4 3 3 1 2 1 3 5 4 4 1 2 4 5 2 2 1 1 3 1 4 3 3 5 1 2 2 2 1 2 3 3 3 1 3
##  [38] 4 3 2 3 1 3 3 2 3 3 3 3 3 2 3 1 1 2 2 1 3 1 1 1 3 3 2 3 2 2 2 1 1 1 1 3 2
##  [75] 4 2 1 4 3 1 1 5 3 2 3 2 5 1 3 2 2 5 5 5 2 2 1 3 2 5 3 5 4 3 2 3 5 2 5 3 3
## [112] 1 2 2 5 2 1 5 3 2 1 2 2 4 1 5 2 2 5 2 1 2 1 3 1 5 1 4 4 2 3 5 5 4 2 4 2 3
## [149] 3 4 3 4 2 3 2 1 5 4 5 3 3 3 3 2 2 3 2 3 2 1 3 4 4 3 2 2 3 2 3 3 3 1 3 2 2
## [186] 4 2 2 2 3 3 1 4 4 2 2 3 3 1 4 4 2 1 2 4 3 3 1 1 2 5 3 2 1 2 1 2 1 1 4 1 3
## [223] 2 3 2 3 1 3 2 1 3 1 5 5 4 5 1 3 2 4 2 1 2 3 2 3 2 1 1 2 3 3 1 1 2 2 3 4 4
## [260] 3 3 1 4 4
## 
## Within cluster sum of squares by cluster:
## [1]  88.14381  95.70238  82.80060 120.50690  75.50678
##  (between_SS / total_SS =  56.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
library(factoextra)
# Plot the current k-means solution.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Remove the unit with ID 11.
mydata <- mydata %>%
  filter(!ID %in% c(11))

# Renumber IDs (seq_len() is safe for zero rows, unlike seq(1, n)).
mydata$ID <- seq_len(nrow(mydata))

# Re-standardise the clustering variables on the reduced data.
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra)
library(ggplot2)

# Re-run k-means; the seed is fixed because the random starting centres would
# otherwise change the cluster numbering between runs.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)

Clustering
## K-means clustering with 5 clusters of sizes 76, 54, 75, 32, 26
## 
## Cluster means:
##     Awareness       Ease       Value      Trust
## 1  0.16017846 -0.8533305  0.11948655 -0.3059896
## 2  0.21326412  0.3776779  0.22118546  1.1460396
## 3  0.49918810  0.9111497  0.59543745 -0.7892907
## 4 -0.03067026 -0.2443742 -1.97805346  0.2824627
## 5 -2.31336479 -0.6176055 -0.09173426  0.4433489
## 
## Clustering vector:
##   [1] 5 3 4 3 3 2 1 2 3 5 4 2 1 1 5 1 1 2 2 3 2 4 3 3 5 2 1 1 1 2 1 3 3 3 2 3 4
##  [38] 3 1 3 2 3 3 1 3 3 3 3 3 1 3 2 2 1 1 2 3 2 2 2 3 3 1 3 1 1 1 2 2 2 2 3 1 4
##  [75] 1 2 4 4 2 2 5 3 1 3 1 5 2 3 1 1 5 5 5 1 1 2 3 1 5 3 5 4 3 1 3 5 1 5 3 3 2
## [112] 1 1 5 1 2 5 3 1 2 1 1 4 4 5 1 1 5 1 2 1 2 3 2 5 2 4 4 1 3 5 5 4 1 4 1 3 3
## [149] 4 3 1 1 3 1 2 5 4 5 3 3 3 3 1 1 3 1 3 1 2 3 4 4 3 1 1 3 1 3 3 3 2 3 1 1 4
## [186] 1 1 1 3 3 2 4 4 1 1 3 3 2 4 4 1 2 1 4 3 3 4 2 1 5 3 1 2 1 2 1 2 2 4 2 3 1
## [223] 3 1 3 2 3 1 2 3 2 5 5 4 5 2 3 1 4 1 2 1 3 1 3 1 2 2 1 3 3 2 2 1 1 3 4 4 3
## [260] 3 2 4 4
## 
## Within cluster sum of squares by cluster:
## [1] 109.86189  82.00624  80.53778 111.18358  76.83579
##  (between_SS / total_SS =  56.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
library(factoextra)
# Plot the current k-means solution.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Remove five further units. The IDs refer to the numbering in force at this
# point, i.e. after the previous renumbering.
mydata <- mydata %>%
  filter(!ID %in% c(86, 200, 143, 239, 37))

# Renumber IDs (seq_len() is safe for zero rows, unlike seq(1, n)).
mydata$ID <- seq_len(nrow(mydata))

# Re-standardise the clustering variables on the reduced data.
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra)
library(ggplot2)

# Re-run k-means with a fixed seed so the cluster numbering is reproducible.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)

Clustering
## K-means clustering with 5 clusters of sizes 79, 25, 50, 30, 74
## 
## Cluster means:
##     Awareness       Ease      Value      Trust
## 1  0.50108668  0.8823227  0.6219599 -0.7289837
## 2 -2.38619015 -0.5895733 -0.1589148  0.5303956
## 3  0.14549250  0.3470049  0.1111776  1.2357670
## 4  0.08869192 -0.3574300 -1.9775371  0.1497189
## 5  0.13693951 -0.8323177  0.1162874 -0.2966229
## 
## Clustering vector:
##   [1] 2 1 4 1 1 3 5 3 1 2 4 3 5 4 2 5 5 3 1 1 3 4 1 1 2 3 5 5 5 3 5 1 1 1 3 1 1
##  [38] 5 1 3 1 1 5 1 1 1 1 1 5 1 3 1 5 5 3 1 3 3 3 1 1 5 1 5 5 5 3 3 3 3 1 5 4 5
##  [75] 3 4 4 3 3 2 1 5 1 5 3 1 5 5 2 2 2 5 5 3 1 5 2 1 2 4 1 5 1 2 5 2 1 1 3 5 5
## [112] 2 5 3 2 1 5 3 5 5 4 4 2 5 5 2 5 3 5 3 1 3 2 1 4 4 5 1 2 2 5 4 5 1 1 4 1 4
## [149] 5 1 5 3 2 4 2 1 1 1 1 5 5 1 5 1 5 3 1 4 4 1 5 5 1 5 1 1 1 3 1 5 5 4 5 5 5
## [186] 1 1 3 4 4 5 5 1 1 3 4 5 3 5 4 1 1 4 3 5 2 1 5 3 5 3 5 3 3 4 3 1 5 1 5 1 3
## [223] 1 5 3 1 3 2 2 4 2 3 1 5 5 3 5 1 5 1 5 3 3 5 1 1 1 3 5 5 1 4 4 1 1 3 4 4
## 
## Within cluster sum of squares by cluster:
## [1]  95.40383  77.28224  80.33349  91.09260 105.03046
##  (between_SS / total_SS =  56.3 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Plot the refitted k-means solution on the standardised data.
library(factoextra)
fviz_cluster(
  object = Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Remove the listed respondents (presumably the outliers seen in the previous
# cluster plot -- confirm) and renumber ID so it runs 1..n again.
mydata <- mydata %>%
  filter(!ID %in% c(90, 155, 85, 146, 100))

# seq_len() is safe even for zero rows, where seq(1, 0) would count down.
mydata$ID <- seq_len(nrow(mydata))

# Re-standardise the four clustering variables on the reduced sample,
# selecting them by name rather than by column position (3, 11, 26, 27).
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)
library(ggplot2)

# Refit k-means with 5 centres and 25 random starts.
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)

Clustering
## K-means clustering with 5 clusters of sizes 23, 76, 49, 79, 26
## 
## Cluster means:
##     Awareness       Ease       Value       Trust
## 1 -2.41967781 -0.6566175 -0.16063729  0.44471611
## 2  0.11993807 -0.8977034  0.06023898 -0.26926030
## 3  0.11446989  0.3155981  0.08097431  1.31250594
## 4  0.49980397  0.8895872  0.61326842 -0.71180559
## 5  0.05552915 -0.0928475 -2.04997890  0.08289087
## 
## Clustering vector:
##   [1] 1 4 5 4 4 3 2 3 4 1 5 3 2 2 1 2 2 3 4 4 3 5 4 4 1 3 2 2 2 3 2 4 4 4 3 4 4
##  [38] 2 4 3 4 4 2 4 4 4 4 4 2 4 3 4 2 2 3 4 3 3 3 4 4 2 4 2 2 2 3 3 3 3 4 2 5 2
##  [75] 3 5 5 3 3 1 4 2 4 2 4 2 2 1 1 2 2 3 4 2 1 4 1 4 2 4 1 2 1 4 4 3 2 2 1 2 3
## [112] 1 4 2 3 2 2 5 5 1 2 2 1 2 3 2 3 4 3 1 4 5 5 2 4 1 1 2 5 2 4 4 4 2 2 4 2 3
## [149] 1 5 4 4 4 4 2 2 4 2 4 2 3 4 5 5 4 2 2 4 2 4 4 4 3 4 2 2 5 2 2 2 4 4 3 5 5
## [186] 2 2 4 4 3 5 2 3 2 5 4 4 5 3 2 1 4 2 3 2 3 2 3 3 5 3 4 2 4 2 4 3 4 2 3 4 3
## [223] 1 1 5 1 3 4 2 2 3 2 4 2 4 2 3 3 2 4 4 4 3 2 2 4 5 5 4 4 3 5 5
## 
## Within cluster sum of squares by cluster:
## [1]  70.16428 124.58392  80.49920  99.56269  63.35325
##  (between_SS / total_SS =  56.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Plot the refitted k-means solution on the standardised data.
library(factoextra)
fviz_cluster(
  object = Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Remove the listed respondents (presumably the outliers seen in the previous
# cluster plot -- confirm) and renumber ID so it runs 1..n again.
mydata <- mydata %>%
  filter(!ID %in% c(193, 190, 164))

# seq_len() is safe even for zero rows, where seq(1, 0) would count down.
mydata$ID <- seq_len(nrow(mydata))

# Re-standardise the four clustering variables on the reduced sample,
# selecting them by name rather than by column position (3, 11, 26, 27).
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)
library(ggplot2)

# Refit k-means with 5 centres and 25 random starts.
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)

Clustering
## K-means clustering with 5 clusters of sizes 75, 26, 79, 23, 47
## 
## Cluster means:
##     Awareness       Ease       Value      Trust
## 1  0.13714948 -0.8797902  0.08082592 -0.2392861
## 2  0.01602031 -0.1291655 -2.03520230 -0.0309770
## 3  0.50815392  0.9082966  0.62028545 -0.7001278
## 4 -2.40486418 -0.6499943 -0.16676336  0.4696909
## 5  0.09499952  0.2667449  0.03587920  1.3459376
## 
## Clustering vector:
##   [1] 4 3 2 3 3 5 1 5 3 4 2 5 1 2 4 1 1 5 3 3 5 2 3 3 4 5 1 1 1 5 1 3 3 3 5 3 3
##  [38] 1 3 5 3 3 1 3 3 3 3 3 1 3 5 3 1 1 5 3 5 5 5 3 3 1 3 1 1 1 5 5 5 5 3 1 2 1
##  [75] 5 2 2 5 5 4 3 1 3 1 3 1 1 4 4 1 1 5 3 1 4 3 4 3 1 3 4 1 4 3 3 5 1 1 4 1 5
## [112] 4 3 1 5 1 1 2 2 4 1 1 4 1 5 1 5 3 5 4 3 2 2 1 3 4 4 1 2 1 3 3 3 1 1 3 1 5
## [149] 4 2 3 3 3 3 1 1 3 1 3 1 5 3 2 3 1 1 3 1 3 3 3 5 3 1 1 2 1 1 1 3 3 5 2 2 1
## [186] 1 3 3 2 1 1 2 3 3 2 5 1 4 3 1 5 1 5 1 5 5 2 5 3 1 3 1 3 5 3 1 5 3 5 4 4 2
## [223] 4 5 3 1 1 5 1 3 1 3 1 5 5 1 3 3 3 5 1 1 3 2 2 3 3 5 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 121.45870  62.95169 101.43697  71.50918  74.12297
##  (between_SS / total_SS =  56.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Plot the refitted k-means solution on the standardised data.
library(factoextra)
fviz_cluster(
  object = Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Remove the listed respondents (presumably the outliers seen in the previous
# cluster plot -- confirm) and renumber ID so it runs 1..n again.
mydata <- mydata %>%
  filter(!ID %in% c(220, 10, 109))

# seq_len() is safe even for zero rows, where seq(1, 0) would count down.
mydata$ID <- seq_len(nrow(mydata))

# Re-standardise the four clustering variables on the reduced sample,
# selecting them by name rather than by column position (3, 11, 26, 27).
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)
library(ggplot2)

# Final k-means fit (5 centres, 25 random starts); its cluster labels are
# what the rest of the analysis stores in mydata$Group.
# NOTE(review): cluster numbering depends on the RNG state -- confirm a seed
# is set earlier in the file, otherwise group numbers change between runs.
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)

Clustering
## K-means clustering with 5 clusters of sizes 48, 75, 79, 20, 25
## 
## Cluster means:
##     Awareness       Ease        Value       Trust
## 1  0.07755914  0.2561182 -0.003027745  1.38856900
## 2  0.10677233 -0.9230788  0.077510269 -0.22238704
## 3  0.50068099  0.9055025  0.618119866 -0.69048033
## 4 -2.49544837 -0.4992393 -0.152598342  0.31410430
## 5 -0.05502378 -0.1845070 -2.057897640 -0.06825693
## 
## Clustering vector:
##   [1] 4 3 5 3 3 1 2 1 3 5 1 2 5 4 2 2 1 3 3 1 5 3 3 4 1 2 2 2 1 2 3 3 3 1 3 3 2
##  [38] 3 1 3 3 2 3 3 3 3 3 2 3 1 3 2 2 1 3 1 1 1 3 3 2 3 2 2 2 1 1 1 1 3 2 5 2 1
##  [75] 5 5 1 1 4 3 2 3 2 3 2 2 4 4 2 2 1 3 2 4 3 4 3 2 3 4 2 4 3 3 1 2 2 2 1 4 3
## [112] 2 1 2 2 5 5 4 2 2 4 2 1 2 1 3 1 4 3 5 5 2 3 4 4 2 5 2 3 3 3 2 2 3 2 1 4 5
## [149] 3 3 3 3 2 2 3 2 3 2 1 3 5 3 2 2 3 2 3 3 3 1 3 2 2 5 2 2 2 3 3 1 5 5 2 2 3
## [186] 3 5 2 2 5 3 3 1 1 2 4 3 2 1 2 1 2 1 1 5 1 3 2 3 2 3 1 3 2 1 3 1 4 5 4 1 3
## [223] 2 2 1 2 3 2 3 2 1 1 2 3 3 3 1 2 2 3 5 5 3 3 1 5 5
## 
## Within cluster sum of squares by cluster:
## [1]  81.67665 127.65628 106.19954  49.86444  62.38244
##  (between_SS / total_SS =  56.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Plot the final k-means solution on the standardised data.
library(factoextra)
fviz_cluster(
  object = Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Cluster centres on the standardised scale (one row per cluster); the
# surrounding parentheses print the value as it is assigned.
(Averages <- Clustering[["centers"]])
##     Awareness       Ease        Value       Trust
## 1  0.07755914  0.2561182 -0.003027745  1.38856900
## 2  0.10677233 -0.9230788  0.077510269 -0.22238704
## 3  0.50068099  0.9055025  0.618119866 -0.69048033
## 4 -2.49544837 -0.4992393 -0.152598342  0.31410430
## 5 -0.05502378 -0.1845070 -2.057897640 -0.06825693
# Reshape the cluster centres to long format (one row per cluster/variable
# pair, in columns `name` and `value`) for the profile plot.
Figure <- as.data.frame(Averages)
Figure$ID <- seq_len(nrow(Figure))
library(tidyr)
Figure <- pivot_longer(Figure, cols = c("Awareness", "Ease", "Value", "Trust"))

# Cluster number as a factor for the colour/shape legend. Levels follow the
# cluster IDs that are present instead of a hard-coded 1..5.
Figure$Group <- factor(Figure$ID)

# Fix the order of the variables along the x axis.
Figure$NameF <- factor(Figure$name,
                       levels = c("Awareness", "Ease", "Value", "Trust"))

# Symmetric y range of at least +/-2.5, widened in steps of 0.5 when a centre
# lies further out: ylim() removes observations outside its limits, so a fixed
# +/-2.5 would silently drop such a centre from the plot.
y_limit <- max(2.5, ceiling(max(abs(Figure$value)) * 2) / 2)

library(ggplot2)
ggplot(Figure, aes(x = NameF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, col = Group), size = 5, alpha = 0.4) +
  geom_line(aes(group = ID), linewidth = 1.5) +
  ylab("Averages") +
  xlab("Cluster variables") +
  ylim(-y_limit, y_limit) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 12))

# Store each respondent's cluster label, then fit one-way ANOVAs of the four
# clustering variables on cluster membership; summary() reports one table per
# response.
mydata[["Group"]] <- Clustering[["cluster"]]
fit <- aov(
  formula = cbind(Awareness, Ease, Value, Trust) ~ as.factor(Group),
  data = mydata
)

summary(fit)
##  Response Awareness :
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   4 80.074 20.0186  87.691 < 2.2e-16 ***
## Residuals        242 55.245  0.2283                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Ease :
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   4 86.860 21.7150  76.879 < 2.2e-16 ***
## Residuals        242 68.354  0.2825                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Value :
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   4 71.857 17.9643  76.009 < 2.2e-16 ***
## Residuals        242 57.195  0.2363                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Trust :
##                   Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(Group)   4 172.02  43.004  74.816 < 2.2e-16 ***
## Residuals        242 139.10   0.575                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

All four clustering variables differ significantly across the clusters (p < 0.001 for each).

# Convert year of birth (Q42) to a number and derive age in years relative to
# the calendar year in which the script runs.
# NOTE(review): Age therefore shifts if the script is re-run in a later year --
# confirm whether a fixed survey year should be used instead.
mydata[["Q42"]] <- as.numeric(as.character(mydata[["Q42"]]))
current_year <- as.numeric(format(Sys.Date(), format = "%Y"))
mydata[["Age"]] <- current_year - mydata[["Q42"]]

Changed the year of birth into age in number of years.

# Median age within each cluster.
aggregate(x = mydata$Age, by = list(mydata$Group), FUN = median)
##   Group.1    x
## 1       1 27.0
## 2       2 29.0
## 3       3 27.0
## 4       4 23.5
## 5       5 35.0
# Cross-tabulate cluster membership with home-town size (Q44) and convert the
# counts to within-cluster proportions (margin = 1: each row sums to 1).
table_clusters <- table(mydata$Group, mydata$Q44)
prop_table_clusters <- prop.table(table_clusters, margin = 1)
prop_df <- as.data.frame(as.table(prop_table_clusters))

# Grouped bars in percent: one bar per Q44 category within each cluster.
library(ggplot2)
ggplot(data = prop_df, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Percentage Distribution of number of habitants of home place by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  )

# Chi-squared test of independence between cluster and Q44 (printed on
# assignment via the surrounding parentheses).
(resulttttt <- chisq.test(mydata$Group, mydata$Q44))
## Warning in chisq.test(mydata$Group, mydata$Q44): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Group and mydata$Q44
## X-squared = 17.464, df = 20, p-value = 0.6226

There is no association between Group and the number of inhabitants of the home town (p = 0.62).

library(dplyr)

# Collapse the six Q44 size bands into three broader population groups; the
# mapping is kept in a named vector (old label = new label) and spliced into
# recode().
habitant_groups <- c(
  "Less than 1.000 habitants" = "Small population",
  "1.000 – 5.000 habitants" = "Small population",
  "5.001 – 20.000 habitants" = "Medium population",
  "20.001 – 50.000 habitants" = "Medium population",
  "50.001 – 100.000 habitants" = "Large population",
  "More than 100.000 habitants" = "Large population"
)
mydata$HabitantGroup <- recode(mydata$Q44, !!!habitant_groups)

# Repeat the independence test on the coarser grouping.
(resulttttt2 <- chisq.test(mydata$Group, mydata$HabitantGroup))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Group and mydata$HabitantGroup
## X-squared = 4.9513, df = 8, p-value = 0.7628

Still no association after merging the categories into three population groups (p = 0.76).

# Within-cluster proportions of each primary bank (Q45).
table_clusters1 <- table(mydata$Group, mydata$Q45)
prop_table_clusters1 <- prop.table(table_clusters1, margin = 1)
prop_df1 <- as.data.frame(as.table(prop_table_clusters1))

# Grouped bars in percent: one bar per bank within each cluster.
library(ggplot2)
ggplot(data = prop_df1, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Percentage Distribution of Primary Bank by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  )

# Chi-squared test of independence between cluster and Q45.
(resultttt <- chisq.test(mydata$Group, mydata$Q45))
## Warning in chisq.test(mydata$Group, mydata$Q45): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Group and mydata$Q45
## X-squared = 31.9, df = 36, p-value = 0.664

No association between group and primary bank.

# Within-cluster proportions of each employment status (Q46).
table_clusters2 <- table(mydata$Group, mydata$Q46)
prop_table_clusters2 <- prop.table(table_clusters2, margin = 1)
prop_df2 <- as.data.frame(as.table(prop_table_clusters2))

# Grouped bars in percent: one bar per employment status within each cluster.
library(ggplot2)
ggplot(data = prop_df2, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Percentage Distribution of Employment Status by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  )

# Chi-squared test of independence between cluster and Q46.
(resulttt <- chisq.test(mydata$Group, mydata$Q46))
## Warning in chisq.test(mydata$Group, mydata$Q46): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Group and mydata$Q46
## X-squared = 17.276, df = 20, p-value = 0.635

There is no association between Group and Employment status.

# Within-cluster proportions of each monthly income band (Q47).
table_clusters3 <- table(mydata$Group, mydata$Q47)
prop_table_clusters3 <- prop.table(table_clusters3, margin = 1)
prop_df3 <- as.data.frame(as.table(prop_table_clusters3))

# Grouped bars in percent: one bar per income band within each cluster.
library(ggplot2)
ggplot(data = prop_df3, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Percentage Distribution of Monthly Income by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  )

# Chi-squared test of independence between cluster and Q47.
(resultt <- chisq.test(mydata$Group, mydata$Q47))
## Warning in chisq.test(mydata$Group, mydata$Q47): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Group and mydata$Q47
## X-squared = 26.577, df = 28, p-value = 0.5414

There is no association between Group and Income level.

# Show the raw cluster-by-income counts behind the test above.
table_clusters3
##    
##     Pod 1.000€ 1.000€ - 1.500€ 1.501€ - 2.000€ 2.001€ - 3.000€ 3.001€ - 5.000€
##   1         13               9              11               5               2
##   2         26               9              14              12               5
##   3         19              16              12              19               5
##   4          7               6               4               0               0
##   5          7               5               3               5               3
##    
##     5.001€ - 10.000€ Above 10.000€ I don't want to answer
##   1                1             1                      6
##   2                1             0                      8
##   3                2             0                      6
##   4                0             1                      2
##   5                1             0                      1
# Within-cluster proportions of each education level (Q48).
table_clusters4 <- table(mydata$Group, mydata$Q48)
prop_table_clusters4 <- prop.table(table_clusters4, margin = 1)
prop_df4 <- as.data.frame(as.table(prop_table_clusters4))

library(ggplot2)

# Grouped bars in percent, with slightly narrower bars and the legend moved
# below the plot in two rows.
ggplot(data = prop_df4, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge", width = 0.7) +
  labs(
    title = "Percentage Distribution of Education Level by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),                      # larger base text
    plot.title = element_text(size = 16, hjust = 0.5),   # centred title
    axis.text.x = element_text(angle = 45, hjust = 1),   # rotated x labels
    legend.position = "bottom",
    legend.key.size = unit(0.8, "cm"),
    legend.text = element_text(size = 10)
  ) +
  guides(fill = guide_legend(nrow = 2))

# Chi-squared test of independence between cluster and Q48.
(result <- chisq.test(mydata$Group, mydata$Q48))
## Warning in chisq.test(mydata$Group, mydata$Q48): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Group and mydata$Q48
## X-squared = 17.242, df = 20, p-value = 0.6372

There is no association between Group and Education level.

# Bin age into bands. cut() builds right-closed intervals, so without
# include.lowest = TRUE the first band is (18, 30] and a respondent aged
# exactly 18 becomes NA despite the "18-30" label. Ages below 18 are still NA.
mydata$AgeGroup <- cut(mydata$Age,
                       breaks = c(18, 30, 40, 50, 60, Inf),
                       labels = c("18-30", "31-40", "41-50", "51-60", "60+"),
                       include.lowest = TRUE)

# Counts of Q23a answers within each age band.
table_age <- table(mydata$AgeGroup, mydata$Q23a)

print(table_age)
##        
##          1  2
##   18-30 74 66
##   31-40 26  2
##   41-50 26 10
##   51-60 18  7
##   60+   14  4
# Chi-squared test of independence between Q23a and age band (printed on
# assignment via the surrounding parentheses).
(result1 <- chisq.test(mydata$Q23a, mydata$AgeGroup))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q23a and mydata$AgeGroup
## X-squared = 20.89, df = 4, p-value = 0.000333

There is an association between Age Group and Q23a (p < 0.001).

# Attach labels to the Q23a codes (1 = "Yes", 2 = "No") and list the levels.
mydata[["Q23a"]] <- factor(
  mydata[["Q23a"]],
  levels = c(1, 2),
  labels = c("Yes", "No")
)
levels(mydata[["Q23a"]])
## [1] "Yes" "No"
library(ggplot2)

# Share of Yes/No answers to Q23a within each age band (rows sum to 1).
prop_table5 <- prop.table(table(mydata$AgeGroup, mydata$Q23a), margin = 1)
prop_df5 <- as.data.frame(as.table(prop_table5))

# Stacked bars scaled to full height, one bar per age band.
ggplot(data = prop_df5, mapping = aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = "fill") +
  theme_minimal() +
  labs(
    title = "Proportion of Awareness of Ability to Get a Loan by Age Group",
    x = "Age Group",
    y = "Proportion",
    fill = "Awareness of Advanced Service"
  )

There is association between Age Group and Awareness of ability to get a loan in the app.

# Chi-squared test of independence between Q28 and cluster (printed on
# assignment via the surrounding parentheses).
(result3 <- chisq.test(mydata$Q28, mydata$Group))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q28 and mydata$Group
## X-squared = 26.786, df = 4, p-value = 2.196e-05

There is association between Group and where individuals would carry out advanced service in the next week if they had to.

# Observed Q28-by-cluster counts from the test, with row and column totals.
addmargins(result3[["observed"]])
##                       mydata$Group
## mydata$Q28               1   2   3   4   5 Sum
##   V poslovalnici        23  24  10  11  12  80
##   V mobilni aplikaciji  25  51  69   9  13 167
##   Sum                   48  75  79  20  25 247
# Within-cluster proportions of each Q28 answer.
# Note: this reuses the name prop_df5 and overwrites its previous contents.
table_clusters5 <- table(mydata$Group, mydata$Q28)
prop_table_clusters5 <- prop.table(table_clusters5, margin = 1)
prop_df5 <- as.data.frame(as.table(prop_table_clusters5))

# Grouped bars in percent: one bar per answer within each cluster.
library(ggplot2)
ggplot(data = prop_df5, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Percentage Distribution of Where would you carry out advanced services in the next week if you had to",
    x = "Group",
    y = "Percentage (%)",
    fill = "Odgovor"
  )

# Chi-squared test of independence between age band and cluster (printed on
# assignment via the surrounding parentheses).
(result4 <- chisq.test(mydata$AgeGroup, mydata$Group))
## Warning in chisq.test(mydata$AgeGroup, mydata$Group): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$AgeGroup and mydata$Group
## X-squared = 21.854, df = 16, p-value = 0.148

There is no association between Group and Age Group.

# Two-band age split. cut() builds right-closed intervals, so without
# include.lowest = TRUE the first band is (18, 50] and a respondent aged
# exactly 18 becomes NA despite the "18-50" label. Ages below 18 are still NA.
mydata$AgeGroup2 <- cut(mydata$Age,
                        breaks = c(18, 50, Inf),
                        labels = c("18-50", "51+"),
                        include.lowest = TRUE)

# Chi-squared test of independence between the two-band age split and cluster.
result42 <- chisq.test(mydata$AgeGroup2, mydata$Group)
## Warning in chisq.test(mydata$AgeGroup2, mydata$Group): Chi-squared
## approximation may be incorrect
result42
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$AgeGroup2 and mydata$Group
## X-squared = 3.9376, df = 4, p-value = 0.4145

Again there is no association between Group and age, even with only two age groups (p = 0.41).

library(ggplot2)

# Share of each age band within each cluster (margin = 1: rows sum to 1).
prop_table6 <- prop.table(table(mydata$Group, mydata$AgeGroup), margin = 1)
prop_df6 <- as.data.frame(as.table(prop_table6))

# Var1 is the cluster (rows of the table) and Var2 the age band (columns), so
# the x axis shows clusters and the fill legend shows age bands; the labels
# were previously swapped.
ggplot(prop_df6, aes(x = Var1, y = Freq, fill = Var2)) +
  geom_bar(stat = "identity", position = "fill") +
  labs(x = "Cluster",
       y = "Proportion",
       fill = "Age Group",
       title = "Proportion of Age Groups across Clusters") +
  theme_minimal()

# Chi-squared test of independence between Q24 and cluster (printed on
# assignment via the surrounding parentheses).
(result5 <- chisq.test(mydata$Q24, mydata$Group))
## Warning in chisq.test(mydata$Q24, mydata$Group): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q24 and mydata$Group
## X-squared = 31.876, df = 20, p-value = 0.04464

There is association between Group and where individuals heard about advanced features for the first time.

library(dplyr)

# Group the Q24 answers into "Online" vs "In Person".
# At this point Q24 still holds the answer codes 1-6 (the text labels are only
# attached further down), so a recode keyed on the label text matched nothing:
# FirstTime kept all six categories and the test below simply repeated the
# Q24 test (same X-squared, df = 20). The lookup is therefore keyed on the
# codes, and also on the labels in case Q24 has already been converted.
q24_labels <- c("Mobile app", "Branch", "Ads", "Social Media",
                "Friends/Family", "This Survey")
q24_channel <- c("Online", "In Person", "Online", "Online",
                 "In Person", "Online")
channel_lookup <- c(
  setNames(q24_channel, seq_along(q24_labels)),
  setNames(q24_channel, q24_labels)
)
# Unknown or missing answers map to NA.
mydata$FirstTime <- unname(channel_lookup[as.character(mydata$Q24)])

# Chi-squared test of independence between the grouped source and cluster.
result52 <- chisq.test(mydata$FirstTime, mydata$Group)
# NOTE(review): the recorded output around this call predates the fix above;
# re-knit the document to refresh it.
## Warning in chisq.test(mydata$FirstTime, mydata$Group): Chi-squared
## approximation may be incorrect
result52
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$FirstTime and mydata$Group
## X-squared = 31.876, df = 20, p-value = 0.04464

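The result is unchanged from the previous test (same X-squared and df = 20), so the regrouping into Online / In Person had no effect.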

# Attach the answer labels to the Q24 codes 1-6.
mydata$Q24 <- factor(mydata$Q24,
                     levels = c(1, 2, 3, 4, 5, 6),
                     labels = c("Mobile app", "Branch", "Ads", "Social Media",
                                "Friends/Family", "This Survey"))

library(ggplot2)

# Share of each Q24 answer within each cluster (margin = 1: rows sum to 1).
prop_table7 <- prop.table(table(mydata$Group, mydata$Q24), margin = 1)
prop_df7 <- as.data.frame(as.table(prop_table7))

# Var1 is the cluster and Var2 the Q24 answer, so the fill legend shows the
# information source; it was previously mislabelled "Clusters".
ggplot(prop_df7, aes(x = Var1, y = Freq, fill = Var2)) +
  geom_bar(stat = "identity", position = "fill") +
  labs(x = "Group",
       y = "Proportion",
       fill = "Information source",
       title = "Proportion of first time hearing about advanced services across Clusters") +
  theme_minimal()

library(ggplot2)

# Within-cluster proportions of each Q26 answer (rows sum to 1).
prop_table8 <- prop.table(table(mydata$Group, mydata$Q26), margin = 1)
prop_df8 <- as.data.frame(as.table(prop_table8))

# Grouped bars in percent: one bar per answer within each cluster.
ggplot(data = prop_df8, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Have you ever received any notifications about advanced features in your app?",
    x = "Group",
    y = "Percentage (%)",
    fill = "Answer"
  )

# Chi-squared test of independence between Q26 and cluster (printed on
# assignment via the surrounding parentheses).
(result6 <- chisq.test(mydata$Q26, mydata$Group))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q26 and mydata$Group
## X-squared = 24.965, df = 4, p-value = 5.113e-05

There is an association between Group and receiving notifications about advanced services.

# Chi-squared test of independence between Q41 and cluster (printed on
# assignment via the surrounding parentheses).
(result7 <- chisq.test(mydata$Q41, mydata$Group))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q41 and mydata$Group
## X-squared = 6.2922, df = 4, p-value = 0.1784

no association between group and gender.

# Chi-squared test of independence between Q21 and cluster (printed on
# assignment via the surrounding parentheses).
(result8 <- chisq.test(mydata$Q21, mydata$Group))
## Warning in chisq.test(mydata$Q21, mydata$Group): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q21 and mydata$Group
## X-squared = 28.527, df = 20, p-value = 0.0975

no association between group and frequency of usage of mobile app.

# Chi-squared test of independence between Q23f and cluster (printed on
# assignment via the surrounding parentheses).
(result9 <- chisq.test(mydata$Q23f, mydata$Group))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q23f and mydata$Group
## X-squared = 14.801, df = 4, p-value = 0.005132

There is an association between Group and being aware of the deposit advanced service.

# Chi-squared test of independence between Q23a and cluster (printed on
# assignment via the surrounding parentheses).
(result10 <- chisq.test(mydata$Q23a, mydata$Group))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q23a and mydata$Group
## X-squared = 15.959, df = 4, p-value = 0.003075

Association between group and awareness of taking out a loan advanced service

# Chi-squared test of independence between Q23b and cluster (printed on
# assignment via the surrounding parentheses).
(result11 <- chisq.test(mydata$Q23b, mydata$Group))
## Warning in chisq.test(mydata$Q23b, mydata$Group): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q23b and mydata$Group
## X-squared = 19.144, df = 4, p-value = 0.0007365

Association between group and awareness of changing limit on the bank account advanced service.

# Chi-squared test of independence between Q23c and cluster (printed on
# assignment via the surrounding parentheses).
(result12 <- chisq.test(mydata$Q23c, mydata$Group))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q23c and mydata$Group
## X-squared = 23.322, df = 4, p-value = 0.0001092

Association between group and awareness of ordering a new credit card advanced service.

# Chi-squared test of independence between Q23d and cluster (printed on
# assignment via the surrounding parentheses).
(result13 <- chisq.test(mydata$Q23d, mydata$Group))
## Warning in chisq.test(mydata$Q23d, mydata$Group): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q23d and mydata$Group
## X-squared = 9.7326, df = 4, p-value = 0.04518

Association between group and awareness of changing limit on the credit card advanced service.

# Chi-squared test of independence between Q23e and cluster (printed on
# assignment via the surrounding parentheses).
(result14 <- chisq.test(mydata$Q23e, mydata$Group))
## 
##  Pearson's Chi-squared test
## 
## data:  mydata$Q23e and mydata$Group
## X-squared = 18.035, df = 4, p-value = 0.001215

Association between group and awareness of opening savings account advanced service.