#install.packages("readxl")
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the raw survey export into a plain data frame. The path is
# machine-specific and must be adjusted when running elsewhere.
mydata <- as.data.frame(
  read_excel("C:/Users/Pino/Desktop/IMB/MVA/Survey.xlsx")
)

# Number the rows, drop the first one, then renumber 1..n.
# NOTE(review): the first row is presumably a non-respondent row of the
# export (e.g. question text) -- confirm against the spreadsheet.
# seq_len() is used instead of seq(1, n) so an empty sheet yields an empty
# ID vector rather than c(1, 0).
mydata$ID <- seq_len(nrow(mydata))
mydata <- mydata %>%
  filter(!ID %in% 1)
mydata$ID <- seq_len(nrow(mydata))
head(mydata)
## Q1 Q21 Q22a Q23a Q23b Q23c Q23d Q23e Q23f Q24 Q25 Q26 Q27a Q27b Q27c Q28 Q29a
## 1 1 6 3 1 1 1 1 1 1 2 3 1 1 1 0 2 3
## 2 1 5 4 1 1 1 1 1 1 1 4 1 1 0 0 2 4
## 3 1 6 4 2 1 1 1 2 2 2 3 2 0 0 1 1 4
## 4 1 6 5 1 1 1 1 1 1 1 4 1 0 0 1 2 5
## 5 1 6 5 1 1 1 1 1 1 1 4 1 0 0 1 2 5
## 6 1 6 5 2 1 1 1 1 1 4 3 1 0 1 0 1 4
## Q29b Q29c Q29d Q30a Q30b Q30c Q30d Q30e Q31a Q31b Q33a Q33b Q33c Q33d Q33e
## 1 3 4 5 3 3 3 3 3 4 4 3 4 5 3 4
## 2 2 2 1 3 3 3 3 3 5 2 2 4 4 5 4
## 3 4 2 5 4 4 4 4 1 2 4 5 4 4 5 5
## 4 2 1 1 2 2 4 1 3 5 1 4 4 5 3 4
## 5 2 2 2 5 5 5 5 5 5 2 3 4 4 5 5
## 6 1 1 4 3 5 5 2 4 4 5 5 4 5 4 5
## Q33f Poslovalnica_podpora_in_usmerjanje
## 1 4 4
## 2 5 4
## 3 5 5
## 4 5 5
## 5 5 4
## 6 4 4
## Mobilna aplikacija_podpora_in_usmerjanje Poslovalnica_brezkrbnost
## 1 4 3
## 2 3 4
## 3 2 5
## 4 3 5
## 5 3 4
## 6 2 4
## Mobilna aplikacija_brezkrbnost Poslovalnica_varnost
## 1 3 4
## 2 4 4
## 3 2 5
## 4 4 5
## 5 4 4
## 6 3 4
## Mobilna aplikacija_varnost Poslovalnica_dostopnost
## 1 3 3
## 2 4 3
## 3 3 5
## 4 4 5
## 5 3 4
## 6 3 5
## Mobilna aplikacija_dostopnost Poslovalnica_jasnost Mobilna aplikacija_jasnost
## 1 3 4 4
## 2 5 5 4
## 3 2 5 2
## 4 3 5 4
## 5 5 4 4
## 6 2 4 2
## Poslovalnica_hitrost Mobilna aplikacija_hitrost Q40 Q41
## 1 3 4 -1 2
## 2 3 5 -1 2
## 3 3 4 -1 2
## 4 2 5 -1 2
## 5 2 4 -1 2
## 6 1 5 Stay humble, only cash 2
## Q42 Q43a Q43b Q43c Q43d Q43e Q43f Q43g Q43h Q44 Q45 Q45_13_text Q46 Q47 Q48
## 1 2000 1 0 0 0 0 0 0 0 2 3 -2 2 2 2
## 2 1998 0 0 0 1 0 0 0 0 6 3 -2 2 3 4
## 3 2001 1 0 0 0 0 0 0 0 2 1 -2 2 2 3
## 4 1994 0 0 0 1 0 0 0 0 6 12 -2 5 6 5
## 5 2000 1 1 0 0 0 0 0 0 2 1 -2 2 3 6
## 6 2004 1 0 0 0 0 0 0 0 3 1 -2 1 8 4
## ID
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
# Convert the coded answer columns (selected by position) to numeric.
# across() replaces the superseded mutate_all().
numeric_cols <- c(2:9, 11, 17:45, 48, 58)
mydata[numeric_cols] <- mydata[numeric_cols] %>%
  mutate(across(everything(), as.numeric))
library(dplyr)
# Bank flag derived from Q45: 1 when Q45 is coded 1, 2 when it is coded
# 2 through 13, and 0 for anything else (including missing values).
mydata$BankF <- case_when(
  mydata$Q45 == 1 ~ 1,
  mydata$Q45 %in% 2:13 ~ 2,
  TRUE ~ 0
)
# Turn the Q26-Q28 answer codes into labelled factors. Any code that is not
# listed in `levels` becomes NA.
mydata[["Q26"]] <- factor(
  mydata[["Q26"]],
  levels = c(1, 2),
  labels = c("Da", "Ne")
)
mydata[["Q27a"]] <- factor(
  mydata[["Q27a"]],
  levels = c(1, 0),
  labels = c("V mobilni aplikaciji", "Ne")
)
mydata[["Q27b"]] <- factor(
  mydata[["Q27b"]],
  levels = c(1, 0),
  labels = c("V poslovalnici", "Ne")
)
# NOTE(review): "upiorabljal" looks like a typo for "uporabljal"; the label
# is left unchanged here because it is a runtime value.
mydata[["Q27c"]] <- factor(
  mydata[["Q27c"]],
  levels = c(1, 0),
  labels = c("Nisem upiorabljal/a", "Ne")
)
mydata[["Q28"]] <- factor(
  mydata[["Q28"]],
  levels = c(1, 2),
  labels = c("V poslovalnici", "V mobilni aplikaciji")
)

# Per-column summary of columns 2 to 45.
summary(mydata[2:45])
## Q21 Q22a Q23a Q23b
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:1.000 1st Qu.:1.000
## Median :5.000 Median :5.000 Median :1.000 Median :1.000
## Mean :4.502 Mean :4.325 Mean :1.381 Mean :1.192
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :6.000 Max. :5.000 Max. :2.000 Max. :2.000
## Q23c Q23d Q23e Q23f
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :1.000 Median :1.000 Median :2.000
## Mean :1.283 Mean :1.181 Mean :1.355 Mean :1.543
## 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## Q24 Q25 Q26 Q27a
## Length:265 Min. :1.000 Da:141 V mobilni aplikaciji:106
## Class :character 1st Qu.:3.000 Ne:124 Ne :159
## Mode :character Median :4.000
## Mean :3.498
## 3rd Qu.:4.000
## Max. :5.000
## Q27b Q27c Q28
## V poslovalnici: 67 Nisem upiorabljal/a:114 V poslovalnici : 96
## Ne :198 Ne :151 V mobilni aplikaciji:169
##
##
##
##
## Q29a Q29b Q29c Q29d Q30a
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:2.000
## Median :4.000 Median :3.000 Median :2.000 Median :3.00 Median :3.000
## Mean :3.438 Mean :2.728 Mean :2.596 Mean :2.97 Mean :3.072
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000
## Q30b Q30c Q30d Q30e
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000
## Median :4.000 Median :4.000 Median :3.000 Median :3.000
## Mean :3.377 Mean :3.426 Mean :3.117 Mean :2.932
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q31a Q31b Q33a Q33b Q33c
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:3.00 1st Qu.:4.000 1st Qu.:4.000
## Median :4.000 Median :2.000 Median :4.00 Median :4.000 Median :5.000
## Mean :4.128 Mean :2.506 Mean :3.83 Mean :4.075 Mean :4.264
## 3rd Qu.:5.000 3rd Qu.:3.000 3rd Qu.:5.00 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000
## Q33d Q33e Q33f
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.000
## Mean :4.042 Mean :4.023 Mean :4.219
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Poslovalnica_podpora_in_usmerjanje Mobilna aplikacija_podpora_in_usmerjanje
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :3.000
## Mean :3.811 Mean :3.125
## 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000
## Poslovalnica_brezkrbnost Mobilna aplikacija_brezkrbnost Poslovalnica_varnost
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.000
## Mean :3.921 Mean :3.551 Mean :4.211
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Mobilna aplikacija_varnost Poslovalnica_dostopnost
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :4.000
## Mean :3.698 Mean :3.785
## 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000
## Mobilna aplikacija_dostopnost Poslovalnica_jasnost Mobilna aplikacija_jasnost
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :4.000 Median :3.000
## Mean :3.792 Mean :3.913 Mean :3.347
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Poslovalnica_hitrost Mobilna aplikacija_hitrost
## Min. :1.000 Min. :1.0
## 1st Qu.:2.000 1st Qu.:4.0
## Median :3.000 Median :4.0
## Mean :2.891 Mean :4.2
## 3rd Qu.:4.000 3rd Qu.:5.0
## Max. :5.000 Max. :5.0
# Q41: three answer codes with English labels.
mydata$Q41 <- factor(
  mydata$Q41,
  levels = 1:3,
  labels = c("Female", "Male", "I don't want to answer")
)

# Q43a-Q43h are 1/0 indicator columns; give all eight the same
# Selected / Not selected labelling in one pass.
q43_cols <- paste0("Q43", letters[1:8])
mydata[q43_cols] <- lapply(
  mydata[q43_cols],
  factor,
  levels = c(1, 0),
  labels = c("Selected", "Not selected")
)
# Label the remaining coded columns. Codes absent from `levels` turn
# into NA in the resulting factor.

# Q44: six size classes, codes 1-6.
mydata$Q44 <- factor(
  mydata$Q44,
  levels = 1:6,
  labels = c(
    "Less than 1.000 habitants",
    "1.000 – 5.000 habitants",
    "5.001 – 20.000 habitants",
    "20.001 – 50.000 habitants",
    "50.001 – 100.000 habitants",
    "More than 100.000 habitants"
  )
)

# Q45: bank codes.
# NOTE(review): codes 2, 8 and 13 are not listed and therefore become NA --
# confirm that this is intended.
mydata$Q45 <- factor(
  mydata$Q45,
  levels = c(3, 5, 1, 9, 12, 7, 10, 4, 11, 6),
  labels = c(
    "OTP banka d.d.",
    "Banka Intesa Sanpaolo d.d.",
    "Nova Ljubljanska Banka d.d. (NLB)",
    "Gorenjska Banka d.d.",
    "Delavska Hranilnica d.d.",
    "Revolut",
    "Deželna Banka Slovenije d.d.",
    "Banka Sparkasse d.d.",
    "Addiko Bank d.d.",
    "UniCredit Banka Slovenija d.d."
  )
)

# Q46: codes are listed in the order 1, 2, 3, 5, 6, 4, which is also the
# order of the factor levels.
mydata$Q46 <- factor(
  mydata$Q46,
  levels = c(1, 2, 3, 5, 6, 4),
  labels = c(
    "Študent/-ka",
    "Redno zaposlen/-a",
    "Upokojen/-a",
    "Samozaposlen/-a",
    "Delno zaposlen/-a",
    "Brezposeln/-a"
  )
)

# Q47: eight classes, codes 1-8.
mydata$Q47 <- factor(
  mydata$Q47,
  levels = 1:8,
  labels = c(
    "Pod 1.000€",
    "1.000€ - 1.500€",
    "1.501€ - 2.000€",
    "2.001€ - 3.000€",
    "3.001€ - 5.000€",
    "5.001€ - 10.000€",
    "Above 10.000€",
    "I don't want to answer"
  )
)

# Q48: codes 2-7.
# NOTE(review): code 1 is not listed and would become NA -- confirm.
mydata$Q48 <- factor(
  mydata$Q48,
  levels = 2:7,
  labels = c(
    "Dokončana osnovna šola",
    "Dokončana nižja ali srednja poklicna izobrazba",
    "Dokončana srednja strokovna ali splošna izobrazba",
    "Dokončana višješolska strokovna ali visokošolska strokovna izobrazba (tudi 1. bolonjska stopnja)",
    "Dokončana visokošolska strokovna univerzitetna izobrazba (tudi 2. bolonjska stopnja)",
    "Dokončana specializacija, znanstveni magisterij, doktorat"
  )
)

# BankF: 1 = NLB, 2 = Other; any other value (e.g. 0) becomes NA.
mydata$BankF <- factor(
  mydata$BankF,
  levels = c(1, 2),
  labels = c("NLB", "Other")
)
library(dplyr)
# Respondents whose bank flag is "NLB".
mydataNLB <- mydata %>%
  filter(BankF == "NLB")
library(psych)
# Descriptive statistics for columns 2 to 45. describe() is used directly:
# describe.by() is deprecated and, with no grouping variable supplied, only
# forwarded to describe() after emitting two warnings.
describe(mydata[2:45])
## Warning in describe.by(mydata[c(2:45)]): describe.by is deprecated. Please use
## the describeBy function
## Warning in describeBy(x = x, group = group, mat = mat, type = type, ...): no
## grouping variable requested
## vars n mean sd median trimmed mad
## Q21 1 265 4.50 1.28 5 4.66 1.48
## Q22a 2 265 4.32 0.87 5 4.49 0.00
## Q23a 3 265 1.38 0.49 1 1.35 0.00
## Q23b 4 265 1.19 0.39 1 1.12 0.00
## Q23c 5 265 1.28 0.45 1 1.23 0.00
## Q23d 6 265 1.18 0.39 1 1.10 0.00
## Q23e 7 265 1.35 0.48 1 1.32 0.00
## Q23f 8 265 1.54 0.50 2 1.55 0.00
## Q24* 9 265 2.16 1.74 1 1.83 0.00
## Q25 10 265 3.50 0.86 4 3.52 1.48
## Q26* 11 265 1.47 0.50 1 1.46 0.00
## Q27a* 12 265 1.60 0.49 2 1.62 0.00
## Q27b* 13 265 1.75 0.44 2 1.81 0.00
## Q27c* 14 265 1.57 0.50 2 1.59 0.00
## Q28* 15 265 1.64 0.48 2 1.67 0.00
## Q29a 16 265 3.44 1.26 4 3.54 1.48
## Q29b 17 265 2.73 1.13 3 2.74 1.48
## Q29c 18 265 2.60 1.31 2 2.50 1.48
## Q29d 19 265 2.97 1.32 3 2.96 1.48
## Q30a 20 265 3.07 1.20 3 3.09 1.48
## Q30b 21 265 3.38 1.18 4 3.47 1.48
## Q30c 22 265 3.43 1.24 4 3.52 1.48
## Q30d 23 265 3.12 1.25 3 3.15 1.48
## Q30e 24 265 2.93 1.24 3 2.92 1.48
## Q31a 25 265 4.13 0.86 4 4.25 1.48
## Q31b 26 265 2.51 1.22 2 2.42 1.48
## Q33a 27 265 3.83 1.06 4 3.97 1.48
## Q33b 28 265 4.08 0.95 4 4.22 1.48
## Q33c 29 265 4.26 1.02 5 4.48 0.00
## Q33d 30 265 4.04 0.91 4 4.16 1.48
## Q33e 31 265 4.02 0.92 4 4.13 1.48
## Q33f 32 265 4.22 0.89 4 4.35 1.48
## Poslovalnica_podpora_in_usmerjanje 33 265 3.81 0.88 4 3.86 1.48
## Mobilna aplikacija_podpora_in_usmerjanje 34 265 3.12 0.88 3 3.13 1.48
## Poslovalnica_brezkrbnost 35 265 3.92 0.87 4 3.97 1.48
## Mobilna aplikacija_brezkrbnost 36 265 3.55 0.87 4 3.57 1.48
## Poslovalnica_varnost 37 265 4.21 0.81 4 4.31 1.48
## Mobilna aplikacija_varnost 38 265 3.70 0.85 4 3.73 1.48
## Poslovalnica_dostopnost 39 265 3.78 1.00 4 3.88 1.48
## Mobilna aplikacija_dostopnost 40 265 3.79 0.89 4 3.84 1.48
## Poslovalnica_jasnost 41 265 3.91 0.91 4 4.00 1.48
## Mobilna aplikacija_jasnost 42 265 3.35 0.85 3 3.34 1.48
## Poslovalnica_hitrost 43 265 2.89 1.02 3 2.90 1.48
## Mobilna aplikacija_hitrost 44 265 4.20 0.76 4 4.28 1.48
## min max range skew kurtosis se
## Q21 1 6 5 -1.10 0.76 0.08
## Q22a 1 5 4 -1.54 2.46 0.05
## Q23a 1 2 1 0.49 -1.77 0.03
## Q23b 1 2 1 1.55 0.41 0.02
## Q23c 1 2 1 0.96 -1.09 0.03
## Q23d 1 2 1 1.65 0.71 0.02
## Q23e 1 2 1 0.60 -1.64 0.03
## Q23f 1 2 1 -0.17 -1.98 0.03
## Q24* 1 6 5 1.34 0.23 0.11
## Q25 1 5 4 -0.33 0.31 0.05
## Q26* 1 2 1 0.13 -1.99 0.03
## Q27a* 1 2 1 -0.41 -1.84 0.03
## Q27b* 1 2 1 -1.13 -0.72 0.03
## Q27c* 1 2 1 -0.28 -1.93 0.03
## Q28* 1 2 1 -0.57 -1.68 0.03
## Q29a 1 5 4 -0.45 -0.73 0.08
## Q29b 1 5 4 -0.05 -0.97 0.07
## Q29c 1 5 4 0.35 -1.06 0.08
## Q29d 1 5 4 -0.09 -1.18 0.08
## Q30a 1 5 4 -0.24 -0.84 0.07
## Q30b 1 5 4 -0.57 -0.55 0.07
## Q30c 1 5 4 -0.43 -0.79 0.08
## Q30d 1 5 4 -0.31 -0.97 0.08
## Q30e 1 5 4 -0.01 -1.02 0.08
## Q31a 1 5 4 -1.28 2.22 0.05
## Q31b 1 5 4 0.43 -0.85 0.07
## Q33a 1 5 4 -0.96 0.54 0.06
## Q33b 1 5 4 -1.19 1.39 0.06
## Q33c 1 5 4 -1.60 2.10 0.06
## Q33d 1 5 4 -1.27 2.05 0.06
## Q33e 1 5 4 -1.12 1.62 0.06
## Q33f 1 5 4 -1.25 1.64 0.05
## Poslovalnica_podpora_in_usmerjanje 1 5 4 -0.62 0.55 0.05
## Mobilna aplikacija_podpora_in_usmerjanje 1 5 4 -0.08 -0.19 0.05
## Poslovalnica_brezkrbnost 1 5 4 -0.59 0.39 0.05
## Mobilna aplikacija_brezkrbnost 1 5 4 -0.12 -0.22 0.05
## Poslovalnica_varnost 1 5 4 -1.13 1.85 0.05
## Mobilna aplikacija_varnost 1 5 4 -0.44 0.15 0.05
## Poslovalnica_dostopnost 1 5 4 -0.63 0.10 0.06
## Mobilna aplikacija_dostopnost 1 5 4 -0.42 -0.09 0.05
## Poslovalnica_jasnost 1 5 4 -0.72 0.33 0.06
## Mobilna aplikacija_jasnost 1 5 4 -0.03 -0.06 0.05
## Poslovalnica_hitrost 1 5 4 0.07 -0.59 0.06
## Mobilna aplikacija_hitrost 1 5 4 -1.02 2.08 0.05
# Standardise (centre and scale) the four clustering variables, which sit in
# columns 3, 11, 26 and 27 of mydata.
cluster_vars <- c("Q22a", "Q25", "Q31a", "Q31b")
mydata_clu_std <- as.data.frame(scale(mydata[cluster_vars]))
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.2
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Clustering tendency (Hopkins statistic) on the standardised data, using
# n - 1 sampled points and without drawing the plot.
hopkins_n <- nrow(mydata_clu_std) - 1
get_clust_tendency(mydata_clu_std, n = hopkins_n, graph = FALSE)
## $hopkins_stat
## [1] 0.5586369
##
## $plot
## NULL
# Give the four clustering columns (positions 3, 11, 26, 27) readable names.
names(mydata)[c(3, 11, 26, 27)] <- c("Awareness", "Ease", "Value", "Trust")
The four clustering variables were renamed: Q22a → “Awareness”, Q25 → “Ease”, Q31a → “Value”, Q31b → “Trust”. The clusters are built on these four variables.
library(factoextra)
library(NbClust)
# Within-cluster sum of squares per number of k-means clusters (elbow plot).
fviz_nbclust(mydata_clu_std, kmeans, method = "wss") +
labs(subtitle = "Elbow method")
# Average silhouette width per number of k-means clusters.
fviz_nbclust(mydata_clu_std, kmeans, method = "silhouette")+
labs(subtitle = "Silhouette analysis")
library(dplyr)
library(factoextra)
# Hierarchical clustering: Euclidean distances on the standardised data,
# merged with Ward's method (ward.D2). Printing WARD shows the call summary.
WARD <- mydata_clu_std %>%
get_dist(method = "euclidean") %>%
hclust(method = "ward.D2")
WARD
##
## Call:
## hclust(d = ., method = "ward.D2")
##
## Cluster method : ward.D2
## Distance : euclidean
## Number of objects: 265
library(factoextra)
# Plain dendrogram of the Ward hierarchy, without cutting it into groups.
fviz_dend(x = WARD)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Dendrogram cut into five groups, with coloured labels and rectangles.
fviz_dend(
  x = WARD,
  k = 5,
  cex = 0.5,
  palette = "jama",
  color_labels_by_k = TRUE,
  rect = TRUE
)
library(factoextra)
library(ggplot2)
# K-means with five centres; the best of 25 random starts is kept.
Clustering <- kmeans(x = mydata_clu_std, centers = 5, nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 27, 74, 76, 32, 56
##
## Cluster means:
## Q22a Q25 Q31a Q31b
## 1 -2.2987082 -0.6636458 -0.1920661 0.4967113
## 2 0.1713700 -0.7658139 0.1806157 -0.3039786
## 3 0.5063700 0.9025907 0.5693474 -0.7926587
## 4 -0.0860382 -0.5414867 -1.9278200 0.2004560
## 5 0.2438008 0.4164170 0.1828583 1.1234050
##
## Clustering vector:
## [1] 1 3 4 3 3 5 2 5 3 1 4 4 5 2 4 1 2 2 5 5 3 5 4 3 3 1 5 2 2 2 5 2 3 3 3 5 3
## [38] 4 3 2 3 5 3 3 2 3 3 3 3 3 2 3 5 5 2 2 5 3 5 5 5 3 3 2 3 2 2 2 5 5 5 5 3 2
## [75] 4 2 5 4 3 5 5 1 3 2 3 2 1 5 3 2 2 1 1 1 2 2 5 3 2 1 3 1 4 3 2 3 1 2 1 3 3
## [112] 5 2 2 1 2 5 1 3 2 5 2 2 4 5 1 2 2 1 2 5 2 5 3 5 1 5 4 4 2 3 1 1 4 2 4 2 3
## [149] 3 4 3 4 2 3 2 5 1 4 1 3 3 3 3 2 2 3 2 3 2 5 3 4 4 3 2 2 3 2 3 3 3 5 3 2 2
## [186] 4 2 2 2 3 1 3 5 4 4 2 2 3 3 5 4 4 2 5 2 4 3 3 5 5 2 1 3 2 5 2 5 2 5 5 4 5
## [223] 3 2 3 2 3 5 3 2 5 3 5 1 1 4 1 5 3 2 4 2 5 2 3 2 3 2 5 5 2 3 3 5 5 2 2 3 4
## [260] 4 3 3 5 4 4
##
## Within cluster sum of squares by cluster:
## [1] 95.65988 92.49211 79.70569 116.86933 85.02220
## (between_SS / total_SS = 55.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Plot the k-means solution on the standardised data.
fviz_cluster(
  object = Clustering,
  data = mydata_clu_std,
  palette = "Set1",
  repel = TRUE,
  ggtheme = theme_bw()
)

# Euclidean distance of every unit from the origin of the standardised
# space (i.e. from the variable means).
mydata$Dissimilarity <- with(
  mydata_clu_std,
  sqrt(Q22a^2 + Q25^2 + Q31a^2 + Q31b^2)
)

# Ten units with the largest distance.
by_distance <- order(-mydata$Dissimilarity)
head(mydata[by_distance, c("ID", "Dissimilarity")], 10)
## ID Dissimilarity
## 191 191 6.365406
## 11 11 4.823598
## 202 202 4.716886
## 93 93 4.390156
## 241 241 4.281977
## 38 38 4.229806
## 234 234 4.116562
## 115 115 3.993249
## 103 103 3.861159
## 10 10 3.798713
# Remove unit 191 (the largest Dissimilarity value), renumber the IDs and
# re-standardise the clustering variables on the reduced data.
# seq_len() avoids the c(1, 0) result seq(1, n) gives for an empty frame.
mydata <- mydata %>%
  filter(!ID %in% c(191))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra)
# Hopkins statistic recomputed on the reduced data.
get_clust_tendency(mydata_clu_std,
  n = nrow(mydata_clu_std) - 1,
  graph = FALSE
)
## $hopkins_stat
## [1] 0.5736077
##
## $plot
## NULL
# Rebuild the Ward hierarchy from the current standardised data. The WARD
# object created earlier was fitted before a row was removed, so plotting it
# again would show the old tree rather than the reduced data set.
WARD <- mydata_clu_std %>%
  get_dist(method = "euclidean") %>%
  hclust(method = "ward.D2")
fviz_dend(WARD,
  k = 5,
  cex = 0.5,
  palette = "jama",
  color_labels_by_k = TRUE,
  rect = TRUE
)
library(factoextra)
library(ggplot2)
# K-means with five centres on the reduced data (best of 25 random starts).
Clustering <- kmeans(mydata_clu_std,
  centers = 5,
  nstart = 25
)
Clustering
## K-means clustering with 5 clusters of sizes 56, 74, 76, 32, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.2355188 0.4112907 0.1731763 1.1381089
## 2 0.1611116 -0.7879947 0.1708796 -0.2980480
## 3 0.5052525 0.9044778 0.5689982 -0.7897317
## 4 -0.1033202 -0.5604314 -1.9884698 0.2094871
## 5 -2.3155481 -0.5971993 -0.0752230 0.4475952
##
## Clustering vector:
## [1] 5 3 4 3 3 1 2 1 3 5 4 4 1 2 4 5 2 2 1 1 3 1 4 3 3 5 1 2 2 2 1 2 3 3 3 1 3
## [38] 4 3 2 3 1 3 3 2 3 3 3 3 3 2 3 1 1 2 2 1 3 1 1 1 3 3 2 3 2 2 2 1 1 1 1 3 2
## [75] 4 2 1 4 3 1 1 5 3 2 3 2 5 1 3 2 2 5 5 5 2 2 1 3 2 5 3 5 4 3 2 3 5 2 5 3 3
## [112] 1 2 2 5 2 1 5 3 2 1 2 2 4 1 5 2 2 5 2 1 2 1 3 1 5 1 4 4 2 3 5 5 4 2 4 2 3
## [149] 3 4 3 4 2 3 2 1 5 4 5 3 3 3 3 2 2 3 2 3 2 1 3 4 4 3 2 2 3 2 3 3 3 1 3 2 2
## [186] 4 2 2 2 3 3 1 4 4 2 2 3 3 1 4 4 2 1 2 4 3 3 1 1 2 5 3 2 1 2 1 2 1 1 4 1 3
## [223] 2 3 2 3 1 3 2 1 3 1 5 5 4 5 1 3 2 4 2 1 2 3 2 3 2 1 1 2 3 3 1 1 2 2 3 4 4
## [260] 3 3 1 4 4
##
## Within cluster sum of squares by cluster:
## [1] 88.14381 95.70238 82.80060 120.50690 75.50678
## (between_SS / total_SS = 56.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Plot the current k-means solution.
fviz_cluster(Clustering,
  palette = "Set1",
  repel = TRUE,
  ggtheme = theme_bw(),
  data = mydata_clu_std
)
# Remove the unit whose current ID is 11, renumber, re-standardise and
# refit. seq_len() is safe for an empty frame, unlike seq(1, n).
mydata <- mydata %>%
  filter(!ID %in% c(11))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra)
library(ggplot2)
Clustering <- kmeans(mydata_clu_std,
  centers = 5,
  nstart = 25
)
Clustering
## K-means clustering with 5 clusters of sizes 76, 54, 75, 32, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.16017846 -0.8533305 0.11948655 -0.3059896
## 2 0.21326412 0.3776779 0.22118546 1.1460396
## 3 0.49918810 0.9111497 0.59543745 -0.7892907
## 4 -0.03067026 -0.2443742 -1.97805346 0.2824627
## 5 -2.31336479 -0.6176055 -0.09173426 0.4433489
##
## Clustering vector:
## [1] 5 3 4 3 3 2 1 2 3 5 4 2 1 1 5 1 1 2 2 3 2 4 3 3 5 2 1 1 1 2 1 3 3 3 2 3 4
## [38] 3 1 3 2 3 3 1 3 3 3 3 3 1 3 2 2 1 1 2 3 2 2 2 3 3 1 3 1 1 1 2 2 2 2 3 1 4
## [75] 1 2 4 4 2 2 5 3 1 3 1 5 2 3 1 1 5 5 5 1 1 2 3 1 5 3 5 4 3 1 3 5 1 5 3 3 2
## [112] 1 1 5 1 2 5 3 1 2 1 1 4 4 5 1 1 5 1 2 1 2 3 2 5 2 4 4 1 3 5 5 4 1 4 1 3 3
## [149] 4 3 1 1 3 1 2 5 4 5 3 3 3 3 1 1 3 1 3 1 2 3 4 4 3 1 1 3 1 3 3 3 2 3 1 1 4
## [186] 1 1 1 3 3 2 4 4 1 1 3 3 2 4 4 1 2 1 4 3 3 4 2 1 5 3 1 2 1 2 1 2 2 4 2 3 1
## [223] 3 1 3 2 3 1 2 3 2 5 5 4 5 2 3 1 4 1 2 1 3 1 3 1 2 2 1 3 3 2 2 1 1 3 4 4 3
## [260] 3 2 4 4
##
## Within cluster sum of squares by cluster:
## [1] 109.86189 82.00624 80.53778 111.18358 76.83579
## (between_SS / total_SS = 56.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Plot the current k-means solution.
fviz_cluster(Clustering,
  palette = "Set1",
  repel = TRUE,
  ggtheme = theme_bw(),
  data = mydata_clu_std
)
# Remove the units with the listed current IDs
# (NOTE(review): presumably outliers read off the plot above -- confirm),
# then renumber, re-standardise and refit.
# seq_len() is safe for an empty frame, unlike seq(1, n).
mydata <- mydata %>%
  filter(!ID %in% c(86, 200, 143, 239, 37))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra)
library(ggplot2)
Clustering <- kmeans(mydata_clu_std,
  centers = 5,
  nstart = 25
)
Clustering
## K-means clustering with 5 clusters of sizes 79, 25, 50, 30, 74
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.50108668 0.8823227 0.6219599 -0.7289837
## 2 -2.38619015 -0.5895733 -0.1589148 0.5303956
## 3 0.14549250 0.3470049 0.1111776 1.2357670
## 4 0.08869192 -0.3574300 -1.9775371 0.1497189
## 5 0.13693951 -0.8323177 0.1162874 -0.2966229
##
## Clustering vector:
## [1] 2 1 4 1 1 3 5 3 1 2 4 3 5 4 2 5 5 3 1 1 3 4 1 1 2 3 5 5 5 3 5 1 1 1 3 1 1
## [38] 5 1 3 1 1 5 1 1 1 1 1 5 1 3 1 5 5 3 1 3 3 3 1 1 5 1 5 5 5 3 3 3 3 1 5 4 5
## [75] 3 4 4 3 3 2 1 5 1 5 3 1 5 5 2 2 2 5 5 3 1 5 2 1 2 4 1 5 1 2 5 2 1 1 3 5 5
## [112] 2 5 3 2 1 5 3 5 5 4 4 2 5 5 2 5 3 5 3 1 3 2 1 4 4 5 1 2 2 5 4 5 1 1 4 1 4
## [149] 5 1 5 3 2 4 2 1 1 1 1 5 5 1 5 1 5 3 1 4 4 1 5 5 1 5 1 1 1 3 1 5 5 4 5 5 5
## [186] 1 1 3 4 4 5 5 1 1 3 4 5 3 5 4 1 1 4 3 5 2 1 5 3 5 3 5 3 3 4 3 1 5 1 5 1 3
## [223] 1 5 3 1 3 2 2 4 2 3 1 5 5 3 5 1 5 1 5 3 3 5 1 1 1 3 5 5 1 4 4 1 1 3 4 4
##
## Within cluster sum of squares by cluster:
## [1] 95.40383 77.28224 80.33349 91.09260 105.03046
## (between_SS / total_SS = 56.3 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Plot the current k-means solution.
fviz_cluster(Clustering,
  palette = "Set1",
  repel = TRUE,
  ggtheme = theme_bw(),
  data = mydata_clu_std
)
# Remove the units with the listed current IDs
# (NOTE(review): presumably outliers read off the plot above -- confirm),
# then renumber, re-standardise and refit.
# seq_len() is safe for an empty frame, unlike seq(1, n).
mydata <- mydata %>%
  filter(!ID %in% c(90, 155, 85, 146, 100))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra)
library(ggplot2)
Clustering <- kmeans(mydata_clu_std,
  centers = 5,
  nstart = 25
)
Clustering
## K-means clustering with 5 clusters of sizes 23, 76, 49, 79, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 -2.41967781 -0.6566175 -0.16063729 0.44471611
## 2 0.11993807 -0.8977034 0.06023898 -0.26926030
## 3 0.11446989 0.3155981 0.08097431 1.31250594
## 4 0.49980397 0.8895872 0.61326842 -0.71180559
## 5 0.05552915 -0.0928475 -2.04997890 0.08289087
##
## Clustering vector:
## [1] 1 4 5 4 4 3 2 3 4 1 5 3 2 2 1 2 2 3 4 4 3 5 4 4 1 3 2 2 2 3 2 4 4 4 3 4 4
## [38] 2 4 3 4 4 2 4 4 4 4 4 2 4 3 4 2 2 3 4 3 3 3 4 4 2 4 2 2 2 3 3 3 3 4 2 5 2
## [75] 3 5 5 3 3 1 4 2 4 2 4 2 2 1 1 2 2 3 4 2 1 4 1 4 2 4 1 2 1 4 4 3 2 2 1 2 3
## [112] 1 4 2 3 2 2 5 5 1 2 2 1 2 3 2 3 4 3 1 4 5 5 2 4 1 1 2 5 2 4 4 4 2 2 4 2 3
## [149] 1 5 4 4 4 4 2 2 4 2 4 2 3 4 5 5 4 2 2 4 2 4 4 4 3 4 2 2 5 2 2 2 4 4 3 5 5
## [186] 2 2 4 4 3 5 2 3 2 5 4 4 5 3 2 1 4 2 3 2 3 2 3 3 5 3 4 2 4 2 4 3 4 2 3 4 3
## [223] 1 1 5 1 3 4 2 2 3 2 4 2 4 2 3 3 2 4 4 4 3 2 2 4 5 5 4 4 3 5 5
##
## Within cluster sum of squares by cluster:
## [1] 70.16428 124.58392 80.49920 99.56269 63.35325
## (between_SS / total_SS = 56.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Plot the current k-means solution.
fviz_cluster(Clustering,
  palette = "Set1",
  repel = TRUE,
  ggtheme = theme_bw(),
  data = mydata_clu_std
)
# Remove the units with the listed current IDs
# (NOTE(review): presumably outliers read off the plot above -- confirm),
# then renumber, re-standardise and refit.
# seq_len() is safe for an empty frame, unlike seq(1, n).
mydata <- mydata %>%
  filter(!ID %in% c(193, 190, 164))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra)
library(ggplot2)
Clustering <- kmeans(mydata_clu_std,
  centers = 5,
  nstart = 25
)
Clustering
## K-means clustering with 5 clusters of sizes 75, 26, 79, 23, 47
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.13714948 -0.8797902 0.08082592 -0.2392861
## 2 0.01602031 -0.1291655 -2.03520230 -0.0309770
## 3 0.50815392 0.9082966 0.62028545 -0.7001278
## 4 -2.40486418 -0.6499943 -0.16676336 0.4696909
## 5 0.09499952 0.2667449 0.03587920 1.3459376
##
## Clustering vector:
## [1] 4 3 2 3 3 5 1 5 3 4 2 5 1 2 4 1 1 5 3 3 5 2 3 3 4 5 1 1 1 5 1 3 3 3 5 3 3
## [38] 1 3 5 3 3 1 3 3 3 3 3 1 3 5 3 1 1 5 3 5 5 5 3 3 1 3 1 1 1 5 5 5 5 3 1 2 1
## [75] 5 2 2 5 5 4 3 1 3 1 3 1 1 4 4 1 1 5 3 1 4 3 4 3 1 3 4 1 4 3 3 5 1 1 4 1 5
## [112] 4 3 1 5 1 1 2 2 4 1 1 4 1 5 1 5 3 5 4 3 2 2 1 3 4 4 1 2 1 3 3 3 1 1 3 1 5
## [149] 4 2 3 3 3 3 1 1 3 1 3 1 5 3 2 3 1 1 3 1 3 3 3 5 3 1 1 2 1 1 1 3 3 5 2 2 1
## [186] 1 3 3 2 1 1 2 3 3 2 5 1 4 3 1 5 1 5 1 5 5 2 5 3 1 3 1 3 5 3 1 5 3 5 4 4 2
## [223] 4 5 3 1 1 5 1 3 1 3 1 5 5 1 3 3 3 5 1 1 3 2 2 3 3 5 2 2
##
## Within cluster sum of squares by cluster:
## [1] 121.45870 62.95169 101.43697 71.50918 74.12297
## (between_SS / total_SS = 56.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Plot the current k-means solution.
fviz_cluster(Clustering,
  palette = "Set1",
  repel = TRUE,
  ggtheme = theme_bw(),
  data = mydata_clu_std
)
# Remove the units with the listed current IDs
# (NOTE(review): presumably outliers read off the plot above -- confirm),
# then renumber and re-standardise.
# seq_len() is safe for an empty frame, unlike seq(1, n).
mydata <- mydata %>%
  filter(!ID %in% c(220, 10, 109))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
library(factoextra)
library(ggplot2)
# This fit supplies the cluster numbers used for all profiling further down.
# kmeans() starts from random centres, so the numbering (1-5) can change
# between runs; fixing the seed keeps it stable.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
  centers = 5,
  nstart = 25
)
Clustering
## K-means clustering with 5 clusters of sizes 48, 75, 79, 20, 25
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.07755914 0.2561182 -0.003027745 1.38856900
## 2 0.10677233 -0.9230788 0.077510269 -0.22238704
## 3 0.50068099 0.9055025 0.618119866 -0.69048033
## 4 -2.49544837 -0.4992393 -0.152598342 0.31410430
## 5 -0.05502378 -0.1845070 -2.057897640 -0.06825693
##
## Clustering vector:
## [1] 4 3 5 3 3 1 2 1 3 5 1 2 5 4 2 2 1 3 3 1 5 3 3 4 1 2 2 2 1 2 3 3 3 1 3 3 2
## [38] 3 1 3 3 2 3 3 3 3 3 2 3 1 3 2 2 1 3 1 1 1 3 3 2 3 2 2 2 1 1 1 1 3 2 5 2 1
## [75] 5 5 1 1 4 3 2 3 2 3 2 2 4 4 2 2 1 3 2 4 3 4 3 2 3 4 2 4 3 3 1 2 2 2 1 4 3
## [112] 2 1 2 2 5 5 4 2 2 4 2 1 2 1 3 1 4 3 5 5 2 3 4 4 2 5 2 3 3 3 2 2 3 2 1 4 5
## [149] 3 3 3 3 2 2 3 2 3 2 1 3 5 3 2 2 3 2 3 3 3 1 3 2 2 5 2 2 2 3 3 1 5 5 2 2 3
## [186] 3 5 2 2 5 3 3 1 1 2 4 3 2 1 2 1 2 1 1 5 1 3 2 3 2 3 1 3 2 1 3 1 4 5 4 1 3
## [223] 2 2 1 2 3 2 3 2 1 1 2 3 3 3 1 2 2 3 5 5 3 3 1 5 5
##
## Within cluster sum of squares by cluster:
## [1] 81.67665 127.65628 106.19954 49.86444 62.38244
## (between_SS / total_SS = 56.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Plot of the final k-means solution.
fviz_cluster(
  object = Clustering,
  data = mydata_clu_std,
  palette = "Set1",
  repel = TRUE,
  ggtheme = theme_bw()
)
# Cluster centres (means of the standardised variables per cluster).
Averages <- Clustering[["centers"]]
Averages
## Awareness Ease Value Trust
## 1 0.07755914 0.2561182 -0.003027745 1.38856900
## 2 0.10677233 -0.9230788 0.077510269 -0.22238704
## 3 0.50068099 0.9055025 0.618119866 -0.69048033
## 4 -2.49544837 -0.4992393 -0.152598342 0.31410430
## 5 -0.05502378 -0.1845070 -2.057897640 -0.06825693
# Reshape the cluster centres to long format: one row per cluster/variable.
Figure <- as.data.frame(Averages)
Figure$ID <- seq_len(nrow(Figure))
library(tidyr)
Figure <- pivot_longer(
  Figure,
  cols = c("Awareness", "Ease", "Value", "Trust")
)

# Factors controlling the legend (cluster) and the x-axis order (variable).
Figure$Group <- factor(Figure$ID, levels = 1:5, labels = as.character(1:5))
Figure$NameF <- factor(
  Figure$name,
  levels = c("Awareness", "Ease", "Value", "Trust")
)

library(ggplot2)
# Profile plot: one line per cluster across the four clustering variables,
# with a reference line at the overall mean (0 on the standardised scale).
ggplot(Figure, aes(x = NameF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, col = Group), size = 5, alpha = 0.4) +
  geom_line(aes(group = ID), linewidth = 1.5) +
  labs(x = "Cluster variables", y = "Averages") +
  ylim(-2.5, 2.5) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 12))

# Store the cluster membership and run one ANOVA per clustering variable,
# with cluster as the factor.
mydata$Group <- Clustering$cluster
fit <- aov(
  cbind(Awareness, Ease, Value, Trust) ~ as.factor(Group),
  data = mydata
)
summary(fit)
## Response Awareness :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 80.074 20.0186 87.691 < 2.2e-16 ***
## Residuals 242 55.245 0.2283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Ease :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 86.860 21.7150 76.879 < 2.2e-16 ***
## Residuals 242 68.354 0.2825
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Value :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 71.857 17.9643 76.009 < 2.2e-16 ***
## Residuals 242 57.195 0.2363
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Trust :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 172.02 43.004 74.816 < 2.2e-16 ***
## Residuals 242 139.10 0.575
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
All four clustering variables differ significantly across the clusters (p < 0.001 for each).
# Age in years = calendar year at run time minus year of birth (Q42).
# NOTE(review): this depends on Sys.Date(), so ages shift when the script is
# re-run in a later year -- consider fixing the reference year.
birth_year <- as.numeric(as.character(mydata$Q42))
mydata$Q42 <- birth_year
current_year <- as.numeric(format(Sys.Date(), "%Y"))
mydata$Age <- current_year - birth_year
The year of birth (Q42) was converted into age in years.
# Median age per cluster.
aggregate(mydata$Age,
by = list(mydata$Group),
FUN = median)
## Group.1 x
## 1 1 27.0
## 2 2 29.0
## 3 3 27.0
## 4 4 23.5
## 5 5 35.0
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.4.2
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
# Shapiro-Wilk normality test of Age, run separately within each cluster.
mydata %>%
group_by(Group) %>%
shapiro_test(Age)
## # A tibble: 5 × 4
## Group variable statistic p
## <int> <chr> <dbl> <dbl>
## 1 1 Age 0.804 0.00000163
## 2 2 Age 0.863 0.000000887
## 3 3 Age 0.832 0.0000000502
## 4 4 Age 0.728 0.0000868
## 5 5 Age 0.897 0.0162
# Kruskal-Wallis rank sum test of Age across the clusters.
kruskal.test(Age ~ Group,
data = mydata)
##
## Kruskal-Wallis rank sum test
##
## data: Age by Group
## Kruskal-Wallis chi-squared = 4.5423, df = 4, p-value = 0.3376
We cannot reject H0 (p = 0.3376): there is no significant difference in age between the groups.
# Median age per cluster (same computation as before the normality test).
aggregate(mydata$Age,
by = list(mydata$Group),
FUN = median)
## Group.1 x
## 1 1 27.0
## 2 2 29.0
## 3 3 27.0
## 4 4 23.5
## 5 5 35.0
# Mean age per cluster.
aggregate(mydata$Age,
by = list(mydata$Group),
FUN = mean)
## Group.1 x
## 1 1 34.27083
## 2 2 36.09333
## 3 3 33.40506
## 4 4 32.10000
## 5 5 38.48000
Age does not differ significantly across segments; nevertheless, the medians and means of age (in years) per segment are reported above.
library(dplyr)
# Collapse Q21 into two usage categories: codes 1-4 are "Occasional Usage",
# codes 5-6 are "Frequent Usage"; anything else stays NA.
mydata$UsageFrequency <- NA_character_
mydata$UsageFrequency[mydata$Q21 %in% 1:4] <- "Occasional Usage"
mydata$UsageFrequency[mydata$Q21 %in% 5:6] <- "Frequent Usage"

# Chi-squared test of association between usage category and cluster.
DifUsage <- chisq.test(mydata$UsageFrequency, mydata$Group)
DifUsage
##
## Pearson's Chi-squared test
##
## data: mydata$UsageFrequency and mydata$Group
## X-squared = 12.12, df = 4, p-value = 0.01648
We reject H0 (p = 0.016): there is a significant association between the frequency of mobile app usage and group.
library(dplyr)
library(ggplot2)
# Share of each usage category within every cluster (rows sum to 1),
# converted to a long data frame with columns Var1 (cluster), Var2 (usage
# category) and Freq (share).
usage_table <- table(mydata$Group, mydata$UsageFrequency)
usage_proportions <- prop.table(usage_table, 1)
usage_df <- as.data.frame(usage_proportions)
# For each usage category, keep the cluster with the highest share.
most_frequent_group <- slice_max(group_by(usage_df, Var2), Freq, n = 1)
print("Most Frequent Groups for Each Usage Category:")
## [1] "Most Frequent Groups for Each Usage Category:"
print(most_frequent_group)
## # A tibble: 2 × 3
## # Groups: Var2 [2]
## Var1 Var2 Freq
## <fct> <fct> <dbl>
## 1 3 Frequent Usage 0.785
## 2 5 Occasional Usage 0.52
# Stacked bars: percentage of each usage category within every cluster.
ggplot(usage_df, aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "stack") +
  labs(
    title = "Usage Proportions Across Groups",
    x = "Group",
    y = "Proportion (%)",
    fill = "Usage Category"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
Here we see which group/segment has the highest share of frequent users and which has the highest share of occasional users.
# Chi-squared test of association between Q28 and cluster membership.
DifWhere <- chisq.test(mydata$Q28, mydata$Group)
DifWhere
##
## Pearson's Chi-squared test
##
## data: mydata$Q28 and mydata$Group
## X-squared = 26.786, df = 4, p-value = 2.196e-05
We can reject H0 (p < 0.001): there is a significant association between group and where individuals would carry out an advanced service if they had to do so next week.
library(dplyr)
library(ggplot2)
# Cross-tabulate segment (rows) by the Q28 answer (columns) and convert the
# counts to row-wise shares, so every segment sums to 1.
service_table <- table(mydata$Group, mydata$Q28)
service_proportions <- prop.table(service_table, margin = 1)
# Long format: Var1 = segment, Var2 = Q28 answer, Freq = share.
service_df <- as.data.frame(as.table(service_proportions))
# Relabel the answers in English. The rendered output of this chunk shows the
# levels "V poslovalnici" / "V mobilni aplikaciji", i.e. the original 1/2
# mapping matched nothing, so both the numeric codes and the Slovenian labels
# are mapped here. recode() silently skips names that are not present.
service_df$Var2 <- recode(
  service_df$Var2,
  `1` = "In Branch",
  `2` = "In Mobile App",
  `V poslovalnici` = "In Branch",
  `V mobilni aplikaciji` = "In Mobile App"
)
# For every answer keep the segment(s) with the largest share.
most_frequent_service <- service_df %>%
  group_by(Var2) %>%
  slice_max(Freq, n = 1)
print("Most Frequent Groups for Each Service Option:")
## [1] "Most Frequent Groups for Each Service Option:"
print(most_frequent_service)
## # A tibble: 2 × 3
## # Groups: Var2 [2]
## Var1 Var2 Freq
## <fct> <fct> <dbl>
## 1 4 V poslovalnici 0.55
## 2 3 V mobilni aplikaciji 0.873
# Stacked bar per segment showing the percentage split between service
# locations.
ggplot(data = service_df, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "stack") +
  labs(
    title = "Preference for Advanced Service Location by Group",
    x = "Group",
    y = "Proportion (%)",
    fill = "Service Option"
  ) +
  theme_minimal() +
  theme(
    # Rotate the x-axis labels for readability.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Place the legend underneath the plot.
    legend.position = "bottom"
  )
Here we see the distribution of the latter across groups, used for describing the groups/segments.
# Chi-squared test of independence between the Q26 answer and segment.
# The call text is left untouched because it is echoed in the "data:" line.
DifNotify <- chisq.test(mydata$Q26, mydata$Group)
print(DifNotify)
##
## Pearson's Chi-squared test
##
## data: mydata$Q26 and mydata$Group
## X-squared = 24.965, df = 4, p-value = 5.113e-05
We can reject H0 (p < 0.001): there is an association between group and whether individuals noticed notifications about advanced services.
library(dplyr)
library(ggplot2)
# Cross-tabulate segment (rows) by the Q26 answer (columns) and convert the
# counts to row-wise shares, so every segment sums to 1.
notification_table <- table(mydata$Group, mydata$Q26)
notification_proportions <- prop.table(notification_table, margin = 1)
# Long format: Var1 = segment, Var2 = Q26 answer, Freq = share.
notification_df <- as.data.frame(as.table(notification_proportions))
# Relabel the answers in English. The rendered output of this chunk shows the
# levels "Da" / "Ne", i.e. the original 1/2 mapping matched nothing, so both
# the numeric codes and the Slovenian labels are mapped here. recode()
# silently skips names that are not present.
notification_df$Var2 <- recode(
  notification_df$Var2,
  `1` = "Yes",
  `2` = "No",
  `Da` = "Yes",
  `Ne` = "No"
)
# For every answer keep the segment(s) with the largest share.
most_frequent_notification <- notification_df %>%
  group_by(Var2) %>%
  slice_max(Freq, n = 1)
print("Most Frequent Groups for Each Notification Response:")
## [1] "Most Frequent Groups for Each Notification Response:"
print(most_frequent_notification)
## # A tibble: 2 × 3
## # Groups: Var2 [2]
## Var1 Var2 Freq
## <fct> <fct> <dbl>
## 1 3 Da 0.747
## 2 4 Ne 0.75
# Stacked bar per segment showing the percentage split between the
# notification answers.
ggplot(
  data = notification_df,
  mapping = aes(x = Var1, y = Freq * 100, fill = Var2)
) +
  geom_col(position = "stack") +
  labs(
    title = "Notifications About Advanced Services by Group",
    x = "Group",
    y = "Proportion (%)",
    fill = "Notification Noticed"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
Here we see the distribution of notification receipt across groups, used for describing the groups/segments.
# Chi-squared test of independence between the Q23a awareness answer and
# segment. The call text is left untouched because it is echoed in the
# "data:" line.
DiffAwareness <- chisq.test(mydata$Q23a, mydata$Group)
print(DiffAwareness)
##
## Pearson's Chi-squared test
##
## data: mydata$Q23a and mydata$Group
## X-squared = 15.959, df = 4, p-value = 0.003075
We can reject H0 at p=0.004 There is association between groups and awareness of advanced feature “Getting Consumer Loan”
# Chi-squared test of independence between the Q23e awareness answer and
# segment. The call text is left untouched because it is echoed in the
# "data:" line.
DiffAwareness1 <- chisq.test(mydata$Q23e, mydata$Group)
print(DiffAwareness1)
##
## Pearson's Chi-squared test
##
## data: mydata$Q23e and mydata$Group
## X-squared = 18.035, df = 4, p-value = 0.001215
We can reject H0 at p=0.002 There is association between groups and awareness of advanced feature “Opening a Saving Account”
# Chi-squared test of independence between the Q23f awareness answer and
# segment. The call text is left untouched because it is echoed in the
# "data:" line.
DiffAwareness2 <- chisq.test(mydata$Q23f, mydata$Group)
print(DiffAwareness2)
##
## Pearson's Chi-squared test
##
## data: mydata$Q23f and mydata$Group
## X-squared = 14.801, df = 4, p-value = 0.005132
We can reject H0 at p=0.006 There is association between groups and awareness of advanced service “Term Deposits”
Below are visual representations of awareness of the mentioned advanced services across groups/segments. These can be used for interpreting the segments and forming recommendations.
library(dplyr)
library(ggplot2)
#' Plot the share of each awareness answer within every group.
#'
#' Cross-tabulates `group_col` against `awareness_col`, converts the counts to
#' row-wise proportions (each group sums to 1) and draws one stacked bar per
#' group on a 0-100 percent scale.
#'
#' @param data Data frame that contains both columns.
#' @param group_col Name (string) of the grouping column; shown on the x axis.
#' @param awareness_col Name (string) of the answer column; the codes `1` and
#'   `2` are relabelled "Yes" and "No".
#' @param awareness_label Text inserted into the plot title.
#' @return A ggplot object; it is returned, not printed.
visualize_distribution <- function(data, group_col, awareness_col, awareness_label) {
  awareness_table <- table(data[[group_col]], data[[awareness_col]])
  awareness_proportions <- prop.table(awareness_table, margin = 1)
  # Long format: Var1 = group, Var2 = answer, Freq = share within the group.
  awareness_df <- as.data.frame(as.table(awareness_proportions))
  awareness_df$Var2 <- recode(
    awareness_df$Var2,
    `1` = "Yes",
    `2` = "No"
  )

  # Stack the percentages as computed. position = "fill" would rescale every
  # bar to the 0-1 range, cancelling the * 100 and contradicting the
  # "Proportion (%)" axis title.
  ggplot(awareness_df, aes(x = Var1, y = Freq * 100, fill = Var2)) +
    geom_bar(stat = "identity", position = "stack") +
    labs(
      x = "Group",
      y = "Proportion (%)",
      fill = "Awareness",
      title = paste("Awareness of", awareness_label, "by Group")
    ) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
}
# Build one awareness-by-segment chart per advanced service. Arguments are,
# in order: data, grouping column, answer column, label used in the title.
# Q23a: Getting Consumer Loan
plot_q23a <- visualize_distribution(
  mydata, "Group", "Q23a", "Getting Consumer Loan"
)
# Q23e: Opening a Saving Account
plot_q23e <- visualize_distribution(
  mydata, "Group", "Q23e", "Opening a Saving Account"
)
# Q23f: Term Deposits
plot_q23f <- visualize_distribution(
  mydata, "Group", "Q23f", "Term Deposits"
)
# Render the three charts in the same order as they were built.
print(plot_q23a)
print(plot_q23e)
print(plot_q23f)
Segment 1: Trust Champions Description: This group scores the highest in trust, while other dimensions (Awareness, Ease, and Value) remain around the average. They are confident in the app’s reliability but may not feel strongly compelled to use advanced services unless trustworthiness is a key selling point. Key Traits: Trust: Very high. Awareness, Ease, Value: Around average. Barriers: Lack of compelling motivation beyond trust to engage with advanced services. Targeting Strategy: Build on Trust: Emphasize that advanced features are just as reliable and secure as the core app services. Highlight Practical Benefits: Demonstrate how advanced features can enhance the app’s utility without compromising trust. Introduce Exclusivity: Frame advanced features as “trustworthy tools” available to a select group of valued users.
Segment 2: Cautious Navigators Description: This group is generally balanced across all dimensions, but their Ease score is slightly below average. They may feel hesitant to adopt advanced features because they perceive them as too complicated or not intuitive. Key Traits: Awareness, Trust, Value: Average. Ease: Slightly below average. Barriers: Perceived complexity or lack of user-friendliness. Targeting Strategy: Simplify Onboarding: Offer step-by-step tutorials, walkthroughs, or in-app prompts to demystify advanced features. Emphasize User-Friendliness: Highlight how easy it is to activate and use advanced features in marketing messages. Provide Support: Include accessible help resources or a dedicated support channel for questions about advanced services.
Segment 3: High Performers Description: This group scores above average across all dimensions, with Ease being their strongest area. However, their Trust score is the smallest and falls below average. They may engage with advanced features if their concerns around trust and reliability are addressed. Key Traits: Ease, Awareness, Value: Above average. Trust: Below average. Barriers: Lack of trust in the app’s advanced features or uncertainty about their reliability. Targeting Strategy: Address Trust Concerns: Use testimonials, endorsements, and transparent explanations to build confidence in advanced services. Promote Ease and Value: Highlight how easy and rewarding it is to use advanced features, leveraging their preference for intuitive tools. Demonstrate Reliability: Offer guarantees or show evidence of the app’s security and performance in handling advanced features.
Segment 4: Trust-Driven Skeptics Description: This group scores below average across most dimensions, but their Trust score is just above average. They may trust the app but are otherwise disengaged due to lack of awareness or motivation. Key Traits: Trust: Slightly above average. Awareness, Ease, Value: Below average. Barriers: Low awareness and lack of engagement with the app’s features. Targeting Strategy: Raise Awareness: Use in-app notifications, banners, or campaigns to explain the purpose and benefits of advanced features. Leverage Trust: Position advanced features as extensions of the app’s reliable core functionality. Encourage Exploration: Use small incentives (e.g., rewards, discounts) to motivate them to try advanced features.
Segment 5: Disengaged Low-Engagers Description: This group scores below average across all dimensions, with the lowest scores in Awareness and Value. They are the most disengaged group and may not understand or see the value in the app’s advanced services. Key Traits: Awareness, Value: Very low. Ease, Trust: Below average. Barriers: Lack of awareness and a strong sense that advanced features aren’t worth using. Targeting Strategy: Focus on Awareness: Use prominent marketing, notifications, or educational content to ensure this group understands the availability and benefits of advanced services. Demonstrate Clear Value: Show how advanced services solve specific problems or improve their app experience. Simplify Access: Make it extremely easy for this group to try advanced features, perhaps through a free trial or an opt-in demo mode.
# Bin Age into labelled bands. cut() builds left-open intervals by default,
# i.e. (18,30], (30,40], ..., so without include.lowest = TRUE respondents
# aged exactly 18 would become NA and drop out of every table and plot that
# uses AgeGroup. Ages below 18 still map to NA. Because the intervals are
# right-closed, age 60 falls in "51-60" and "60+" holds ages above 60.
mydata$AgeGroup <- cut(
  mydata$Age,
  breaks = c(18, 30, 40, 50, 60, Inf),
  labels = c("18-30", "31-40", "41-50", "51-60", "60+"),
  include.lowest = TRUE
)
# Counts of the Q23a answer codes (columns) within each age band (rows).
table_age <- table(mydata$AgeGroup, mydata$Q23a)
print(table_age)
##
## 1 2
## 18-30 74 66
## 31-40 26 2
## 41-50 26 10
## 51-60 18 7
## 60+ 14 4
library(ggplot2)
# Share of each Q23a answer within every age band (rows sum to 1), reshaped
# to long format: Var1 = age band, Var2 = answer code, Freq = share.
prop_table5 <- prop.table(table(mydata$AgeGroup, mydata$Q23a), margin = 1)
prop_df5 <- as.data.frame(as.table(prop_table5))
# Filled bars: every age band spans 0-1, split by answer.
ggplot(prop_df5, aes(x = Var1, y = Freq, fill = Var2)) +
  geom_bar(stat = "identity", position = "fill") +
  labs(
    x = "Age Group",
    y = "Proportion",
    fill = "Awareness of Advanced Service",
    # Title typo fixed ("Conmsumer") and stray trailing comma removed.
    title = "Getting a Consumer Loan"
  ) +
  # Answer code 1 is shown as "Yes", code 2 as "No".
  scale_fill_manual(
    values = c("1" = "#33006F", "2" = "#84BD00"),
    labels = c("1" = "Yes", "2" = "No")
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
library(ggplot2)
# Share of each Q23b answer within every age band (rows sum to 1), reshaped
# to long format: Var1 = age band, Var2 = answer code, Freq = share.
prop_table51 <- prop.table(table(mydata$AgeGroup, mydata$Q23b), margin = 1)
prop_df51 <- as.data.frame(prop_table51)
# Filled bars: every age band spans 0-1, split by answer.
ggplot(data = prop_df51, mapping = aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = "fill") +
  labs(
    title = "Change of the Account Limit",
    x = "Age Group",
    y = "Proportion",
    fill = "Awareness of Advanced Service"
  ) +
  # Answer code 1 is shown as "Yes", code 2 as "No".
  scale_fill_manual(
    labels = c("1" = "Yes", "2" = "No"),
    values = c("1" = "#33006F", "2" = "#84BD00")
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
library(ggplot2)
# Share of each Q23c answer within every age band (rows sum to 1), reshaped
# to long format: Var1 = age band, Var2 = answer code, Freq = share.
prop_table52 <- prop.table(table(mydata$AgeGroup, mydata$Q23c), margin = 1)
prop_df52 <- as.data.frame(prop_table52)
# NOTE(review): the Q23d plot further down carries the same title (apart
# from capitalisation) - confirm which service Q23c actually refers to.
ggplot(data = prop_df52, mapping = aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = "fill") +
  labs(
    title = "Ordering a new card",
    x = "Age Group",
    y = "Proportion",
    fill = "Awareness of Advanced Service"
  ) +
  # Answer code 1 is shown as "Yes", code 2 as "No".
  scale_fill_manual(
    labels = c("1" = "Yes", "2" = "No"),
    values = c("1" = "#33006F", "2" = "#84BD00")
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
library(ggplot2)
# Share of each Q23d answer within every age band (rows sum to 1), reshaped
# to long format: Var1 = age band, Var2 = answer code, Freq = share.
prop_table53 <- prop.table(table(mydata$AgeGroup, mydata$Q23d), margin = 1)
prop_df53 <- as.data.frame(prop_table53)
# NOTE(review): the Q23c plot above carries the same title (apart from
# capitalisation) - confirm which service Q23d actually refers to.
ggplot(data = prop_df53, mapping = aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = "fill") +
  labs(
    title = "Ordering a New Card",
    x = "Age Group",
    y = "Proportion",
    fill = "Awareness of Advanced Service"
  ) +
  # Answer code 1 is shown as "Yes", code 2 as "No".
  scale_fill_manual(
    labels = c("1" = "Yes", "2" = "No"),
    values = c("1" = "#33006F", "2" = "#84BD00")
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
library(ggplot2)
# Share of each Q23e answer within every age band (rows sum to 1), reshaped
# to long format: Var1 = age band, Var2 = answer code, Freq = share.
prop_table54 <- prop.table(table(mydata$AgeGroup, mydata$Q23e), margin = 1)
prop_df54 <- as.data.frame(prop_table54)
# Filled bars: every age band spans 0-1, split by answer.
ggplot(data = prop_df54, mapping = aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = "fill") +
  labs(
    title = "Opening a Saving Account",
    x = "Age Group",
    y = "Proportion",
    fill = "Awareness of Advanced Service"
  ) +
  # Answer code 1 is shown as "Yes", code 2 as "No".
  scale_fill_manual(
    labels = c("1" = "Yes", "2" = "No"),
    values = c("1" = "#33006F", "2" = "#84BD00")
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
library(ggplot2)
# Share of each Q23f answer within every age band (rows sum to 1), reshaped
# to long format: Var1 = age band, Var2 = answer code, Freq = share.
prop_table55 <- prop.table(table(mydata$AgeGroup, mydata$Q23f), margin = 1)
prop_df55 <- as.data.frame(prop_table55)
# Filled bars: every age band spans 0-1, split by answer.
ggplot(data = prop_df55, mapping = aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = "fill") +
  labs(
    title = "Term Deposits",
    x = "Age Group",
    y = "Proportion",
    fill = "Awareness of Advanced Service"
  ) +
  # Answer code 1 is shown as "Yes", code 2 as "No".
  scale_fill_manual(
    labels = c("1" = "Yes", "2" = "No"),
    values = c("1" = "#33006F", "2" = "#84BD00")
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
# NOTE(review): this chunk clusters the built-in iris data set, not mydata;
# it looks unrelated to the survey segmentation - confirm it is intended.
# Fix the RNG state so the k-means starting centres are reproducible.
set.seed(123)
# Keep the four measurement columns, dropping the fifth (species) column.
data <- iris[, 1:4]
kmeans_result <- kmeans(x = data, centers = 5)
# Number of observations assigned to each of the five clusters.
print(kmeans_result$size)
## [1] 19 8 38 23 62
library(ggplot2)
# Cross-tabulate segment (rows) by the Q45 answer (columns) and convert the
# counts to row-wise shares, so every segment sums to 1.
table_clusters1 <- table(mydata$Group, mydata$Q45)
prop_table_clusters1 <- prop.table(table_clusters1, margin = 1)
# Long format: Var1 = segment, Var2 = Q45 answer, Freq = share.
prop_df1 <- as.data.frame(prop_table_clusters1)
# Side-by-side bars: one bar per Q45 answer within every segment, in percent.
ggplot(data = prop_df1, mapping = aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  labs(
    title = "Percentage Distribution of Primary Bank by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  ) +
  theme_minimal()
# Q45 answer (rows) by segment (columns), expressed as column-wise
# percentages so every segment sums to 100.
table_data <- table(mydata$Q45, mydata$Group)
percentage_distribution <- 100 * prop.table(table_data, margin = 2)
# Long format: Var1 = Q45 answer, Var2 = segment, Freq = percentage.
print(as.data.frame(percentage_distribution))
## Var1 Var2 Freq
## 1 OTP banka d.d. 1 36.363636
## 2 Banka Intesa Sanpaolo d.d. 1 11.363636
## 3 Nova Ljubljanska Banka d.d. (NLB) 1 38.636364
## 4 Gorenjska Banka d.d. 1 2.272727
## 5 Delavska Hranilnica d.d. 1 4.545455
## 6 Revolut 1 0.000000
## 7 Deželna Banka Slovenije d.d. 1 0.000000
## 8 Banka Sparkasse d.d. 1 2.272727
## 9 Addiko Bank d.d. 1 0.000000
## 10 UniCredit Banka Slovenija d.d. 1 4.545455
## 11 OTP banka d.d. 2 27.536232
## 12 Banka Intesa Sanpaolo d.d. 2 13.043478
## 13 Nova Ljubljanska Banka d.d. (NLB) 2 28.985507
## 14 Gorenjska Banka d.d. 2 2.898551
## 15 Delavska Hranilnica d.d. 2 14.492754
## 16 Revolut 2 1.449275
## 17 Deželna Banka Slovenije d.d. 2 0.000000
## 18 Banka Sparkasse d.d. 2 4.347826
## 19 Addiko Bank d.d. 2 1.449275
## 20 UniCredit Banka Slovenija d.d. 2 5.797101
## 21 OTP banka d.d. 3 45.454545
## 22 Banka Intesa Sanpaolo d.d. 3 5.194805
## 23 Nova Ljubljanska Banka d.d. (NLB) 3 25.974026
## 24 Gorenjska Banka d.d. 3 2.597403
## 25 Delavska Hranilnica d.d. 3 2.597403
## 26 Revolut 3 3.896104
## 27 Deželna Banka Slovenije d.d. 3 1.298701
## 28 Banka Sparkasse d.d. 3 5.194805
## 29 Addiko Bank d.d. 3 2.597403
## 30 UniCredit Banka Slovenija d.d. 3 5.194805
## 31 OTP banka d.d. 4 16.666667
## 32 Banka Intesa Sanpaolo d.d. 4 5.555556
## 33 Nova Ljubljanska Banka d.d. (NLB) 4 50.000000
## 34 Gorenjska Banka d.d. 4 5.555556
## 35 Delavska Hranilnica d.d. 4 11.111111
## 36 Revolut 4 5.555556
## 37 Deželna Banka Slovenije d.d. 4 0.000000
## 38 Banka Sparkasse d.d. 4 0.000000
## 39 Addiko Bank d.d. 4 0.000000
## 40 UniCredit Banka Slovenija d.d. 4 5.555556
## 41 OTP banka d.d. 5 33.333333
## 42 Banka Intesa Sanpaolo d.d. 5 8.333333
## 43 Nova Ljubljanska Banka d.d. (NLB) 5 29.166667
## 44 Gorenjska Banka d.d. 5 4.166667
## 45 Delavska Hranilnica d.d. 5 12.500000
## 46 Revolut 5 0.000000
## 47 Deželna Banka Slovenije d.d. 5 4.166667
## 48 Banka Sparkasse d.d. 5 4.166667
## 49 Addiko Bank d.d. 5 0.000000
## 50 UniCredit Banka Slovenija d.d. 5 4.166667