#install.packages("readxl")
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the raw survey export as a plain data frame.
# NOTE(review): the absolute Windows path only resolves on the author's
# machine; consider a project-relative path before sharing the script.
mydata <- as.data.frame(
  read_excel("C:/Users/Pino/Desktop/IMB/MVA/Survey.xlsx")
)

# Number the rows so that the first one can be dropped by ID.
# NOTE(review): presumably row 1 holds question labels rather than a
# respondent -- confirm against the spreadsheet.
mydata$ID <- seq_len(nrow(mydata))
mydata <- filter(mydata, ID != 1)

# Renumber so IDs run 1..n again. seq_len() also copes with an empty frame,
# where seq(1, 0) would yield c(1, 0) and make the assignment fail.
mydata$ID <- seq_len(nrow(mydata))
head(mydata)
## Q1 Q21 Q22a Q23a Q23b Q23c Q23d Q23e Q23f Q24 Q25 Q26 Q27a Q27b Q27c Q28 Q29a
## 1 1 6 3 1 1 1 1 1 1 2 3 1 1 1 0 2 3
## 2 1 5 4 1 1 1 1 1 1 1 4 1 1 0 0 2 4
## 3 1 6 4 2 1 1 1 2 2 2 3 2 0 0 1 1 4
## 4 1 6 5 1 1 1 1 1 1 1 4 1 0 0 1 2 5
## 5 1 6 5 1 1 1 1 1 1 1 4 1 0 0 1 2 5
## 6 1 6 5 2 1 1 1 1 1 4 3 1 0 1 0 1 4
## Q29b Q29c Q29d Q30a Q30b Q30c Q30d Q30e Q31a Q31b Q33a Q33b Q33c Q33d Q33e
## 1 3 4 5 3 3 3 3 3 4 4 3 4 5 3 4
## 2 2 2 1 3 3 3 3 3 5 2 2 4 4 5 4
## 3 4 2 5 4 4 4 4 1 2 4 5 4 4 5 5
## 4 2 1 1 2 2 4 1 3 5 1 4 4 5 3 4
## 5 2 2 2 5 5 5 5 5 5 2 3 4 4 5 5
## 6 1 1 4 3 5 5 2 4 4 5 5 4 5 4 5
## Q33f Poslovalnica_podpora_in_usmerjanje
## 1 4 4
## 2 5 4
## 3 5 5
## 4 5 5
## 5 5 4
## 6 4 4
## Mobilna aplikacija_podpora_in_usmerjanje Poslovalnica_brezkrbnost
## 1 4 3
## 2 3 4
## 3 2 5
## 4 3 5
## 5 3 4
## 6 2 4
## Mobilna aplikacija_brezkrbnost Poslovalnica_varnost
## 1 3 4
## 2 4 4
## 3 2 5
## 4 4 5
## 5 4 4
## 6 3 4
## Mobilna aplikacija_varnost Poslovalnica_dostopnost
## 1 3 3
## 2 4 3
## 3 3 5
## 4 4 5
## 5 3 4
## 6 3 5
## Mobilna aplikacija_dostopnost Poslovalnica_jasnost Mobilna aplikacija_jasnost
## 1 3 4 4
## 2 5 5 4
## 3 2 5 2
## 4 3 5 4
## 5 5 4 4
## 6 2 4 2
## Poslovalnica_hitrost Mobilna aplikacija_hitrost Q40 Q41
## 1 3 4 -1 2
## 2 3 5 -1 2
## 3 3 4 -1 2
## 4 2 5 -1 2
## 5 2 4 -1 2
## 6 1 5 Stay humble, only cash 2
## Q42 Q43a Q43b Q43c Q43d Q43e Q43f Q43g Q43h Q44 Q45 Q45_13_text Q46 Q47 Q48
## 1 2000 1 0 0 0 0 0 0 0 2 3 -2 2 2 2
## 2 1998 0 0 0 1 0 0 0 0 6 3 -2 2 3 4
## 3 2001 1 0 0 0 0 0 0 0 2 1 -2 2 2 3
## 4 1994 0 0 0 1 0 0 0 0 6 12 -2 5 6 5
## 5 2000 1 1 0 0 0 0 0 0 2 1 -2 2 3 6
## 6 2004 1 0 0 0 0 0 0 0 3 1 -2 1 8 4
## ID
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
# Convert the rating items to numeric. The same index vector is used on both
# sides of the assignment: the original right-hand side also selected
# column 48, so 39 columns were offered for 38 targets and R warned
# "provided 39 variables to replace 38 variables" while dropping the extra one.
# Column 48 (Q42) is converted separately further down, where Age is derived.
numeric_cols <- c(2:9, 11, 17:45)
mydata[numeric_cols] <- lapply(mydata[numeric_cols], as.numeric)
# Q26: two-category item coded 1/2.
mydata$Q26 <- factor(mydata$Q26, levels = c(1, 2), labels = c("Da", "Ne"))

# Q27a-c: 1/0 tick-box items; 1 gets the item's own label, 0 becomes "Ne".
# NOTE(review): "Nisem upiorabljal/a" looks like a typo for
# "Nisem uporabljal/a". It is a runtime label, so it is left unchanged here;
# confirm nothing matches on the current spelling before correcting it.
q27_labels <- c(
  Q27a = "V mobilni aplikaciji",
  Q27b = "V poslovalnici",
  Q27c = "Nisem upiorabljal/a"
)
for (item in names(q27_labels)) {
  mydata[[item]] <- factor(
    mydata[[item]],
    levels = c(1, 0),
    labels = c(q27_labels[[item]], "Ne")
  )
}

# Q28: two-category item coded 1/2.
mydata$Q28 <- factor(
  mydata$Q28,
  levels = c(1, 2),
  labels = c("V poslovalnici", "V mobilni aplikaciji")
)

# Overview of columns 2 to 45 after the conversions above.
summary(mydata[2:45])
## Q21 Q22a Q23a Q23b
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:1.000 1st Qu.:1.000
## Median :5.000 Median :5.000 Median :1.000 Median :1.000
## Mean :4.502 Mean :4.325 Mean :1.381 Mean :1.192
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :6.000 Max. :5.000 Max. :2.000 Max. :2.000
## Q23c Q23d Q23e Q23f
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :1.000 Median :1.000 Median :2.000
## Mean :1.283 Mean :1.181 Mean :1.355 Mean :1.543
## 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## Q24 Q25 Q26 Q27a
## Length:265 Min. :1.000 Da:141 V mobilni aplikaciji:106
## Class :character 1st Qu.:3.000 Ne:124 Ne :159
## Mode :character Median :4.000
## Mean :3.498
## 3rd Qu.:4.000
## Max. :5.000
## Q27b Q27c Q28
## V poslovalnici: 67 Nisem upiorabljal/a:114 V poslovalnici : 96
## Ne :198 Ne :151 V mobilni aplikaciji:169
##
##
##
##
## Q29a Q29b Q29c Q29d Q30a
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:2.000
## Median :4.000 Median :3.000 Median :2.000 Median :3.00 Median :3.000
## Mean :3.438 Mean :2.728 Mean :2.596 Mean :2.97 Mean :3.072
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000
## Q30b Q30c Q30d Q30e
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000
## Median :4.000 Median :4.000 Median :3.000 Median :3.000
## Mean :3.377 Mean :3.426 Mean :3.117 Mean :2.932
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q31a Q31b Q33a Q33b Q33c
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:3.00 1st Qu.:4.000 1st Qu.:4.000
## Median :4.000 Median :2.000 Median :4.00 Median :4.000 Median :5.000
## Mean :4.128 Mean :2.506 Mean :3.83 Mean :4.075 Mean :4.264
## 3rd Qu.:5.000 3rd Qu.:3.000 3rd Qu.:5.00 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000
## Q33d Q33e Q33f
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.000
## Mean :4.042 Mean :4.023 Mean :4.219
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Poslovalnica_podpora_in_usmerjanje Mobilna aplikacija_podpora_in_usmerjanje
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :3.000
## Mean :3.811 Mean :3.125
## 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000
## Poslovalnica_brezkrbnost Mobilna aplikacija_brezkrbnost Poslovalnica_varnost
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.000
## Mean :3.921 Mean :3.551 Mean :4.211
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Mobilna aplikacija_varnost Poslovalnica_dostopnost
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :4.000
## Mean :3.698 Mean :3.785
## 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000
## Mobilna aplikacija_dostopnost Poslovalnica_jasnost Mobilna aplikacija_jasnost
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :4.000 Median :3.000
## Mean :3.792 Mean :3.913 Mean :3.347
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Poslovalnica_hitrost Mobilna aplikacija_hitrost
## Min. :1.000 Min. :1.0
## 1st Qu.:2.000 1st Qu.:4.0
## Median :3.000 Median :4.0
## Mean :2.891 Mean :4.2
## 3rd Qu.:4.000 3rd Qu.:5.0
## Max. :5.000 Max. :5.0
# Turn the coded demographic items into labelled factors. Codes that are not
# listed in `levels` become NA, as usual for factor().

# Q41: three-category item coded 1/2/3.
mydata$Q41 <- factor(
  mydata$Q41,
  levels = c(1, 2, 3),
  labels = c("Female", "Male", "I don't want to answer")
)

# Q43a-h: eight 1/0 tick-box items that share the same coding.
q43_items <- paste0("Q43", letters[1:8])
for (item in q43_items) {
  mydata[[item]] <- factor(
    mydata[[item]],
    levels = c(1, 0),
    labels = c("Selected", "Not selected")
  )
}

# Q44: size of home town (six ordered bands, stored as a plain factor).
q44_labels <- c(
  "Less than 1.000 habitants",
  "1.000 – 5.000 habitants",
  "5.001 – 20.000 habitants",
  "20.001 – 50.000 habitants",
  "50.001 – 100.000 habitants",
  "More than 100.000 habitants"
)
mydata$Q44 <- factor(mydata$Q44, levels = c(1, 2, 3, 4, 5, 6),
                     labels = q44_labels)

# Q45: primary bank. The code order below fixes the order of the levels.
q45_codes <- c(3, 5, 1, 9, 12, 7, 10, 4, 11, 6)
q45_labels <- c(
  "OTP banka d.d.",
  "Banka Intesa Sanpaolo d.d.",
  "Nova Ljubljanska Banka d.d. (NLB)",
  "Gorenjska Banka d.d.",
  "Delavska Hranilnica d.d.",
  "Revolut",
  "Deželna Banka Slovenije d.d.",
  "Banka Sparkasse d.d.",
  "Addiko Bank d.d.",
  "UniCredit Banka Slovenija d.d."
)
mydata$Q45 <- factor(mydata$Q45, levels = q45_codes, labels = q45_labels)

# Q46: employment status (code 4 is deliberately listed last).
q46_codes <- c(1, 2, 3, 5, 6, 4)
q46_labels <- c(
  "Študent/-ka",
  "Redno zaposlen/-a",
  "Upokojen/-a",
  "Samozaposlen/-a",
  "Delno zaposlen/-a",
  "Brezposeln/-a"
)
mydata$Q46 <- factor(mydata$Q46, levels = q46_codes, labels = q46_labels)

# Q47: monthly income band.
q47_labels <- c(
  "Pod 1.000€",
  "1.000€ - 1.500€",
  "1.501€ - 2.000€",
  "2.001€ - 3.000€",
  "3.001€ - 5.000€",
  "5.001€ - 10.000€",
  "Above 10.000€",
  "I don't want to answer"
)
mydata$Q47 <- factor(mydata$Q47, levels = c(1, 2, 3, 4, 5, 6, 7, 8),
                     labels = q47_labels)

# Q48: highest completed education (codes 2 to 7).
q48_labels <- c(
  "Dokončana osnovna šola",
  "Dokončana nižja ali srednja poklicna izobrazba",
  "Dokončana srednja strokovna ali splošna izobrazba",
  "Dokončana višješolska strokovna ali visokošolska strokovna izobrazba (tudi 1. bolonjska stopnja)",
  "Dokončana visokošolska strokovna univerzitetna izobrazba (tudi 2. bolonjska stopnja)",
  "Dokončana specializacija, znanstveni magisterij, doktorat"
)
mydata$Q48 <- factor(mydata$Q48, levels = c(2, 3, 4, 5, 6, 7),
                     labels = q48_labels)
library(psych)
# Descriptive statistics for columns 2 to 45. describe.by() is deprecated and,
# with no grouping variable, only forwarded to describe() after two warnings;
# calling describe() directly gives the same table without them.
# Rows marked with "*" in the output are non-numeric columns that psych
# converts to numeric codes before summarising.
describe(mydata[2:45])
## vars n mean sd median trimmed mad
## Q21 1 265 4.50 1.28 5 4.66 1.48
## Q22a 2 265 4.32 0.87 5 4.49 0.00
## Q23a 3 265 1.38 0.49 1 1.35 0.00
## Q23b 4 265 1.19 0.39 1 1.12 0.00
## Q23c 5 265 1.28 0.45 1 1.23 0.00
## Q23d 6 265 1.18 0.39 1 1.10 0.00
## Q23e 7 265 1.35 0.48 1 1.32 0.00
## Q23f 8 265 1.54 0.50 2 1.55 0.00
## Q24* 9 265 2.16 1.74 1 1.83 0.00
## Q25 10 265 3.50 0.86 4 3.52 1.48
## Q26* 11 265 1.47 0.50 1 1.46 0.00
## Q27a* 12 265 1.60 0.49 2 1.62 0.00
## Q27b* 13 265 1.75 0.44 2 1.81 0.00
## Q27c* 14 265 1.57 0.50 2 1.59 0.00
## Q28* 15 265 1.64 0.48 2 1.67 0.00
## Q29a 16 265 3.44 1.26 4 3.54 1.48
## Q29b 17 265 2.73 1.13 3 2.74 1.48
## Q29c 18 265 2.60 1.31 2 2.50 1.48
## Q29d 19 265 2.97 1.32 3 2.96 1.48
## Q30a 20 265 3.07 1.20 3 3.09 1.48
## Q30b 21 265 3.38 1.18 4 3.47 1.48
## Q30c 22 265 3.43 1.24 4 3.52 1.48
## Q30d 23 265 3.12 1.25 3 3.15 1.48
## Q30e 24 265 2.93 1.24 3 2.92 1.48
## Q31a 25 265 4.13 0.86 4 4.25 1.48
## Q31b 26 265 2.51 1.22 2 2.42 1.48
## Q33a 27 265 3.83 1.06 4 3.97 1.48
## Q33b 28 265 4.08 0.95 4 4.22 1.48
## Q33c 29 265 4.26 1.02 5 4.48 0.00
## Q33d 30 265 4.04 0.91 4 4.16 1.48
## Q33e 31 265 4.02 0.92 4 4.13 1.48
## Q33f 32 265 4.22 0.89 4 4.35 1.48
## Poslovalnica_podpora_in_usmerjanje 33 265 3.81 0.88 4 3.86 1.48
## Mobilna aplikacija_podpora_in_usmerjanje 34 265 3.12 0.88 3 3.13 1.48
## Poslovalnica_brezkrbnost 35 265 3.92 0.87 4 3.97 1.48
## Mobilna aplikacija_brezkrbnost 36 265 3.55 0.87 4 3.57 1.48
## Poslovalnica_varnost 37 265 4.21 0.81 4 4.31 1.48
## Mobilna aplikacija_varnost 38 265 3.70 0.85 4 3.73 1.48
## Poslovalnica_dostopnost 39 265 3.78 1.00 4 3.88 1.48
## Mobilna aplikacija_dostopnost 40 265 3.79 0.89 4 3.84 1.48
## Poslovalnica_jasnost 41 265 3.91 0.91 4 4.00 1.48
## Mobilna aplikacija_jasnost 42 265 3.35 0.85 3 3.34 1.48
## Poslovalnica_hitrost 43 265 2.89 1.02 3 2.90 1.48
## Mobilna aplikacija_hitrost 44 265 4.20 0.76 4 4.28 1.48
## min max range skew kurtosis se
## Q21 1 6 5 -1.10 0.76 0.08
## Q22a 1 5 4 -1.54 2.46 0.05
## Q23a 1 2 1 0.49 -1.77 0.03
## Q23b 1 2 1 1.55 0.41 0.02
## Q23c 1 2 1 0.96 -1.09 0.03
## Q23d 1 2 1 1.65 0.71 0.02
## Q23e 1 2 1 0.60 -1.64 0.03
## Q23f 1 2 1 -0.17 -1.98 0.03
## Q24* 1 6 5 1.34 0.23 0.11
## Q25 1 5 4 -0.33 0.31 0.05
## Q26* 1 2 1 0.13 -1.99 0.03
## Q27a* 1 2 1 -0.41 -1.84 0.03
## Q27b* 1 2 1 -1.13 -0.72 0.03
## Q27c* 1 2 1 -0.28 -1.93 0.03
## Q28* 1 2 1 -0.57 -1.68 0.03
## Q29a 1 5 4 -0.45 -0.73 0.08
## Q29b 1 5 4 -0.05 -0.97 0.07
## Q29c 1 5 4 0.35 -1.06 0.08
## Q29d 1 5 4 -0.09 -1.18 0.08
## Q30a 1 5 4 -0.24 -0.84 0.07
## Q30b 1 5 4 -0.57 -0.55 0.07
## Q30c 1 5 4 -0.43 -0.79 0.08
## Q30d 1 5 4 -0.31 -0.97 0.08
## Q30e 1 5 4 -0.01 -1.02 0.08
## Q31a 1 5 4 -1.28 2.22 0.05
## Q31b 1 5 4 0.43 -0.85 0.07
## Q33a 1 5 4 -0.96 0.54 0.06
## Q33b 1 5 4 -1.19 1.39 0.06
## Q33c 1 5 4 -1.60 2.10 0.06
## Q33d 1 5 4 -1.27 2.05 0.06
## Q33e 1 5 4 -1.12 1.62 0.06
## Q33f 1 5 4 -1.25 1.64 0.05
## Poslovalnica_podpora_in_usmerjanje 1 5 4 -0.62 0.55 0.05
## Mobilna aplikacija_podpora_in_usmerjanje 1 5 4 -0.08 -0.19 0.05
## Poslovalnica_brezkrbnost 1 5 4 -0.59 0.39 0.05
## Mobilna aplikacija_brezkrbnost 1 5 4 -0.12 -0.22 0.05
## Poslovalnica_varnost 1 5 4 -1.13 1.85 0.05
## Mobilna aplikacija_varnost 1 5 4 -0.44 0.15 0.05
## Poslovalnica_dostopnost 1 5 4 -0.63 0.10 0.06
## Mobilna aplikacija_dostopnost 1 5 4 -0.42 -0.09 0.05
## Poslovalnica_jasnost 1 5 4 -0.72 0.33 0.06
## Mobilna aplikacija_jasnost 1 5 4 -0.03 -0.06 0.05
## Poslovalnica_hitrost 1 5 4 0.07 -0.59 0.06
## Mobilna aplikacija_hitrost 1 5 4 -1.02 2.08 0.05
# Standardise (z-score) the four clustering variables: columns 3, 11, 26 and
# 27, i.e. Q22a, Q25, Q31a and Q31b at this point in the script.
mydata_clu_std <- mydata[c(3, 11, 26, 27)] %>%
  scale() %>%
  as.data.frame()
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.2
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Hopkins statistic for the standardised clustering variables, using all but
# one unit as the sample size; the diagnostic plot is switched off.
hopkins_n <- nrow(mydata_clu_std) - 1
get_clust_tendency(mydata_clu_std, n = hopkins_n, graph = FALSE)
## $hopkins_stat
## [1] 0.5586369
##
## $plot
## NULL
# Give the four clustering variables descriptive names in `mydata`.
# `mydata_clu_std` was built before this point, so it keeps the old names
# (Q22a, Q25, Q31a, Q31b) until it is recomputed.
names(mydata)[c(3, 11, 26, 27)] <- c("Awareness", "Ease", "Value", "Trust")
I renamed the four clustering variables. The clusters are built on these four variables: “Awareness” (Q22a), “Ease” (Q25), “Value” (Q31a) and “Trust” (Q31b).
library(factoextra)
library(NbClust)
# Total within-cluster sum of squares against the number of clusters.
elbow_plot <- fviz_nbclust(mydata_clu_std, FUNcluster = kmeans,
                           method = "wss")
elbow_plot + labs(subtitle = "Elbow method")

# Average silhouette width against the number of clusters.
silhouette_plot <- fviz_nbclust(mydata_clu_std, FUNcluster = kmeans,
                                method = "silhouette")
silhouette_plot + labs(subtitle = "Silhouette analysis")
library(dplyr)
library(factoextra)
# Hierarchical clustering of the standardised clustering variables:
# Euclidean distances between units, agglomerated with Ward's method
# ("ward.D2"). The result is reused by the dendrogram plots below.
# The code is left as written because hclust() stores and prints its call.
WARD <- mydata_clu_std %>%
get_dist(method = "euclidean") %>%
hclust(method = "ward.D2")
# Print a short description of the fitted tree.
WARD
##
## Call:
## hclust(d = ., method = "ward.D2")
##
## Cluster method : ward.D2
## Distance : euclidean
## Number of objects: 265
library(factoextra)
# Plain dendrogram of the Ward clustering, without cutting it into groups.
fviz_dend(WARD)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Dendrogram cut into five groups, with coloured labels and a rectangle
# around each group.
fviz_dend(
  WARD,
  k = 5,
  rect = TRUE,
  color_labels_by_k = TRUE,
  palette = "jama",
  cex = 0.5
)
library(factoextra)
library(ggplot2)

# K-means with five clusters and 25 random starts. kmeans() starts from
# randomly chosen centres, so the seed is fixed to keep the cluster numbering
# identical when this chunk is rerun on its own.
set.seed(123)
Clustering <- kmeans(mydata_clu_std, centers = 5, nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 27, 74, 76, 32, 56
##
## Cluster means:
## Q22a Q25 Q31a Q31b
## 1 -2.2987082 -0.6636458 -0.1920661 0.4967113
## 2 0.1713700 -0.7658139 0.1806157 -0.3039786
## 3 0.5063700 0.9025907 0.5693474 -0.7926587
## 4 -0.0860382 -0.5414867 -1.9278200 0.2004560
## 5 0.2438008 0.4164170 0.1828583 1.1234050
##
## Clustering vector:
## [1] 1 3 4 3 3 5 2 5 3 1 4 4 5 2 4 1 2 2 5 5 3 5 4 3 3 1 5 2 2 2 5 2 3 3 3 5 3
## [38] 4 3 2 3 5 3 3 2 3 3 3 3 3 2 3 5 5 2 2 5 3 5 5 5 3 3 2 3 2 2 2 5 5 5 5 3 2
## [75] 4 2 5 4 3 5 5 1 3 2 3 2 1 5 3 2 2 1 1 1 2 2 5 3 2 1 3 1 4 3 2 3 1 2 1 3 3
## [112] 5 2 2 1 2 5 1 3 2 5 2 2 4 5 1 2 2 1 2 5 2 5 3 5 1 5 4 4 2 3 1 1 4 2 4 2 3
## [149] 3 4 3 4 2 3 2 5 1 4 1 3 3 3 3 2 2 3 2 3 2 5 3 4 4 3 2 2 3 2 3 3 3 5 3 2 2
## [186] 4 2 2 2 3 1 3 5 4 4 2 2 3 3 5 4 4 2 5 2 4 3 3 5 5 2 1 3 2 5 2 5 2 5 5 4 5
## [223] 3 2 3 2 3 5 3 2 5 3 5 1 1 4 1 5 3 2 4 2 5 2 3 2 3 2 5 5 2 3 3 5 5 2 2 3 4
## [260] 4 3 3 5 4 4
##
## Within cluster sum of squares by cluster:
## [1] 95.65988 92.49211 79.70569 116.86933 85.02220
## (between_SS / total_SS = 55.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Cluster plot of the current k-means solution.
fviz_cluster(
  Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Euclidean distance of every unit from the origin of the standardised space,
# i.e. from the overall mean. The standardised frame still carries the
# original item names because it was created before the renaming.
mydata$Dissimilarity <- with(
  mydata_clu_std,
  sqrt(Q22a^2 + Q25^2 + Q31a^2 + Q31b^2)
)

# The ten units with the largest distance, most extreme first.
most_dissimilar <- head(order(-mydata$Dissimilarity), 10)
mydata[most_dissimilar, c("ID", "Dissimilarity")]
## ID Dissimilarity
## 191 191 6.365406
## 11 11 4.823598
## 202 202 4.716886
## 93 93 4.390156
## 241 241 4.281977
## 38 38 4.229806
## 234 234 4.116562
## 115 115 3.993249
## 103 103 3.861159
## 10 10 3.798713
# Drop unit 191 (the largest dissimilarity in the listing above) and renumber.
# seq_len() is used so that the renumbering cannot break on an empty frame.
mydata <- filter(mydata, ID != 191)
mydata$ID <- seq_len(nrow(mydata))

# Re-standardise on the reduced sample. The clustering variables are picked
# by name rather than by position, so the selection survives column changes.
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)

# Hopkins statistic on the reduced sample.
get_clust_tendency(
  mydata_clu_std,
  n = nrow(mydata_clu_std) - 1,
  graph = FALSE
)
## $hopkins_stat
## [1] 0.5736077
##
## $plot
## NULL
# Rebuild the hierarchical clustering first: the existing WARD object was
# fitted before the outlier was removed, so plotting it again would only
# repeat the old dendrogram.
WARD <- mydata_clu_std %>%
  get_dist(method = "euclidean") %>%
  hclust(method = "ward.D2")

# Dendrogram of the reduced sample, cut into five groups.
fviz_dend(
  WARD,
  k = 5,
  rect = TRUE,
  color_labels_by_k = TRUE,
  palette = "jama",
  cex = 0.5
)
library(factoextra)
library(ggplot2)

# K-means with five clusters and 25 random starts; the seed keeps the cluster
# numbering identical when this chunk is rerun on its own.
set.seed(123)
Clustering <- kmeans(mydata_clu_std, centers = 5, nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 56, 74, 76, 32, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.2355188 0.4112907 0.1731763 1.1381089
## 2 0.1611116 -0.7879947 0.1708796 -0.2980480
## 3 0.5052525 0.9044778 0.5689982 -0.7897317
## 4 -0.1033202 -0.5604314 -1.9884698 0.2094871
## 5 -2.3155481 -0.5971993 -0.0752230 0.4475952
##
## Clustering vector:
## [1] 5 3 4 3 3 1 2 1 3 5 4 4 1 2 4 5 2 2 1 1 3 1 4 3 3 5 1 2 2 2 1 2 3 3 3 1 3
## [38] 4 3 2 3 1 3 3 2 3 3 3 3 3 2 3 1 1 2 2 1 3 1 1 1 3 3 2 3 2 2 2 1 1 1 1 3 2
## [75] 4 2 1 4 3 1 1 5 3 2 3 2 5 1 3 2 2 5 5 5 2 2 1 3 2 5 3 5 4 3 2 3 5 2 5 3 3
## [112] 1 2 2 5 2 1 5 3 2 1 2 2 4 1 5 2 2 5 2 1 2 1 3 1 5 1 4 4 2 3 5 5 4 2 4 2 3
## [149] 3 4 3 4 2 3 2 1 5 4 5 3 3 3 3 2 2 3 2 3 2 1 3 4 4 3 2 2 3 2 3 3 3 1 3 2 2
## [186] 4 2 2 2 3 3 1 4 4 2 2 3 3 1 4 4 2 1 2 4 3 3 1 1 2 5 3 2 1 2 1 2 1 1 4 1 3
## [223] 2 3 2 3 1 3 2 1 3 1 5 5 4 5 1 3 2 4 2 1 2 3 2 3 2 1 1 2 3 3 1 1 2 2 3 4 4
## [260] 3 3 1 4 4
##
## Within cluster sum of squares by cluster:
## [1] 88.14381 95.70238 82.80060 120.50690 75.50678
## (between_SS / total_SS = 56.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Cluster plot of the current solution.
# NOTE(review): presumably the unit removed below was picked from this plot --
# confirm.
fviz_cluster(
  Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Drop the listed unit, renumber (seq_len() is safe on an empty frame) and
# re-standardise the clustering variables, selected by name.
outlier_ids <- c(11)
mydata <- filter(mydata, !ID %in% outlier_ids)
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)
library(ggplot2)

# K-means with five clusters and 25 random starts; the seed keeps the cluster
# numbering identical when this chunk is rerun on its own.
set.seed(123)
Clustering <- kmeans(mydata_clu_std, centers = 5, nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 76, 54, 75, 32, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.16017846 -0.8533305 0.11948655 -0.3059896
## 2 0.21326412 0.3776779 0.22118546 1.1460396
## 3 0.49918810 0.9111497 0.59543745 -0.7892907
## 4 -0.03067026 -0.2443742 -1.97805346 0.2824627
## 5 -2.31336479 -0.6176055 -0.09173426 0.4433489
##
## Clustering vector:
## [1] 5 3 4 3 3 2 1 2 3 5 4 2 1 1 5 1 1 2 2 3 2 4 3 3 5 2 1 1 1 2 1 3 3 3 2 3 4
## [38] 3 1 3 2 3 3 1 3 3 3 3 3 1 3 2 2 1 1 2 3 2 2 2 3 3 1 3 1 1 1 2 2 2 2 3 1 4
## [75] 1 2 4 4 2 2 5 3 1 3 1 5 2 3 1 1 5 5 5 1 1 2 3 1 5 3 5 4 3 1 3 5 1 5 3 3 2
## [112] 1 1 5 1 2 5 3 1 2 1 1 4 4 5 1 1 5 1 2 1 2 3 2 5 2 4 4 1 3 5 5 4 1 4 1 3 3
## [149] 4 3 1 1 3 1 2 5 4 5 3 3 3 3 1 1 3 1 3 1 2 3 4 4 3 1 1 3 1 3 3 3 2 3 1 1 4
## [186] 1 1 1 3 3 2 4 4 1 1 3 3 2 4 4 1 2 1 4 3 3 4 2 1 5 3 1 2 1 2 1 2 2 4 2 3 1
## [223] 3 1 3 2 3 1 2 3 2 5 5 4 5 2 3 1 4 1 2 1 3 1 3 1 2 2 1 3 3 2 2 1 1 3 4 4 3
## [260] 3 2 4 4
##
## Within cluster sum of squares by cluster:
## [1] 109.86189 82.00624 80.53778 111.18358 76.83579
## (between_SS / total_SS = 56.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Cluster plot of the current solution.
# NOTE(review): presumably the units removed below were picked from this
# plot -- confirm.
fviz_cluster(
  Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Drop the listed units, renumber (seq_len() is safe on an empty frame) and
# re-standardise the clustering variables, selected by name.
outlier_ids <- c(86, 200, 143, 239, 37)
mydata <- filter(mydata, !ID %in% outlier_ids)
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)
library(ggplot2)

# K-means with five clusters and 25 random starts; the seed keeps the cluster
# numbering identical when this chunk is rerun on its own.
set.seed(123)
Clustering <- kmeans(mydata_clu_std, centers = 5, nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 79, 25, 50, 30, 74
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.50108668 0.8823227 0.6219599 -0.7289837
## 2 -2.38619015 -0.5895733 -0.1589148 0.5303956
## 3 0.14549250 0.3470049 0.1111776 1.2357670
## 4 0.08869192 -0.3574300 -1.9775371 0.1497189
## 5 0.13693951 -0.8323177 0.1162874 -0.2966229
##
## Clustering vector:
## [1] 2 1 4 1 1 3 5 3 1 2 4 3 5 4 2 5 5 3 1 1 3 4 1 1 2 3 5 5 5 3 5 1 1 1 3 1 1
## [38] 5 1 3 1 1 5 1 1 1 1 1 5 1 3 1 5 5 3 1 3 3 3 1 1 5 1 5 5 5 3 3 3 3 1 5 4 5
## [75] 3 4 4 3 3 2 1 5 1 5 3 1 5 5 2 2 2 5 5 3 1 5 2 1 2 4 1 5 1 2 5 2 1 1 3 5 5
## [112] 2 5 3 2 1 5 3 5 5 4 4 2 5 5 2 5 3 5 3 1 3 2 1 4 4 5 1 2 2 5 4 5 1 1 4 1 4
## [149] 5 1 5 3 2 4 2 1 1 1 1 5 5 1 5 1 5 3 1 4 4 1 5 5 1 5 1 1 1 3 1 5 5 4 5 5 5
## [186] 1 1 3 4 4 5 5 1 1 3 4 5 3 5 4 1 1 4 3 5 2 1 5 3 5 3 5 3 3 4 3 1 5 1 5 1 3
## [223] 1 5 3 1 3 2 2 4 2 3 1 5 5 3 5 1 5 1 5 3 3 5 1 1 1 3 5 5 1 4 4 1 1 3 4 4
##
## Within cluster sum of squares by cluster:
## [1] 95.40383 77.28224 80.33349 91.09260 105.03046
## (between_SS / total_SS = 56.3 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Cluster plot of the current solution.
# NOTE(review): presumably the units removed below were picked from this
# plot -- confirm.
fviz_cluster(
  Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Drop the listed units, renumber (seq_len() is safe on an empty frame) and
# re-standardise the clustering variables, selected by name.
outlier_ids <- c(90, 155, 85, 146, 100)
mydata <- filter(mydata, !ID %in% outlier_ids)
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)
library(ggplot2)

# K-means with five clusters and 25 random starts; the seed keeps the cluster
# numbering identical when this chunk is rerun on its own.
set.seed(123)
Clustering <- kmeans(mydata_clu_std, centers = 5, nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 23, 76, 49, 79, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 -2.41967781 -0.6566175 -0.16063729 0.44471611
## 2 0.11993807 -0.8977034 0.06023898 -0.26926030
## 3 0.11446989 0.3155981 0.08097431 1.31250594
## 4 0.49980397 0.8895872 0.61326842 -0.71180559
## 5 0.05552915 -0.0928475 -2.04997890 0.08289087
##
## Clustering vector:
## [1] 1 4 5 4 4 3 2 3 4 1 5 3 2 2 1 2 2 3 4 4 3 5 4 4 1 3 2 2 2 3 2 4 4 4 3 4 4
## [38] 2 4 3 4 4 2 4 4 4 4 4 2 4 3 4 2 2 3 4 3 3 3 4 4 2 4 2 2 2 3 3 3 3 4 2 5 2
## [75] 3 5 5 3 3 1 4 2 4 2 4 2 2 1 1 2 2 3 4 2 1 4 1 4 2 4 1 2 1 4 4 3 2 2 1 2 3
## [112] 1 4 2 3 2 2 5 5 1 2 2 1 2 3 2 3 4 3 1 4 5 5 2 4 1 1 2 5 2 4 4 4 2 2 4 2 3
## [149] 1 5 4 4 4 4 2 2 4 2 4 2 3 4 5 5 4 2 2 4 2 4 4 4 3 4 2 2 5 2 2 2 4 4 3 5 5
## [186] 2 2 4 4 3 5 2 3 2 5 4 4 5 3 2 1 4 2 3 2 3 2 3 3 5 3 4 2 4 2 4 3 4 2 3 4 3
## [223] 1 1 5 1 3 4 2 2 3 2 4 2 4 2 3 3 2 4 4 4 3 2 2 4 5 5 4 4 3 5 5
##
## Within cluster sum of squares by cluster:
## [1] 70.16428 124.58392 80.49920 99.56269 63.35325
## (between_SS / total_SS = 56.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Cluster plot of the current solution.
# NOTE(review): presumably the units removed below were picked from this
# plot -- confirm.
fviz_cluster(
  Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Drop the listed units, renumber (seq_len() is safe on an empty frame) and
# re-standardise the clustering variables, selected by name.
outlier_ids <- c(193, 190, 164)
mydata <- filter(mydata, !ID %in% outlier_ids)
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)
library(ggplot2)

# K-means with five clusters and 25 random starts; the seed keeps the cluster
# numbering identical when this chunk is rerun on its own.
set.seed(123)
Clustering <- kmeans(mydata_clu_std, centers = 5, nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 75, 26, 79, 23, 47
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.13714948 -0.8797902 0.08082592 -0.2392861
## 2 0.01602031 -0.1291655 -2.03520230 -0.0309770
## 3 0.50815392 0.9082966 0.62028545 -0.7001278
## 4 -2.40486418 -0.6499943 -0.16676336 0.4696909
## 5 0.09499952 0.2667449 0.03587920 1.3459376
##
## Clustering vector:
## [1] 4 3 2 3 3 5 1 5 3 4 2 5 1 2 4 1 1 5 3 3 5 2 3 3 4 5 1 1 1 5 1 3 3 3 5 3 3
## [38] 1 3 5 3 3 1 3 3 3 3 3 1 3 5 3 1 1 5 3 5 5 5 3 3 1 3 1 1 1 5 5 5 5 3 1 2 1
## [75] 5 2 2 5 5 4 3 1 3 1 3 1 1 4 4 1 1 5 3 1 4 3 4 3 1 3 4 1 4 3 3 5 1 1 4 1 5
## [112] 4 3 1 5 1 1 2 2 4 1 1 4 1 5 1 5 3 5 4 3 2 2 1 3 4 4 1 2 1 3 3 3 1 1 3 1 5
## [149] 4 2 3 3 3 3 1 1 3 1 3 1 5 3 2 3 1 1 3 1 3 3 3 5 3 1 1 2 1 1 1 3 3 5 2 2 1
## [186] 1 3 3 2 1 1 2 3 3 2 5 1 4 3 1 5 1 5 1 5 5 2 5 3 1 3 1 3 5 3 1 5 3 5 4 4 2
## [223] 4 5 3 1 1 5 1 3 1 3 1 5 5 1 3 3 3 5 1 1 3 2 2 3 3 5 2 2
##
## Within cluster sum of squares by cluster:
## [1] 121.45870 62.95169 101.43697 71.50918 74.12297
## (between_SS / total_SS = 56.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Cluster plot of the current solution.
# NOTE(review): presumably the units removed below were picked from this
# plot -- confirm.
fviz_cluster(
  Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Drop the listed units, renumber (seq_len() is safe on an empty frame) and
# re-standardise the clustering variables, selected by name.
outlier_ids <- c(220, 10, 109)
mydata <- filter(mydata, !ID %in% outlier_ids)
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(
  scale(mydata[c("Awareness", "Ease", "Value", "Trust")])
)
library(factoextra)
library(ggplot2)

# This solution feeds mydata$Group and every per-cluster comparison further
# down, so the seed is fixed: kmeans() starts from random centres and the
# cluster numbers would otherwise not be guaranteed to stay the same when the
# chunk is rerun on its own.
set.seed(123)
Clustering <- kmeans(mydata_clu_std, centers = 5, nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 48, 75, 79, 20, 25
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.07755914 0.2561182 -0.003027745 1.38856900
## 2 0.10677233 -0.9230788 0.077510269 -0.22238704
## 3 0.50068099 0.9055025 0.618119866 -0.69048033
## 4 -2.49544837 -0.4992393 -0.152598342 0.31410430
## 5 -0.05502378 -0.1845070 -2.057897640 -0.06825693
##
## Clustering vector:
## [1] 4 3 5 3 3 1 2 1 3 5 1 2 5 4 2 2 1 3 3 1 5 3 3 4 1 2 2 2 1 2 3 3 3 1 3 3 2
## [38] 3 1 3 3 2 3 3 3 3 3 2 3 1 3 2 2 1 3 1 1 1 3 3 2 3 2 2 2 1 1 1 1 3 2 5 2 1
## [75] 5 5 1 1 4 3 2 3 2 3 2 2 4 4 2 2 1 3 2 4 3 4 3 2 3 4 2 4 3 3 1 2 2 2 1 4 3
## [112] 2 1 2 2 5 5 4 2 2 4 2 1 2 1 3 1 4 3 5 5 2 3 4 4 2 5 2 3 3 3 2 2 3 2 1 4 5
## [149] 3 3 3 3 2 2 3 2 3 2 1 3 5 3 2 2 3 2 3 3 3 1 3 2 2 5 2 2 2 3 3 1 5 5 2 2 3
## [186] 3 5 2 2 5 3 3 1 1 2 4 3 2 1 2 1 2 1 1 5 1 3 2 3 2 3 1 3 2 1 3 1 4 5 4 1 3
## [223] 2 2 1 2 3 2 3 2 1 1 2 3 3 3 1 2 2 3 5 5 3 3 1 5 5
##
## Within cluster sum of squares by cluster:
## [1] 81.67665 127.65628 106.19954 49.86444 62.38244
## (between_SS / total_SS = 56.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
# Cluster plot of the k-means solution fitted just above.
fviz_cluster(
  Clustering,
  data = mydata_clu_std,
  ggtheme = theme_bw(),
  repel = TRUE,
  palette = "Set1"
)

# Cluster centres: one row per cluster, one column per standardised variable.
Averages <- Clustering[["centers"]]
Averages
## Awareness Ease Value Trust
## 1 0.07755914 0.2561182 -0.003027745 1.38856900
## 2 0.10677233 -0.9230788 0.077510269 -0.22238704
## 3 0.50068099 0.9055025 0.618119866 -0.69048033
## 4 -2.49544837 -0.4992393 -0.152598342 0.31410430
## 5 -0.05502378 -0.1845070 -2.057897640 -0.06825693
library(tidyr)
library(ggplot2)

# Profile plot of the cluster centres. The centres are reshaped to long
# format (one row per cluster and variable), then cluster and variable are
# stored as factors so the plot keeps their order.
cluster_vars <- c("Awareness", "Ease", "Value", "Trust")
Figure <- as.data.frame(Averages) %>%
  mutate(ID = seq_len(n())) %>%
  pivot_longer(cols = c("Awareness", "Ease", "Value", "Trust")) %>%
  mutate(
    Group = factor(ID, levels = 1:5, labels = as.character(1:5)),
    NameF = factor(name, levels = cluster_vars, labels = cluster_vars)
  )

# One line per cluster across the four variables; the horizontal line at 0
# marks the overall mean of each standardised variable.
ggplot(Figure, aes(x = NameF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, col = Group), size = 5, alpha = 0.4) +
  geom_line(aes(group = ID), linewidth = 1.5) +
  labs(x = "Cluster variables", y = "Averages") +
  ylim(-2.5, 2.5) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 12))
# Store each unit's cluster number from the current k-means solution.
mydata$Group <- Clustering$cluster
# One-way ANOVA of the four (unstandardised) clustering variables on cluster
# membership. With a cbind() response, summary() prints one ANOVA table per
# variable. The call is left as written because the term label
# "as.factor(Group)" appears in the printed tables.
fit <- aov(cbind(Awareness, Ease, Value, Trust) ~ as.factor(Group),
data = mydata)
summary(fit)
## Response Awareness :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 80.074 20.0186 87.691 < 2.2e-16 ***
## Residuals 242 55.245 0.2283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Ease :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 86.860 21.7150 76.879 < 2.2e-16 ***
## Residuals 242 68.354 0.2825
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Value :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 71.857 17.9643 76.009 < 2.2e-16 ***
## Residuals 242 57.195 0.2363
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Trust :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 172.02 43.004 74.816 < 2.2e-16 ***
## Residuals 242 139.10 0.575
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
All four clustering variables differ significantly across the clusters (p < 0.001 for each variable).
# Q42 holds the year of birth; store it as a number.
birth_year <- as.numeric(as.character(mydata$Q42))
mydata$Q42 <- birth_year

# Age = calendar year in which the script is run minus year of birth.
# NOTE(review): this makes Age depend on the run date; consider fixing the
# survey year if results must be reproducible -- confirm with the author.
current_year <- as.numeric(format(Sys.Date(), "%Y"))
mydata$Age <- current_year - birth_year
The year of birth (Q42) was converted into age in years (current year minus year of birth).
# Median age within each cluster (Group.1 = cluster number, x = median age).
aggregate(x = mydata$Age, by = list(mydata$Group), FUN = median)
## Group.1 x
## 1 1 27.0
## 2 2 29.0
## 3 3 27.0
## 4 4 23.5
## 5 5 35.0
# Cluster (rows) by size of home town, Q44 (columns).
table_clusters <- table(mydata$Group, mydata$Q44)
# Row-wise shares: every cluster's row sums to 1.
prop_table_clusters <- prop.table(table_clusters, margin = 1)
# Long format for ggplot: Var1 = cluster, Var2 = category, Freq = share.
prop_df <- as.data.frame(prop_table_clusters)
library(ggplot2)

# Side-by-side bars, one per category within each cluster, in percent.
ggplot(prop_df, aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  labs(
    title = "Percentage Distribution of number of habitants of home place by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  ) +
  theme_minimal()
# Pearson chi-squared test of independence between cluster membership and
# size of home town (Q44). The call is left as written because the printed
# "data:" line repeats the argument expressions.
# NOTE(review): the warning below indicates small expected counts in some
# cells; consider simulate.p.value = TRUE or merged categories -- confirm.
resulttttt <- chisq.test(mydata$Group, mydata$Q44)
## Warning in chisq.test(mydata$Group, mydata$Q44): Chi-squared approximation may
## be incorrect
resulttttt
##
## Pearson's Chi-squared test
##
## data: mydata$Group and mydata$Q44
## X-squared = 17.464, df = 20, p-value = 0.6226
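There is no statistically significant association between cluster membership and the number of inhabitants of the home town (χ² = 17.464, df = 20, p = 0.623). Note the warning above: some expected counts are small, so the chi-squared approximation may be unreliable.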
library(dplyr)
# Collapse the six Q44 bands into three population sizes. The mapping is a
# named vector (old label = new label) spliced into recode().
population_size <- c(
  "Less than 1.000 habitants" = "Small population",
  "1.000 – 5.000 habitants" = "Small population",
  "5.001 – 20.000 habitants" = "Medium population",
  "20.001 – 50.000 habitants" = "Medium population",
  "50.001 – 100.000 habitants" = "Large population",
  "More than 100.000 habitants" = "Large population"
)
mydata$HabitantGroup <- recode(mydata$Q44, !!!population_size)

# Chi-squared test of independence between cluster and the collapsed bands.
resulttttt2 <- chisq.test(mydata$Group, mydata$HabitantGroup)
resulttttt2
##
## Pearson's Chi-squared test
##
## data: mydata$Group and mydata$HabitantGroup
## X-squared = 4.9513, df = 8, p-value = 0.7628
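Even after collapsing the categories into small, medium and large populations, there is no statistically significant association with cluster membership (χ² = 4.951, df = 8, p = 0.763).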
# Cluster (rows) by primary bank, Q45 (columns).
table_clusters1 <- table(mydata$Group, mydata$Q45)
# Row-wise shares: every cluster's row sums to 1.
prop_table_clusters1 <- prop.table(table_clusters1, margin = 1)
# Long format for ggplot: Var1 = cluster, Var2 = category, Freq = share.
prop_df1 <- as.data.frame(prop_table_clusters1)
library(ggplot2)

# Side-by-side bars, one per bank within each cluster, in percent.
ggplot(prop_df1, aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  labs(
    title = "Percentage Distribution of Primary Bank by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  ) +
  theme_minimal()
# Pearson chi-squared test of independence between cluster membership and
# primary bank (Q45). The call is left as written because the printed
# "data:" line repeats the argument expressions.
# NOTE(review): the warning below indicates small expected counts in some
# cells; consider simulate.p.value = TRUE or merged categories -- confirm.
resultttt <- chisq.test(mydata$Group, mydata$Q45)
## Warning in chisq.test(mydata$Group, mydata$Q45): Chi-squared approximation may
## be incorrect
resultttt
##
## Pearson's Chi-squared test
##
## data: mydata$Group and mydata$Q45
## X-squared = 31.9, df = 36, p-value = 0.664
No association between group and primary bank.
# Cluster (rows) by employment status, Q46 (columns).
table_clusters2 <- table(mydata$Group, mydata$Q46)
# Row-wise shares: every cluster's row sums to 1.
prop_table_clusters2 <- prop.table(table_clusters2, margin = 1)
# Long format for ggplot: Var1 = cluster, Var2 = category, Freq = share.
prop_df2 <- as.data.frame(prop_table_clusters2)
library(ggplot2)

# Side-by-side bars, one per status within each cluster, in percent.
ggplot(prop_df2, aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "dodge") +
  labs(
    title = "Percentage Distribution of Employment Status by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  ) +
  theme_minimal()
# Pearson chi-squared test of independence between cluster membership and
# employment status (Q46). The call is left as written because the printed
# "data:" line repeats the argument expressions.
# NOTE(review): the warning below indicates small expected counts in some
# cells; consider simulate.p.value = TRUE or merged categories -- confirm.
resulttt <- chisq.test(mydata$Group, mydata$Q46)
## Warning in chisq.test(mydata$Group, mydata$Q46): Chi-squared approximation may
## be incorrect
resulttt
##
## Pearson's Chi-squared test
##
## data: mydata$Group and mydata$Q46
## X-squared = 17.276, df = 20, p-value = 0.635
There is no association between Group and Employment status.
# Monthly income (Q47) by cluster: within-group percentages, a dodged bar
# chart, and a chi-squared test of independence.
library(ggplot2)
table_clusters3 <- table(mydata$Group, mydata$Q47)
# Row-wise proportions: every Group row sums to 1.
prop_table_clusters3 <- prop.table(table_clusters3, margin = 1)
prop_df3 <- prop_table_clusters3 %>%
  as.table() %>%
  as.data.frame()
ggplot(
  data = prop_df3,
  mapping = aes(x = Var1, y = Freq * 100, fill = Var2)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Percentage Distribution of Monthly Income by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  ) +
  theme_minimal()
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect for this table, so read the p-value with caution.
resultt <- chisq.test(mydata$Group, mydata$Q47)
## Warning in chisq.test(mydata$Group, mydata$Q47): Chi-squared approximation may
## be incorrect
resultt
##
## Pearson's Chi-squared test
##
## data: mydata$Group and mydata$Q47
## X-squared = 26.577, df = 28, p-value = 0.5414
There is no association between Group and Income level.
# Show the raw Group x Q47 (monthly income) counts that the percentages and
# the chi-squared test above are based on.
print(table_clusters3)
##
## Pod 1.000€ 1.000€ - 1.500€ 1.501€ - 2.000€ 2.001€ - 3.000€ 3.001€ - 5.000€
## 1 13 9 11 5 2
## 2 26 9 14 12 5
## 3 19 16 12 19 5
## 4 7 6 4 0 0
## 5 7 5 3 5 3
##
## 5.001€ - 10.000€ Above 10.000€ I don't want to answer
## 1 1 1 6
## 2 1 0 8
## 3 2 0 6
## 4 0 1 2
## 5 1 0 1
# Education level (Q48) by cluster: within-group percentages, a dodged bar
# chart with a bottom legend, and a chi-squared test of independence.
library(ggplot2)
table_clusters4 <- table(mydata$Group, mydata$Q48)
# Row-wise proportions: every Group row sums to 1.
prop_table_clusters4 <- prop.table(table_clusters4, margin = 1)
prop_df4 <- prop_table_clusters4 %>%
  as.table() %>%
  as.data.frame()
ggplot(
  data = prop_df4,
  mapping = aes(x = Var1, y = Freq * 100, fill = Var2)
) +
  # Slightly narrower bars than the default.
  geom_col(position = "dodge", width = 0.7) +
  labs(
    title = "Percentage Distribution of Education Level by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  ) +
  theme_minimal() +
  theme(
    # Global text size, with a larger, centred title.
    text = element_text(size = 14),
    plot.title = element_text(size = 16, hjust = 0.5),
    # Tilt the x-axis labels.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Legend goes under the plot, with compact keys and text.
    legend.position = "bottom",
    legend.key.size = unit(0.8, "cm"),
    legend.text = element_text(size = 10)
  ) +
  # Lay the legend entries out in two rows.
  guides(fill = guide_legend(nrow = 2))
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect for this table, so read the p-value with caution.
result <- chisq.test(mydata$Group, mydata$Q48)
## Warning in chisq.test(mydata$Group, mydata$Q48): Chi-squared approximation may
## be incorrect
result
##
## Pearson's Chi-squared test
##
## data: mydata$Group and mydata$Q48
## X-squared = 17.242, df = 20, p-value = 0.6372
There is no association between Group and Education level.
# Bin respondent age into five bands for the cross-tabulations that follow.
# cut() builds right-closed intervals by default, so without
# include.lowest = TRUE the first band is (18, 30] and a respondent aged
# exactly 18 silently becomes NA despite the "18-30" label. Ages below 18
# still map to NA.
mydata$AgeGroup <- cut(
  mydata$Age,
  breaks = c(18, 30, 40, 50, 60, Inf),
  labels = c("18-30", "31-40", "41-50", "51-60", "60+"),
  include.lowest = TRUE
)
# Counts of each Q23a answer within every age band.
table_age <- table(mydata$AgeGroup, mydata$Q23a)
print(table_age)
##
## 1 2
## 18-30 74 66
## 31-40 26 2
## 41-50 26 10
## 51-60 18 7
## 60+ 14 4
# Chi-squared test of independence between Q23a and the five-level age band
# (AgeGroup); the bare name below prints the test summary.
result1 <- chisq.test(mydata$Q23a, mydata$AgeGroup)
result1
##
## Pearson's Chi-squared test
##
## data: mydata$Q23a and mydata$AgeGroup
## X-squared = 20.89, df = 4, p-value = 0.000333
There is an association between age group and Q23a (p = 0.000333).
# Turn the Q23a codes into a labelled factor (1 -> "Yes", 2 -> "No") so the
# legend of the plot below is readable.
mydata$Q23a <- factor(
  mydata$Q23a,
  levels = c(1, 2),
  labels = c("Yes", "No")
)
levels(mydata$Q23a)
## [1] "Yes" "No"
library(ggplot2)
# Share of each Q23a answer within every age band (rows sum to 1).
prop_table5 <- prop.table(table(mydata$AgeGroup, mydata$Q23a), margin = 1)
prop_df5 <- prop_table5 %>%
  as.table() %>%
  as.data.frame()
ggplot(
  data = prop_df5,
  mapping = aes(x = Var1, y = Freq, fill = Var2)
) +
  geom_col(position = "fill") +
  labs(
    title = "Proportion of Awareness of Ability to Get a Loan by Age Group",
    x = "Age Group",
    y = "Proportion",
    fill = "Awareness of Advanced Service"
  ) +
  theme_minimal()
There is an association between Age Group and awareness of the ability to get a
loan in the app.
# Chi-squared test of independence between Q28 (where the respondent would
# carry out an advanced service, per the plot title further down) and Group.
result3 <- chisq.test(mydata$Q28, mydata$Group)
result3
##
## Pearson's Chi-squared test
##
## data: mydata$Q28 and mydata$Group
## X-squared = 26.786, df = 4, p-value = 2.196e-05
There is an association between Group and where individuals would carry out an advanced service in the next week if they had to.
# Observed Q28 x Group counts from the test above, with row and column totals.
addmargins(result3$observed)
## mydata$Group
## mydata$Q28 1 2 3 4 5 Sum
## V poslovalnici 23 24 10 11 12 80
## V mobilni aplikaciji 25 51 69 9 13 167
## Sum 48 75 79 20 25 247
# Q28 answers by cluster: within-group percentages shown as a dodged bar chart.
library(ggplot2)
table_clusters5 <- table(mydata$Group, mydata$Q28)
# Row-wise proportions: every Group row sums to 1.
prop_table_clusters5 <- prop.table(table_clusters5, margin = 1)
# Note: this reassigns prop_df5, which was used earlier for the Q23a plot.
prop_df5 <- prop_table_clusters5 %>%
  as.table() %>%
  as.data.frame()
ggplot(
  data = prop_df5,
  mapping = aes(x = Var1, y = Freq * 100, fill = Var2)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Percentage Distribution of Where would you carry out advanced services in the next week if you had to",
    x = "Group",
    y = "Percentage (%)",
    fill = "Odgovor"
  ) +
  theme_minimal()
# Chi-squared test of independence between the five-level age band and Group.
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect for this table, so read the p-value with caution.
result4 <- chisq.test(mydata$AgeGroup, mydata$Group)
## Warning in chisq.test(mydata$AgeGroup, mydata$Group): Chi-squared approximation
## may be incorrect
result4
##
## Pearson's Chi-squared test
##
## data: mydata$AgeGroup and mydata$Group
## X-squared = 21.854, df = 16, p-value = 0.148
There is no association between Group and Age Group.
# Coarser two-band age split (up to 50 vs. over 50) to reduce sparse cells.
# include.lowest = TRUE closes the first interval at 18; with cut()'s default
# right-closed intervals a respondent aged exactly 18 would otherwise become
# NA despite the "18-50" label.
mydata$AgeGroup2 <- cut(
  mydata$Age,
  breaks = c(18, 50, Inf),
  labels = c("18-50", "51+"),
  include.lowest = TRUE
)
# Chi-squared test of independence between the two-level age band (AgeGroup2)
# and Group.
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect for this table, so read the p-value with caution.
result42 <- chisq.test(mydata$AgeGroup2, mydata$Group)
## Warning in chisq.test(mydata$AgeGroup2, mydata$Group): Chi-squared
## approximation may be incorrect
result42
##
## Pearson's Chi-squared test
##
## data: mydata$AgeGroup2 and mydata$Group
## X-squared = 3.9376, df = 4, p-value = 0.4145
Same story again: no association between Group and age, even with only two age bands (p = 0.4145).
library(ggplot2)
# Age-band composition of each cluster. The table's rows are Group and its
# columns are AgeGroup, so after as.data.frame() Var1 is the cluster and Var2
# is the age band; margin = 1 makes each cluster's shares sum to 1.
prop_table6 <- prop.table(table(mydata$Group, mydata$AgeGroup), margin = 1)
prop_df6 <- as.data.frame(as.table(prop_table6))
ggplot(prop_df6, aes(x = Var1, y = Freq, fill = Var2)) +
  geom_bar(stat = "identity", position = "fill") +
  # The x-axis and legend titles were swapped: x is the cluster (Var1) and the
  # fill colour is the age band (Var2).
  labs(
    x = "Group",
    y = "Proportion",
    fill = "Age Group",
    title = "Proportion of Age Groups across Clusters"
  ) +
  theme_minimal()
# Chi-squared test of independence between Q24 (where the respondent first
# heard about advanced features, per the note below) and Group.
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect for this table, so read the p-value with caution.
result5 <- chisq.test(mydata$Q24, mydata$Group)
## Warning in chisq.test(mydata$Q24, mydata$Group): Chi-squared approximation may
## be incorrect
result5
##
## Pearson's Chi-squared test
##
## data: mydata$Q24 and mydata$Group
## X-squared = 31.876, df = 20, p-value = 0.04464
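There is an association between Group and where individuals heard about advanced features for the first time (p = 0.0446), although R warns that the chi-squared approximation may be incorrect, so this result should be treated with caution.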
library(dplyr)
# Collapse the six Q24 sources into two channels ("Online" / "In Person").
# Q24 appears to still hold the raw answer codes at this point (it is only
# turned into a labelled factor further down), so recoding by label name
# matched nothing and FirstTime was a copy of Q24: the result52 output shown
# below is identical to result5 (X-squared = 31.876, df = 20).
# Fix: attach the labels to a temporary factor first, then recode the labels.
# mydata$Q24 itself is left untouched.
# NOTE(review): the code-to-label mapping mirrors the factor() call applied to
# Q24 later in this file; re-run the document to refresh the result52 output.
mydata$FirstTime <- recode(
  factor(
    mydata$Q24,
    levels = c(1, 2, 3, 4, 5, 6),
    labels = c(
      "Mobile app", "Branch", "Ads",
      "Social Media", "Friends/Family", "This Survey"
    )
  ),
  "Mobile app" = "Online",
  "Ads" = "Online",
  "Social Media" = "Online",
  "This Survey" = "Online",
  "Branch" = "In Person",
  "Friends/Family" = "In Person"
)
# Chi-squared test of independence between the collapsed FirstTime variable
# and Group.
# NOTE(review): the output shown below has the same statistic and df = 20 as
# result5, which suggests FirstTime still had six categories when this ran.
result52 <- chisq.test(mydata$FirstTime, mydata$Group)
## Warning in chisq.test(mydata$FirstTime, mydata$Group): Chi-squared
## approximation may be incorrect
result52
##
## Pearson's Chi-squared test
##
## data: mydata$FirstTime and mydata$Group
## X-squared = 31.876, df = 20, p-value = 0.04464
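The statistic and degrees of freedom are identical to the ungrouped Q24 test above (X-squared = 31.876, df = 20), which indicates that the recoding into Online / In Person did not take effect.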
# Turn the Q24 answer codes (1-6) into a labelled factor so the plot legend
# shows the source names instead of numbers.
mydata$Q24 <- factor(
  mydata$Q24,
  levels = c(1, 2, 3, 4, 5, 6),
  labels = c(
    "Mobile app", "Branch", "Ads",
    "Social Media", "Friends/Family", "This Survey"
  )
)
library(ggplot2)
# Share of each Q24 source within every cluster (rows sum to 1). The table's
# rows are Group, so Var1 is the cluster and Var2 is the Q24 source.
prop_table7 <- prop.table(table(mydata$Group, mydata$Q24), margin = 1)
prop_df7 <- as.data.frame(as.table(prop_table7))
ggplot(prop_df7, aes(x = Var1, y = Freq, fill = Var2)) +
  geom_bar(stat = "identity", position = "fill") +
  # The legend was titled "Clusters", but the fill colour encodes the Q24
  # source (Var2); clusters are on the x-axis.
  labs(
    x = "Group",
    y = "Proportion",
    fill = "Source",
    title = "Proportion of first time hearing about advanced services across Clusters"
  ) +
  theme_minimal()
# Q26 answers by cluster: within-group percentages shown as a dodged bar
# chart, followed by a chi-squared test of independence.
library(ggplot2)
# Row-wise proportions: every Group row sums to 1.
prop_table8 <- prop.table(table(mydata$Group, mydata$Q26), margin = 1)
prop_df8 <- prop_table8 %>%
  as.table() %>%
  as.data.frame()
ggplot(
  data = prop_df8,
  mapping = aes(x = Var1, y = Freq * 100, fill = Var2)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Have you ever received any notifications about advanced features in your app?",
    x = "Group",
    y = "Percentage (%)",
    fill = "Answer"
  ) +
  theme_minimal()
result6 <- chisq.test(mydata$Q26, mydata$Group)
result6
##
## Pearson's Chi-squared test
##
## data: mydata$Q26 and mydata$Group
## X-squared = 24.965, df = 4, p-value = 5.113e-05
There is an association between Group and receiving notifications about advanced services.
# Chi-squared test of independence between Q41 (gender, per the note below)
# and Group.
result7 <- chisq.test(mydata$Q41, mydata$Group)
result7
##
## Pearson's Chi-squared test
##
## data: mydata$Q41 and mydata$Group
## X-squared = 6.2922, df = 4, p-value = 0.1784
No association between Group and gender.
# Chi-squared test of independence between Q21 (frequency of mobile app use,
# per the note below) and Group.
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect for this table, so read the p-value with caution.
result8 <- chisq.test(mydata$Q21, mydata$Group)
## Warning in chisq.test(mydata$Q21, mydata$Group): Chi-squared approximation may
## be incorrect
result8
##
## Pearson's Chi-squared test
##
## data: mydata$Q21 and mydata$Group
## X-squared = 28.527, df = 20, p-value = 0.0975
No association between Group and frequency of mobile app usage at the 5% level (p = 0.0975).
# Chi-squared test of independence between Q23f (awareness of the deposit
# advanced service, per the note below) and Group.
result9 <- chisq.test(mydata$Q23f, mydata$Group)
result9
##
## Pearson's Chi-squared test
##
## data: mydata$Q23f and mydata$Group
## X-squared = 14.801, df = 4, p-value = 0.005132
There is an association between Group and being aware of the deposit advanced service.
# Chi-squared test of independence between Q23a (awareness of the loan
# advanced service, per the note below) and Group.
result10 <- chisq.test(mydata$Q23a, mydata$Group)
result10
##
## Pearson's Chi-squared test
##
## data: mydata$Q23a and mydata$Group
## X-squared = 15.959, df = 4, p-value = 0.003075
Association between group and awareness of taking out a loan advanced service
# Chi-squared test of independence between Q23b (awareness of changing the
# bank-account limit, per the note below) and Group.
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect for this table, so read the p-value with caution.
result11 <- chisq.test(mydata$Q23b, mydata$Group)
## Warning in chisq.test(mydata$Q23b, mydata$Group): Chi-squared approximation may
## be incorrect
result11
##
## Pearson's Chi-squared test
##
## data: mydata$Q23b and mydata$Group
## X-squared = 19.144, df = 4, p-value = 0.0007365
Association between group and awareness of changing limit on the bank account advanced service.
# Chi-squared test of independence between Q23c (awareness of ordering a new
# credit card, per the note below) and Group.
result12 <- chisq.test(mydata$Q23c, mydata$Group)
result12
##
## Pearson's Chi-squared test
##
## data: mydata$Q23c and mydata$Group
## X-squared = 23.322, df = 4, p-value = 0.0001092
Association between group and awareness of ordering a new credit card advanced service.
# Chi-squared test of independence between Q23d (awareness of changing the
# credit-card limit, per the note below) and Group.
# NOTE(review): R warns below that the chi-squared approximation may be
# incorrect for this table, and p = 0.045 is borderline; read with caution.
result13 <- chisq.test(mydata$Q23d, mydata$Group)
## Warning in chisq.test(mydata$Q23d, mydata$Group): Chi-squared approximation may
## be incorrect
result13
##
## Pearson's Chi-squared test
##
## data: mydata$Q23d and mydata$Group
## X-squared = 9.7326, df = 4, p-value = 0.04518
Association between group and awareness of changing limit on the credit card advanced service.
# Chi-squared test of independence between Q23e (awareness of opening a
# savings account, per the note below) and Group.
result14 <- chisq.test(mydata$Q23e, mydata$Group)
result14
##
## Pearson's Chi-squared test
##
## data: mydata$Q23e and mydata$Group
## X-squared = 18.035, df = 4, p-value = 0.001215
Association between group and awareness of opening savings account advanced service.