/* Load every Montserrat weight requested by the rules in this chunk.
   The original import only fetched 300/400/600 although 500 and 700 are used. */
@import url('https://fonts.googleapis.com/css2?family=Montserrat:wght@300;400;500;600;700&display=swap');

/* Report title: large, centered, upper-case, brand purple. */
h1.title {
  color: #230078;
  font-family: 'Montserrat', sans-serif; /* Closest to Coco Sharp */
  font-size: 48px;
  font-weight: 700;
  text-align: center;
  text-transform: uppercase;
  margin-bottom: 10px;
}

/* Author line: brand green, directly under the title. */
.author {
  color: #84BD00;
  font-family: 'Montserrat', sans-serif;
  font-size: 24px;
  font-weight: 500;
  text-align: center;
  margin-top: 5px;
}

/* Date line: pulled up slightly towards the author line. */
.date {
  color: #230078;
  font-family: 'Montserrat', sans-serif;
  font-size: 18px;
  font-weight: 400;
  text-align: center;
  margin-top: -5px;
}

/* Start of the second stylesheet chunk.
   NOTE(review): if both chunks end up in a single stylesheet, an @import placed
   after style rules is ignored; the import at the top already covers it. */
@import url('https://fonts.googleapis.com/css2?family=Montserrat:wght@300;400;500;600;700&display=swap');
/* ---- Page defaults ---- */
body {
  background-color: white;
  font-family: 'Montserrat', sans-serif;
  color: #230078;
  line-height: 1.6;
}

/* ---- Headings ---- */
h1 {
  color: #230078;
  font-size: 30pt;
  font-weight: 700;
  text-align: center;
  text-transform: uppercase;
  margin-bottom: 20px;
}

/* Section headings carry a green underline. */
h2 {
  color: #230078;
  font-size: 24pt;
  font-weight: 600;
  margin-top: 30px;
  border-bottom: 3px solid #84BD00;
  padding-bottom: 5px;
}

h3 {
  color: #230078;
  font-size: 20pt;
  font-weight: 500;
}

/* ---- Call-out box ---- */
.textbox {
  background-color: #f9f9f9;
  color: #230078;
  padding: 20px;
  border-radius: 10px;
  font-size: 14pt;
  margin-bottom: 20px;
  box-shadow: 4px 4px 15px rgba(0, 0, 0, 0.1);
  border-left: 6px solid #84BD00;
}

.textbox b {
  font-weight: 600;
  font-size: 15pt;
  color: #230078;
}

.textbox ul {
  padding-left: 20px;
}

.textbox li {
  margin-bottom: 5px;
  font-size: 14pt;
  color: #444;
}

/* ---- Code / console output ---- */
pre {
  background-color: #f4f4f4;
  border-left: 5px solid #230078;
  border-right: 5px solid #84BD00;
  padding: 10px;
  border-radius: 5px;
  font-size: 12pt;
}

/* ---- Tables ---- */
table {
  border-collapse: collapse;
  width: 100%;
  font-size: 14pt;
}

th, td {
  border: 1px solid #230078;
  padding: 12px;
  text-align: left;
}

/* Header cells: inverted brand colours. */
th {
  background-color: #230078;
  color: white;
  font-weight: bold;
}

/* Zebra striping. */
tr:nth-child(even) {
  background-color: #f2f2f2;
}

tr:nth-child(odd) {
  background-color: #ffffff;
}

/* Suppress any generated content placed before headings. */
h1::before, h2::before, h3::before {
  content: none !important;
}
## Warning: package 'readxl' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Work on a plain data.frame from here on.
mydata <- as.data.frame(mydata)

# Temporary running id, used only to drop the first row.
# NOTE(review): row 1 is presumably a label row from the survey export — confirm.
# seq_len() is used instead of seq(1, n) so an empty table yields an empty id
# vector rather than c(1, 0).
mydata$ID <- seq_len(nrow(mydata))
mydata <- mydata %>%
  filter(!ID %in% 1)

# Re-number so that the ids are contiguous again (1..n).
mydata$ID <- seq_len(nrow(mydata))
head(mydata)## Q1 Q21 Q22a Q23a Q23b Q23c Q23d Q23e Q23f Q24 Q25 Q26 Q27a Q27b Q27c Q28 Q29a
## 1 1 6 3 1 1 1 1 1 1 2 3 1 1 1 0 2 3
## 2 1 5 4 1 1 1 1 1 1 1 4 1 1 0 0 2 4
## 3 1 6 4 2 1 1 1 2 2 2 3 2 0 0 1 1 4
## 4 1 6 5 1 1 1 1 1 1 1 4 1 0 0 1 2 5
## 5 1 6 5 1 1 1 1 1 1 1 4 1 0 0 1 2 5
## 6 1 6 5 2 1 1 1 1 1 4 3 1 0 1 0 1 4
## Q29b Q29c Q29d Q30a Q30b Q30c Q30d Q30e Q31a Q31b Support and guidance
## 1 3 4 5 3 3 3 3 3 4 4 3
## 2 2 2 1 3 3 3 3 3 5 2 2
## 3 4 2 5 4 4 4 4 1 2 4 5
## 4 2 1 1 2 2 4 1 3 5 1 4
## 5 2 2 2 5 5 5 5 5 5 2 3
## 6 1 1 4 3 5 5 2 4 4 5 5
## Confidence Security Accesibility Clarity Speed
## 1 4 5 3 4 4
## 2 4 4 5 4 5
## 3 4 4 5 5 5
## 4 4 5 3 4 5
## 5 4 4 5 5 5
## 6 4 5 4 5 4
## Poslovalnica_podpora in usmerjanje Mobilna aplikacija_podpora in usmerjanje
## 1 4 4
## 2 4 3
## 3 5 2
## 4 5 3
## 5 4 3
## 6 4 2
## Poslovalnica_brezkrbnost Mobilna aplikacija_brezkrbnost Poslovalnica_varnost
## 1 3 3 4
## 2 4 4 4
## 3 5 2 5
## 4 5 4 5
## 5 4 4 4
## 6 4 3 4
## Mobilna aplikacija_varnost Poslovalnica_dostopnost
## 1 3 3
## 2 4 3
## 3 3 5
## 4 4 5
## 5 3 4
## 6 3 5
## Mobilna aplikacija_dostopnost Poslovalnica_jasnost Mobilna aplikacija_jasnost
## 1 3 4 4
## 2 5 5 4
## 3 2 5 2
## 4 3 5 4
## 5 5 4 4
## 6 2 4 2
## Poslovalnica_hitrost Mobilna aplikacija_hitrost Q40 Q41
## 1 3 4 -1 2
## 2 3 5 -1 2
## 3 3 4 -1 2
## 4 2 5 -1 2
## 5 2 4 -1 2
## 6 1 5 Stay humble, only cash 2
## Q42 Q43a Q43b Q43c Q43d Q43e Q43f Q43g Q43h Q44 Q45 Q45_13_text Q46 Q47 Q48
## 1 2000 1 0 0 0 0 0 0 0 2 3 -2 2 2 2
## 2 1998 0 0 0 1 0 0 0 0 6 3 -2 2 3 4
## 3 2001 1 0 0 0 0 0 0 0 2 1 -2 2 2 3
## 4 1994 0 0 0 1 0 0 0 0 6 12 -2 5 6 5
## 5 2000 1 1 0 0 0 0 0 0 2 1 -2 2 3 6
## 6 2004 1 0 0 0 0 0 0 0 3 1 -2 1 8 4
## ID
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
library(dplyr)

# Coerce the selected columns (by position) to numeric.
# across(everything(), ...) replaces the superseded mutate_all().
mydata[c(2:9, 11, 17:45, 48, 58)] <- mydata[c(2:9, 11, 17:45, 48, 58)] %>%
  mutate(across(everything(), as.numeric))

# Bank indicator derived from Q45:
#   1 = Q45 code 1, 2 = any of the codes 2..13, 0 = anything else (incl. NA).
mydata$BankF <- case_when(
  mydata$Q45 == 1 ~ 1,
  mydata$Q45 %in% 2:13 ~ 2,
  TRUE ~ 0
)
# Attach value labels to the coded answers Q26–Q41.
# Codes that are not listed in `levels` become NA.

# Q26: noticed promotions/notifications (1 = yes, 2 = no).
mydata[["Q26"]] <- factor(
  mydata[["Q26"]],
  levels = c(1, 2),
  labels = c("Da", "Ne")
)

# Q27a–Q27c: 1/0 indicator columns.
mydata[["Q27a"]] <- factor(
  mydata[["Q27a"]],
  levels = c(1, 0),
  labels = c("V mobilni aplikaciji", "Ne")
)
mydata[["Q27b"]] <- factor(
  mydata[["Q27b"]],
  levels = c(1, 0),
  labels = c("V poslovalnici", "Ne")
)
# NOTE(review): "upiorabljal/a" looks like a typo for "uporabljal/a"; left
# unchanged here because the label text appears in downstream output.
mydata[["Q27c"]] <- factor(
  mydata[["Q27c"]],
  levels = c(1, 0),
  labels = c("Nisem upiorabljal/a", "Ne")
)

# NOTE(review): the survey description in this report lists Q28 as
# 1 = mobile app, 2 = branch, i.e. the reverse of this mapping — confirm
# which one matches the questionnaire.
mydata[["Q28"]] <- factor(
  mydata[["Q28"]],
  levels = c(1, 2),
  labels = c("V poslovalnici", "V mobilni aplikaciji")
)

# Q41: gender.
mydata[["Q41"]] <- factor(
  mydata[["Q41"]],
  levels = c(1, 2, 3),
  labels = c("Female", "Male", "I don't want to answer")
)
# Q43a–Q43h are eight indicator columns sharing one coding
# (1 = "Selected", 0 = "Not selected"), so they are converted in one pass.
mydata[paste0("Q43", letters[1:8])] <- lapply(
  mydata[paste0("Q43", letters[1:8])],
  factor,
  levels = c(1, 0),
  labels = c("Selected", "Not selected")
)
# Attach value labels to Q44–Q48 and to the derived bank indicator.
# Codes that are not listed in `levels` become NA.

# Q44: size of place of residence.
mydata[["Q44"]] <- factor(
  mydata[["Q44"]],
  levels = c(1, 2, 3, 4, 5, 6),
  labels = c(
    "Less than 1.000 habitants",
    "1.000 – 5.000 habitants",
    "5.001 – 20.000 habitants",
    "20.001 – 50.000 habitants",
    "50.001 – 100.000 habitants",
    "More than 100.000 habitants"
  )
)

# Q45: bank. Level order follows the original listing, not the numeric order.
# NOTE(review): codes 2, 8 and 13 are not listed and therefore turn into NA —
# confirm that no respondent chose them.
mydata[["Q45"]] <- factor(
  mydata[["Q45"]],
  levels = c(3, 5, 1, 9, 12, 7, 10, 4, 11, 6),
  labels = c(
    "OTP banka d.d.",
    "Banka Intesa Sanpaolo d.d.",
    "Nova Ljubljanska Banka d.d. (NLB)",
    "Gorenjska Banka d.d.",
    "Delavska Hranilnica d.d.",
    "Revolut",
    "Deželna Banka Slovenije d.d.",
    "Banka Sparkasse d.d.",
    "Addiko Bank d.d.",
    "UniCredit Banka Slovenija d.d."
  )
)

# Q46: employment status (note the level order 1, 2, 3, 5, 6, 4).
mydata[["Q46"]] <- factor(
  mydata[["Q46"]],
  levels = c(1, 2, 3, 5, 6, 4),
  labels = c(
    "Študent/-ka",
    "Redno zaposlen/-a",
    "Upokojen/-a",
    "Samozaposlen/-a",
    "Delno zaposlen/-a",
    "Brezposeln/-a"
  )
)

# Q47: income bracket.
mydata[["Q47"]] <- factor(
  mydata[["Q47"]],
  levels = c(1, 2, 3, 4, 5, 6, 7, 8),
  labels = c(
    "Pod 1.000€",
    "1.000€ - 1.500€",
    "1.501€ - 2.000€",
    "2.001€ - 3.000€",
    "3.001€ - 5.000€",
    "5.001€ - 10.000€",
    "Above 10.000€",
    "I don't want to answer"
  )
)

# Q48: education.
# NOTE(review): the coding starts at 2; a code 1, if present, becomes NA.
mydata[["Q48"]] <- factor(
  mydata[["Q48"]],
  levels = c(2, 3, 4, 5, 6, 7),
  labels = c(
    "Dokončana osnovna šola",
    "Dokončana nižja ali srednja poklicna izobrazba",
    "Dokončana srednja strokovna ali splošna izobrazba",
    "Dokončana višješolska strokovna ali visokošolska strokovna izobrazba (tudi 1. bolonjska stopnja)",
    "Dokončana visokošolska strokovna univerzitetna izobrazba (tudi 2. bolonjska stopnja)",
    "Dokončana specializacija, znanstveni magisterij, doktorat"
  )
)

# BankF: 1 = "NLB", 2 = "Other"; the fallback code 0 is not a level and becomes NA.
mydata[["BankF"]] <- factor(
  mydata[["BankF"]],
  levels = c(1, 2),
  labels = c("NLB", "Other")
)
library(dplyr)

# Respondents whose bank indicator is "NLB".
mydataNLB <- filter(mydata, BankF == "NLB")

# Keep a copy of the full labelled data set.
mydata2 <- mydata
## Q21 Q22a Q23a Q23b
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:1.000 1st Qu.:1.000
## Median :5.000 Median :5.000 Median :1.000 Median :1.000
## Mean :4.502 Mean :4.325 Mean :1.381 Mean :1.192
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :6.000 Max. :5.000 Max. :2.000 Max. :2.000
## Q23c Q23d Q23e Q23f
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.000 Median :1.000 Median :1.000 Median :2.000
## Mean :1.283 Mean :1.181 Mean :1.355 Mean :1.543
## 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :2.000 Max. :2.000 Max. :2.000 Max. :2.000
## Q24 Q25 Q26 Q27a
## Length:265 Min. :1.000 Da:141 V mobilni aplikaciji:106
## Class :character 1st Qu.:3.000 Ne:124 Ne :159
## Mode :character Median :4.000
## Mean :3.498
## 3rd Qu.:4.000
## Max. :5.000
## Q27b Q27c Q28
## V poslovalnici: 67 Nisem upiorabljal/a:114 V poslovalnici : 96
## Ne :198 Ne :151 V mobilni aplikaciji:169
##
##
##
##
## Q29a Q29b Q29c Q29d Q30a
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:2.000
## Median :4.000 Median :3.000 Median :2.000 Median :3.00 Median :3.000
## Mean :3.438 Mean :2.728 Mean :2.596 Mean :2.97 Mean :3.072
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000
## Q30b Q30c Q30d Q30e
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000
## Median :4.000 Median :4.000 Median :3.000 Median :3.000
## Mean :3.377 Mean :3.426 Mean :3.117 Mean :2.932
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Q31a Q31b Support and guidance Confidence
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:3.00 1st Qu.:4.000
## Median :4.000 Median :2.000 Median :4.00 Median :4.000
## Mean :4.128 Mean :2.506 Mean :3.83 Mean :4.075
## 3rd Qu.:5.000 3rd Qu.:3.000 3rd Qu.:5.00 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000
## Security Accesibility Clarity Speed
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000
## Median :5.000 Median :4.000 Median :4.000 Median :4.000
## Mean :4.264 Mean :4.042 Mean :4.023 Mean :4.219
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## Poslovalnica_podpora in usmerjanje Mobilna aplikacija_podpora in usmerjanje
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :3.000
## Mean :3.811 Mean :3.125
## 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000
## Poslovalnica_brezkrbnost Mobilna aplikacija_brezkrbnost Poslovalnica_varnost
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.000
## Mean :3.921 Mean :3.551 Mean :4.211
## 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Mobilna aplikacija_varnost Poslovalnica_dostopnost
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :4.000
## Mean :3.698 Mean :3.785
## 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000
## Mobilna aplikacija_dostopnost Poslovalnica_jasnost Mobilna aplikacija_jasnost
## Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000
## Median :4.000 Median :4.000 Median :3.000
## Mean :3.792 Mean :3.913 Mean :3.347
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.000 Max. :5.000
## Poslovalnica_hitrost Mobilna aplikacija_hitrost
## Min. :1.000 Min. :1.0
## 1st Qu.:2.000 1st Qu.:4.0
## Median :3.000 Median :4.0
## Mean :2.891 Mean :4.2
## 3rd Qu.:4.000 3rd Qu.:5.0
## Max. :5.000 Max. :5.0
Survey Questions:
Q1: Ali uporabljate mobilno aplikacijo banke, kjer imate odprt
primarni račun (npr. KlikIn, mBank@Net in podobno)?
[1 - Da, 2 - Ne]
Q21: Kako pogosto uporabljate mobilno bančno aplikacijo?
[1 - Manj kot enkrat na mesec, 2 - Enkrat na mesec, 3 - Dva do trikrat
na mesec, 4 - Enkrat na teden, 5 - Večkrat na teden, 6 - Vsak dan]
Q22: V kolikšni meri se strinjate z naslednjo trditvijo?
[1 - Sploh se ne strinjam, 2 - Se ne strinjam, 3 - Niti se ne strinjam,
niti se strinjam, 4 - Se strinjam, 5 - Popolnoma se strinjam]
Q22a: Zavedam se, da lahko v mobilni aplikaciji opravim več storitev kot zgolj pregled stanja na računu in opravljanje transakcij.
Q23: Ali ste vedeli, da lahko naslednje storitve opravite v
mobilni aplikaciji?
[1 - Da, 2 - Ne]
Q24: Kje ste prvič izvedeli za možnost uporabe naprednih
storitev v mobilni bančni aplikaciji?
[1 - V mobilni bančni aplikaciji, 2 - V poslovalnici banke, 3 - V
oglasih, 4 - Preko družbenih omrežij, 5 - Preko prijateljev/družine, 6 -
V tej anketi, 7 - Drugo]
Q25: Kako enostavno je najti napredne storitve v vaši mobilni
bančni aplikaciji?
[1 - Zelo težko, 2 - Težko, 3 - Niti težko, niti enostavno, 4 -
Enostavno, 5 - Zelo enostavno]
Q26: Ste v svoji aplikaciji kdaj opazili promocije ali
obvestila o naprednih storitvah?
[1 - Da, 2 - Ne]
Q28: Če bi naslednji teden uporabili napredno storitev, kje bi
to storili?
[1 - V mobilni aplikaciji, 2 - V poslovalnici]
Q29: V kolikšni meri naslednji dejavniki vplivajo na vašo
odločitev, da ne uporabljate naprednih storitev mobilne banke?
[1 - Sploh se ne strinjam, 2 - Se ne strinjam, 3 - Niti se ne strinjam,
niti se strinjam, 4 - Se strinjam, 5 - Popolnoma se strinjam]
Q30: Pogosteje bi uporabljal/a napredne storitve v mobilni
bančni aplikaciji.
[1 - Sploh se ne strinjam, 2 - Se ne strinjam, 3 - Niti se ne strinjam,
niti se strinjam, 4 - Se strinjam, 5 - Popolnoma se strinjam]
Q40: Imate še kakšen komentar ali priporočilo glede uporabe mobilne banke?
Q41: Spol
[1 - Ženska, 2 - Moški, 3 - Ne želim odgovoriti]
Q42: Prosimo, vpišite leto rojstva.
[Odprto besedilo]
## Warning: package 'factoextra' was built under R version 4.4.2
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## $hopkins_stat
## [1] 0.5586369
##
## $plot
## NULL
# Give the four clustering variables (columns 3, 11, 26 and 27) descriptive names.
colnames(mydata)[c(3, 11, 26, 27)] <- c("Awareness", "Ease", "Value", "Trust")
I changed the names of the variables. We are creating clusters on 4 cluster variables: “Awareness”, “Ease”, “Value”, “Trust”.
library(dplyr)
library(factoextra)
library(NbClust)

# Elbow plot: total within-cluster sum of squares of k-means against k.
fviz_nbclust(mydata_clu_std, kmeans, method = "wss") +
  labs(subtitle = "Elbow method")

# Hierarchical clustering: Euclidean distances, Ward's method (ward.D2).
# The pipe is kept as-is because the printed "Call:" reflects this exact form.
WARD <- mydata_clu_std %>%
  get_dist(method = "euclidean") %>%
  hclust(method = "ward.D2")
WARD
##
## Call:
## hclust(d = ., method = "ward.D2")
##
## Cluster method : ward.D2
## Distance : euclidean
## Number of objects: 265
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
We decided to go with 5 clusters.
library(factoextra)
library(ggplot2)

# k-means starts from random centers. Fixing the seed makes the partition and,
# in particular, the numbering of the clusters reproducible between runs.
# Cluster numbers may therefore differ from earlier, unseeded runs.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 27, 74, 76, 32, 56
##
## Cluster means:
## Q22a Q25 Q31a Q31b
## 1 -2.2987082 -0.6636458 -0.1920661 0.4967113
## 2 0.1713700 -0.7658139 0.1806157 -0.3039786
## 3 0.5063700 0.9025907 0.5693474 -0.7926587
## 4 -0.0860382 -0.5414867 -1.9278200 0.2004560
## 5 0.2438008 0.4164170 0.1828583 1.1234050
##
## Clustering vector:
## [1] 1 3 4 3 3 5 2 5 3 1 4 4 5 2 4 1 2 2 5 5 3 5 4 3 3 1 5 2 2 2 5 2 3 3 3 5 3
## [38] 4 3 2 3 5 3 3 2 3 3 3 3 3 2 3 5 5 2 2 5 3 5 5 5 3 3 2 3 2 2 2 5 5 5 5 3 2
## [75] 4 2 5 4 3 5 5 1 3 2 3 2 1 5 3 2 2 1 1 1 2 2 5 3 2 1 3 1 4 3 2 3 1 2 1 3 3
## [112] 5 2 2 1 2 5 1 3 2 5 2 2 4 5 1 2 2 1 2 5 2 5 3 5 1 5 4 4 2 3 1 1 4 2 4 2 3
## [149] 3 4 3 4 2 3 2 5 1 4 1 3 3 3 3 2 2 3 2 3 2 5 3 4 4 3 2 2 3 2 3 3 3 5 3 2 2
## [186] 4 2 2 2 3 1 3 5 4 4 2 2 3 3 5 4 4 2 5 2 4 3 3 5 5 2 1 3 2 5 2 5 2 5 5 4 5
## [223] 3 2 3 2 3 5 3 2 5 3 5 1 1 4 1 5 3 2 4 2 5 2 3 2 3 2 5 5 2 3 3 5 5 2 2 3 4
## [260] 4 3 3 5 4 4
##
## Within cluster sum of squares by cluster:
## [1] 95.65988 92.49211 79.70569 116.86933 85.02220
## (between_SS / total_SS = 55.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)

# Cluster plot of the current solution.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Euclidean length of each respondent's vector of the four clustering
# variables in mydata_clu_std (presumably standardized, so this is the
# distance from the overall mean — confirm against the chunk that builds it).
mydata$Dissimilarity <- sqrt(
  mydata_clu_std$Q22a^2 +
    mydata_clu_std$Q25^2 +
    mydata_clu_std$Q31a^2 +
    mydata_clu_std$Q31b^2
)

# Ten respondents with the largest values, i.e. outlier candidates.
head(mydata[order(-mydata$Dissimilarity), c("ID", "Dissimilarity")], 10)
## ID Dissimilarity
## 191 191 6.365406
## 11 11 4.823598
## 202 202 4.716886
## 93 93 4.390156
## 241 241 4.281977
## 38 38 4.229806
## 234 234 4.116562
## 115 115 3.993249
## 103 103 3.861159
## 10 10 3.798713
# Remove respondent 191, then re-number the ids and re-standardize the four
# clustering variables (columns 3, 11, 26, 27) on the reduced data set.
mydata <- mydata %>%
  filter(!ID %in% c(191))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))
## $hopkins_stat
## [1] 0.5736077
##
## $plot
## NULL
library(factoextra)
library(ggplot2)

# k-means starts from random centers. Fixing the seed makes the partition and,
# in particular, the numbering of the clusters reproducible between runs.
# Cluster numbers may therefore differ from earlier, unseeded runs.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 56, 74, 76, 32, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.2355188 0.4112907 0.1731763 1.1381089
## 2 0.1611116 -0.7879947 0.1708796 -0.2980480
## 3 0.5052525 0.9044778 0.5689982 -0.7897317
## 4 -0.1033202 -0.5604314 -1.9884698 0.2094871
## 5 -2.3155481 -0.5971993 -0.0752230 0.4475952
##
## Clustering vector:
## [1] 5 3 4 3 3 1 2 1 3 5 4 4 1 2 4 5 2 2 1 1 3 1 4 3 3 5 1 2 2 2 1 2 3 3 3 1 3
## [38] 4 3 2 3 1 3 3 2 3 3 3 3 3 2 3 1 1 2 2 1 3 1 1 1 3 3 2 3 2 2 2 1 1 1 1 3 2
## [75] 4 2 1 4 3 1 1 5 3 2 3 2 5 1 3 2 2 5 5 5 2 2 1 3 2 5 3 5 4 3 2 3 5 2 5 3 3
## [112] 1 2 2 5 2 1 5 3 2 1 2 2 4 1 5 2 2 5 2 1 2 1 3 1 5 1 4 4 2 3 5 5 4 2 4 2 3
## [149] 3 4 3 4 2 3 2 1 5 4 5 3 3 3 3 2 2 3 2 3 2 1 3 4 4 3 2 2 3 2 3 3 3 1 3 2 2
## [186] 4 2 2 2 3 3 1 4 4 2 2 3 3 1 4 4 2 1 2 4 3 3 1 1 2 5 3 2 1 2 1 2 1 1 4 1 3
## [223] 2 3 2 3 1 3 2 1 3 1 5 5 4 5 1 3 2 4 2 1 2 3 2 3 2 1 1 2 3 3 1 1 2 2 3 4 4
## [260] 3 3 1 4 4
##
## Within cluster sum of squares by cluster:
## [1] 88.14381 95.70238 82.80060 120.50690 75.50678
## (between_SS / total_SS = 56.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
library(ggplot2)

# Cluster plot of the current solution.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Remove respondent 11 (id in the current numbering), re-number the ids and
# re-standardize the clustering variables on the reduced data set.
mydata <- mydata %>%
  filter(!ID %in% c(11))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))

# Seeded so that the random starts, and hence the cluster numbering, are
# reproducible; numbers may differ from earlier, unseeded runs.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 76, 54, 75, 32, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.16017846 -0.8533305 0.11948655 -0.3059896
## 2 0.21326412 0.3776779 0.22118546 1.1460396
## 3 0.49918810 0.9111497 0.59543745 -0.7892907
## 4 -0.03067026 -0.2443742 -1.97805346 0.2824627
## 5 -2.31336479 -0.6176055 -0.09173426 0.4433489
##
## Clustering vector:
## [1] 5 3 4 3 3 2 1 2 3 5 4 2 1 1 5 1 1 2 2 3 2 4 3 3 5 2 1 1 1 2 1 3 3 3 2 3 4
## [38] 3 1 3 2 3 3 1 3 3 3 3 3 1 3 2 2 1 1 2 3 2 2 2 3 3 1 3 1 1 1 2 2 2 2 3 1 4
## [75] 1 2 4 4 2 2 5 3 1 3 1 5 2 3 1 1 5 5 5 1 1 2 3 1 5 3 5 4 3 1 3 5 1 5 3 3 2
## [112] 1 1 5 1 2 5 3 1 2 1 1 4 4 5 1 1 5 1 2 1 2 3 2 5 2 4 4 1 3 5 5 4 1 4 1 3 3
## [149] 4 3 1 1 3 1 2 5 4 5 3 3 3 3 1 1 3 1 3 1 2 3 4 4 3 1 1 3 1 3 3 3 2 3 1 1 4
## [186] 1 1 1 3 3 2 4 4 1 1 3 3 2 4 4 1 2 1 4 3 3 4 2 1 5 3 1 2 1 2 1 2 2 4 2 3 1
## [223] 3 1 3 2 3 1 2 3 2 5 5 4 5 2 3 1 4 1 2 1 3 1 3 1 2 2 1 3 3 2 2 1 1 3 4 4 3
## [260] 3 2 4 4
##
## Within cluster sum of squares by cluster:
## [1] 109.86189 82.00624 80.53778 111.18358 76.83579
## (between_SS / total_SS = 56.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
library(ggplot2)

# Cluster plot of the current solution.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Remove the listed respondents (ids in the current numbering), re-number the
# ids and re-standardize the clustering variables on the reduced data set.
mydata <- mydata %>%
  filter(!ID %in% c(86, 200, 143, 239, 37))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))

# Seeded so that the random starts, and hence the cluster numbering, are
# reproducible; numbers may differ from earlier, unseeded runs.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 79, 25, 50, 30, 74
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.50108668 0.8823227 0.6219599 -0.7289837
## 2 -2.38619015 -0.5895733 -0.1589148 0.5303956
## 3 0.14549250 0.3470049 0.1111776 1.2357670
## 4 0.08869192 -0.3574300 -1.9775371 0.1497189
## 5 0.13693951 -0.8323177 0.1162874 -0.2966229
##
## Clustering vector:
## [1] 2 1 4 1 1 3 5 3 1 2 4 3 5 4 2 5 5 3 1 1 3 4 1 1 2 3 5 5 5 3 5 1 1 1 3 1 1
## [38] 5 1 3 1 1 5 1 1 1 1 1 5 1 3 1 5 5 3 1 3 3 3 1 1 5 1 5 5 5 3 3 3 3 1 5 4 5
## [75] 3 4 4 3 3 2 1 5 1 5 3 1 5 5 2 2 2 5 5 3 1 5 2 1 2 4 1 5 1 2 5 2 1 1 3 5 5
## [112] 2 5 3 2 1 5 3 5 5 4 4 2 5 5 2 5 3 5 3 1 3 2 1 4 4 5 1 2 2 5 4 5 1 1 4 1 4
## [149] 5 1 5 3 2 4 2 1 1 1 1 5 5 1 5 1 5 3 1 4 4 1 5 5 1 5 1 1 1 3 1 5 5 4 5 5 5
## [186] 1 1 3 4 4 5 5 1 1 3 4 5 3 5 4 1 1 4 3 5 2 1 5 3 5 3 5 3 3 4 3 1 5 1 5 1 3
## [223] 1 5 3 1 3 2 2 4 2 3 1 5 5 3 5 1 5 1 5 3 3 5 1 1 1 3 5 5 1 4 4 1 1 3 4 4
##
## Within cluster sum of squares by cluster:
## [1] 95.40383 77.28224 80.33349 91.09260 105.03046
## (between_SS / total_SS = 56.3 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
library(ggplot2)

# Cluster plot of the current solution.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Remove the listed respondents (ids in the current numbering), re-number the
# ids and re-standardize the clustering variables on the reduced data set.
mydata <- mydata %>%
  filter(!ID %in% c(90, 155, 85, 146, 100))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))

# Seeded so that the random starts, and hence the cluster numbering, are
# reproducible; numbers may differ from earlier, unseeded runs.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 23, 76, 49, 79, 26
##
## Cluster means:
## Awareness Ease Value Trust
## 1 -2.41967781 -0.6566175 -0.16063729 0.44471611
## 2 0.11993807 -0.8977034 0.06023898 -0.26926030
## 3 0.11446989 0.3155981 0.08097431 1.31250594
## 4 0.49980397 0.8895872 0.61326842 -0.71180559
## 5 0.05552915 -0.0928475 -2.04997890 0.08289087
##
## Clustering vector:
## [1] 1 4 5 4 4 3 2 3 4 1 5 3 2 2 1 2 2 3 4 4 3 5 4 4 1 3 2 2 2 3 2 4 4 4 3 4 4
## [38] 2 4 3 4 4 2 4 4 4 4 4 2 4 3 4 2 2 3 4 3 3 3 4 4 2 4 2 2 2 3 3 3 3 4 2 5 2
## [75] 3 5 5 3 3 1 4 2 4 2 4 2 2 1 1 2 2 3 4 2 1 4 1 4 2 4 1 2 1 4 4 3 2 2 1 2 3
## [112] 1 4 2 3 2 2 5 5 1 2 2 1 2 3 2 3 4 3 1 4 5 5 2 4 1 1 2 5 2 4 4 4 2 2 4 2 3
## [149] 1 5 4 4 4 4 2 2 4 2 4 2 3 4 5 5 4 2 2 4 2 4 4 4 3 4 2 2 5 2 2 2 4 4 3 5 5
## [186] 2 2 4 4 3 5 2 3 2 5 4 4 5 3 2 1 4 2 3 2 3 2 3 3 5 3 4 2 4 2 4 3 4 2 3 4 3
## [223] 1 1 5 1 3 4 2 2 3 2 4 2 4 2 3 3 2 4 4 4 3 2 2 4 5 5 4 4 3 5 5
##
## Within cluster sum of squares by cluster:
## [1] 70.16428 124.58392 80.49920 99.56269 63.35325
## (between_SS / total_SS = 56.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
library(ggplot2)

# Cluster plot of the current solution.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Remove the listed respondents (ids in the current numbering), re-number the
# ids and re-standardize the clustering variables on the reduced data set.
mydata <- mydata %>%
  filter(!ID %in% c(193, 190, 164))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))

# Seeded so that the random starts, and hence the cluster numbering, are
# reproducible; numbers may differ from earlier, unseeded runs.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 75, 26, 79, 23, 47
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.13714948 -0.8797902 0.08082592 -0.2392861
## 2 0.01602031 -0.1291655 -2.03520230 -0.0309770
## 3 0.50815392 0.9082966 0.62028545 -0.7001278
## 4 -2.40486418 -0.6499943 -0.16676336 0.4696909
## 5 0.09499952 0.2667449 0.03587920 1.3459376
##
## Clustering vector:
## [1] 4 3 2 3 3 5 1 5 3 4 2 5 1 2 4 1 1 5 3 3 5 2 3 3 4 5 1 1 1 5 1 3 3 3 5 3 3
## [38] 1 3 5 3 3 1 3 3 3 3 3 1 3 5 3 1 1 5 3 5 5 5 3 3 1 3 1 1 1 5 5 5 5 3 1 2 1
## [75] 5 2 2 5 5 4 3 1 3 1 3 1 1 4 4 1 1 5 3 1 4 3 4 3 1 3 4 1 4 3 3 5 1 1 4 1 5
## [112] 4 3 1 5 1 1 2 2 4 1 1 4 1 5 1 5 3 5 4 3 2 2 1 3 4 4 1 2 1 3 3 3 1 1 3 1 5
## [149] 4 2 3 3 3 3 1 1 3 1 3 1 5 3 2 3 1 1 3 1 3 3 3 5 3 1 1 2 1 1 1 3 3 5 2 2 1
## [186] 1 3 3 2 1 1 2 3 3 2 5 1 4 3 1 5 1 5 1 5 5 2 5 3 1 3 1 3 5 3 1 5 3 5 4 4 2
## [223] 4 5 3 1 1 5 1 3 1 3 1 5 5 1 3 3 3 5 1 1 3 2 2 3 3 5 2 2
##
## Within cluster sum of squares by cluster:
## [1] 121.45870 62.95169 101.43697 71.50918 74.12297
## (between_SS / total_SS = 56.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
library(ggplot2)

# Cluster plot of the current solution.
fviz_cluster(Clustering,
             palette = "Set1",
             repel = TRUE,
             ggtheme = theme_bw(),
             data = mydata_clu_std)

# Remove the listed respondents (ids in the current numbering), re-number the
# ids and re-standardize the clustering variables on the reduced data set.
mydata <- mydata %>%
  filter(!ID %in% c(220, 10, 109))
mydata$ID <- seq_len(nrow(mydata))
mydata_clu_std <- as.data.frame(scale(mydata[c(3, 11, 26, 27)]))

# Seeded so that the random starts, and hence the cluster numbering, are
# reproducible. This solution is the one whose cluster numbers are carried
# into the later group comparisons, so stable numbering matters here.
set.seed(1)
Clustering <- kmeans(mydata_clu_std,
                     centers = 5,
                     nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 48, 75, 79, 20, 25
##
## Cluster means:
## Awareness Ease Value Trust
## 1 0.07755914 0.2561182 -0.003027745 1.38856900
## 2 0.10677233 -0.9230788 0.077510269 -0.22238704
## 3 0.50068099 0.9055025 0.618119866 -0.69048033
## 4 -2.49544837 -0.4992393 -0.152598342 0.31410430
## 5 -0.05502378 -0.1845070 -2.057897640 -0.06825693
##
## Clustering vector:
## [1] 4 3 5 3 3 1 2 1 3 5 1 2 5 4 2 2 1 3 3 1 5 3 3 4 1 2 2 2 1 2 3 3 3 1 3 3 2
## [38] 3 1 3 3 2 3 3 3 3 3 2 3 1 3 2 2 1 3 1 1 1 3 3 2 3 2 2 2 1 1 1 1 3 2 5 2 1
## [75] 5 5 1 1 4 3 2 3 2 3 2 2 4 4 2 2 1 3 2 4 3 4 3 2 3 4 2 4 3 3 1 2 2 2 1 4 3
## [112] 2 1 2 2 5 5 4 2 2 4 2 1 2 1 3 1 4 3 5 5 2 3 4 4 2 5 2 3 3 3 2 2 3 2 1 4 5
## [149] 3 3 3 3 2 2 3 2 3 2 1 3 5 3 2 2 3 2 3 3 3 1 3 2 2 5 2 2 2 3 3 1 5 5 2 2 3
## [186] 3 5 2 2 5 3 3 1 1 2 4 3 2 1 2 1 2 1 1 5 1 3 2 3 2 3 1 3 2 1 3 1 4 5 4 1 3
## [223] 2 2 1 2 3 2 3 2 1 1 2 3 3 3 1 2 2 3 5 5 3 3 1 5 5
##
## Within cluster sum of squares by cluster:
## [1] 81.67665 127.65628 106.19954 49.86444 62.38244
## (between_SS / total_SS = 56.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)

# Cluster plot of the final five-cluster solution.
fviz_cluster(
  Clustering,
  data = mydata_clu_std,
  palette = "Set1",
  repel = TRUE,
  ggtheme = theme_bw()
)
## Awareness Ease Value Trust
## 1 0.07755914 0.2561182 -0.003027745 1.38856900
## 2 0.10677233 -0.9230788 0.077510269 -0.22238704
## 3 0.50068099 0.9055025 0.618119866 -0.69048033
## 4 -2.49544837 -0.4992393 -0.152598342 0.31410430
## 5 -0.05502378 -0.1845070 -2.057897640 -0.06825693
library(tidyr)
library(ggplot2)

# Reshape the per-cluster averages to long format: one row per
# (cluster id, clustering variable) with columns `name` and `value`.
Figure <- pivot_longer(Figure, cols = c("Awareness", "Ease", "Value", "Trust"))

# Cluster id as a factor for colour/shape mapping.
Figure$Group <- factor(Figure$ID,
                       levels = c(1, 2, 3, 4, 5),
                       labels = c("1", "2", "3", "4", "5"))

# Fix the order of the variables on the x axis.
Figure$NameF <- factor(Figure$name,
                       levels = c("Awareness", "Ease", "Value", "Trust"),
                       labels = c("Awareness", "Ease", "Value", "Trust"))

# Profile plot: one line per cluster across the four clustering variables.
ggplot(Figure, aes(x = NameF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, col = Group), size = 5, alpha = 0.4) +
  geom_line(aes(group = ID), linewidth = 1.5) +
  ylab("Averages") +
  xlab("Cluster variables") +
  ylim(-2.5, 2.5) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 12)) +
  scale_color_manual(values = c("1" = "#230078", "2" = "#84BD00", "3" = "#FA7800", "4" = "#63666A", "5" = "#A7A8AA"))

# Store each respondent's cluster membership from the k-means result.
mydata$Group <- Clustering$cluster

# One ANOVA per clustering variable with cluster membership as the factor.
# The formula is kept verbatim because the printed term label derives from it.
fit <- aov(cbind(Awareness, Ease, Value, Trust) ~ as.factor(Group),
           data = mydata)
summary(fit)
## Response Awareness :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 80.074 20.0186 87.691 < 2.2e-16 ***
## Residuals 242 55.245 0.2283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Ease :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 86.860 21.7150 76.879 < 2.2e-16 ***
## Residuals 242 68.354 0.2825
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Value :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 71.857 17.9643 76.009 < 2.2e-16 ***
## Residuals 242 57.195 0.2363
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Trust :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 172.02 43.004 74.816 < 2.2e-16 ***
## Residuals 242 139.10 0.575
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Q42 holds the year of birth; make sure it is numeric.
mydata[["Q42"]] <- as.numeric(as.character(mydata[["Q42"]]))

# Age = calendar year at run time minus year of birth.
# NOTE(review): the result depends on when the script is run, not on when the
# survey was fielded — confirm this is intended.
current_year <- as.numeric(format(Sys.Date(), "%Y"))
mydata[["Age"]] <- current_year - mydata[["Q42"]]
## Group.1 x
## 1 1 27.0
## 2 2 29.0
## 3 3 27.0
## 4 4 23.5
## 5 5 35.0
## Warning: package 'rstatix' was built under R version 4.4.2
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
## # A tibble: 5 × 4
## Group variable statistic p
## <int> <chr> <dbl> <dbl>
## 1 1 Age 0.804 0.00000163
## 2 2 Age 0.863 0.000000887
## 3 3 Age 0.832 0.0000000502
## 4 4 Age 0.728 0.0000868
## 5 5 Age 0.897 0.0162
##
## Kruskal-Wallis rank sum test
##
## data: Age by Group
## Kruskal-Wallis chi-squared = 4.5423, df = 4, p-value = 0.3376
## Group.1 x
## 1 1 27.0
## 2 2 29.0
## 3 3 27.0
## 4 4 23.5
## 5 5 35.0
## Group.1 x
## 1 1 34.27083
## 2 2 36.09333
## 3 3 33.40506
## 4 4 32.10000
## 5 5 38.48000
library(dplyr)

# Collapse the six-point usage scale (Q21) into two categories:
# codes 5-6 -> "Frequent Usage", codes 1-4 -> "Occasional Usage".
# Any other value, including NA, stays NA.
mydata$UsageFrequency <- case_when(
  mydata$Q21 %in% c(5, 6) ~ "Frequent Usage",
  mydata$Q21 %in% c(1, 2, 3, 4) ~ "Occasional Usage"
)
##
## Pearson's Chi-squared test
##
## data: mydata$UsageFrequency and mydata$Group
## X-squared = 12.12, df = 4, p-value = 0.01648
library(dplyr)
library(ggplot2)

# Cross-tabulate cluster membership against usage category and convert the
# counts to within-cluster shares (each row sums to 1).
usage_table <- table(mydata$Group, mydata$UsageFrequency)
usage_proportions <- prop.table(usage_table, margin = 1)

# Long format: Var1 = cluster, Var2 = usage category, Freq = share.
usage_df <- as.data.frame(as.table(usage_proportions))

# For every usage category, the cluster with the highest share.
# The result stays grouped by Var2, as before.
most_frequent_group <- slice_max(group_by(usage_df, Var2), Freq, n = 1)
print("Most Frequent Groups for Each Usage Category:")
## [1] "Most Frequent Groups for Each Usage Category:"
## # A tibble: 2 × 3
## # Groups: Var2 [2]
## Var1 Var2 Freq
## <fct> <fct> <dbl>
## 1 3 Frequent Usage 0.785
## 2 5 Occasional Usage 0.52
# Stacked bars: within-cluster share (in %) of each usage category.
# geom_col() is geom_bar(stat = "identity").
ggplot(usage_df, aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    values = c("Frequent Usage" = "#230078", "Occasional Usage" = "#84BD00")
  ) +
  labs(
    title = "Usage Proportions Across Groups",
    x = "Group",
    y = "Proportion (%)",
    fill = "Usage Category"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
##
## Pearson's Chi-squared test
##
## data: mydata$Q28 and mydata$Group
## X-squared = 26.786, df = 4, p-value = 2.196e-05
library(dplyr)
library(ggplot2)

# Cross-tabulate cluster membership against the preferred service location
# (Q28) and convert the counts to within-cluster shares (each row sums to 1).
service_table <- table(mydata$Group, mydata$Q28)
service_proportions <- prop.table(service_table, margin = 1)

# Long format: Var1 = cluster, Var2 = Q28 label, Freq = share.
service_df <- as.data.frame(as.table(service_proportions))

# Q28 is already a labelled factor, so Var2 carries the labels
# "V poslovalnici" / "V mobilni aplikaciji". The former recode() of the levels
# `1` and `2` matched nothing and was removed; the English wording is supplied
# through the `labels` argument of the plot's fill scale instead.

# For every Q28 answer, the cluster with the highest share (stays grouped).
most_frequent_service <- service_df %>%
  group_by(Var2) %>%
  slice_max(Freq, n = 1)
print("Most Frequent Groups for Each Service Option:")
## [1] "Most Frequent Groups for Each Service Option:"
## # A tibble: 2 × 3
## # Groups: Var2 [2]
## Var1 Var2 Freq
## <fct> <fct> <dbl>
## 1 4 V poslovalnici 0.55
## 2 3 V mobilni aplikaciji 0.873
# Stacked bars: within-cluster share (in %) of each preferred service location.
# geom_col() is geom_bar(stat = "identity"); the fill scale translates the
# Slovene factor labels into English legend entries.
ggplot(service_df, aes(x = Var1, y = Freq * 100, fill = Var2)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    values = c("V poslovalnici" = "#230078", "V mobilni aplikaciji" = "#84BD00"),
    labels = c("V poslovalnici" = "In branch", "V mobilni aplikaciji" = "In the mobile app")
  ) +
  labs(
    title = "Preference for Advanced Service Location by Group",
    x = "Group",
    y = "Proportion (%)",
    fill = "Service Option"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1), # slanted labels read better
    legend.position = "bottom" # legend goes under the plot
  )
##
## Pearson's Chi-squared test
##
## data: mydata$Q26 and mydata$Group
## X-squared = 24.965, df = 4, p-value = 5.113e-05
library(dplyr)
library(ggplot2)
# Cross-tabulate group membership against Q26 and turn the counts into
# within-group shares (margin = 1 normalises each row).
notification_table <- table(mydata$Group, mydata$Q26)
notification_proportions <- prop.table(notification_table, margin = 1)
notification_df <- as.data.frame(as.table(notification_proportions))

# Relabel the answer codes.
# NOTE(review): the rendered summary printed after this chunk shows the labels
# "Da" / "Ne", so the keys `1` / `2` may not match any level of Var2 --
# confirm whether this recode has any effect.
notification_df$Var2 <- recode(
  notification_df$Var2,
  `1` = "Yes",
  `2` = "No"
)

# For each answer, keep the group with the largest share.
most_frequent_notification <- slice_max(
  group_by(notification_df, Var2),
  Freq,
  n = 1
)
print("Most Frequent Groups for Each Notification Response:")
## [1] "Most Frequent Groups for Each Notification Response:"
## # A tibble: 2 × 3
## # Groups: Var2 [2]
## Var1 Var2 Freq
## <fct> <fct> <dbl>
## 1 3 Da 0.747
## 2 4 Ne 0.75
# Stacked bars: within-group percentage of each Q26 answer. The fill keys are
# the raw answer labels; `labels` supplies the English legend text.
ggplot(
  data = notification_df,
  mapping = aes(x = Var1, y = Freq * 100, fill = Var2)
) +
  geom_col(position = "stack") +
  scale_fill_manual(
    values = c("Da" = "#230078", "Ne" = "#84BD00"),
    labels = c("Da" = "Yes", "Ne" = "No")
  ) +
  labs(
    title = "Notifications About Advanced Services by Group",
    x = "Group",
    y = "Proportion (%)",
    fill = "Notification Noticed"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
##
## Pearson's Chi-squared test
##
## data: mydata$Q23a and mydata$Group
## X-squared = 15.959, df = 4, p-value = 0.003075
##
## Pearson's Chi-squared test
##
## data: mydata$Q23e and mydata$Group
## X-squared = 18.035, df = 4, p-value = 0.001215
##
## Pearson's Chi-squared test
##
## data: mydata$Q23f and mydata$Group
## X-squared = 14.801, df = 4, p-value = 0.005132
library(dplyr)
library(ggplot2)
#' Plot within-group awareness shares for one yes/no survey item.
#'
#' Cross-tabulates `group_col` against `awareness_col`, converts the counts
#' to row proportions and draws them as stacked percentage bars.
#'
#' @param data Data frame holding both columns.
#' @param group_col Name (string) of the grouping column.
#' @param awareness_col Name (string) of the answer column; the codes `1`
#'   and `2` are relabelled "Yes" and "No".
#' @param awareness_label Text inserted into the plot title.
#' @return A ggplot object; nothing is drawn until it is printed.
visualize_distribution <- function(data, group_col, awareness_col, awareness_label) {
  # Fail early with a readable message instead of an opaque table() error.
  missing_cols <- setdiff(c(group_col, awareness_col), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Row proportions: each group's answers sum to 1.
  awareness_table <- table(data[[group_col]], data[[awareness_col]])
  awareness_proportions <- prop.table(awareness_table, margin = 1)
  awareness_df <- as.data.frame(as.table(awareness_proportions))
  awareness_df$Var2 <- recode(
    awareness_df$Var2,
    `1` = "Yes",
    `2` = "No"
  )

  # Stacked, not "fill": position = "fill" rescales every bar to 0-1, which
  # cancelled the * 100 and left the "Proportion (%)" axis reading 0-1.
  # Stacking the percentages makes each bar total 100 on a true percent axis.
  ggplot(awareness_df, aes(x = Var1, y = Freq * 100, fill = Var2)) +
    geom_col(position = "stack") +
    labs(
      x = "Group",
      y = "Proportion (%)",
      fill = "Awareness",
      title = paste("Awareness of", awareness_label, "by Group")
    ) +
    scale_fill_manual(
      values = c("Yes" = "#230078", "No" = "#84BD00")
    ) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
}
# Build one awareness-by-group chart per survey item. Arguments are, in order:
# data, grouping column, answer column, label used in the plot title.

# Q23a (Consumer Loan)
plot_q23a <- visualize_distribution(
  mydata, "Group", "Q23a", "Getting Consumer Loan"
)

# Q23e (Saving Account)
plot_q23e <- visualize_distribution(
  mydata, "Group", "Q23e", "Opening a Saving Account"
)

# Q23f (Term Deposits)
plot_q23f <- visualize_distribution(
  mydata, "Group", "Q23f", "Term Deposits"
)
# Display the plots
# NOTE(review): only plot_q23a is printed in this span; confirm that
# plot_q23e and plot_q23f are displayed elsewhere.
print(plot_q23a)

# Bin respondents' ages into labelled brackets.
# include.lowest = TRUE closes the first interval on the left, so a respondent
# aged exactly 18 lands in "18-30". With cut()'s default (18, 30] interval
# that age was silently converted to NA and dropped from every AgeGroup table.
mydata$AgeGroup <- cut(
  mydata$Age,
  breaks = c(18, 30, 40, 50, 60, Inf),
  labels = c("18-30", "31-40", "41-50", "51-60", "60+"),
  include.lowest = TRUE
)
##
## 1 2
## 18-30 74 66
## 31-40 26 2
## 41-50 26 10
## 51-60 18 7
## 60+ 14 4
library(ggplot2)

# Draw one chart of awareness shares by age group.
#
# prop_df:       data frame with columns Var1 (age group), Var2 (answer code
#                "1" / "2") and Freq, as built by the
#                as.data.frame(as.table(...)) calls that follow.
# service_title: plot title naming the advanced service.
#
# Returns a ggplot object; called at top level it auto-prints. The six charts
# were previously six copies of this same ggplot call.
plot_awareness_by_age <- function(prop_df, service_title) {
  ggplot(prop_df, aes(x = Var1, y = Freq, fill = Var2)) +
    geom_bar(stat = "identity", position = "fill") +
    labs(
      x = "Age Group",
      y = "Proportion",
      fill = "Awareness of Advanced Service",
      title = service_title
    ) +
    scale_fill_manual(
      values = c("1" = "#230078", "2" = "#84BD00"),
      labels = c("1" = "Yes", "2" = "No")
    ) +
    theme_minimal() +
    theme(
      legend.position = "bottom",
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
}

# Each section builds the age-group x answer row proportions for one item
# (margin = 1 normalises within age group) and plots them.

# Q23a (title typo "Conmsumer" corrected)
prop_table5 <- prop.table(table(mydata$AgeGroup, mydata$Q23a), margin = 1)
prop_df5 <- as.data.frame(as.table(prop_table5))
plot_awareness_by_age(prop_df5, "Getting a Consumer Loan")

# Q23b
prop_table51 <- prop.table(table(mydata$AgeGroup, mydata$Q23b), margin = 1)
prop_df51 <- as.data.frame(as.table(prop_table51))
plot_awareness_by_age(prop_df51, "Change of the Account Limit")

# Q23c
prop_table52 <- prop.table(table(mydata$AgeGroup, mydata$Q23c), margin = 1)
prop_df52 <- as.data.frame(as.table(prop_table52))
plot_awareness_by_age(prop_df52, "Ordering a new card")

# Q23d
prop_table53 <- prop.table(table(mydata$AgeGroup, mydata$Q23d), margin = 1)
prop_df53 <- as.data.frame(as.table(prop_table53))
plot_awareness_by_age(prop_df53, "Change of the Card Limit")

# Q23e
prop_table54 <- prop.table(table(mydata$AgeGroup, mydata$Q23e), margin = 1)
prop_df54 <- as.data.frame(as.table(prop_table54))
plot_awareness_by_age(prop_df54, "Opening a Saving Account")

# Q23f
prop_table55 <- prop.table(table(mydata$AgeGroup, mydata$Q23f), margin = 1)
prop_df55 <- as.data.frame(as.table(prop_table55))
plot_awareness_by_age(prop_df55, "Term Deposits")

# Fix the RNG state for the k-means call that follows.
set.seed(123)
# Cluster the four numeric iris measurements (the fifth column, Species, is
# left out) into five k-means groups.
data <- iris[, 1:4]
kmeans_result <- kmeans(x = data, centers = 5)

# Number of observations assigned to each cluster.
kmeans_result$size
## [1] 19 8 38 23 62
# Within-group shares of each Q45 answer (margin = 1 normalises each row).
table_clusters1 <- table(mydata$Group, mydata$Q45)
prop_table_clusters1 <- prop.table(table_clusters1, margin = 1)
prop_df1 <- as.data.frame(as.table(prop_table_clusters1))

library(ggplot2)

# Side-by-side bars per group, one fixed colour per bank.
ggplot(
  data = prop_df1,
  mapping = aes(x = Var1, y = Freq * 100, fill = Var2)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Percentage Distribution of Primary Bank by Group",
    x = "Group",
    y = "Percentage (%)",
    fill = "Category"
  ) +
  scale_fill_manual(
    values = c(
      "Nova Ljubljanska Banka d.d. (NLB)" = "#230078",
      "OTP banka d.d." = "#84BD00",
      "Banka Intesa Sanpaolo d.d." = "#FA7800",
      "Gorenjska Banka d.d." = "lightblue2",
      "Delavska Hranilnica d.d." = "darkred",
      "Revolut" = "cyan",
      "Deželna Banka Slovenije d.d." = "violet",
      "Banka Sparkasse d.d." = "magenta",
      "Addiko Bank d.d." = "salmon",
      "UniCredit Banka Slovenija d.d." = "yellow3"
    )
  )
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## $chisq
## [1] 553.3252
##
## $p.value
## [1] 2.869381e-108
##
## $df
## [1] 15
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = R)
## Overall MSA = 0.74
## MSA for each item =
## Support and guidance Confidence Security
## 0.82 0.70 0.70
## Accesibility Clarity Speed
## 0.77 0.79 0.77
## Warning: package 'FactoMineR' was built under R version 4.4.2
# Principal component analysis of the items in mydata_PCA. scale.unit = TRUE
# standardises the variables; graph = FALSE suppresses the automatic plots.
components <- PCA(mydata_PCA, scale.unit = TRUE, graph = FALSE)

library(factoextra)

# Eigenvalue, percentage of variance and cumulative percentage per component.
get_eigenvalue(components)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.9970186 49.950310 49.95031
## Dim.2 1.0382843 17.304739 67.25505
## Dim.3 0.7337893 12.229822 79.48487
## Dim.4 0.5971498 9.952497 89.43737
## Dim.5 0.4276216 7.127027 96.56440
## Dim.6 0.2061363 3.435605 100.00000
# Scree plot of the eigenvalues, with value labels switched on.
fviz_eig(
  components,
  choice = "eigenvalue",
  addlabels = TRUE,
  main = "Screeplot",
  xlab = "Principal component",
  ylab = "Eigenvalue"
)
## Parallel analysis suggests that the number of factors = NA and the number of components = 1
## Dim.1 Dim.2
## Support and guidance 0.6342613 -0.42424621
## Confidence 0.8410214 -0.21848787
## Security 0.8091472 -0.17588683
## Accesibility 0.6469456 0.49579602
## Clarity 0.7638862 -0.09612296
## Speed 0.4802439 0.72427414
# Component-score coefficients for the first two principal components:
# each variable's loading divided by the square root of that component's
# eigenvalue.
loadings <- components$var$cor
library(factoextra)
eigenvalue <- get_eigenvalue(components)[1:2, 1]

# Select the loadings by column instead of the linear positions 1:6 / 7:12,
# which silently assumed exactly six variables. unname() keeps the result a
# plain unnamed vector, as the positional indexing produced.
coefficient1 <- unname(loadings[, 1]) / sqrt(eigenvalue[1])
coefficient2 <- unname(loadings[, 2]) / sqrt(eigenvalue[2])
## Warning: package 'tidyverse' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.3 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ psych::alpha() masks ggplot2::alpha()
## ✖ rstatix::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)
# Reshape the "<retailer>_<dimension>" columns of mydata_PCA2 into a table
# with one row per dimension and one column per retailer, holding mean
# scores; the dimension names become the row names.
mydata_PCAD <- mydata_PCA2 %>%
  pivot_longer(
    cols = everything(),
    names_to = "name",
    values_to = "score"
  ) %>%
  separate(col = name, into = c("retailer", "dimension"), sep = "_") %>%
  pivot_wider(
    names_from = retailer,
    values_from = score,
    values_fn = mean
  ) %>%
  column_to_rownames(var = "dimension")

# Standardise each column, then project columns 1 and 2 onto the first two
# components as the coefficient-weighted sum of the standardised scores.
mydata_PCA_std <- scale(mydata_PCAD)

poslovalnica1 <- sum(coefficient1 * mydata_PCA_std[, 1])
mobilna_banka1 <- sum(coefficient1 * mydata_PCA_std[, 2])
poslovalnica2 <- sum(coefficient2 * mydata_PCA_std[, 1])
mobilna_banka2 <- sum(coefficient2 * mydata_PCA_std[, 2])

library(factoextra)
# Variable biplot with individuals hidden; the two channels are then overlaid
# as labelled points at their projected coordinates.
p <- fviz_pca_biplot(
  components,
  repel = TRUE,
  invisible = "ind",
  col.var = "#33006F"
)

p +
  annotate(
    "point",
    x = poslovalnica1, y = poslovalnica2,
    color = "#84BD00", size = 4, shape = 16
  ) +
  annotate(
    "text",
    x = poslovalnica1, y = poslovalnica2,
    label = "Branch", vjust = -1, color = "#84BD00"
  ) +
  annotate(
    "point",
    x = mobilna_banka1, y = mobilna_banka2,
    color = "#FA7800", size = 4, shape = 16
  ) +
  annotate(
    "text",
    x = mobilna_banka1, y = mobilna_banka2,
    label = "Mobile bank", vjust = -1, color = "#FA7800"
  )
## Q23a Q23b Q23c Q23d Q23e
## Min. :1.00 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1.00 Median :1.00 Median :1.000 Median :1.000 Median :1.000
## Mean :1.36 Mean :1.17 Mean :1.271 Mean :1.154 Mean :1.332
## 3rd Qu.:2.00 3rd Qu.:1.00 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:2.000
## Max. :2.00 Max. :2.00 Max. :2.000 Max. :2.000 Max. :2.000
## Q23f
## Min. :1.000
## 1st Qu.:1.000
## Median :2.000
## Mean :1.518
## 3rd Qu.:2.000
## Max. :2.000
Market Research Hypothesis 1: Users have knowledge about the
existence of possible advanced services.
H_0: The number of respondents who answered Yes is
equal to the number of respondents who answered No.
H_1: The number of respondents who answered Yes is
greater than the number of respondents who answered No.
We reject H_0 at p < 0.001. The proportion of
Yes responses is greater than 50%.
##
## Exact binomial test
##
## data: 164 and 265
## number of successes = 164, number of trials = 265, p-value = 6.535e-05
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.5670886 1.0000000
## sample estimates:
## probability of success
## 0.6188679
Limit Change Hypothesis:
H_0: The number of respondents who answered Yes is
equal to the number of respondents who answered No.
H_1: The number of respondents who answered Yes is
greater than the number of respondents who answered No.
We reject H_0 at p < 0.001. The proportion of
Yes responses is greater than 50%.
##
## Exact binomial test
##
## data: 214 and 265
## number of successes = 214, number of trials = 265, p-value < 2.2e-16
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.7633183 1.0000000
## sample estimates:
## probability of success
## 0.8075472
Ordering a New Card Hypothesis:
H_0: The number of respondents who answered Yes is
equal to the number of respondents who answered No.
H_1: The number of respondents who answered Yes is
greater than the number of respondents who answered No.
We reject H_0 at p < 0.001. The proportion of
Yes responses is greater than 50%.
##
## Exact binomial test
##
## data: 190 and 265
## number of successes = 190, number of trials = 265, p-value = 5.532e-13
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.667899 1.000000
## sample estimates:
## probability of success
## 0.7169811
Limit Change on Credit Card Hypothesis:
H_0: The number of respondents who answered Yes is
equal to the number of respondents who answered No.
H_1: The number of respondents who answered Yes is
greater than the number of respondents who answered No.
We reject H_0 at p < 0.001. The proportion of
Yes responses is greater than 50%.
##
## Exact binomial test
##
## data: 217 and 265
## number of successes = 217, number of trials = 265, p-value < 2.2e-16
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.7754507 1.0000000
## sample estimates:
## probability of success
## 0.8188679
Opening a Saving Account Hypothesis:
H_0: The number of respondents who answered Yes is
equal to the number of respondents who answered No.
H_1: The number of respondents who answered Yes is
greater than the number of respondents who answered No.
We reject H_0 at p < 0.001. The proportion of
Yes responses is greater than 50%.
##
## Exact binomial test
##
## data: 171 and 265
## number of successes = 171, number of trials = 265, p-value = 1.3e-06
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.5940064 1.0000000
## sample estimates:
## probability of success
## 0.645283
Term Deposits Hypothesis:
H_0: The number of respondents who answered Yes is
equal to the number of respondents who answered No.
H_1: The number of respondents who answered Yes is
greater than the number of respondents who answered No.
For Opening a Saving Account, we reject H_0 at p < 0.001. The proportion of
Yes responses is greater than 50%.
##
## Exact binomial test
##
## data: 121 and 265
## number of successes = 121, number of trials = 265, p-value = 0.9299
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.4049412 1.0000000
## sample estimates:
## probability of success
## 0.4566038
Market Research Hypothesis 2: Users have no difficulties
finding advanced services in the mobile bank.
H_0: μ = 3.
H_1: μ > 3.
For Term Deposits, we cannot reject H_0 (p = 0.93). The
proportion of Yes responses is not greater than 50%.
##
## Shapiro-Wilk normality test
##
## data: mydata2$Q25
## W = 0.87107, p-value = 3.88e-14
We reject H_0 at p < 0.001. The distribution is not
normal.
We need to use a non-parametric test.
##
## Wilcoxon signed rank test with continuity correction
##
## data: mydata2$Q25
## V = 10456, p-value = 9.898e-16
## alternative hypothesis: true location is greater than 3
Market Research Hypothesis 3: Users prefer to access advanced
services in-app.
H_0: The number of respondents who answered Yes is
equal to the number of respondents who answered No.
H_1: The number of respondents who answered Yes is
greater than the number of respondents who answered No.
We reject H_0 at p < 0.001. Users have no
difficulties finding advanced services in the mobile bank.
##
## Exact binomial test
##
## data: 198 and 320
## number of successes = 198, number of trials = 320, p-value = 1.271e-05
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.5718382 1.0000000
## sample estimates:
## probability of success
## 0.61875
Market Research Hypothesis 4: Users don’t use the advanced
services in the mobile bank because they don’t need them.
H_0: μ = 3
H_1: μ > 3
For Market Research Hypothesis 3, we reject H_0 at p < 0.001. The proportion of
Yes responses is greater than 50%.
##
## Shapiro-Wilk normality test
##
## data: mydata2$Q29a
## W = 0.88792, p-value = 4.419e-13
We reject H_0 at p < 0.001. The distribution is not
normal.
We need to use a non-parametric test.
##
## Wilcoxon signed rank test with continuity correction
##
## data: mydata2$Q29a
## V = 13012, p-value = 2.59e-07
## alternative hypothesis: true location is greater than 3
Market Research Hypothesis 5: Users perceive the value of the
advanced services offered by mobile banking as high.
H_0: μ = 3
H_1: μ > 3
We reject H_0 at p < 0.001. Users don’t use
the advanced services in the mobile bank because they don’t need
them.
##
## Shapiro-Wilk normality test
##
## data: mydata2$Q31a
## W = 0.77981, p-value < 2.2e-16
We reject H_0 at p < 0.001. The distribution is not
normal.
We need to use a non-parametric test.
##
## Wilcoxon signed rank test with continuity correction
##
## data: mydata2$Q31a
## V = 27089, p-value < 2.2e-16
## alternative hypothesis: true location is greater than 3
Market Research Hypothesis 6: Users need more detailed video
tutorials for advanced services.
H_0: μ = 3
H_1: μ > 3
We reject H_0 at p < 0.001. Users perceive the
value of the advanced services offered by mobile banking as high.
##
## Shapiro-Wilk normality test
##
## data: mydata2$Q30b
## W = 0.88174, p-value = 1.762e-13
We reject H_0 at p < 0.001. The distribution is not
normal.
We need to use a non-parametric test.
##
## Wilcoxon signed rank test with continuity correction
##
## data: mydata2$Q30b
## V = 14114, p-value = 3.013e-06
## alternative hypothesis: true location is greater than 3
Additional Market Research Hypothesis:
Users would be more likely to use advanced services if the app was
simpler to navigate.
H_0: μ = 3
H_1: μ > 3
We reject H_0 at p < 0.001. Users need more
detailed video tutorials for advanced services.
##
## Shapiro-Wilk normality test
##
## data: mydata2$Q30a
## W = 0.90609, p-value = 8.214e-12
We reject H_0 at p < 0.001. The distribution is not
normal.
We need to use a non-parametric test.
##
## Wilcoxon signed rank test with continuity correction
##
## data: mydata2$Q30a
## V = 9042.5, p-value = 0.2652
## alternative hypothesis: true location is greater than 3
Market Research Hypothesis:
Users would be more likely to use advanced services if the app had
faster processing times.
H_0: μ = 3
H_1: μ > 3
We cannot reject H_0. We cannot say that users would be
more likely to use advanced services if the app was simpler to
navigate.
##
## Shapiro-Wilk normality test
##
## data: mydata2$Q30c
## W = 0.8937, p-value = 1.079e-12
We reject H_0 at p < 0.001. The distribution is not
normal.
We need to use a non-parametric test.
##
## Wilcoxon signed rank test with continuity correction
##
## data: mydata2$Q30c
## V = 14359, p-value = 1.577e-07
## alternative hypothesis: true location is greater than 3
Market Research Hypothesis:
Users would be more likely to use advanced services if the app
offered personalized recommendations.
H_0: μ = 3
H_1: μ > 3
We reject H_0 at p < 0.001. Users would be
more likely to use advanced services if the app had faster processing
times.
##
## Shapiro-Wilk normality test
##
## data: mydata2$Q30d
## W = 0.89455, p-value = 1.233e-12
We reject H_0 at p < 0.001. The distribution is not
normal.
We need to use a non-parametric test.
##
## Wilcoxon signed rank test with continuity correction
##
## data: mydata2$Q30d
## V = 11254, p-value = 0.1314
## alternative hypothesis: true location is greater than 3
Market Research Hypothesis:
Users would be more likely to use advanced services if the app had
an improved design.
H_0: μ = 3
H_1: μ > 3
We cannot reject H_0. We cannot say that users would be
more likely to use advanced services if the app offered personalized
recommendations.
##
## Shapiro-Wilk normality test
##
## data: mydata2$Q30e
## W = 0.91132, p-value = 2.041e-11
We reject H_0 at p < 0.001. The distribution is not
normal.
We need to use a non-parametric test.
##
## Wilcoxon signed rank test with continuity correction
##
## data: mydata2$Q30e
## V = 8691, p-value = 0.8457
## alternative hypothesis: true location is greater than 3
We cannot reject H_0. We cannot say that users would be more likely to use advanced services if the app had an improved design.