mydata <- read.table("~/Program R/MVA/NLB Project/survey-results.csv",
header=TRUE,
sep=";",
dec=",") # Data was partialy cleaned in 1KA and Excel
mydata$ID <- seq(1,nrow(mydata)) # Adding variable ID for better understanding of data
head(mydata)
## Q8 Q19a Q19b Q19c Q19d Q19e Q19f Q19g Q19h Q26 Q27a_1 Q27b_1 Q27c_1 Q28 Q29
## 1 2 2 5 5 5 1 1 2 2 2 7 7 7 1 4
## 2 2 3 5 5 5 2 2 3 3 2 7 7 4 1 3
## 3 2 4 3 4 5 5 1 5 3 5 7 7 7 1 1
## 4 2 4 4 5 5 3 2 3 3 4 6 7 6 1 1
## 5 2 4 5 5 5 4 2 4 4 3 6 7 7 1 1
## 6 2 5 5 5 5 2 2 3 3 3 4 6 6 1 2
## Q30a_1 Q30b_1 Q30c_1 Q30d_1 Q30e_1 Q30f_1 Q30g_1 Q31a_1 Q31b_1 Q31c_1 Q32a_1
## 1 6 3 5 5 5 5 5 5 3 3 6
## 2 7 6 7 4 4 5 5 5 5 4 7
## 3 7 2 6 1 2 1 2 5 7 5 6
## 4 4 3 5 4 6 2 3 5 6 6 5
## 5 6 5 6 2 5 6 5 3 6 3 6
## 6 3 2 4 3 5 2 3 3 5 3 6
## Q32b_1 Q32c_1 Q32d_1 Q32e_1 Q33 Q34 Q35 Q36a Q36b Q36c Q36d Q36e Q46 Q1a_1
## 1 6 6 6 6 3 5 6 1 0 0 1 1 80 6
## 2 5 5 5 5 5 3 7 1 0 0 1 1 50 7
## 3 6 7 7 2 7 7 7 1 1 1 1 0 90 7
## 4 3 4 2 5 6 6 6 1 1 1 0 0 75 6
## 5 6 6 6 5 4 7 3 1 0 0 1 0 72 6
## 6 6 5 7 3 6 6 3 1 1 0 1 0 81 3
## Q1b_1 Q1c_1 Q1d_1 Q1e_1 Q1f_1 Q2a_1 Q2b_1 Q2c_1 Q3a_1 Q3b_1 Q3c_1 Q4a_1 Q4b_1
## 1 6 7 7 7 3 7 6 7 7 7 7 7 6
## 2 7 7 7 7 7 4 4 4 7 7 7 7 7
## 3 7 5 6 7 7 7 6 5 3 7 7 6 7
## 4 6 5 6 6 6 7 5 5 3 5 6 5 6
## 5 5 6 6 6 4 6 6 5 2 4 6 5 3
## 6 6 4 5 3 2 6 6 6 5 6 5 3 6
## Q4c_1 Q5a_1 Q5b_1 Q5c_1 Q6a_1 Q6b_1 Q6c_1 Q7a_1 Q7b_1 Q7c_1 Q39 Q40 Q37 Q38
## 1 7 7 7 7 7 6 6 7 7 7 1 1968 7 -2
## 2 7 7 7 7 7 3 3 2 7 7 2 1975 7 -2
## 3 7 3 7 7 7 6 5 2 7 7 2 1977 5 -2
## 4 6 3 6 6 7 5 7 6 6 6 2 1984 5 -2
## 5 4 2 1 4 5 7 4 4 5 7 1 1975 5 -2
## 6 5 3 7 6 5 4 4 6 5 6 2 1972 6 -2
## Q41 Q42 Q43 Q44 Q45 ID
## 1 7 1 3 1 1 1
## 2 7 1 3 2 1 2
## 3 6 1 4 1 10 3
## 4 6 1 4 1 1 4
## 5 6 1 3 3 1 5
## 6 6 1 3 1 1 6
mydata$Q8 <- as.numeric(mydata$Q8)
mydata$Q26 <- as.numeric(mydata$Q26)
mydata$Q33 <- as.numeric(mydata$Q33)
mydata$Q34 <- as.numeric(mydata$Q34)
mydata$Q35 <- as.numeric(mydata$Q35)
mydata$Q36a <- as.numeric(mydata$Q36a)
mydata$Q36b <- as.numeric(mydata$Q36b)
mydata$Q36c <- as.numeric(mydata$Q36c)
mydata$Q36d <- as.numeric(mydata$Q36d)
mydata$Q36e <- as.numeric(mydata$Q36e)
mydata$Q46 <- as.numeric(mydata$Q46)
mydata$Q40 <- as.numeric(mydata$Q40) # Transforming variables to numeric
mydata$Q19a <- factor(mydata$Q19a,
levels = c(4,1,2,3,5),
labels = c("Mostly digital payments", "Cash only", "Mostly cash", "Half cash, half digital payments", "Only digital payments"))
mydata$Q19b <- factor(mydata$Q19b,
levels = c(5,1,2,3,4),
labels = c("Only digital payments", "Cash only", "Mostly cash", "Half cash, half digital payments", "Mostly digital payments"))
mydata$Q19c <- factor(mydata$Q19c,
levels = c(5,1,2,3,4),
labels = c("Only digital payments", "Cash only", "Mostly cash", "Half cash, half digital payments", "Mostly digital payments"))
mydata$Q19d <- factor(mydata$Q19d,
levels = c(5,1,2,3,4),
labels = c("Only digital payments", "Cash only", "Mostly cash", "Half cash, half digital payments", "Mostly digital payments"))
mydata$Q19e <- factor(mydata$Q19e,
levels = c(3,1,2,5,4),
labels = c("Half cash, half digital payments", "Cash only", "Mostly cash", "Only digital payments", "Mostly digital payments"))
mydata$Q19f <- factor(mydata$Q19f,
levels = c(1,2,3,4,5),
labels = c("Cash only", "Mostly cash", "Half cash, half digital payments", "Mostly digital payments", "Only digital payments"))
mydata$Q19g <- factor(mydata$Q19g,
levels = c(3,1,2,5,4),
labels = c("Half cash, half digital payments", "Cash only", "Mostly cash", "Only digital payments", "Mostly digital payments"))
mydata$Q19h <- factor(mydata$Q19h,
levels = c(4,1,2,3,5),
labels = c("Mostly digital payments", "Cash only", "Mostly cash", "Half cash, half digital payments", "Only digital payments"))
mydata$Q28 <- factor(mydata$Q28,
levels = c(4,1,2,3),
labels = c("Cash", "Credit/Debit cards", "Mobile payment options", "Mobile banks"))
mydata$Q29 <- factor(mydata$Q29,
levels = c(1,2,3,4,5),
labels = c("Less than 50 EUR", "50 EUR - 100 EUR", "101 EUR - 300 EUR", "301 EUR - 500 EUR", "More than 500 EUR"))
mydata$Q39 <- factor(mydata$Q39,
levels = c(1,2),
labels = c("Male", "Female"))
mydata$Q37 <- factor(mydata$Q37,
levels = c(5,1,6,7),
labels = c("1.701 EUR - 2.500 EUR", "Pension", "2.501 EUR - 3.300 EUR", "Over 3.300 EUR"))
mydata$Q38 <- factor(mydata$Q38,
levels = c(-2,5),
labels = c("No pension", "Pension over 1.110 EUR"))
mydata$Q41 <- factor(mydata$Q41,
levels = c(6,1,2,3,4,5,7),
labels = c("Completed university academic education (also 2nd Bologna level)", "Incomplete primary school", "Completed primary school", "Completed lower or secondary vocational education", "Completed secondary professional or general education", "Completed higher professional or university professional education (also 1st Bologna level)","Completed specialization, scientific master’s degree, doctorate"))
mydata$Q42 <- factor(mydata$Q42,
levels = c(1,2,3,4),
labels = c("Employed for shorter/longer working hours", "Self-employed", "Retired", "Currently unemployed"))
mydata$Q43 <- factor(mydata$Q43,
levels = c(3,1,2,4,5,-2),
labels = c("Office professions (e.g., IT, finance)", "Physical work (e.g., construction, factory work)", "Service industry (e.g., retail, hospitality)", "Public sector (e.g., healthcare, education, politics)", "Creative/artistic work", "Self-employed or retired or currently unemployed"))
mydata$Q44 <- factor(mydata$Q44,
levels = c(1,2,3),
labels = c("Urban area (city or metropolitan region)", "Suburban area (on the outskirts of the city)", "Rural area (village or countryside)"))
mydata$Q45 <- factor(mydata$Q45,
levels = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14),
labels = c("NLB", "OTP", "Unicredit", "Raiffaisen", "Gorenjska banka", "Intesa Sanpaolo", "Delavska Hranilnica", "Revolut", "N26", "Sparkasse", "Sberbank", "Addiko", "Deželna banka Slovenije", "UBS")) # Adding factors
mydata$Q40_Age <- 2025-mydata$Q40 # Creating age out of variable year of birth
summary(mydata)
## Q8 Q19a
## Min. :2 Mostly digital payments :74
## 1st Qu.:2 Cash only : 3
## Median :2 Mostly cash :14
## Mean :2 Half cash, half digital payments:26
## 3rd Qu.:2 Only digital payments :38
## Max. :2
##
## Q19b Q19c
## Only digital payments :99 Only digital payments :104
## Cash only : 5 Cash only : 4
## Mostly cash : 5 Mostly cash : 5
## Half cash, half digital payments:16 Half cash, half digital payments: 12
## Mostly digital payments :30 Mostly digital payments : 30
##
##
## Q19d Q19e
## Only digital payments :111 Half cash, half digital payments:52
## Cash only : 5 Cash only :25
## Mostly cash : 6 Mostly cash :34
## Half cash, half digital payments: 10 Only digital payments :14
## Mostly digital payments : 23 Mostly digital payments :30
##
##
## Q19f Q19g
## Cash only :60 Half cash, half digital payments:45
## Mostly cash :63 Cash only :30
## Half cash, half digital payments:17 Mostly cash :39
## Mostly digital payments : 8 Only digital payments :14
## Only digital payments : 7 Mostly digital payments :27
##
##
## Q19h Q26 Q27a_1
## Mostly digital payments :51 Min. :1.000 Min. :1.000
## Cash only :17 1st Qu.:2.500 1st Qu.:5.000
## Mostly cash :21 Median :3.000 Median :6.000
## Half cash, half digital payments:45 Mean :3.245 Mean :5.768
## Only digital payments :21 3rd Qu.:4.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000
##
## Q27b_1 Q27c_1 Q28
## Min. :1.000 Min. :1.000 Cash :11
## 1st Qu.:6.000 1st Qu.:6.000 Credit/Debit cards :98
## Median :7.000 Median :6.000 Mobile payment options:34
## Mean :6.252 Mean :5.974 Mobile banks :12
## 3rd Qu.:7.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000
##
## Q29 Q30a_1 Q30b_1 Q30c_1
## Less than 50 EUR :71 Min. :1.000 Min. :1 Min. :1.000
## 50 EUR - 100 EUR :60 1st Qu.:5.000 1st Qu.:2 1st Qu.:4.000
## 101 EUR - 300 EUR:17 Median :6.000 Median :4 Median :5.000
## 301 EUR - 500 EUR: 4 Mean :5.497 Mean :4 Mean :4.974
## More than 500 EUR: 3 3rd Qu.:7.000 3rd Qu.:5 3rd Qu.:6.000
## Max. :7.000 Max. :7 Max. :7.000
##
## Q30d_1 Q30e_1 Q30f_1 Q30g_1
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.000 Median :5.000 Median :4.000 Median :5.000
## Mean :3.729 Mean :4.523 Mean :4.039 Mean :4.219
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q31a_1 Q31b_1 Q31c_1 Q32a_1
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.500 1st Qu.:4.000 1st Qu.:2.500 1st Qu.:5.000
## Median :6.000 Median :5.000 Median :4.000 Median :6.000
## Mean :5.277 Mean :5.019 Mean :3.735 Mean :5.594
## 3rd Qu.:7.000 3rd Qu.:6.500 3rd Qu.:5.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q32b_1 Q32c_1 Q32d_1 Q32e_1 Q33
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.0 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.000 1st Qu.:5.00 1st Qu.:3.0 1st Qu.:3.000
## Median :5.000 Median :6.000 Median :6.00 Median :5.0 Median :5.000
## Mean :5.135 Mean :5.484 Mean :5.71 Mean :4.4 Mean :4.381
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:7.00 3rd Qu.:6.0 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.00 Max. :7.0 Max. :7.000
##
## Q34 Q35 Q36a Q36b
## Min. :1.000 Min. :1.000 Min. :0.000 Min. :0.0000
## 1st Qu.:4.500 1st Qu.:3.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :6.000 Median :5.000 Median :1.000 Median :0.0000
## Mean :5.348 Mean :4.529 Mean :0.729 Mean :0.4194
## 3rd Qu.:6.500 3rd Qu.:6.000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :7.000 Max. :7.000 Max. :1.000 Max. :1.0000
##
## Q36c Q36d Q36e Q46
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 1.00
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 68.50
## Median :1.0000 Median :0.0000 Median :0.0000 Median : 80.00
## Mean :0.6194 Mean :0.3613 Mean :0.1677 Mean : 75.92
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.: 90.00
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :100.00
##
## Q1a_1 Q1b_1 Q1c_1 Q1d_1 Q1e_1
## Min. :1.000 Min. :2.000 Min. :1.000 Min. :2.000 Min. :1.0
## 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:5.000 1st Qu.:6.000 1st Qu.:5.0
## Median :6.000 Median :6.000 Median :6.000 Median :6.000 Median :6.0
## Mean :5.781 Mean :5.884 Mean :6.045 Mean :6.045 Mean :5.4
## 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:7.0
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.0
##
## Q1f_1 Q2a_1 Q2b_1 Q2c_1
## Min. :1.000 Min. :2.000 Min. :2.000 Min. :1.000
## 1st Qu.:5.000 1st Qu.:6.000 1st Qu.:5.000 1st Qu.:5.000
## Median :6.000 Median :6.000 Median :6.000 Median :6.000
## Mean :5.477 Mean :6.039 Mean :5.619 Mean :5.555
## 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:6.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q3a_1 Q3b_1 Q3c_1 Q4a_1
## Min. :1.000 Min. :2.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:6.000 1st Qu.:6.000 1st Qu.:5.000
## Median :5.000 Median :6.000 Median :6.000 Median :6.000
## Mean :4.813 Mean :6.052 Mean :5.974 Mean :5.445
## 3rd Qu.:6.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q4b_1 Q4c_1 Q5a_1 Q5b_1
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:6.000 1st Qu.:5.000 1st Qu.:3.000 1st Qu.:5.000
## Median :6.000 Median :6.000 Median :5.000 Median :6.000
## Mean :6.032 Mean :5.774 Mean :4.529 Mean :5.955
## 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q5c_1 Q6a_1 Q6b_1 Q6c_1
## Min. :1.000 Min. :2.000 Min. :1.000 Min. :1.000
## 1st Qu.:5.000 1st Qu.:6.000 1st Qu.:3.000 1st Qu.:3.000
## Median :6.000 Median :7.000 Median :4.000 Median :4.000
## Mean :5.826 Mean :6.219 Mean :4.252 Mean :4.219
## 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:5.000 3rd Qu.:6.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q7a_1 Q7b_1 Q7c_1 Q39 Q40
## Min. :1.000 Min. :1.000 Min. :1.000 Male :60 Min. :1954
## 1st Qu.:2.000 1st Qu.:5.000 1st Qu.:5.000 Female:95 1st Qu.:1969
## Median :4.000 Median :6.000 Median :6.000 Median :1977
## Mean :3.865 Mean :5.523 Mean :5.645 Mean :1978
## 3rd Qu.:6.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:1986
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :2002
##
## Q37 Q38
## 1.701 EUR - 2.500 EUR:96 No pension :154
## Pension : 1 Pension over 1.110 EUR: 1
## 2.501 EUR - 3.300 EUR:39
## Over 3.300 EUR :19
##
##
##
## Q41
## Completed university academic education (also 2nd Bologna level) :82
## Incomplete primary school : 1
## Completed primary school : 0
## Completed lower or secondary vocational education : 0
## Completed secondary professional or general education :10
## Completed higher professional or university professional education (also 1st Bologna level):26
## Completed specialization, scientific master’s degree, doctorate :36
## Q42
## Employed for shorter/longer working hours:117
## Self-employed : 27
## Retired : 11
## Currently unemployed : 0
##
##
##
## Q43
## Office professions (e.g., IT, finance) :64
## Physical work (e.g., construction, factory work) : 1
## Service industry (e.g., retail, hospitality) : 9
## Public sector (e.g., healthcare, education, politics):33
## Creative/artistic work : 2
## Self-employed or retired or currently unemployed :46
##
## Q44 Q45
## Urban area (city or metropolitan region) :93 NLB :63
## Suburban area (on the outskirts of the city):31 OTP :27
## Rural area (village or countryside) :31 Intesa Sanpaolo:18
## Unicredit :14
## Revolut : 8
## Gorenjska banka: 6
## (Other) :19
## ID Q40_Age
## Min. : 1.0 Min. :23.00
## 1st Qu.: 39.5 1st Qu.:38.50
## Median : 78.0 Median :48.00
## Mean : 78.0 Mean :46.58
## 3rd Qu.:116.5 3rd Qu.:56.00
## Max. :155.0 Max. :71.00
##
mydata_PCA <- mydata[,c("Q2a_1","Q2b_1","Q2c_1","Q3a_1","Q3b_1","Q3c_1","Q4a_1","Q4b_1","Q4c_1","Q5a_1","Q5b_1","Q5c_1","Q6a_1","Q6b_1","Q6c_1","Q7a_1","Q7b_1","Q7c_1")]
colnames(mydata_PCA) <- c("Cash_Security","Credit/Debit cards_Security","Mobile payments_Security","Cash_Speed of transactions","Credit/Debit cards_Speed of transactions","Mobile payments_Speed of transactions","Cash_Ease of use","Credit/Debit cards_Ease of use","Mobile payments_Ease of use","Cash_Convenience","Credit/Debit cards_Convenience","Mobile payments_Convenience","Cash_Privacy","Credit/Debit cards_Privacy","Mobile payments_Privacy","Cash_Tracking expenses","Credit/Debit cards_Tracking expenses","Mobile payments_Tracking expenses")
library(pastecs)
round(stat.desc(mydata_PCA, basic = FALSE), 2)
## Cash_Security Credit/Debit cards_Security Mobile payments_Security
## median 6.00 6.00 6.00
## mean 6.04 5.62 5.55
## SE.mean 0.09 0.09 0.09
## CI.mean.0.95 0.18 0.17 0.18
## var 1.26 1.13 1.26
## std.dev 1.12 1.06 1.12
## coef.var 0.19 0.19 0.20
## Cash_Speed of transactions
## median 5.00
## mean 4.81
## SE.mean 0.15
## CI.mean.0.95 0.29
## var 3.27
## std.dev 1.81
## coef.var 0.38
## Credit/Debit cards_Speed of transactions
## median 6.00
## mean 6.05
## SE.mean 0.07
## CI.mean.0.95 0.15
## var 0.87
## std.dev 0.93
## coef.var 0.15
## Mobile payments_Speed of transactions Cash_Ease of use
## median 6.00 6.00
## mean 5.97 5.45
## SE.mean 0.09 0.13
## CI.mean.0.95 0.18 0.26
## var 1.30 2.74
## std.dev 1.14 1.66
## coef.var 0.19 0.30
## Credit/Debit cards_Ease of use Mobile payments_Ease of use
## median 6.00 6.00
## mean 6.03 5.77
## SE.mean 0.10 0.11
## CI.mean.0.95 0.19 0.21
## var 1.50 1.73
## std.dev 1.22 1.32
## coef.var 0.20 0.23
## Cash_Convenience Credit/Debit cards_Convenience
## median 5.00 6.00
## mean 4.53 5.95
## SE.mean 0.15 0.10
## CI.mean.0.95 0.30 0.20
## var 3.60 1.55
## std.dev 1.90 1.24
## coef.var 0.42 0.21
## Mobile payments_Convenience Cash_Privacy
## median 6.00 7.00
## mean 5.83 6.22
## SE.mean 0.11 0.09
## CI.mean.0.95 0.22 0.18
## var 1.89 1.28
## std.dev 1.37 1.13
## coef.var 0.24 0.18
## Credit/Debit cards_Privacy Mobile payments_Privacy
## median 4.00 4.00
## mean 4.25 4.22
## SE.mean 0.13 0.13
## CI.mean.0.95 0.25 0.26
## var 2.55 2.69
## std.dev 1.60 1.64
## coef.var 0.38 0.39
## Cash_Tracking expenses Credit/Debit cards_Tracking expenses
## median 4.00 6.00
## mean 3.86 5.52
## SE.mean 0.16 0.12
## CI.mean.0.95 0.33 0.23
## var 4.20 2.12
## std.dev 2.05 1.46
## coef.var 0.53 0.26
## Mobile payments_Tracking expenses
## median 6.00
## mean 5.65
## SE.mean 0.12
## CI.mean.0.95 0.24
## var 2.28
## std.dev 1.51
## coef.var 0.27
R <- cor(mydata_PCA)
library(psych)
cortest.bartlett(R, n = nrow(mydata))
## $chisq
## [1] 1272.097
##
## $p.value
## [1] 1.417576e-175
##
## $df
## [1] 153
library(psych)
KMO(R)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = R)
## Overall MSA = 0.71
## MSA for each item =
## Cash_Security
## 0.76
## Credit/Debit cards_Security
## 0.67
## Mobile payments_Security
## 0.73
## Cash_Speed of transactions
## 0.73
## Credit/Debit cards_Speed of transactions
## 0.81
## Mobile payments_Speed of transactions
## 0.76
## Cash_Ease of use
## 0.72
## Credit/Debit cards_Ease of use
## 0.76
## Mobile payments_Ease of use
## 0.78
## Cash_Convenience
## 0.64
## Credit/Debit cards_Convenience
## 0.73
## Mobile payments_Convenience
## 0.82
## Cash_Privacy
## 0.80
## Credit/Debit cards_Privacy
## 0.50
## Mobile payments_Privacy
## 0.54
## Cash_Tracking expenses
## 0.67
## Credit/Debit cards_Tracking expenses
## 0.65
## Mobile payments_Tracking expenses
## 0.68
library(FactoMineR)
components <- PCA(mydata_PCA,
scale.unit = TRUE,
graph = FALSE)
library(factoextra)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
get_eigenvalue(components)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 4.3576765 24.2093140 24.20931
## Dim.2 3.2645122 18.1361787 42.34549
## Dim.3 1.8980219 10.5445664 52.89006
## Dim.4 1.3961992 7.7566625 60.64672
## Dim.5 1.1869459 6.5941437 67.24087
## Dim.6 0.9251920 5.1399558 72.38082
## Dim.7 0.8178039 4.5433552 76.92418
## Dim.8 0.6538489 3.6324937 80.55667
## Dim.9 0.6307985 3.5044359 84.06111
## Dim.10 0.5747143 3.1928575 87.25396
## Dim.11 0.5207851 2.8932507 90.14721
## Dim.12 0.4027546 2.2375255 92.38474
## Dim.13 0.3591583 1.9953239 94.38006
## Dim.14 0.2702578 1.5014323 95.88150
## Dim.15 0.2287672 1.2709287 97.15242
## Dim.16 0.2124628 1.1803488 98.33277
## Dim.17 0.1681656 0.9342535 99.26703
## Dim.18 0.1319352 0.7329734 100.00000
library(factoextra)
fviz_eig(components,
choice = "eigenvalue",
main = "Scree plot",
ylab = "Eigenvalue",
addlabels = TRUE) # Elbow method
library(psych)
fa.parallel(mydata_PCA,
sim = FALSE,
fa = "pc") # Parallel analysis
## Parallel analysis suggests that the number of factors = NA and the number of components = 3
library(tibble)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:pastecs':
##
## extract
mydata_PCA_final <- mydata_PCA %>%
pivot_longer(everything(), names_to = "name", values_to = "score") %>%
separate(name, into = c("retailer", "dimension"), sep = "_")%>%
pivot_wider(names_from = dimension, values_from = score, values_fn = mean) %>%
column_to_rownames(var = "retailer")
print(mydata_PCA_final)
## Security Speed of transactions Ease of use Convenience
## Cash 6.038710 4.812903 5.445161 4.529032
## Credit/Debit cards 5.619355 6.051613 6.032258 5.954839
## Mobile payments 5.554839 5.974194 5.774194 5.825806
## Privacy Tracking expenses
## Cash 6.219355 3.864516
## Credit/Debit cards 4.251613 5.522581
## Mobile payments 4.219355 5.645161
library(FactoMineR)
components <- PCA(mydata_PCA_final,
scale.unit = TRUE,
graph = FALSE,
ncp = 4)
components
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 3 individuals, described by 6 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
print(components$var$cor)
## Dim.1 Dim.2
## Security -0.9833236 0.181864489
## Speed of transactions 0.9999921 -0.003975209
## Ease of use 0.9233301 0.384007219
## Convenience 0.9997555 0.022112481
## Privacy -0.9972728 0.073803412
## Tracking expenses 0.9926335 -0.121156035
print(components$var$contrib)
## Dim.1 Dim.2
## Security 16.67448 16.441432738
## Speed of transactions 17.24458 0.007855317
## Ease of use 14.70190 73.303147426
## Convenience 17.23642 0.243062971
## Privacy 17.15092 2.707676213
## Tracking expenses 16.99171 7.296825335
library(factoextra)
fviz_pca_biplot(components,
repel = TRUE)
mydata_cluster <- mydata[,c("ID","Q19a","Q19b","Q19c","Q19d","Q19e","Q19f","Q19g","Q19h","Q28","Q29","Q30a_1","Q30b_1","Q30c_1","Q30d_1","Q30e_1","Q30f_1","Q30g_1","Q31a_1","Q31b_1","Q46","Q39","Q40_Age","Q37","Q41","Q42","Q43","Q44","Q45")]
colnames(mydata_cluster) <- c("ID","Grocery shopping", "Online shopping", "Subscriptions", "Bill payments", "Paying for services", "Tips", "Transport", "Social events","Most frequent payment method","Cash carrying","Security concerns","Lack of trust in tehcnology","Privacy concerns","Complexity of use","Hidden transaction costs","Lack of availability","Tehnical issues concerns","Cash_Safety","Cash_privacy","Digital payment usage","Gender","Age","Income","Education","Employment status","Employment","Area of living","Primary bank")
mydata_cluster$Age <- ifelse(mydata_cluster$Age>48,"Older","Younger")
mydata_cluster$Age <- factor(mydata_cluster$Age,
levels = c("Older","Younger"),
labels = c("Older","Younger"))
mydata_cluster$Education <- ifelse(
mydata_cluster$Education == "Completed university academic education (also 2nd Bologna level)" |
mydata_cluster$Education == "Completed specialization, scientific master’s degree, doctorate",
"Higher education",
"Lower education")
mydata_cluster$Education <- factor(mydata_cluster$Education,
levels = c("Higher education","Lower education"),
labels = c("Higher education","Lower education"))
mydata_cluster$`Employment status`<-ifelse(mydata_cluster$`Employment status`=="Employed for shorter/longer working hours","Full-time employees","Others")
mydata_cluster$`Employment status`<-factor(mydata_cluster$`Employment status`,
levels = c("Full-time employees","Others"),
labels = c("Full-time employees","Others"))
mydata_cluster$Employment <-ifelse(mydata_cluster$Employment == "Office professions (e.g., IT, finance)","Mental work",ifelse(mydata_cluster$Employment == "Self-employed or retired or currently unemployed","Other","Physical work"))
mydata_cluster$Employment <-factor(mydata_cluster$Employment,
levels = c("Mental work","Other","Physical work"),
labels = c("Mental work","Other","Physical work"))
mydata_cluster$`Area of living` <- ifelse(mydata_cluster$`Area of living` ==
"Rural area (village or countryside)","Rural area","Urban area")
mydata_cluster$`Area of living` <- factor(mydata_cluster$`Area of living`,
levels = c("Urban area","Rural area"),
labels = c("Urban area","Rural area"))
mydata_cluster$`Primary bank` <- ifelse(
is.na(mydata_cluster$`Primary bank`), "Other banks",
ifelse(mydata_cluster$`Primary bank` == "NLB", "NLB",
ifelse(mydata_cluster$`Primary bank` == "OTP", "OTP","Other banks")))
mydata_cluster$`Primary bank` <- factor(mydata_cluster$`Primary bank`,
levels = c("NLB","OTP","Other banks"),
labels = c("NLB","OTP","Other banks"))
mydata_cluster$`Grocery shopping` <- ifelse(mydata_cluster$`Grocery shopping`== "Mostly digital payments" |mydata_cluster$`Grocery shopping`== "Only digital payments", "Digital payments", "Cash")
mydata_cluster$`Grocery shopping` <- factor(mydata_cluster$`Grocery shopping`,
levels = c("Digital payments","Cash"),
labels = c("Digital payments","Cash"))
mydata_cluster$`Online shopping` <- ifelse(mydata_cluster$`Online shopping`== "Mostly digital payments" |mydata_cluster$`Online shopping`== "Only digital payments", "Digital payments", "Cash")
mydata_cluster$`Online shopping` <- factor(mydata_cluster$`Online shopping`,
levels = c("Digital payments","Cash"),
labels = c("Digital payments","Cash"))
mydata_cluster$Subscriptions <- ifelse(mydata_cluster$Subscriptions == "Mostly digital payments" |mydata_cluster$Subscriptions== "Only digital payments", "Digital payments", "Cash")
mydata_cluster$Subscriptions <- factor(mydata_cluster$Subscriptions,
levels = c("Digital payments","Cash"),
labels = c("Digital payments","Cash"))
mydata_cluster$`Bill payments` <- ifelse(mydata_cluster$`Bill payments` == "Mostly digital payments" |mydata_cluster$`Bill payments`== "Only digital payments", "Digital payments", "Cash")
mydata_cluster$`Bill payments` <- factor(mydata_cluster$`Bill payments`,
levels = c("Digital payments","Cash"),
labels = c("Digital payments","Cash"))
mydata_cluster$`Paying for services` <- ifelse(mydata_cluster$`Paying for services` == "Mostly digital payments" |mydata_cluster$`Paying for services`== "Only digital payments", "Digital payments", "Cash")
mydata_cluster$`Paying for services` <- factor(mydata_cluster$`Paying for services`,
levels = c("Digital payments","Cash"),
labels = c("Digital payments","Cash"))
mydata_cluster$Tips <- ifelse(mydata_cluster$Tips == "Mostly digital payments" |mydata_cluster$Tips == "Only digital payments", "Digital payments", "Cash")
mydata_cluster$Tips <- factor(mydata_cluster$Tips,
levels = c("Digital payments","Cash"),
labels = c("Digital payments","Cash"))
mydata_cluster$Transport <- ifelse(mydata_cluster$Transport == "Mostly digital payments" |mydata_cluster$Transport == "Only digital payments", "Digital payments", "Cash")
mydata_cluster$Transport <- factor(mydata_cluster$Transport,
levels = c("Digital payments","Cash"),
labels = c("Digital payments","Cash"))
mydata_cluster$`Social events` <- ifelse(mydata_cluster$`Social events` == "Mostly digital payments" |mydata_cluster$`Social events` == "Only digital payments", "Digital payments", "Cash")
mydata_cluster$`Social events` <- factor(mydata_cluster$`Social events`,
levels = c("Digital payments","Cash"),
labels = c("Digital payments","Cash"))
mydata_cluster$`Cash carrying` <-ifelse(mydata_cluster$`Cash carrying`== "Less than 50 EUR","Small amount", "Large amount")
mydata_cluster$`Cash carrying` <- factor(mydata_cluster$`Cash carrying`,
levels = c("Small amount","Large amount"),
labels = c("Small amount","Large amount"))
mydata_cluster$`Digital payment usage` <- ifelse(mydata_cluster$`Digital payment usage` > 80,"High usage","Low usage")
mydata_cluster$`Digital payment usage`<- factor(mydata_cluster$`Digital payment usage`,
levels = c("High usage","Low usage"),
labels = c("High usage","Low usage"))
mydata_cluster_std <- as.data.frame(scale(mydata_cluster[c("Security concerns","Lack of trust in tehcnology","Privacy concerns","Complexity of use","Hidden transaction costs","Lack of availability","Tehnical issues concerns")]))
mydata_cluster$Dissimilarity <- sqrt(mydata_cluster_std$`Security concerns`^2 + mydata_cluster_std$`Lack of trust in tehcnology`^2 + mydata_cluster_std$`Privacy concerns`^2 + mydata_cluster_std$`Complexity of use`^2 +
mydata_cluster_std$`Hidden transaction costs`^2 + mydata_cluster_std$`Lack of availability`^2 + mydata_cluster_std$`Tehnical issues concerns`^2)
head(mydata_cluster[order(-mydata_cluster$Dissimilarity),c("ID","Dissimilarity")])
## ID Dissimilarity
## 95 95 5.160370
## 131 131 5.017509
## 150 150 4.852034
## 135 135 4.303267
## 75 75 4.234314
## 10 10 4.147864
library(factoextra)
# Finding Euclidean distances, based on 7 Cluster variables, then saving them into object Distances
Distances <- get_dist(mydata_cluster_std,
method = "euclidian")
fviz_dist(Distances, # Showing matrix of distances
gradient = list(low = "darkred",
mid = "grey95",
high = "white"))
library(factoextra)
get_clust_tendency(mydata_cluster_std, # Hopkins statistics
n = nrow(mydata_cluster_std) - 1,
graph = FALSE)
## $hopkins_stat
## [1] 0.6029137
##
## $plot
## NULL
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:pastecs':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(factoextra)
WARD <-mydata_cluster_std%>%get_dist(method = "euclidean")%>%hclust("ward.D2")
WARD
##
## Call:
## hclust(d = ., method = "ward.D2")
##
## Cluster method : ward.D2
## Distance : euclidean
## Number of objects: 155
library(factoextra)
fviz_dend(WARD)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
library(factoextra)
library(NbClust)
fviz_nbclust(mydata_cluster_std, kmeans, method = "wss") +
labs(subtitle = "Elbow method")
fviz_nbclust(mydata_cluster_std, kmeans, method = "silhouette") +
labs(subtitle = "Silhouette analysis")
library(NbClust)
NbClust(mydata_cluster_std,
distance = "euclidean",
min.nc = 2, max.nc = 10,
method = "kmeans",
index = "all")
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 8 proposed 2 as the best number of clusters
## * 9 proposed 3 as the best number of clusters
## * 2 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 2 proposed 9 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## 2 4.0263 89.6074 32.6618 -1.8003 186.9104 9.830137e+13 8553.364 679.8392
## 3 1.5520 70.2370 22.0769 -1.8855 347.9424 7.826123e+13 5522.140 560.2412
## 4 4.2119 60.5834 12.0651 -1.6311 441.9824 7.584616e+13 4934.573 489.1898
## 5 0.3535 51.7394 14.1502 -2.2107 559.2533 5.561248e+13 4382.911 452.9951
## 6 1.1168 47.8054 12.3119 -1.6300 594.0944 6.396073e+13 3663.242 413.9457
## 7 1.8292 44.8783 8.8407 -1.2695 691.3385 4.648777e+13 3116.462 382.3520
## 8 0.6343 41.7440 9.9972 -1.1527 761.4121 3.863523e+13 2732.960 360.7997
## 9 2.6277 39.9858 6.3559 -0.6498 818.2622 3.388444e+13 2387.824 337.8248
## 10 1.6681 37.5376 2.8868 -0.7425 862.5248 3.144097e+13 2157.894 323.7317
## Friedman Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky
## 2 5.8407 1.5857 0.4185 1.3066 0.3071 1.2713 -24.5384 -0.9624 0.4254
## 3 8.5968 1.9242 0.4305 1.4932 0.2575 1.0640 -4.9961 -0.2709 0.3984
## 4 10.0629 2.2036 0.4432 1.5785 0.2164 1.2914 -14.6655 -1.0104 0.3688
## 5 12.5157 2.3797 0.4231 1.7778 0.2207 1.7604 -23.7575 -1.8992 0.3394
## 6 12.5887 2.6042 0.4280 1.6574 0.2125 1.3692 -11.5943 -1.1798 0.3202
## 7 14.7632 2.8194 0.4089 1.5824 0.1996 1.7031 -11.5596 -1.7741 0.3033
## 8 16.0798 2.9878 0.3971 1.5012 0.2089 1.7010 -6.5936 -1.6725 0.2881
## 9 17.2846 3.1910 0.3923 1.4959 0.2172 1.6978 -13.5634 -1.7828 0.2761
## 10 18.3995 3.3299 0.4535 1.4326 0.2145 0.9441 0.6514 0.2591 0.2643
## Ball Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 339.9196 0.5316 0.6084 0.6705 0.1223 0.0017 1.6143 1.9983 1.0090
## 3 186.7471 0.5470 1.4263 1.1572 0.1798 0.0020 1.6116 1.8077 0.5689
## 4 122.2975 0.4850 0.7034 1.8101 0.1401 0.0023 1.7873 1.6701 0.4778
## 5 90.5990 0.4533 0.3369 2.4501 0.1136 0.0024 2.1364 1.6084 0.4460
## 6 68.9909 0.4432 0.3131 2.9374 0.1206 0.0025 1.9228 1.5406 0.4227
## 7 54.6217 0.4315 0.2341 3.4551 0.0996 0.0026 1.9782 1.4746 0.4098
## 8 45.1000 0.4268 0.0324 3.7480 0.1040 0.0026 1.9775 1.4313 0.3847
## 9 37.5361 0.4354 0.3079 3.8445 0.1502 0.0026 2.0356 1.3887 0.3669
## 10 32.3732 0.4281 -1.1872 4.1132 0.1259 0.0027 1.9915 1.3639 0.3565
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.7325 41.9944 1.0000
## 3 0.7168 32.7901 1.0000
## 4 0.6881 29.4597 1.0000
## 5 0.6051 35.8946 1.0000
## 6 0.5874 30.2070 1.0000
## 7 0.5300 24.8349 1.0000
## 8 0.4004 23.9565 1.0000
## 9 0.5581 26.1319 1.0000
## 10 0.5874 7.7274 0.9686
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 4.0000 2.0000 3.0000 9.0000 3.000 5.000000e+00 3.000
## Value_Index 4.2119 89.6074 10.5849 -0.6498 161.032 2.858193e+13 3031.224
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 3.0000 3.000 4.0000 9.0000 2.0000 2.0000 2.0000
## Value_Index 48.5467 2.756 -0.1034 0.3923 1.3066 0.3071 1.2713
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain
## Number_clusters 2.0000 2.0000 2.0000 3.0000 3.000 1 2.0000
## Value_Index -24.5384 -0.9624 0.4254 153.1726 0.547 NA 0.6705
## Dunn Hubert SDindex Dindex SDbw
## Number_clusters 3.0000 0 3.0000 0 10.0000
## Value_Index 0.1798 0 1.6116 0 0.3565
##
## $Best.partition
## [1] 1 1 3 3 1 2 2 3 3 2 3 2 2 1 3 3 1 1 3 1 1 1 2 2 3 3 3 3 1 3 3 3 2 3 1 1 1
## [38] 1 1 3 2 2 3 2 3 3 2 3 1 1 1 1 1 3 1 1 3 1 3 1 1 3 1 3 1 1 3 1 2 1 2 2 1 1
## [75] 2 1 1 1 2 1 1 3 3 1 1 1 1 3 1 2 3 1 1 1 2 2 1 1 1 1 1 3 1 2 2 1 3 3 2 3 1
## [112] 2 3 2 2 3 3 3 2 2 1 2 1 1 1 2 1 1 2 1 2 1 1 2 2 3 1 2 1 2 3 1 3 3 2 1 1 1
## [149] 3 2 3 3 1 1 3
Clustering <- kmeans(mydata_cluster_std,centers = 5,nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 33, 22, 46, 25, 29
##
## Cluster means:
## Security concerns Lack of trust in tehcnology Privacy concerns
## 1 0.4068932 -0.3248583 0.3594427
## 2 -0.5064599 -0.5360163 -0.6834639
## 3 0.2316735 0.2796607 0.3856747
## 4 -1.4273458 -0.9862699 -1.5781226
## 5 0.7841830 1.1829324 0.8581595
## Complexity of use Hidden transaction costs Lack of availability
## 1 -0.6385697 -0.4393307 -0.8801734
## 2 -0.6074640 -0.1380198 0.5873926
## 3 0.4626393 0.5039033 0.3282859
## 4 -0.9079450 -1.3492894 -0.9839571
## 5 1.2363525 0.9685183 0.8834780
## Tehnical issues concerns
## 1 -0.9369453
## 2 0.4006879
## 3 0.2221564
## 4 -0.9133034
## 5 1.1971535
##
## Clustering vector:
## [1] 3 3 1 1 3 4 4 2 1 4 1 4 4 5 1 3 3 3 1 5 5 5 4 2 3 1 1 2 3 1 1 1 2 2 5 5 5
## [38] 5 3 1 4 4 1 4 1 1 2 1 5 3 3 5 5 2 5 5 3 3 1 5 3 1 5 1 3 5 3 3 4 5 4 4 3 5
## [75] 4 5 3 5 2 3 3 3 1 5 3 3 3 1 5 4 1 5 2 3 4 2 3 3 3 3 3 1 3 4 2 5 1 1 4 1 3
## [112] 2 1 4 4 1 1 1 2 4 3 2 3 3 3 2 2 3 2 3 4 5 2 4 4 3 3 4 2 2 2 3 1 1 2 3 5 5
## [149] 3 4 3 3 5 5 1
##
## Within cluster sum of squares by cluster:
## [1] 97.60250 85.55375 138.94337 74.59530 45.97807
## (between_SS / total_SS = 58.9 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
fviz_cluster(Clustering,palette = "Set1", repel = TRUE, ggtheme = theme_bw(),data = mydata_cluster_std)
library(dplyr)
mydata_cluster <-mydata_cluster %>% filter(!ID %in% c(133,139))
mydata_cluster$ID <-seq(1,nrow(mydata_cluster))
mydata_cluster_std <- as.data.frame(scale(mydata_cluster[c("Security concerns","Lack of trust in tehcnology","Privacy concerns","Complexity of use","Hidden transaction costs","Lack of availability","Tehnical issues concerns")]))
Clustering <- kmeans(mydata_cluster_std,centers = 5,nstart = 25)
Clustering
## K-means clustering with 5 clusters of sizes 48, 29, 18, 33, 25
##
## Cluster means:
## Security concerns Lack of trust in tehcnology Privacy concerns
## 1 0.2208215 0.2710113 0.3838910
## 2 0.7795892 1.1842503 0.8574089
## 3 -0.6157766 -0.6801511 -0.8930492
## 4 0.4045221 -0.3207630 0.3613883
## 5 -1.4189109 -0.9809562 -1.5657021
## Complexity of use Hidden transaction costs Lack of availability
## 1 0.4138857 0.4485749 0.3327816
## 2 1.2245582 0.9660758 0.9056486
## 3 -0.5902442 -0.1004808 0.5943471
## 4 -0.6552154 -0.4332418 -0.8677019
## 5 -0.9252877 -1.3376864 -0.9720564
## Tehnical issues concerns
## 1 0.2733460
## 2 1.2078796
## 3 0.2697852
## 4 -0.9240638
## 5 -0.9004458
##
## Clustering vector:
## [1] 1 1 4 4 1 5 5 1 4 5 4 5 5 2 4 1 1 1 4 2 2 2 5 3 1 4 4 3 1 4 4 4 3 3 2 2 2
## [38] 2 1 4 5 5 4 5 4 4 3 4 2 1 1 2 2 3 2 2 1 1 4 2 1 4 2 4 1 2 1 1 5 2 5 5 1 2
## [75] 5 2 1 2 3 1 1 1 4 2 1 1 1 4 2 5 4 2 3 1 5 3 1 1 1 1 1 4 1 5 3 2 4 4 5 4 1
## [112] 3 4 5 5 4 4 4 3 5 1 3 1 1 1 3 1 1 3 1 5 2 5 5 1 1 5 3 3 1 4 4 3 1 2 2 1 5
## [149] 1 1 2 2 4
##
## Within cluster sum of squares by cluster:
## [1] 147.22154 45.98789 67.57078 97.17606 74.23017
## (between_SS / total_SS = 59.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
library(factoextra)
fviz_cluster(Clustering,palette = "Set1", repel = TRUE, ggtheme = theme_bw(),data = mydata_cluster_std)
Averages <-Clustering$centers
Averages
## Security concerns Lack of trust in tehcnology Privacy concerns
## 1 0.2208215 0.2710113 0.3838910
## 2 0.7795892 1.1842503 0.8574089
## 3 -0.6157766 -0.6801511 -0.8930492
## 4 0.4045221 -0.3207630 0.3613883
## 5 -1.4189109 -0.9809562 -1.5657021
## Complexity of use Hidden transaction costs Lack of availability
## 1 0.4138857 0.4485749 0.3327816
## 2 1.2245582 0.9660758 0.9056486
## 3 -0.5902442 -0.1004808 0.5943471
## 4 -0.6552154 -0.4332418 -0.8677019
## 5 -0.9252877 -1.3376864 -0.9720564
## Tehnical issues concerns
## 1 0.2733460
## 2 1.2078796
## 3 0.2697852
## 4 -0.9240638
## 5 -0.9004458
Figure <- as.data.frame(Averages)
Figure$ID <- 1:nrow(Figure)
library(tidyr)
Figure <- pivot_longer(Figure, cols = c("Security concerns","Lack of trust in tehcnology","Privacy concerns","Complexity of use","Hidden transaction costs","Lack of availability","Tehnical issues concerns"))
Figure$Group <- factor(Figure$ID,
levels = c(1, 2, 3, 4, 5),
labels = c("1", "2", "3","4", "5"))
Figure$NameF <- factor(Figure$name,
levels = c("Security concerns","Lack of trust in tehcnology","Privacy concerns","Complexity of use","Hidden transaction costs","Lack of availability","Tehnical issues concerns"),
labels = c("Security concerns","Lack of trust in tehcnology","Privacy concerns","Complexity of use","Hidden transaction costs","Lack of availability","Tehnical issues concerns"))
library(ggplot2)
ggplot(Figure, aes(x = NameF, y = value)) +
geom_hline(yintercept = 0) +
theme_bw() +
geom_point(aes(shape = Group, col = Group), size = 3) +
geom_line(aes(group = ID), linewidth = 1) +
ylab("Averages") +
xlab("Cluster variables") +
ylim(-2.2, 2.2) +
theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 10))
mydata_cluster$Group <-Clustering$cluster
fit <- aov(cbind(`Security concerns`,`Lack of trust in tehcnology`,`Privacy concerns`,`Complexity of use`,`Hidden transaction costs`,`Lack of availability`,`Tehnical issues concerns`) ~ as.factor(Group), data = mydata_cluster)
summary(fit)
## Response Security concerns :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 216.22 54.054 43.948 < 2.2e-16 ***
## Residuals 148 182.03 1.230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Lack of trust in tehcnology :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 279.38 69.846 41.085 < 2.2e-16 ***
## Residuals 148 251.61 1.700
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Privacy concerns :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 338.46 84.615 91.826 < 2.2e-16 ***
## Residuals 148 136.38 0.921
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Complexity of use :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 353.31 88.328 59.221 < 2.2e-16 ***
## Residuals 148 220.74 1.491
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Hidden transaction costs :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 291.36 72.841 50.649 < 2.2e-16 ***
## Residuals 148 212.84 1.438
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Lack of availability :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 289.88 72.470 45.619 < 2.2e-16 ***
## Residuals 148 235.11 1.589
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Tehnical issues concerns :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 363.82 90.954 62.814 < 2.2e-16 ***
## Residuals 148 214.30 1.448
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aggregate(mydata_cluster$Cash_Safety,by=list(mydata_cluster$Group),FUN=mean) # Cross validity with variable reliability
## Group.1 x
## 1 1 5.583333
## 2 2 6.137931
## 3 3 4.555556
## 4 4 5.242424
## 5 5 4.240000
aggregate(mydata_cluster$Cash_privacy,by=list(mydata_cluster$Group),FUN=mean)
## Group.1 x
## 1 1 5.208333
## 2 2 6.034483
## 3 3 4.500000
## 4 4 5.212121
## 5 5 3.760000
Cross validity successfull, both added variables are similar across all clusters.
library(car) # Criterian validity
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
leveneTest(mydata_cluster$Cash_Safety,as.factor(mydata_cluster$Group))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 4.0225 0.003982 **
## 148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(car)
leveneTest(mydata_cluster$Cash_privacy,as.factor(mydata_cluster$Group))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 4.3148 0.002489 **
## 148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(dplyr)
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
mydata_cluster%>%
group_by(as.factor(mydata_cluster$Group)) %>%
shapiro_test(Cash_Safety)
## # A tibble: 5 × 4
## `as.factor(mydata_cluster$Group)` variable statistic p
## <fct> <chr> <dbl> <dbl>
## 1 1 Cash_Safety 0.867 0.0000615
## 2 2 Cash_Safety 0.774 0.0000295
## 3 3 Cash_Safety 0.875 0.0219
## 4 4 Cash_Safety 0.871 0.00100
## 5 5 Cash_Safety 0.906 0.0243
mydata_cluster%>%
group_by(as.factor(mydata_cluster$Group)) %>%
shapiro_test(Cash_privacy) # Normality violated
## # A tibble: 5 × 4
## `as.factor(mydata_cluster$Group)` variable statistic p
## <fct> <chr> <dbl> <dbl>
## 1 1 Cash_privacy 0.875 0.000109
## 2 2 Cash_privacy 0.780 0.0000366
## 3 3 Cash_privacy 0.863 0.0135
## 4 4 Cash_privacy 0.855 0.000448
## 5 5 Cash_privacy 0.880 0.00682
fit1 <-aov(cbind(mydata_cluster$Cash_Safety,mydata_cluster$Cash_privacy)~as.factor(Group),data = mydata_cluster)
summary(fit1) #Criterian validity check, statistical difference
## Response 1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 62.29 15.5726 6.8557 4.328e-05 ***
## Residuals 148 336.18 2.2715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response 2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 4 77.22 19.3056 6.7474 5.133e-05 ***
## Residuals 148 423.46 2.8612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
kruskal.test(mydata_cluster$Cash_Safety~Group,
data = mydata_cluster) # We reject H0, distribution location of cash safety is different in all groups, data validated.
##
## Kruskal-Wallis rank sum test
##
## data: mydata_cluster$Cash_Safety by Group
## Kruskal-Wallis chi-squared = 20.787, df = 4, p-value = 0.0003489
kruskal.test(mydata_cluster$Cash_privacy~Group,
data = mydata_cluster) # We reject H0, distribution location of cash privacy is different in all groups, data validated.
##
## Kruskal-Wallis rank sum test
##
## data: mydata_cluster$Cash_privacy by Group
## Kruskal-Wallis chi-squared = 20.178, df = 4, p-value = 0.0004606
Descriptor1 <- chisq.test(mydata_cluster$Gender,as.factor(mydata_cluster$Group))
Descriptor1 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$Gender and as.factor(mydata_cluster$Group)
## X-squared = 2.3469, df = 4, p-value = 0.6722
addmargins(Descriptor1$observed)
##
## mydata_cluster$Gender 1 2 3 4 5 Sum
## Male 20 9 9 13 8 59
## Female 28 20 9 20 17 94
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor1$expected,2))
##
## mydata_cluster$Gender 1 2 3 4 5 Sum
## Male 18.51 11.18 6.94 12.73 9.64 59
## Female 29.49 17.82 11.06 20.27 15.36 94
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor1$residuals,2)
##
## mydata_cluster$Gender 1 2 3 4 5
## Male 0.35 -0.65 0.78 0.08 -0.53
## Female -0.27 0.52 -0.62 -0.06 0.42
library(effectsize)
##
## Attaching package: 'effectsize'
## The following objects are masked from 'package:rstatix':
##
## cohens_d, eta_squared
## The following object is masked from 'package:psych':
##
## phi
effectsize::cramers_v(mydata_cluster$Gender,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.00 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor2 <- chisq.test(mydata_cluster$Age,as.factor(mydata_cluster$Group))
Descriptor2 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$Age and as.factor(mydata_cluster$Group)
## X-squared = 4.0682, df = 4, p-value = 0.3969
addmargins(Descriptor2$observed)
##
## mydata_cluster$Age 1 2 3 4 5 Sum
## Older 26 17 7 17 9 76
## Younger 22 12 11 16 16 77
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor2$expected,2))
##
## mydata_cluster$Age 1 2 3 4 5 Sum
## Older 23.84 14.41 8.94 16.39 12.42 76
## Younger 24.16 14.59 9.06 16.61 12.58 77
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor2$residuals,2)
##
## mydata_cluster$Age 1 2 3 4 5
## Older 0.44 0.68 -0.65 0.15 -0.97
## Younger -0.44 -0.68 0.64 -0.15 0.96
library(effectsize)
effectsize::cramers_v(mydata_cluster$Age,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.02 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor3 <- chisq.test(mydata_cluster$Education,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$Education,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor3 # Used for descriptor p-value appropriate (2 categories, sample size 153)
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$Education and as.factor(mydata_cluster$Group)
## X-squared = 9.2882, df = 4, p-value = 0.05429
addmargins(Descriptor3$observed)
##
## mydata_cluster$Education 1 2 3 4 5 Sum
## Higher education 38 16 15 27 21 117
## Lower education 10 13 3 6 4 36
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor3$expected,2))
##
## mydata_cluster$Education 1 2 3 4 5 Sum
## Higher education 36.71 22.18 13.76 25.24 19.12 117.01
## Lower education 11.29 6.82 4.24 7.76 5.88 35.99
## Sum 48.00 29.00 18.00 33.00 25.00 153.00
round(Descriptor3$residuals,2)
##
## mydata_cluster$Education 1 2 3 4 5
## Higher education 0.21 -1.31 0.33 0.35 0.43
## Lower education -0.39 2.36 -0.60 -0.63 -0.78
library(effectsize)
effectsize::cramers_v(mydata_cluster$Education,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.19 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor4 <- chisq.test(mydata_cluster$`Employment status`,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$`Employment status`,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor4 # Used for descriptor p-value appropriate (2 categories, sample size 153)
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Employment status` and as.factor(mydata_cluster$Group)
## X-squared = 10.336, df = 4, p-value = 0.03514
addmargins(Descriptor4$observed)
##
## 1 2 3 4 5 Sum
## Full-time employees 36 18 18 23 21 116
## Others 12 11 0 10 4 37
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor4$expected,2))
##
## 1 2 3 4 5 Sum
## Full-time employees 36.39 21.99 13.65 25.02 18.95 116
## Others 11.61 7.01 4.35 7.98 6.05 37
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor4$residuals,2)
##
## 1 2 3 4 5
## Full-time employees -0.07 -0.85 1.18 -0.40 0.47
## Others 0.12 1.51 -2.09 0.71 -0.83
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Employment status`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.20 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor5 <- chisq.test(mydata_cluster$Employment,as.factor(mydata_cluster$Group))
Descriptor5 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$Employment and as.factor(mydata_cluster$Group)
## X-squared = 10.262, df = 8, p-value = 0.2471
addmargins(Descriptor5$observed)
##
## mydata_cluster$Employment 1 2 3 4 5 Sum
## Mental work 21 11 7 14 11 64
## Other 16 11 1 10 6 44
## Physical work 11 7 10 9 8 45
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor5$expected,2))
##
## mydata_cluster$Employment 1 2 3 4 5 Sum
## Mental work 20.08 12.13 7.53 13.80 10.46 64
## Other 13.80 8.34 5.18 9.49 7.19 44
## Physical work 14.12 8.53 5.29 9.71 7.35 45
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor5$residuals,2)
##
## mydata_cluster$Employment 1 2 3 4 5
## Mental work 0.21 -0.32 -0.19 0.05 0.17
## Other 0.59 0.92 -1.84 0.17 -0.44
## Physical work -0.83 -0.52 2.05 -0.23 0.24
library(effectsize)
effectsize::cramers_v(mydata_cluster$Employment,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.09 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor6 <- chisq.test(mydata_cluster$`Area of living`,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$`Area of living`,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor6 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Area of living` and as.factor(mydata_cluster$Group)
## X-squared = 2.7095, df = 4, p-value = 0.6076
addmargins(Descriptor6$observed)
##
## 1 2 3 4 5 Sum
## Urban area 36 22 16 28 21 123
## Rural area 12 7 2 5 4 30
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor6$expected,2))
##
## 1 2 3 4 5 Sum
## Urban area 38.59 23.31 14.47 26.53 20.1 123
## Rural area 9.41 5.69 3.53 6.47 4.9 30
## Sum 48.00 29.00 18.00 33.00 25.0 153
round(Descriptor6$residuals,2)
##
## 1 2 3 4 5
## Urban area -0.42 -0.27 0.40 0.29 0.20
## Rural area 0.84 0.55 -0.81 -0.58 -0.41
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Area of living`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.00 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor7 <- chisq.test(mydata_cluster$`Primary bank`,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$`Primary bank`,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor7 # Used for descriptor even though p-value is slightly above 10% (3 categories, sample size 153)
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Primary bank` and as.factor(mydata_cluster$Group)
## X-squared = 12.279, df = 8, p-value = 0.1392
addmargins(Descriptor7$observed)
##
## mydata_cluster$`Primary bank` 1 2 3 4 5 Sum
## NLB 25 12 5 12 8 62
## OTP 5 8 1 8 5 27
## Other banks 18 9 12 13 12 64
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor7$expected,2))
##
## mydata_cluster$`Primary bank` 1 2 3 4 5 Sum
## NLB 19.45 11.75 7.29 13.37 10.13 61.99
## OTP 8.47 5.12 3.18 5.82 4.41 27.00
## Other banks 20.08 12.13 7.53 13.80 10.46 64.00
## Sum 48.00 29.00 18.00 32.99 25.00 152.99
round(Descriptor7$residuals,2)
##
## mydata_cluster$`Primary bank` 1 2 3 4 5
## NLB 1.26 0.07 -0.85 -0.38 -0.67
## OTP -1.19 1.27 -1.22 0.90 0.28
## Other banks -0.46 -0.90 1.63 -0.22 0.48
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Primary bank`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.12 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor8 <- chisq.test(mydata_cluster$`Grocery shopping`,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$`Grocery shopping`,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor8 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Grocery shopping` and as.factor(mydata_cluster$Group)
## X-squared = 5.5936, df = 4, p-value = 0.2316
addmargins(Descriptor8$observed)
##
## 1 2 3 4 5 Sum
## Digital payments 34 17 15 24 21 111
## Cash 14 12 3 9 4 42
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor8$expected,2))
##
## 1 2 3 4 5 Sum
## Digital payments 34.82 21.04 13.06 23.94 18.14 111
## Cash 13.18 7.96 4.94 9.06 6.86 42
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor8$residuals,2)
##
## 1 2 3 4 5
## Digital payments -0.14 -0.88 0.54 0.01 0.67
## Cash 0.23 1.43 -0.87 -0.02 -1.09
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Grocery shopping`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.10 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor9 <- chisq.test(mydata_cluster$`Online shopping`,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$`Online shopping`,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor9 # Used for descriptor p-value appropriate (2 categories, sample size 153)
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Online shopping` and as.factor(mydata_cluster$Group)
## X-squared = 12.068, df = 4, p-value = 0.01685
addmargins(Descriptor9$observed)
##
## 1 2 3 4 5 Sum
## Digital payments 39 19 15 29 25 127
## Cash 9 10 3 4 0 26
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor9$expected,2))
##
## 1 2 3 4 5 Sum
## Digital payments 39.84 24.07 14.94 27.39 20.75 126.99
## Cash 8.16 4.93 3.06 5.61 4.25 26.01
## Sum 48.00 29.00 18.00 33.00 25.00 153.00
round(Descriptor9$residuals,2)
##
## 1 2 3 4 5
## Digital payments -0.13 -1.03 0.02 0.31 0.93
## Cash 0.30 2.28 -0.03 -0.68 -2.06
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Online shopping`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.23 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
fisher.test(mydata_cluster$`Online shopping`,as.factor(mydata_cluster$Group)) # Fisher test as more than 2 expected frequencies is below 5, H0: The variables are independent, H1: The variables are not independent. We cannot reject H0, we assume variables are independent at p=1.
##
## Fisher's Exact Test for Count Data
##
## data: mydata_cluster$`Online shopping` and as.factor(mydata_cluster$Group)
## p-value = 0.01044
## alternative hypothesis: two.sided
Descriptor10 <- chisq.test(mydata_cluster$Subscriptions,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$Subscriptions,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor10 # Used for descriptor p-value appropriate (2 categories, sample size 153)
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$Subscriptions and as.factor(mydata_cluster$Group)
## X-squared = 12.105, df = 4, p-value = 0.01658
addmargins(Descriptor10$observed)
##
## mydata_cluster$Subscriptions 1 2 3 4 5 Sum
## Digital payments 42 20 15 30 25 132
## Cash 6 9 3 3 0 21
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor10$expected,2))
##
## mydata_cluster$Subscriptions 1 2 3 4 5 Sum
## Digital payments 41.41 25.02 15.53 28.47 21.57 132
## Cash 6.59 3.98 2.47 4.53 3.43 21
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor10$residuals,2)
##
## mydata_cluster$Subscriptions 1 2 3 4 5
## Digital payments 0.09 -1.00 -0.13 0.29 0.74
## Cash -0.23 2.52 0.34 -0.72 -1.85
library(effectsize)
effectsize::cramers_v(mydata_cluster$Subscriptions,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.23 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
fisher.test(mydata_cluster$Subscriptions, as.factor(mydata_cluster$Group)) # Fisher test as more than 2 expected frequencies is below 5, H0: The variables are independent, H1: The variables are not independent. We cannot reject H0, we assume variables are independent at p=1.
##
## Fisher's Exact Test for Count Data
##
## data: mydata_cluster$Subscriptions and as.factor(mydata_cluster$Group)
## p-value = 0.01436
## alternative hypothesis: two.sided
Descriptor11 <- chisq.test(mydata_cluster$`Bill payments`,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$`Bill payments`,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor11 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Bill payments` and as.factor(mydata_cluster$Group)
## X-squared = 5.5678, df = 4, p-value = 0.2338
addmargins(Descriptor11$observed)
##
## mydata_cluster$`Bill payments` 1 2 3 4 5 Sum
## Digital payments 41 23 15 29 25 133
## Cash 7 6 3 4 0 20
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor11$expected,2))
##
## mydata_cluster$`Bill payments` 1 2 3 4 5 Sum
## Digital payments 41.73 25.21 15.65 28.69 21.73 133.01
## Cash 6.27 3.79 2.35 4.31 3.27 19.99
## Sum 48.00 29.00 18.00 33.00 25.00 153.00
round(Descriptor11$residuals,2)
##
## mydata_cluster$`Bill payments` 1 2 3 4 5
## Digital payments -0.11 -0.44 -0.16 0.06 0.70
## Cash 0.29 1.13 0.42 -0.15 -1.81
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Bill payments`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.10 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
fisher.test(mydata_cluster$`Bill payments`,as.factor(mydata_cluster$Group)) # Fisher test as more than 2 expected frequencies is below 5, H0: The variables are independent, H1: The variables are not independent. We cannot reject H0, we assume variables are independent at p=1.
##
## Fisher's Exact Test for Count Data
##
## data: mydata_cluster$`Bill payments` and as.factor(mydata_cluster$Group)
## p-value = 0.1491
## alternative hypothesis: two.sided
Descriptor12 <- chisq.test(mydata_cluster$`Paying for services`,as.factor(mydata_cluster$Group))
Descriptor12 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Paying for services` and as.factor(mydata_cluster$Group)
## X-squared = 2.7994, df = 4, p-value = 0.5919
addmargins(Descriptor12$observed)
##
## 1 2 3 4 5 Sum
## Digital payments 17 8 3 8 8 44
## Cash 31 21 15 25 17 109
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor12$expected,2))
##
## 1 2 3 4 5 Sum
## Digital payments 13.8 8.34 5.18 9.49 7.19 44
## Cash 34.2 20.66 12.82 23.51 17.81 109
## Sum 48.0 29.00 18.00 33.00 25.00 153
round(Descriptor12$residuals,2)
##
## 1 2 3 4 5
## Digital payments 0.86 -0.12 -0.96 -0.48 0.30
## Cash -0.55 0.07 0.61 0.31 -0.19
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Paying for services`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.00 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor13 <- chisq.test(mydata_cluster$Tips,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$Tips, as.factor(mydata_cluster$Group)):
## Chi-squared approximation may be incorrect
Descriptor13 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$Tips and as.factor(mydata_cluster$Group)
## X-squared = 2.0889, df = 4, p-value = 0.7194
addmargins(Descriptor13$observed)
##
## mydata_cluster$Tips 1 2 3 4 5 Sum
## Digital payments 4 2 2 2 4 14
## Cash 44 27 16 31 21 139
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor13$expected,2))
##
## mydata_cluster$Tips 1 2 3 4 5 Sum
## Digital payments 4.39 2.65 1.65 3.02 2.29 14
## Cash 43.61 26.35 16.35 29.98 22.71 139
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor13$residuals,2)
##
## mydata_cluster$Tips 1 2 3 4 5
## Digital payments -0.19 -0.40 0.28 -0.59 1.13
## Cash 0.06 0.13 -0.09 0.19 -0.36
library(effectsize)
effectsize::cramers_v(mydata_cluster$Tips,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.00 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
fisher.test(mydata_cluster$Tips,as.factor(mydata_cluster$Group)) # Fisher test as more than 2 expected frequencies is below 5, H0: The variables are independent, H1: The variables are not independent. We cannot reject H0, we assume variables are independent at p=1.
##
## Fisher's Exact Test for Count Data
##
## data: mydata_cluster$Tips and as.factor(mydata_cluster$Group)
## p-value = 0.7314
## alternative hypothesis: two.sided
Descriptor14 <- chisq.test(mydata_cluster$Transport,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$Transport,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor14 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$Transport and as.factor(mydata_cluster$Group)
## X-squared = 2.045, df = 4, p-value = 0.7275
addmargins(Descriptor14$observed)
##
## mydata_cluster$Transport 1 2 3 4 5 Sum
## Digital payments 12 5 6 10 7 40
## Cash 36 24 12 23 18 113
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor14$expected,2))
##
## mydata_cluster$Transport 1 2 3 4 5 Sum
## Digital payments 12.55 7.58 4.71 8.63 6.54 40.01
## Cash 35.45 21.42 13.29 24.37 18.46 112.99
## Sum 48.00 29.00 18.00 33.00 25.00 153.00
round(Descriptor14$residuals,2)
##
## mydata_cluster$Transport 1 2 3 4 5
## Digital payments -0.15 -0.94 0.60 0.47 0.18
## Cash 0.09 0.56 -0.35 -0.28 -0.11
library(effectsize)
effectsize::cramers_v(mydata_cluster$Transport,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.00 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor15 <- chisq.test(mydata_cluster$`Social events`,as.factor(mydata_cluster$Group))
Descriptor15 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Social events` and as.factor(mydata_cluster$Group)
## X-squared = 1.9514, df = 4, p-value = 0.7447
addmargins(Descriptor15$observed)
##
## mydata_cluster$`Social events` 1 2 3 4 5 Sum
## Digital payments 21 11 10 16 13 71
## Cash 27 18 8 17 12 82
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor15$expected,2))
##
## mydata_cluster$`Social events` 1 2 3 4 5 Sum
## Digital payments 22.27 13.46 8.35 15.31 11.6 70.99
## Cash 25.73 15.54 9.65 17.69 13.4 82.01
## Sum 48.00 29.00 18.00 33.00 25.0 153.00
round(Descriptor15$residuals,2)
##
## mydata_cluster$`Social events` 1 2 3 4 5
## Digital payments -0.27 -0.67 0.57 0.18 0.41
## Cash 0.25 0.62 -0.53 -0.16 -0.38
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Social events`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.00 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor16 <- chisq.test(mydata_cluster$`Most frequent payment method`,as.factor(mydata_cluster$Group))
## Warning in chisq.test(mydata_cluster$`Most frequent payment method`,
## as.factor(mydata_cluster$Group)): Chi-squared approximation may be incorrect
Descriptor16 # Used for descriptor p-value appropriate (4 categories, sample size 153)
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Most frequent payment method` and as.factor(mydata_cluster$Group)
## X-squared = 20.559, df = 12, p-value = 0.05722
addmargins(Descriptor16$observed)
##
## 1 2 3 4 5 Sum
## Cash 3 3 0 3 2 11
## Credit/Debit cards 34 23 8 17 14 96
## Mobile payment options 5 3 8 11 7 34
## Mobile banks 6 0 2 2 2 12
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor16$expected,2))
##
## 1 2 3 4 5 Sum
## Cash 3.45 2.08 1.29 2.37 1.80 10.99
## Credit/Debit cards 30.12 18.20 11.29 20.71 15.69 96.01
## Mobile payment options 10.67 6.44 4.00 7.33 5.56 34.00
## Mobile banks 3.76 2.27 1.41 2.59 1.96 11.99
## Sum 48.00 28.99 17.99 33.00 25.01 152.99
round(Descriptor16$residuals,2)
##
## 1 2 3 4 5
## Cash -0.24 0.63 -1.14 0.41 0.15
## Credit/Debit cards 0.71 1.13 -0.98 -0.81 -0.43
## Mobile payment options -1.74 -1.36 2.00 1.35 0.61
## Mobile banks 1.15 -1.51 0.50 -0.37 0.03
fisher.test(mydata_cluster$`Most frequent payment method`,
as.factor(mydata_cluster$Group),
workspace = 2e8) # Increase workspace size as contingency table is to large for r to calculate it. Fisher test as more than 2 expected frequencies is below 5, H0: The variables are independent, H1: The variables are not independent. We reject H0, we assume variables are not independent at p=0.031.
##
## Fisher's Exact Test for Count Data
##
## data: mydata_cluster$`Most frequent payment method` and as.factor(mydata_cluster$Group)
## p-value = 0.03078
## alternative hypothesis: two.sided
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Most frequent payment method`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.14 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor17 <- chisq.test(mydata_cluster$`Cash carrying`,as.factor(mydata_cluster$Group))
Descriptor17 # Cannot be used as descriptor p-value too high, not validated
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Cash carrying` and as.factor(mydata_cluster$Group)
## X-squared = 6.2186, df = 4, p-value = 0.1834
addmargins(Descriptor17$observed)
##
## mydata_cluster$`Cash carrying` 1 2 3 4 5 Sum
## Small amount 18 11 10 14 16 69
## Large amount 30 18 8 19 9 84
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor17$expected,2))
##
## mydata_cluster$`Cash carrying` 1 2 3 4 5 Sum
## Small amount 21.65 13.08 8.12 14.88 11.27 69
## Large amount 26.35 15.92 9.88 18.12 13.73 84
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor17$residuals,2)
##
## mydata_cluster$`Cash carrying` 1 2 3 4 5
## Small amount -0.78 -0.57 0.66 -0.23 1.41
## Large amount 0.71 0.52 -0.60 0.21 -1.28
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Cash carrying`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.12 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
Descriptor18 <- chisq.test(mydata_cluster$`Digital payment usage`,as.factor(mydata_cluster$Group))
Descriptor18 # Used for descriptor p-value appropriate (2 categories, sample size 153)
##
## Pearson's Chi-squared test
##
## data: mydata_cluster$`Digital payment usage` and as.factor(mydata_cluster$Group)
## X-squared = 8.0059, df = 4, p-value = 0.09136
addmargins(Descriptor18$observed)
##
## 1 2 3 4 5 Sum
## High usage 18 10 11 14 16 69
## Low usage 30 19 7 19 9 84
## Sum 48 29 18 33 25 153
addmargins(round(Descriptor18$expected,2))
##
## 1 2 3 4 5 Sum
## High usage 21.65 13.08 8.12 14.88 11.27 69
## Low usage 26.35 15.92 9.88 18.12 13.73 84
## Sum 48.00 29.00 18.00 33.00 25.00 153
round(Descriptor18$residuals,2)
##
## 1 2 3 4 5
## High usage -0.78 -0.85 1.01 -0.23 1.41
## Low usage 0.71 0.77 -0.92 0.21 -1.28
library(effectsize)
effectsize::cramers_v(mydata_cluster$`Digital payment usage`,mydata_cluster$Group)
## Cramer's V (adj.) | 95% CI
## --------------------------------
## 0.16 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
mydata_H1 <- mydata[,c("Q2b_1","Q2c_1","Q46")]
head(mydata_H1)
## Q2b_1 Q2c_1 Q46
## 1 6 7 80
## 2 4 4 50
## 3 6 5 90
## 4 5 5 75
## 5 6 5 72
## 6 6 6 81
mydata_H1$`Digital Payments Security` <- rowMeans(mydata_H1[, c("Q2b_1", "Q2c_1")])
mydata_H1$`Digital Payments Security` <- ifelse(mydata_H1$`Digital Payments Security`>5,"High security perception","Low security perception")
mydata_H1$`Digital Payments Security` <- factor(mydata_H1$`Digital Payments Security`,
levels = c("High security perception","Low security perception"),
labels = c("High security perception","Low security perception"))
colnames(mydata_H1) <- c("Cards security", "Mobile payments security", "Digital payment usage","Digital payment security")
library(psych)
describeBy(mydata_H1$`Digital payment usage`,mydata_H1$`Digital payment security`)
##
## Descriptive statistics by group
## group: High security perception
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 107 79.36 15.85 80 81.25 14.83 1 100 99 -1.81 5.48 1.53
## ------------------------------------------------------------
## group: Low security perception
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 48 68.25 25.49 75 71.62 22.24 1 99 98 -1.17 0.56 3.68
library(ggplot2)
Security_High <- ggplot(mydata_H1[mydata_H1$`Digital payment security` == "High security perception", ], aes(x = `Digital payment usage`)) +
theme_linedraw() +
geom_histogram(binwidth = 1, col = "black") +
ylab("Frequency") +
ggtitle("High Security")
Security_Low <- ggplot(mydata_H1[mydata_H1$`Digital payment security` == "Low security perception", ], aes(x = `Digital payment usage`)) +
theme_linedraw() +
geom_histogram(binwidth = 1, col = "black") +
ylab("Frequency") +
ggtitle("Low security")
library(ggpubr)
ggarrange(Security_High, Security_Low,
ncol = 2, nrow = 1)
Normality is violated, Wilcoxon rank sum test
wilcox.test(mydata_H1$`Digital payment usage`~mydata_H1$`Digital payment security`,
correct=FALSE,
exact=FALSE,
alternative="greater")
##
## Wilcoxon rank sum test
##
## data: mydata_H1$`Digital payment usage` by mydata_H1$`Digital payment security`
## W = 3251, p-value = 0.004052
## alternative hypothesis: true location shift is greater than 0
library(effectsize)
effectsize(wilcox.test(mydata_H1$`Digital payment usage`~mydata_H1$`Digital payment security`,
correct=FALSE,
exact=FALSE,
alternative="greater"))
## r (rank biserial) | 95% CI
## --------------------------------
## 0.27 | [0.11, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
interpret_rank_biserial(0.27)
## [1] "medium"
## (Rules: funder2019)
Conclusion: We reject H0 at p = 0.005, we assume that consumers who perceive digital payments as highly secure (Likert score ≥5) use them more frequently than those who perceive them as less secure.
mydata_H3 <- mydata[,c("Q27a_1","Q46")]
colnames(mydata_H3) <- c("Trust large payments", "Digital payment usage")
head(mydata_H3)
## Trust large payments Digital payment usage
## 1 7 80
## 2 7 50
## 3 7 90
## 4 6 75
## 5 6 72
## 6 4 81
mydata_H3$`Trust large payments` <- ifelse(mydata_H3$`Trust large payments`>5,"High trust","Low trust")
mydata_H3$`Trust large payments` <- factor(mydata_H3$`Trust large payments`,
levels = c("High trust","Low trust"),
labels = c("High trust","Low trust"))
library(ggplot2)
Trust_High <- ggplot(mydata_H3[mydata_H3$`Trust large payments` == "High trust", ], aes(x = `Digital payment usage`)) +
theme_linedraw() +
geom_histogram(binwidth = 1, col = "blue") +
ylab("Frequency") +
ggtitle("High trust")
Trust_Low <- ggplot(mydata_H3[mydata_H3$`Trust large payments` == "Low trust", ], aes(x = `Digital payment usage`)) +
theme_linedraw() +
geom_histogram(binwidth = 1, col = "purple") +
ylab("Frequency") +
ggtitle("Low trust")
library(ggpubr)
ggarrange(Trust_High, Trust_Low,
ncol = 2, nrow = 1)
Normality is violated, Wilcoxon rank sum test
wilcox.test(mydata_H3$`Digital payment usage`~mydata_H3$`Trust large payments`,
correct=FALSE,
exact=FALSE,
alternative="greater")
##
## Wilcoxon rank sum test
##
## data: mydata_H3$`Digital payment usage` by mydata_H3$`Trust large payments`
## W = 3939.5, p-value = 5.282e-08
## alternative hypothesis: true location shift is greater than 0
library(effectsize)
effectsize(wilcox.test(mydata_H3$`Digital payment usage`~mydata_H3$`Trust large payments`,
correct=FALSE,
exact=FALSE,
alternative="greater"))
## r (rank biserial) | 95% CI
## --------------------------------
## 0.53 | [0.41, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
interpret_rank_biserial(0.53)
## [1] "very large"
## (Rules: funder2019)
Conclusion: We reject H0 at p < 0.001, we assume that consumers who have high trust indigital payments for large purchases (Likert score >5) use digital payments more likely for daily transactions than those who have lower trust in digital payments for large purchases.