R Markdown
# Read the survey export from the working directory. The message below shows
# that the workbook contains repeated headers (Q22a_1 ... Q22e_1), which
# read_xlsx() makes unique by appending the column position.
library(readxl)
mydata <- read_xlsx(path = "./dataset.xlsx")
## New names:
## • `Q22a_1` -> `Q22a_1...26`
## • `Q22b_1` -> `Q22b_1...27`
## • `Q22c_1` -> `Q22c_1...28`
## • `Q22d_1` -> `Q22d_1...29`
## • `Q22e_1` -> `Q22e_1...30`
## • `Q22a_1` -> `Q22a_1...32`
## • `Q22b_1` -> `Q22b_1...33`
## • `Q22c_1` -> `Q22c_1...34`
## • `Q22d_1` -> `Q22d_1...72`
## • `Q22e_1` -> `Q22e_1...73`
# Discard the first data row before any analysis.
# NOTE(review): presumably this row is not a respondent (e.g. question
# wording from the survey tool) -- confirm against the spreadsheet.
mydata <- mydata[-1L, ]
head(mydata, n = 6L)
## # A tibble: 6 × 73
## Q8 Q66 Q10 Q12 Q11 Q48 Q48_5_text Q14a Q14b Q14c Q14d Q14e
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 3 3 6 2 -2 -2 0 1 0 0 0
## 2 1 2 5 6 1 1 -2 0 1 0 0 0
## 3 1 6 5 6 1 3 -2 0 1 0 1 0
## 4 1 2 5 6 2 -2 -2 1 0 1 1 0
## 5 1 2 2 6 2 -2 -2 0 1 0 0 0
## 6 1 6 7 7 2 -2 -2 0 1 0 0 0
## # ℹ 61 more variables: Q14e_text <chr>, Q15 <chr>, Q16 <chr>, Q17 <chr>,
## # Q18 <chr>, Q25a <chr>, Q25b <chr>, Q25c <chr>, Q25d <chr>, Q25e <chr>,
## # Q25f <chr>, Q25g <chr>, Q25h <chr>, Q22a_1...26 <dbl>, Q22b_1...27 <dbl>,
## # Q22c_1...28 <dbl>, Q22d_1...29 <dbl>, Q22e_1...30 <dbl>, Q24 <chr>,
## # Q22a_1...32 <dbl>, Q22b_1...33 <dbl>, Q22c_1...34 <dbl>, Q13 <chr>,
## # Q20 <chr>, Q46 <chr>, Q1a_1 <dbl>, Q1b_1 <dbl>, Q1c_1 <dbl>, Q1d_1 <dbl>,
## # Q1e_1 <dbl>, Q1f_1 <dbl>, Q2a_1 <dbl>, Q2b_1 <dbl>, Q2c_1 <dbl>, …
# Recode the numerically coded survey answers into labelled factors.
# Codes that are not listed in `levels` become NA.

mydata$Q39 <- factor(mydata$Q39,
                     levels = 1:2,
                     labels = c("Male", "Female"))

# NOTE(review): "More than 3,000" overlaps the preceding "2,501 - 3,300"
# bracket; it probably should read "More than 3,300". Left unchanged until
# checked against the questionnaire.
mydata$Q37 <- factor(mydata$Q37,
                     levels = 1:7,
                     labels = c("Less than 1,000", "1,001 - 1,300",
                                "1,301 - 1,700", "1,701 - 2,500",
                                "2,501 - 3,300", "More than 3,000",
                                "Pension"))

mydata$Q41 <- factor(mydata$Q41,
                     levels = 1:7,
                     labels = c("Unfinished Elementary", "Finished Elementary",
                                "Vocational School", "General High School",
                                "Undergraduate Degree", "Master's Degree",
                                "PhD"))

mydata$Q42 <- factor(mydata$Q42,
                     levels = 1:5,
                     labels = c("Employed", "Self-Employed", "Retired",
                                "Unemployed", "Other"))

mydata$Q43 <- factor(mydata$Q43,
                     levels = 1:5,
                     labels = c("Physical Work", "Service", "Office",
                                "Public Sector", "Creative"))

mydata$Q44 <- factor(mydata$Q44,
                     levels = 1:3,
                     labels = c("Urban", "Suburban", "Rural"))

# Bank label spelling corrected: "Raiffaisen" -> "Raiffeisen".
mydata$Q45 <- factor(mydata$Q45,
                     levels = 1:10,
                     labels = c("NLB", "OTP", "Unicredit", "Raiffeisen",
                                "Gorenjska banka", "Intesa Sanpaolo",
                                "Delavska hranilnica", "Revolut", "N26",
                                "Other"))

mydata$Q11 <- factor(mydata$Q11,
                     levels = 1:2,
                     labels = c("No", "Yes"))

mydata$Q13 <- factor(mydata$Q13,
                     levels = 1:7,
                     labels = c("Never", "Less than once a month",
                                "Once a month", "2-3 times a month",
                                "Once a week", "2-3 times a week",
                                "Every day"))

mydata$Q20 <- factor(mydata$Q20,
                     levels = 1:5,
                     labels = c("Less than 50", "50-100", "101-300",
                                "301-500", "More than 500"))

# Q40 is replaced by the reference year minus the recorded value.
# NOTE(review): presumably Q40 holds the year of birth, making the result an
# age -- confirm. This step is not idempotent: running it twice on the same
# object reverses the transformation.
survey_year <- 2025
mydata$Q40 <- as.numeric(as.character(mydata$Q40))
mydata$Q40 <- survey_year - mydata$Q40

head(mydata)
## # A tibble: 6 × 73
## Q8 Q66 Q10 Q12 Q11 Q48 Q48_5_text Q14a Q14b Q14c Q14d Q14e
## <chr> <chr> <chr> <chr> <fct> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 3 3 6 Yes -2 -2 0 1 0 0 0
## 2 1 2 5 6 No 1 -2 0 1 0 0 0
## 3 1 6 5 6 No 3 -2 0 1 0 1 0
## 4 1 2 5 6 Yes -2 -2 1 0 1 1 0
## 5 1 2 2 6 Yes -2 -2 0 1 0 0 0
## 6 1 6 7 7 Yes -2 -2 0 1 0 0 0
## # ℹ 61 more variables: Q14e_text <chr>, Q15 <chr>, Q16 <chr>, Q17 <chr>,
## # Q18 <chr>, Q25a <chr>, Q25b <chr>, Q25c <chr>, Q25d <chr>, Q25e <chr>,
## # Q25f <chr>, Q25g <chr>, Q25h <chr>, Q22a_1...26 <dbl>, Q22b_1...27 <dbl>,
## # Q22c_1...28 <dbl>, Q22d_1...29 <dbl>, Q22e_1...30 <dbl>, Q24 <chr>,
## # Q22a_1...32 <dbl>, Q22b_1...33 <dbl>, Q22c_1...34 <dbl>, Q13 <fct>,
## # Q20 <fct>, Q46 <chr>, Q1a_1 <dbl>, Q1b_1 <dbl>, Q1c_1 <dbl>, Q1d_1 <dbl>,
## # Q1e_1 <dbl>, Q1f_1 <dbl>, Q2a_1 <dbl>, Q2b_1 <dbl>, Q2c_1 <dbl>, …
# Q25a ... Q25h share one 5-point answer scale, so they are recoded in a
# single loop instead of eight copy-pasted factor() calls. Codes outside 1-5
# become NA.
q25_items <- paste0("Q25", letters[1:8])
q25_labels <- c("Cash only", "Mostly cash", "Half-half",
                "Mostly digital", "Digital only")
for (q25_item in q25_items) {
  mydata[[q25_item]] <- factor(mydata[[q25_item]],
                               levels = 1:5,
                               labels = q25_labels)
}
# Summarise the recoded variables. Columns are selected by name rather than
# by position (35, 36, 62, ...) so the call keeps selecting the same variables
# if the spreadsheet layout changes.
summary(mydata[c("Q13", "Q20", "Q39", "Q40", "Q41", "Q42", "Q43", "Q44", "Q45")])
## Q13 Q20 Q39 Q40
## Never : 3 Less than 50 :88 Male :63 Min. :20.00
## Less than once a month:40 50-100 :43 Female:89 1st Qu.:26.00
## Once a month :45 101-300 :20 Median :38.00
## 2-3 times a month :39 301-500 : 0 Mean :39.02
## Once a week :11 More than 500: 1 3rd Qu.:49.00
## 2-3 times a week :11 Max. :65.00
## Every day : 3
## Q41 Q42 Q43
## Unfinished Elementary: 0 Employed :118 Physical Work:14
## Finished Elementary : 2 Self-Employed: 25 Service :25
## Vocational School : 9 Retired : 0 Office :48
## General High School :43 Unemployed : 9 Public Sector:29
## Undergraduate Degree :55 Other : 0 Creative : 2
## Master's Degree :35 NA's :34
## PhD : 8
## Q44 Q45
## Urban :77 NLB :56
## Suburban:43 OTP :40
## Rural :32 Intesa Sanpaolo :19
## Unicredit :11
## Delavska hranilnica:10
## Revolut : 8
## (Other) : 8
# Scatterplot matrix and Pearson correlation for Q46 and Q1a_1.
# The two variables are selected by name instead of by position (37, 38), so
# a change in column order cannot silently swap in a different variable.
mydata$Q46 <- as.numeric(as.character(mydata$Q46))
mydata$Q1a_1 <- as.numeric(as.character(mydata$Q1a_1))
library(car)
## Loading required package: carData
scatterplotMatrix(mydata[, c("Q46", "Q1a_1")], smooth = FALSE)

library(Hmisc)
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
rcorr(as.matrix(mydata[, c("Q46", "Q1a_1")]),
      type = "pearson")
## Q46 Q1a_1
## Q46 1.00 0.01
## Q1a_1 0.01 1.00
##
## n= 152
##
##
## P
## Q46 Q1a_1
## Q46 0.9054
## Q1a_1 0.9054
#security
# Scatterplot matrix and Pearson correlation for Q46 and Q1b_1.
# Columns are selected by name instead of by position (37, 39).
mydata$Q46 <- as.numeric(as.character(mydata$Q46))
mydata$Q1b_1 <- as.numeric(as.character(mydata$Q1b_1))
library(car)
scatterplotMatrix(mydata[, c("Q46", "Q1b_1")], smooth = FALSE)

library(Hmisc)
rcorr(as.matrix(mydata[, c("Q46", "Q1b_1")]),
      type = "pearson")
## Q46 Q1b_1
## Q46 1.00 0.21
## Q1b_1 0.21 1.00
##
## n= 152
##
##
## P
## Q46 Q1b_1
## Q46 0.0106
## Q1b_1 0.0106
#speed of transactions
# Scatterplot matrix and Pearson correlation for Q46 and Q1c_1.
# Columns are selected by name instead of by position (37, 40).
mydata$Q46 <- as.numeric(as.character(mydata$Q46))
mydata$Q1c_1 <- as.numeric(as.character(mydata$Q1c_1))
library(car)
scatterplotMatrix(mydata[, c("Q46", "Q1c_1")], smooth = FALSE)

library(Hmisc)
rcorr(as.matrix(mydata[, c("Q46", "Q1c_1")]),
      type = "pearson")
## Q46 Q1c_1
## Q46 1.00 0.29
## Q1c_1 0.29 1.00
##
## n= 152
##
##
## P
## Q46 Q1c_1
## Q46 3e-04
## Q1c_1 3e-04
#ease of use
# Scatterplot matrix and Pearson correlation for Q46 and Q1d_1.
# Columns are selected by name instead of by position (37, 41).
mydata$Q46 <- as.numeric(as.character(mydata$Q46))
mydata$Q1d_1 <- as.numeric(as.character(mydata$Q1d_1))
library(car)
scatterplotMatrix(mydata[, c("Q46", "Q1d_1")], smooth = FALSE)

library(Hmisc)
rcorr(as.matrix(mydata[, c("Q46", "Q1d_1")]),
      type = "pearson")
## Q46 Q1d_1
## Q46 1.00 0.27
## Q1d_1 0.27 1.00
##
## n= 152
##
##
## P
## Q46 Q1d_1
## Q46 6e-04
## Q1d_1 6e-04
#convenience
# Scatterplot matrix and Pearson correlation for Q46 and Q1e_1.
# Columns are selected by name instead of by position (37, 42).
mydata$Q46 <- as.numeric(as.character(mydata$Q46))
mydata$Q1e_1 <- as.numeric(as.character(mydata$Q1e_1))
library(car)
scatterplotMatrix(mydata[, c("Q46", "Q1e_1")], smooth = FALSE)

library(Hmisc)
rcorr(as.matrix(mydata[, c("Q46", "Q1e_1")]),
      type = "pearson")
## Q46 Q1e_1
## Q46 1.00 -0.26
## Q1e_1 -0.26 1.00
##
## n= 152
##
##
## P
## Q46 Q1e_1
## Q46 0.0013
## Q1e_1 0.0013
#privacy
# Scatterplot matrix and Pearson correlation for Q46 and Q1f_1.
# The numeric conversion now targets Q1f_1, the variable actually analysed
# here; the previous version re-converted Q1e_1 (copy-paste slip) and left
# Q1f_1 untouched. Columns are selected by name instead of by position
# (37, 43).
mydata$Q46 <- as.numeric(as.character(mydata$Q46))
mydata$Q1f_1 <- as.numeric(as.character(mydata$Q1f_1))
library(car)
scatterplotMatrix(mydata[, c("Q46", "Q1f_1")], smooth = FALSE)

library(Hmisc)
rcorr(as.matrix(mydata[, c("Q46", "Q1f_1")]),
      type = "pearson")
## Q46 Q1f_1
## Q46 1.00 -0.02
## Q1f_1 -0.02 1.00
##
## n= 152
##
##
## P
## Q46 Q1f_1
## Q46 0.7674
## Q1f_1 0.7674
#spending control
# Assumption checks for comparing Q46 across the Q44 groups:
# a normal Q-Q plot per group, then Levene's test for equal variances.
library(ggpubr)
## Loading required package: ggplot2
ggqqplot(data = mydata, x = "Q46", facet.by = "Q44")

# leveneTest() comes from the car package loaded earlier in the script.
leveneTest(Q46 ~ Q44, data = mydata)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.2843 0.2799
## 149
# Shapiro-Wilk normality test on Q46 (all respondents pooled), followed by
# the Kruskal-Wallis rank sum test of Q46 across the Q44 groups.
# The argument expression is kept as `mydata$Q46` because it is echoed in the
# "data:" line of the printed result.
shapiro.test(x = mydata$Q46)
##
## Shapiro-Wilk normality test
##
## data: mydata$Q46
## W = 0.87197, p-value = 3.786e-10
kruskal.test(formula = Q46 ~ Q44, data = mydata)
##
## Kruskal-Wallis rank sum test
##
## data: Q46 by Q44
## Kruskal-Wallis chi-squared = 7.2224, df = 2, p-value = 0.02702
# Effect size for the Kruskal-Wallis test of Q46 by Q44, then pairwise
# unpaired Wilcoxon tests with Bonferroni-adjusted p-values.
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
kruskal_effsize(data = mydata, formula = Q46 ~ Q44)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Q46 152 0.0350 eta2[H] small
groups <- wilcox_test(
  data = mydata,
  formula = Q46 ~ Q44,
  p.adjust.method = "bonferroni",
  paired = FALSE
)
groups
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 Q46 Urban Suburban 77 43 1404. 0.168 0.504 ns
## 2 Q46 Urban Rural 77 32 1509 0.066 0.196 ns
## 3 Q46 Suburban Rural 43 32 928. 0.01 0.031 *
# Kruskal-Wallis rank sum test of Q46 across the Q43 groups.
kruskal.test(Q46 ~ Q43, data = mydata)
##
## Kruskal-Wallis rank sum test
##
## data: Q46 by Q43
## Kruskal-Wallis chi-squared = 9.1442, df = 4, p-value = 0.05759

# Effect size for the test above. Q43 is missing for 34 of the 152
# respondents, and passing the full data made the effect size use n = 152
# instead of the 118 cases that actually enter the test, which understates
# eta2[H]. Incomplete rows are therefore removed first.
# NOTE: the rendered result printed after this call (n = 152, 0.0350) was
# produced before this correction; re-knit the document to refresh it.
q43_complete <- mydata[!is.na(mydata$Q43) & !is.na(mydata$Q46), ]
kruskal_effsize(Q46 ~ Q43,
                data = q43_complete)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Q46 152 0.0350 eta2[H] small
# Pairwise unpaired Wilcoxon tests of Q46 between the Q43 groups, with
# Bonferroni-adjusted p-values.
# NOTE(review): the Kruskal-Wallis test for Q43 printed earlier gives
# p = 0.05759; confirm that post-hoc comparisons are intended here.
groups1 <- wilcox_test(
  data = mydata,
  formula = Q46 ~ Q43,
  p.adjust.method = "bonferroni",
  paired = FALSE
)
groups1
## # A tibble: 10 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 Q46 Physical Work Service 14 25 189 0.692 1 ns
## 2 Q46 Physical Work Office 14 48 224 0.06 0.599 ns
## 3 Q46 Physical Work Public Se… 14 29 154. 0.203 1 ns
## 4 Q46 Physical Work Creative 14 2 14 1 1 ns
## 5 Q46 Service Office 25 48 378 0.01 0.099 ns
## 6 Q46 Service Public Se… 25 29 260. 0.078 0.78 ns
## 7 Q46 Service Creative 25 2 24 0.963 1 ns
## 8 Q46 Office Public Se… 48 29 785 0.351 1 ns
## 9 Q46 Office Creative 48 2 67 0.358 1 ns
## 10 Q46 Public Sector Creative 29 2 35 0.658 1 ns
# Two-sided one-sample t-test of Q15 against a hypothesised mean of 4.
# Q15 is read as text, so it is converted to numeric first.
mydata$Q15 <- as.numeric(as.character(mydata$Q15))
t.test(x = mydata$Q15, alternative = "two.sided", mu = 4)
##
## One Sample t-test
##
## data: mydata$Q15
## t = -5.0353, df = 151, p-value = 1.343e-06
## alternative hypothesis: true mean is not equal to 4
## 95 percent confidence interval:
## 2.854945 3.500318
## sample estimates:
## mean of x
## 3.177632
#if faster 10eur
# One-sample analysis of Q15 against a reference value of 4:
# Shapiro-Wilk normality check, Wilcoxon signed-rank test, rank-biserial
# effect size and its verbal interpretation.
mydata$Q15 <- as.numeric(as.character(mydata$Q15))
shapiro.test(mydata$Q15)
##
## Shapiro-Wilk normality test
##
## data: mydata$Q15
## W = 0.86315, p-value = 1.443e-10
# The test is run once and stored so the effect size is derived from the
# same result that is reported.
q15_wilcox <- wilcox.test(mydata$Q15,
                          mu = 4,
                          correct = FALSE)
q15_wilcox
##
## Wilcoxon signed rank test
##
## data: mydata$Q15
## V = 3009, p-value = 7.715e-06
## alternative hypothesis: true location is not equal to 4
library(effectsize)
##
## Attaching package: 'effectsize'
## The following objects are masked from 'package:rstatix':
##
## cohens_d, eta_squared
q15_rank_biserial <- effectsize(q15_wilcox)
q15_rank_biserial
## r (rank biserial) | 95% CI
## ----------------------------------
## -0.42 | [-0.57, -0.26]
##
## - Deviation from a difference of 4.
# Interpret the computed estimate instead of a rounded value copied by hand
# from the printout; abs() keeps the magnitude, as the hand-typed 0.42 did.
interpret_rank_biserial(abs(q15_rank_biserial$r_rank_biserial),
                        rules = "funder2019")
## [1] "very large"
## (Rules: funder2019)
#flik, convenient
# One-sample analysis of Q17 against a reference value of 4:
# Shapiro-Wilk normality check, Wilcoxon signed-rank test, rank-biserial
# effect size and its verbal interpretation.
mydata$Q17 <- as.numeric(as.character(mydata$Q17))
shapiro.test(mydata$Q17)
##
## Shapiro-Wilk normality test
##
## data: mydata$Q17
## W = 0.78751, p-value = 1.407e-13
# The test is run once and stored so the effect size is derived from the
# same result that is reported.
q17_wilcox <- wilcox.test(mydata$Q17,
                          mu = 4,
                          correct = FALSE)
q17_wilcox
##
## Wilcoxon signed rank test
##
## data: mydata$Q17
## V = 8734, p-value = 7.769e-11
## alternative hypothesis: true location is not equal to 4
library(effectsize)
q17_rank_biserial <- effectsize(q17_wilcox)
q17_rank_biserial
## r (rank biserial) | 95% CI
## --------------------------------
## 0.61 | [0.47, 0.71]
##
## - Deviation from a difference of 4.
# Interpret the computed estimate instead of a rounded value copied by hand
# from the printout.
interpret_rank_biserial(abs(q17_rank_biserial$r_rank_biserial),
                        rules = "funder2019")
## [1] "very large"
## (Rules: funder2019)
#flik, family
# One-sample analysis of Q18 against a reference value of 4:
# Shapiro-Wilk normality check, Wilcoxon signed-rank test, rank-biserial
# effect size and its verbal interpretation.
mydata$Q18 <- as.numeric(as.character(mydata$Q18))
shapiro.test(mydata$Q18)
##
## Shapiro-Wilk normality test
##
## data: mydata$Q18
## W = 0.87963, p-value = 9.066e-10
# The test is run once and stored so the effect size is derived from the
# same result that is reported.
q18_wilcox <- wilcox.test(mydata$Q18,
                          mu = 4,
                          correct = FALSE)
q18_wilcox
##
## Wilcoxon signed rank test
##
## data: mydata$Q18
## V = 8163, p-value = 8.609e-09
## alternative hypothesis: true location is not equal to 4
library(effectsize)
q18_rank_biserial <- effectsize(q18_wilcox)
q18_rank_biserial
## r (rank biserial) | 95% CI
## --------------------------------
## 0.54 | [0.40, 0.66]
##
## - Deviation from a difference of 4.
# Interpret the computed estimate instead of a rounded value copied by hand
# from the printout.
interpret_rank_biserial(abs(q18_rank_biserial$r_rank_biserial),
                        rules = "funder2019")
## [1] "very large"
## (Rules: funder2019)
#infrastructure, cash
# One-sample analysis of Q10 against a reference value of 4:
# Shapiro-Wilk normality check, Wilcoxon signed-rank test, rank-biserial
# effect size and its verbal interpretation.
mydata$Q10 <- as.numeric(as.character(mydata$Q10))
shapiro.test(mydata$Q10)
##
## Shapiro-Wilk normality test
##
## data: mydata$Q10
## W = 0.91107, p-value = 4.992e-08
# The test is run once and stored so the effect size is derived from the
# same result that is reported.
q10_wilcox <- wilcox.test(mydata$Q10,
                          mu = 4,
                          correct = FALSE)
q10_wilcox
##
## Wilcoxon signed rank test
##
## data: mydata$Q10
## V = 6179.5, p-value = 0.1053
## alternative hypothesis: true location is not equal to 4
library(effectsize)
q10_rank_biserial <- effectsize(q10_wilcox)
q10_rank_biserial
## r (rank biserial) | 95% CI
## ---------------------------------
## 0.15 | [-0.03, 0.33]
##
## - Deviation from a difference of 4.
# Interpret the computed estimate instead of a rounded value copied by hand
# from the printout.
interpret_rank_biserial(abs(q10_rank_biserial$r_rank_biserial),
                        rules = "funder2019")
## [1] "small"
## (Rules: funder2019)