This educational notebook accompanies an article presenting the opportunities of analyzing data from standardized questionnaires for UX designers and researchers.
The questionnaire serving as an example for analyzes and visualizations is a dummy and has no psychometric validity. It was created from scratch and any resemblance to any particular questionnaire is purely coincidental.
Contact: paul.amat@live.fr ; https://paulamat.design/
Subscales :
Dimensions :
Items :
set.seed(1)
# set number of participants
n <- 200
# initiate list of variables
FLARE <- list()
# initiate subscales
subscale <- c("F","L","A","R","E")
# initiate 2 variables per subscale
FLARE <- rep(list(numeric(0)), length(subscale)*2)
names(FLARE) <- paste0(rep(subscale, each=2), rep(1:2, length(subscale)))
# initiate probabilities
centered <- c(0.1, 0.1, 0.1, 0.7, 0.05, 0.05, 0.05)
centered2 <- c(0.05, 0.05, 0.05, 0.8, 0.025, 0.025, 0.025)
skewed.left <- c(0.05, 0.05, 0.05, 0.05, 0.35, 0.7, 0.25)
skewed.left2 <- c(0.05, 0.05, 0.05, 0.05, 0.15, 0.25, 0.4)
skewed.right <- c(0.2, 0.3, 0.2, 0.1, 0.1, 0.05, 0.05)
skewed.right2 <- c(0.15, 0.2, 0.2, 0.15, 0.1, 0.1, 0.05)
# populate variables
a <- round(pmin(pmax(rnorm(n, mean = 6, sd = 1), 1), 7))
c <- sample(1:7, n, replace = TRUE, prob = skewed.left)
b <- d <- sample(1:7, n, replace = TRUE, prob = skewed.left2)
e <- sample(1:7, n, replace = TRUE, prob = skewed.right)
f <- sample(1:7, n, replace = TRUE, prob = skewed.right2)
g <- i <- sample(1:7, n, replace = TRUE, prob = centered)
h <- j <- sample(1:7, n, replace = TRUE, prob = centered2)
age.list <- c("18-24", "25-34", "35-54", "55+")
age.group <- sample(age.list, n, replace = T)
age <- c(rep(NA, 200))
# make it a data frame
FLARE <- data.frame(a, b, c, d, e, f, g, h, i, j, age.group, age)
colnames(FLARE) <- c("F1", "F2", "L1", "L2", "A1", "A2", "R1", "R2", "E1", "E2", "age.group", "age")
for (i in 1:nrow(FLARE)) {
if (FLARE[i,"F2"] <= 6 && FLARE[i,"age.group"] == "18-24") {
FLARE[i,"F2"] <- sample(1:7, 1, replace = TRUE, prob = skewed.left)
} else if (FLARE[i,"F2"] >= 2 && FLARE[i,"age.group"] == "55+") {
FLARE[i,"F2"] <- sample(1:7, 1, replace = TRUE, prob = skewed.right)
}
}
for (i in 1:nrow(FLARE)) {
if (FLARE[i,"age.group"] == "18-24") {
FLARE[i,"age"] <- sample(18:24, 1, replace = TRUE)
} else if (FLARE[i,"age.group"] == "25-34") {
FLARE[i,"age"] <- sample(25:34, 1, replace = TRUE)
} else if (FLARE[i,"age.group"] == "35-54") {
FLARE[i,"age"] <- sample(35:54, 1, replace = TRUE)
} else if (FLARE[i,"age.group"] == "55+") {
FLARE[i,"age"] <- sample(55:68, 1, replace = TRUE)
}
}
knitr::kable(head(FLARE[,c(1:10)]), caption = "FLARE raw data (6 first rows)")
| F1 | F2 | L1 | L2 | A1 | A2 | R1 | R2 | E1 | E2 |
|---|---|---|---|---|---|---|---|---|---|
| 5 | 2 | 5 | 3 | 4 | 1 | 4 | 4 | 4 | 4 |
| 6 | 2 | 6 | 2 | 2 | 4 | 1 | 4 | 1 | 4 |
| 5 | 7 | 2 | 7 | 6 | 3 | 7 | 5 | 7 | 5 |
| 7 | 6 | 3 | 5 | 5 | 7 | 3 | 4 | 3 | 4 |
| 6 | 1 | 2 | 1 | 2 | 2 | 1 | 4 | 1 | 4 |
| 5 | 1 | 7 | 1 | 3 | 2 | 2 | 3 | 2 | 3 |
write.csv2(FLARE[,c(1:10)], "FLARE.csv", row.names = FALSE)
if (!require(ggplot2)) install.packages("ggplot2")
## Loading required package: ggplot2
library(ggplot2)
ggplot(data = FLARE, aes(x = L1, fill = after_stat(count))) +
geom_bar(width = 0.4) +
coord_cartesian(xlim = c(1, 7)) +
theme_minimal() +
guides(fill = "none") +
labs(x = "L1", y = "Count", title = "Distribution of L1 raw answers (Learnability)", subtitle = paste("From FLARE questionnaire. N=", nrow(FLARE), sep = ""))
Transform data : y = (x - 1) / 6 * 99 + 1
FLARE.tr <- (FLARE[,1:10] - 1) / 6 * 99 + 1
a <- rowMeans(FLARE.tr[,c("F1","F2")])
b <- rowMeans(FLARE.tr[,c("L1","L2")])
c <- rowMeans(FLARE.tr[,c("A1","A2")])
d <- rowMeans(FLARE.tr[,c("R1","R2")])
e <- rowMeans(FLARE.tr[,c("E1","E2")])
FLARE.sub <- data.frame(a,b,c,d,e)
colnames(FLARE.sub) <- c("F","L","A","R","E")
knitr::kable(head(FLARE.sub), caption = "FLARE transformed data (6 first rows)")
| F | L | A | R | E |
|---|---|---|---|---|
| 42.25 | 50.50 | 25.75 | 50.50 | 50.50 |
| 50.50 | 50.50 | 34.00 | 25.75 | 25.75 |
| 83.50 | 58.75 | 58.75 | 83.50 | 83.50 |
| 91.75 | 50.50 | 83.50 | 42.25 | 42.25 |
| 42.25 | 9.25 | 17.50 | 25.75 | 25.75 |
| 34.00 | 50.50 | 25.75 | 25.75 | 25.75 |
limits <- c(0, 100)
market <- 57
ggplot(data = FLARE.sub) +
geom_histogram(aes(F, fill = after_stat(x)), binwidth = 8) +
coord_cartesian(xlim = limits) +
scale_fill_gradient(name = "Score", low = "red", high = "green", limits = limits) +
geom_vline(aes(xintercept = mean(F)), colour = "DodgerBlue") +
geom_text(aes(x = mean(F) + 1, label = round(mean(F)), y = 40), colour = "DodgerBlue", hjust = 0) +
# geom_rect(aes(xmin = CI[2], xmax = CI[3]), ymin = -200, ymax = 200, fill = "DodgerBlue", alpha = 0.25) +
geom_vline(aes(xintercept = market), linetype = "dashed", colour = "#FF7F7F", linewidth = 0.5) +
geom_text(aes(x = market + 1, label = "Market", y = 45), colour = "#FF7F7F", hjust = 0) +
labs(x = "Functional value scores", y = "Density", title ="Distribution of functional value scores", subtitle = paste("w/ mean and market average & IC. N=", nrow(FLARE), sep = ""), caption = "95% CI bootstrap")
a <- c(mean(FLARE.sub$F),mean(FLARE.sub$L),mean(FLARE.sub$A),mean(FLARE.sub$R),mean(FLARE.sub$E))
FLARE.stat.sub <- data.frame(a, subscale)
colnames(FLARE.stat.sub) <- c("Mean", "Subscale")
knitr::kable(head(FLARE.stat.sub), caption = "FLARE subscales data (6 first rows)")
| Mean | Subscale |
|---|---|
| 75.70375 | F |
| 74.46625 | L |
| 36.39250 | A |
| 45.92125 | R |
| 45.92125 | E |
ggplot(FLARE.stat.sub, aes(Mean, Subscale, fill = Mean)) +
geom_col(alpha = 1, width = 0.4) +
coord_cartesian(xlim = limits) +
# geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.3, color = "white") +
scale_fill_gradient(name = "Mean score", low = "red", high = "green", limits = limits) +
geom_vline(aes(xintercept = mean(Mean)), colour = "DodgerBlue") +
geom_text(aes(x = mean(Mean) + 1, label=paste(round(mean(Mean),1)), y = 3), colour = "DodgerBlue", hjust = 0) +
geom_vline(aes(xintercept = market), linetype = "dashed", colour = "#FF7F7F", linewidth = 0.5) +
geom_text(aes(x = market + 1, label = "Market", y = 2), colour = "#FF7F7F", hjust = 0) +
theme_minimal() +
labs(title ="FLARE mean scores", subtitle = paste("By subscale. w/ general mean & market average. N=", nrow(FLARE), sep = ""), x = "Mean", y = "Subscale", caption = "")
boot <- function(x) {
n.sample <- 10000
# initializing bootstrap
table.S <- numeric(n.sample)
# loop to generate means from original data
for(i in 1:n.sample) {
table.S[i] <- mean(sample(x, 10, replace=T))
}
# sort generated means
table.S.sorted <- sort(table.S)
ci1 <- n.sample*0.025
ci2 <- n.sample - (n.sample*0.025)
# catch conf int by selecting heads and tails
IC95.b <- c(table.S.sorted[ci1], table.S.sorted[ci2])
return(c(mean(x), IC95.b[1], IC95.b[2]))
}
FLARE.stat.inf <- list()
a <- c("F1", "F2", "L1", "L2", "A1", "A2", "R1", "R2", "E1", "E2")
for(i in 1:length(a)) {
FLARE.stat.inf[[i]] <- boot(as.vector(FLARE.tr[,a[i]]))
}
FLARE.stat.inf <- t(data.frame(FLARE.stat.inf))
b <- rep(subscale, each = 2)
FLARE.stat.inf <- data.frame(a, b, FLARE.stat.inf)
colnames(FLARE.stat.inf) <- c("Item", "Subscale", "Mean", "CI.low", "CI.high")
rownames(FLARE.stat.inf) <- c()
knitr::kable(head(FLARE.stat.inf), caption = "FLARE inferred data (6 first rows)")
| Item | Subscale | Mean | CI.low | CI.high |
|---|---|---|---|---|
| F1 | F | 82.8400 | 73.60 | 91.75 |
| F2 | F | 68.5675 | 47.20 | 86.80 |
| L1 | L | 73.1875 | 57.10 | 86.80 |
| L2 | L | 75.7450 | 57.10 | 91.75 |
| A1 | A | 33.6700 | 17.50 | 52.15 |
| A2 | A | 39.1150 | 22.45 | 57.10 |
ggplot(data = FLARE.stat.inf, aes(x = Mean, y = Item, fill = Subscale)) +
geom_col(position = "dodge", alpha = 0.7, width = 0.8) +
geom_pointrange(aes(xmin = CI.low, xmax = CI.high), colour = "darkslategray") +
geom_text(aes(x = 100, label = paste(round(Mean), "±", round(CI.high-Mean))), hjust = 1, nudge_x = 0, nudge_y = 0, size = 4, colour = "darkslategray") +
coord_cartesian(xlim = limits) +
theme_minimal() +
labs(title ="FLARE mean scores", subtitle = paste("By item ans subscale. N=", nrow(FLARE), sep = ""), x = "Item", y = "M ± CI", fill = "Subscale", caption = "95% CI (bootstrap)")
Dimensions :
a <- c(FLARE.tr$F1, FLARE.tr$F2, FLARE.tr$L1, FLARE.tr$L2, FLARE.tr$E1, FLARE.tr$E2)
a <- boot(a)
b <- c(FLARE.tr$A1, FLARE.tr$A2, FLARE.tr$R1, FLARE.tr$R2)
b <- boot(b)
c <- c("Quality", "Pleasure")
FLARE.dim <- data.frame(a, b)
FLARE.dim <- t(data.frame(FLARE.dim))
FLARE.dim <- data.frame(c, FLARE.dim)
colnames(FLARE.dim) <- c("Dimension", "Mean", "CI.low", "CI.high")
rownames(FLARE.dim) <- c()
knitr::kable(head(FLARE.dim), caption = "FLARE dimensions data (6 first rows)")
| Dimension | Mean | CI.low | CI.high |
|---|---|---|---|
| Quality | 65.36375 | 47.20 | 81.85 |
| Pleasure | 41.15687 | 25.75 | 57.10 |
xmin <- c(01, 01, 50, 50)
xmax <- c(50, 50, 100, 100)
ymin <- c(01, 50, 01, 50)
ymax <- c(50, 100, 50, 100)
labs <- c("Low quality and pleasure", "Low quality", "Low pleasure", "High quality and pleasure")
tiles <- data.frame(xmin,ymin,xmax,ymax,labs)
ggplot(FLARE.dim, aes()) +
coord_cartesian(xlim = limits, ylim = limits) +
geom_rect(data = tiles, mapping = aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax), alpha = 0.08, color = "white") +
geom_text(data = tiles, mapping = aes(x = xmin+(48/2), y = ymin+(48/2), label = labs), alpha = 0.3) +
geom_point(aes(x = Mean[1], y = Mean[2]), colour = "DodgerBlue", fill = "DodgerBlue", show.legend = T, size = 3) +
geom_text(aes(y = Mean[2], label="M ± CI", x = Mean[1] + 6), colour = "DodgerBlue", angle = 0) +
geom_rect(aes(xmin = CI.low[1], xmax = CI.high[1], ymin = CI.low[2], ymax = CI.high[2]), fill = 'DodgerBlue', alpha = 0.15, show.legend = T) +
geom_point(aes(x = market, y = market), colour = "#FF7F7F", fill = "#FF7F7F", show.legend = T, size = 3) +
geom_text(aes(y = market, label = "Market", x = market + 6), colour = "#FF7F7F", angle = 0) +
theme_light() +
theme(legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
labs(title ="FLARE dimensions mean score analysis", subtitle = paste("w/ market average & IC. N=", nrow(FLARE), sep = ""), x = "Quality", y = "Pleasure", caption = "95% CI (bootstrap)")
a <- rowMeans(FLARE.tr[,c("F1","F2")])
b <- rowMeans(FLARE.tr[,c("L1","L2")])
c <- rowMeans(FLARE.tr[,c("A1","A2")])
d <- rowMeans(FLARE.tr[,c("R1","R2")])
e <- rowMeans(FLARE.tr[,c("E1","E2")])
a <- boot(a)
b <- boot(b)
c <- boot(c)
d <- boot(d)
e <- boot(e)
FLARE.stat.sub.inf <- t(data.frame(a,b,c,d,e))
FLARE.stat.sub.inf <- data.frame(FLARE.stat.sub.inf, subscale)
colnames(FLARE.stat.sub.inf) <- c("Mean","CI.low","CI.high","Subscale")
rownames(FLARE.stat.sub.inf) <- c()
knitr::kable(head(FLARE.stat.sub.inf), caption = "FLARE mean scores data (6 first rows)")
| Mean | CI.low | CI.high | Subscale |
|---|---|---|---|
| 75.70375 | 63.700 | 85.975 | F |
| 74.46625 | 62.875 | 85.150 | L |
| 36.39250 | 24.100 | 50.500 | A |
| 45.92125 | 36.475 | 55.450 | R |
| 45.92125 | 36.475 | 55.450 | E |
xmin <- c(0, 55, 70, 85)
xmax <- c(55, 70, 85, 100)
ymin <- rep(0.5, 4)
ymax <- rep(0.8, 4)
labs <- c("Very low", "Low", "Moderate", "High")
grades <- data.frame(xmin, xmax, ymin, ymax, labs)
ggplot() +
coord_cartesian(xlim = limits, ylim = c(0, 1)) +
geom_rect(data = grades, mapping = aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = labs), alpha = 0.50) +
geom_text(data = grades, mapping = aes(x = xmax - ((xmax - xmin) / 2), y = ymin + 0.15, label = labs), alpha = 0.4) +
scale_fill_manual(values = c("#B5EAD7", "#FFEDCC", "#D6EDC1", "#FFB6B9")) +
geom_point(aes(x = market, y = 0.37), color = "#FF7F7F", size = 4) +
geom_text(aes(x = market, label = "Market", y = 0.43), colour = "#FF7F7F") +
geom_point(data = FLARE.stat.sub.inf, aes(x = Mean[1], y = 0.25), color = "DodgerBlue", size = 4) +
geom_errorbarh(data = FLARE.stat.sub.inf, aes(y = 0.25, xmin = CI.low[1], xmax = CI.high[1]), color = "DodgerBlue") +
geom_text(data = FLARE.stat.sub.inf, aes(x = Mean[1], label=paste(round(Mean[1]),"±", round(CI.high[1]-Mean[1])), y = 0.31), colour = "DodgerBlue") +
theme_minimal() +
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
labs(title ="FLARE functional value mean score interpretation", x = "Functional value mean score", y = " ", subtitle = paste("w/ market average & IC. N=", nrow(FLARE), sep = ""), caption = "95% CI (bootstrap)")
knitr::kable(head(FLARE), caption = "FLARE raw and sociological data (6 first rows)")
| F1 | F2 | L1 | L2 | A1 | A2 | R1 | R2 | E1 | E2 | age.group | age |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 5 | 2 | 5 | 3 | 4 | 1 | 4 | 4 | 4 | 4 | 55+ | 65 |
| 6 | 2 | 6 | 2 | 2 | 4 | 1 | 4 | 1 | 4 | 35-54 | 37 |
| 5 | 7 | 2 | 7 | 6 | 3 | 7 | 5 | 7 | 5 | 35-54 | 49 |
| 7 | 6 | 3 | 5 | 5 | 7 | 3 | 4 | 3 | 4 | 18-24 | 21 |
| 6 | 1 | 2 | 1 | 2 | 2 | 1 | 4 | 1 | 4 | 55+ | 63 |
| 5 | 1 | 7 | 1 | 3 | 2 | 2 | 3 | 2 | 3 | 35-54 | 36 |
ggplot(FLARE, aes(x = F2, y = age.group, color = F2)) +
geom_count() +
scale_color_gradient(name = "Value", low = "red", high = "green", limits = c(1,7)) +
theme_minimal() +
labs(title ="FLARE functional value x age group", x = "Functional value", y = "Age group", subtitle = paste("Frequency of FLARE answers by age group. N=", nrow(FLARE), sep = ""), caption = "")
# Run chisq.test() and extract residuals as a data frame
residuals <- data.frame(chisq.test(FLARE$F2, FLARE$age.group)$residuals)
## Warning in chisq.test(FLARE$F2, FLARE$age.group): Chi-squared approximation may
## be incorrect
# Rename the columns of the residuals data frame
colnames(residuals) <- c("F2", "age.group", "Residuals")
# Create a scatterplot with text labels
ggplot(data = residuals, aes(x = F2, y = age.group, color = as.numeric(F2))) +
scale_color_gradient(name = "Value", low = "#FF4040", high = "#00B050", limits = c(1,7)) +
geom_text(aes(label = format(round(Residuals, 2), nsmall = 2)), nudge_x = 0, nudge_y = 0, size = 4) +
labs(title ="Chisq.test residuals of FLARE functional value x age group",
x = "Functional value",
y = "Age Group",
subtitle = paste("N=", nrow(FLARE), sep = ""),
caption = "High absolute values may indicate a relationship between the two variable modalities.") +
theme_minimal()
FLARE.biv.quant <- data.frame(FLARE.sub$F, FLARE$age)
colnames(FLARE.biv.quant) <- c("F", "age")
knitr::kable(head(FLARE.biv.quant), caption = "FLARE functional value and age data (6 first rows)")
| F | age |
|---|---|
| 42.25 | 65 |
| 50.50 | 37 |
| 83.50 | 49 |
| 91.75 | 21 |
| 42.25 | 63 |
| 34.00 | 36 |
ggplot(FLARE.biv.quant, aes(x = age, y = F)) +
geom_jitter(aes(), color = "darkslategray") +
geom_smooth(method = "lm", formula = y ~ x, se = F, colour = "DodgerBlue") +
geom_text(aes(x = mean(c(age,F)), y = mean(c(age,F))+5, label=paste("r = ", round(cor(age,F),2))), colour = "DodgerBlue") +
theme_minimal() +
theme(legend.position = "none", panel.grid.major = element_blank()) +
labs(title = "FLARE age x functional value", x = "Age", y = "Functional value", subtitle = paste("Correlation analysis. N=", nrow(FLARE), sep = ""), caption = "Jitter added for visualisation confort")