About

This educational notebook accompanies an article presenting the opportunities of analyzing data from standardized questionnaires for UX designers and researchers.

The questionnaire serving as an example for analyzes and visualizations is a dummy and has no psychometric validity. It was created from scratch and any resemblance to any particular questionnaire is purely coincidental.

Contact: paul.amat@live.fr ; https://paulamat.design/

Items

Subscales :

F. Functionality
L. Learnability
A. Aesthetics
R. Rewarding
E. Efficiency

Dimensions :

Quality (F, L, E)
Pleasure (A, R)

Items :

F1: The product’s features are easy to understand.
F2: I can accomplish my tasks efficiently using this product.
L1: It was easy to learn how to use this product.
L2: I felt confident using this product after just a short time.
A1: The visual design of this product is appealing.
A2: The product’s interface is easy on the eyes.
R1: I feel accomplished after using this product.
R2: This product provides me pleasure.
E1: Using this product saves me time and effort.
E2: I can complete my tasks quickly and easily with this product.

Raw data

set.seed(1)

# set number of participants
n <- 200

# initiate list of variables
FLARE <- list()

# initiate subscales
subscale <- c("F","L","A","R","E")

# initiate 2 variables per subscale
FLARE <- rep(list(numeric(0)), length(subscale)*2)
names(FLARE) <- paste0(rep(subscale, each=2), rep(1:2, length(subscale)))

# initiate probabilities
centered <- c(0.1, 0.1, 0.1, 0.7, 0.05, 0.05, 0.05)
centered2 <- c(0.05, 0.05, 0.05, 0.8, 0.025, 0.025, 0.025)
skewed.left <- c(0.05, 0.05, 0.05, 0.05, 0.35, 0.7, 0.25)
skewed.left2 <- c(0.05, 0.05, 0.05, 0.05, 0.15, 0.25, 0.4)
skewed.right <- c(0.2, 0.3, 0.2, 0.1, 0.1, 0.05, 0.05)
skewed.right2 <- c(0.15, 0.2, 0.2, 0.15, 0.1, 0.1, 0.05)

# populate variables
a <- round(pmin(pmax(rnorm(n, mean = 6, sd = 1), 1), 7))
c <- sample(1:7, n, replace = TRUE, prob = skewed.left)
b <- d <- sample(1:7, n, replace = TRUE, prob = skewed.left2)
e <- sample(1:7, n, replace = TRUE, prob = skewed.right)
f <- sample(1:7, n, replace = TRUE, prob = skewed.right2)
g <- i <- sample(1:7, n, replace = TRUE, prob = centered)
h <- j <- sample(1:7, n, replace = TRUE, prob = centered2)

age.list <- c("18-24", "25-34", "35-54", "55+")
age.group <- sample(age.list, n, replace = T)
age <- c(rep(NA, 200))

# make it a data frame
FLARE <- data.frame(a, b, c, d, e, f, g, h, i, j, age.group, age)
colnames(FLARE) <- c("F1", "F2", "L1", "L2", "A1", "A2", "R1", "R2", "E1", "E2", "age.group", "age")

for (i in 1:nrow(FLARE)) {
  if (FLARE[i,"F2"] <= 6 && FLARE[i,"age.group"] == "18-24") {
    FLARE[i,"F2"] <- sample(1:7, 1, replace = TRUE, prob = skewed.left)
  } else if (FLARE[i,"F2"] >= 2 && FLARE[i,"age.group"] == "55+") {
    FLARE[i,"F2"] <- sample(1:7, 1, replace = TRUE, prob = skewed.right)
  }
}

for (i in 1:nrow(FLARE)) {
  if (FLARE[i,"age.group"] == "18-24") {
    FLARE[i,"age"] <- sample(18:24, 1, replace = TRUE)
  } else if (FLARE[i,"age.group"] == "25-34") {
    FLARE[i,"age"] <- sample(25:34, 1, replace = TRUE)
  } else if (FLARE[i,"age.group"] == "35-54") {
    FLARE[i,"age"] <- sample(35:54, 1, replace = TRUE)
  } else if (FLARE[i,"age.group"] == "55+") {
    FLARE[i,"age"] <- sample(55:68, 1, replace = TRUE)
  }
}

knitr::kable(head(FLARE[,c(1:10)]), caption = "FLARE raw data (6 first rows)")

FLARE raw data (6 first rows)
F1	F2	L1	L2	A1	A2	R1	R2	E1	E2
5	2	5	3	4	1	4	4	4	4
6	2	6	2	2	4	1	4	1	4
5	7	2	7	6	3	7	5	7	5
7	6	3	5	5	7	3	4	3	4
6	1	2	1	2	2	1	4	1	4
5	1	7	1	3	2	2	3	2	3

write.csv2(FLARE[,c(1:10)], "FLARE.csv", row.names = FALSE)

if (!require(ggplot2)) install.packages("ggplot2")

## Loading required package: ggplot2

library(ggplot2)

ggplot(data = FLARE, aes(x = L1, fill = after_stat(count))) +
  geom_bar(width = 0.4) +
  coord_cartesian(xlim = c(1, 7)) +
  theme_minimal() +
  guides(fill = "none") +
  labs(x = "L1", y = "Count", title = "Distribution of L1 raw answers (Learnability)", subtitle = paste("From FLARE questionnaire. N=", nrow(FLARE), sep = ""))

Transformed data

Transform data : y = (x - 1) / 6 * 99 + 1

FLARE.tr <- (FLARE[,1:10] - 1) / 6 * 99 + 1

a <- rowMeans(FLARE.tr[,c("F1","F2")])
b <- rowMeans(FLARE.tr[,c("L1","L2")])
c <- rowMeans(FLARE.tr[,c("A1","A2")])
d <- rowMeans(FLARE.tr[,c("R1","R2")])
e <- rowMeans(FLARE.tr[,c("E1","E2")])

FLARE.sub <- data.frame(a,b,c,d,e)
colnames(FLARE.sub) <- c("F","L","A","R","E")

knitr::kable(head(FLARE.sub), caption = "FLARE transformed data (6 first rows)")

FLARE transformed data (6 first rows)
F	L	A	R	E
42.25	50.50	25.75	50.50	50.50
50.50	50.50	34.00	25.75	25.75
83.50	58.75	58.75	83.50	83.50
91.75	50.50	83.50	42.25	42.25
42.25	9.25	17.50	25.75	25.75
34.00	50.50	25.75	25.75	25.75

limits <- c(0, 100)

market <- 57

ggplot(data = FLARE.sub) +
  geom_histogram(aes(F, fill = after_stat(x)), binwidth = 8) +
  coord_cartesian(xlim = limits) +
  scale_fill_gradient(name = "Score", low = "red", high = "green", limits = limits) +
  
  geom_vline(aes(xintercept = mean(F)), colour = "DodgerBlue") +
  geom_text(aes(x = mean(F) + 1, label = round(mean(F)), y = 40), colour = "DodgerBlue", hjust = 0) +
  # geom_rect(aes(xmin = CI[2], xmax = CI[3]), ymin = -200, ymax = 200, fill = "DodgerBlue", alpha = 0.25) +
  
  geom_vline(aes(xintercept = market), linetype = "dashed", colour = "#FF7F7F", linewidth = 0.5) +
  geom_text(aes(x = market + 1, label = "Market", y = 45), colour = "#FF7F7F", hjust = 0) +
  
  labs(x = "Functional value scores", y = "Density", title ="Distribution of functional value scores", subtitle = paste("w/ mean and market average & IC. N=", nrow(FLARE), sep = ""), caption = "95% CI bootstrap")

Subscales

a <- c(mean(FLARE.sub$F),mean(FLARE.sub$L),mean(FLARE.sub$A),mean(FLARE.sub$R),mean(FLARE.sub$E))

FLARE.stat.sub <- data.frame(a, subscale)
colnames(FLARE.stat.sub) <- c("Mean", "Subscale")

knitr::kable(head(FLARE.stat.sub), caption = "FLARE subscales data (6 first rows)")

FLARE subscales data (6 first rows)
Mean	Subscale
75.70375	F
74.46625	L
36.39250	A
45.92125	R
45.92125	E

ggplot(FLARE.stat.sub, aes(Mean, Subscale, fill = Mean)) +
  geom_col(alpha = 1, width = 0.4) +
  coord_cartesian(xlim = limits) +
  # geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.3, color = "white") +
  scale_fill_gradient(name = "Mean score", low = "red", high = "green", limits = limits) +
  geom_vline(aes(xintercept = mean(Mean)), colour = "DodgerBlue") +
  geom_text(aes(x = mean(Mean) + 1, label=paste(round(mean(Mean),1)), y = 3), colour = "DodgerBlue", hjust = 0) +
  geom_vline(aes(xintercept = market), linetype = "dashed", colour = "#FF7F7F", linewidth = 0.5) +
  geom_text(aes(x = market + 1, label = "Market", y = 2), colour = "#FF7F7F", hjust = 0) +
  theme_minimal() +
  labs(title ="FLARE mean scores", subtitle = paste("By subscale. w/ general mean & market average. N=", nrow(FLARE), sep = ""), x = "Mean", y = "Subscale", caption = "")

Infere

boot <- function(x) {
  
  n.sample <- 10000
  
  # initializing bootstrap
  table.S <- numeric(n.sample)
  
  # loop to generate means from original data
  for(i in 1:n.sample) {
    table.S[i] <- mean(sample(x, 10, replace=T))
  }
  
  # sort generated means
  table.S.sorted <- sort(table.S)
  
  ci1 <- n.sample*0.025
  ci2 <- n.sample - (n.sample*0.025)
  
  # catch conf int by selecting heads and tails
  IC95.b <- c(table.S.sorted[ci1], table.S.sorted[ci2])
  
  return(c(mean(x), IC95.b[1], IC95.b[2]))
  }

FLARE.stat.inf <- list()

a <- c("F1", "F2", "L1", "L2", "A1", "A2", "R1", "R2", "E1", "E2")

for(i in 1:length(a)) {
    FLARE.stat.inf[[i]] <- boot(as.vector(FLARE.tr[,a[i]]))
  }

FLARE.stat.inf <- t(data.frame(FLARE.stat.inf))

b <- rep(subscale, each = 2)

FLARE.stat.inf <- data.frame(a, b, FLARE.stat.inf)

colnames(FLARE.stat.inf) <- c("Item", "Subscale", "Mean", "CI.low", "CI.high")
rownames(FLARE.stat.inf) <- c()

knitr::kable(head(FLARE.stat.inf), caption = "FLARE inferred data (6 first rows)")

FLARE inferred data (6 first rows)
Item	Subscale	Mean	CI.low	CI.high
F1	F	82.8400	73.60	91.75
F2	F	68.5675	47.20	86.80
L1	L	73.1875	57.10	86.80
L2	L	75.7450	57.10	91.75
A1	A	33.6700	17.50	52.15
A2	A	39.1150	22.45	57.10

ggplot(data = FLARE.stat.inf, aes(x = Mean, y = Item, fill = Subscale)) +
  geom_col(position = "dodge", alpha = 0.7, width = 0.8) +
  geom_pointrange(aes(xmin = CI.low, xmax = CI.high), colour = "darkslategray") +
  geom_text(aes(x = 100, label = paste(round(Mean), "±", round(CI.high-Mean))), hjust = 1, nudge_x = 0, nudge_y = 0, size = 4, colour = "darkslategray") +
  coord_cartesian(xlim = limits) +
  theme_minimal() +
  labs(title ="FLARE mean scores", subtitle = paste("By item ans subscale. N=", nrow(FLARE), sep = ""), x = "Item", y = "M ± CI", fill = "Subscale", caption = "95% CI (bootstrap)")

Dimensions

Dimensions :

Quality (F, L, E)
Pleasure (A, R)

a <- c(FLARE.tr$F1, FLARE.tr$F2, FLARE.tr$L1, FLARE.tr$L2, FLARE.tr$E1, FLARE.tr$E2)
a <- boot(a)

b <- c(FLARE.tr$A1, FLARE.tr$A2, FLARE.tr$R1, FLARE.tr$R2)
b <- boot(b)

c <- c("Quality", "Pleasure")

FLARE.dim <- data.frame(a, b)

FLARE.dim <- t(data.frame(FLARE.dim))

FLARE.dim <- data.frame(c, FLARE.dim)

colnames(FLARE.dim) <- c("Dimension", "Mean", "CI.low", "CI.high")
rownames(FLARE.dim) <- c()

knitr::kable(head(FLARE.dim), caption = "FLARE dimensions data (6 first rows)")

FLARE dimensions data (6 first rows)
Dimension	Mean	CI.low	CI.high
Quality	65.36375	47.20	81.85
Pleasure	41.15687	25.75	57.10

xmin <- c(01, 01, 50, 50)
xmax <- c(50, 50, 100, 100)
ymin <- c(01, 50, 01, 50)
ymax <- c(50, 100, 50, 100)

labs <- c("Low quality and pleasure", "Low quality", "Low pleasure", "High quality and pleasure")

tiles <- data.frame(xmin,ymin,xmax,ymax,labs)

ggplot(FLARE.dim, aes()) +
  coord_cartesian(xlim = limits, ylim = limits) +
  
  geom_rect(data = tiles, mapping = aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax), alpha = 0.08, color = "white") +
  geom_text(data = tiles, mapping = aes(x = xmin+(48/2), y = ymin+(48/2), label = labs), alpha = 0.3) +
  
  geom_point(aes(x = Mean[1], y = Mean[2]), colour = "DodgerBlue", fill = "DodgerBlue", show.legend = T, size = 3) +
  geom_text(aes(y = Mean[2], label="M ± CI", x = Mean[1] + 6), colour = "DodgerBlue", angle = 0) +
  geom_rect(aes(xmin = CI.low[1], xmax = CI.high[1], ymin = CI.low[2], ymax = CI.high[2]), fill = 'DodgerBlue', alpha = 0.15, show.legend = T) +
  geom_point(aes(x = market, y = market), colour = "#FF7F7F", fill = "#FF7F7F", show.legend = T, size = 3) +
  geom_text(aes(y = market, label = "Market", x = market + 6), colour = "#FF7F7F", angle = 0) +
  
  theme_light() +
  theme(legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  labs(title ="FLARE dimensions mean score analysis", subtitle = paste("w/ market average & IC. N=", nrow(FLARE), sep = ""), x = "Quality", y = "Pleasure", caption = "95% CI (bootstrap)")

Mean score

a <- rowMeans(FLARE.tr[,c("F1","F2")])
b <- rowMeans(FLARE.tr[,c("L1","L2")])
c <- rowMeans(FLARE.tr[,c("A1","A2")])
d <- rowMeans(FLARE.tr[,c("R1","R2")])
e <- rowMeans(FLARE.tr[,c("E1","E2")])

a <- boot(a)
b <- boot(b)
c <- boot(c)
d <- boot(d)
e <- boot(e)

FLARE.stat.sub.inf <- t(data.frame(a,b,c,d,e))

FLARE.stat.sub.inf <- data.frame(FLARE.stat.sub.inf, subscale)

colnames(FLARE.stat.sub.inf) <- c("Mean","CI.low","CI.high","Subscale")
rownames(FLARE.stat.sub.inf) <- c()

knitr::kable(head(FLARE.stat.sub.inf), caption = "FLARE mean scores data (6 first rows)")

FLARE mean scores data (6 first rows)
Mean	CI.low	CI.high	Subscale
75.70375	63.700	85.975	F
74.46625	62.875	85.150	L
36.39250	24.100	50.500	A
45.92125	36.475	55.450	R
45.92125	36.475	55.450	E

xmin <- c(0, 55, 70, 85)
xmax <- c(55, 70, 85, 100)
ymin <- rep(0.5, 4)
ymax <- rep(0.8, 4)
labs <- c("Very low", "Low", "Moderate", "High")

grades <- data.frame(xmin, xmax, ymin, ymax, labs)

ggplot() +
  coord_cartesian(xlim = limits, ylim = c(0, 1)) +
  
  geom_rect(data = grades, mapping = aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = labs), alpha = 0.50) +
  geom_text(data = grades, mapping = aes(x = xmax - ((xmax - xmin) / 2), y = ymin + 0.15, label = labs), alpha = 0.4) +
  
  scale_fill_manual(values = c("#B5EAD7", "#FFEDCC", "#D6EDC1", "#FFB6B9")) +
  
  geom_point(aes(x = market, y = 0.37), color = "#FF7F7F", size = 4) +
  geom_text(aes(x = market, label = "Market", y = 0.43), colour = "#FF7F7F") +
  
  geom_point(data = FLARE.stat.sub.inf, aes(x = Mean[1], y = 0.25), color = "DodgerBlue", size = 4) +
  geom_errorbarh(data = FLARE.stat.sub.inf, aes(y = 0.25, xmin = CI.low[1], xmax = CI.high[1]), color = "DodgerBlue") +
  geom_text(data = FLARE.stat.sub.inf, aes(x = Mean[1], label=paste(round(Mean[1]),"±", round(CI.high[1]-Mean[1])), y = 0.31), colour = "DodgerBlue") +
  
  theme_minimal() +
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  labs(title ="FLARE functional value mean score interpretation", x = "Functional value mean score", y = " ", subtitle = paste("w/ market average & IC. N=", nrow(FLARE), sep = ""), caption = "95% CI (bootstrap)")

Bivariate - qualitative

knitr::kable(head(FLARE), caption = "FLARE raw and sociological data (6 first rows)")

FLARE raw and sociological data (6 first rows)
F1	F2	L1	L2	A1	A2	R1	R2	E1	E2	age.group	age
5	2	5	3	4	1	4	4	4	4	55+	65
6	2	6	2	2	4	1	4	1	4	35-54	37
5	7	2	7	6	3	7	5	7	5	35-54	49
7	6	3	5	5	7	3	4	3	4	18-24	21
6	1	2	1	2	2	1	4	1	4	55+	63
5	1	7	1	3	2	2	3	2	3	35-54	36

ggplot(FLARE, aes(x = F2, y = age.group, color = F2)) +
  geom_count() +
  scale_color_gradient(name = "Value", low = "red", high = "green", limits = c(1,7)) +
  theme_minimal() +
  labs(title ="FLARE functional value x age group", x = "Functional value", y = "Age group", subtitle = paste("Frequency of FLARE answers by age group. N=", nrow(FLARE), sep = ""), caption = "")

# Run chisq.test() and extract residuals as a data frame
residuals <- data.frame(chisq.test(FLARE$F2, FLARE$age.group)$residuals)

## Warning in chisq.test(FLARE$F2, FLARE$age.group): Chi-squared approximation may
## be incorrect

# Rename the columns of the residuals data frame
colnames(residuals) <- c("F2", "age.group", "Residuals")

# Create a scatterplot with text labels
ggplot(data = residuals, aes(x = F2, y = age.group, color = as.numeric(F2))) +
  scale_color_gradient(name = "Value", low = "#FF4040", high = "#00B050", limits = c(1,7)) +
  geom_text(aes(label = format(round(Residuals, 2), nsmall = 2)), nudge_x = 0, nudge_y = 0, size = 4) +
  labs(title ="Chisq.test residuals of FLARE functional value x age group", 
       x = "Functional value", 
       y = "Age Group", 
       subtitle = paste("N=", nrow(FLARE), sep = ""), 
       caption = "High absolute values may indicate a relationship between the two variable modalities.") +
  theme_minimal()

Bivariate - quantitative

FLARE.biv.quant <- data.frame(FLARE.sub$F, FLARE$age)
colnames(FLARE.biv.quant) <- c("F", "age")

knitr::kable(head(FLARE.biv.quant), caption = "FLARE functional value and age data (6 first rows)")

FLARE functional value and age data (6 first rows)
F	age
42.25	65
50.50	37
83.50	49
91.75	21
42.25	63
34.00	36

ggplot(FLARE.biv.quant, aes(x = age, y = F)) +
  geom_jitter(aes(), color = "darkslategray") +
  geom_smooth(method = "lm", formula = y ~ x, se = F, colour = "DodgerBlue") +
  geom_text(aes(x = mean(c(age,F)), y = mean(c(age,F))+5, label=paste("r = ", round(cor(age,F),2))), colour = "DodgerBlue") +
  theme_minimal() +
  theme(legend.position = "none", panel.grid.major = element_blank()) +
  labs(title = "FLARE age x functional value", x = "Age", y = "Functional value", subtitle = paste("Correlation analysis. N=", nrow(FLARE), sep = ""), caption = "Jitter added for visualisation confort")

FLARE

Paul Amat

2023-04-29