Bài thực hành mô hình hóa bằng thang đo Likert (Likert Scales hay Likert Data)
Thang đo Likert là thang đánh giá thường được sử dụng trong các surveys. Thang đo Likert thường có 5 cấp độ điển hình: 1. Rất không đồng ý 2. Không đồng ý 3. Trung lập 4. Đồng ý 5. Hoàn toàn đồng ý
Một số graph của thang đo Likert: 1. 100% stacked bar 2. Diverging bars with neutrals separate 3. Diverging bars with neutrals split 4. Small multiple bars
Ở bài viết này, tôi thực hành minh họa kiểu 1 - 100% stacked bar:
# Reference: http://daydreamingnumbers.com/blog/4-ways-to-visualize-likert-scales/
# Reference: https://rpubs.com/chidungkt/637110
library(tidyverse)
library(extrafont)
# Number of simulated answers drawn for each brand.
size <- 100000

# The five Likert answer levels, listed from most to least favourable.
responses <- c(
  "Like them a lot", "Like them", "Neutrals", "Dislike them", "Dislike a lot"
)

# Brand label vector: every brand name repeated `size` times, brand by brand.
brand <- rep(
  c(
    "Bounty", "Snickers", "Milky Way", "Mars",
    "Galaxy Caramel", "Twix", "Galaxy", "Teaser"
  ),
  each = size
)

# Simulated answers: `size` draws (with replacement) per brand, concatenated in
# the same brand order as `brand`. Each weight vector follows the order of
# `responses`; the draws happen sequentially, one brand after the other.
lik_res <- unlist(
  lapply(
    list(
      c(24, 36, 15, 11, 14), # Bounty
      c(23, 42, 17, 9, 9),   # Snickers
      c(16, 52, 23, 6, 3),   # Milky Way
      c(15, 57, 18, 7, 3),   # Mars
      c(27, 46, 17, 8, 2),   # Galaxy Caramel
      c(18, 57, 19, 5, 1),   # Twix
      c(29, 51, 15, 4, 1),   # Galaxy
      c(39, 42, 14, 4, 1)    # Teaser
    ),
    function(weights) {
      sample(responses, size = size, replace = TRUE, prob = weights)
    }
  ),
  use.names = FALSE
)
# Long table of the simulated survey: one row per answer, with its brand.
df_data <- tibble(brand = brand, lik_res = lik_res)

# Prepare data: count answers per brand and answer level, then express each
# count as a whole-number percentage of that brand's total and build the
# matching "<percent>%" label.
# complete() adds a zero-count row for every brand/answer pair that never
# occurred, so each brand always carries all levels in `responses`. Without
# it, a brand lacking the last level would be absent from `order_x` below and
# turn into NA once `brand` is converted to a factor with those levels.
df_ploting <- df_data %>%
  count(brand, lik_res) %>%
  complete(brand, lik_res = responses, fill = list(n = 0L)) %>%
  group_by(brand) %>%
  mutate(
    percent = round(100 * n / sum(n), 0),
    bar_text = paste0(percent, "%")
  ) %>%
  ungroup()

# Brand order for the plot axis: ascending percentage of the last answer
# level in `responses` ("Dislike a lot").
order_x <- df_ploting %>%
  filter(lik_res == responses[5]) %>%
  arrange(percent) %>%
  pull(brand)
# Plotting:
# Fill colours, given in the same order as `responses` (both are reversed
# together below, so my_colors[i] is the colour of responses[i]).
my_colors <- c("#3e6487", "#829cb2", "#c7cdd1", "#edad88", "#e36c33")
my_font <- "Roboto Condensed"
theme_set(theme_minimal())

# Turn brand and answer level into factors so that the axis order follows
# `order_x` and the stacking/legend order follows the reversed `responses`.
odered <- df_ploting %>%
  mutate(
    brand = factor(brand, levels = order_x),
    lik_res = factor(lik_res, levels = responses[5:1])
  )

# Horizontal stacked bar chart: one bar per brand, one segment per answer level.
ploting <- odered %>%
  ggplot(aes(x = brand, y = percent, fill = lik_res)) +
  geom_col(width = 0.8) +
  coord_flip() +
  scale_fill_manual(values = my_colors[5:1], name = "") +
  guides(fill = guide_legend(reverse = TRUE)) +
  # Breaks are set explicitly and labels are derived from them; a fixed label
  # vector without matching breaks fails whenever the automatic breaks differ
  # in number from the labels.
  scale_y_continuous(
    breaks = seq(0, 100, 25),
    labels = function(x) paste0(x, "%"),
    expand = c(0, 0)
  ) +
  theme(
    legend.position = "top",
    legend.key.height = unit(0.15, "mm"),
    text = element_text(family = my_font),
    plot.title = element_text(size = 20),
    plot.subtitle = element_text(size = 12, color = "grey20"),
    plot.caption = element_text(family = my_font, size = 12, colour = "grey20", face = "italic"),
    axis.text = element_text(color = "grey20", size = 10.2),
    plot.margin = unit(rep(0.7, 4), "cm"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  labs(
    x = NULL, y = NULL,
    title = "Everyone likes chocolates, but Bounty and Snickers get the\nmost extreme opinions",
    subtitle = "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale.",
    caption = "Source: http://daydreamingnumbers.com/blog/"
  )
# Label data for the "Dislike a lot" segment (responses[5]). Shares below 3%
# get no label -- presumably because the segment is too narrow to hold text.
df_text1 <- odered %>%
  filter(lik_res == responses[5], percent >= 3)

# Label data for the "Like them a lot" segment (responses[1]).
df_text2 <- odered %>%
  filter(lik_res == responses[1])

# Final plot: the stacked bars plus white percentage labels near both ends.
# The "Dislike a lot" label is pulled further from the 100% edge when it has
# more than two characters (i.e. a two-digit percentage), instead of keying
# the wider offset to one hard-coded brand name.
ploting +
  geom_text(
    data = df_text1,
    aes(
      x = brand,
      y = 100 - ifelse(nchar(bar_text) > 2, 2.3, 1.6),
      label = bar_text
    ),
    size = 4, color = "white", family = my_font
  ) +
  geom_text(
    data = df_text2,
    aes(x = brand, y = 2.3, label = bar_text),
    size = 4, color = "white", family = my_font
  )