The original post (http://daydreamingnumbers.com/blog/4-ways-to-visualize-likert-scales/) chỉ ra 4 cách thức hình ảnh hóa cho dữ liệu Likert Scale. Đây là kiểu dữ liệu định tính có thứ bậc, do vậy việc hình ảnh hóa loại dữ liệu này cũng cần được điều chỉnh theo thứ bậc để truyền tải insight sao cho người tiếp nhận thông tin dễ hiểu nhất có thể và mất ít thời gian nhất để giải mã thông điệp được truyền tải trong graph.
Bằng R/ggplot2 chúng ta có thể tạo ra một mẫu hình ảnh hóa dữ liệu cho loại dữ liệu này như sau:
Vấn đề mà chúng ta phải xử lí khi hình ảnh hóa dữ liệu Likert trong tình huống này là:
Dưới đây là R code cho hai version trên:
#============================
# Simulate data for plotting
#============================
# Builds `data_cus`: one row per simulated respondent, with the chocolate
# brand they were asked about (`brand`) and their Likert answer (`cus_res`).
#
# NOTE: the former `rm(list = ls())` was removed. It silently wipes the
# caller's workspace when the script is sourced and is not needed here,
# because every object used below is (re)defined by this script.

library(tibble)

# Fix the RNG seed so the simulated survey, and therefore the plots, are
# reproducible from run to run.
set.seed(2024)

# Number of simulated respondents per brand.
size <- 100000

# Likert answers, ordered from most positive to most negative.
responses <- c(
  "Like them a lot", "Like them", "Neutrals", "Dislike them", "Dislike a lot"
)

# Sampling weights for each brand, in the same order as `responses`.
# `sample()` rescales `prob`, so the weights are written as percentages.
response_weights <- list(
  "Bounty"         = c(24, 36, 15, 11, 14),
  "Snickers"       = c(23, 42, 17, 9, 9),
  "Milky Way"      = c(16, 52, 23, 6, 3),
  "Mars"           = c(15, 57, 18, 7, 3),
  "Galaxy Caramel" = c(27, 46, 17, 8, 2),
  "Twix"           = c(18, 57, 19, 5, 1),
  "Galaxy"         = c(29, 51, 15, 4, 1),
  "Teaser"         = c(39, 42, 14, 4, 1)
)

# `size` consecutive rows per brand, in the order the brands are listed above.
brand <- rep(names(response_weights), each = size)

# Draw `size` answers per brand, concatenated in the same brand order so that
# `brand[i]` and `cus_res[i]` describe the same respondent.
cus_res <- unlist(
  lapply(response_weights, function(w) {
    sample(responses, size = size, replace = TRUE, prob = w)
  }),
  use.names = FALSE
)

data_cus <- tibble(brand = brand, cus_res = cus_res)
#===========================
# Prepare data for plotting
#===========================
# Turns the respondent-level `data_cus` into one row per brand x answer with
# the share of that answer, then fixes the factor orders used by the plots.

library(dplyr)

# Share of each answer within a brand, rounded to whole percent, plus the
# label ("14%") that is printed on the bars.
df_for_ploting <- data_cus %>%
  count(brand, cus_res) %>%
  group_by(brand) %>%
  mutate(
    percent = round(100 * n / sum(n), 0),
    bar_text = paste0(percent, "%")
  ) %>%
  ungroup()

# Brands sorted by ascending share of the most negative answer
# (`responses[5]`); this becomes the order of the y axis.
order_y <- df_for_ploting %>%
  filter(cus_res == responses[5]) %>%
  arrange(percent) %>%
  pull(brand)

# A brand with no such answer at all has no row in the filter above and would
# turn into NA in the factor below. Treat it as 0% and put it first.
order_y <- c(setdiff(unique(df_for_ploting$brand), order_y), order_y)

# `cus_res` levels are the reverse of `responses`, i.e. they run from the most
# negative to the most positive answer.
df_odered <- df_for_ploting %>%
  mutate(
    brand = factor(brand, levels = order_y),
    cus_res = factor(cus_res, levels = rev(responses))
  )
#---------------------
# Data Vis: Version 1
#---------------------
# 100% stacked horizontal bars, one per brand, with the legend on top and the
# two extreme answers labelled with their percentage inside the bars.

library(dplyr)
library(ggplot2)
library(showtext)
library(ggtext) # Ref: https://cran.r-project.org/web/packages/ggtext/index.html

# Prepare colors for plotting:
col_dislike_alot <- "#e36c33"
col_dislike <- "#edad88"
col_neutral <- "#c7cdd1"
col_like <- "#829cb2"
col_like_alot <- "#3e6487"

# Answer -> fill colour lookup used by scale_fill_manual().
likert_fill <- c(
  "Like them a lot" = col_like_alot,
  "Like them" = col_like,
  "Neutrals" = col_neutral,
  "Dislike them" = col_dislike,
  "Dislike a lot" = col_dislike_alot
)

# Select font for the graph (registered from Google Fonts by showtext):
my_font <- "Roboto Condensed"
font_add_google(name = my_font, family = my_font)
showtext_auto()

# Prepare title + subtitle + caption.
# The title is markdown/HTML (rendered by ggtext::element_markdown below); the
# highlighted phrases reuse the palette colours so they cannot drift apart.
p_title <- paste0(
  "Everyone <span style = 'color:", col_like_alot,
  "'>likes chocolates</span>, but Bounty and Snickers get ",
  "<span style = 'color:", col_dislike_alot,
  "'>the most extreme opinions</span>"
)

p_subtitle <- "Likert scale is a type of rating scale commonly used in surveys. When responding to a Likert type question,\nrespondents simply state their level of agreement or disagreement on a symmetric agree-disagree scale."

p_caption <- "Source: Nielsen Surveys | Graphic Designer: Nguyen Chi Dung"

# Session-wide default theme; later plots in this script inherit it as well.
theme_set(theme_minimal())

gg1 <- df_odered %>%
  ggplot(aes(y = brand, x = percent, fill = cus_res)) +
  # position = "fill" rescales every bar to the 0-1 range.
  geom_col(width = 0.8, position = "fill") +
  labs(title = p_title, subtitle = p_subtitle, caption = p_caption) +
  scale_fill_manual(values = likert_fill) +
  # Breaks are pinned explicitly so they always match the five fixed labels.
  scale_x_continuous(
    expand = c(0, 0),
    breaks = seq(0, 1, 0.25),
    labels = paste0(seq(0, 100, 25), "%")
  ) +
  scale_y_discrete(expand = c(0, 0)) +
  theme(
    text = element_text(family = my_font),
    plot.margin = unit(rep(0.7, 4), "cm"),
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(size = 11, family = my_font, color = "grey10"),
    legend.key.height = unit(0.35, "cm"),
    legend.key.width = unit(0.27 * 3, "cm"),
    axis.title = element_blank(),
    axis.text = element_text(color = "grey30", size = 11, family = my_font),
    panel.grid.minor = element_blank(),
    # NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
    # `size` is kept so that older ggplot2 versions still work.
    panel.grid.major.x = element_line(color = "grey70", size = 0.8),
    plot.title = element_markdown(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 11.5, color = "grey10"),
    plot.caption = element_text(
      size = 10.5, color = "grey40", vjust = -1.5, hjust = 0
    ),
    plot.title.position = "plot",
    plot.caption.position = "plot"
  )

# For displaying percent of "Dislike a lot":
df_for_text1 <- df_odered %>%
  filter(cus_res == "Dislike a lot")

# For displaying percent of "Like them a lot":
df_for_text2 <- df_odered %>%
  filter(cus_res == "Like them a lot")

# Add text layers (the result is not assigned, so it is printed at top level).
# "Dislike a lot" labels are anchored at the right edge (x = 1) and skipped
# for shares of 2% or less; "Like them a lot" labels sit near the left edge.
gg1 +
  geom_text(
    data = df_for_text1 %>% filter(percent > 2),
    aes(y = brand, x = 1, label = bar_text),
    size = 4, color = "white", family = my_font, hjust = 1.2
  ) +
  geom_text(
    data = df_for_text2,
    aes(y = brand, x = 0.03, label = bar_text),
    size = 4, color = "white", family = my_font
  )
#---------------------
# Data Vis: Version 2
#---------------------
# Same stacked bars as Version 1, but without a legend and without theme grid
# lines: the vertical guides are drawn by hand and the answer categories are
# written above the bars in their own colours.
# Relies on objects created for Version 1 (colours, font, title/subtitle/
# caption, df_for_text1, df_for_text2) and on the theme set there.

library(dplyr)
library(ggplot2)

# Vertical positions are derived from the number of brands instead of being
# hard-coded for eight of them.
n_brand <- nlevels(df_odered$brand)
grid_x <- seq(0, 1, 0.25)

gg2 <- ggplot() +
  # Hand-drawn guides at 0%, 25%, ..., 100%, spanning the full stack of bars
  # (each bar is 0.8 wide, hence the 0.4 overshoot at both ends).
  # NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept so that older ggplot2 versions still work.
  geom_segment(
    data = data.frame(x = grid_x, y = 0.6, yend = n_brand + 0.4),
    aes(x = x, xend = x, y = y, yend = yend),
    size = 0.7, color = "grey40"
  ) +
  geom_col(
    data = df_odered,
    aes(y = brand, x = percent, fill = cus_res),
    width = 0.8, position = "fill", show.legend = FALSE
  ) +
  labs(title = p_title, subtitle = p_subtitle, caption = p_caption) +
  scale_fill_manual(
    values = c(
      "Like them a lot" = col_like_alot,
      "Like them" = col_like,
      "Neutrals" = col_neutral,
      "Dislike them" = col_dislike,
      "Dislike a lot" = col_dislike_alot
    )
  ) +
  # Breaks are pinned explicitly so they always match the five fixed labels.
  scale_x_continuous(
    expand = c(0, 0),
    breaks = grid_x,
    labels = paste0(seq(0, 100, 25), "%")
  ) +
  scale_y_discrete(expand = c(0, 0)) +
  theme(
    text = element_text(family = my_font),
    plot.margin = unit(rep(0.7, 4), "cm"),
    axis.title = element_blank(),
    axis.text = element_text(color = "grey30", size = 11, family = my_font),
    panel.grid = element_blank(),
    plot.title = element_markdown(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 11.5, color = "grey10"),
    plot.caption = element_text(
      size = 10.5, color = "grey40", vjust = -1.5, hjust = 0
    ),
    plot.title.position = "plot",
    plot.caption.position = "plot"
  )

# Category names one row above the top bar, replacing the legend. See
# http://daydreamingnumbers.com/blog/4-ways-to-visualize-likert-scales/
# The x positions are hand-tuned; labels and colours both follow the order of
# `responses`. The result is not assigned, so it is printed at top level.
gg2 +
  annotate(
    "text",
    x = c(0.071, 0.42, 0.67, 0.803, 0.94),
    y = n_brand + 1,
    label = responses,
    vjust = 1.2,
    color = c(
      col_like_alot, col_like, col_neutral, col_dislike, col_dislike_alot
    ),
    size = 4,
    fontface = "bold",
    family = my_font
  ) +
  geom_text(
    data = df_for_text1 %>% filter(percent > 2),
    aes(y = brand, x = 1, label = bar_text),
    size = 4, color = "white", family = my_font, hjust = 1.2
  ) +
  geom_text(
    data = df_for_text2,
    aes(y = brand, x = 0.03, label = bar_text),
    size = 4, color = "white", family = my_font
  )