This year I realized as clearly as never before how important it is to show examples from A to Z.
When doing correspondence analysis, we discussed biplots with small images of company brands in them.
So, here we go.
I will replicate the beautifully arranged case presented by one of the students and re-build the picture using standard R packages.
The original case can be found here: https://rpubs.com/linalinaa/1005417
# library(datapasta)
# tribble_paste()
tabl <- tibble::tribble(
~V1, ~V2, ~V3, ~V4,
"Portugal R", 7L, 2L, 26L,
"Portugal W", 8L, 2L, 15L,
"France R", 12L, 43L, 40L,
"France W", 10L, 35L, 26L,
"Italy R", 30L, 67L, 58L,
"Italy W", 22L, 49L, 63L,
"Spain R", 29L, 48L, 83L,
"Spain W", 17L, 19L, 43L,
"Georgia R", 26L, 22L, 22L,
"Georgia W", 7L, 12L, 7L,
"Russia R", 40L, 35L, 46L,
"Russia W", 30L, 35L, 41L,
"Chile W", 8L, 13L, 10L,
"Chile R", 7L, 21L, 19L,
"South Africa W", 5L, 5L, 10L,
"South Africa R", 3L, 4L, 19L,
"Argentina W", 2L, 5L, 2L,
"Argentina R", 4L, 7L, 10L
)
tabl1 <- as.data.frame(tabl)
Add a meaningful vector for the color of wine
Wow-wow, what do we see? Lines 1-12 are W-R, lines 13-18 are R-W, so let’s rearrange them
tabl1$color <- c(rep(seq(1, 2, 1), 6), rev(rep(seq(1, 2, 1), 3)))
tabl1$color <- factor(tabl1$color, labels = c("red", "white"))
row.names(tabl1) <- tabl$V1
head(tabl1)
## V1 V2 V3 V4 color
## Portugal R Portugal R 7 2 26 red
## Portugal W Portugal W 8 2 15 white
## France R France R 12 43 40 red
## France W France W 10 35 26 white
## Italy R Italy R 30 67 58 red
## Italy W Italy W 22 49 63 white
tail(tabl1)
## V1 V2 V3 V4 color
## Chile W Chile W 8 13 10 white
## Chile R Chile R 7 21 19 red
## South Africa W South Africa W 5 5 10 white
## South Africa R South Africa R 3 4 19 red
## Argentina W Argentina W 2 5 2 white
## Argentina R Argentina R 4 7 10 red
tabl1$V1 <- NULL
colnames(tabl1) <- c("KB", "Lab", "AM", "color")
chisq.test(tabl1[ , 1:3])$stdres
## KB Lab AM
## Portugal R -0.24607452 -3.6287763 3.6792871
## Portugal W 1.26375036 -2.8111457 1.6423614
## France R -2.22998983 2.3102122 -0.3601715
## France W -1.60177824 2.7130595 -1.2676898
## Italy R -0.75441550 2.4611247 -1.7302022
## Italy W -1.56855119 0.5480167 0.7779573
## Spain R -1.17294899 -1.2681246 2.1885310
## Spain W -0.03805425 -2.0095176 1.9559179
## Georgia R 3.23030504 -0.5466173 -2.1594430
## Georgia W 0.65443948 1.2700731 -1.7597563
## Russia R 3.19532188 -1.3451606 -1.3657038
## Russia W 1.72788893 -0.3228937 -1.1258705
## Chile W 0.56329980 0.8891029 -1.3192448
## Chile R -1.15271941 1.5060045 -0.4847743
## South Africa W 0.36216412 -0.8960768 0.5572922
## South Africa R -1.26941724 -2.0670880 3.0337381
## Argentina W 0.03890720 1.3377738 -1.3133650
## Argentina R -0.29631474 -0.1079913 0.3495123
library(factoextra)
library(FactoMineR)
I downloaded pictures of brands as .png files in my home
directory
res.ca <- CA(tabl1[1:18,1:3], graph = FALSE)
The fviz_ca functionality is not bad at all but the
palette is not helpful
fviz_ca_biplot(
res.ca,
repel = TRUE,
col.row = tabl1$color,
arrows = c(T,T),
title = "CA Biplot for stores and wine"
)
Add a vector with pictures as another row
# library(here)
# here()
tabl1 <- rbind(tabl1, c("kb.png", "ab.png", "am.png", "NA"))
row.names(tabl1) <- c(tabl$V1, c("image"))
tabl1
## KB Lab AM color
## Portugal R 7 2 26 red
## Portugal W 8 2 15 white
## France R 12 43 40 red
## France W 10 35 26 white
## Italy R 30 67 58 red
## Italy W 22 49 63 white
## Spain R 29 48 83 red
## Spain W 17 19 43 white
## Georgia R 26 22 22 red
## Georgia W 7 12 7 white
## Russia R 40 35 46 red
## Russia W 30 35 41 white
## Chile W 8 13 10 white
## Chile R 7 21 19 red
## South Africa W 5 5 10 white
## South Africa R 3 4 19 red
## Argentina W 2 5 2 white
## Argentina R 4 7 10 red
## image kb.png ab.png am.png <NA>
Use ggplot2 to change geom to “image”.
We will need to plot two sets of points, from rows and columns
picdata1 <- as.data.frame(res.ca$row$coord)
picdata2 <- as.data.frame(res.ca$col$coord)
library(ggimage)
img <- c(tabl1$KB[19], tabl1$Lab[19], tabl1$AM[19])
label_wine <- rownames(tabl1)[1:18]
library(ggplot2)
ggplot() +
geom_point(
data = picdata1,
aes(x = `Dim 1`, y = `Dim 2`),
color = tabl1$color[1:18]
) +
geom_segment(
aes(x = 0, y = 0,
xend = picdata2$`Dim 1`[1], yend = picdata2$`Dim 2`[1],
color = "red")
) +
geom_segment(
aes(x = 0, y = 0,
xend = picdata2$`Dim 1`[2], yend = picdata2$`Dim 2`[2],
color = "purple")
) +
geom_segment(
aes(x = 0, y = 0,
xend = picdata2$`Dim 1`[3], yend = picdata2$`Dim 2`[3],
color = "darkred")
) +
geom_image(aes(
x = picdata2$`Dim 1`,
y = picdata2$`Dim 2`,
image = img
), size = 0.1) +
geom_text(
data = picdata1,
aes(x = `Dim 1`, y = `Dim 2`,
label = label_wine,
color = tabl1$color[1:18],
vjust = -0.5, hjust = 0),
check_overlap = T
) +
scale_color_manual(values = c('white' = 'white',
'red' = 'red',
'purple' = 'purple',
'darkred' = 'darkred')) +
coord_cartesian(xlim = c(-0.75, 0.75), ylim = c(-0.5, 0.5)) +
labs(
title = "Wine Origin and Color by Popular Stores",
subtitle = "Farther from the origins = more intertia, more unexpected",
caption = "Original case: rpubs.com/linalinaa"
) +
theme(
panel.background = element_rect(
fill = "lightsteelblue3",
colour = "lightsteelblue3",
linewidth = 0.5,
linetype = "solid"
),
panel.grid.major = element_line(
linewidth = 0.5,
linetype = 'solid',
colour = "lavender"
),
panel.grid.minor = element_line(
linewidth = 0.25,
linetype = 'solid',
colour = "lightsteelblue3"
),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none"
)
See more R color names here: https://bookdown.org/hneth/ds4psy/D-3-apx-colors-basics.html
To sum up, here I built a customized biplot for correspondence analysis.
This tutorial shows how to put a picture instead of a point, add arrows, and reminds how to plot two data frames in one ggplot.
More resources:
Emojis in a ggplot https://www.r-bloggers.com/2019/09/using-emojis-and-png-as-icons-in-your-ggplot/
Check out the party penguins https://rpubs.com/shirokaner/bar and plotting logo https://rpubs.com/shirokaner/logo
Fill in bars with wild topics (fun + exploration - data:ink) https://coolbutuseless.github.io/package/ggpattern/articles/pattern-placeholder.html
library(ggpattern)
bmd <- data.frame(treatment = c("control", "R as calculator", "R for fun"),
outcome = c(2.3, 1.9, 3.2))
ggplot(bmd,
aes(treatment, outcome)) +
geom_col_pattern(
pattern = 'placeholder',
pattern_type = 'kitten',
color = 'black'
) +
theme_bw(16) +
labs(
title = "Thank you for this class!",
subtitle = "Best of luck with data analysis"
) +
theme(legend.position = "none") +
coord_fixed(ratio = 1/2)