1 One-Hot Encoding

# Build a one-hot (dummy-variable) encoding of the five hiragana characters.
char <- c("あ", "い", "う", "え", "お")

# `y` is only a placeholder response so the formula below has a left-hand side.
d0 <- data.frame(y = 1, aiueo = rep(char, 3))

# Pin the factor levels to the order of `char`. Left implicit, the levels are
# sorted by the session's collation order, and the relabelling below would
# silently attach the wrong names whenever that order differs from `char`.
# `- 1` drops the intercept so every level gets its own indicator column.
x <- model.matrix(y ~ factor(aiueo, levels = char) - 1, data = d0)
colnames(x) <- char

# Original value next to its indicator columns. Binding a character vector to
# a numeric matrix yields a character matrix, so the 0/1 cells become strings.
d <- cbind(d0$aiueo, x)

library(kableExtra)
# Render the encoded matrix as a striped table. The grouped header labels the
# first column "前" (before) and the five indicator columns "後" (after).
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
kable(d, caption = "One-Hot Encoding") %>%
  kable_paper("striped", full_width = FALSE) %>%
  add_header_above(c("前" = 1, "後" = 5))
One-Hot Encoding
前 | 後
   | あ い う え お
あ | 1 0 0 0 0
い | 0 1 0 0 0
う | 0 0 1 0 0
え | 0 0 0 1 0
お | 0 0 0 0 1
あ | 1 0 0 0 0
い | 0 1 0 0 0
う | 0 0 1 0 0
え | 0 0 0 1 0
お | 0 0 0 0 1
あ | 1 0 0 0 0
い | 0 1 0 0 0
う | 0 0 1 0 0
え | 0 0 0 1 0
お | 0 0 0 0 1

2 Integer Encoding

# Integer encoding: each character is replaced by its position in `char`.
char <- c("あ", "い", "う", "え", "お")
int <- seq_along(char)

# Start from the repeated input column ("前", before), then look up each
# value's position in `char` to obtain its integer code ("後", after).
d0 <- data.frame(前 = rep(char, times = 3))
d0[["後"]] <- match(d0[["前"]], char)

# Render the before/after pairs as a striped table.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
kable(d0, caption = "Integer Encoding") %>%
  kable_paper("striped", full_width = FALSE)
Integer Encoding
前 後
あ 1
い 2
う 3
え 4
お 5
あ 1
い 2
う 3
え 4
お 5
あ 1
い 2
う 3
え 4
お 5