# Import the data
library(readr)
# Read the emoji food table from a CSV file under the user's home directory.
# Per the column specification echoed below, `name` and `emoji` are character
# columns and every other column is parsed as double.
EmojiFoods <- read_csv("~/Inteligencia de negocios/Dendograma_Emojis/EmojiFoods.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## name = col_character(),
## emoji = col_character()
## )
## i Use `spec()` for the full column specifications.
# Preview the first rows of the imported table
head(EmojiFoods)
## # A tibble: 6 x 35
## name emoji `Calories (kcal~ `Carbohydrates ~ `Total Sugar (g~ `Protein (g)`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 grapes "\U00~ 0.69 0.181 0.155 0.0072
## 2 melon "\U00~ 0.28 0.0658 0.0569 0.0111
## 3 water~ "\U00~ 0.3 0.0755 0.062 0.0061
## 4 tange~ "\U00~ 0.53 0.133 0.106 0.0081
## 5 lemon "\U00~ 0.290 0.0932 0.025 0.011
## 6 banana "\U00~ 0.89 0.228 0.122 0.0109
## # ... with 29 more variables: Total Fat (g) <dbl>, Saturated Fat (g) <dbl>,
## # Monounsaturated Fat (g) <dbl>, Polyunsaturated Fat (g) <dbl>,
## # Total Fiber (g) <dbl>, Cholesterol (mg) <dbl>, Vitamin B6 (mg) <dbl>,
## # Vitamin A (IU) <dbl>, Vitamin B12 (ug) <dbl>, Vitamin C (mg) <dbl>,
## # Vitamin D (IU) <dbl>, Vitamin E (IU) <dbl>, Vitamin K (ug) <dbl>,
## # Thiamin (mg) <dbl>, Riboflavin (mg) <dbl>, Niacin (mg) <dbl>,
## # Folate (ug) <dbl>, Pantothenic Acid (mg) <dbl>, Choline (mg) <dbl>,
## # Calcium (g) <dbl>, Copper (mg) <dbl>, Iron (mg) <dbl>,
## # Magnesium (mg) <dbl>, Manganese (mg) <dbl>, Phosphorus (g) <dbl>,
## # Potassium (g) <dbl>, Selenium (ug) <dbl>, Sodium (g) <dbl>, Zinc (mg) <dbl>
# Normalization: scale() with its defaults centers each of columns 3 to 35
# and divides it by its standard deviation, returning a numeric matrix
emojisFoodN <- scale(EmojiFoods[,3:35])
# Name the matrix rows after the foods so the dendrogram leaves carry food names
rownames(emojisFoodN) <- EmojiFoods$name
# Pairwise distances between foods (dist() defaults to Euclidean distance)
emojiDistancias <- dist(emojisFoodN)
# Hierarchical clustering (hclust() defaults to complete linkage)
clusters <- hclust(emojiDistancias)
library(ggdendro)
## Warning: package 'ggdendro' was built under R version 4.0.5
# Build the dendrogram plot; evaluating the object on its own line prints it
dendrograma <- ggdendrogram(clusters, size=2)
dendrograma

# Incluir emojis
library(stringi)
#' Build the emoji label table used to decorate a dendrogram
#'
#' For every row of `df` this computes the hexadecimal code point of its emoji
#' (e.g. "1f347") and the position of that row among the dendrogram leaves.
#' Each output row is self-consistent: `name`, `emoji`, `id` and `code` all
#' describe the same food.
#'
#' @param df A data frame (or tibble) whose first column holds the names and
#'   whose second column holds the emoji characters.
#' @param order Integer vector of row indices in leaf order, such as the
#'   `order` component of an `hclust` result: `order[k]` is the row of `df`
#'   shown at leaf position `k`.
#' @return `df` with columns 3 and 4 set and all four columns renamed to
#'   `name`, `emoji`, `id`, `code`. `id` is the leaf position of the row
#'   (`NA` if the row does not appear in `order`); `code` is the lowercase hex
#'   code of the first code point of the emoji (`NA` for missing or empty
#'   strings).
trimEmojiCode <- function(df, order) {
  # `[[` extracts a plain vector from both data frames and tibbles.
  emoji_chars <- as.character(df[[2]])

  # Hex code of the first code point of each emoji. Working from the code
  # point directly also covers emoji below U+10000, whose escaped form does
  # not start with "\U000".
  codes <- vapply(
    emoji_chars,
    function(ch) {
      code_point <- utf8ToInt(ch)[1]
      if (is.na(code_point)) NA_character_ else sprintf("%x", code_point)
    },
    character(1),
    USE.NAMES = FALSE
  )

  # Leaf position of each row: the k such that order[k] is that row.
  # seq_len() keeps this safe for a zero-row input.
  df[[3]] <- match(seq_len(nrow(df)), order)
  df[[4]] <- codes
  colnames(df) <- c('name', 'emoji', 'id', 'code')
  df
}
# Build the emoji label table from the name and emoji columns (1 and 2),
# passing the leaf order of the clustering; then print it
labels <- trimEmojiCode(EmojiFoods[,c(1,2)], clusters$order)
labels
## # A tibble: 58 x 4
## name emoji id code
## <chr> <chr> <int> <chr>
## 1 grapes "\U0001f347" 1 1f95c
## 2 melon "\U0001f348" 2 1f368
## 3 watermelon "\U0001f349" 3 1f36c
## 4 tangerine "\U0001f34a" 4 1f36f
## 5 lemon "\U0001f34b" 5 1f95d
## 6 banana "\U0001f34c" 6 1f336
## 7 pineapple "\U0001f34d" 7 1f35a
## 8 red apple "\U0001f34e" 8 1f35d
## 9 green apple "\U0001f34f" 9 1f36e
## 10 pear "\U0001f350" 10 1f34c
## # ... with 48 more rows
library(ggimage)
## Warning: package 'ggimage' was built under R version 4.0.5
## Loading required package: ggplot2
# Rebuild the dendrogram and stack the extra layers in one expression:
#   - one emoji image per row of `labels`, drawn at x = id, y = -1
#   - a red horizontal line at height 13
#   - a red "corte" text annotation at x = 0, y = 14
dendrograma <- ggdendrogram(clusters) +
  geom_emoji(data = labels, aes(id, -1, image = code), size = 0.03) +
  geom_hline(yintercept = 13, color = 'red') +
  annotate("text", label = "corte", x = 0, y = 14, color = "red")
# Evaluating the object on its own line prints the plot
dendrograma
