# Import the data: read the emoji/food CSV into the tibble `EmojiFoods`.
# NOTE(review): the path is absolute under the user's home directory, so the
# script only runs on a machine where this exact folder exists.
library(readr)
EmojiFoods <- read_csv("~/Inteligencia de negocios/Dendograma_Emojis/EmojiFoods.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   name = col_character(),
##   emoji = col_character()
## )
## i Use `spec()` for the full column specifications.
head(EmojiFoods)
## # A tibble: 6 x 35
##   name   emoji  `Calories (kcal~ `Carbohydrates ~ `Total Sugar (g~ `Protein (g)`
##   <chr>  <chr>             <dbl>            <dbl>            <dbl>         <dbl>
## 1 grapes "\U00~            0.69            0.181            0.155         0.0072
## 2 melon  "\U00~            0.28            0.0658           0.0569        0.0111
## 3 water~ "\U00~            0.3             0.0755           0.062         0.0061
## 4 tange~ "\U00~            0.53            0.133            0.106         0.0081
## 5 lemon  "\U00~            0.290           0.0932           0.025         0.011 
## 6 banana "\U00~            0.89            0.228            0.122         0.0109
## # ... with 29 more variables: Total Fat (g) <dbl>, Saturated Fat (g) <dbl>,
## #   Monounsaturated Fat (g) <dbl>, Polyunsaturated Fat (g) <dbl>,
## #   Total Fiber (g) <dbl>, Cholesterol (mg) <dbl>, Vitamin B6 (mg) <dbl>,
## #   Vitamin A (IU) <dbl>, Vitamin B12 (ug) <dbl>, Vitamin C (mg) <dbl>,
## #   Vitamin D (IU) <dbl>, Vitamin E (IU) <dbl>, Vitamin K (ug) <dbl>,
## #   Thiamin (mg) <dbl>, Riboflavin (mg) <dbl>, Niacin (mg) <dbl>,
## #   Folate (ug) <dbl>, Pantothenic Acid (mg) <dbl>, Choline (mg) <dbl>,
## #   Calcium (g) <dbl>, Copper (mg) <dbl>, Iron (mg) <dbl>,
## #   Magnesium (mg) <dbl>, Manganese (mg) <dbl>, Phosphorus (g) <dbl>,
## #   Potassium (g) <dbl>, Selenium (ug) <dbl>, Sodium (g) <dbl>, Zinc (mg) <dbl>
# Standardise columns 3 to 35 (the numeric columns) with scale()
emojisFoodN <- scale(x = EmojiFoods[, 3:35])

# Use the food names as row names of the scaled matrix
rownames(emojisFoodN) <- EmojiFoods[["name"]]

# Pairwise distances between foods (dist() with its default metric)
emojiDistancias <- dist(x = emojisFoodN)

# Hierarchical clustering (hclust() with its default linkage)
clusters <- hclust(d = emojiDistancias)

library(ggdendro)
## Warning: package 'ggdendro' was built under R version 4.0.5
# First dendrogram, drawn with text labels only
dendrograma <- ggdendrogram(data = clusters, size = 2)
dendrograma

# Incluir emojis
library(stringi)

# Build the emoji label table used to decorate the dendrogram.
#
# Arguments:
#   df     Data frame or tibble with one row per clustered item; column 1 is
#          the item name and column 2 the emoji character.
#   order  Leaf ordering of the dendrogram (for example the `order` component
#          of an `hclust` result): `order[k]` is the row of `df` drawn at
#          position k. Expected to be a permutation of the row indices.
#
# Returns `df` with its columns named `name`, `emoji`, `id`, `code`, where
#   id    is the position of that row along the dendrogram axis, and
#   code  is the lowercase hexadecimal code point of the first character of
#         that row's emoji (NA when the emoji is missing or empty).
#
# Every row describes a single item: `id` and `code` both belong to the
# `name`/`emoji` on the same row, so the table can be read, filtered or joined
# without mixing foods. Plotting `code` at x = `id` places each emoji under its
# own leaf.
trimEmojiCode <- function(df, order) {
  # Hex code point of the first character; base R only, so it also handles
  # emoji below U+10000, which have no "\U000" prefix when escaped.
  hex_code <- function(emoji) {
    code_point <- utf8ToInt(emoji)[1L]
    if (is.na(code_point)) NA_character_ else sprintf("%x", code_point)
  }

  # Inverse of `order`: for each row, the axis position where it is drawn.
  df[[3]] <- match(seq_len(nrow(df)), order)
  df[[4]] <- vapply(
    as.character(df[[2]]), hex_code, character(1),
    USE.NAMES = FALSE
  )
  colnames(df) <- c("name", "emoji", "id", "code")
  df
}

# Label table: name and emoji columns combined with the dendrogram leaf order.
# NOTE(review): in the output recorded below, the `code` shown on a row is not
# the code point of that row's `emoji` (grapes, \U0001f347, is listed with
# 1f95c).
labels <- trimEmojiCode(EmojiFoods[, 1:2], clusters[["order"]])
labels
## # A tibble: 58 x 4
##    name        emoji           id code 
##    <chr>       <chr>        <int> <chr>
##  1 grapes      "\U0001f347"     1 1f95c
##  2 melon       "\U0001f348"     2 1f368
##  3 watermelon  "\U0001f349"     3 1f36c
##  4 tangerine   "\U0001f34a"     4 1f36f
##  5 lemon       "\U0001f34b"     5 1f95d
##  6 banana      "\U0001f34c"     6 1f336
##  7 pineapple   "\U0001f34d"     7 1f35a
##  8 red apple   "\U0001f34e"     8 1f35d
##  9 green apple "\U0001f34f"     9 1f36e
## 10 pear        "\U0001f350"    10 1f34c
## # ... with 48 more rows
library(ggimage)
## Warning: package 'ggimage' was built under R version 4.0.5
## Loading required package: ggplot2
# Redraw the dendrogram, place one emoji image per label at x = id, y = -1,
# and mark the cut height with a red line at y = 13 and a "corte" caption.
dendrograma <- ggdendrogram(clusters) +
  geom_emoji(data = labels, aes(id, -1, image = code), size = 0.03) +
  geom_hline(yintercept = 13, color = "red") +
  annotate("text", label = "corte", x = 0, y = 14, color = "red")
dendrograma