# install.packages("emojifont")
# When knitting the document, loading the .csv failed (the file was not read). After investigating: CSV files were not loaded while knitting because the package that provides read_csv() was not attached. It is attached here through "tidyverse" (which loads "readr"). For Excel files the package is "readxl".
library (tidyverse)
# Read the food/emoji nutrition table from the CSV in the working directory.
EmojisFood <- readr::read_csv(file = "EmojisFood.csv")
# read_csv() returns a tibble rather than a plain data frame, so convert it
# with as.data.frame() and then inspect the structure of the result.
alimentos_dataframe <- as.data.frame(x = EmojisFood)
utils::str(object = alimentos_dataframe)
## 'data.frame': 58 obs. of 35 variables:
## $ name : chr "grapes" "melon" "watermelon" "tangerine" ...
## $ emoji : chr "<U+0001F347>" "<U+0001F348>" "<U+0001F349>" "<U+0001F34A>" ...
## $ Calories (kcal) : num 0.69 0.28 0.3 0.53 0.29 0.89 0.5 0.63 0.58 0.57 ...
## $ Carbohydrates (g) : num 0.181 0.0658 0.0755 0.1334 0.0932 ...
## $ Total Sugar (g) : num 0.1548 0.0569 0.062 0.1058 0.025 ...
## $ Protein (g) : num 0.0072 0.0111 0.0061 0.0081 0.011 0.0109 0.0054 0.002 0.0044 0.0036 ...
## $ Total Fat (g) : num 0.0016 0.001 0.0015 0.0031 0.003 0.0033 0.0012 0.0018 0.0019 0.0014 ...
## $ Saturated Fat (g) : num 0.00054 0.00025 0.00016 0.00039 0.00039 0.00112 0.00009 0 0 0.00022 ...
## $ Monounsaturated Fat (g): num 0.00007 0.00002 0.00037 0.0006 0.00011 0.00032 0.00013 0 0 0.00084 ...
## $ Polyunsaturated Fat (g): num 0.00048 0.00039 0.0005 0.00065 0.00089 0.00073 0.0004 0 0 0.00094 ...
## $ Total Fiber (g) : num 0.009 0.009 0.004 0.018 0.028 0.026 0.014 0.021 0.028 0.031 ...
## $ Cholesterol (mg) : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Vitamin B6 (mg) : num 0.00086 0.00163 0.00045 0.00078 0.0008 0.00367 0.00112 0.00045 0.00037 0.00029 ...
## $ Vitamin A (IU) : num 0.66 0 5.69 6.81 0.22 0.64 0.58 0.38 1 0.25 ...
## $ Vitamin B12 (ug) : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Vitamin C (mg) : num 0.032 0.218 0.081 0.267 0.53 0.087 0.478 0 0 0.043 ...
## $ Vitamin D (IU) : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Vitamin E (IU) : num 0.0019 0.0005 0.0005 0.002 0.0015 0.001 0.0002 0.0018 0.0018 0.0012 ...
## $ Vitamin K (ug) : num 0.146 0.025 0.001 0 0 0.005 0.007 0.01 0.032 0.044 ...
## $ Thiamin (mg) : num 0.00069 0.00015 0.00033 0.00058 0.0004 0.00031 0.00079 0.00013 0.00019 0.00012 ...
## $ Riboflavin (mg) : num 0.0007 0.00031 0.00021 0.00036 0.0002 0.00073 0.00032 0.00026 0.00025 0.00026 ...
## $ Niacin (mg) : num 0.00188 0.00232 0.00178 0.00376 0.001 0.00665 0.005 0.0007 0.00126 0.00161 ...
## $ Folate (ug) : num 0.02 0.08 0.03 0.16 0.11 0.2 0.18 0.03 0.03 0.07 ...
## $ Pantothenic Acid (mg) : num 0.0005 0.00084 0.00221 0.00216 0.0019 0.00334 0.00213 0.00051 0.00056 0.00049 ...
## $ Choline (mg) : num 0.056 0.076 0.041 0.102 0.051 0.098 0.055 0.034 0.034 0.051 ...
## $ Calcium (g) : num 0.1 0.11 0.07 0.37 0.26 0.05 0.13 0.07 0.05 0.09 ...
## $ Copper (mg) : num 0.00127 0.0006 0.00042 0.00042 0.00037 0.00078 0.0011 0.00025 0.00031 0.00082 ...
## $ Iron (mg) : num 0.0036 0.0034 0.0024 0.0015 0.006 0.0026 0.0029 0.001 0.0015 0.0018 ...
## $ Magnesium (mg) : num 0.07 0.11 0.1 0.12 0.08 0.27 0.12 0.05 0.05 0.07 ...
## $ Manganese (mg) : num 0.00071 0.00035 0.00038 0.00039 0.0003 0.0027 0.00927 0.00031 0.00044 0.00048 ...
## $ Phosphorus (g) : num 0.2 0.05 0.11 0.2 0.16 0.22 0.08 0.13 0.12 0.12 ...
## $ Potassium (g) : num 1.91 1.82 1.12 1.66 1.38 3.58 1.09 1.09 1.2 1.16 ...
## $ Selenium (ug) : num 0.001 0.004 0.004 0.001 0.004 0.01 0.001 0 0.001 0.001 ...
## $ Sodium (g) : num 0.02 0.09 0.01 0.02 0.02 0.01 0.01 0.01 0.01 0.01 ...
## $ Zinc (mg) : num 0.0007 0.0007 0.001 0.0007 0.0006 0.0015 0.0012 0.0004 0.0004 0.001 ...
## - attr(*, "spec")=
## .. cols(
## .. name = col_character(),
## .. emoji = col_character(),
## .. `Calories (kcal)` = col_double(),
## .. `Carbohydrates (g)` = col_double(),
## .. `Total Sugar (g)` = col_double(),
## .. `Protein (g)` = col_double(),
## .. `Total Fat (g)` = col_double(),
## .. `Saturated Fat (g)` = col_double(),
## .. `Monounsaturated Fat (g)` = col_double(),
## .. `Polyunsaturated Fat (g)` = col_double(),
## .. `Total Fiber (g)` = col_double(),
## .. `Cholesterol (mg)` = col_double(),
## .. `Vitamin B6 (mg)` = col_double(),
## .. `Vitamin A (IU)` = col_double(),
## .. `Vitamin B12 (ug)` = col_double(),
## .. `Vitamin C (mg)` = col_double(),
## .. `Vitamin D (IU)` = col_double(),
## .. `Vitamin E (IU)` = col_double(),
## .. `Vitamin K (ug)` = col_double(),
## .. `Thiamin (mg)` = col_double(),
## .. `Riboflavin (mg)` = col_double(),
## .. `Niacin (mg)` = col_double(),
## .. `Folate (ug)` = col_double(),
## .. `Pantothenic Acid (mg)` = col_double(),
## .. `Choline (mg)` = col_double(),
## .. `Calcium (g)` = col_double(),
## .. `Copper (mg)` = col_double(),
## .. `Iron (mg)` = col_double(),
## .. `Magnesium (mg)` = col_double(),
## .. `Manganese (mg)` = col_double(),
## .. `Phosphorus (g)` = col_double(),
## .. `Potassium (g)` = col_double(),
## .. `Selenium (ug)` = col_double(),
## .. `Sodium (g)` = col_double(),
## .. `Zinc (mg)` = col_double()
## .. )
# Preview the first six rows of the data frame.
utils::head(alimentos_dataframe, n = 6L)
## name emoji Calories (kcal) Carbohydrates (g) Total Sugar (g)
## 1 grapes <U+0001F347> 0.69 0.1810 0.1548
## 2 melon <U+0001F348> 0.28 0.0658 0.0569
## 3 watermelon <U+0001F349> 0.30 0.0755 0.0620
## 4 tangerine <U+0001F34A> 0.53 0.1334 0.1058
## 5 lemon <U+0001F34B> 0.29 0.0932 0.0250
## 6 banana <U+0001F34C> 0.89 0.2284 0.1223
## Protein (g) Total Fat (g) Saturated Fat (g) Monounsaturated Fat (g)
## 1 0.0072 0.0016 0.00054 0.00007
## 2 0.0111 0.0010 0.00025 0.00002
## 3 0.0061 0.0015 0.00016 0.00037
## 4 0.0081 0.0031 0.00039 0.00060
## 5 0.0110 0.0030 0.00039 0.00011
## 6 0.0109 0.0033 0.00112 0.00032
## Polyunsaturated Fat (g) Total Fiber (g) Cholesterol (mg) Vitamin B6 (mg)
## 1 0.00048 0.009 0 0.00086
## 2 0.00039 0.009 0 0.00163
## 3 0.00050 0.004 0 0.00045
## 4 0.00065 0.018 0 0.00078
## 5 0.00089 0.028 0 0.00080
## 6 0.00073 0.026 0 0.00367
## Vitamin A (IU) Vitamin B12 (ug) Vitamin C (mg) Vitamin D (IU) Vitamin E (IU)
## 1 0.66 0 0.032 0 0.0019
## 2 0.00 0 0.218 0 0.0005
## 3 5.69 0 0.081 0 0.0005
## 4 6.81 0 0.267 0 0.0020
## 5 0.22 0 0.530 0 0.0015
## 6 0.64 0 0.087 0 0.0010
## Vitamin K (ug) Thiamin (mg) Riboflavin (mg) Niacin (mg) Folate (ug)
## 1 0.146 0.00069 0.00070 0.00188 0.02
## 2 0.025 0.00015 0.00031 0.00232 0.08
## 3 0.001 0.00033 0.00021 0.00178 0.03
## 4 0.000 0.00058 0.00036 0.00376 0.16
## 5 0.000 0.00040 0.00020 0.00100 0.11
## 6 0.005 0.00031 0.00073 0.00665 0.20
## Pantothenic Acid (mg) Choline (mg) Calcium (g) Copper (mg) Iron (mg)
## 1 0.00050 0.056 0.10 0.00127 0.0036
## 2 0.00084 0.076 0.11 0.00060 0.0034
## 3 0.00221 0.041 0.07 0.00042 0.0024
## 4 0.00216 0.102 0.37 0.00042 0.0015
## 5 0.00190 0.051 0.26 0.00037 0.0060
## 6 0.00334 0.098 0.05 0.00078 0.0026
## Magnesium (mg) Manganese (mg) Phosphorus (g) Potassium (g) Selenium (ug)
## 1 0.07 0.00071 0.20 1.91 0.001
## 2 0.11 0.00035 0.05 1.82 0.004
## 3 0.10 0.00038 0.11 1.12 0.004
## 4 0.12 0.00039 0.20 1.66 0.001
## 5 0.08 0.00030 0.16 1.38 0.004
## 6 0.27 0.00270 0.22 3.58 0.010
## Sodium (g) Zinc (mg)
## 1 0.02 0.0007
## 2 0.09 0.0007
## 3 0.01 0.0010
## 4 0.02 0.0007
## 5 0.02 0.0006
## 6 0.01 0.0015
# Standardise (centre and scale) every numeric nutrient column with scale().
# The numeric columns are detected by type instead of through the hard-coded
# positions 3:35, so this step keeps working if the CSV gains, loses or
# reorders columns. The character columns (name, emoji) are left out because
# scale() needs numeric input.
columnas_numericas <- vapply(alimentos_dataframe, is.numeric, logical(1))
alimentos_dataframeN <- scale(
  alimentos_dataframe[, columnas_numericas, drop = FALSE]
)
# Check the result: after standardising, every column mean in the summary
# must be 0.
summary(alimentos_dataframeN)
## Calories (kcal) Carbohydrates (g) Total Sugar (g) Protein (g)
## Min. :-1.0639 Min. :-1.0219 Min. :-0.53076 Min. :-0.7405
## 1st Qu.:-0.7687 1st Qu.:-0.7282 1st Qu.:-0.53076 1st Qu.:-0.6398
## Median :-0.2925 Median :-0.3300 Median :-0.32710 Median :-0.4445
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.4538 3rd Qu.: 0.2736 3rd Qu.: 0.09345 3rd Qu.: 0.2784
## Max. : 3.9322 Max. : 3.1973 Max. : 4.89934 Max. : 3.3174
## Total Fat (g) Saturated Fat (g) Monounsaturated Fat (g)
## Min. :-0.5343 Min. :-0.5029 Min. :-0.4936
## 1st Qu.:-0.5228 1st Qu.:-0.4988 1st Qu.:-0.4919
## Median :-0.4885 Median :-0.4877 Median :-0.4778
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.2602 3rd Qu.: 0.1850 3rd Qu.: 0.1989
## Max. : 5.7458 Max. : 5.2561 Max. : 5.5993
## Polyunsaturated Fat (g) Total Fiber (g) Cholesterol (mg)
## Min. :-0.48592 Min. :-0.8385 Min. :-0.54253
## 1st Qu.:-0.46840 1st Qu.:-0.8385 1st Qu.:-0.54253
## Median :-0.39896 Median :-0.2018 Median :-0.54253
## Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.05455 3rd Qu.: 0.4349 3rd Qu.:-0.04561
## Max. : 4.65856 Max. : 4.0590 Max. : 3.34109
## Vitamin B6 (mg) Vitamin A (IU) Vitamin B12 (ug) Vitamin C (mg)
## Min. :-0.81610 Min. :-0.2333 Min. :-0.39416 Min. :-0.4132
## 1st Qu.:-0.52795 1st Qu.:-0.2329 1st Qu.:-0.39416 1st Qu.:-0.4132
## Median :-0.41563 Median :-0.2148 Median :-0.39416 Median :-0.3951
## Mean : 0.00000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.08841 3rd Qu.:-0.1856 3rd Qu.:-0.07245 3rd Qu.:-0.1557
## Max. : 4.11979 Max. : 5.5979 Max. : 5.75848 Max. : 5.3463
## Vitamin D (IU) Vitamin E (IU) Vitamin K (ug) Thiamin (mg)
## Min. :-0.2218 Min. :-0.41123 Min. :-0.56700 Min. :-0.7580
## 1st Qu.:-0.2218 1st Qu.:-0.41123 1st Qu.:-0.56700 1st Qu.:-0.5955
## Median :-0.2218 Median :-0.27068 Median :-0.43099 Median :-0.3805
## Mean : 0.0000 Mean : 0.00000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.:-0.2218 3rd Qu.: 0.08709 3rd Qu.: 0.09516 3rd Qu.: 0.1825
## Max. : 7.0921 Max. : 6.68453 Max. : 5.20270 Max. : 3.9035
## Riboflavin (mg) Niacin (mg) Folate (ug) Pantothenic Acid (mg)
## Min. :-1.0235 Min. :-0.7268 Min. :-0.7362 Min. :-0.8894
## 1st Qu.:-0.7851 1st Qu.:-0.6508 1st Qu.:-0.6032 1st Qu.:-0.7173
## Median :-0.4665 Median :-0.4071 Median :-0.3056 Median :-0.2574
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.7301 3rd Qu.: 0.1062 3rd Qu.: 0.3211 3rd Qu.: 0.2353
## Max. : 3.0077 Max. : 4.8244 Max. : 5.3418 Max. : 3.3322
## Choline (mg) Calcium (g) Copper (mg) Iron (mg)
## Min. :-0.6944 Min. :-0.43652 Min. :-0.68044 Min. :-0.8899
## 1st Qu.:-0.5831 1st Qu.:-0.37984 1st Qu.:-0.46652 1st Qu.:-0.6601
## Median :-0.3917 Median :-0.31507 Median :-0.22798 Median :-0.4940
## Mean : 0.0000 Mean : 0.00000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.1226 3rd Qu.:-0.03371 3rd Qu.: 0.08597 3rd Qu.: 0.4182
## Max. : 5.0225 Max. : 6.76942 Max. : 6.33731 Max. : 3.2778
## Magnesium (mg) Manganese (mg) Phosphorus (g) Potassium (g)
## Min. :-0.7685 Min. :-0.49738 Min. :-0.8269 Min. :-1.3421
## 1st Qu.:-0.4432 1st Qu.:-0.42991 1st Qu.:-0.6489 1st Qu.:-0.5695
## Median :-0.3293 Median :-0.26775 Median :-0.3911 Median :-0.2368
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.1100 3rd Qu.: 0.03228 3rd Qu.: 0.3142 3rd Qu.: 0.2711
## Max. : 4.6976 Max. : 6.32613 Max. : 4.5439 Max. : 3.6656
## Selenium (ug) Sodium (g) Zinc (mg)
## Min. :-0.7164 Min. :-0.6320 Min. :-0.68967
## 1st Qu.:-0.6962 1st Qu.:-0.6234 1st Qu.:-0.59653
## Median :-0.6205 Median :-0.5743 Median :-0.35633
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.2522 3rd Qu.: 0.2810 3rd Qu.:-0.03524
## Max. : 2.3510 Max. : 3.1993 Max. : 3.89868
# The scaled matrix no longer carries the food names. Attach them as row
# names (not as an extra column), so each observation is identified as
# "grapes", "melon", "watermelon", ... instead of 1, 2, 3, ...
rownames(alimentos_dataframeN) <- alimentos_dataframe[["name"]]
# Pairwise distances between foods (Euclidean, the dist() default).
distancia_alimentos <- stats::dist(alimentos_dataframeN, method = "euclidean")
# Hierarchical clustering (complete linkage, the hclust() default).
clusters <- stats::hclust(distancia_alimentos, method = "complete")
library("ggdendro")
# First dendrogram, with text labels only.
dendrograma <- ggdendrogram(clusters, size = 2)
dendrograma

# Next, draw pictures (emoji) of the foods on the dendrogram. This requires converting the code that represents each food.
library('stringi')
#' Build the emoji label table for a dendrogram
#'
#' Adds two columns to `df`: `id`, the row number (used as the x position of
#' a leaf), and `code`, the lower-case hexadecimal code point of the emoji
#' that sits at that leaf position, i.e. the emoji found in row `order[i]`.
#'
#' Only `id` and `code` follow the leaf order. The first two columns are
#' returned untouched, in input order, so on a given row they generally
#' describe a different food than `code` does.
#'
#' @param df Data frame whose second column holds the emoji characters.
#'   Columns 3 and 4 are (over)written and the columns are renamed.
#' @param order Leaf order, e.g. `hclust(...)$order`; expected to be a
#'   permutation of `seq_len(nrow(df))`.
#' @return `df` with columns named `name`, `emoji`, `id` and `code`. A
#'   zero-row `df` yields a zero-row result instead of an error.
trimEmojiCode <- function(df, order) {
  n_rows <- nrow(df)
  # seq_len() (not 1:n) so that an empty data frame produces no iterations.
  leaf_rows <- order[seq_len(n_rows)]

  # Hex code of the first code point of one string, or NA when there is none.
  # Reading the code point directly, instead of cutting a "\U000" prefix off
  # an escaped string, also covers emoji that are escaped as "\uXXXX" (for
  # example U+2615), which previously produced NA, and ignores any trailing
  # code points such as a variation selector.
  first_code_point <- function(glyph) {
    if (is.na(glyph)) {
      return(NA_character_)
    }
    code_points <- utf8ToInt(enc2utf8(glyph))
    if (length(code_points) == 0L || is.na(code_points[1L])) {
      return(NA_character_)
    }
    sprintf("%x", code_points[1L])
  }

  # id: each row listed in `order` receives its own row number.
  ids <- rep(NA, n_rows)
  ids[leaf_rows] <- leaf_rows

  # code: row i receives the code of the emoji stored in row order[i].
  codes <- vapply(
    as.character(df[[2]][leaf_rows]),
    first_code_point,
    character(1),
    USE.NAMES = FALSE
  )

  df[[3]] <- ids
  df[[4]] <- codes
  colnames(df) <- c('name','emoji','id','code')
  df
}
# Label table for the plot: the name and emoji columns plus, for each leaf
# position (id), the emoji image code derived from the original emoji.
labels <- trimEmojiCode(alimentos_dataframe[, 1:2], clusters$order)
library("ggimage")
# Dendrogram with an emoji drawn at y = -1 for every leaf position, a blue
# horizontal line at height 13 and a green "corte" text annotation at
# (0, 14).
dendrograma <- ggdendrogram(clusters) +
  geom_emoji(data = labels, aes(x = id, y = -1, image = code), size = 0.03) +
  geom_hline(yintercept = 13, color = "blue") +
  annotate("text", label = "corte", x = 0, y = 14, color = "green")
dendrograma
