Data preparation

library(cluster)
library(ggplot2)
library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.4.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(NbClust)
library(kohonen)
## Loading required package: class
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(datasets)
library(heatmaply)
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Loading required package: viridis
## 
## ---------------------
## Welcome to heatmaply version 0.7.0
## Type ?heatmaply for the main documentation.
## The github page is: https://github.com/talgalili/heatmaply/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/heatmaply/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(heatmaply))
## ---------------------
ProductData <- read.csv("products2.csv", na.strings = "undefined",
                        colClasses = c("character", "character", rep("numeric", 48)))
ProductData$prodid <-NULL
ProdDataSubset <- ProductData[rowSums(is.na(ProductData))<25,unname(colSums(is.na(ProductData)))<1000]

CleanProductData<- ProdDataSubset %>% distinct(prodname, .keep_all = TRUE)
CleanProductData[is.na(CleanProductData)] <- 0
rownames(CleanProductData) <- CleanProductData[,1]
CleanProductData$prodname <- NULL

ScaledProdData <- scale(CleanProductData)
ScaledProdData[is.na(ScaledProdData)] <- 0
stdist <- dist(ScaledProdData, method = "euclidean")
stclusters <- agnes(ScaledProdData, method = "complete", metric = "euclidean")
plot(stclusters, which.plots=2, cex = 0.8)

#Identify the best number of clusters for k-means and perform k-means clustering
#index = kl
bestK1 <- NbClust(ScaledProdData, min.nc=5, max.nc=15, method="kmeans", index = "kl")
bestK1
## $All.index
##      5      6      7      8      9     10     11     12     13     14 
## 0.3165 2.1535 0.9688 1.1218 1.1549 0.8349 2.2557 0.6103 1.4139 0.8082 
##     15 
## 1.3476 
## 
## $Best.nc
## Number_clusters     Value_Index 
##         11.0000          2.2557 
## 
## $Best.partition
##                                    Lucky Charms 
##                                               9 
##                                   Rice krispies 
##                                               9 
##                                     Froot Loops 
##                                               9 
##                                    Cap'n Crunch 
##                                               4 
##                               Special K Protein 
##                                               4 
##                             Sweetened Soy Drink 
##                                              11 
##                            Peanut Butter Crunch 
##                                               4 
##            Oats & More with Strawberries Cereal 
##                                               5 
##                                       Joe's O's 
##                                               4 
##                           Honey Bunches of Oats 
##                                               5 
##                              Cracklin' Oat Bran 
##                                              11 
##                     Large Grade AA 5 Dozen Eggs 
##                                               1 
##             Spongebob Squarepants Fruity Splash 
##                                               9 
##                                   Wavy original 
##                                               6 
##                                        Cheerios 
##                                               9 
##                     Ovaltine Rich Chocolate Mix 
##                                               9 
##                              Special K original 
##                                               4 
##                            Very vanilla Soymilk 
##                                               2 
##                                 Raisin Nut Bran 
##                                              10 
##           Crispy Flakes with Red Berries Cereal 
##                                               5 
##                                       Mini Trix 
##                                               9 
##                           Special K Red Berries 
##                                               4 
##                              Honey Nut Cheerios 
##                                               9 
##                         Frosted Shredded Wheats 
##                                               3 
##                                  Frosted Flakes 
##                                               9 
##                               Grape-Nuts Cereal 
##                                              10 
##                     Count Chocula (family size) 
##                                               9 
##                                 Tootie Fruities 
##                                               9 
##                                         Apricot 
##                                               8 
##                                 Egg Land's best 
##                                               1 
##                                  Chocolate Chex 
##                                               4 
##                                                 
##                                               9 
##                                  Sliced Almonds 
##                                               6 
##  Kidz Zone Perfect Peanut Butter Chocolate Chip 
##                                              11 
##                          Lightly Salted Cachews 
##                                               6 
##                                   life Original 
##                                               4 
##                                       Rice Chex 
##                                               4 
##                                Pure Protein Bar 
##                                               7 
##                                  Chocolate Chip 
##                                               8 
##                           Crunchy Peanut Butter 
##                                               8 
##                    Chocolate Chip Peanut Crunch 
##                                               8 
##                   White Chocolate Macadamia Nut 
##                                               8 
##                                      Coco Puffs 
##                                               9
stclusterk1 <- kmeans(CleanProductData, bestK1$Best.nc[1], nstart = 25)
stclusterk1
## K-means clustering with 11 clusters of sizes 2, 3, 9, 2, 3, 6, 2, 4, 8, 2, 2
## 
## Cluster means:
##    calcium_serving carbohydrates_serving cholesterol_serving
## 1       0.02000000               0.50000              0.2075
## 2       0.25000000              41.00000              0.0000
## 3       0.06666667              22.44444              0.0000
## 4       0.25000000              44.00000              0.0000
## 5       0.07000000              14.00000              0.0030
## 6       0.01666667              24.66667              0.0000
## 7       0.04000000               7.50000              0.0000
## 8       0.01500000              40.75000              0.0000
## 9       0.09625000              24.37500              0.0000
## 10      0.10000000              15.50000              0.0050
## 11      0.09000000              16.50000              0.0025
##    energy_serving energy.from.fat_serving fat_serving fiber_serving
## 1        314.0000               167.00000   4.5000000      0.000000
## 2       1060.0000               251.00000   6.6666667      4.000000
## 3        441.3333                60.66667   1.5000000      1.777778
## 4        983.0000               157.00000   4.2500000      4.500000
## 5        172.6667                 7.00000   1.0000000      2.000000
## 6        453.0000                 3.50000   0.1666667      1.333333
## 7        732.0000               544.00000  14.0000000      2.000000
## 8        826.5000                21.00000   3.0000000      6.000000
## 9        523.0000                55.12500   1.8125000      1.625000
## 10       795.0000               314.00000   8.0000000      1.000000
## 11       565.0000               188.00000   5.2500000      1.500000
##    iron_serving proteins_serving salt_serving saturated.fat_serving
## 1     0.0007200         7.000000    0.1778000             1.2500000
## 2     0.0018000        10.000000    0.5757333             1.5000000
## 3     0.0046000         1.555556    0.4007556             0.2777778
## 4     0.0031500         9.500000    0.3492500             0.7500000
## 5     0.0035400         2.000000    0.2624667             0.1666667
## 6     0.0073500         2.500000    0.4529667             0.0000000
## 7     0.0014400         6.000000    0.0571500             1.7500000
## 8     0.0096750         4.750000    0.3968750             0.8750000
## 9     0.0069975         3.500000    0.3921125             0.1250000
## 10    0.0009000        11.000000    0.4318000             2.2500000
## 11    0.0018900         6.000000    0.1524000             1.7500000
##    sodium_serving sugars_serving trans.fat_serving vitamin.a_serving
## 1       0.0700000       0.000000                 0      0.0000900000
## 2       0.2266667      21.000000                 0      0.0004500000
## 3       0.1577778       8.888889                 0      0.0001166667
## 4       0.1375000      23.500000                 0      0.0003000000
## 5       0.1033333       4.000000                 0      0.0001000000
## 6       0.1783333       6.166667                 0      0.0002375000
## 7       0.0225000       1.500000                 0      0.0000000000
## 8       0.1562500      11.000000                 0      0.0000300000
## 9       0.1543750       8.750000                 0      0.0001875000
## 10      0.1700000       1.500000                 0      0.0001875000
## 11      0.0600000      10.500000                 0      0.0001200000
##    vitamin.c_serving
## 1        0.000000000
## 2        0.037800000
## 3        0.005666667
## 4        0.042000000
## 5        0.004000000
## 6        0.014000000
## 7        0.000000000
## 8        0.003750000
## 9        0.006000000
## 10       0.010500000
## 11       0.003000000
## 
## Clustering vector:
##                                    Lucky Charms 
##                                               3 
##                                   Rice krispies 
##                                               6 
##                                     Froot Loops 
##                                               3 
##                                    Cap'n Crunch 
##                                               3 
##                               Special K Protein 
##                                               9 
##                             Sweetened Soy Drink 
##                                              11 
##                            Peanut Butter Crunch 
##                                               3 
##            Oats & More with Strawberries Cereal 
##                                               9 
##                                       Joe's O's 
##                                               3 
##                           Honey Bunches of Oats 
##                                               9 
##                              Cracklin' Oat Bran 
##                                               8 
##                     Large Grade AA 5 Dozen Eggs 
##                                               1 
##             Spongebob Squarepants Fruity Splash 
##                                               3 
##                                   Wavy original 
##                                              10 
##                                        Cheerios 
##                                               5 
##                     Ovaltine Rich Chocolate Mix 
##                                               5 
##                              Special K original 
##                                               6 
##                            Very vanilla Soymilk 
##                                               9 
##                                 Raisin Nut Bran 
##                                               8 
##           Crispy Flakes with Red Berries Cereal 
##                                               6 
##                                       Mini Trix 
##                                               9 
##                           Special K Red Berries 
##                                               6 
##                              Honey Nut Cheerios 
##                                               3 
##                         Frosted Shredded Wheats 
##                                               8 
##                                  Frosted Flakes 
##                                               6 
##                               Grape-Nuts Cereal 
##                                               8 
##                     Count Chocula (family size) 
##                                               3 
##                                 Tootie Fruities 
##                                               9 
##                                         Apricot 
##                                               4 
##                                 Egg Land's best 
##                                               1 
##                                  Chocolate Chex 
##                                               9 
##                                                 
##                                               5 
##                                  Sliced Almonds 
##                                               7 
##  Kidz Zone Perfect Peanut Butter Chocolate Chip 
##                                              11 
##                          Lightly Salted Cachews 
##                                               7 
##                                   life Original 
##                                               9 
##                                       Rice Chex 
##                                               6 
##                                Pure Protein Bar 
##                                              10 
##                                  Chocolate Chip 
##                                               4 
##                           Crunchy Peanut Butter 
##                                               2 
##                    Chocolate Chip Peanut Crunch 
##                                               2 
##                   White Chocolate Macadamia Nut 
##                                               2 
##                                      Coco Puffs 
##                                               3 
## 
## Within cluster sum of squares by cluster:
##  [1]   882.6255  1179.1762  7078.5369  2807.7526 11850.8572  5478.8918
##  [7] 11478.1357 10319.0001 10882.6743 11638.1585  8853.7502
##  (between_SS / total_SS =  97.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
name<-c()
kl<-list()
for (i in 1:bestK1$Best.nc[1]){
  name<-append(name,paste("group",i,sep=""))
  "text"->kl[[i]]
  kl[[i]]<-kl[[i]][-1]
}
names(kl)<-name 
for (i in 1:length(stclusterk1$cluster)){
  for (j in 1:bestK1$Best.nc[1]){
    if (stclusterk1$cluster[i]==j) {
      kl[[j]]<-append(kl[[j]],names(stclusterk1$cluster[i]))
    }}}
kl
## $group1
## [1] " Large Grade AA 5 Dozen Eggs" " Egg Land's best"            
## 
## $group2
## [1] " Crunchy Peanut Butter"         " Chocolate Chip Peanut Crunch" 
## [3] " White Chocolate Macadamia Nut"
## 
## $group3
## [1] " Lucky Charms"                       
## [2] " Froot Loops"                        
## [3] " Cap'n Crunch"                       
## [4] " Peanut Butter Crunch"               
## [5] " Joe's O's"                          
## [6] " Spongebob Squarepants Fruity Splash"
## [7] " Honey Nut Cheerios"                 
## [8] " Count Chocula (family size)"        
## [9] " Coco Puffs"                         
## 
## $group4
## [1] " Apricot"        " Chocolate Chip"
## 
## $group5
## [1] " Cheerios"                    " Ovaltine Rich Chocolate Mix"
## [3] " "                           
## 
## $group6
## [1] " Rice krispies"                        
## [2] " Special K original"                   
## [3] " Crispy Flakes with Red Berries Cereal"
## [4] " Special K Red Berries"                
## [5] " Frosted Flakes"                       
## [6] " Rice Chex"                            
## 
## $group7
## [1] " Sliced Almonds"         " Lightly Salted Cachews"
## 
## $group8
## [1] " Cracklin' Oat Bran"      " Raisin Nut Bran"        
## [3] " Frosted Shredded Wheats" " Grape-Nuts Cereal"      
## 
## $group9
## [1] " Special K Protein"                   
## [2] " Oats & More with Strawberries Cereal"
## [3] " Honey Bunches of Oats"               
## [4] " Very vanilla Soymilk"                
## [5] " Mini Trix"                           
## [6] " Tootie Fruities"                     
## [7] " Chocolate Chex"                      
## [8] " life Original"                       
## 
## $group10
## [1] " Wavy original"    " Pure Protein Bar"
## 
## $group11
## [1] " Sweetened Soy Drink"                           
## [2] " Kidz Zone Perfect Peanut Butter Chocolate Chip"
#Build a Kohonen SOM.
statesom <- som(data=ScaledProdData, grid = somgrid(5, 5, "hexagonal"),radius = quantile(ScaledProdData, 1) * 5)


plot(statesom, type="mapping", labels = rownames(ScaledProdData),cex = 1)

plot(statesom, type="codes")

heatmaply(ScaledProdData, margins = c(40, 130))