ACP:

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(FactoMineR)
library(corrplot)
# Read the semicolon-separated data set; the first row holds the column names.
data1 <- read.csv("data.csv", header = TRUE, sep = ";")
# Drop the columns that are not used in the analysis (selected by position).
dropped_cols <- c(5, 6, 7, 9, 10, 12, 14, 16, 18)
data1 <- data1[, -dropped_cols]
# Correlation matrix of columns 4 to 15, shown as an upper-triangle plot with
# variables reordered by hierarchical clustering.
mcor <- cor(data1[, 4:15])
corrplot(
  mcor,
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

##On remarque que plusieurs variables sont fortement corrélées, ce qui justifie de réduire le nombre de variables
##ACP : 

Scree plot

library(FactoMineR)
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.3.3
# PCA on standardised variables, keeping 10 components; columns 1 to 3 are
# passed as qualitative supplementary variables. No plot is drawn by PCA itself.
pca1 <- PCA(
  data1,
  scale.unit = TRUE,
  ncp = 10,
  quali.sup = c(1, 2, 3),
  graph = FALSE
)
# Scree plot of the first 10 components.
fviz_screeplot(pca1, ncp = 10) +
  theme_classic()

Cercle de corrélation avec une coloration selon le cos2 des variables

# Variable correlation circle, coloured by each variable's cos2 on a
# white -> blue -> red gradient centred at 0.5.
fviz_pca_var(pca1, col.var = "cos2") +
  scale_color_gradient2(
    low = "white",
    mid = "blue",
    high = "red",
    midpoint = 0.5
  ) +
  theme_minimal()

Nuage et Biplot avec habillage

# Biplot of individuals and variables: only variables are labelled, individuals
# are coloured by Category with an ellipse drawn per group.
fviz_pca_biplot(
  pca1,
  label = "var",
  habillage = data1$Category,
  addEllipses = TRUE
) +
  theme_classic()

Clustering :

Choix du nombre de classes

# Keep only the columns used for clustering: columns 1 to 3 (the ones passed as
# qualitative supplementary variables to the PCA) are removed.
data2 <- data1[, -c(1, 2, 3)]
# Total within-cluster sum of squares for k = 1..15 (elbow method).
# k-means starts from randomly chosen centres, so the seed is fixed and several
# random starts are used per k: the curve is then reproducible and is not
# distorted by a single unlucky initialisation.
# NOTE(review): the variables are clustered on their raw scales, whereas the PCA
# above standardises them - confirm this is intended.
set.seed(123)
wss <- vapply(
  1:15,
  function(k) kmeans(data2, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)
##Plot SSE en fonction des classes 
library(ggvis)
## Warning: package 'ggvis' was built under R version 3.3.3
## 
## Attaching package: 'ggvis'
## The following object is masked from 'package:ggplot2':
## 
##     resolution
# Two-column table (number of clusters, total within-cluster sum of squares)
# used to draw the elbow curve.
sse <- data.frame(Clusters = 1:15, SSE = wss)
# Blue points joined by a line, in a 400 x 300 plot.
sse %>%
  ggvis(x = ~Clusters, y = ~SSE) %>%
  layer_points(fill := "blue") %>%
  layer_lines() %>%
  set_options(width = 400, height = 300)

#Création Clusters

# Final k-means partition into 6 clusters.
# The seed is fixed and 25 random starts are used so that cluster sizes and
# cluster numbering are reproducible between runs; without this, the labels
# change from run to run.
set.seed(123)
clusters <- kmeans(data2, centers = 6, nstart = 25)
# Print the fit: sizes, centres, assignments and sums of squares.
clusters
## K-means clustering with 6 clusters of sizes 81, 44, 5, 39, 61, 30
## 
## Cluster means:
##    Calories Saturated.Fat Cholesterol     Sodium Carbohydrates
## 1  299.6296      3.932099   21.790123  158.76543      50.06173
## 2  567.9545     10.704545  132.045455 1374.09091      48.02273
## 3 1232.0000     18.200000  305.000000 2494.00000     114.00000
## 4  381.5385      5.974359   73.333333  843.07692      35.56410
## 5  104.5902      1.000000    5.737705   48.68852      20.91803
## 6  635.6667     12.916667   65.833333  241.83333      96.96667
##   Dietary.Fiber    Sugars   Protein Vitamin.A....Daily.Value.
## 1     1.3209877 42.592593  9.320988                 12.864198
## 2     2.9318182  7.613636 27.363636                 15.840909
## 3     6.4000000 14.000000 45.800000                  6.800000
## 4     2.5641026  6.153846 19.333333                 19.794872
## 5     0.3934426 18.491803  2.000000                  5.934426
## 6     1.0666667 80.900000 13.466667                 19.466667
##   Vitamin.C....Daily.Value. Calcium....Daily.Value. Iron....Daily.Value.
## 1                  9.666667               28.185185            3.0000000
## 2                 10.159091               17.840909           18.8636364
## 3                  4.600000               22.600000           33.0000000
## 4                  5.384615               16.179487           14.5897436
## 5                 11.245902                5.622951            0.7868852
## 6                  2.333333               43.266667            5.2000000
## 
## Clustering vector:
##   [1] 4 4 4 4 4 4 2 2 2 2 4 2 2 2 2 2 2 2 2 2 2 4 2 2 2 2 2 2 2 2 2 3 3 3 3
##  [36] 4 4 4 1 6 1 1 4 2 2 2 4 2 1 4 4 2 4 4 4 4 4 4 4 2 2 2 2 2 2 4 4 2 2 4
##  [71] 2 2 2 2 2 2 2 4 1 4 4 2 3 4 1 4 4 5 4 4 4 4 4 4 4 4 1 1 6 5 5 5 5 1 5
## [106] 5 5 1 1 1 5 5 1 5 5 5 5 5 5 5 1 5 5 5 5 5 5 5 1 5 5 5 5 5 5 1 5 5 5 5
## [141] 5 5 5 5 5 5 5 5 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 5 1 1 1 1 1 1 1 5 1 1
## [176] 5 1 1 1 1 6 1 1 1 1 1 6 1 1 1 1 1 6 1 1 1 5 5 1 5 5 1 5 5 1 5 5 1 5 5
## [211] 5 1 1 6 1 1 1 1 1 6 1 1 1 1 6 6 1 6 6 6 6 6 5 1 1 5 1 1 5 1 1 6 6 6 6
## [246] 6 6 6 6 6 6 6 6 6 1 6 6 1 6 1
## 
## Within cluster sum of squares by cluster:
## [1] 1085044.5 2497007.3 2357034.0 1343976.8  620651.6  743143.8
##  (between_SS / total_SS =  91.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"

Variable Cluster

# Store each observation's cluster assignment as a new column of data2.
data2[["cluster"]] <- clusters[["cluster"]]
# Show the first rows to check the new column.
head(data2)
##   Calories Saturated.Fat Cholesterol Sodium Carbohydrates Dietary.Fiber
## 1      300             5         260    750            31             4
## 2      250             3          25    770            30             4
## 3      370             8          45    780            29             4
## 4      450            10         285    860            30             4
## 5      400             8          50    880            30             4
## 6      430             9         300    960            31             4
##   Sugars Protein Vitamin.A....Daily.Value. Vitamin.C....Daily.Value.
## 1      3      17                        10                         0
## 2      3      18                         6                         0
## 3      2      14                         8                         0
## 4      2      21                        15                         0
## 5      2      21                         6                         0
## 6      3      26                        15                         2
##   Calcium....Daily.Value. Iron....Daily.Value. cluster
## 1                      25                   15       4
## 2                      25                    8       4
## 3                      25                   10       4
## 4                      30                   15       4
## 5                      25                   10       4
## 6                      30                   20       4

Plot Clusters

library(cluster)
## Warning: package 'cluster' was built under R version 3.3.3
# Plot the k-means partition.
# data2 may already carry the "cluster" label column; it is excluded here so
# that the labels are not used as an input variable of the projection that
# clusplot draws.
cluster_features <- data2[, setdiff(names(data2), "cluster"), drop = FALSE]
clusplot(
  cluster_features,
  clusters$cluster,
  color = TRUE,
  shade = FALSE,
  labels = 0,
  lines = 0,
  main = "k-Means Cluster Analysis"
)