# ACP :
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(FactoMineR)
library(corrplot)
# Read the semicolon-separated file (first row = column names) and remove
# nine columns by position in one step.
data1 <- read.csv("data.csv", header = TRUE, sep = ";")[
  , -c(5, 6, 7, 9, 10, 12, 14, 16, 18)
]

# Pairwise correlations of columns 4 to 15 of the reduced table.
mcor <- cor(data1[4:15])

# Upper-triangle correlogram, variables ordered by hierarchical clustering,
# black labels rotated by 45 degrees.
corrplot(
  mcor,
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

## On remarque que plusieurs variables sont fortement corrélées, ce qui justifie de réduire le nombre de variables
##ACP :
# Scree plot
library(FactoMineR)
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.3.3
# PCA with unit-variance scaling, keeping 10 components; columns 1 to 3 are
# declared as supplementary qualitative variables and FactoMineR's built-in
# plots are switched off.
pca1 <- PCA(
  data1,
  scale.unit = TRUE,
  ncp = 10,
  quali.sup = c(1, 2, 3),
  graph = FALSE
)

# Scree plot limited to the first 10 components.
fviz_screeplot(pca1, ncp = 10) +
  theme_classic()

# Cercle de corrélation avec une coloration selon le cos2 des variables
# Variable plot coloured by cos2, using a white -> blue -> red gradient whose
# midpoint is set to 0.5.
fviz_pca_var(pca1, col.var = "cos2") +
  scale_color_gradient2(
    low = "white",
    mid = "blue",
    high = "red",
    midpoint = 0.5
  ) +
  theme_minimal()

# Nuage et Biplot avec habillage
# Biplot labelling only the variables; individuals are grouped by the
# Category column of data1, with an ellipse drawn per group.
fviz_pca_biplot(
  pca1,
  label = "var",
  habillage = data1$Category,
  addEllipses = TRUE
) +
  theme_classic()

# Clustering :
# Choix du nombre de classes
# Drop the first three columns of data1 (the ones passed as qualitative
# supplementary variables to the PCA above) before clustering.
data2 <- data1[, -c(1, 2, 3)]

# k-means starts from random centres: fix the seed so the elbow curve is
# reproducible from one run to the next.
set.seed(123)

# Total within-cluster sum of squares for k = 1..15 clusters.
# vapply() builds the whole vector at once (no growing inside a loop) and
# nstart = 25 keeps the best of 25 random starts for each k, so a single bad
# initialisation cannot distort the curve.
# NOTE(review): data2 is clustered on its raw scale; if the variables have very
# different ranges, consider scale(data2) - confirm with the analysis owner.
wss <- vapply(
  seq_len(15),
  function(k) kmeans(data2, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)
##Plot SSE en fonction des classes
library(ggvis)
## Warning: package 'ggvis' was built under R version 3.3.3
##
## Attaching package: 'ggvis'
## The following object is masked from 'package:ggplot2':
##
## resolution
# One row per candidate number of clusters (1 to 15) with its SSE taken
# from wss.
sse <- data.frame(Clusters = 1:15, SSE = wss)

# SSE against number of clusters: blue points joined by a line, drawn in a
# 400 x 300 canvas.
sse %>%
  ggvis(x = ~Clusters, y = ~SSE) %>%
  layer_points(fill := "blue") %>%
  layer_lines() %>%
  set_options(width = 400, height = 300)
# Create the clusters: k-means partition of data2 into 6 groups.
# The seed makes the partition (and the cluster numbering) reproducible;
# nstart = 25 keeps the best of 25 random initialisations instead of relying
# on a single, possibly poor, random start.
set.seed(123)
clusters <- kmeans(data2, centers = 6, nstart = 25)

# Print sizes, centres, assignments and within-cluster sums of squares.
clusters
## K-means clustering with 6 clusters of sizes 81, 44, 5, 39, 61, 30
##
## Cluster means:
## Calories Saturated.Fat Cholesterol Sodium Carbohydrates
## 1 299.6296 3.932099 21.790123 158.76543 50.06173
## 2 567.9545 10.704545 132.045455 1374.09091 48.02273
## 3 1232.0000 18.200000 305.000000 2494.00000 114.00000
## 4 381.5385 5.974359 73.333333 843.07692 35.56410
## 5 104.5902 1.000000 5.737705 48.68852 20.91803
## 6 635.6667 12.916667 65.833333 241.83333 96.96667
## Dietary.Fiber Sugars Protein Vitamin.A....Daily.Value.
## 1 1.3209877 42.592593 9.320988 12.864198
## 2 2.9318182 7.613636 27.363636 15.840909
## 3 6.4000000 14.000000 45.800000 6.800000
## 4 2.5641026 6.153846 19.333333 19.794872
## 5 0.3934426 18.491803 2.000000 5.934426
## 6 1.0666667 80.900000 13.466667 19.466667
## Vitamin.C....Daily.Value. Calcium....Daily.Value. Iron....Daily.Value.
## 1 9.666667 28.185185 3.0000000
## 2 10.159091 17.840909 18.8636364
## 3 4.600000 22.600000 33.0000000
## 4 5.384615 16.179487 14.5897436
## 5 11.245902 5.622951 0.7868852
## 6 2.333333 43.266667 5.2000000
##
## Clustering vector:
## [1] 4 4 4 4 4 4 2 2 2 2 4 2 2 2 2 2 2 2 2 2 2 4 2 2 2 2 2 2 2 2 2 3 3 3 3
## [36] 4 4 4 1 6 1 1 4 2 2 2 4 2 1 4 4 2 4 4 4 4 4 4 4 2 2 2 2 2 2 4 4 2 2 4
## [71] 2 2 2 2 2 2 2 4 1 4 4 2 3 4 1 4 4 5 4 4 4 4 4 4 4 4 1 1 6 5 5 5 5 1 5
## [106] 5 5 1 1 1 5 5 1 5 5 5 5 5 5 5 1 5 5 5 5 5 5 5 1 5 5 5 5 5 5 1 5 5 5 5
## [141] 5 5 5 5 5 5 5 5 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 5 5 1 1 1 1 1 1 1 5 1 1
## [176] 5 1 1 1 1 6 1 1 1 1 1 6 1 1 1 1 1 6 1 1 1 5 5 1 5 5 1 5 5 1 5 5 1 5 5
## [211] 5 1 1 6 1 1 1 1 1 6 1 1 1 1 6 6 1 6 6 6 6 6 5 1 1 5 1 1 5 1 1 6 6 6 6
## [246] 6 6 6 6 6 6 6 6 6 1 6 6 1 6 1
##
## Within cluster sum of squares by cluster:
## [1] 1085044.5 2497007.3 2357034.0 1343976.8 620651.6 743143.8
## (between_SS / total_SS = 91.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
# Variable Cluster
# Store each observation's k-means assignment as a new `cluster` column.
data2[["cluster"]] <- clusters[["cluster"]]

# Show the first six rows to check the new column.
head(data2, n = 6L)
## Calories Saturated.Fat Cholesterol Sodium Carbohydrates Dietary.Fiber
## 1 300 5 260 750 31 4
## 2 250 3 25 770 30 4
## 3 370 8 45 780 29 4
## 4 450 10 285 860 30 4
## 5 400 8 50 880 30 4
## 6 430 9 300 960 31 4
## Sugars Protein Vitamin.A....Daily.Value. Vitamin.C....Daily.Value.
## 1 3 17 10 0
## 2 3 18 6 0
## 3 2 14 8 0
## 4 2 21 15 0
## 5 2 21 6 0
## 6 3 26 15 2
## Calcium....Daily.Value. Iron....Daily.Value. cluster
## 1 25 15 4
## 2 25 8 4
## 3 25 10 4
## 4 30 15 4
## 5 25 10 4
## 6 30 20 4
# Plot Clusters
library(cluster)
## Warning: package 'cluster' was built under R version 3.3.3
# Bivariate cluster plot of the k-means partition.
# data2 now carries the `cluster` column added above; it is excluded here so
# the cluster labels are not fed into the projection as if they were a
# measured variable (which would artificially separate the groups).
# drop = FALSE keeps a data frame even if only one feature column remains.
clusplot(
  data2[, setdiff(names(data2), "cluster"), drop = FALSE],
  clusters$cluster,
  color = TRUE,
  shade = FALSE,
  labels = 0,
  lines = 0,
  main = "k-Means Cluster Analysis"
)
