ACP normée

Hafid Elyoussefi

14/12/2021


importer les données depuis un fichier CSV (exporté depuis Excel, séparateur « ; »)

# Pick the CSV file interactively. read.csv2() defaults to ";" as the field
# separator and "," as the decimal mark; the first column supplies row names.
M <- read.csv2(file = file.choose(), row.names = 1)
M
##      PAO PAA The  JE POT LEC RAI PLP
## AGRI 167   1 163  23  41   8   6   6
## SAAG 162   2 141  12  40  12   4  15
## PRIN 119   6  69  56  39   5  13  41
## CSUP  87  11  63 111  27   3  18  39
## CMOY 103   5  68  77  32   4  11  30
## EMPL 111   4  72  66  34   6  10  28
## OUVR 130   3  76  52  43   7   7  16
## INAC 138   7 117  74  53   8  12  20
# Per-column summary statistics of the data set.
summary(object = M)
##       PAO             PAA              The               JE        
##  Min.   : 87.0   Min.   : 1.000   Min.   : 63.00   Min.   : 12.00  
##  1st Qu.:109.0   1st Qu.: 2.750   1st Qu.: 68.75   1st Qu.: 44.75  
##  Median :124.5   Median : 4.500   Median : 74.00   Median : 61.00  
##  Mean   :127.1   Mean   : 4.875   Mean   : 96.12   Mean   : 58.88  
##  3rd Qu.:144.0   3rd Qu.: 6.250   3rd Qu.:123.00   3rd Qu.: 74.75  
##  Max.   :167.0   Max.   :11.000   Max.   :163.00   Max.   :111.00  
##       POT             LEC              RAI             PLP       
##  Min.   :27.00   Min.   : 3.000   Min.   : 4.00   Min.   : 6.00  
##  1st Qu.:33.50   1st Qu.: 4.750   1st Qu.: 6.75   1st Qu.:15.75  
##  Median :39.50   Median : 6.500   Median :10.50   Median :24.00  
##  Mean   :38.62   Mean   : 6.625   Mean   :10.12   Mean   :24.38  
##  3rd Qu.:41.50   3rd Qu.: 8.000   3rd Qu.:12.25   3rd Qu.:32.25  
##  Max.   :53.00   Max.   :12.000   Max.   :18.00   Max.   :41.00
# One palette index per column of M, so that no colour is recycled between
# boxes (a fixed 2:8 vector only provides 7 colours for the 8 variables).
boxplot(M, col = seq_len(ncol(M)))

la matrice de corrélation

# Pairwise correlations between the columns of M, displayed to two decimals.
round(x = cor(x = M), digits = 2)
##       PAO   PAA   The    JE   POT   LEC   RAI   PLP
## PAO  1.00 -0.77  0.93 -0.91  0.66  0.89 -0.83 -0.86
## PAA -0.77  1.00 -0.60  0.90 -0.33 -0.67  0.96  0.77
## The  0.93 -0.60  1.00 -0.75  0.52  0.79 -0.67 -0.83
## JE  -0.91  0.90 -0.75  1.00 -0.42 -0.84  0.92  0.72
## POT  0.66 -0.33  0.52 -0.42  1.00  0.60 -0.41 -0.55
## LEC  0.89 -0.67  0.79 -0.84  0.60  1.00 -0.82 -0.75
## RAI -0.83  0.96 -0.67  0.92 -0.41 -0.82  1.00  0.83
## PLP -0.86  0.77 -0.83  0.72 -0.55 -0.75  0.83  1.00

charger les packages

require(FactoMineR)
## Le chargement a nécessité le package : FactoMineR
require(factoextra)
## Le chargement a nécessité le package : factoextra
## Le chargement a nécessité le package : ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(GGally)
## Warning: le package 'GGally' a été compilé avec la version R 4.1.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# GGally pairs plot of every column of M.
ggpairs(data = M)

# GGally correlation plot of the same columns.
ggcorr(data = M)

effectuer la PCA sans graphique

# Fit the PCA on M; graph = FALSE suppresses FactoMineR's own plots.
res.pca <- PCA(X = M, graph = FALSE)

les valeurs propres

# Eigenvalue, percentage of variance and cumulative percentage per dimension.
get_eigenvalue(X = res.pca)
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1 6.207946839      77.59933549                    77.59934
## Dim.2 0.879681393      10.99601741                    88.59535
## Dim.3 0.415961123       5.19951404                    93.79487
## Dim.4 0.306454670       3.83068337                    97.62555
## Dim.5 0.168441497       2.10551872                    99.73107
## Dim.6 0.018067709       0.22584636                    99.95692
## Dim.7 0.003446769       0.04308461                   100.00000
# Eigenvalue plot with default settings.
fviz_eig(X = res.pca)

# Same plot with value labels added and the y axis fixed to the 0-80 range.
fviz_eig(
  X = res.pca,
  addlabels = TRUE,
  ylim = c(0, 80)
)

# List the result components available for the individuals.
get_pca_ind(res.pca = res.pca)
## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
# List the result components available for the variables.
get_pca_var(res.pca = res.pca)
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"
# Individuals and variables on the same plot.
fviz_pca_biplot(X = res.pca)

# Variables only, drawn in red.
fviz_pca_var(X = res.pca, col.var = "red")

library("corrplot")
## corrplot 0.90 loaded
# cos2 of each variable on each dimension (not a correlation matrix, hence
# is.corr = FALSE).
corrplot(res.pca$var$cos2, is.corr = FALSE)

# Total cos2 of the variables on dimensions 1 and 2.
fviz_cos2(res.pca, choice = "var", axes = 1:2)

# Colour the variables by cos2. The gradient uses three distinct stops so the
# colour scale is monotonic: repeating the same colours along the gradient
# would map one colour to several different cos2 ranges.
fviz_pca_var(
  res.pca,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Encode cos2 through transparency instead of colour.
fviz_pca_var(res.pca, alpha.var = "cos2")

# Contribution of each variable to each dimension.
corrplot(res.pca$var$contrib, is.corr = FALSE)

la contribution des variables dans l’axe 1

# Variable contributions to dimension 1 (at most the top 10 are shown).
fviz_contrib(X = res.pca, choice = "var", axes = 1, top = 10)

la contribution des variables dans l’axe 2

# Variable contributions to dimension 2 (at most the top 10 are shown).
fviz_contrib(X = res.pca, choice = "var", axes = 2, top = 10)

la contribution des variables dans les axes 1 et 2

# Variable contributions to dimensions 1 and 2 together (top 10 at most).
fviz_contrib(X = res.pca, choice = "var", axes = 1:2, top = 10)

la contribution des variables

# Colour the variables by contribution. Three distinct stops keep the colour
# scale monotonic, so each colour corresponds to a single range of values.
fviz_pca_var(
  res.pca,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)

# Encode the contribution through transparency instead of colour.
fviz_pca_var(res.pca, alpha.var = "contrib")

# Colour by a custom continuous variable. The values are random draws used
# purely for illustration; one value is generated per variable of the PCA
# rather than a hard-coded count.
set.seed(123)
my.cont.var <- rnorm(nrow(res.pca$var$coord))
# The gradient is given as colour names only: mixing names with integers
# (e.g. 3:7) silently coerces the integers to the strings "3", "4", ...
fviz_pca_var(
  res.pca,
  col.var = my.cont.var,
  gradient.cols = c("blue", "yellow", "red"),
  legend.title = "Cont.var"
)

créer des groupes de variables en utilisant : kmeans

Créer 3 groupes de variables (centers = 3)

# Fix the RNG so the random starts of kmeans are reproducible.
set.seed(123)
# Cluster the variables on their PCA coordinates: 3 centres, 25 random starts.
res.km <- kmeans(
  x = res.pca$var$coord,
  centers = 3,
  nstart = 25
)
# Cluster membership as a factor, for use as a grouping colour.
grp <- as.factor(res.km[["cluster"]])

Couleur des variables par groupes

# Variable plot coloured by the grouping factor grp, one colour per group.
fviz_pca_var(
  X = res.pca,
  col.var = grp,
  palette = c("#0073C2FF", "#EFC000FF", "#868686FF"),
  legend.title = "Cluster"
)

Description de la dimension 1

# Describe dimensions 1 and 2, keeping results with p-value below 0.05.
res.desc <- dimdesc(res = res.pca, axes = c(1, 2), proba = 0.05)
# Show the description of the first dimension only.
res.desc[["Dim.1"]]
## $quanti
##     correlation      p.value
## JE    0.9309151 7.821882e-04
## RAI   0.9294859 8.308315e-04
## PLP   0.9011429 2.239726e-03
## PAA   0.8687483 5.110853e-03
## The  -0.8700402 4.966446e-03
## LEC  -0.9089814 1.758745e-03
## PAO  -0.9749797 3.842664e-05
## 
## attr(,"class")
## [1] "condes" "list"

PCA des individus

# Keep the individuals' results in `ind`, then print their summary.
ind <- get_pca_ind(res.pca = res.pca)
ind
## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
# Individuals plot with default settings.
fviz_pca_ind(X = res.pca)

# Individuals coloured by cos2 along a three-colour gradient.
fviz_pca_ind(
  X = res.pca,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Point size driven by cos2; point shape 21 with a fixed fill colour.
fviz_pca_ind(
  X = res.pca,
  pointsize = "cos2",
  pointshape = 21,
  fill = "#E7B800",
  repel = TRUE
)

# Both colour and point size driven by cos2.
fviz_pca_ind(
  X = res.pca,
  col.ind = "cos2",
  pointsize = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Bar chart of the individuals' cos2.
fviz_cos2(X = res.pca, choice = "ind")

la contribution totale des individus à PC1 et PC2

# Contribution of the individuals to dimensions 1 and 2 together.
fviz_contrib(X = res.pca, choice = "ind", axes = 1:2)

exclure les variables non numériques (ici la 5e colonne, Species, du jeu de données iris) avant d’effectuer la PCA

# PCA on iris without its Species column (the 5th column), plots suppressed.
iris.pca <- PCA(
  X = iris[, setdiff(names(iris), "Species")],
  graph = FALSE
)

les Variables dans les dimensions 2 et 3

# Variables projected on dimensions 2 and 3.
fviz_pca_var(X = res.pca, axes = c(2, 3))

les Individus dans les dimensions 2 et 3

# Individuals projected on dimensions 2 and 3.
fviz_pca_ind(X = res.pca, axes = c(2, 3))

afficher le point et le texte de la variable

# Draw variables with points and text labels.
fviz_pca_var(X = res.pca, geom.var = c("point", "text"))

afficher le texte de l’individu uniquement

# Draw individuals as text labels only.
fviz_pca_ind(X = res.pca, geom.ind = "text")

Changer la taille des flèches et des étiquettes

# Variable plot with explicit arrow and label sizes.
fviz_pca_var(X = res.pca, arrowsize = 1, labelsize = 5)

Changer la taille et la couleur de remplissage des points

Changer la taille du label

# Individuals plot with explicit point size, shape, fill and label size.
fviz_pca_ind(
  X = res.pca,
  pointsize = 3,
  pointshape = 21,
  fill = "red",
  labelsize = 5,
  repel = TRUE
)

merci cher visiteur , chère visiteuse