Analyse en composantes principales

install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
Tab51<-tibble(
Student =  paste("S",1:6,sep="" ),
M=
c(69.0,67.2,78.6,84.4,56.3,87.9),
P=
c(66.4,53.6,96.9,87.7,68.7,88.8),
C=
c(77.0,53.9,97.3,83.9,72.1,76.0),
B=
c(74.1,58.7,96.2,69.8,56.8,57.2)
)
install.packages("stargazer")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library("stargazer")
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
#Attention il faut absolument mettre results='asis' 
Tab51%>%as.data.frame()%>%stargazer(type="latex",summary = FALSE,digits=1,
title = "Test scores for four courses, M (mathematics), P (physics), C (chemistry), and B (biology), with their averages and standard deviations (SD) (artificial example)")
% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com % Date and time: Tue, Jan 24, 2023 - 10:24:04
Tab51%>%select(-1)%>%as.matrix()->X
X
##         M    P    C    B
## [1,] 69.0 66.4 77.0 74.1
## [2,] 67.2 53.6 53.9 58.7
## [3,] 78.6 96.9 97.3 96.2
## [4,] 84.4 87.7 83.9 69.8
## [5,] 56.3 68.7 72.1 56.8
## [6,] 87.9 88.8 76.0 57.2
X%>%scale(scale = FALSE)->Xc
X%>%scale()->Xcs
svd(Xc)
## $d
## [1] 56.570366 28.103845 15.719070  5.160117
## 
## $u
##             [,1]        [,2]        [,3]        [,4]
## [1,] -0.09528954  0.37824718 -0.19446787 -0.71204268
## [2,] -0.59636701  0.07314216 -0.52499084  0.41511664
## [3,]  0.67163148  0.42517264 -0.06883412  0.43466043
## [4,]  0.25172409 -0.29942568 -0.05114484 -0.33474804
## [5,] -0.33260999  0.16673869  0.82399914  0.11034454
## [6,]  0.10091097 -0.74387498  0.01543854  0.08666911
## 
## $v
##           [,1]       [,2]       [,3]       [,4]
## [1,] 0.3098626 -0.5991341 -0.6792040 -0.2893188
## [2,] 0.6182634 -0.3780807  0.3672023  0.5830677
## [3,] 0.5422622  0.1708748  0.4023153 -0.7175625
## [4,] 0.4771659  0.6847591 -0.4919215  0.2478524
K<-svd(Xc)$u
D<-diag(svd(Xc)$d)
L<-svd(Xc)$v

K%*%D%*%t(L)
##       [,1]       [,2]  [,3]  [,4]
## [1,]  -4.9 -10.616667   0.3   5.3
## [2,]  -6.7 -23.416667 -22.8 -10.1
## [3,]   4.7  19.883333  20.6  27.4
## [4,]  10.5  10.683333   7.2   1.0
## [5,] -17.6  -8.316667  -4.6 -12.0
## [6,]  14.0  11.783333  -0.7 -11.6
Xc[,]
##          M          P     C     B
## [1,]  -4.9 -10.616667   0.3   5.3
## [2,]  -6.7 -23.416667 -22.8 -10.1
## [3,]   4.7  19.883333  20.6  27.4
## [4,]  10.5  10.683333   7.2   1.0
## [5,] -17.6  -8.316667  -4.6 -12.0
## [6,]  14.0  11.783333  -0.7 -11.6
t(K)%*%K%>%round(2)
##      [,1] [,2] [,3] [,4]
## [1,]    1    0    0    0
## [2,]    0    1    0    0
## [3,]    0    0    1    0
## [4,]    0    0    0    1
t(L)%*%L%>%round(2)
##      [,1] [,2] [,3] [,4]
## [1,]    1    0    0    0
## [2,]    0    1    0    0
## [3,]    0    0    1    0
## [4,]    0    0    0    1
K[,1:2]%>%round(2)
##       [,1]  [,2]
## [1,] -0.10  0.38
## [2,] -0.60  0.07
## [3,]  0.67  0.43
## [4,]  0.25 -0.30
## [5,] -0.33  0.17
## [6,]  0.10 -0.74
D[1:2,1:2]%>%round(2)
##       [,1] [,2]
## [1,] 56.57  0.0
## [2,]  0.00 28.1
L[,1:2]%>%t()%>%round(2)
##       [,1]  [,2] [,3] [,4]
## [1,]  0.31  0.62 0.54 0.48
## [2,] -0.60 -0.38 0.17 0.68
K[,1:2]%*%D[1:2,1:2]
##            [,1]       [,2]
## [1,]  -5.390564  10.630200
## [2,] -33.736700   2.055576
## [3,]  37.994438  11.948986
## [4,]  14.240124  -8.415013
## [5,] -18.815869   4.685998
## [6,]   5.708571 -20.905747
Xc%*%L[,1:2]
##            [,1]       [,2]
## [1,]  -5.390564  10.630200
## [2,] -33.736700   2.055576
## [3,]  37.994438  11.948986
## [4,]  14.240124  -8.415013
## [5,] -18.815869   4.685998
## [6,]   5.708571 -20.905747
L[,1:2]%*%D[1:2,1:2]
##          [,1]       [,2]
## [1,] 17.52904 -16.837973
## [2,] 34.97538 -10.625522
## [3,] 30.67597   4.802239
## [4,] 26.99345  19.244365
t(Xc)%*%K[,1:2]
##       [,1]       [,2]
## M 17.52904 -16.837973
## P 34.97538 -10.625522
## C 30.67597   4.802239
## B 26.99345  19.244365
(sqrt(6)*K[,1:2])%>%round(2)
##       [,1]  [,2]
## [1,] -0.23  0.93
## [2,] -1.46  0.18
## [3,]  1.65  1.04
## [4,]  0.62 -0.73
## [5,] -0.81  0.41
## [6,]  0.25 -1.82
((1/sqrt(6))*L[,1:2]%*%D[1:2,1:2])%>%round(2)
##       [,1]  [,2]
## [1,]  7.16 -6.87
## [2,] 14.28 -4.34
## [3,] 12.52  1.96
## [4,] 11.02  7.86
(sqrt(6)*L[,1:2]%*%solve(D[1:2,1:2]))%>%round(2)
##      [,1]  [,2]
## [1,] 0.01 -0.05
## [2,] 0.03 -0.03
## [3,] 0.02  0.01
## [4,] 0.02  0.06
t(Xc)%*%Xc
##        M        P       C       B
## M 707.00  725.880  394.87  229.78
## P 725.88 1378.548 1047.24  698.84
## C 394.87 1047.240 1017.78  866.83
## B 229.78  698.840  866.83 1160.42
L%*%D^2%*%t(L)
##        [,1]     [,2]    [,3]    [,4]
## [1,] 707.00  725.880  394.87  229.78
## [2,] 725.88 1378.548 1047.24  698.84
## [3,] 394.87 1047.240 1017.78  866.83
## [4,] 229.78  698.840  866.83 1160.42
F%>%round(2)
## [1] 0
F<-sqrt(6)*K[,1:2]
(5/6)*cov(F)%>%round(2)
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
(1/6)*t(F)%*%F%>%round(2)
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
A<-((1/sqrt(6))*L[,1:2]%*%D[1:2,1:2])
t(A)%*%A%>%round(2)
##        [,1]   [,2]
## [1,] 533.37   0.00
## [2,]   0.00 131.64
X
##         M    P    C    B
## [1,] 69.0 66.4 77.0 74.1
## [2,] 67.2 53.6 53.9 58.7
## [3,] 78.6 96.9 97.3 96.2
## [4,] 84.4 87.7 83.9 69.8
## [5,] 56.3 68.7 72.1 56.8
## [6,] 87.9 88.8 76.0 57.2
F
##            [,1]       [,2]
## [1,] -0.2334108  0.9265126
## [2,] -1.4607949  0.1791610
## [3,]  1.6451544  1.0414560
## [4,]  0.6165956 -0.7334401
## [5,] -0.8147247  0.4084247
## [6,]  0.2471804 -1.8221141
A
##          [,1]      [,2]
## [1,]  7.15620 -6.874074
## [2,] 14.27864 -4.337851
## [3,] 12.52341  1.960506
## [4,] 11.02003  7.856479
(5/6)*cov(X[,1:4],F[,1:2])
##       [,1]      [,2]
## M  7.15620 -6.874074
## P 14.27864 -4.337851
## C 12.52341  1.960506
## B 11.02003  7.856479
df<-data.frame(y=X[,1],F)
names(df)[2:3]<-c("F1","F2")
LM<-lm(y~F1+F2,df)
LM$coefficients
## (Intercept)          F1          F2 
##   73.900000    7.156200   -6.874074
A[1,]
## [1]  7.156200 -6.874074
(sqrt(6/5)*Xcs)%>%round(2)
##          M     P     C     B
## [1,] -0.45 -0.70  0.02  0.38
## [2,] -0.62 -1.54 -1.75 -0.73
## [3,]  0.43  1.31  1.58  1.97
## [4,]  0.97  0.70  0.55  0.07
## [5,] -1.62 -0.55 -0.35 -0.86
## [6,]  1.29  0.78 -0.05 -0.83
## attr(,"scaled:center")
##        M        P        C        B 
## 73.90000 77.01667 76.70000 68.80000 
## attr(,"scaled:scale")
##        M        P        C        B 
## 11.89117 16.60451 14.26731 15.23430
(K<-svd(sqrt(6/5)*Xcs)$u%>%round(2))
##       [,1]  [,2]  [,3]  [,4]
## [1,] -0.09  0.32 -0.26 -0.72
## [2,] -0.58 -0.05 -0.55  0.41
## [3,]  0.65  0.45 -0.13  0.44
## [4,]  0.27 -0.29  0.01 -0.33
## [5,] -0.38  0.30  0.77  0.10
## [6,]  0.13 -0.72  0.16  0.10
(D<-diag(svd(sqrt(6/5)*Xcs)$d)%>%round(2))
##      [,1] [,2] [,3] [,4]
## [1,] 4.16 0.00 0.00 0.00
## [2,] 0.00 2.25 0.00 0.00
## [3,] 0.00 0.00 1.23 0.00
## [4,] 0.00 0.00 0.00 0.38
(L<-svd(sqrt(6/5)*Xcs)$v%>%round(2))
##      [,1]  [,2]  [,3]  [,4]
## [1,] 0.42 -0.72 -0.51 -0.23
## [2,] 0.56 -0.22  0.47  0.65
## [3,] 0.56  0.26  0.40 -0.68
## [4,] 0.45  0.61 -0.60  0.25
K<-svd(sqrt(6/5)*Xcs)$u
D<-diag(svd(sqrt(6/5)*Xcs)$d)
L<-svd(sqrt(6/5)*Xcs)$v
((1/sqrt(6)) *L[,1:2]%*%D[1:2,1:2])%>%round(2)
##      [,1]  [,2]
## [1,] 0.71 -0.66
## [2,] 0.95 -0.20
## [3,] 0.95  0.24
## [4,] 0.77  0.56
(sqrt(6) *L[,1:2]%*%solve(D[1:2,1:2]))%>%round(2)
##      [,1]  [,2]
## [1,] 0.24 -0.78
## [2,] 0.33 -0.24
## [3,] 0.33  0.28
## [4,] 0.27  0.66
D<-diag(svd(sqrt(6/5)*Xcs)$d)
D%>%round(2)
##      [,1] [,2] [,3] [,4]
## [1,] 4.16 0.00 0.00 0.00
## [2,] 0.00 2.25 0.00 0.00
## [3,] 0.00 0.00 1.23 0.00
## [4,] 0.00 0.00 0.00 0.38
sum(diag(D)[1:2]^2)/sum(diag(D^2))
## [1] 0.9311948
D<-diag(svd(Xc)$d)
D%>%round(2)
##       [,1] [,2]  [,3] [,4]
## [1,] 56.57  0.0  0.00 0.00
## [2,]  0.00 28.1  0.00 0.00
## [3,]  0.00  0.0 15.72 0.00
## [4,]  0.00  0.0  0.00 5.16
sum(diag(D)[1:2]^2)/sum(diag(D^2))
## [1] 0.9358039
library(FactoMineR)
pcafm<-PCA(Xc,scale.unit=FALSE)

SVD<-pcafm$svd
SVD$vs
## [1] 23.094755 11.473347  6.417283  2.106609
svd(Xc)$d
## [1] 56.570366 28.103845 15.719070  5.160117
SVD$vs
## [1] 23.094755 11.473347  6.417283  2.106609
svd(Xc)$d/sqrt(6)
## [1] 23.094755 11.473347  6.417283  2.106609
SVD$U
##            [,1]       [,2]        [,3]       [,4]
## [1,] -0.2334108 -0.9265126  0.47634705  1.7441412
## [2,] -1.4607949 -0.1791610  1.28595969 -1.0168240
## [3,]  1.6451544 -1.0414560  0.16860848 -1.0646963
## [4,]  0.6165956  0.7334401  0.12527877  0.8199619
## [5,] -0.8147247 -0.4084247 -2.01837744 -0.2702878
## [6,]  0.2471804  1.8221141 -0.03781654 -0.2122951
(sqrt(6))*svd(Xc)$u
##            [,1]       [,2]        [,3]       [,4]
## [1,] -0.2334108  0.9265126 -0.47634705 -1.7441412
## [2,] -1.4607949  0.1791610 -1.28595969  1.0168240
## [3,]  1.6451544  1.0414560 -0.16860848  1.0646963
## [4,]  0.6165956 -0.7334401 -0.12527877 -0.8199619
## [5,] -0.8147247  0.4084247  2.01837744  0.2702878
## [6,]  0.2471804 -1.8221141  0.03781654  0.2122951
SVD$V
##           [,1]       [,2]       [,3]       [,4]
## [1,] 0.3098626  0.5991341  0.6792040  0.2893188
## [2,] 0.6182634  0.3780807 -0.3672023 -0.5830677
## [3,] 0.5422622 -0.1708748 -0.4023153  0.7175625
## [4,] 0.4771659 -0.6847591  0.4919215 -0.2478524
svd(Xc)$v
##           [,1]       [,2]       [,3]       [,4]
## [1,] 0.3098626 -0.5991341 -0.6792040 -0.2893188
## [2,] 0.6182634 -0.3780807  0.3672023  0.5830677
## [3,] 0.5422622  0.1708748  0.4023153 -0.7175625
## [4,] 0.4771659  0.6847591 -0.4919215  0.2478524
pcafm
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 6 individuals, described by 4 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"