Analyse en composantes principales
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
Tab51<-tibble(
Student = paste("S",1:6,sep="" ),
M=
c(69.0,67.2,78.6,84.4,56.3,87.9),
P=
c(66.4,53.6,96.9,87.7,68.7,88.8),
C=
c(77.0,53.9,97.3,83.9,72.1,76.0),
B=
c(74.1,58.7,96.2,69.8,56.8,57.2)
)
install.packages("stargazer")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library("stargazer")
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
#Attention il faut absolument mettre results='asis'
Tab51%>%as.data.frame()%>%stargazer(type="latex",summary = FALSE,digits=1,
title = "Test scores for four courses, M (mathematics), P (physics), C (chemistry), and B (biology), with their averages and standard deviations (SD) (artificial example)")
% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy
Institute. E-mail: marek.hlavac at gmail.com % Date and time: Tue, Jan
24, 2023 - 10:24:04
Tab51%>%select(-1)%>%as.matrix()->X
X
## M P C B
## [1,] 69.0 66.4 77.0 74.1
## [2,] 67.2 53.6 53.9 58.7
## [3,] 78.6 96.9 97.3 96.2
## [4,] 84.4 87.7 83.9 69.8
## [5,] 56.3 68.7 72.1 56.8
## [6,] 87.9 88.8 76.0 57.2
X%>%scale(scale = FALSE)->Xc
X%>%scale()->Xcs
svd(Xc)
## $d
## [1] 56.570366 28.103845 15.719070 5.160117
##
## $u
## [,1] [,2] [,3] [,4]
## [1,] -0.09528954 0.37824718 -0.19446787 -0.71204268
## [2,] -0.59636701 0.07314216 -0.52499084 0.41511664
## [3,] 0.67163148 0.42517264 -0.06883412 0.43466043
## [4,] 0.25172409 -0.29942568 -0.05114484 -0.33474804
## [5,] -0.33260999 0.16673869 0.82399914 0.11034454
## [6,] 0.10091097 -0.74387498 0.01543854 0.08666911
##
## $v
## [,1] [,2] [,3] [,4]
## [1,] 0.3098626 -0.5991341 -0.6792040 -0.2893188
## [2,] 0.6182634 -0.3780807 0.3672023 0.5830677
## [3,] 0.5422622 0.1708748 0.4023153 -0.7175625
## [4,] 0.4771659 0.6847591 -0.4919215 0.2478524
K<-svd(Xc)$u
D<-diag(svd(Xc)$d)
L<-svd(Xc)$v
K%*%D%*%t(L)
## [,1] [,2] [,3] [,4]
## [1,] -4.9 -10.616667 0.3 5.3
## [2,] -6.7 -23.416667 -22.8 -10.1
## [3,] 4.7 19.883333 20.6 27.4
## [4,] 10.5 10.683333 7.2 1.0
## [5,] -17.6 -8.316667 -4.6 -12.0
## [6,] 14.0 11.783333 -0.7 -11.6
Xc[,]
## M P C B
## [1,] -4.9 -10.616667 0.3 5.3
## [2,] -6.7 -23.416667 -22.8 -10.1
## [3,] 4.7 19.883333 20.6 27.4
## [4,] 10.5 10.683333 7.2 1.0
## [5,] -17.6 -8.316667 -4.6 -12.0
## [6,] 14.0 11.783333 -0.7 -11.6
t(K)%*%K%>%round(2)
## [,1] [,2] [,3] [,4]
## [1,] 1 0 0 0
## [2,] 0 1 0 0
## [3,] 0 0 1 0
## [4,] 0 0 0 1
t(L)%*%L%>%round(2)
## [,1] [,2] [,3] [,4]
## [1,] 1 0 0 0
## [2,] 0 1 0 0
## [3,] 0 0 1 0
## [4,] 0 0 0 1
K[,1:2]%>%round(2)
## [,1] [,2]
## [1,] -0.10 0.38
## [2,] -0.60 0.07
## [3,] 0.67 0.43
## [4,] 0.25 -0.30
## [5,] -0.33 0.17
## [6,] 0.10 -0.74
D[1:2,1:2]%>%round(2)
## [,1] [,2]
## [1,] 56.57 0.0
## [2,] 0.00 28.1
L[,1:2]%>%t()%>%round(2)
## [,1] [,2] [,3] [,4]
## [1,] 0.31 0.62 0.54 0.48
## [2,] -0.60 -0.38 0.17 0.68
K[,1:2]%*%D[1:2,1:2]
## [,1] [,2]
## [1,] -5.390564 10.630200
## [2,] -33.736700 2.055576
## [3,] 37.994438 11.948986
## [4,] 14.240124 -8.415013
## [5,] -18.815869 4.685998
## [6,] 5.708571 -20.905747
Xc%*%L[,1:2]
## [,1] [,2]
## [1,] -5.390564 10.630200
## [2,] -33.736700 2.055576
## [3,] 37.994438 11.948986
## [4,] 14.240124 -8.415013
## [5,] -18.815869 4.685998
## [6,] 5.708571 -20.905747
L[,1:2]%*%D[1:2,1:2]
## [,1] [,2]
## [1,] 17.52904 -16.837973
## [2,] 34.97538 -10.625522
## [3,] 30.67597 4.802239
## [4,] 26.99345 19.244365
t(Xc)%*%K[,1:2]
## [,1] [,2]
## M 17.52904 -16.837973
## P 34.97538 -10.625522
## C 30.67597 4.802239
## B 26.99345 19.244365
(sqrt(6)*K[,1:2])%>%round(2)
## [,1] [,2]
## [1,] -0.23 0.93
## [2,] -1.46 0.18
## [3,] 1.65 1.04
## [4,] 0.62 -0.73
## [5,] -0.81 0.41
## [6,] 0.25 -1.82
((1/sqrt(6))*L[,1:2]%*%D[1:2,1:2])%>%round(2)
## [,1] [,2]
## [1,] 7.16 -6.87
## [2,] 14.28 -4.34
## [3,] 12.52 1.96
## [4,] 11.02 7.86
(sqrt(6)*L[,1:2]%*%solve(D[1:2,1:2]))%>%round(2)
## [,1] [,2]
## [1,] 0.01 -0.05
## [2,] 0.03 -0.03
## [3,] 0.02 0.01
## [4,] 0.02 0.06
t(Xc)%*%Xc
## M P C B
## M 707.00 725.880 394.87 229.78
## P 725.88 1378.548 1047.24 698.84
## C 394.87 1047.240 1017.78 866.83
## B 229.78 698.840 866.83 1160.42
L%*%D^2%*%t(L)
## [,1] [,2] [,3] [,4]
## [1,] 707.00 725.880 394.87 229.78
## [2,] 725.88 1378.548 1047.24 698.84
## [3,] 394.87 1047.240 1017.78 866.83
## [4,] 229.78 698.840 866.83 1160.42
F%>%round(2)
## [1] 0
F<-sqrt(6)*K[,1:2]
(5/6)*cov(F)%>%round(2)
## [,1] [,2]
## [1,] 1 0
## [2,] 0 1
(1/6)*t(F)%*%F%>%round(2)
## [,1] [,2]
## [1,] 1 0
## [2,] 0 1
A<-((1/sqrt(6))*L[,1:2]%*%D[1:2,1:2])
t(A)%*%A%>%round(2)
## [,1] [,2]
## [1,] 533.37 0.00
## [2,] 0.00 131.64
X
## M P C B
## [1,] 69.0 66.4 77.0 74.1
## [2,] 67.2 53.6 53.9 58.7
## [3,] 78.6 96.9 97.3 96.2
## [4,] 84.4 87.7 83.9 69.8
## [5,] 56.3 68.7 72.1 56.8
## [6,] 87.9 88.8 76.0 57.2
F
## [,1] [,2]
## [1,] -0.2334108 0.9265126
## [2,] -1.4607949 0.1791610
## [3,] 1.6451544 1.0414560
## [4,] 0.6165956 -0.7334401
## [5,] -0.8147247 0.4084247
## [6,] 0.2471804 -1.8221141
A
## [,1] [,2]
## [1,] 7.15620 -6.874074
## [2,] 14.27864 -4.337851
## [3,] 12.52341 1.960506
## [4,] 11.02003 7.856479
(5/6)*cov(X[,1:4],F[,1:2])
## [,1] [,2]
## M 7.15620 -6.874074
## P 14.27864 -4.337851
## C 12.52341 1.960506
## B 11.02003 7.856479
df<-data.frame(y=X[,1],F)
names(df)[2:3]<-c("F1","F2")
LM<-lm(y~F1+F2,df)
LM$coefficients
## (Intercept) F1 F2
## 73.900000 7.156200 -6.874074
A[1,]
## [1] 7.156200 -6.874074
(sqrt(6/5)*Xcs)%>%round(2)
## M P C B
## [1,] -0.45 -0.70 0.02 0.38
## [2,] -0.62 -1.54 -1.75 -0.73
## [3,] 0.43 1.31 1.58 1.97
## [4,] 0.97 0.70 0.55 0.07
## [5,] -1.62 -0.55 -0.35 -0.86
## [6,] 1.29 0.78 -0.05 -0.83
## attr(,"scaled:center")
## M P C B
## 73.90000 77.01667 76.70000 68.80000
## attr(,"scaled:scale")
## M P C B
## 11.89117 16.60451 14.26731 15.23430
(K<-svd(sqrt(6/5)*Xcs)$u%>%round(2))
## [,1] [,2] [,3] [,4]
## [1,] -0.09 0.32 -0.26 -0.72
## [2,] -0.58 -0.05 -0.55 0.41
## [3,] 0.65 0.45 -0.13 0.44
## [4,] 0.27 -0.29 0.01 -0.33
## [5,] -0.38 0.30 0.77 0.10
## [6,] 0.13 -0.72 0.16 0.10
(D<-diag(svd(sqrt(6/5)*Xcs)$d)%>%round(2))
## [,1] [,2] [,3] [,4]
## [1,] 4.16 0.00 0.00 0.00
## [2,] 0.00 2.25 0.00 0.00
## [3,] 0.00 0.00 1.23 0.00
## [4,] 0.00 0.00 0.00 0.38
(L<-svd(sqrt(6/5)*Xcs)$v%>%round(2))
## [,1] [,2] [,3] [,4]
## [1,] 0.42 -0.72 -0.51 -0.23
## [2,] 0.56 -0.22 0.47 0.65
## [3,] 0.56 0.26 0.40 -0.68
## [4,] 0.45 0.61 -0.60 0.25
K<-svd(sqrt(6/5)*Xcs)$u
D<-diag(svd(sqrt(6/5)*Xcs)$d)
L<-svd(sqrt(6/5)*Xcs)$v
((1/sqrt(6)) *L[,1:2]%*%D[1:2,1:2])%>%round(2)
## [,1] [,2]
## [1,] 0.71 -0.66
## [2,] 0.95 -0.20
## [3,] 0.95 0.24
## [4,] 0.77 0.56
(sqrt(6) *L[,1:2]%*%solve(D[1:2,1:2]))%>%round(2)
## [,1] [,2]
## [1,] 0.24 -0.78
## [2,] 0.33 -0.24
## [3,] 0.33 0.28
## [4,] 0.27 0.66
D<-diag(svd(sqrt(6/5)*Xcs)$d)
D%>%round(2)
## [,1] [,2] [,3] [,4]
## [1,] 4.16 0.00 0.00 0.00
## [2,] 0.00 2.25 0.00 0.00
## [3,] 0.00 0.00 1.23 0.00
## [4,] 0.00 0.00 0.00 0.38
sum(diag(D)[1:2]^2)/sum(diag(D^2))
## [1] 0.9311948
D<-diag(svd(Xc)$d)
D%>%round(2)
## [,1] [,2] [,3] [,4]
## [1,] 56.57 0.0 0.00 0.00
## [2,] 0.00 28.1 0.00 0.00
## [3,] 0.00 0.0 15.72 0.00
## [4,] 0.00 0.0 0.00 5.16
sum(diag(D)[1:2]^2)/sum(diag(D^2))
## [1] 0.9358039
library(FactoMineR)
pcafm<-PCA(Xc,scale.unit=FALSE)


SVD<-pcafm$svd
SVD$vs
## [1] 23.094755 11.473347 6.417283 2.106609
svd(Xc)$d
## [1] 56.570366 28.103845 15.719070 5.160117
SVD$vs
## [1] 23.094755 11.473347 6.417283 2.106609
svd(Xc)$d/sqrt(6)
## [1] 23.094755 11.473347 6.417283 2.106609
SVD$U
## [,1] [,2] [,3] [,4]
## [1,] -0.2334108 -0.9265126 0.47634705 1.7441412
## [2,] -1.4607949 -0.1791610 1.28595969 -1.0168240
## [3,] 1.6451544 -1.0414560 0.16860848 -1.0646963
## [4,] 0.6165956 0.7334401 0.12527877 0.8199619
## [5,] -0.8147247 -0.4084247 -2.01837744 -0.2702878
## [6,] 0.2471804 1.8221141 -0.03781654 -0.2122951
(sqrt(6))*svd(Xc)$u
## [,1] [,2] [,3] [,4]
## [1,] -0.2334108 0.9265126 -0.47634705 -1.7441412
## [2,] -1.4607949 0.1791610 -1.28595969 1.0168240
## [3,] 1.6451544 1.0414560 -0.16860848 1.0646963
## [4,] 0.6165956 -0.7334401 -0.12527877 -0.8199619
## [5,] -0.8147247 0.4084247 2.01837744 0.2702878
## [6,] 0.2471804 -1.8221141 0.03781654 0.2122951
SVD$V
## [,1] [,2] [,3] [,4]
## [1,] 0.3098626 0.5991341 0.6792040 0.2893188
## [2,] 0.6182634 0.3780807 -0.3672023 -0.5830677
## [3,] 0.5422622 -0.1708748 -0.4023153 0.7175625
## [4,] 0.4771659 -0.6847591 0.4919215 -0.2478524
svd(Xc)$v
## [,1] [,2] [,3] [,4]
## [1,] 0.3098626 -0.5991341 -0.6792040 -0.2893188
## [2,] 0.6182634 -0.3780807 0.3672023 0.5830677
## [3,] 0.5422622 0.1708748 0.4023153 -0.7175625
## [4,] 0.4771659 0.6847591 -0.4919215 0.2478524
pcafm
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 6 individuals, described by 4 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"