library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
data<-
c(0.617,0.545,0.496,0.493,0.437,0.408,
731,680,621,591,617,615,140,139,143,128,
186,184,3.24,4.13,3.68,4.00,4.80,4.80)
data %>% length()
## [1] 24
data%>% matrix(nrow = 6) -> X
X%>%as.data.frame()->DF
rownames(DF)<-c("Tigers","Dragons","BayStars","Swallows","Giants","Carp")
colnames(DF)<-c("Win","Runs","HR","ERA")
DF%>%rownames_to_column(var="Team")->DF
DF%>%knitr::kable("simple")
Team | Win | Runs | HR | ERA |
---|---|---|---|---|
Tigers | 0.617 | 731 | 140 | 3.24 |
Dragons | 0.545 | 680 | 139 | 4.13 |
BayStars | 0.496 | 621 | 143 | 3.68 |
Swallows | 0.493 | 591 | 128 | 4.00 |
Giants | 0.437 | 617 | 186 | 4.80 |
Carp | 0.408 | 615 | 184 | 4.80 |
X%>%t()
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.617 0.545 0.496 0.493 0.437 0.408
## [2,] 731.000 680.000 621.000 591.000 617.000 615.000
## [3,] 140.000 139.000 143.000 128.000 186.000 184.000
## [4,] 3.240 4.130 3.680 4.000 4.800 4.800
print(X)
## [,1] [,2] [,3] [,4]
## [1,] 0.617 731 140 3.24
## [2,] 0.545 680 139 4.13
## [3,] 0.496 621 143 3.68
## [4,] 0.493 591 128 4.00
## [5,] 0.437 617 186 4.80
## [6,] 0.408 615 184 4.80
knitr::kable(t(X),caption = "**X'**")
0.617 | 0.545 | 0.496 | 0.493 | 0.437 | 0.408 |
731.000 | 680.000 | 621.000 | 591.000 | 617.000 | 615.000 |
140.000 | 139.000 | 143.000 | 128.000 | 186.000 | 184.000 |
3.240 | 4.130 | 3.680 | 4.000 | 4.800 | 4.800 |
X<-matrix(c(3,-2,6,8,0,-2),nrow=2,byrow = T)
X%>%knitr::kable(caption = "**X**")
3 | -2 | 6 |
8 | 0 | -2 |
Y<-matrix(c(2,1,-9,-7,2,-3),nrow=2,byrow = T)
Y%>%knitr::kable(caption = "**Y**")
2 | 1 | -9 |
-7 | 2 | -3 |
(X+Y)%>%knitr::kable(caption = "**X+Y**")
5 | -1 | -3 |
1 | 2 | -5 |
Z<-matrix(c(8,-2,6,-5,0,-3),nrow=2,byrow = T)
Z%>%knitr::kable(caption = "**Z**")
8 | -2 | 6 |
-5 | 0 | -3 |
(-0.1*Z)%>%knitr::kable(caption = "-0.1**Z**")
-0.8 | 0.2 | -0.6 |
0.5 | 0.0 | 0.3 |
X<-matrix(c(4,-2,6,8,0,-2),nrow=2,byrow = T)
X%>%knitr::kable(caption = "**X**")
4 | -2 | 6 |
8 | 0 | -2 |
Y<-matrix(c(2,1,-9,-7,2,-3),nrow=2,byrow = T)
Y%>%knitr::kable(caption = "**Y**")
2 | 1 | -9 |
-7 | 2 | -3 |
(0.5*X+(-2)*Y)%>%knitr::kable(caption = "0.5**X**+(-2)**Y**")
-2 | -3 | 21 |
18 | -4 | 5 |
A<-matrix(c(2,-4,1,7),nrow=2,byrow = T)
A%>%knitr::kable(caption = "**A**")
2 | -4 |
1 | 7 |
B<-matrix(c(-3,1,2,-5),nrow=2,byrow = T)
B%>%knitr::kable(caption = "**B**")
-3 | 1 |
2 | -5 |
(A%*%B)%>%knitr::kable(caption = "**AB**")
-14 | 22 |
11 | -34 |
X<-matrix(c(2,3,-1,-2,0,4),nrow=2,byrow = T)
X%>%knitr::kable(caption = "**X**")
2 | 3 | -1 |
-2 | 0 | 4 |
Y<-matrix(c(3,5,4,-1,0,-2,0,6,0),nrow=3,byrow = T)
Y%>%knitr::kable(caption = "**Y**")
3 | 5 | 4 |
-1 | 0 | -2 |
0 | 6 | 0 |
(X%*%Y)%>%knitr::kable(caption = "**XY**")
3 | 4 | 2 |
-6 | 14 | -8 |
F<-matrix(c(2,-1,-3,0,1,3,-2,-3),nrow=4,byrow = T)
F%>%knitr::kable(caption = "**F**")
2 | -1 |
-3 | 0 |
1 | 3 |
-2 | -3 |
A<-matrix(c(-4,1,6,-3,2,5),nrow=3,byrow = T)
A%>%knitr::kable(caption = "**A**")
-4 | 1 |
6 | -3 |
2 | 5 |
(F%*%t(A)) %>%knitr::kable(caption = "**FA'**")
-9 | 15 | -1 |
12 | -18 | -6 |
-1 | -3 | 17 |
5 | -3 | -19 |
A<-matrix(c(-4,1,6,-3,2,5),nrow=3,byrow = T)
A%>%knitr::kable(caption = "**A**")
-4 | 1 |
6 | -3 |
2 | 5 |
(A%*%t(A)) %>%knitr::kable(caption = "**S=AA'**")
17 | -27 | -3 |
-27 | 45 | -3 |
-3 | -3 | 29 |
(t(A)%*%A) %>%knitr::kable(caption = "**T=A'A**")
56 | -12 |
-12 | 35 |
u<-matrix(c(2,-1,3),nrow=3,byrow = T)
u%>%knitr::kable(caption = "**u**")
2 |
-1 |
3 |
v<-matrix(c(-2,3,-4),nrow=3,byrow = T)
v%>%knitr::kable(caption = "**v**")
-2 |
3 |
-4 |
(t(u)%*%v) %>%knitr::kable(caption = "**u'v**")
-19 |
(u%*%t(v)) %>%knitr::kable(caption = "**uv'**")
-4 | 6 | -8 |
2 | -3 | 4 |
-6 | 9 | -12 |
library(plotly)
X<-data.frame(x=c(1, 4),y=c(2, 5),z=c(3,6))
X%>%knitr::kable("simple")
x | y | z |
---|---|---|
1 | 2 | 3 |
4 | 5 | 6 |
X%>%plot_ly(x =~x,y =~y,z =~z)%>%add_markers()%>%
add_trace(x = c(0, X[1,1 ]), y = c(0, X[1,2 ]), z = c(0, X[1,3 ]),type = "scatter3d", mode = "lines", name = "lines", showlegend = FALSE,color='#CCCCCC')%>% add_trace(x = c(0, 4), y = c(0, 5), z = c(0, 6),type = "scatter3d", mode = "lines", name = "lines", showlegend = FALSE,color='#CCCCCC')
install.packages("plot3D")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library("plot3D")
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
cols <- c("red", "red")
arrows3D(rep(0,2), rep(0,2), rep(0,2), X[,1], X[,2], X[,3], col = cols,
lwd = 2, d = 3,
main = "Représentation des vecteurs en 3D", bty ="g", ticktype = "detailed")
set.seed(1)
n=2
rnorm(n^2,10,2)%>%matrix(,nrow=n)->X
X%>%data.frame()%>%knitr::kable()
X1 | X2 |
---|---|
8.747092 | 8.328743 |
10.367287 | 13.190562 |
(t(X)%*%X)%>%diag()%>%sum()
## [1] 427.3511
norm(X,type = "F")^2 # Frobenius norm
## [1] 427.3511
n=10
p=5
matrix(rep(0,p),ncol = 1)
## [,1]
## [1,] 0
## [2,] 0
## [3,] 0
## [4,] 0
## [5,] 0
matrix(rep(0,n*p),ncol = p)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0 0 0 0 0
## [2,] 0 0 0 0 0
## [3,] 0 0 0 0 0
## [4,] 0 0 0 0 0
## [5,] 0 0 0 0 0
## [6,] 0 0 0 0 0
## [7,] 0 0 0 0 0
## [8,] 0 0 0 0 0
## [9,] 0 0 0 0 0
## [10,] 0 0 0 0 0
matrix(rep(1,5),ncol = 1)
## [,1]
## [1,] 1
## [2,] 1
## [3,] 1
## [4,] 1
## [5,] 1
matrix(rep(1,10*5),ncol = 5)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 1 1 1 1
## [2,] 1 1 1 1 1
## [3,] 1 1 1 1 1
## [4,] 1 1 1 1 1
## [5,] 1 1 1 1 1
## [6,] 1 1 1 1 1
## [7,] 1 1 1 1 1
## [8,] 1 1 1 1 1
## [9,] 1 1 1 1 1
## [10,] 1 1 1 1 1
matrix(rep(1,10),ncol = 1)%*%t(matrix(rep(1,5),ncol = 1))
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 1 1 1 1
## [2,] 1 1 1 1 1
## [3,] 1 1 1 1 1
## [4,] 1 1 1 1 1
## [5,] 1 1 1 1 1
## [6,] 1 1 1 1 1
## [7,] 1 1 1 1 1
## [8,] 1 1 1 1 1
## [9,] 1 1 1 1 1
## [10,] 1 1 1 1 1
### Le produit d’une
matrice par sa transposée est symétrique
diag(3)%>% knitr::kable()
1 | 0 | 0 |
0 | 1 | 0 |
0 | 0 | 1 |
TabA<-data.frame(Participant = 1:6,
Japan = c(82,96,84,90,93,82),
English = c(70,67,54,66,74,60),
Science = c(76,71,65,80,77,89)
)
library(tidyverse)
TabA%>%as_tibble()->TabA
(TabA%>%as.matrix()->MA)
## Participant Japan English Science
## [1,] 1 82 70 76
## [2,] 2 96 67 71
## [3,] 3 84 54 65
## [4,] 4 90 66 80
## [5,] 5 93 74 77
## [6,] 6 82 60 89
data<-
c(0.617,0.545,0.496,0.493,0.437,0.408,
731,680,621,591,617,615,140,139,143,128,
186,184,3.24,4.13,3.68,4.00,4.80,4.80)
data%>% matrix(nrow = 6) -> MC
MC%>%as.data.frame()->TabC
rownames(TabC)<-c("Tigers","Dragons","BayStars","Swallows","Giants","Carp")
colnames(TabC)<-c("Win","Runs","HR","ERA")
TabC%>%rownames_to_column(var="Team")->TabC
TabC%>%knitr::kable("simple")
Team | Win | Runs | HR | ERA |
---|---|---|---|---|
Tigers | 0.617 | 731 | 140 | 3.24 |
Dragons | 0.545 | 680 | 139 | 4.13 |
BayStars | 0.496 | 621 | 143 | 3.68 |
Swallows | 0.493 | 591 | 128 | 4.00 |
Giants | 0.437 | 617 | 186 | 4.80 |
Carp | 0.408 | 615 | 184 | 4.80 |
MC
## [,1] [,2] [,3] [,4]
## [1,] 0.617 731 140 3.24
## [2,] 0.545 680 139 4.13
## [3,] 0.496 621 143 3.68
## [4,] 0.493 591 128 4.00
## [5,] 0.437 617 186 4.80
## [6,] 0.408 615 184 4.80
TabAp<-data.frame(Participant = 1:6,
History = c(66,72,44,58,70,56),
Mathematics = c(74,98,62,88,56,84)
)
library(tidyverse)
TabAp%>%as_tibble()->TabAp
(TabAp%>%as.matrix()->MAp)
## Participant History Mathematics
## [1,] 1 66 74
## [2,] 2 72 98
## [3,] 3 44 62
## [4,] 4 58 88
## [5,] 5 70 56
## [6,] 6 56 84
library(ggplot2)
TabAp%>%
ggplot(aes(x=History, y=0,label =History))+geom_point(size = 2,shape=25)+ scale_color_manual(values = unname(colours)) +geom_text(vjust = -1,size=2)+geom_hline(yintercept=0,linetype="dashed",
color = "red", size=1)+xlim(0,100)
TabAp%>%
ggplot(aes(x=History, y=0,label =History))+ geom_boxplot(alpha = 0.80) +
geom_point(size = 2)+geom_text(vjust = -1)+
theme(text = element_text(size = 10),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_blank(),
legend.position = "none")+xlim(0,100)
TabAp%>%summarise(mean = mean(History), n = n())
matrix(rep(1,6),ncol = 1)->u
TabAp%>%select(History)%>%as.matrix(ncol = 1)->x
t(u)%*%x/6
## History
## [1,] 61
matrix(rep(1,6),ncol = 1)->u
TabAp%>%select(History)%>%as.matrix(ncol = 1)->x
x-mean(x)
## History
## [1,] 5
## [2,] 11
## [3,] -17
## [4,] -3
## [5,] 9
## [6,] -5
x-mean(x)*u
## History
## [1,] 5
## [2,] 11
## [3,] -17
## [4,] -3
## [5,] 9
## [6,] -5
J<-diag(6)-(1/6)*u%*%t(u)
J%*%x
## History
## [1,] 5
## [2,] 11
## [3,] -17
## [4,] -3
## [5,] 9
## [6,] -5
## Variance selon les différentes formules
matrix(rep(1,6),ncol = 1)->u
TabAp%>%select(History)%>%as.matrix(ncol = 1)->x
x-mean(x)
## History
## [1,] 5
## [2,] 11
## [3,] -17
## [4,] -3
## [5,] 9
## [6,] -5
x-mean(x)*u
## History
## [1,] 5
## [2,] 11
## [3,] -17
## [4,] -3
## [5,] 9
## [6,] -5
J<-diag(6)-(1/6)*u%*%t(u)
(1/6)*t(x-mean(x)*u)%*%(x-mean(x)*u)
## History
## History 91.66667
(1/6)*t(x)%*%J%*%x
## History
## History 91.66667
(1/6)*(norm(J%*%x, type="2"))^2
## [1] 91.66667
(5/6)*var(x)
## History
## History 91.66667
## Ecart-type
sqrt(1/6)*(norm(J%*%x, type="2"))
## [1] 9.574271
sqrt(5/6)*sd(x)
## [1] 9.574271
matrix(rep(1,6),ncol = 1)->u
TabAp%>%select(History)%>%as.matrix(ncol = 1)->x
(x-mean(x))/(sd(x))
## History
## [1,] 0.4767313
## [2,] 1.0488088
## [3,] -1.6208864
## [4,] -0.2860388
## [5,] 0.8581163
## [6,] -0.4767313
scale(x)
## History
## [1,] 0.4767313
## [2,] 1.0488088
## [3,] -1.6208864
## [4,] -0.2860388
## [5,] 0.8581163
## [6,] -0.4767313
## attr(,"scaled:center")
## History
## 61
## attr(,"scaled:scale")
## History
## 10.48809
scale(x,scale = FALSE)
## History
## [1,] 5
## [2,] 11
## [3,] -17
## [4,] -3
## [5,] 9
## [6,] -5
## attr(,"scaled:center")
## History
## 61
x-mean(x)
## History
## [1,] 5
## [2,] 11
## [3,] -17
## [4,] -3
## [5,] 9
## [6,] -5
`
X<-data.frame(Food=1:10,
Sweet=c(32,28,20,34,25,35,25,30,34,22),
Spice=c(10,20,19,21,16,14,20,18,13,26),
Sales=c(62,83,34,91,53,70,62,73,84,63))
## Reproduire ces trois graphiques avec un
panel ggplot2
X[,2:4]%>% as.matrix()->M
n<-dim(M)[1]
t(M[,1]-mean(M[,1]))%*%(M[,2]-mean(M[,2]))/n
## [,1]
## [1,] -12.65
u<-matrix(1,nrow = n)
J<-diag(n)-u%*%t(u)/n
t(M[,1])%*%J%*%M[,2]/n
## [,1]
## [1,] -12.65
((n-1)/n)*cov(M)
## Sweet Spice Sales
## Sweet 25.65 -12.65 60.15
## Spice -12.65 19.01 0.15
## Sales 60.15 0.15 251.45
X[,2:4]%>% as.matrix()->M
n<-dim(M)[1]
res<-t(J%*%M[,1])%*%J%*%M[,2]/(norm(J%*%M[,1],type="2")*norm(J%*%M[,2],type="2"))
round(res,2)
## [,1]
## [1,] -0.57
cor(M)%>%round(2)
## Sweet Spice Sales
## Sweet 1.00 -0.57 0.75
## Spice -0.57 1.00 0.00
## Sales 0.75 0.00 1.00
angles<-cor(M)%>%acos()
round(angles*360/(2*pi),0)
## Sweet Spice Sales
## Sweet 0 125 41
## Spice 125 0 90
## Sales 41 90 0
scale(M)
## Sweet Spice Sales
## [1,] 0.65561007 -1.67540930 -0.3290471
## [2,] -0.09365858 0.50044693 0.9273147
## [3,] -1.59219588 0.28286131 -2.0041962
## [4,] 1.03024439 0.71803256 1.4059287
## [5,] -0.65561007 -0.36989556 -0.8674879
## [6,] 1.21756156 -0.80506681 0.1495669
## [7,] -0.65561007 0.50044693 -0.3290471
## [8,] 0.28097574 0.06527569 0.3290471
## [9,] 1.03024439 -1.02265243 0.9871414
## [10,] -1.21756156 1.80596067 -0.2692204
## attr(,"scaled:center")
## Sweet Spice Sales
## 28.5 17.7 67.5
## attr(,"scaled:scale")
## Sweet Spice Sales
## 5.338539 4.595892 16.714930
round(cov(scale(M)),2)
## Sweet Spice Sales
## Sweet 1.00 -0.57 0.75
## Spice -0.57 1.00 0.00
## Sales 0.75 0.00 1.00
t(M)%*%J%*%M/n
## Sweet Spice Sales
## Sweet 25.65 -12.65 60.15
## Spice -12.65 19.01 0.15
## Sales 60.15 0.15 251.45
Y<-M%>%scale(center = TRUE, scale = FALSE)
t(Y)%*%Y/n
## Sweet Spice Sales
## Sweet 25.65 -12.65 60.15
## Spice -12.65 19.01 0.15
## Sales 60.15 0.15 251.45
t(J%*%M)%*%(J%*%M)/n
## Sweet Spice Sales
## Sweet 25.65 -12.65 60.15
## Spice -12.65 19.01 0.15
## Sales 60.15 0.15 251.45
Z<-sqrt(n/(n-1))*scale(M)
t(Z)%*%Z/n
## Sweet Spice Sales
## Sweet 1.0000000 -0.572869608 0.748972940
## Spice -0.5728696 1.000000000 0.002169574
## Sales 0.7489729 0.002169574 1.000000000
D<-(diag(diag(((n-1)/n)*var(M))))^(1/2)
solve(D)%*%t(M)%*%J%*%M%*%solve(D)/n
## [,1] [,2] [,3]
## [1,] 1.0000000 -0.572869608 0.748972940
## [2,] -0.5728696 1.000000000 0.002169574
## [3,] 0.7489729 0.002169574 1.000000000
solve(D)%*%var(M)%*%solve(D)*((n-1)/n)
## [,1] [,2] [,3]
## [1,] 1.0000000 -0.572869608 0.748972940
## [2,] -0.5728696 1.000000000 0.002169574
## [3,] 0.7489729 0.002169574 1.000000000
- Erratum : \[\underbrace{\mathbf{H}}_{n\times
p}\underbrace{\mathbf{b}}_{p\times 1}=\underbrace{\mathbf{O}_n}_{n\times
1}\]
library(Matrix)
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
P<-matrix(c(1,1,9,2,2,6,1,3,4,2,4,7),byrow = TRUE,ncol = 3)
solve(P[-4,],rep(0,3))
## [1] 0 0 0
P%>%rankMatrix()
## [1] 3
## attr(,"method")
## [1] "tolNorm2"
## attr(,"useGrad")
## [1] FALSE
## attr(,"tol")
## [1] 8.881784e-16
Q<-matrix(c(1,2,3,2,4,6,1,2,3,2,4,6),byrow = TRUE,ncol = 3)
rankMatrix(Q)
## [1] 1
## attr(,"method")
## [1] "tolNorm2"
## attr(,"useGrad")
## [1] FALSE
## attr(,"tol")
## [1] 8.881784e-16
rankMatrix(t(Q))
## [1] 1
## attr(,"method")
## [1] "tolNorm2"
## attr(,"useGrad")
## [1] FALSE
## attr(,"tol")
## [1] 8.881784e-16
R<-matrix(c(1,1,3,2,2,6,1,3,5,2,4,8),byrow = TRUE,ncol = 3)
R%>%rankMatrix()
## [1] 2
## attr(,"method")
## [1] "tolNorm2"
## attr(,"useGrad")
## [1] FALSE
## attr(,"tol")
## [1] 8.881784e-16
tibble(
Product =1:50 ,
Quality = c(10,5,5,5,6,6,5,3,3,5,6,9,8,6,5,10,4,2,7,4,6,9,5,6,8,4,3,6,4,9,5,4,4,3,3,1,4,5,1,6,4,2,4,3,7,7,5,5,7,7)
,
Price = c(1800,1550,1250,1150,1700,1550,1200,1000,1300,1300,1550,1800,1400,1300,1400,1950,1550,1300,1800,1300,1350,1450,1300,1450,1750,1500,1700,1500,1250,1650,1500,1350,1350,1550,1200,1450,1600,1600,1100,1600,1450,1300,1200,1150,1350,1200,1550,1600,1400,1600),
Appearance = c(2.6,4.2,3.0,1.0,7.0,4.0,3.6,1.8,5.8,3.0,5.8,4.2,4.4,3.0,3.8,3.0,5.2,4.0,6.8,3.4,4.0,1.8,4.2,4.0,4.0,4.2,4.6,2.2,3.4,3.2,3.4,3.8,3.8,4.6,3.6,6.0,4.8,3.8,4.2,3.8,6.6,1.6,5.2,2.4,3.2,1.2,5.0,4.2,3.8,5.4),
Sales = c(48,104,122,104,125,105,135,128,145,124,99,102,146,138,122,13,103,86,109,103,113,100,111,138,101,126,29,73,129,77,84,103,112,77,135,112,106,99,143,54,139,90,203,96,125,107,130,72,137,106)
)->table41
- Plus généralement, on a une condition du
premier ordre
\[ \begin{align*} \mathbf{X^\prime JXb}&=\mathbf{X^\prime Jy}&\iff\\ \mathbf{X^\prime J}(\mathbf{y}-\mathbf{Xb})&=\mathbf{O}_p&\iff\\ \mathbf{X^\prime J}(\mathbf{y}-\mathbf{Xb}-c\mathbf{1}_n)&=\mathbf{O}_p&\implies\\ (\mathbf{Xb}+c\mathbf{1}_n)^\prime\mathbf{J}(\mathbf{y}-\mathbf{Xb}-c\mathbf{1}_n)&=0&\iff\\ (\mathbf{Xb}+c\mathbf{1}_n)^\prime\mathbf{J}\mathbf{e}&=0. \\ (\mathbf{Xb}+c\mathbf{1}_n)^\prime\mathbf{e}&=0. \end{align*} \] - L’erreur et la prédiction doivent être othogonales.
H<-matrix(c(3,-1,2,-4,6,-3,1,0,5),byrow = TRUE,ncol = 3)
H%>%solve()%>%round(2)
## [,1] [,2] [,3]
## [1,] 0.49 0.08 -0.15
## [2,] 0.28 0.21 0.02
## [3,] -0.10 -0.02 0.23
H%>%t()%>%solve()%>%round(2)
## [,1] [,2] [,3]
## [1,] 0.49 0.28 -0.10
## [2,] 0.08 0.21 -0.02
## [3,] -0.15 0.02 0.23
H%>%solve()%>%t()%>%round(2)
## [,1] [,2] [,3]
## [1,] 0.49 0.28 -0.10
## [2,] 0.08 0.21 -0.02
## [3,] -0.15 0.02 0.23
n=50
table41%>%select(Sales)%>%as.matrix(,nrow=n)->y
table41%>%select(Quality,Price,Appearance)%>%as.matrix(,nrow=n)->X
u=matrix(1,nrow=n,ncol=1)
J<-diag(1,nrow=n,ncol=n)-u%*%t(u)/n
(b<-solve(t(X)%*%J%*%X)%*%t(X)%*%J%*%y %>% round(2))
## Sales
## Quality 7.61
## Price -0.18
## Appearance 18.23
c<-t(u)%*%(y-X%*%b)/n
round(c,2)
## Sales
## [1,] 256.46
yhat<-round(X%*%b+c[1],1)
y-yhat
## Sales
## [1,] -8.0
## [2,] 11.9
## [3,] -2.2
## [4,] -1.7
## [5,] 1.3
## [6,] 9.0
## [7,] -9.1
## [8,] -4.1
## [9,] -6.0
## [10,] 8.8
## [11,] -29.9
## [12,] 24.5
## [13,] 0.4
## [14,] 15.2
## [15,] 10.2
## [16,] -23.3
## [17,] 0.3
## [18,] -24.6
## [19,] -0.7
## [20,] -11.9
## [21,] -19.0
## [22,] 3.2
## [23,] -26.1
## [24,] 24.0
## [25,] 25.7
## [26,] 32.5
## [27,] -28.1
## [28,] 0.8
## [29,] 5.1
## [30,] -9.3
## [31,] -2.5
## [32,] -10.2
## [33,] -1.2
## [34,] -7.1
## [35,] 6.1
## [36,] -0.5
## [37,] 19.6
## [38,] 23.2
## [39,] 0.4
## [40,] -29.4
## [41,] -7.2
## [42,] 23.2
## [43,] 37.3
## [44,] -20.0
## [45,] -0.1
## [46,] -8.6
## [47,] 23.3
## [48,] -11.1
## [49,] 10.0
## [50,] -14.2
round(J%*%y-J%*%X%*%b,2)
## Sales
## [1,] -7.96
## [2,] 11.92
## [3,] -2.20
## [4,] -1.74
## [5,] 1.27
## [6,] 8.96
## [7,] -9.14
## [8,] -4.10
## [9,] -6.02
## [10,] 8.80
## [11,] -29.85
## [12,] 24.48
## [13,] 0.45
## [14,] 15.19
## [15,] 10.22
## [16,] -23.25
## [17,] 0.30
## [18,] -24.60
## [19,] -0.69
## [20,] -11.88
## [21,] -19.04
## [22,] 3.24
## [23,] -26.08
## [24,] 23.96
## [25,] 25.74
## [26,] 32.53
## [27,] -28.15
## [28,] 0.77
## [29,] 5.12
## [30,] -9.29
## [31,] -2.49
## [32,] -10.17
## [33,] -1.17
## [34,] -7.15
## [35,] 6.08
## [36,] -0.45
## [37,] 19.60
## [38,] 23.22
## [39,] 0.36
## [40,] -29.39
## [41,] -7.22
## [42,] 23.15
## [43,] 37.30
## [44,] -20.04
## [45,] -0.07
## [46,] -8.61
## [47,] 23.34
## [48,] -11.08
## [49,] 10.00
## [50,] -14.17
norm(y-yhat)^2
## [1] 399550.4
(t(J%*%yhat)%*%J%*%yhat)/(t(J%*%y)%*%J%*%y)
## Sales
## Sales 0.7305324
(norm(J%*%yhat,type = "2")/norm(J%*%y,type = "2"))^2
## [1] 0.7305324
var(yhat)/var(y)
## Sales
## Sales 0.7305324
cor(y,yhat)%>%round(2)
## Sales
## Sales 0.85
table41%>%select(Quality)%>%as.matrix()->X2
solve(t(X2)%*%J%*%X2)%*%t(X2)%*%J%*%y->b2
t(u)%*%(y-X2%*%b2)/n->c2
table41%>%as.data.frame()->DF
lm(Sales ~ Quality,DF)%>%summary()
##
## Call:
## lm(formula = Sales ~ Quality, data = DF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -87.680 -11.952 4.901 17.798 90.338
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 128.734 11.590 11.107 7.29e-15 ***
## Quality -4.018 2.057 -1.953 0.0567 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.83 on 48 degrees of freedom
## Multiple R-squared: 0.07362, Adjusted R-squared: 0.05432
## F-statistic: 3.814 on 1 and 48 DF, p-value: 0.05666
J%*%y/(sqrt((n-1)/n)*sd(y))->yS
((n-1)/n)*diag(var(X))%>%sqrt()%>%diag(nrow = 3)->D
J%*%X%*%solve(D)->Z
solve(t(Z)%*%J%*%Z)%*%t(Z)%*%yS
## Sales
## [1,] 0.5084701
## [2,] -1.1603693
## [3,] 0.7637386
Une seule matrice de données \(\mathbf{X}\) est analysée en PCA
Le problème a été formulé à l’origine par Hotelling (1933) mais aussi plus tôt par Pearson (1901)
L’APC peut être formulée de différentes manières
La formulation dans cette section est basée sur Pearson
Reduction de variables en composantes
L’ACP est généralement utilisée pour une matrice de données \(\mathbf{X}\) centrée \(n\text{-individus}\times p\text{-variables}\), de sorte que
\[ \mathbf{1}^\prime_n\mathbf{X}=\mathbf{O}_p \]
(
Tab51<-tibble(
Student = paste("S",1:6,sep="" ),
M=
c(69.0,67.2,78.6,84.4,56.3,87.9),
P=
c(66.4,53.6,96.9,87.7,68.7,88.8),
C=
c(77.0,53.9,97.3,83.9,72.1,76.0),
B=
c(74.1,58.7,96.2,69.8,56.8,57.2)
)
)
(Tab51%>%select(-1)%>%as.matrix()->X)
## M P C B
## [1,] 69.0 66.4 77.0 74.1
## [2,] 67.2 53.6 53.9 58.7
## [3,] 78.6 96.9 97.3 96.2
## [4,] 84.4 87.7 83.9 69.8
## [5,] 56.3 68.7 72.1 56.8
## [6,] 87.9 88.8 76.0 57.2
Pour une telle matrice de données \(\mathbf{X}\) étant donnée, PCA peut être formulé de la manière suivante : nous recherchons une décomposition du type \[ \mathbf{X}=\mathbf{F}\mathbf{A}^\prime+\mathbf{E} \]
Dans cette formule, \(\mathbf{F}\) est une matrice \(n\text{-individus} \times m\text{-composantes}\) dont les éléments sont appelés scores des composantes principales (PC scores)
\(\mathbf{A}\) est une matrice \(p\text{-variables}\times \text{m-composantes}\) dont les éléments sont appelés loading des composantes
\(\mathbf{E}\) es une matrice qui contient des erreurs, avec \[ m\leq \text{rank}(\mathbf{X}) \leq \min(n,p) \]
Le terme composantes désigne les entités dans lesquelles les \(p\) variables sont résumées ou réduites
Les \(k\)ièmes colonnes de \(\mathbf{F}\) et \(\mathbf{A}\) sont appelées les \(k\)ièmes composantes
Ici, la matrice \(\mathbf{X}\) est supposée approximée par le produit de matrices inconnues \(\mathbf{F}\) et la transposée de \(\mathbf{A}\) -le nombre de colonnes (composantes) dans \(\mathbf{F}\) et \(\mathbf{A}\) est supposée inférieur à celui de \(\mathbf{X}\)X
Pour obtenir ces matrices, une méthode des moindres carrés est utilisée
La somme des carrés des erreurs dans \(\mathbf{E} = \mathbf{X} − \mathbf{F}\mathbf{A}^\prime\), est donné par \[ f(\mathbf{F},\mathbf{A})=||\mathbf{E}||^2=||\mathbf{X}-\mathbf{F}\mathbf{A}^\prime||^2 \]
Cette fonction doit être minimisée par rapport à \(\mathbf{F}\) et \(\mathbf{A}\)
Lorsque \(\mathbf{X}\) est la matrice \(6 \times 4\) de notre tableau de données, et que l’on fixe \(m=2\), on obtient le résultat ci-après