Matrices avec R et applications

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Saisir une matrice

# Raw values, stored column by column (six values per column).
data <- c(
  0.617, 0.545, 0.496, 0.493, 0.437, 0.408,
  731, 680, 621, 591, 617, 615,
  140, 139, 143, 128, 186, 184,
  3.24, 4.13, 3.68, 4.00, 4.80, 4.80
)
# Number of values entered.
length(data)

## [1] 24

Imprimer avec kable

# Reshape the values into a matrix with 6 rows (filled column by column).
X <- matrix(data, nrow = 6)
# Data-frame copy with team names as row names and labelled columns.
DF <- as.data.frame(X)
rownames(DF) <- c("Tigers", "Dragons", "BayStars", "Swallows", "Giants", "Carp")
colnames(DF) <- c("Win", "Runs", "HR", "ERA")
# Move the row names into a regular "Team" column, then print the table.
DF <- rownames_to_column(DF, var = "Team")
knitr::kable(DF, "simple")

Team	Win	Runs	HR	ERA
Tigers	0.617	731	140	3.24
Dragons	0.545	680	139	4.13
BayStars	0.496	621	143	3.68
Swallows	0.493	591	128	4.00
Giants	0.437	617	186	4.80
Carp	0.408	615	184	4.80

Transposer des matrices

# Transpose of X: rows become columns.
t(X)

##         [,1]    [,2]    [,3]    [,4]    [,5]    [,6]
## [1,]   0.617   0.545   0.496   0.493   0.437   0.408
## [2,] 731.000 680.000 621.000 591.000 617.000 615.000
## [3,] 140.000 139.000 143.000 128.000 186.000 184.000
## [4,]   3.240   4.130   3.680   4.000   4.800   4.800

# Display X itself (untransposed).
X

##       [,1] [,2] [,3] [,4]
## [1,] 0.617  731  140 3.24
## [2,] 0.545  680  139 4.13
## [3,] 0.496  621  143 3.68
## [4,] 0.493  591  128 4.00
## [5,] 0.437  617  186 4.80
## [6,] 0.408  615  184 4.80

# Render the transpose of X as a captioned table.
X %>%
  t() %>%
  knitr::kable(caption = "**X'**")

X’
0.617	0.545	0.496	0.493	0.437	0.408
731.000	680.000	621.000	591.000	617.000	615.000
140.000	139.000	143.000	128.000	186.000	184.000
3.240	4.130	3.680	4.000	4.800	4.800

Somme de matrices et multiplication par un scalaire

# 2 x 3 matrix X, entered row by row.
X <- matrix(c(3, -2, 6, 8, 0, -2), nrow = 2, byrow = TRUE)
knitr::kable(X, caption = "**X**")

X
3	-2	6
8	0	-2

# 2 x 3 matrix Y, entered row by row.
Y <- matrix(c(2, 1, -9, -7, 2, -3), nrow = 2, byrow = TRUE)
knitr::kable(Y, caption = "**Y**")

Y
2	1	-9
-7	2	-3

# Element-wise sum of X and Y.
knitr::kable(X + Y, caption = "**X+Y**")

**X+Y**
5	-1	-3
1	2	-5

# 2 x 3 matrix Z, entered row by row.
Z <- matrix(c(8, -2, 6, -5, 0, -3), nrow = 2, byrow = TRUE)
knitr::kable(Z, caption = "**Z**")

Z
8	-2	6
-5	0	-3

# Multiply every entry of Z by the scalar -0.1.
knitr::kable(-0.1 * Z, caption = "-0.1**Z**")

-0.1Z
-0.8	0.2	-0.6
0.5	0.0	0.3

# Redefine X as a new 2 x 3 matrix, entered row by row.
X <- matrix(c(4, -2, 6, 8, 0, -2), nrow = 2, byrow = TRUE)
knitr::kable(X, caption = "**X**")

X
4	-2	6
8	0	-2

# 2 x 3 matrix Y, entered row by row.
Y <- matrix(c(2, 1, -9, -7, 2, -3), nrow = 2, byrow = TRUE)
knitr::kable(Y, caption = "**Y**")

Y
2	1	-9
-7	2	-3

# Linear combination of X and Y with weights 0.5 and -2.
knitr::kable(0.5 * X + (-2) * Y, caption = "0.5**X**+(-2)**Y**")

0.5X+(-2)Y
-2	-3	21
18	-4	5

Produit de matrices

Produit de deux vecteurs

# 2 x 2 matrix A, entered row by row.
A <- matrix(c(2, -4, 1, 7), nrow = 2, byrow = TRUE)
knitr::kable(A, caption = "**A**")

A
2	-4
1	7

# 2 x 2 matrix B, entered row by row.
B <- matrix(c(-3, 1, 2, -5), nrow = 2, byrow = TRUE)
knitr::kable(B, caption = "**B**")

B
-3	1
2	-5

# Matrix product of A and B (%*% is matrix multiplication, not element-wise).
knitr::kable(A %*% B, caption = "**AB**")

AB
-14	22
11	-34

# 2 x 3 matrix X, entered row by row.
X <- matrix(c(2, 3, -1, -2, 0, 4), nrow = 2, byrow = TRUE)
knitr::kable(X, caption = "**X**")

X
2	3	-1
-2	0	4

# 3 x 3 matrix Y, entered row by row.
Y <- matrix(c(3, 5, 4, -1, 0, -2, 0, 6, 0), nrow = 3, byrow = TRUE)
knitr::kable(Y, caption = "**Y**")

Y
3	5	4
-1	0	-2
0	6	0

# Matrix product of X and Y.
knitr::kable(X %*% Y, caption = "**XY**")

XY
3	4	2
-6	14	-8

# 4 x 2 matrix F, entered row by row.
# Note: assigning to `F` masks R's built-in `F` shorthand for FALSE for the
# rest of the session.
F <- matrix(c(2, -1, -3, 0, 1, 3, -2, -3), nrow = 4, byrow = TRUE)
knitr::kable(F, caption = "**F**")

F
2	-1
-3	0
1	3
-2	-3

# 3 x 2 matrix A, entered row by row.
A <- matrix(c(-4, 1, 6, -3, 2, 5), nrow = 3, byrow = TRUE)
knitr::kable(A, caption = "**A**")

A
-4	1
6	-3
2	5

# Product of F with the transpose of A.
knitr::kable(F %*% t(A), caption = "**FA'**")

**FA’**
-9	15	-1
12	-18	-6
-1	-3	17
5	-3	-19

# 3 x 2 matrix A, entered row by row.
A <- matrix(c(-4, 1, 6, -3, 2, 5), nrow = 3, byrow = TRUE)

knitr::kable(A, caption = "**A**")

A
-4	1
6	-3
2	5

# A times its transpose.
knitr::kable(A %*% t(A), caption = "**S=AA'**")

**S=AA’**
17	-27	-3
-27	45	-3
-3	-3	29

# Transpose of A times A.
knitr::kable(t(A) %*% A, caption = "**T=A'A**")

**T=A’A**
56	-12
-12	35

# Column vector u, stored as a 3 x 1 matrix.
u <- matrix(c(2, -1, 3), nrow = 3, byrow = TRUE)

knitr::kable(u, caption = "**u**")

u
2
-1
3

# Column vector v, stored as a 3 x 1 matrix.
v <- matrix(c(-2, 3, -4), nrow = 3, byrow = TRUE)

knitr::kable(v, caption = "**v**")

v
-2
3
-4

# Inner product of u and v (a 1 x 1 matrix).
knitr::kable(t(u) %*% v, caption = "**u'v**")

**u’v**
-19

# Outer product of u and v (a 3 x 3 matrix).
knitr::kable(u %*% t(v), caption = "**uv'**")

**uv’**
-4	6	-8
2	-3	4
-6	9	-12

Représenter des vecteurs en 3D

Avec le package plotly

library(plotly)

# Two points in 3D, one per row, with coordinates in columns x, y and z.
X <- data.frame(
  x = c(1, 4),
  y = c(2, 5),
  z = c(3, 6)
)
knitr::kable(X, "simple")

x	y	z
1	2	3
4	5	6

# Plot each row of X as a 3D marker and draw a segment from the origin to it,
# so that each row is shown as a vector.
# Both segments read their end point from X (the second one used to hard-code
# the coordinates), and the colour is wrapped in I() so that plotly uses it as
# a literal colour instead of mapping it as a data value.
X %>%
  plot_ly(x = ~x, y = ~y, z = ~z) %>%
  add_markers() %>%
  add_trace(
    x = c(0, X[1, 1]), y = c(0, X[1, 2]), z = c(0, X[1, 3]),
    type = "scatter3d", mode = "lines", name = "lines",
    showlegend = FALSE, color = I("#CCCCCC")
  ) %>%
  add_trace(
    x = c(0, X[2, 1]), y = c(0, X[2, 2]), z = c(0, X[2, 3]),
    type = "scatter3d", mode = "lines", name = "lines",
    showlegend = FALSE, color = I("#CCCCCC")
  )

Avec le package plot3D

# Install plot3D only when it is not already available, so that re-running
# the document does not reinstall the package every time.
if (!requireNamespace("plot3D", quietly = TRUE)) {
  install.packages("plot3D")
}

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)

library("plot3D")

## Warning in fun(libname, pkgname): couldn't connect to display ":0"

# One colour per arrow; both are red.
cols <- c("red", "red")

# Draw one arrow per row of X, starting at the origin (0, 0, 0) and ending at
# the coordinates stored in the three columns of X.
arrows3D(
  rep(0, 2), rep(0, 2), rep(0, 2),
  X[, 1], X[, 2], X[, 3],
  col = cols, lwd = 2, d = 3,
  main = "Représentation des vecteurs en 3D",
  bty = "g", ticktype = "detailed"
)

Trace et norme d’une matrice

# Fix the seed so the random matrix is reproducible.
set.seed(1)
n <- 2
# n x n matrix of normal draws with mean 10 and standard deviation 2.
X <- matrix(rnorm(n^2, mean = 10, sd = 2), nrow = n)
knitr::kable(data.frame(X))

X1	X2
8.747092	8.328743
10.367287	13.190562

# Trace of X'X: sum of the diagonal entries.
sum(diag(t(X) %*% X))

## [1] 427.3511

# Squared Frobenius norm of X.
norm(X, "F")^2

## [1] 427.3511

Vecteurs composés uniquement de 0 (vecteurs nuls)

Avec R, créez le vecteur nul de dimension \(p=5\), puis la matrice nulle de dimension \(10\times 5\).

n <- 10
p <- 5
# Zero column vector with p entries.
matrix(0, nrow = p, ncol = 1)

##      [,1]
## [1,]    0
## [2,]    0
## [3,]    0
## [4,]    0
## [5,]    0

# n x p matrix filled with zeros.
matrix(0, nrow = n, ncol = p)

##       [,1] [,2] [,3] [,4] [,5]
##  [1,]    0    0    0    0    0
##  [2,]    0    0    0    0    0
##  [3,]    0    0    0    0    0
##  [4,]    0    0    0    0    0
##  [5,]    0    0    0    0    0
##  [6,]    0    0    0    0    0
##  [7,]    0    0    0    0    0
##  [8,]    0    0    0    0    0
##  [9,]    0    0    0    0    0
## [10,]    0    0    0    0    0

Vecteurs composés uniquement de 1

Avec R, créez le vecteur composé de \(p=5\) répétitions de \(1\), puis la matrice de dimension \(10\times 5\) composée uniquement de \(1\).

# Column vector of five ones.
matrix(1, nrow = 5, ncol = 1)

##      [,1]
## [1,]    1
## [2,]    1
## [3,]    1
## [4,]    1
## [5,]    1

# 10 x 5 matrix filled with ones.
matrix(1, nrow = 10, ncol = 5)

##       [,1] [,2] [,3] [,4] [,5]
##  [1,]    1    1    1    1    1
##  [2,]    1    1    1    1    1
##  [3,]    1    1    1    1    1
##  [4,]    1    1    1    1    1
##  [5,]    1    1    1    1    1
##  [6,]    1    1    1    1    1
##  [7,]    1    1    1    1    1
##  [8,]    1    1    1    1    1
##  [9,]    1    1    1    1    1
## [10,]    1    1    1    1    1

# Same 10 x 5 matrix of ones, obtained as the product of a 10 x 1 vector of
# ones with the transpose of a 5 x 1 vector of ones.
matrix(1, nrow = 10, ncol = 1) %*% t(matrix(1, nrow = 5, ncol = 1))

##       [,1] [,2] [,3] [,4] [,5]
##  [1,]    1    1    1    1    1
##  [2,]    1    1    1    1    1
##  [3,]    1    1    1    1    1
##  [4,]    1    1    1    1    1
##  [5,]    1    1    1    1    1
##  [6,]    1    1    1    1    1
##  [7,]    1    1    1    1    1
##  [8,]    1    1    1    1    1
##  [9,]    1    1    1    1    1
## [10,]    1    1    1    1    1

Matrices carrées spécifiques

Matrices symétriques

Définition

Le produit d’une matrice par sa transposée est symétrique

Matrice diagonale

Matrice identité

# 3 x 3 identity matrix.
knitr::kable(diag(3))

1	0	0
0	1	0
0	0	1

Statistiques avec les matrices

# Scores of six participants in three subjects.
TabA <- data.frame(
  Participant = 1:6,
  Japan = c(82, 96, 84, 90, 93, 82),
  English = c(70, 67, 54, 66, 74, 60),
  Science = c(76, 71, 65, 80, 77, 89)
)
library(tidyverse)
TabA <- as_tibble(TabA)
# Matrix version of the table, printed after assignment.
MA <- as.matrix(TabA)
MA

##      Participant Japan English Science
## [1,]           1    82      70      76
## [2,]           2    96      67      71
## [3,]           3    84      54      65
## [4,]           4    90      66      80
## [5,]           5    93      74      77
## [6,]           6    82      60      89

# Raw values, stored column by column (six values per column).
data <- c(
  0.617, 0.545, 0.496, 0.493, 0.437, 0.408,
  731, 680, 621, 591, 617, 615,
  140, 139, 143, 128, 186, 184,
  3.24, 4.13, 3.68, 4.00, 4.80, 4.80
)

# Matrix with 6 rows, filled column by column.
MC <- matrix(data, nrow = 6)
# Data-frame copy with team names as row names and labelled columns.
TabC <- as.data.frame(MC)
rownames(TabC) <- c("Tigers", "Dragons", "BayStars", "Swallows", "Giants", "Carp")
colnames(TabC) <- c("Win", "Runs", "HR", "ERA")
# Move the row names into a regular "Team" column, then print the table.
TabC <- rownames_to_column(TabC, var = "Team")
knitr::kable(TabC, "simple")

Team	Win	Runs	HR	ERA
Tigers	0.617	731	140	3.24
Dragons	0.545	680	139	4.13
BayStars	0.496	621	143	3.68
Swallows	0.493	591	128	4.00
Giants	0.437	617	186	4.80
Carp	0.408	615	184	4.80

# Display the numeric matrix (no row or column names).
print(MC)

##       [,1] [,2] [,3] [,4]
## [1,] 0.617  731  140 3.24
## [2,] 0.545  680  139 4.13
## [3,] 0.496  621  143 3.68
## [4,] 0.493  591  128 4.00
## [5,] 0.437  617  186 4.80
## [6,] 0.408  615  184 4.80

# Scores of six participants in two subjects.
TabAp <- data.frame(
  Participant = 1:6,
  History = c(66, 72, 44, 58, 70, 56),
  Mathematics = c(74, 98, 62, 88, 56, 84)
)
library(tidyverse)
TabAp <- as_tibble(TabAp)
# Matrix version of the table, printed after assignment.
MAp <- as.matrix(TabAp)
MAp

##      Participant History Mathematics
## [1,]           1      66          74
## [2,]           2      72          98
## [3,]           3      44          62
## [4,]           4      58          88
## [5,]           5      70          56
## [6,]           6      56          84

library(ggplot2)
# Plot the History scores as labelled points along a dashed red line at y = 0,
# with the x axis limited to 0-100.
# The former scale_color_manual(values = unname(colours)) layer was dropped:
# `colours` is not defined in this document and no colour aesthetic is mapped,
# so the scale had no effect on the plot.
TabAp %>%
  ggplot(aes(x = History, y = 0, label = History)) +
  geom_point(size = 2, shape = 25) +
  geom_text(vjust = -1, size = 2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red", size = 1) +
  xlim(0, 100)

# Box plot of the History scores (on the x axis) with the individual values
# overlaid as labelled points. Axis titles, vertical grid lines and the legend
# are removed, and the x axis is limited to 0-100.
TabAp %>%
  ggplot(aes(x = History, y = 0, label = History)) +
  geom_boxplot(alpha = 0.8) +
  geom_point(size = 2) +
  geom_text(vjust = -1) +
  theme(
    text = element_text(size = 10),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  ) +
  xlim(0, 100)

Moyenne

# Mean of History and number of rows, computed with dplyr.
summarise(TabAp, mean = mean(History), n = n())

# u: column vector of six ones.
u <- matrix(1, nrow = 6, ncol = 1)
# x: History scores as a one-column matrix. The former `ncol = 1` argument was
# removed: as.matrix() has no such argument, so it was silently ignored.
x <- TabAp %>%
  select(History) %>%
  as.matrix()
# Mean in matrix form: u'x / 6.
t(u) %*% x / 6

##      History
## [1,]      61

Scores centrés

# u: column vector of six ones.
u <- matrix(1, nrow = 6, ncol = 1)
# x: History scores as a one-column matrix. The former `ncol = 1` argument was
# removed: as.matrix() has no such argument, so it was silently ignored.
x <- TabAp %>%
  select(History) %>%
  as.matrix()
# Centered scores: subtract the mean from every entry.
x - mean(x)

##      History
## [1,]       5
## [2,]      11
## [3,]     -17
## [4,]      -3
## [5,]       9
## [6,]      -5

# Same centering, written with the vector of ones: x minus (mean of x) times u.
x - mean(x) * u

##      History
## [1,]       5
## [2,]      11
## [3,]     -17
## [4,]      -3
## [5,]       9
## [6,]      -5

# J = I - (1/6) u u'; multiplying x by J gives the centered scores.
J <- diag(6) - (1 / 6) * u %*% t(u)
J %*% x

##      History
## [1,]       5
## [2,]      11
## [3,]     -17
## [4,]      -3
## [5,]       9
## [6,]      -5

Variance et écart-type

## Variance computed with the different formulas
# u: column vector of six ones.
u <- matrix(1, nrow = 6, ncol = 1)
# x: History scores as a one-column matrix. The former `ncol = 1` argument was
# removed: as.matrix() has no such argument, so it was silently ignored.
x <- TabAp %>%
  select(History) %>%
  as.matrix()
x - mean(x)

##      History
## [1,]       5
## [2,]      11
## [3,]     -17
## [4,]      -3
## [5,]       9
## [6,]      -5

# Centered scores written with the vector of ones u.
x - mean(x) * u

##      History
## [1,]       5
## [2,]      11
## [3,]     -17
## [4,]      -3
## [5,]       9
## [6,]      -5

# J = I - (1/6) u u'.
J <- diag(6) - (1 / 6) * u %*% t(u)

# Variance as (1/6) times the cross-product of the centered scores with
# themselves.
(1 / 6) * t(x - mean(x) * u) %*% (x - mean(x) * u)

##          History
## History 91.66667

# Variance as the quadratic form (1/6) x' J x.
(1 / 6) * t(x) %*% J %*% x

##          History
## History 91.66667

# Variance as (1/6) times the squared 2-norm of Jx.
(1 / 6) * norm(J %*% x, type = "2")^2

## [1] 91.66667

# Rescale var(x) by 5/6 to obtain the same value as the formulas dividing by 6.
(5 / 6) * var(x)

##          History
## History 91.66667

## Standard deviation
# sqrt(1/6) times the 2-norm of Jx.
sqrt(1 / 6) * norm(J %*% x, type = "2")

## [1] 9.574271

# Same value obtained by rescaling sd(x) by sqrt(5/6).
sqrt(5 / 6) * sd(x)

## [1] 9.574271

Standard Scores

# u: column vector of six ones.
u <- matrix(1, nrow = 6, ncol = 1)
# x: History scores as a one-column matrix. The former `ncol = 1` argument was
# removed: as.matrix() has no such argument, so it was silently ignored.
x <- TabAp %>%
  select(History) %>%
  as.matrix()

# Standard scores: centered scores divided by sd(x).
(x - mean(x)) / sd(x)

##         History
## [1,]  0.4767313
## [2,]  1.0488088
## [3,] -1.6208864
## [4,] -0.2860388
## [5,]  0.8581163
## [6,] -0.4767313

# scale() with its defaults both centers and scales x.
scale(x, center = TRUE, scale = TRUE)

##         History
## [1,]  0.4767313
## [2,]  1.0488088
## [3,] -1.6208864
## [4,] -0.2860388
## [5,]  0.8581163
## [6,] -0.4767313
## attr(,"scaled:center")
## History 
##      61 
## attr(,"scaled:scale")
##  History 
## 10.48809

# Center only: scaling is switched off.
scale(x, center = TRUE, scale = FALSE)

##      History
## [1,]       5
## [2,]      11
## [3,]     -17
## [4,]      -3
## [5,]       9
## [6,]      -5
## attr(,"scaled:center")
## History 
##      61

# Manual centering, for comparison with scale(x, scale = FALSE).
x - mean(x)

##      History
## [1,]       5
## [2,]      11
## [3,]     -17
## [4,]      -3
## [5,]       9
## [6,]      -5

Que se passe-t-il lorsqu’on standardise ?

Représentation matricielle

Exercices

Variabilité inter-variables

# Ten foods (one per row) with three numeric variables: Sweet, Spice, Sales.
X <- data.frame(
  Food = 1:10,
  Sweet = c(32, 28, 20, 34, 25, 35, 25, 30, 34, 22),
  Spice = c(10, 20, 19, 21, 16, 14, 20, 18, 13, 26),
  Sales = c(62, 83, 34, 91, 53, 70, 62, 73, 84, 63)
)

## Reproduire ces trois graphiques avec un panel ggplot2

Covariance

# M: columns 2 to 4 of X as a numeric matrix; n: number of rows.
M <- as.matrix(X[, 2:4])
n <- nrow(M)
# Covariance of the first two columns of M: cross-product of the centered
# columns divided by n.
t(M[, 1] - mean(M[, 1])) %*% (M[, 2] - mean(M[, 2])) / n

##        [,1]
## [1,] -12.65

# u: column vector of n ones; J = I - u u' / n.
u <- matrix(1, nrow = n)
J <- diag(n) - u %*% t(u) / n
# Same covariance written with J applied to the raw columns.
t(M[, 1]) %*% J %*% M[, 2] / n

##        [,1]
## [1,] -12.65

# Full covariance matrix: cov(M) rescaled by (n - 1) / n.
((n - 1) / n) * cov(M)

##        Sweet  Spice  Sales
## Sweet  25.65 -12.65  60.15
## Spice -12.65  19.01   0.15
## Sales  60.15   0.15 251.45

Coefficient de corrélation

# M: columns 2 to 4 of X as a numeric matrix; n: number of rows.
M <- as.matrix(X[, 2:4])
n <- nrow(M)
# Correlation of the first two columns of M: inner product of the J-centered
# columns divided by the product of their 2-norms.
res <- t(J %*% M[, 1]) %*% J %*% M[, 2] /
  (norm(J %*% M[, 1], type = "2") * norm(J %*% M[, 2], type = "2"))
round(res, 2)

##       [,1]
## [1,] -0.57

# Full correlation matrix, rounded to two decimals.
round(cor(M), 2)

##       Sweet Spice Sales
## Sweet  1.00 -0.57  0.75
## Spice -0.57  1.00  0.00
## Sales  0.75  0.00  1.00

Vecteurs et corrélations

# Arc cosine of each correlation gives an angle in radians.
angles <- acos(cor(M))
# Convert radians to degrees and round to whole degrees.
round(angles * 360 / (2 * pi), 0)

##       Sweet Spice Sales
## Sweet     0   125    41
## Spice   125     0    90
## Sales    41    90     0

Calcul matriciel et analyses multivariées pour data scientists

Notes de lecture, Karim Kilani

Matrices avec R et applications

Saisir une matrice

Imprimer avec kable

Transposer des matrices

Somme de matrices et multiplication par un scalaire

Produit de matrices

Produit de deux vecteurs

Représenter des vecteurs en 3D

Avec le package plotly

Avec le package plot3D

Trace et norme d’une matrice

Vecteurs composés uniquement de 0 (vecteurs nuls)

Vecteurs composés uniquement de 1

Matrices carrées spécifiques

Matrices symétriques

Définition

Matrice diagonale

Matrice identité

Statistiques avec les matrices

Moyenne

Scores centrés

Variance et écart-type

Standard Scores

Que se passe-t-il lorsqu’on standardise ?

Représentation matricielle

Exercices

Variabilité inter-variables

Covariance

Coefficient de corrélation

Vecteurs et corrélations

Covariances et corrélations pour des variables standardisées