Dhafer Malouche
Ecole Supérieure de la Statistique et de l’Analyse de l’Information
Website: http://essai.academia.edu/DhaferMalouche
email : dhafer.malouche@yale.edu
Source des données : INS, Recencement 2014.
Problématique: Correspondance gouvernat x meéthodes d’assainissements.
Etape 1: Importation des données dans R.
> repart_logement_methode_assainissement <- read.csv("~/Documents/Teaching/Analyse_des_donnees_2016/Analyse_des_correspondances/repart_logement_methode_assainissement.csv")
ade4> library(ade4)
> X=repart_logement_methode_assainissement[,-1]
> rownames(X)=repart_logement_methode_assainissement$Gouvernorat
> ca=dudi.coa(X,scannf = F)
ca> summary(ca)
Class: coa dudi
Call: dudi.coa(df = X, scannf = F)
Total inertia: 0.2595
Eigenvalues:
Ax1 Ax2
0.21196 0.04758
Projected inertia (%):
Ax1 Ax2
81.67 18.33
Cumulative projected inertia (%):
Ax1 Ax1:2
81.67 100.00
Lignes x Colonnes> library(ggplot2)
> library(ggrepel)
> dt=data.frame(c(ca$l1[,1],ca$c1[,1]),c(ca$l1[,2],ca$c1[,2]),
+ c(rownames(X),colnames(X)),c(rep("gouv",24),rep("var",3)))
> colnames(dt)=c("pc1","pc2","text","variable")
>
> p<-ggplot(dt,aes(x=pc1,y=pc2,col=variable,fill=variable,label=text))+
+ geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+
+ geom_point(alpha=.5)+
+ geom_text_repel()
> p<-p+theme_bw()
> p<-p+theme(legend.position="none")+xlab("Axis1")+ylab("Axis2")
> p
> sum(ca$li[,1]^2*ca$lw)
[1] 0.2119624
> ca$eig[1]
[1] 0.2119624
> sum(ca$li[,1]*ca$lw)
[1] -4.075245e-17
Source ISIE 2011.
Correspondances Délégations x partis politiques
Importer les données
> load("~/Documents/Teaching/Analyse_des_donnees_2016/Analyse_des_correspondances/votesAriana.RData")
FactoMineR> library(FactoMineR)
> x1=votesAriana[1:15,]
> ca1=CA(X = x1,graph = F)
> summary(ca1)
Call:
CA(X = x1, graph = F)
The chi square of independence between the two variables is equal to 34905.17 (p-value = 0 ).
Eigenvalues
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6
Variance 0.160 0.010 0.004 0.003 0.000 0.000
% of var. 90.462 5.721 2.195 1.432 0.117 0.073
Cumulative % of var. 90.462 96.183 98.378 99.811 99.927 100.000
Rows (the 10 first)
Iner*1000 Dim.1 ctr cos2 Dim.2 ctr
Ettakatol | 33.352 | -0.481 20.218 0.972 | -0.081 9.096
Aridha | 10.305 | 0.631 5.456 0.849 | 0.161 5.647
MDS | 0.330 | 0.338 0.128 0.623 | -0.090 0.143
Afek | 6.192 | -0.515 3.775 0.977 | 0.065 0.945
PDM | 45.047 | -0.795 27.453 0.977 | 0.049 1.639
CPR | 7.676 | -0.250 3.533 0.738 | -0.115 11.760
UPL | 4.074 | 0.350 0.509 0.200 | 0.449 13.223
Dostourna | 4.507 | -0.709 2.674 0.951 | 0.124 1.291
DestLib | 2.202 | 0.544 0.445 0.324 | 0.670 10.644
Ennahdha | 34.426 | 0.306 21.085 0.982 | -0.037 4.926
cos2 Dim.3 ctr cos2
Ettakatol 0.028 | -0.006 0.138 0.000 |
Aridha 0.056 | -0.171 16.463 0.062 |
MDS 0.044 | -0.111 0.567 0.067 |
Afek 0.015 | 0.022 0.284 0.002 |
PDM 0.004 | 0.027 1.316 0.001 |
CPR 0.155 | -0.035 2.865 0.015 |
UPL 0.329 | 0.514 45.234 0.432 |
Dostourna 0.029 | 0.004 0.004 0.000 |
DestLib 0.490 | -0.401 9.933 0.175 |
Ennahdha 0.015 | 0.003 0.112 0.000 |
Columns
Iner*1000 Dim.1 ctr cos2 Dim.2 ctr
Ariana Médina | 89.886 | -0.531 55.669 0.993 | 0.038 4.503
Kalaat El Andalous | 14.619 | 0.429 4.930 0.541 | 0.356 53.747
Mnihla | 22.411 | 0.445 13.630 0.975 | -0.011 0.122
Raoued | 3.318 | 0.069 0.476 0.230 | -0.108 18.306
Sidi Thabet | 12.003 | 0.451 4.848 0.647 | 0.191 13.714
Soukra | 1.503 | 0.026 0.081 0.086 | -0.069 9.006
Ettadhamen | 33.450 | 0.486 20.366 0.976 | -0.021 0.602
cos2 Dim.3 ctr cos2
Ariana Médina 0.005 | 0.008 0.530 0.000 |
Kalaat El Andalous 0.373 | -0.157 27.164 0.072 |
Mnihla 0.001 | -0.021 1.266 0.002 |
Raoued 0.559 | -0.006 0.127 0.001 |
Sidi Thabet 0.116 | 0.262 67.443 0.218 |
Soukra 0.607 | -0.024 2.763 0.071 |
Ettadhamen 0.002 | 0.014 0.708 0.001 |
cos2> cos2row=rowSums(ca1$row$cos2[,1:2])
> cos2col=rowSums(ca1$col$cos2[,1:2])
>
> dt=data.frame(c(ca1$row$coord[,1]/sqrt(ca1$eig[1,1]),ca1$col$coord[,1]/sqrt(ca1$eig[1,1])),
+ c(ca1$row$coord[,2]/sqrt(ca1$eig[2,1]),ca1$col$coord[,2]/sqrt(ca1$eig[2,1])),
+ c(rownames(x1),colnames(x1)),c(rep("partie",nrow(x1)),rep("délégation",ncol(x1))),
+ c(cos2row,cos2col))
> colnames(dt)=c("pc1","pc2","text","variable","cos2")
>
> library(ggplot2)
> library(ggrepel)
>
> p<-ggplot(dt,aes(x=pc1,y=pc2,col=variable,fill=variable,label=text,size=cos2))+
+ geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+
+ geom_point(alpha=.5)+
+ geom_text_repel()
> p<-p+theme_bw()
> p<-p+theme(legend.position="none")+xlab("Axis1")+ylab("Axis2")
> p
> pc1=PCA(x1,scale.unit = T,ncp = 2,graph = F)
> library(factoextra)
> fviz_screeplot(pc1)+theme_bw()
> fviz_pca(pc1)+theme_bw()
avec celle obtenue à l’aide d’une AC.
> fviz_ca(ca1)+theme_bw()
> x1=votesAriana[c(1:15,19),]
> ca3=CA(X = x1,row.sup = 16,graph = F)
> summary(ca3)
Call:
CA(X = x1, row.sup = 16, graph = F)
The chi square of independence between the two variables is equal to 34905.17 (p-value = 0 ).
Eigenvalues
Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6
Variance 0.160 0.010 0.004 0.003 0.000 0.000
% of var. 90.462 5.721 2.195 1.432 0.117 0.073
Cumulative % of var. 90.462 96.183 98.378 99.811 99.927 100.000
Rows (the 10 first)
Iner*1000 Dim.1 ctr cos2 Dim.2 ctr
Ettakatol | 33.352 | -0.481 20.218 0.972 | -0.081 9.096
Aridha | 10.305 | 0.631 5.456 0.849 | 0.161 5.647
MDS | 0.330 | 0.338 0.128 0.623 | -0.090 0.143
Afek | 6.192 | -0.515 3.775 0.977 | 0.065 0.945
PDM | 45.047 | -0.795 27.453 0.977 | 0.049 1.639
CPR | 7.676 | -0.250 3.533 0.738 | -0.115 11.760
UPL | 4.074 | 0.350 0.509 0.200 | 0.449 13.223
Dostourna | 4.507 | -0.709 2.674 0.951 | 0.124 1.291
DestLib | 2.202 | 0.544 0.445 0.324 | 0.670 10.644
Ennahdha | 34.426 | 0.306 21.085 0.982 | -0.037 4.926
cos2 Dim.3 ctr cos2
Ettakatol 0.028 | -0.006 0.138 0.000 |
Aridha 0.056 | -0.171 16.463 0.062 |
MDS 0.044 | -0.111 0.567 0.067 |
Afek 0.015 | 0.022 0.284 0.002 |
PDM 0.004 | 0.027 1.316 0.001 |
CPR 0.155 | -0.035 2.865 0.015 |
UPL 0.329 | 0.514 45.234 0.432 |
Dostourna 0.029 | 0.004 0.004 0.000 |
DestLib 0.490 | -0.401 9.933 0.175 |
Ennahdha 0.015 | 0.003 0.112 0.000 |
Columns
Iner*1000 Dim.1 ctr cos2 Dim.2 ctr
Ariana Médina | 89.886 | -0.531 55.669 0.993 | 0.038 4.503
Kalaat El Andalous | 14.619 | 0.429 4.930 0.541 | 0.356 53.747
Mnihla | 22.411 | 0.445 13.630 0.975 | -0.011 0.122
Raoued | 3.318 | 0.069 0.476 0.230 | -0.108 18.306
Sidi Thabet | 12.003 | 0.451 4.848 0.647 | 0.191 13.714
Soukra | 1.503 | 0.026 0.081 0.086 | -0.069 9.006
Ettadhamen | 33.450 | 0.486 20.366 0.976 | -0.021 0.602
cos2 Dim.3 ctr cos2
Ariana Médina 0.005 | 0.008 0.530 0.000 |
Kalaat El Andalous 0.373 | -0.157 27.164 0.072 |
Mnihla 0.001 | -0.021 1.266 0.002 |
Raoued 0.559 | -0.006 0.127 0.001 |
Sidi Thabet 0.116 | 0.262 67.443 0.218 |
Soukra 0.607 | -0.024 2.763 0.071 |
Ettadhamen 0.002 | 0.014 0.708 0.001 |
Supplementary row
Dim.1 cos2 Dim.2 cos2 Dim.3 cos2
NonParticipants | 0.138 0.521 | -0.017 0.007 | 0.087 0.207 |
> ca3$row.sup$coord[,1:2]
Dim 1 Dim 2
0.13779736 -0.01653216
>
> cos2row=rowSums(ca3$row$cos2[,1:2])
> cos2col=rowSums(ca3$col$cos2[,1:2])
>
>
> dt=data.frame(c(ca3$row$coord[,1]/sqrt(ca3$eig[1,1]),
+ ca3$col$coord[,1]/sqrt(ca3$eig[1,1]),
+ ca3$row.sup$coord[,1]/sqrt(ca3$eig[1,1])),
+ c(ca3$row$coord[,2]/sqrt(ca3$eig[2,1]),
+ ca3$col$coord[,2]/sqrt(ca3$eig[2,1]),
+ ca3$row.sup$coord[,2]/sqrt(ca3$eig[2,1])),
+ c(rownames(x1)[-16],colnames(x1),"Non-participants"),c(rep("partie",15),rep("délégation",ncol(x1)),"1"),
+ c(cos2row,cos2col,sum(ca3$row.sup$cos2[,1:2])))
> colnames(dt)=c("pc1","pc2","text","variable","cos2")
>
> library(ggplot2)
> library(ggrepel)
>
> p<-ggplot(dt,aes(x=pc1,y=pc2,col=variable,fill=variable,label=text,size=cos2))+
+ geom_vline(xintercept = 0)+geom_hline(yintercept = 0)+
+ geom_point(alpha=.5)+
+ geom_text_repel()
> p<-p+theme_bw()
> p<-p+theme(legend.position="none")+xlab("Axis1")+ylab("Axis2")
> p
tea data dans FactoMineRDans les données les 18 premieères decrivent la façon avec laquelle on consomme le thé. Ensuite plusieurs il y a un ensemble de variables qui décrivent la percetion du consommateur et quelques informations personnelles sur les consommateurs.
Charger les données
> data(tea)
> library(sjPlot)
> sjp.setTheme(theme = "scatter",
+ geom.label.size = 2.5,
+ geom.label.color = "black",
+ axis.textsize =.9,
+ axis.title.size =1.19)
> sjp.frq(tea$tea.time,
+ coord.flip = TRUE,
+ hjust = "top")
>
> sjp.frq(tea$home,
+ type = "dots",
+ coord.flip = TRUE,
+ showCountValues = FALSE,
+ geom.size = 2.5)
>
> sjp.frq(tea$breakfast,
+ showCI = TRUE, # show confidence intervals
+ type = "dots", # dot plot
+ coord.flip = TRUE, # flip coordinates
+ showCountValues = FALSE, # no counts, only percentages
+ geom.size = 2.5)
> sjp.frq(tea$age, showValueLabels = FALSE)
> sjp.frq(tea$age, autoGroupAt = 5)
FactoMineR> res.mca <- MCA(tea,quanti.sup=19,quali.sup=20:36) ## 19 : c'est l'age, et de 20 Ă 36 sont les variables personnelles
ade4> library(ade4)
>
> res.mca2=dudi.acm(tea[,1:18],scannf = F,nf=3)
> res.mca$eig[,1]
[1] 0.14827441 0.12154673 0.09000954 0.07805440 0.07374870 0.07138044
[7] 0.06782906 0.06532655 0.06184167 0.05852817 0.05707772 0.05441920
[13] 0.05192969 0.04874462 0.04831065 0.04690465 0.04554779 0.04024922
[19] 0.03812120 0.03657138 0.03566464 0.03484898 0.03082882 0.02873151
[25] 0.02732068 0.02110048 0.01708910
> res.mca2$eig
[1] 0.14827441 0.12154673 0.09000954 0.07805440 0.07374870 0.07138044
[7] 0.06782906 0.06532655 0.06184167 0.05852817 0.05707772 0.05441920
[13] 0.05192969 0.04874462 0.04831065 0.04690465 0.04554779 0.04024922
[19] 0.03812120 0.03657138 0.03566464 0.03484898 0.03082882 0.02873151
[25] 0.02732068 0.02110048 0.01708910
> sum(res.mca2$eig)
[1] 1.5
> f1=prop.table(xtabs(~tea$breakfast))
> 1/f1-1
tea$breakfast
breakfast Not.breakfast
1.0833333 0.9230769
> res.mca2$tab[1:10,1:2]
breakfast.breakfast breakfast.Not.breakfast
1 1.083333 -1.0000000
2 1.083333 -1.0000000
3 -1.000000 0.9230769
4 -1.000000 0.9230769
5 1.083333 -1.0000000
6 -1.000000 0.9230769
7 1.083333 -1.0000000
8 -1.000000 0.9230769
9 1.083333 -1.0000000
10 1.083333 -1.0000000
> sum(res.mca2$tab[,1]*res.mca2$lw)
[1] -1.196959e-16
> sum(res.mca2$tab[,1]^2*res.mca2$lw)
[1] 1.083333
> 1/(1+sum(res.mca2$tab[,2]^2*res.mca2$lw))
[1] 0.52
> f1
tea$breakfast
breakfast Not.breakfast
0.48 0.52
> res.mca2$cw[1:2]
breakfast.breakfast breakfast.Not.breakfast
0.02666667 0.02888889
> f1[1]/18
breakfast
0.02666667
> f1[2]/18
Not.breakfast
0.02888889
> library(factoextra)
> fviz_mca(res.mca)+theme_bw()
> library(factoextra)
> fviz_mca(res.mca,invisible = c("quali.sup","ind"))+theme_bw()