library(ggplot2)
library(magrittr)
library(rmarkdown)
library(class)
library(readr)
library(pwr)
library(readxl)
library(survival)
library(ISwR)
library(magrittr)
library(tidyr)
library(dplyr)
library(survminer)
library(survsim)
library(ade4)
library(rgl)
library(gplots)
library(adegraphics)
library(factoextra)
Slides >>> id: Supp!R mdp : avdU1mdpa6l
Autres supports >>> id : maimirm1 mdp : maimirm1
# Crée un vecteur
A=c(1,2,3,4)
A
# ou
1:4
# Répeter un
rep(1:4)
rep(1:4,4)
B=matrix(A,2,2,byrow = T)
B
#transposé
t(A)
# carré
A^2
B+c(1,2)
# apply
apply(B,1,sum)
\(\bar{fkkkkof}\)
f="http://pbil.univ-lyon1.fr/R/donnees/essence.txt"
data=read.table(f,header = T,dec = ',')
head(data)
data$Gazole
#Exo 1
moyenne=function(x){
sum(x)/length(x)
}
variance=function(x){
v=0
for(i in 1:length(x)){
v=v+(x[i]-moyenne(x))^2
}
v/length(x)
}
ecart=function(x){
sd=variance(x)
return(sqrt(sd))
}
moyenne(data$Gazole)
mean(data$Gazole)
variance(data$Gazole)
var(data$Gazole)
sd(data$Gazole)
ecart(data$Gazole)
# exo 2
vecteur=rep(1,10)
mean(vecteur)
var(vecteur)
vecteur1=1:10
mean(vecteur1)
var(vecteur1)
ip=c(1,3,7)
ph=rep(1,10)
ph[ip]=2
moyenneN=mean(data$Gazole*ph)
var=variance(data$Gazole*ph)
var
ecart(data$Gazole*ph)
#produit scalaire
sum(data$Gazole*data$SP95)
sum(data$Gazole*data$SP98)
#norme
norme=function(x,poid){
sqrt(sum(poid*x*x))
}
norme(data$Gazole,ph)
norme(data$SP95,ph)
norme(data$SP98,ph)
# coefficient de projection
prog=function(omega,x,y){
sum(omega*x*y)/sum(omega*x*x)
}
z=prog(rep(1/10,10),y=data$Gazole,x=rep(1,length(data$Gazole)))
z
#norme de la difference
norme(data$Gazole-z,rep(1/10,length(data$Gazole)))
#
cosangle=function(poid,x,y){
sum(poid*x*y)/(norme(x,poid)*norme(y,poid))
}
# proche de 1
cos=cosangle(ph,data$Gazole,data$SP95)
# distance entre deux vecteur (ecart)
distance=function(x,y,omega) norme(x-y,omega)
distance(data$Gazole,data$SP95,ph)
# exo 4
data_conso=tbl_df(read.table("http://pbil.univ-lyon1.fr/R/donnees/conso.txt",header=T,))
nu=NULL
for(i in 1:nrow(data_conso)){
data_conso$nu[i]=norme(data_conso[i,],rep(1/ncol(data_conso),ncol(data_conso)))
}
dist=NULL
for(i in 1:nrow(data_conso)){
for(j in 1:nrow(data_conso)){
dist=c(dist,distance(data_conso[i,],data_conso[j,],rep(1/5,5)))
}
}
matrix(dist,6,6)
#
#3.a)
poid=rep(1,6)
poid[c(1,3)]=5
nD=NULL
for(i in 1:nrow(data_conso)){
nD=c(nD,norme(data_conso[i,],poid))
}
data_conso$nD=nD
which(data_conso$nD==max(data_conso$nD))
which(data_conso$nu==max(data_conso$nu))
#3.b
dist1=NULL
for(i in 1:nrow(data_conso)){
for(j in 1:nrow(data_conso)){
dist1=c(dist1,distance(data_conso[i,],data_conso[j,],poid))
}
}
matrix(dist1,nrow=6,ncol = 6)
#4.a
cos=NULL
for(i in 1:nrow(data_conso)){
for(j in 1:nrow(data_conso)){
cos=c(cos,cosangle(poid=rep(1,5),
x= data_conso[i,1:5],
y= data_conso[j,1:5]))
}
}
#4.b
matrix(cos,6,6) # on remarquera que l'individu 5 et 6 on le meme logique de comportement
# 5.a
# norme de variable
nV=NULL
for(i in 1:ncol(data_conso)){
nV=c(nV,norme(data_conso[,i],
poid = rep(1,nrow(data_conso))
))
}
nV=paste0(colnames(data_conso),"=",nV)
varV=NULL
for(i in 1:ncol(data_conso)){
varV=c(varV,var(data_conso[,i]))
}
varV=paste0(colnames(data_conso),"=",varV)
# Transport et cigarettes
varV
#4.1.5.c
data_conso_centre=scale(data_conso,center = T,scale=F)
dist2=NULL
for(i in 1:nrow(data_conso)){
for(j in 1:nrow(data_conso)){
dist2=c(dist2,distance(data_conso_centre[i,],
data_conso_centre[j],
rep(1,ncol(data_conso))))
}
}
matrix(dist,6,6)
data_conso_norm=tbl_df(scale(data_conso,T))
nVr=NULL
for(i in 1:nrow(data_conso)){
nVr=c(nVr,norme(data_conso_norm[,i],
rep(1,nrow(data_conso))))
}
nVr
# 4.1.5.e
# le contraire
# 4.2
# working directory
dir()
load("pps066.rda")
data_alc=pps066
data_alc
apply(data_alc$beer,2,var)->beer
apply(data_alc$spir,2,var)->spir
apply(data_alc$wine,2,var)->wine
# colle les colonnes
varconso=as.data.frame(cbind(beer,spir,wine))
# on fait la transposé
t(varconso)
# ajoute de variable temps
varconso$temps=1961:(1961+nrow(varconso)-1)
# 4.2.2
for(i in 1:3){
plot(x=varconso[,i],y=varconso$temps,pch=2)
}
#ou
varconso1 = varconso %>%
gather(key = "Product",value="Variance",c(-temps))
varconso1$Product=as.factor(varconso1$Product)
#package ggplot
ggplot(data=varconso1, aes(x =temps ,y=Variance,col=Product))+
geom_line()
varcenter=tbl_df(cbind(scale(varconso[,1:3],center=F,scale=T),
temps=varconso$temps)) %>%
gather(key = "Product",value = "Variance",c(-temps))
varcenter$Product=as.factor(varcenter$Product)
ggplot(data=varcenter, aes(x =temps ,y=Variance,col=Product))+
geom_line()
# acceder un repertoire de travail (working directory)
setwd()
# donne l'espace de travail
getwd()
# donnne la list des fichiers dans wd
dir()
# cree un repertoire
dir.create("monrepertoire")
#decharger package
detach("package:nompackaage",unload = T)
# voir fichier de donnée disponible
data()
# telecharger fichier via url
download.file("adresse","nomdufichierloaded")
CSD=read_csv("http://pbil.univ-lyon1.fr/R/donnees/CrimeStateDate.csv")
## Parsed with column specification:
## cols(
## Etat = col_character(),
## Date = col_integer(),
## Population = col_integer(),
## Crime_Violent = col_integer()
## )
#ou
CSD1=read.csv("http://pbil.univ-lyon1.fr/R/donnees/CrimeStateDate.csv",header=T)
# tete
head(CSD)
# fin
tail(CSD)
# montre les arguments de la fonction
args(read.csv)
## function (file, header = TRUE, sep = ",", quote = "\"", dec = ".",
## fill = TRUE, comment.char = "", ...)
## NULL
path="http://pbil.univ-lyon1.fr/R/donnees/SecRoutiere0910.txt"
SR0910=read.table(file=path,header = T)
# nom de chaque colonne
names(SR0910)
## [1] "departement" "numdep" "region" "numregion"
## [5] "acc.corps.10" "acc.corps.09" "tues.10" "tues.09"
## [9] "blesses.10" "blesses.09" "population" "ratio"
# dim nb colonne nb ligne
dim(SR0910)
## [1] 100 12
# pour connaitre le working directory
library(ade4)
data(banque)
# nom des colonnes
names(banque)
## [1] "csp" "duree" "oppo" "age" "sexe" "interdit"
## [7] "cableue" "assurvi" "soldevu" "eparlog" "eparliv" "credhab"
## [13] "credcon" "versesp" "retresp" "remiche" "preltre" "prelfin"
## [19] "viredeb" "virecre" "porttit"
# recupere les lignes correspondant la comparaison pour la variable date=2005
cv2005=CSD[CSD$Date=="2005",]
# affectation de la colonnes crime violent
crime= cv2005$Crime_Violent
# Type de variable
class(crime)
## [1] "integer"
typeof(crime)
## [1] "integer"
hist(crime,
col= "lightblue" ,# couleur
main="Nombre de crime " # Titre
)
# log des valeur
crimelog=log(crime)
# avec log
hist(crimelog,
col= "lightblue" ,# couleur
main="Nombre de crime " # Titre
)
# deux graphique sur une ligne
par(mfrow=c(1,2))
dotchart(crime,main= "serie brute", pch=20 # Type de point
)
dotchart(sort(crime), # Orde croissant avec ma fonction sort
main="serie Ordonnée", pch=20)
# avec log
par(mfrow=c(1,2))
dotchart(crimelog,main= "serie brute", pch=20 # Type de point
)
dotchart(sort(crimelog), # Orde croissant avec ma fonction sort
main="serie Ordonnée", pch=20)
# boxplot
par(mfrow=c(1,1))
boxplot(crime, col="lightblue", main="titre")
# version horizontal
boxplot(crime,col="lightblue", main="titre", horizontal = T)
# Avec log
boxplot(crimelog,col="lightblue", main="titre", horizontal = T)
# type de variable
class(banque$csp)
## [1] "factor"
# affiche les modalités
levels(banque$csp)
## [1] "agric" "artis" "cadsu" "inter" "emplo" "ouvri" "retra" "inact" "etudi"
# renvoie le nombre de modalité
length(levels(banque$csp))
## [1] 9
# Calculer les proportions de chaque modalités
freqrel= summary(banque$csp)/length(banque$csp)
freqrel
## agric artis cadsu inter emplo ouvri
## 0.03580247 0.05925926 0.12716049 0.12592593 0.18641975 0.22592593
## retra inact etudi
## 0.06419753 0.10493827 0.07037037
# mettre sur base 360
360*freqrel
## agric artis cadsu inter emplo ouvri retra inact
## 12.88889 21.33333 45.77778 45.33333 67.11111 81.33333 23.11111 37.77778
## etudi
## 25.33333
# camembert
pie(summary(banque$csp),main="Titre")
pie(summary(banque$sexe),
col=c("darkblue","pink"),
label=c("homme","femme"))
# pour interdit bancaire
class(banque$interdit)
## [1] "factor"
length(levels(banque$interdit))
## [1] 2
pie(summary(banque$interdit),
col=c("darkblue","pink"),
label=c("Non","Oui"))
summary(banque$interdit)/length(banque$interdit)
## non oui
## 0.92839506 0.07160494
par(mfrow=c(1,2))
barplot(summary(banque$csp), main="Frequence absolue",
col="lightblue",
las= 2)
barplot(summary(banque$csp)/length(banque$csp), main="Frequence absolue",
col="lightblue",
las= 2)
# version horizontal
barplot(summary(banque$csp)/length(banque$csp), main="Frequence absolue",
col="lightblue",
las= 2, horiz=T)
# Avec soldevu
class(banque$soldevu)
## [1] "factor"
levels(banque$soldevu)
## [1] "p4" "p3" "p2" "p1" "n1" "n2"
barplot(summary(banque$soldevu)/length(banque$soldevu), main="Frequence absolue",
col="lightblue",
las= 2, horiz=T)
pie(summary(banque$soldevu),
col=rainbow(6),
label=levels(banque$soldevu))
summary(banque$soldevu)/nrow(banque)
## p4 p3 p2 p1 n1 n2
## 0.11728395 0.08518519 0.17901235 0.33456790 0.20000000 0.08395062
# 1)
table(SR0910$region)
##
## Alsace Aquitaine
## 2 5
## Auvergne Basse_Normandie
## 4 3
## Bourgogne Bretagne
## 4 4
## Centre Champagne_Ardenne
## 6 4
## Corse Franche_Comte
## 2 4
## Guadeloupe Guyane
## 1 1
## Haute_Normandie Ile_De_France
## 2 8
## Languedoc_Roussillon Limousin
## 5 3
## Lorraine Martinique
## 4 1
## Midi_Pyrenees Nord_Pas_De_Calais
## 8 2
## Pays_De_Loire Picardie
## 5 3
## Poitou_Charentes Provence_Alpes_Cote_dAzur
## 4 6
## Reunion Rhone_Alpes
## 1 8
# 2)
boxplot(SR0910$tues.09,col="red",main="Nbre de mort en 2009")
# 3)
boxplot(SR0910$tues.10,col="red",main="Nbre de mort en 2009")
# 4)
par(mfrow=c(1,2))
dotchart(sort(SR0910$tues.09),pch=10, col ="red", main="2009")
dotchart(sort(SR0910$tues.10),pch=10, col ="red",main="2010")
depsante <- read.table("http://pbil.univ-lyon1.fr/R/donnees/depsante.txt",
h=TRUE,dec=",")
names(depsante)
## [1] "age" "groupe" "effectif" "maladie" "dentaire" "optique"
# struture pareil que la fonction str()
glimpse(depsante)
## Observations: 20
## Variables: 6
## $ age <int> 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70...
## $ groupe <fct> jeunes, jeunes, jeunes, jeunes, actifs, actifs, actif...
## $ effectif <int> 410, 400, 462, 572, 348, 371, 252, 220, 279, 435, 667...
## $ maladie <dbl> 64.2, 70.2, 70.9, 57.9, 66.1, 96.0, 82.2, 78.1, 98.8,...
## $ dentaire <dbl> 2.0, 75.2, 258.9, 10.7, 20.1, 19.8, 22.3, 51.9, 31.8,...
## $ optique <dbl> 21.0, 36.4, 37.7, 34.7, 45.3, 43.3, 26.5, 25.0, 45.5,...
groupe <- depsante$groupe
summary(groupe)
## actifs anciens jeunes
## 8 8 4
# création de vecteurs couleurs
couleur <- rep(c("orange","red2","red4"),c(4,8,8))
# on recuperer trois colonnes et affecte ca dans mesures
mesures <- depsante[,4:6]
names(mesures)
## [1] "maladie" "dentaire" "optique"
# plot xy
plot(mesures, col = couleur, pch=19)
# fonction du package library(rgl)
plot3d(mesures, type = "s", col = couleur)
# fonction qui calcul moyenne sur chaque colonne
colMeans(mesures)
## maladie dentaire optique
## 116.950 49.395 29.885
varn <- function(x) var(x)*(length(x)-1)/length(x)
sapply(mesures,varn)
## maladie dentaire optique
## 2759.8585 2830.5495 164.9503
sapply(mesures,var)
## maladie dentaire optique
## 2905.1142 2979.5258 173.6319
# sclacewt package ade4
mesures.cr <- scalewt(mesures, center=TRUE, scale=TRUE)
class(mesures.cr)
## [1] "matrix"
mesurescr <- as.data.frame(mesures.cr)
#moyenne avec version centré et réduites des valeurs
sapply(mesurescr,mean)
## maladie dentaire optique
## 1.100194e-17 -8.082185e-17 -9.998512e-17
# moyenne arrondie au 100éme
round(sapply(mesurescr,mean),2)
## maladie dentaire optique
## 0 0 0
# variance de la version normaliser
sapply(mesurescr,varn)
## maladie dentaire optique
## 1 1 1
# vecteur contenant le min et le max
lims <- c(min(mesurescr),max(mesurescr))
plot3d(mesurescr, type = "s", col = couleur, xlim = lims, ylim = lims,
zlim = lims)
plot3d(mesurescr, type = "s", col = couleur,
xlim = lims,
ylim = lims,
zlim = lims)
# version avec ellipsoide
plot3d( ellipse3d(cor(mesurescr)), col="grey", alpha=0.5, add = TRUE)
# package ad4
acp <- dudi.pca(mesures, center=TRUE, scale=TRUE, scannf = F, nf = 3)
# names élémeent de l'acp
names(acp)
## [1] "tab" "cw" "lw" "eig" "rank" "nf" "c1" "li" "co" "l1"
## [11] "call" "cent" "norm"
# pareil avec version centré reduites (cette version demande selectionner le nbre d'axe)
acp1 <- dudi.pca(mesures, center=TRUE, scale=TRUE,nf=3,scannf = F)
head(acp$tab)
head(mesurescr)
# poid des colonnes (par defaut 1)
acp$cw
## [1] 1 1 1
#poid des lignes (par defaut 1/n)
acp$lw
## [1] 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## [15] 0.05 0.05 0.05 0.05 0.05 0.05
# base *n
head(acp$lw)*nrow(mesures)
## [1] 1 1 1 1 1 1
# valeurs propres
acp$eig
## [1] 1.7371612 0.9015541 0.3612847
# somme des valeurs
sum(acp$eig)
## [1] 3
# pourcentages inerties pour chaque axes
pve <- 100*acp$eig/sum(acp$eig)
pve
## [1] 57.90537 30.05180 12.04282
#somme cumulative des pourcentages
cumsum(pve)
## [1] 57.90537 87.95718 100.00000
# Avec summary() nous avons toutes ces informations
summary(acp)
## Class: pca dudi
## Call: dudi.pca(df = mesures, center = TRUE, scale = TRUE, scannf = F,
## nf = 3)
##
## Total inertia: 3
##
## Eigenvalues:
## Ax1 Ax2 Ax3
## 1.7372 0.9016 0.3613
##
## Projected inertia (%):
## Ax1 Ax2 Ax3
## 57.91 30.05 12.04
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3
## 57.91 87.96 100.00
# donnne le rank de la matrice
acp$rank
## [1] 3
# duplicate les 3 colonnes
bismesures <- cbind(mesures,mesures)
head(bismesures)
# le rang ne change pas meme en doublant chaque colonnes
colnames(bismesures) <- c("maladie1","dentaire1","optique1",
"maladie2","dentaire2","optique2")
dudi.pca(bismesures,scann=F,n=3)$rank
## [1] 3
# nombre de facteurs choisis
acp$nf
## [1] 3
# donne les coordonnées des variables (colonnes) version normé
acp$c1
sapply(1:3, function(x) sum(acp$cw*acp$c1[,x]^2))
## [1] 1 1 1
# donne les coordonnées des individus
acp$l1
head(acp$l1)
# coordonnée de variables (version normés à la racines carré des valeurs propres)
acp$co
# dans les deux cas nous obtenons des valeurs propres
sapply(1:3, function(x) sum(acp$cw*acp$co[,x]^2))
## [1] 1.7371612 0.9015541 0.3612847
acp$eig
## [1] 1.7371612 0.9015541 0.3612847
#
acp$c1$CS1 * sqrt(acp$eig[1])
## [1] 0.8676353 -0.4555425 -0.8813916
acp$co
acp$eig
## [1] 1.7371612 0.9015541 0.3612847
t(t(acp$c1) * sqrt(acp$eig))
## CS1 CS2 CS3
## maladie 0.8676353 -0.2662627 -0.41989654
## dentaire -0.4555425 -0.8896976 0.03031894
## optique -0.8813916 0.1977284 -0.42901321
# donne les coordonnées des individus( version normé à la racine carré de la valeur propre )
head(acp$li)
acp$eig
## [1] 1.7371612 0.9015541 0.3612847
# lien entre L1 et li pour l'axes 1
head(acp$l1$RS1 * sqrt(acp$eig[1]))
## [1] 0.1095310 -1.0926721 -2.3449799 -0.7392648 -1.2495023 -0.7687505
# lien entre L1 et Li pour tous les axes
head(t(t(acp$l1) * sqrt(acp$eig)))
## RS1 RS2 RS3
## 1 0.1095310 0.97223653 1.1502870
## 2 -1.0926721 -0.09929729 0.2840670
## 3 -2.3449799 -3.31729801 0.3766787
## 4 -0.7392648 1.07477518 0.4809507
## 5 -1.2495023 1.03732155 -0.2082590
## 6 -0.7687505 0.85057328 -0.4949950
# affiche la fonction de l'on Ă fait
acp$call
## dudi.pca(df = mesures, center = TRUE, scale = TRUE, scannf = F,
## nf = 3)
# permet d'évaluer la fonction sortie
eval(acp$call)
## Duality diagramm
## class: pca dudi
## $call: dudi.pca(df = mesures, center = TRUE, scale = TRUE, scannf = F,
## nf = 3)
##
## $nf: 3 axis-components saved
## $rank: 3
## eigen values: 1.737 0.9016 0.3613
## vector length mode content
## 1 $cw 3 numeric column weights
## 2 $lw 20 numeric row weights
## 3 $eig 3 numeric eigen values
##
## data.frame nrow ncol content
## 1 $tab 20 3 modified array
## 2 $li 20 3 row coordinates
## 3 $l1 20 3 row normed scores
## 4 $co 3 3 column coordinates
## 5 $c1 3 3 column normed scores
## other elements: cent norm
identical(eval(acp$call), acp)
## [1] TRUE
#Ce vecteur donne les moyennes (cent pour centrage)
acp$cent
## maladie dentaire optique
## 116.950 49.395 29.885
colMeans(mesures)
## maladie dentaire optique
## 116.950 49.395 29.885
# Ce vecteur donne les ecarts-types par rapport Ă squrt(n)
acp$norm
## maladie dentaire optique
## 52.53436 53.20291 12.84330
var.n <- function(x) sum((x-mean(x))^2)/length(x)
sd.n <- function(x) sqrt(var.n(x))
apply(mesures, 2, sd.n)
## maladie dentaire optique
## 52.53436 53.20291 12.84330
cbind(
)
## NULL
# décentrage et déreduction
(mesures.cr*acp$norm )+acp$cent
## maladie dentaire optique
## [1,] 64.200000 18.44374 12.58920
## [2,] 2.050059 142.43073 36.40000
## [3,] 18.626961 258.90000 148.91656
## [4,] 57.900000 20.54394 69.34097
## [5,] -2.102118 88.02312 45.30000
## [6,] 24.763265 19.80000 171.82286
## [7,] 82.200000 23.34421 35.37276
## [8,] 10.050594 119.42352 25.00000
## [9,] 25.447792 31.80000 180.82176
## [10,] 110.400000 27.90671 131.89219
## [11,] 41.850191 123.96572 40.70000
## [12,] 25.472240 47.70000 166.50532
## [13,] 110.400000 31.96227 36.61550
## [14,] 73.143425 132.85262 24.60000
## [15,] 34.762261 88.80000 111.28478
## [16,] 158.300000 27.35148 33.71577
## [17,] 88.638133 112.41274 21.10000
## [18,] 49.504058 8.90000 55.65520
## [19,] 277.100000 26.31346 -42.09134
## [20,] 92.790310 78.34627 0.00000
## attr(,"scaled:center")
## maladie dentaire optique
## 116.950 49.395 29.885
## attr(,"scaled:scale")
## maladie dentaire optique
## 52.53436 53.20291 12.84330
# graph des individus
s.label(acp$li, xax = 1, yax = 2, clabel=1.5)
## Warning: Unused parameters: clabel
s.label(acp$li, xax = 1, yax = 2,
label=as.character(depsante$age) # pour utiliser leur noms
, clabel=1.5)
## Warning: Unused parameters: clabel
# s.class permets d'avoir des informations supplémentaire par rapport au variable
gcol <- c("red1", "red4","orange")
s.class(dfxy = acp$li, fac = depsante$groupe, col = gcol, xax = 1, yax = 2)
s.class(dfxy = acp$li, fac = depsante$groupe, col = gcol, xax = 1, yax = 3)
# cercle de correlation
s.corcircle(acp$co, xax = 1, yax = 2)
s.corcircle(acp$co, xax = 1, yax = 3)
par(mfrow=c(2,2))
par(mar=c(2,2,2,1))
graphe <- function (i) plot(x = acp$li[,1], y = mesures[,i], pch = 20, col = couleur,
xlab = "Axe 1", las = 1,
ylab = colnames(mesures)[i])
sapply(1:3,graphe)
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
# rpz individu variable
scatter(acp,posieig="bottomright")
groupe=depsante$groupe
scatter(acp, clab.row = 0, posieig = "none")
## Warning: Unused parameters: clab
## Warning: Unused parameters: clab
s.class(acp$li,groupe,col=gcol, add.plot = TRUE,
cstar = 0, clabel = 0, cellipse = 0, cpoint=2)
## Warning: Unused parameters: add cstar clabel cellipse cpoint
# pondération individus
acp$lw
## [1] 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05 0.05
## [15] 0.05 0.05 0.05 0.05 0.05 0.05
poidsU <- acp$lw
# changement de pondération
poidsD <- depsante$effectif
poidsD <- poidsD/sum(poidsD)
round(poidsD,3)
## [1] 0.059 0.058 0.067 0.083 0.050 0.054 0.036 0.032 0.040 0.063 0.096
## [12] 0.095 0.063 0.046 0.039 0.056 0.037 0.021 0.004 0.001
sum(poidsD)
## [1] 1
ggplot(data =tbl_df(cbind(age=depsante$age,poidsD)) ,
aes(x=age,y=poidsD))+
geom_point()+
geom_hline(aes(yintercept=mean(poidsD),col="red"))
moypond <- function(x,pond) sum(pond*x)
varpond <- function(x,pond) sum(pond*(x-sum(pond*x))^2)
(resmU <- sapply(mesures,moypond,poidsU))
## maladie dentaire optique
## 116.950 49.395 29.885
(resmD <- sapply(mesures,moypond,poidsD))
## maladie dentaire optique
## 101.03121 55.27092 34.46188
(resvU <- sapply(mesures,varpond,poidsU))
## maladie dentaire optique
## 2759.8585 2830.5495 164.9503
(resvD <- sapply(mesures,varpond,poidsD))
## maladie dentaire optique
## 1198.40706 3446.77153 88.41082
acpD <- dudi.pca(mesures, row.w=poidsD, # pour predefinir le poid
scannf=FALSE, nf=3)
liminf <- min(cbind(acp$li,acpD$li))-0.10
limsup <- max(cbind(acp$li,acpD$li))+0.10
lims <- c(liminf,limsup)
par(mfrow=c(2,2))
s.corcircle(acp$co)
s.corcircle(acpD$co)
s.class(dfxy = acp$li, fac = groupe, col = gcol, xlim=lims, ylim=lims)
s.class(dfxy = acpD$li, fac = groupe, col = gcol, xlim=lims, ylim=lims)
depsante
pond2015=c(rep(6.2,3),6.1,5.7,
6,6.2,6.1,6.9,6.8,6.7,6.4,
6.1,5.6,3.7,rep(1.82,5))
acpechan <- dudi.pca(mesures, row.w=poidsD # pour predefinir le poid
, scannf=FALSE, nf=3)
acp2015<-dudi.pca(mesures, row.w=pond2015 # pour predefinir le poid
, scannf=FALSE, nf=3)
liminf <- min(cbind(acpechan$li,acp2015$li,acp$li))-0.10
limsup <- max(cbind(acp$li,acp2015$li,acpechan$li))+0.10
lims <- c(liminf,limsup)
par(mfrow=c(2,3))
s.corcircle(acp$co)
s.corcircle(acpechan$co)
s.corcircle(acp2015$co)
s.class(dfxy = acp$li, fac = groupe, col = gcol, xlim=lims, ylim=lims)
s.class(dfxy = acpechan$li, fac = groupe, col = gcol, xlim=lims, ylim=lims)
s.class(dfxy = acp2015$li, fac = groupe, col = gcol, xlim=lims, ylim=lims)
f="http://pbil.univ-lyon1.fr/R/donnees/SecRoutiere0910.txt"
SR0910=tbl_df(read_tsv(f))
acpSR=dudi.pca(SR0910[,5:10],
center = TRUE ,scale =TRUE, # pour center et reduire les données avant
scannf=F,nf=2) # Le nombre d'axes que l'on garde
acpSR$eig
## [1] 4.583862289 1.328971708 0.069553111 0.016112379 0.001343034 0.000157480
barplot(acpSR$eig)
summary(acpSR)
## Class: pca dudi
## Call: dudi.pca(df = SR0910[, 5:10], center = TRUE, scale = TRUE, scannf = F,
## nf = 2)
##
## Total inertia: 6
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 4.583862 1.328972 0.069553 0.016112 0.001343
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 76.39770 22.14953 1.15922 0.26854 0.02238
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 76.40 98.55 99.71 99.97 100.00
##
## (Only 5 dimensions (out of 6) are shown)
s.corcircle(acpSR$co,xax=1,yax=2,clabel = 0.8)
## Warning: Unused parameters: clabel
acpSR$co
#ou
## Warning: Unused parameters: clab
## Warning: Unused parameters: clab
acpSR$l1$RS1 * sqrt(acpSR$eig[1])
## [1] 0.17837200 0.71899190 0.94323094 1.76092255 1.98211317
## [6] -3.93125511 1.39456075 1.69638830 1.92270677 1.49281311
## [11] 0.40190541 1.50379473 -9.38292766 0.37628605 2.12235930
## [16] 1.31304925 -1.18621586 0.88288265 1.36284842 1.68819563
## [21] 1.00864521 0.37462981 0.50327265 2.08238442 0.95788250
## [26] 0.66808552 0.18291104 0.42556189 0.33398282 0.10713902
## [31] -1.69698109 -1.36187218 1.40947842 -3.51847822 -2.63245936
## [36] -0.55434969 1.46740087 0.10452232 -1.07287757 1.39244443
## [41] 0.69779277 0.86729654 -0.06895567 1.52569943 -1.49410805
## [46] -0.17354612 1.76435140 0.99600477 2.11709038 -0.60022844
## [51] 0.45722909 0.51952571 1.61020901 1.54894725 0.29096850
## [56] 1.67619038 -0.10271507 -0.29004935 1.66003812 -3.89115462
## [61] -0.12489584 1.38684957 -1.09467470 -0.10250142 -0.25099481
## [66] 1.61015135 1.20757444 -0.73209288 0.44764720 -3.85119071
## [71] 1.52432519 0.24840272 0.65110293 1.34513410 0.14017672
## [76] -12.59179027 -1.11323897 -1.50578744 -1.58945558 1.13274307
## [81] -0.12726022 0.97707276 1.26718584 -2.22396560 0.29990011
## [86] 0.12565197 1.07061373 0.79546852 0.98421074 0.96004643
## [91] 1.86614136 -1.13836263 -2.83867884 -3.66806040 -2.70715022
## [96] -0.24121303 -0.20489378 0.66129643 1.00069923 -0.12911820
par(mfrow=c(1,2))
s.label(acpSR$li,label=SR0910$departement,clabel=0.75)
## Warning: Unused parameters: clabel
popu <- scale(SR0910$population,center=TRUE,scale=TRUE)
s.value(acpSR$li[,1:2],popu)
data("macon")
macon1=tbl_df(macon)
summary(macon1)
## a b c d
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75
## Median :4.50 Median :4.50 Median :4.50 Median :4.50
## Mean :4.50 Mean :4.50 Mean :4.50 Mean :4.50
## 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25
## Max. :8.00 Max. :8.00 Max. :8.00 Max. :8.00
## e f g h
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75
## Median :4.50 Median :4.50 Median :4.50 Median :4.50
## Mean :4.50 Mean :4.50 Mean :4.50 Mean :4.50
## 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25
## Max. :8.00 Max. :8.00 Max. :8.00 Max. :8.00
## i j k l
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75
## Median :4.50 Median :4.50 Median :4.50 Median :4.50
## Mean :4.50 Mean :4.50 Mean :4.50 Mean :4.50
## 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25
## Max. :8.00 Max. :8.00 Max. :8.00 Max. :8.00
## m n o p
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75
## Median :4.50 Median :4.50 Median :4.50 Median :4.50
## Mean :4.50 Mean :4.50 Mean :4.50 Mean :4.50
## 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25
## Max. :8.00 Max. :8.00 Max. :8.00 Max. :8.00
## q r s t
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75
## Median :4.50 Median :4.50 Median :4.50 Median :4.50
## Mean :4.50 Mean :4.50 Mean :4.50 Mean :4.50
## 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25
## Max. :8.00 Max. :8.00 Max. :8.00 Max. :8.00
## u v w x
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75 1st Qu.:2.75
## Median :4.50 Median :4.50 Median :4.50 Median :4.50
## Mean :4.50 Mean :4.50 Mean :4.50 Mean :4.50
## 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25 3rd Qu.:6.25
## Max. :8.00 Max. :8.00 Max. :8.00 Max. :8.00
## y
## Min. :1.00
## 1st Qu.:2.75
## Median :4.50
## Mean :4.50
## 3rd Qu.:6.25
## Max. :8.00
acpWine=dudi.pca(macon1,scale =T,scannf = F,nf=6)
summary(acpWine)
## Class: pca dudi
## Call: dudi.pca(df = macon1, scale = T, scannf = F, nf = 6)
##
## Total inertia: 25
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 8.774 5.635 3.741 2.672 1.953
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 35.096 22.541 14.966 10.689 7.813
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 35.10 57.64 72.60 83.29 91.10
##
## (Only 5 dimensions (out of 7) are shown)
par(mfrow=c(1,3))
s.corcircle(dfxy = acpWine$co,xax =1,yax=2)
s.corcircle(dfxy = acpWine$co,xax =3,yax=4)
s.corcircle(dfxy = acpWine$co,xax =5,yax=6)
acpWine$co
macont=tbl_df(t(macon1))
summary(macont)
## A B C D
## Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00
## 1st Qu.:4.00 1st Qu.:3.00 1st Qu.:1.00 1st Qu.:5.00
## Median :5.00 Median :5.00 Median :2.00 Median :6.00
## Mean :4.76 Mean :4.76 Mean :3.04 Mean :5.64
## 3rd Qu.:6.00 3rd Qu.:7.00 3rd Qu.:5.00 3rd Qu.:7.00
## Max. :8.00 Max. :8.00 Max. :7.00 Max. :8.00
## E F G H
## Min. :1.00 Min. :1.0 Min. :1.00 Min. :1.00
## 1st Qu.:1.00 1st Qu.:3.0 1st Qu.:3.00 1st Qu.:3.00
## Median :2.00 Median :5.0 Median :5.00 Median :4.00
## Mean :3.24 Mean :5.2 Mean :4.88 Mean :4.48
## 3rd Qu.:4.00 3rd Qu.:7.0 3rd Qu.:7.00 3rd Qu.:6.00
## Max. :8.00 Max. :8.0 Max. :8.00 Max. :8.00
acpWine2=dudi.pca(macont,scannf = F,nf=6)
summary(acpWine2)
## Class: pca dudi
## Call: dudi.pca(df = macont, scannf = F, nf = 6)
##
## Total inertia: 8
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 2.1744 1.8315 1.2627 1.1800 0.7547
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 27.180 22.894 15.784 14.750 9.434
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 27.18 50.07 65.86 80.61 90.04
##
## (Only 5 dimensions (out of 7) are shown)
gcol=rainbow(8)
groupe=colnames(macont)
par(mfrow=c(2,3))
s.corcircle(acpWine2$co,1,2)
s.corcircle(acpWine2$co,3,4)
s.corcircle(acpWine2$co,5,6)
scatter(acpWine2,xax = 1,yax=2)
scatter(acpWine2,xax = 3,yax=4)
scatter(acpWine2,xax = 5,yax=6)
acpWine$l1
rapport <- CSD$Crime_Violent/CSD$Population
rapport <- rapport*1000
annees <- as.character(1965:2005)
#
crimes <- matrix(0,ncol=length(annees),nrow=51)
for (i in 1:length(annees)) crimes[,i] <- rapport[CSD$Date==annees[i]]
crimes <- as.data.frame(crimes)
rownames(crimes) <- unique(CSD$Etat)
colnames(crimes) <- paste("A",annees,sep="")
tprofils <- apply(crimes,1,function(x) x/sum(x))
profils <- t(tprofils)
# ou faire çà :)
csd=CSD %>%
mutate(rapport=Crime_Violent/CSD$Population)%>%
subset(select=c(1,2,5))%>%
spread(key = Date,value =rapport)%>%
subset(select=c(-1))%>%
as.matrix()
rownames(csd)=levels(as.factor(CSD$Etat))
for(i in 1:ncol(csd)){
csd[,i]=csd[,i]/rowSums(csd)
}
head(csd[,1:6])
## 1960 1961 1962 1963 1964
## Alabama 0.009341078 0.008129334 0.007361194 0.008323083 0.009428492
## Alaska 0.004823885 0.004038660 0.004097001 0.004843828 0.006516803
## Arizona 0.009187949 0.007055875 0.007251167 0.007955809 0.008518942
## Arkansas 0.006584791 0.005953779 0.005254443 0.006007662 0.007453520
## California 0.007778029 0.007443326 0.007389990 0.007471331 0.008100841
## Colorado 0.007659601 0.008047278 0.008162761 0.006557966 0.007779465
## 1965
## Alabama 0.008562573
## Alaska 0.006335780
## Arizona 0.007497958
## Arkansas 0.007171153
## California 0.008466638
## Colorado 0.007267971
acpcsd=dudi.pca(df=na.omit(csd),center = F,scale=F,scannf = F)
# 97% de inertie sur premier axes
summary(acpcsd)
## Class: pca dudi
## Call: dudi.pca(df = na.omit(csd), center = F, scale = F, scannf = F)
##
## Total inertia: 0.005866
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 5.718e-03 5.261e-05 3.093e-05 1.726e-05 1.320e-05
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 97.4807 0.8969 0.5273 0.2943 0.2250
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 97.48 98.38 98.90 99.20 99.42
##
## (Only 5 dimensions (out of 46) are shown)
# eig =46
acpcsd$eig
## [1] 5.717959e-03 5.261037e-05 3.092776e-05 1.726015e-05 1.319561e-05
## [6] 6.495587e-06 5.381599e-06 3.704215e-06 2.982948e-06 2.380196e-06
## [11] 1.954612e-06 1.748885e-06 1.540215e-06 1.009269e-06 9.757133e-07
## [16] 7.466391e-07 6.722476e-07 5.800493e-07 5.073366e-07 4.011648e-07
## [21] 3.680724e-07 3.129525e-07 2.732833e-07 2.181141e-07 1.993620e-07
## [26] 1.871744e-07 1.733108e-07 1.341411e-07 1.255529e-07 1.133812e-07
## [31] 9.902356e-08 8.927599e-08 6.958199e-08 6.778051e-08 5.918842e-08
## [36] 5.095442e-08 4.477119e-08 3.248768e-08 2.028833e-08 1.860231e-08
## [41] 1.707134e-08 8.665101e-09 7.003672e-09 4.236037e-09 3.765648e-09
## [46] 1.008934e-09
#
scatter(acpcsd)
newcsd=tbl_df(cbind(city=rownames(csd),csd))%>%
gather(key=date,value = crime_rate,-city)%>%
filter(city==c("New_York","Ohio","Oregon","Maine"))%>%
na.omit()
## Warning in city == c("New_York", "Ohio", "Oregon", "Maine"): la taille d'un
## objet plus long n'est pas multiple de la taille d'un objet plus court
newcsd$crime_rate=as.double(newcsd$crime_rate)
newcsd$city=as.factor(newcsd$city)
newcsd$date=as.integer(newcsd$date)
ggplot(data=newcsd,aes(x=date,y=crime_rate,col=city))+
geom_line()+
theme_dark()
snee74 <- read.table("http://pbil.univ-lyon1.fr/R/donnees/snee74.txt",
header = TRUE)
names(snee74)
## [1] "cheveux" "yeux" "sexe"
cheveux <- snee74$cheveux
summary(cheveux)
## Blond Marron Noir Roux
## 127 286 108 71
yeux <- snee74$yeux
summary(yeux)
## Bleu Marron Noisette Vert
## 215 220 93 64
(couleurs <- table(yeux, cheveux))
## cheveux
## yeux Blond Marron Noir Roux
## Bleu 94 84 20 17
## Marron 7 119 68 26
## Noisette 10 54 15 14
## Vert 16 29 5 14
(dfcouleurs <- data.frame(unclass(couleurs)))
scorecheveux <- c(1, -1, -1, 1)
names(scorecheveux) <- colnames(couleurs)
scorecheveux
## Blond Marron Noir Roux
## 1 -1 -1 1
#frequence
dfcouleurs <- data.frame(unclass(couleurs))
dfcouleurs["Bleu", ]/sum(dfcouleurs["Bleu", ])
# score moyen yeux
yeux.bleu <- dfcouleurs["Bleu", ]/sum(dfcouleurs["Bleu", ])
yeux.bleu * scorecheveux
sum(yeux.bleu * scorecheveux)
## [1] 0.03255814
freqyeux <- apply(dfcouleurs, 1, function(x) x/sum(x))
freqyeux
## Bleu Marron Noisette Vert
## Blond 0.43720930 0.03181818 0.1075269 0.250000
## Marron 0.39069767 0.54090909 0.5806452 0.453125
## Noir 0.09302326 0.30909091 0.1612903 0.078125
## Roux 0.07906977 0.11818182 0.1505376 0.218750
t(freqyeux)
## Blond Marron Noir Roux
## Bleu 0.43720930 0.3906977 0.09302326 0.07906977
## Marron 0.03181818 0.5409091 0.30909091 0.11818182
## Noisette 0.10752688 0.5806452 0.16129032 0.15053763
## Vert 0.25000000 0.4531250 0.07812500 0.21875000
scoreyeux <- apply(t(freqyeux), 1, function(x) sum(x * scorecheveux))
scoreyeux
## Bleu Marron Noisette Vert
## 0.03255814 -0.70000000 -0.48387097 -0.06250000
ac <- dudi.coa(dfcouleurs, scannf = F, nf = 3)
rownames(ac$c1)
## [1] "Blond" "Marron" "Noir" "Roux"
#score optimal premier axes
ac$c1[, 1]
## [1] 1.8282287 -0.3244635 -1.1042772 -0.2834725
# score moyen yeux Ă partir de score optimaux cheveux
scoreyeux <- apply(t(freqyeux), 1, function(x) sum(x * ac$c1[,1]))
scoreyeux
## Bleu Marron Noisette Vert
## 0.5474139 -0.4921577 -0.2125969 0.1617534
# meme
ac$li[,1]
## [1] 0.5474139 -0.4921577 -0.2125969 0.1617534
# plot des scores
score(ac)
scatter(ac)
sitpay <- matrix(c(209, 1483, 41, 320, 60, 34, 151, 1, 70, 10, 535,
2448, 33, 897, 135, 77, 245, 4, 139, 9), byrow = T, ncol = 5)
colnames(sitpay) <- c("celibataire", "concubin","divorcee", "marie", "veuf")
rownames(sitpay) <- c("annuel", "mensuel", "semestriel", "trimestriel")
sitpay
## celibataire concubin divorcee marie veuf
## annuel 209 1483 41 320 60
## mensuel 34 151 1 70 10
## semestriel 535 2448 33 897 135
## trimestriel 77 245 4 139 9
n <- sum(sitpay)
I <- nrow(sitpay)
J <- ncol(sitpay)
freqsitpay <- sitpay/n
round(freqsitpay, digits = 4)
## celibataire concubin divorcee marie veuf
## annuel 0.0303 0.2149 0.0059 0.0464 0.0087
## mensuel 0.0049 0.0219 0.0001 0.0101 0.0014
## semestriel 0.0775 0.3547 0.0048 0.1300 0.0196
## trimestriel 0.0112 0.0355 0.0006 0.0201 0.0013
library(gplots)
#representation graphique de tableau de contigence
balloonplot(as.table(sitpay))
# plot de table de contigence
table.cont(sitpay, row.labels = rownames(sitpay), col.labels = colnames(sitpay),
csize = 2)
#mosaic plot
mosaicplot(sitpay, shade = TRUE)
profLignes <- prop.table(sitpay, 1 ) # definit frequence ligne
profLignes
## celibataire concubin divorcee marie veuf
## annuel 0.0989115 0.7018457 0.019403691 0.1514434 0.02839565
## mensuel 0.1278195 0.5676692 0.003759398 0.2631579 0.03759398
## semestriel 0.1321640 0.6047431 0.008152174 0.2215909 0.03334980
## trimestriel 0.1624473 0.5168776 0.008438819 0.2932489 0.01898734
profColonnes <- prop.table(sitpay, 2) # Definit frequence colonnes
profColonnes
## celibataire concubin divorcee marie veuf
## annuel 0.24444444 0.34273168 0.51898734 0.22440393 0.28037383
## mensuel 0.03976608 0.03489716 0.01265823 0.04908836 0.04672897
## semestriel 0.62573099 0.56574994 0.41772152 0.62903226 0.63084112
## trimestriel 0.09005848 0.05662122 0.05063291 0.09747546 0.04205607
# Test khideux
reschi <- chisq.test(sitpay)
## Warning in chisq.test(sitpay): Chi-squared approximation may be incorrect
reschi$expected
## celibataire concubin divorcee marie veuf
## annuel 261.79032 1324.8734 24.188813 436.62339 65.52413
## mensuel 32.95609 166.7848 3.045066 54.96537 8.24866
## semestriel 501.52731 2538.1388 46.339951 836.46544 125.52847
## trimestriel 58.72627 297.2030 5.426170 97.94580 14.69874
reschi
##
## Pearson's Chi-squared test
##
## data: sitpay
## X-squared = 129.22, df = 12, p-value < 2.2e-16
#p-value trop petite on rejette h_0 : independant contre hyp h_1
# on parle de liaison statistiquement significatifs
# effectifs théorique
outer(margin.table(sitpay, 1 # somme ligne
), margin.table(sitpay, 2 #somme colonnes
))/sum(sitpay) # total
## celibataire concubin divorcee marie veuf
## annuel 261.79032 1324.8734 24.188813 436.62339 65.52413
## mensuel 32.95609 166.7848 3.045066 54.96537 8.24866
## semestriel 501.52731 2538.1388 46.339951 836.46544 125.52847
## trimestriel 58.72627 297.2030 5.426170 97.94580 14.69874
dfsitpay <- as.data.frame(sitpay)
afc <- dudi.coa(dfsitpay, scannf = F, nf = 3)
names(afc)
## [1] "tab" "cw" "lw" "eig" "rank" "nf" "l1" "co" "li" "c1"
## [11] "call" "N"
# tableau analysée
afc$tab
# meme chose donc c'esdt le lien entre effectifs observée et effectifs théorique
(dfsitpay - reschi$expected)/reschi$expected
# lien entre les pondérations cw de l'afc et les fréquences marginale
afc$cw
## celibataire concubin divorcee marie veuf
## 0.12389509 0.62701058 0.01144762 0.20663672 0.03101000
apply(dfsitpay, 2, function(x) sum(x)/n)
## celibataire concubin divorcee marie veuf
## 0.12389509 0.62701058 0.01144762 0.20663672 0.03101000
afc$lw
## annuel mensuel semestriel trimestriel
## 0.30618751 0.03854514 0.58658165 0.06868570
apply(dfsitpay, 1, function(x) sum(x)/n)
## annuel mensuel semestriel trimestriel
## 0.30618751 0.03854514 0.58658165 0.06868570
# matrice diagonalisée H
matZ <- as.matrix(afc$tab)
DI <- diag(afc$lw)
DrJ <- diag(sqrt(afc$cw))
matH <- DrJ %*% t(matZ) %*% DI %*% matZ %*% DrJ
# rank egale au min
min(I - 1, J - 1)
## [1] 3
afc$rank
## [1] 3
eigen(matH)
## eigen() decomposition
## $values
## [1] 1.765310e-02 9.560696e-04 1.158312e-04 -1.453509e-18 -2.520770e-18
##
## $vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.38337054 -0.15133044 0.82387057 0.0000000 -0.3890545
## [2,] -0.51798828 0.08710879 -0.01869848 -0.6187179 -0.5838996
## [3,] -0.34849169 -0.61094688 -0.13452330 0.5784541 -0.3906300
## [4,] 0.68039274 -0.18151055 -0.54538573 -0.1881184 -0.4138665
## [5,] 0.01828753 0.75053913 -0.07311325 0.4971826 -0.4287429
reseigen <- eigen(matH)
# on retourve bien les valeurs propres
reseigen$values
## [1] 1.765310e-02 9.560696e-04 1.158312e-04 -1.453509e-18 -2.520770e-18
afc$eig
## [1] 0.0176531021 0.0009560696 0.0001158312
# Pour calculer les coordonnée lignes
matZ %*% DrJ %*% reseigen$vectors[, 1:3]
## [,1] [,2] [,3]
## annuel -0.18496237 -0.01556050 -0.002942584
## mensuel 0.15341763 0.04117812 -0.050292921
## semestriel 0.05693235 0.01681398 0.005110564
## trimestriel 0.25222413 -0.09733548 -0.002303726
afc$li
#pour calculer les coordonnées colonne
diag(1/sqrt(afc$cw)) %*% reseigen$vectors[, 1:3] %*% diag(sqrt(afc$eig))
## [,1] [,2] [,3]
## [1,] 0.14471122 -0.013293643 0.0251909629
## [2,] -0.08691466 0.003401491 -0.0002541451
## [3,] -0.43275831 -0.176559333 -0.0135317134
## [4,] 0.19886870 -0.012346472 -0.0129125746
## [5,] 0.01379796 0.131785378 -0.0044684615
afc$co
# InertieTOt=X^2/n
reschi$statistic/sum(sitpay)
## X-squared
## 0.018725
sum(afc$eig)
## [1] 0.018725
aides <- inertia.dudi(afc, row.inertia = TRUE, col.inertia = TRUE)
names(aides)
## [1] "tot.inertia" "row.contrib" "row.abs" "row.rel" "row.cum"
## [6] "col.contrib" "col.abs" "col.rel" "col.cum" "nf"
## [11] "call"
# calcule de pourcentag d'inertie
IT <- sum(afc$eig)
IT
## [1] 0.018725
afc$eig/IT
## [1] 0.942755638 0.051058450 0.006185912
aides$tot.inertia
sitpay[3, ]
## celibataire concubin divorcee marie veuf
## 535 2448 33 897 135
# multiplié par mille pour facilité la lecture
aides$row.abs
# calcule de contribution
afc$li[, 1] * afc$li[, 1] * afc$lw/afc$eig[1]
## annuel mensuel semestriel trimestriel
## 0.59338040 0.05139243 0.10770248 0.24752470
# contribution relatives
# multiplié par mille et le signes pour situer la modalité sur l'axe
aides$row.rel
(afc$li[3, ] * afc$li[3, ])/(sum(afc$tab[3, ] * afc$tab[3, ] * afc$cw))
temp <- (afc$tab[3, ] * sqrt(afc$cw)) * sqrt(afc$lw[3])
sum(temp * temp)/sum(afc$eig)
## [1] 0.1112115
aides$row.cum
entfum <- read.table("http://pbil.univ-lyon1.fr/R/donnees/entfum.txt",h=T,
row.names=1)
#entfum<-read_tsv("http://pbil.univ-lyon1.fr/R/donnees/entfum.txt")
entfum
afc=dudi.coa(entfum,scannf = F)
barplot(afc$eig/sum(afc$eig))
moynat <- c(42,29,20,9)
moynat <- data.frame(t(moynat))
colnames(moynat) <- colnames(entfum)
sup=suprow(afc,moynat)
row.names(sup$lisup)='moyenne'
afc$li=rbind(afc$li,sup$lisup)
scatter(afc,posieig = "none")
text(sup$lisup[1:2],label="moyen",col="green")
casA <- read.table("http://pbil.univ-lyon1.fr/R/donnees/qualprodA.txt", h=TRUE, row.names=1)
apply(casA,1,sum)
## nettoyage resistance rincage odeur ongles coins
## 4452 3985 3886 4236 3454 4149
## tenu rayure brillance recurage
## 4593 3568 2871 4607
apply(casA,2,sum)
## P1 P2 P3 P4 P5 P8 P9 P10
## 3973 5537 4609 5702 3817 4123 5856 6184
afc1=dudi.coa(casA,scannf = F)
chisq.test(casA)
##
## Pearson's Chi-squared test
##
## data: casA
## X-squared = 1770, df = 63, p-value < 2.2e-16
summary(afc1)
## Class: coa dudi
## Call: dudi.coa(df = casA, scannf = F)
##
## Total inertia: 0.04447
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 0.0247031 0.0156372 0.0023617 0.0012057 0.0003968
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 55.5469 35.1615 5.3104 2.7112 0.8923
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 55.55 90.71 96.02 98.73 99.62
##
## (Only 5 dimensions (out of 7) are shown)
barplot(afc1$eig/sum(afc$eig))
fviz_eig(afc1)
fviz_ca(afc1,
col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)
## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.
scatter(afc1,posieig = "none")
casB <- read.table("http://pbil.univ-lyon1.fr/R/donnees/qualprodB.txt", h=TRUE, row.names=1)
margeL <- apply(casB,1,sum)
tri <- seq(1,10,by=2)
sapply(tri, function(x) margeL[x]+margeL[x+1])
## nettoie_bien resistant rincer_facile odeurs_enlevees
## 6144 6144 6144 6144
## ongles_saufs
## 6144
crime=CSD
annees <- as.character(1965:2005)
ans90 <- as.character(65:99)
ans90 <- paste("A",ans90,sep="")
ans2000 <- as.character(0:5)
ans2000 <- paste("A0",ans2000,sep="")
ans <- c(ans90,ans2000)
#
crimes <- matrix(0,ncol=length(annees),nrow=51)
for (i in 1:length(annees)) crimes[,i] <- CSD$Crime_Violent[CSD$Date==annees[i]]
crimes <- as.data.frame(crimes)
rownames(crimes) <- unique(CSD$Etat)
colnames(crimes) <- ans
cds=dudi.coa(tbl_df(crimes),scannf = F)
summary(cds)
## Class: coa dudi
## Call: dudi.coa(df = tbl_df(crimes), scannf = F)
##
## Total inertia: 0.03197
##
## Eigenvalues:
## Ax1 Ax2 Ax3 Ax4 Ax5
## 0.0230207 0.0043659 0.0015226 0.0008384 0.0004279
##
## Projected inertia (%):
## Ax1 Ax2 Ax3 Ax4 Ax5
## 72.015 13.658 4.763 2.623 1.339
##
## Cumulative projected inertia (%):
## Ax1 Ax1:2 Ax1:3 Ax1:4 Ax1:5
## 72.02 85.67 90.44 93.06 94.40
##
## (Only 5 dimensions (out of 40) are shown)
fviz_eig(cds)