Lectura de Base de Datos
library(readxl)
# Load the measurements stored in gorriones.xlsx and keep them as a base
# data frame. The path is absolute, so the workbook must exist at exactly
# this location.
gorrion <- data.frame(
  read_excel("C:/Users/Alumno/Desktop/baseTaller/gorriones.xlsx")
)
# Preview the first rows of the data set.
head(gorrion)
## x1 x2 x3 x4 x5 sobrevi
## 1 156 245 31.6 18.5 20.5 sobrevivió
## 2 154 240 30.4 17.9 19.6 sobrevivió
## 3 153 240 31.0 18.4 20.6 sobrevivió
## 4 153 236 30.9 17.7 20.2 sobrevivió
## 5 155 243 31.5 18.6 20.3 sobrevivió
## 6 163 247 32.0 19.0 20.9 sobrevivió
Calcular la matriz de covarianza y correlaciones
# Sample covariance matrix of the five numeric measurements (columns 1 to 5;
# column 6 is the survival label and is left out).
cov.gorrion <- cov(gorrion[, 1:5])
cov.gorrion
## x1 x2 x3 x4 x5
## x1 13.353741 13.610969 1.9220663 1.3306122 2.1922194
## x2 13.610969 25.682823 2.7136054 2.1977041 2.6578231
## x3 1.922066 2.713605 0.6316327 0.3422662 0.4146471
## x4 1.330612 2.197704 0.3422662 0.3184184 0.3393707
## x5 2.192219 2.657823 0.4146471 0.3393707 0.9828231
# Sample correlation matrix of the same five numeric measurements.
cor.gorrion <- cor(gorrion[, 1:5])
cor.gorrion
## x1 x2 x3 x4 x5
## x1 1.0000000 0.7349642 0.6618119 0.6452841 0.6051247
## x2 0.7349642 1.0000000 0.6737411 0.7685087 0.5290138
## x3 0.6618119 0.6737411 1.0000000 0.7631899 0.5262701
## x4 0.6452841 0.7685087 0.7631899 1.0000000 0.6066493
## x5 0.6051247 0.5290138 0.5262701 0.6066493 1.0000000
Autovalores y autovectores de la matriz de correlaciones de la muestra
# Eigen decomposition of the correlation matrix: aucor$values holds the
# eigenvalues and aucor$vectors the eigenvectors (one eigenvector per column).
aucor <- eigen(x = cor.gorrion)
aucor
## eigen() decomposition
## $values
## [1] 3.6159783 0.5315041 0.3864245 0.3015655 0.1645275
##
## $vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.4517989 0.05072137 0.6904702 0.42041399 -0.3739091
## [2,] -0.4616809 -0.29956355 0.3405484 -0.54786307 0.5300805
## [3,] -0.4505416 -0.32457242 -0.4544927 0.60629605 0.3427923
## [4,] -0.4707389 -0.18468403 -0.4109350 -0.38827811 -0.6516665
## [5,] -0.3976754 0.87648935 -0.1784558 -0.06887199 0.1924341
Definir las nuevas variables componentes principales
# Standardize the five measurements with scale() (default centring and
# scaling of every column) and store the result as a data frame.
zgorrion <- data.frame(scale(gorrion[, 1:5]))
head(zgorrion)
## x1 x2 x3 x4 x5
## 1 -0.5417191 0.7248615 0.17718246 0.05424955 -0.32937165
## 2 -1.0890230 -0.2617555 -1.33272023 -1.00904159 -1.23720227
## 3 -1.3626749 -0.2617555 -0.57776889 -0.12296564 -0.22850158
## 4 -1.3626749 -1.0510492 -0.70359411 -1.36347197 -0.63198186
## 5 -0.8153711 0.3302147 0.05135723 0.23146474 -0.53111179
## 6 1.3738444 1.1195083 0.68048336 0.94032550 0.07410862
Escribir Ecuaciones de componentes
# Write every principal component Y_i as a linear combination of the
# standardized variables Z1..Z5.
# The coefficients of component i are COLUMN i of the eigenvector matrix
# (eigen() stores one eigenvector per column), so the text is assembled
# column by column; pasting the matrix row by row would print the transposed
# coefficients. The sign is flipped (-aucor$vectors) so that the first
# component has positive weights.
coef.comp <- round(-aucor$vectors, 3)
paste0(
  "Y", seq_len(ncol(coef.comp)), "=",
  apply(
    coef.comp, 2,
    function(v) paste0(v, "*Z", seq_along(v), collapse = "+")
  )
)
## [1] "Y1=0.452*Z1+0.462*Z2+0.451*Z3+0.471*Z4+0.398*Z5"
## [2] "Y2=-0.051*Z1+0.3*Z2+0.325*Z3+0.185*Z4+-0.876*Z5"
## [3] "Y3=-0.69*Z1+-0.341*Z2+0.454*Z3+0.411*Z4+0.178*Z5"
## [4] "Y4=-0.42*Z1+0.548*Z2+-0.606*Z3+0.388*Z4+0.069*Z5"
## [5] "Y5=0.374*Z1+-0.53*Z2+-0.343*Z3+0.652*Z4+-0.192*Z5"
Evaluar las ecuaciones: valores de las componentes principales
# Evaluate the five component equations for every observation in one matrix
# product: scores = Z %*% V, where Z is the standardized data and V the
# sign-flipped eigenvector matrix (-aucor$vectors).
# The same sign flip is used for the score matrix `y` and for the individual
# vectors y1..y5, so both agree with each other; without the minus sign `y`
# would be the mirror image (all signs reversed) of y1..y5.
y <- data.matrix(zgorrion) %*% (-aucor$vectors)
# Individual score vectors, one per component (columns of `y`).
y1 <- y[, 1]
y2 <- y[, 2]
y3 <- y[, 3]
y4 <- y[, 4]
y5 <- y[, 5]
head(y)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.06428901 0.60083713 0.1712334 0.515825561 -0.5487904
## [2,] -2.18031283 0.44230082 -0.4000696 0.645459959 -0.2310766
## [3,] -1.14556567 -0.01925412 0.6761269 0.716298164 -0.2088714
## [4,] -2.31106565 -0.17199267 0.3059621 -0.149289289 -0.4781034
## [5,] -0.29504203 0.66520783 0.4742138 0.545862110 -0.2444780
## [6,] 1.91626198 0.59525444 -0.6209330 -0.006608669 0.2855166
“Scree plot” (gráfico de los autovalores) y matriz de correlaciones entre las componentes principales y las variables originales. Matriz diagonal de autovalores (lambda)
# 5 x 5 diagonal matrix with the eigenvalues on its main diagonal.
lamda <- diag(aucor$values, nrow = 5, ncol = 5)
Matriz de autovectores
# Sign-flipped eigenvectors shown as rows: row i holds the coefficients of
# component i.
-t(aucor$vectors)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.45179893 0.4616809 0.4505416 0.4707389 0.39767537
## [2,] -0.05072137 0.2995635 0.3245724 0.1846840 -0.87648935
## [3,] -0.69047023 -0.3405484 0.4544927 0.4109350 0.17845580
## [4,] -0.42041399 0.5478631 -0.6062960 0.3882781 0.06887199
## [5,] 0.37390910 -0.5300805 -0.3427923 0.6516665 -0.19243414
Matriz de correlaciones entre las componentes principales y las variables originales
# Scale every (sign-flipped) eigenvector by the square root of its
# eigenvalue: row i of moc is sqrt(lambda_i) times eigenvector i, with one
# column per original variable.
moc <- sqrt(lamda) %*% (-t(aucor$vectors))
Matriz de las correlaciones al cuadrado
# Element-wise square of moc.
moc^2
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.738101709 0.77074292 0.73399926 0.80128302 0.571851431
## [2,] 0.001367378 0.04769628 0.05599250 0.01812864 0.408319277
## [3,] 0.184227570 0.04481491 0.07982124 0.06525458 0.012306259
## [4,] 0.053301078 0.09051608 0.11085394 0.04546399 0.001430431
## [5,] 0.023002265 0.04622981 0.01933306 0.06986978 0.006092602
Gráfico de sedimentación
# Scree plot: eigenvalues against component number, joined by a line.
plot(
  x = 1:5,
  y = aucor$values,
  type = "l",
  xlab = "Componetes",
  ylab = "autovalores"
)
Porcentajes de variación explicada por cada componente
# Percentage of the total variance carried by each component, and its
# running total.
Prop.Var <- aucor$values / sum(aucor$values) * 100
cumProp.var <- cumsum(Prop.Var)
# Summary table, rounded to one decimal place for display.
porc <- data.frame(
  Comp = 1:5,
  Autovalor = round(aucor$values, 1),
  Porc.Var = round(Prop.Var, 1),
  Acum.Porc.Var = round(cumProp.var, 1)
)
porc
## Comp Autovalor Porc.Var Acum.Porc.Var
## 1 1 3.6 72.3 72.3
## 2 2 0.5 10.6 82.9
## 3 3 0.4 7.7 90.7
## 4 4 0.3 6.0 96.7
## 5 5 0.2 3.3 100.0
# Bar chart of the (rounded) eigenvalues, one bar per component.
# The component numbers are passed explicitly as names.arg so they label the
# bars: the second positional argument of barplot() is `width`, so passing
# them positionally would draw bars of increasing width and leave them
# unlabelled.
barplot(
  porc[, 2],
  names.arg = porc[, 1],
  xlab = "Componetes",
  ylab = "Autovalores"
)
Valores de las componentes principales (“scores plot”): gráfico de los individuos sobre las componentes principales. Requiere el paquete ggplot2: install.packages("ggplot2")
library(ggplot2)
# Data for the scores plot: the first two component scores of every
# observation, a running observation number used as the point label, and the
# group taken from column 6 of gorrion.
# The labels are derived from the number of rows of `y` instead of a
# hard-coded 1:49, so the plot keeps working if the number of observations
# changes.
grap.comp <- data.frame(
  y1 = y[, 1],
  y2 = y[, 2],
  lab = seq_len(nrow(y)),
  grupo = gorrion[, 6]
)
# Scatter plot of the observations on components 1 and 2, coloured by group,
# with reference lines through the origin.
ggplot(grap.comp, aes(y1, y2, label = lab, color = grupo)) +
  geom_point() +
  geom_text(vjust = 2) +
  xlab("Componete 1") +
  ylab("Componete 2") +
  geom_hline(yintercept = 0, size = 1) +
  geom_vline(xintercept = 0, size = 1)
Matriz de correlaciones entre las componentes principales y las variables originales
# Print moc: one row per principal component, one column per original
# variable.
moc
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.85912846 0.8779197 0.8567376 0.8951441 0.75620859
## [2,] -0.03697807 0.2183948 0.2366273 0.1346426 -0.63899865
## [3,] -0.42921739 -0.2116953 0.2825265 0.2554498 0.11093358
## [4,] -0.23087026 0.3008589 -0.3329474 0.2132229 0.03782104
## [5,] 0.15166498 -0.2150112 -0.1390434 0.2643289 -0.07805512
“Loading plot” (gráfico de patrones, “pattern plot”): el patrón de las componentes principales
# Data for the loading plot: rows 1 and 2 of moc (first two components) for
# every original variable, labelled with the variable name.
comp.base <- data.frame(
  c1 = moc[1, ],
  c2 = moc[2, ],
  lab = names(gorrion)[1:5]
)
# One labelled point per variable, with reference lines through the origin.
ggplot(comp.base, aes(c1, c2, label = lab)) +
  geom_point() +
  geom_text(vjust = 2) +
  xlab("Correlaciones Componete 1") +
  ylab("Correlaciones Componete 2") +
  geom_hline(yintercept = 0, size = 1) +
  geom_vline(xintercept = 0, size = 1)
Cuadrados de las correlaciones entre las componentes principales y las variables originales
# Squared entries of moc, rounded to four decimal places.
MCP <- round(moc^2, digits = 4)
MCP
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.7381 0.7707 0.7340 0.8013 0.5719
## [2,] 0.0014 0.0477 0.0560 0.0181 0.4083
## [3,] 0.1842 0.0448 0.0798 0.0653 0.0123
## [4,] 0.0533 0.0905 0.1109 0.0455 0.0014
## [5,] 0.0230 0.0462 0.0193 0.0699 0.0061
# For every variable (column), add the squared values of the first two
# components (rows 1 and 2 of MCP).
MCP[2, ] + MCP[1, ]
## [1] 0.7395 0.8184 0.7900 0.8194 0.9802
% de Retención
# Total of the first two rows of MCP, divided by 5 (the number of variables)
# and expressed as a percentage.
sum(MCP[c(1, 2), ]) / 5 * 100
## [1] 82.95