Lectura de Base de Datos
library(readxl)
# Read the sparrow measurements from the Excel workbook and convert the
# result to a plain data.frame in a single step.
gorrion <- data.frame(
  read_excel("C:/Users/Alumno/Desktop/baseTaller/gorriones.xlsx")
)
# Preview the first rows.
head(gorrion)
## x1 x2 x3 x4 x5 sobrevi
## 1 156 245 31.6 18.5 20.5 sobrevivió
## 2 154 240 30.4 17.9 19.6 sobrevivió
## 3 153 240 31.0 18.4 20.6 sobrevivió
## 4 153 236 30.9 17.7 20.2 sobrevivió
## 5 155 243 31.5 18.6 20.3 sobrevivió
## 6 163 247 32.0 19.0 20.9 sobrevivió
Estadísticas Resumen
# Summary statistics for the first five columns (x1..x5).
summary(gorrion[, 1:5])
## x1 x2 x3 x4
## Min. :152 Min. :230.0 Min. :30.10 Min. :17.20
## 1st Qu.:155 1st Qu.:238.0 1st Qu.:30.90 1st Qu.:18.10
## Median :158 Median :242.0 Median :31.50 Median :18.50
## Mean :158 Mean :241.3 Mean :31.46 Mean :18.47
## 3rd Qu.:161 3rd Qu.:245.0 3rd Qu.:32.00 3rd Qu.:18.80
## Max. :165 Max. :252.0 Max. :33.40 Max. :19.80
## x5
## Min. :18.60
## 1st Qu.:20.20
## Median :20.70
## Mean :20.83
## 3rd Qu.:21.50
## Max. :23.10
# Same summary passed through rbind(), which prints it as a quoted
# character matrix.
rbind(summary(gorrion[, 1:5]))
## x1 x2 x3 x4
## "Min. :152 " "Min. :230.0 " "Min. :30.10 " "Min. :17.20 "
## "1st Qu.:155 " "1st Qu.:238.0 " "1st Qu.:30.90 " "1st Qu.:18.10 "
## "Median :158 " "Median :242.0 " "Median :31.50 " "Median :18.50 "
## "Mean :158 " "Mean :241.3 " "Mean :31.46 " "Mean :18.47 "
## "3rd Qu.:161 " "3rd Qu.:245.0 " "3rd Qu.:32.00 " "3rd Qu.:18.80 "
## "Max. :165 " "Max. :252.0 " "Max. :33.40 " "Max. :19.80 "
## x5
## "Min. :18.60 "
## "1st Qu.:20.20 "
## "Median :20.70 "
## "Mean :20.83 "
## "3rd Qu.:21.50 "
## "Max. :23.10 "
x1: Longitud total del Gorrión
# x1 (total length): frequency bar chart, overall box plot, and box plot
# split by survival status.
barplot(
  table(gorrion[["x1"]]),
  xlab = "Longitud total del Gorrión",
  ylab = "N de Gorriones"
)
# In a box plot the vertical axis shows the measurement itself, not a count,
# so the length label belongs on the y axis.
boxplot(gorrion[["x1"]], ylab = "Longitud total del Gorrión")
# Grouped box plot: the horizontal axis holds the survival groups.
boxplot(
  x1 ~ sobrevi,
  data = gorrion,
  xlab = "Condición de supervivencia",
  ylab = "Longitud total del Gorrión"
)
x5: Longitud de la quilla del esternón
# x5 (keel length): frequency bar chart, overall box plot, and box plot
# split by survival status. The label spelling "externon" is corrected to
# "esternón".
barplot(
  table(gorrion[["x5"]]),
  xlab = "Longitud de la quilla del esternón",
  ylab = "N de Gorriones"
)
# In a box plot the vertical axis shows the measurement itself, not a count,
# so the length label belongs on the y axis.
boxplot(gorrion[["x5"]], ylab = "Longitud de la quilla del esternón")
# Grouped box plot: the horizontal axis holds the survival groups.
boxplot(
  x5 ~ sobrevi,
  data = gorrion,
  xlab = "Condición de supervivencia",
  ylab = "Longitud de la quilla del esternón"
)
# Correlation between x1 and x5 (columns 1 and 5), selected by name so the
# comment and the code cannot drift apart.
cor(gorrion[, c("x1", "x5")])
## x1 x5
## x1 1.0000000 0.6051247
## x5 0.6051247 1.0000000
# Covariance matrix of x1 and x5.
cov(gorrion[, c("x1", "x5")])
## x1 x5
## x1 13.353741 2.1922194
## x5 2.192219 0.9828231
# Covariance of x1 and x5 again: applied to a data frame, var() prints the
# same matrix as cov().
var(gorrion[, c("x1", "x5")])
## x1 x5
## x1 13.353741 2.1922194
## x5 2.192219 0.9828231
Diagrama de dispersión
library(ggplot2)
# Scatter plot of total length (x1, vertical axis) against keel length
# (x5, horizontal axis) with a fitted linear model and its standard-error
# band. The axis labels match the variables actually mapped: x5 is on x and
# x1 is on y. Columns are referenced by name inside aes() instead of
# indexing the data frame.
ggplot(gorrion, aes(x = x5, y = x1)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  xlab("Longitud de la quilla del esternón") +
  ylab("Longitud total del Gorrión")
VECTORES DE MEDIAS Y MATRICES DE COVARIANZAS
Vector de Medias
# Mean vector of the five measurements x1..x5.
media <- colMeans(gorrion[, 1:5])
media
## x1 x2 x3 x4 x5
## 157.97959 241.32653 31.45918 18.46939 20.82653
MATRICES DE COVARIANZAS
# Covariance matrix of the five measurements x1..x5.
covar <- cov(gorrion[, 1:5])
covar
## x1 x2 x3 x4 x5
## x1 13.353741 13.610969 1.9220663 1.3306122 2.1922194
## x2 13.610969 25.682823 2.7136054 2.1977041 2.6578231
## x3 1.922066 2.713605 0.6316327 0.3422662 0.4146471
## x4 1.330612 2.197704 0.3422662 0.3184184 0.3393707
## x5 2.192219 2.657823 0.4146471 0.3393707 0.9828231
Coeficiente de variación
# Coefficient of variation (%) per variable: the square root of each
# diagonal entry of `covar` divided by the matching entry of `media`.
varm <- diag(covar)
round(sqrt(varm) / media * 100, digits = 1)
## x1 x2 x3 x4 x5
## 2.3 2.1 2.5 3.1 4.8
# Correlation matrix of the five measurements x1..x5.
cor(gorrion[, 1:5])
## x1 x2 x3 x4 x5
## x1 1.0000000 0.7349642 0.6618119 0.6452841 0.6051247
## x2 0.7349642 1.0000000 0.6737411 0.7685087 0.5290138
## x3 0.6618119 0.6737411 1.0000000 0.7631899 0.5262701
## x4 0.6452841 0.7685087 0.7631899 1.0000000 0.6066493
## x5 0.6051247 0.5290138 0.5262701 0.6066493 1.0000000
Por condiciones
sobrevivió
# Coefficient of variation (%) restricted to the rows whose status is
# "sobrevivió". Note that `media` and `varm` are overwritten here.
media <- colMeans(subset(gorrion, sobrevi == "sobrevivió", select = 1:5))
covas <- cov(subset(gorrion, sobrevi == "sobrevivió", select = 1:5))
varm <- diag(covas)
round(sqrt(varm) / media * 100, digits = 1)
## x1 x2 x3 x4 x5
## 2.1 1.7 2.3 2.3 3.6
# Correlation matrix of x1..x5 for the rows whose status is "sobrevivió".
cor(subset(gorrion, sobrevi == "sobrevivió", select = 1:5))
## x1 x2 x3 x4 x5
## x1 1.0000000 0.6544674 0.6425068 0.6239195 0.5103557
## x2 0.6544674 1.0000000 0.6263698 0.7464418 0.2774378
## x3 0.6425068 0.6263698 1.0000000 0.6180476 0.4336368
## x4 0.6239195 0.7464418 0.6180476 1.0000000 0.4165447
## x5 0.5103557 0.2774378 0.4336368 0.4165447 1.0000000
murió
# Coefficient of variation (%) restricted to the rows whose status is
# "murió". Note that `media` and `varm` are overwritten here.
media <- colMeans(subset(gorrion, sobrevi == "murió", select = 1:5))
covam <- cov(subset(gorrion, sobrevi == "murió", select = 1:5))
varm <- diag(covam)
round(sqrt(varm) / media * 100, digits = 1)
## x1 x2 x3 x4 x5
## 2.5 2.4 2.7 3.6 5.5
# Correlation matrix of x1..x5 for the rows whose status is "murió".
cor(subset(gorrion, sobrevi == "murió", select = 1:5))
## x1 x2 x3 x4 x5
## x1 1.0000000 0.7761963 0.6769768 0.6824212 0.6568714
## x2 0.7761963 1.0000000 0.6978185 0.7845546 0.6200093
## x3 0.6769768 0.6978185 1.0000000 0.8347046 0.5698878
## x4 0.6824212 0.7845546 0.8347046 1.0000000 0.6677936
## x5 0.6568714 0.6200093 0.5698878 0.6677936 1.0000000
ESTANDARIZACIÓN DE VARIABLES
# Standardize x1..x5 (center on the column mean, divide by the column
# standard deviation) and print the resulting matrix.
scale(gorrion[, 1:5], center = TRUE, scale = TRUE)
## x1 x2 x3 x4 x5
## [1,] -0.541719129 0.7248615 0.17718246 0.05424955 -0.32937165
## [2,] -1.089022992 -0.2617555 -1.33272023 -1.00904159 -1.23720227
## [3,] -1.362674923 -0.2617555 -0.57776889 -0.12296564 -0.22850158
## [4,] -1.362674923 -1.0510492 -0.70359411 -1.36347197 -0.63198186
## [5,] -0.815371061 0.3302147 0.05135723 0.23146474 -0.53111179
## [6,] 1.373844390 1.1195083 0.68048336 0.94032550 0.07410862
## [7,] -0.268067198 -0.6564024 -0.70359411 -0.12296564 -0.63198186
## [8,] -0.815371061 -0.4590790 1.68708515 0.23146474 0.37671883
## [9,] 1.647496321 1.3168318 1.56125993 1.11754069 0.27584876
## [10,] 0.005584733 -0.6564024 -0.57776889 0.58589512 1.18367938
## [11,] 0.005584733 -0.2617555 -0.20029321 0.23146474 1.18367938
## [12,] 0.552888596 0.5275381 -0.45194366 0.23146474 -0.32937165
## [13,] 0.826540527 0.9221849 1.05795903 1.47197107 0.98193924
## [14,] -0.268067198 0.7248615 0.68048336 1.11754069 -0.83372199
## [15,] -0.268067198 -1.2483726 0.05135723 -0.65461121 -1.03546213
## [16,] -0.541719129 -0.8537258 -0.70359411 -0.83182640 -0.53111179
## [17,] 0.005584733 0.5275381 -0.07446799 0.05424955 0.78019910
## [18,] -1.362674923 -0.6564024 -1.20689501 -0.47739602 0.07410862
## [19,] -0.815371061 -1.0510492 -1.45854546 0.05424955 -0.73285193
## [20,] 1.373844390 0.9221849 1.30960948 0.23146474 1.08280931
## [21,] 0.279236665 -1.0510492 0.05135723 -0.83182640 0.67932903
## [22,] -0.815371061 -0.2617555 -0.07446799 -0.83182640 -0.12763152
## [23,] -0.541719129 -0.2617555 0.05135723 -0.47739602 -0.22850158
## [24,] 0.552888596 0.1328913 1.43543470 0.58589512 0.88106917
## [25,] -1.636326855 -1.8403429 -1.45854546 -2.24954792 -1.03546213
## [26,] 0.552888596 1.7114786 0.30300768 0.58589512 1.68802972
## [27,] -0.815371061 -0.8537258 -0.57776889 0.05424955 -0.83372199
## [28,] -0.268067198 0.7248615 0.93213380 1.82640145 0.57845896
## [29,] 1.921148253 0.7248615 2.06456082 2.35804702 1.88976985
## [30,] -1.362674923 -2.0376663 -1.71019591 -2.07233273 -1.03546213
## [31,] 1.100192459 -0.4590790 -1.45854546 -0.83182640 2.29325013
## [32,] 1.100192459 0.3302147 0.17718246 0.58589512 0.47758890
## [33,] 0.279236665 0.7248615 0.42883291 0.05424955 0.88106917
## [34,] 0.279236665 1.1195083 -0.70359411 -0.65461121 -1.84242268
## [35,] -0.815371061 0.3302147 -0.70359411 0.05424955 0.47758890
## [36,] 1.100192459 2.1061254 0.55465813 1.11754069 1.38541951
## [37,] -1.636326855 -2.2349897 -1.33272023 -2.07233273 -2.24590295
## [38,] 0.279236665 0.1328913 -0.82941934 -0.47739602 -0.32937165
## [39,] -0.815371061 -0.6564024 -0.32611844 -1.00904159 -1.53981247
## [40,] 1.373844390 1.5141552 2.44203650 1.82640145 1.99063992
## [41,] 1.373844390 0.1328913 -0.57776889 -0.65461121 -0.12763152
## [42,] -0.541719129 -0.8537258 0.30300768 -0.47739602 -0.53111179
## [43,] 0.279236665 -0.6564024 0.05135723 -0.12296564 -0.53111179
## [44,] 0.826540527 0.7248615 0.80630858 1.11754069 -0.02676145
## [45,] -0.815371061 -1.2483726 -0.95524456 -1.36347197 -1.23720227
## [46,] 1.100192459 1.1195083 0.55465813 1.11754069 -0.43024172
## [47,] -1.362674923 -0.8537258 -1.08106978 0.23146474 -0.43024172
## [48,] 1.100192459 0.7248615 1.30960948 0.05424955 0.27584876
## [49,] 1.647496321 1.3168318 1.05795903 0.58589512 0.07410862
## attr(,"scaled:center")
## x1 x2 x3 x4 x5
## 157.97959 241.32653 31.45918 18.46939 20.82653
## attr(,"scaled:scale")
## x1 x2 x3 x4 x5
## 3.6542772 5.0678223 0.7947532 0.5642857 0.9913744
# Keep the standardized measurements as a data frame; it holds exactly the
# five scaled columns, so no further column selection is needed.
estgor <- data.frame(scale(gorrion[, 1:5]))
mediaest <- colMeans(estgor)
# Covariance matrix of the standardized variables.
cov(estgor)
## x1 x2 x3 x4 x5
## x1 1.0000000 0.7349642 0.6618119 0.6452841 0.6051247
## x2 0.7349642 1.0000000 0.6737411 0.7685087 0.5290138
## x3 0.6618119 0.6737411 1.0000000 0.7631899 0.5262701
## x4 0.6452841 0.7685087 0.7631899 1.0000000 0.6066493
## x5 0.6051247 0.5290138 0.5262701 0.6066493 1.0000000
DISTANCIAS EUCLIDIANAS
# Pairwise Euclidean distances between the standardized observations.
diseu <- dist(estgor, method = "euclidean")
head(diseu)
## [1] 2.3468075 1.5029681 2.5894212 0.5644181 2.2419564 1.6975043
DISTANCIA DE MAHALANOBIS
# Mahalanobis distance of every observation from the mean vector of x1..x5,
# using the sample covariance matrix of the same columns.
distmaha <- mahalanobis(
  x = gorrion[, 1:5],
  center = colMeans(gorrion[, 1:5]),
  cov = cov(gorrion[, 1:5])
)
PRUEBA DE NORMALIDAD MULTIVARIADA (requiere instalar el paquete: install.packages("MVN"))
library(MVN)
## sROC 0.1-2 loaded
# Mardia multivariate normality test on x1..x5, with the "adj" multivariate
# outlier method and no subsetting.
mvn(
  data = gorrion[, 1:5],
  subset = NULL,
  mvnTest = "mardia",
  multivariateOutlierMethod = "adj"
)
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 41.9757769421056 0.194182864711016 YES
## 2 Mardia Kurtosis 0.390420248948055 0.696225817061108 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9509 0.0401 NO
## 2 Shapiro-Wilk x2 0.9789 0.5192 YES
## 3 Shapiro-Wilk x3 0.9738 0.3391 YES
## 4 Shapiro-Wilk x4 0.9810 0.6068 YES
## 5 Shapiro-Wilk x5 0.9868 0.8534 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## x1 49 157.97959 3.6542772 158.0 152.0 165.0 155.0 161.0 0.14396799
## x2 49 241.32653 5.0678223 242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49 31.45918 0.7947532 31.5 30.1 33.4 30.9 32.0 0.36905148
## x4 49 18.46939 0.5642857 18.5 17.2 19.8 18.1 18.8 -0.05133353
## x5 49 20.82653 0.9913744 20.7 18.6 23.1 20.2 21.5 0.22573600
## Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
## x5 -0.3692336
subset: análisis de subconjuntos.
multivariateOutlierMethod: "quan" (basado en el método cuantil) o "adj" (ajustado).
Distancia de Mahalanobis (para detección de datos atípicos)
# Mahalanobis distance of every observation from the mean vector of x1..x5,
# using the sample covariance matrix of the same columns.
distmaha <- mahalanobis(
  x = gorrion[, 1:5],
  center = colMeans(gorrion[, 1:5]),
  cov = cov(gorrion[, 1:5])
)
Explicación de cálculos
# Chi-square Q-Q plot of the Mahalanobis distances held in `distmaha`.
n <- nrow(gorrion)
# Pair each distance with its original row number so points can be labelled.
mtdist <- data.frame(distmaha, lab = seq_len(n))
# Sort the observations by increasing distance.
dm2 <- mtdist[order(mtdist[["distmaha"]]), ]
# Plotting positions (i - 0.5) / n for the ordered distances.
propEm <- (seq_len(n) - 0.5) / n
# dm: square root of each ordered distance; ch2: square root of the matching
# chi-square quantile with 5 degrees of freedom (one per variable x1..x5).
mah.comp <- data.frame(
  dm2,
  propEm = propEm,
  dm = sqrt(dm2[["distmaha"]]),
  ch2 = sqrt(qchisq(propEm, df = 5))
)
# The axis labels describe what is actually plotted: the vertical axis is a
# chi-square quantile root, not an empirical probability.
ggplot(mah.comp, aes(dm, ch2, label = lab)) +
  geom_point() +
  geom_text(vjust = 2) +
  xlab("Raíz de la distancia de Mahalanobis") +
  ylab("Raíz del cuantil chi-cuadrado (5 gl)")