Lectura de Base de Datos

library(readxl)
# Read the sparrow workbook and store it as a plain data.frame named
# `gorrion`, then preview its first rows.
gorrion <- data.frame(
  read_excel("C:/Users/Alumno/Desktop/baseTaller/gorriones.xlsx")
)
head(gorrion)
##    x1  x2   x3   x4   x5    sobrevi
## 1 156 245 31.6 18.5 20.5 sobrevivió
## 2 154 240 30.4 17.9 19.6 sobrevivió
## 3 153 240 31.0 18.4 20.6 sobrevivió
## 4 153 236 30.9 17.7 20.2 sobrevivió
## 5 155 243 31.5 18.6 20.3 sobrevivió
## 6 163 247 32.0 19.0 20.9 sobrevivió

Estadísticas Resumen

# Summary statistics (min, quartiles, mean, max) for columns 1 to 5.
summary(gorrion[, 1:5])
##        x1            x2              x3              x4       
##  Min.   :152   Min.   :230.0   Min.   :30.10   Min.   :17.20  
##  1st Qu.:155   1st Qu.:238.0   1st Qu.:30.90   1st Qu.:18.10  
##  Median :158   Median :242.0   Median :31.50   Median :18.50  
##  Mean   :158   Mean   :241.3   Mean   :31.46   Mean   :18.47  
##  3rd Qu.:161   3rd Qu.:245.0   3rd Qu.:32.00   3rd Qu.:18.80  
##  Max.   :165   Max.   :252.0   Max.   :33.40   Max.   :19.80  
##        x5       
##  Min.   :18.60  
##  1st Qu.:20.20  
##  Median :20.70  
##  Mean   :20.83  
##  3rd Qu.:21.50  
##  Max.   :23.10
# Same summary passed through rbind(); it prints as a matrix of quoted
# strings instead of a formatted table.
rbind(summary(gorrion[, 1:5]))
##        x1              x2                x3                x4         
##  "Min.   :152  " "Min.   :230.0  " "Min.   :30.10  " "Min.   :17.20  "
##  "1st Qu.:155  " "1st Qu.:238.0  " "1st Qu.:30.90  " "1st Qu.:18.10  "
##  "Median :158  " "Median :242.0  " "Median :31.50  " "Median :18.50  "
##  "Mean   :158  " "Mean   :241.3  " "Mean   :31.46  " "Mean   :18.47  "
##  "3rd Qu.:161  " "3rd Qu.:245.0  " "3rd Qu.:32.00  " "3rd Qu.:18.80  "
##  "Max.   :165  " "Max.   :252.0  " "Max.   :33.40  " "Max.   :19.80  "
##        x5         
##  "Min.   :18.60  "
##  "1st Qu.:20.20  "
##  "Median :20.70  "
##  "Mean   :20.83  "
##  "3rd Qu.:21.50  "
##  "Max.   :23.10  "

x1: Longitud total del Gorrión

# Bar chart: number of sparrows at each observed value of column 1 (x1).
barplot(
  table(gorrion[, 1]),
  xlab = "Longitud total del Gorrión",
  ylab = "N de Gorriones"
)

# Box plot of x1. The vertical axis shows the measurement itself, not a
# count, so it carries the variable label.
boxplot(gorrion[, 1], ylab = "Longitud total del Gorrión")

# Box plot of x1 split by column 6: the groups sit on the horizontal axis
# and the measurement on the vertical axis.
boxplot(
  gorrion[, 1] ~ gorrion[, 6],
  xlab = "Condición de supervivencia",
  ylab = "Longitud total del Gorrión"
)

x5: Longitud de la quilla del esternón

# Bar chart: number of sparrows at each observed value of column 5 (x5).
barplot(
  table(gorrion[, 5]),
  xlab = "Longitud de la quilla del externon",
  ylab = "N de Gorriones"
)

# Box plot of x5. The vertical axis shows the measurement itself, not a
# count, so it carries the variable label.
boxplot(gorrion[, 5], ylab = "Longitud de la quilla del externon")

# Box plot of x5 split by column 6: the groups sit on the horizontal axis
# and the measurement on the vertical axis.
boxplot(
  gorrion[, 5] ~ gorrion[, 6],
  xlab = "Condición de supervivencia",
  ylab = "Longitud de la quilla del externon"
)

# Correlation matrix of x1 and x5 (columns 1 and 5)
cor(gorrion[, c("x1", "x5")])
##           x1        x5
## x1 1.0000000 0.6051247
## x5 0.6051247 1.0000000
# Covariance matrix of x1 and x5 (columns 1 and 5)
cov(gorrion[, c("x1", "x5")])
##           x1        x5
## x1 13.353741 2.1922194
## x5  2.192219 0.9828231
# Variance-covariance matrix of x1 and x5 via var(); for these two columns
# it prints the same matrix as the cov() call on them.
var(gorrion[, c("x1", "x5")])
##           x1        x5
## x1 13.353741 2.1922194
## x5  2.192219 0.9828231

Diagrama de dispersión

library(ggplot2)
# Scatter plot of x1 (vertical) against x5 (horizontal) with a fitted
# linear trend and its confidence band. Columns are mapped by name inside
# aes(), and each axis label now names the variable on that axis (the two
# labels were previously swapped).
ggplot(gorrion, aes(x = x5, y = x1)) +
  geom_point() +
  geom_smooth(method = lm, se = TRUE) +
  xlab("Longitud de la quilla del externon") +
  ylab("Longitud total del Gorrión")

VECTORES DE MEDIAS Y MATRICES DE COVARIANZAS

Vector de Medias

# Mean vector of the five measurement columns (x1..x5).
media <- colMeans(gorrion[, 1:5])
media
##        x1        x2        x3        x4        x5 
## 157.97959 241.32653  31.45918  18.46939  20.82653

MATRICES DE COVARIANZAS

# Covariance matrix of the five measurement columns (x1..x5).
covar <- cov(gorrion[, 1:5])
covar
##           x1        x2        x3        x4        x5
## x1 13.353741 13.610969 1.9220663 1.3306122 2.1922194
## x2 13.610969 25.682823 2.7136054 2.1977041 2.6578231
## x3  1.922066  2.713605 0.6316327 0.3422662 0.4146471
## x4  1.330612  2.197704 0.3422662 0.3184184 0.3393707
## x5  2.192219  2.657823 0.4146471 0.3393707 0.9828231

Coeficiente de variación

# Coefficient of variation in percent: standard deviation (square root of
# the diagonal of `covar`) divided by the mean, rounded to one decimal.
varm <- diag(covar)
round(sqrt(varm) / media * 100, digits = 1)
##  x1  x2  x3  x4  x5 
## 2.3 2.1 2.5 3.1 4.8
# Correlation matrix of the five measurement columns (x1..x5).
cor(gorrion[, 1:5])
##           x1        x2        x3        x4        x5
## x1 1.0000000 0.7349642 0.6618119 0.6452841 0.6051247
## x2 0.7349642 1.0000000 0.6737411 0.7685087 0.5290138
## x3 0.6618119 0.6737411 1.0000000 0.7631899 0.5262701
## x4 0.6452841 0.7685087 0.7631899 1.0000000 0.6066493
## x5 0.6051247 0.5290138 0.5262701 0.6066493 1.0000000

Por condición: sobrevivió

# Coefficient of variation (%) of x1..x5 for the rows whose `sobrevi`
# value is "sobrevivió".
# Note: this overwrites `media` and `varm`, which earlier held the values
# for the full data set.
media <- colMeans(subset(gorrion, sobrevi == "sobrevivió", select = 1:5))
covas <- cov(subset(gorrion, sobrevi == "sobrevivió", select = 1:5))
varm <- diag(covas)
round(sqrt(varm) / media * 100, digits = 1)
##  x1  x2  x3  x4  x5 
## 2.1 1.7 2.3 2.3 3.6
# Correlation matrix of x1..x5 for the rows whose `sobrevi` value is
# "sobrevivió".
cor(subset(gorrion, sobrevi == "sobrevivió", select = 1:5))
##           x1        x2        x3        x4        x5
## x1 1.0000000 0.6544674 0.6425068 0.6239195 0.5103557
## x2 0.6544674 1.0000000 0.6263698 0.7464418 0.2774378
## x3 0.6425068 0.6263698 1.0000000 0.6180476 0.4336368
## x4 0.6239195 0.7464418 0.6180476 1.0000000 0.4165447
## x5 0.5103557 0.2774378 0.4336368 0.4165447 1.0000000

murió

# Coefficient of variation (%) of x1..x5 for the rows whose `sobrevi`
# value is "murió".
# Note: this overwrites `media` and `varm` again.
media <- colMeans(subset(gorrion, sobrevi == "murió", select = 1:5))
covam <- cov(subset(gorrion, sobrevi == "murió", select = 1:5))
varm <- diag(covam)
round(sqrt(varm) / media * 100, digits = 1)
##  x1  x2  x3  x4  x5 
## 2.5 2.4 2.7 3.6 5.5
# Correlation matrix of x1..x5 for the rows whose `sobrevi` value is
# "murió".
cor(subset(gorrion, sobrevi == "murió", select = 1:5))
##           x1        x2        x3        x4        x5
## x1 1.0000000 0.7761963 0.6769768 0.6824212 0.6568714
## x2 0.7761963 1.0000000 0.6978185 0.7845546 0.6200093
## x3 0.6769768 0.6978185 1.0000000 0.8347046 0.5698878
## x4 0.6824212 0.7845546 0.8347046 1.0000000 0.6677936
## x5 0.6568714 0.6200093 0.5698878 0.6677936 1.0000000

ESTANDARIZACIÓN DE VARIABLES

# Standardise x1..x5: subtract each column mean and divide by its standard
# deviation (the defaults of scale(), spelled out here).
scale(gorrion[, 1:5], center = TRUE, scale = TRUE)
##                 x1         x2          x3          x4          x5
##  [1,] -0.541719129  0.7248615  0.17718246  0.05424955 -0.32937165
##  [2,] -1.089022992 -0.2617555 -1.33272023 -1.00904159 -1.23720227
##  [3,] -1.362674923 -0.2617555 -0.57776889 -0.12296564 -0.22850158
##  [4,] -1.362674923 -1.0510492 -0.70359411 -1.36347197 -0.63198186
##  [5,] -0.815371061  0.3302147  0.05135723  0.23146474 -0.53111179
##  [6,]  1.373844390  1.1195083  0.68048336  0.94032550  0.07410862
##  [7,] -0.268067198 -0.6564024 -0.70359411 -0.12296564 -0.63198186
##  [8,] -0.815371061 -0.4590790  1.68708515  0.23146474  0.37671883
##  [9,]  1.647496321  1.3168318  1.56125993  1.11754069  0.27584876
## [10,]  0.005584733 -0.6564024 -0.57776889  0.58589512  1.18367938
## [11,]  0.005584733 -0.2617555 -0.20029321  0.23146474  1.18367938
## [12,]  0.552888596  0.5275381 -0.45194366  0.23146474 -0.32937165
## [13,]  0.826540527  0.9221849  1.05795903  1.47197107  0.98193924
## [14,] -0.268067198  0.7248615  0.68048336  1.11754069 -0.83372199
## [15,] -0.268067198 -1.2483726  0.05135723 -0.65461121 -1.03546213
## [16,] -0.541719129 -0.8537258 -0.70359411 -0.83182640 -0.53111179
## [17,]  0.005584733  0.5275381 -0.07446799  0.05424955  0.78019910
## [18,] -1.362674923 -0.6564024 -1.20689501 -0.47739602  0.07410862
## [19,] -0.815371061 -1.0510492 -1.45854546  0.05424955 -0.73285193
## [20,]  1.373844390  0.9221849  1.30960948  0.23146474  1.08280931
## [21,]  0.279236665 -1.0510492  0.05135723 -0.83182640  0.67932903
## [22,] -0.815371061 -0.2617555 -0.07446799 -0.83182640 -0.12763152
## [23,] -0.541719129 -0.2617555  0.05135723 -0.47739602 -0.22850158
## [24,]  0.552888596  0.1328913  1.43543470  0.58589512  0.88106917
## [25,] -1.636326855 -1.8403429 -1.45854546 -2.24954792 -1.03546213
## [26,]  0.552888596  1.7114786  0.30300768  0.58589512  1.68802972
## [27,] -0.815371061 -0.8537258 -0.57776889  0.05424955 -0.83372199
## [28,] -0.268067198  0.7248615  0.93213380  1.82640145  0.57845896
## [29,]  1.921148253  0.7248615  2.06456082  2.35804702  1.88976985
## [30,] -1.362674923 -2.0376663 -1.71019591 -2.07233273 -1.03546213
## [31,]  1.100192459 -0.4590790 -1.45854546 -0.83182640  2.29325013
## [32,]  1.100192459  0.3302147  0.17718246  0.58589512  0.47758890
## [33,]  0.279236665  0.7248615  0.42883291  0.05424955  0.88106917
## [34,]  0.279236665  1.1195083 -0.70359411 -0.65461121 -1.84242268
## [35,] -0.815371061  0.3302147 -0.70359411  0.05424955  0.47758890
## [36,]  1.100192459  2.1061254  0.55465813  1.11754069  1.38541951
## [37,] -1.636326855 -2.2349897 -1.33272023 -2.07233273 -2.24590295
## [38,]  0.279236665  0.1328913 -0.82941934 -0.47739602 -0.32937165
## [39,] -0.815371061 -0.6564024 -0.32611844 -1.00904159 -1.53981247
## [40,]  1.373844390  1.5141552  2.44203650  1.82640145  1.99063992
## [41,]  1.373844390  0.1328913 -0.57776889 -0.65461121 -0.12763152
## [42,] -0.541719129 -0.8537258  0.30300768 -0.47739602 -0.53111179
## [43,]  0.279236665 -0.6564024  0.05135723 -0.12296564 -0.53111179
## [44,]  0.826540527  0.7248615  0.80630858  1.11754069 -0.02676145
## [45,] -0.815371061 -1.2483726 -0.95524456 -1.36347197 -1.23720227
## [46,]  1.100192459  1.1195083  0.55465813  1.11754069 -0.43024172
## [47,] -1.362674923 -0.8537258 -1.08106978  0.23146474 -0.43024172
## [48,]  1.100192459  0.7248615  1.30960948  0.05424955  0.27584876
## [49,]  1.647496321  1.3168318  1.05795903  0.58589512  0.07410862
## attr(,"scaled:center")
##        x1        x2        x3        x4        x5 
## 157.97959 241.32653  31.45918  18.46939  20.82653 
## attr(,"scaled:scale")
##        x1        x2        x3        x4        x5 
## 3.6542772 5.0678223 0.7947532 0.5642857 0.9913744
# Keep the standardised measurements as a data.frame, store their column
# means, and print their covariance matrix.
estgor <- data.frame(scale(gorrion[, 1:5]))
mediaest <- colMeans(estgor[, 1:5])
cov(estgor[, 1:5])
##           x1        x2        x3        x4        x5
## x1 1.0000000 0.7349642 0.6618119 0.6452841 0.6051247
## x2 0.7349642 1.0000000 0.6737411 0.7685087 0.5290138
## x3 0.6618119 0.6737411 1.0000000 0.7631899 0.5262701
## x4 0.6452841 0.7685087 0.7631899 1.0000000 0.6066493
## x5 0.6051247 0.5290138 0.5262701 0.6066493 1.0000000

DISTANCIAS EUCLIDIANAS

# Pairwise Euclidean distances between the standardised observations;
# only the first few values are printed.
diseu <- dist(estgor, method = "euclidean")
head(diseu)
## [1] 2.3468075 1.5029681 2.5894212 0.5644181 2.2419564 1.6975043

DISTANCIA DE MAHALANOBIS

# Mahalanobis distance of every observation (x1..x5) from the sample mean
# vector, using the sample covariance matrix.
distmaha <- mahalanobis(
  x = gorrion[, 1:5],
  center = colMeans(gorrion[, 1:5]),
  cov = cov(gorrion[, 1:5])
)

PRUEBA DE NORMALIDAD MULTIVARIADA

Requiere el paquete MVN; si no está instalado: install.packages("MVN")

library(MVN)
## sROC 0.1-2 loaded
# Multivariate normality check on x1..x5 using Mardia's test, with the
# multivariate outlier method set to "adj".
mvn(
  gorrion[, 1:5],
  subset = NULL,
  mvnTest = "mardia",
  multivariateOutlierMethod = "adj"
)

## $multivariateNormality
##              Test         Statistic           p value Result
## 1 Mardia Skewness  41.9757769421056 0.194182864711016    YES
## 2 Mardia Kurtosis 0.390420248948055 0.696225817061108    YES
## 3             MVN              <NA>              <NA>    YES
## 
## $univariateNormality
##           Test  Variable Statistic   p value Normality
## 1 Shapiro-Wilk    x1        0.9509    0.0401    NO    
## 2 Shapiro-Wilk    x2        0.9789    0.5192    YES   
## 3 Shapiro-Wilk    x3        0.9738    0.3391    YES   
## 4 Shapiro-Wilk    x4        0.9810    0.6068    YES   
## 5 Shapiro-Wilk    x5        0.9868    0.8534    YES   
## 
## $Descriptives
##     n      Mean   Std.Dev Median   Min   Max  25th  75th        Skew
## x1 49 157.97959 3.6542772  158.0 152.0 165.0 155.0 161.0  0.14396799
## x2 49 241.32653 5.0678223  242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49  31.45918 0.7947532   31.5  30.1  33.4  30.9  32.0  0.36905148
## x4 49  18.46939 0.5642857   18.5  17.2  19.8  18.1  18.8 -0.05133353
## x5 49  20.82653 0.9913744   20.7  18.6  23.1  20.2  21.5  0.22573600
##      Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
## x5 -0.3692336

subset: análisis de subconjuntos. multivariateOutlierMethod: "quan" (método basado en cuantiles) o "adj" (método de cuantil ajustado).

Distancia de Mahalanobis (para detección de datos atípicos)

# Recompute the Mahalanobis distances of x1..x5 from the sample mean vector
# (same call as earlier in the script).
distmaha <- mahalanobis(
  x = gorrion[, 1:5],
  center = colMeans(gorrion[, 1:5]),
  cov = cov(gorrion[, 1:5])
)

Explicación de cálculos

# Chi-square Q-Q plot of the Mahalanobis distances held in `distmaha`.
n <- nrow(gorrion)

# Pair each distance with its original row number so the points can be
# labelled after sorting.
mtdist <- data.frame(distmaha, lab = seq_len(n))
dm2 <- mtdist[order(mtdist[, 1]), ]

# Plotting positions (i - 0.5) / n for the sorted distances.
propEm <- (seq_len(n) - 0.5) / n

# dm: square root of the sorted distances.
# ch2: square root of the chi-square quantile at each plotting position;
#      df = 5 matches the five columns used to compute `distmaha`.
mah.comp <- data.frame(
  dm2,
  propEm = propEm,
  dm = sqrt(dm2[, 1]),
  ch2 = sqrt(qchisq(propEm, df = 5))
)

# The vertical axis shows theoretical quantile roots, not probabilities,
# so the axis labels describe the plotted quantities.
ggplot(mah.comp, aes(dm, ch2, label = lab)) +
  geom_point() +
  geom_text(vjust = 2) +
  xlab("Distancia de Mahalanobis") +
  ylab("Cuantil teórico (raíz de chi-cuadrado, 5 gl)")