Lectura de Base de Datos

library(readxl)
gorrion=read_excel("C:/Users/Alumno/Desktop/baseTaller/gorriones.xlsx")
gorrion=data.frame(gorrion)
head(gorrion)
##    x1  x2   x3   x4   x5    sobrevi
## 1 156 245 31.6 18.5 20.5 sobrevivió
## 2 154 240 30.4 17.9 19.6 sobrevivió
## 3 153 240 31.0 18.4 20.6 sobrevivió
## 4 153 236 30.9 17.7 20.2 sobrevivió
## 5 155 243 31.5 18.6 20.3 sobrevivió
## 6 163 247 32.0 19.0 20.9 sobrevivió

El tamaño muestral Mínimo 50 casos, sugerido más de 200. Al menos 10 casos por cada variable; la cantidad de PRUEBAS DE HIPÓTESIS Matriz de Correlaciones

rcor=cor(gorrion[,1:5])
rcor
##           x1        x2        x3        x4        x5
## x1 1.0000000 0.7349642 0.6618119 0.6452841 0.6051247
## x2 0.7349642 1.0000000 0.6737411 0.7685087 0.5290138
## x3 0.6618119 0.6737411 1.0000000 0.7631899 0.5262701
## x4 0.6452841 0.7685087 0.7631899 1.0000000 0.6066493
## x5 0.6051247 0.5290138 0.5262701 0.6066493 1.0000000

0,3 (baja colinealidad), 0,5 (colinealidad media), 0,7 (colinealidad alta). Supuesto de colinealidad

det(rcor)
## [1] 0.03684829

cercano a 0, indica alta multicolinealidad entre las variables; igual a 0 (matriz no singular). Supuesto de multicolinealidad test de esfericidad de Bartlett busca contrastar la hipótesis nula de que la matriz de correlaciones es igual a una matriz de identidad

library(psych)
cortest.bartlett(rcor,n=nrow(gorrion))
## $chisq
## [1] 150.193
## 
## $p.value
## [1] 3.401733e-27
## 
## $df
## [1] 10
KMO(rcor)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = rcor)
## Overall MSA =  0.83
## MSA for each item = 
##   x1   x2   x3   x4   x5 
## 0.82 0.82 0.86 0.79 0.87

0,90 > KMO Muy bueno 0,90 > KMO > 0,80 Bueno 0,80 > KMO > 0,70 Aceptable 0,70 > KMO > 0,60 Mediocre o regular 0,60 > KMO > 0,50 Malo 0,50 > KMO Inaceptable o muy malo Autovalores y auto vectores de la matriz de covarianzas de la muestra

aucor=eigen(rcor)
aucor
## eigen() decomposition
## $values
## [1] 3.6159783 0.5315041 0.3864245 0.3015655 0.1645275
## 
## $vectors
##            [,1]        [,2]       [,3]        [,4]       [,5]
## [1,] -0.4517989  0.05072137  0.6904702  0.42041399 -0.3739091
## [2,] -0.4616809 -0.29956355  0.3405484 -0.54786307  0.5300805
## [3,] -0.4505416 -0.32457242 -0.4544927  0.60629605  0.3427923
## [4,] -0.4707389 -0.18468403 -0.4109350 -0.38827811 -0.6516665
## [5,] -0.3976754  0.87648935 -0.1784558 -0.06887199  0.1924341

Estimación por componentes principales

aucor$values*sqrt(aucor$values)
## [1] 6.87604533 0.38748947 0.24021331 0.16560465 0.06673563

Matriz de Autovalores(egien)

mautov=matrix(diag(aucor$values),ncol=5,nrow=5)

Matriz de Cargas Factoriales

lamda=aucor$vectors%*%sqrt(mautov)
(-1)*lamda
##           [,1]        [,2]       [,3]        [,4]        [,5]
## [1,] 0.8591285 -0.03697807 -0.4292174 -0.23087026  0.15166498
## [2,] 0.8779197  0.21839478 -0.2116953  0.30085890 -0.21501119
## [3,] 0.8567376  0.23662734  0.2825265 -0.33294736 -0.13904337
## [4,] 0.8951441  0.13464265  0.2554498  0.21322285  0.26432892
## [5,] 0.7562086 -0.63899865  0.1109336  0.03782104 -0.07805512

Prueba de hipótesis para m=2 factores

hi=lamda[,1:2]%*%t(lamda[,1:2])
hi
##           [,1]      [,2]      [,3]      [,4]      [,5]
## [1,] 0.7394691 0.7461699 0.7272976 0.7640650 0.6733093
## [2,] 0.7461699 0.8184392 0.8038249 0.8152699 0.5243364
## [3,] 0.7272976 0.8038249 0.7899918 0.7987637 0.4966678
## [4,] 0.7640650 0.8152699 0.7987637 0.8194117 0.5908792
## [5,] 0.6733093 0.5243364 0.4966678 0.5908792 0.9801707

comunalidad

hi2=diag(hi)
hi2
## [1] 0.7394691 0.8184392 0.7899918 0.8194117 0.9801707

Especificidad

ei=1-hi2
ei
## [1] 0.26053091 0.18156080 0.21000824 0.18058834 0.01982929
matrix(diag(ei),5,5)
##           [,1]      [,2]      [,3]      [,4]       [,5]
## [1,] 0.2605309 0.0000000 0.0000000 0.0000000 0.00000000
## [2,] 0.0000000 0.1815608 0.0000000 0.0000000 0.00000000
## [3,] 0.0000000 0.0000000 0.2100082 0.0000000 0.00000000
## [4,] 0.0000000 0.0000000 0.0000000 0.1805883 0.00000000
## [5,] 0.0000000 0.0000000 0.0000000 0.0000000 0.01982929

Test para ajuste de m factores Es adecuado m factores para explicar la estructura de asociación

f=fa(gorrion[,1:5],nfactors=2,fm="ml",rotate="none", max.iter=100)
names(f)
##  [1] "residual"      "dof"           "chi"           "nh"           
##  [5] "rms"           "EPVAL"         "crms"          "EBIC"         
##  [9] "ESABIC"        "fit"           "fit.off"       "sd"           
## [13] "factors"       "complexity"    "n.obs"         "objective"    
## [17] "criteria"      "STATISTIC"     "PVAL"          "Call"         
## [21] "null.model"    "null.dof"      "null.chisq"    "TLI"          
## [25] "F0"            "RMSEA"         "BIC"           "SABIC"        
## [29] "r.scores"      "R2"            "valid"         "weights"      
## [33] "rotation"      "communality"   "communalities" "uniquenesses" 
## [37] "values"        "e.values"      "loadings"      "model"        
## [41] "fm"            "Structure"     "method"        "scores"       
## [45] "R2.scores"     "r"             "np.obs"        "fn"           
## [49] "Vaccounted"
f$communalities
##        x1        x2        x3        x4        x5 
## 0.9950000 0.6932702 0.6375419 0.9848457 0.4483184

Test M-BOX (1949) comparación de matrices de Covarianzas

library(biotools)
## Loading required package: rpanel
## Loading required package: tcltk
## Package `rpanel', version 1.1-4: type help(rpanel) for summary information
## Loading required package: tkrplot
## Loading required package: MASS
## Loading required package: lattice
## Loading required package: SpatialEpi
## Loading required package: sp
## ---
## biotools version 3.1
## 
boxM(gorrion[,1:5],gorrion[,6])
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  gorrion[, 1:5]
## Chi-Sq (approx.) = 10.408, df = 15, p-value = 0.7933

probando la normalidad

library(MVN)
## sROC 0.1-2 loaded
mvn(data = gorrion[,1:5], mvnTest = "mardia")
## $multivariateNormality
##              Test         Statistic           p value Result
## 1 Mardia Skewness  41.9757769421056 0.194182864711016    YES
## 2 Mardia Kurtosis 0.390420248948055 0.696225817061108    YES
## 3             MVN              <NA>              <NA>    YES
## 
## $univariateNormality
##           Test  Variable Statistic   p value Normality
## 1 Shapiro-Wilk    x1        0.9509    0.0401    NO    
## 2 Shapiro-Wilk    x2        0.9789    0.5192    YES   
## 3 Shapiro-Wilk    x3        0.9738    0.3391    YES   
## 4 Shapiro-Wilk    x4        0.9810    0.6068    YES   
## 5 Shapiro-Wilk    x5        0.9868    0.8534    YES   
## 
## $Descriptives
##     n      Mean   Std.Dev Median   Min   Max  25th  75th        Skew
## x1 49 157.97959 3.6542772  158.0 152.0 165.0 155.0 161.0  0.14396799
## x2 49 241.32653 5.0678223  242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49  31.45918 0.7947532   31.5  30.1  33.4  30.9  32.0  0.36905148
## x4 49  18.46939 0.5642857   18.5  17.2  19.8  18.1  18.8 -0.05133353
## x5 49  20.82653 0.9913744   20.7  18.6  23.1  20.2  21.5  0.22573600
##      Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
## x5 -0.3692336
#Estudio de datos Perdidos
mvn(data = gorrion[,1:4], mvnTest = "mardia",multivariateOutlierMethod="adj")

## $multivariateNormality
##              Test          Statistic           p value Result
## 1 Mardia Skewness   16.0590007821897 0.712957258765201    YES
## 2 Mardia Kurtosis -0.782283336778933 0.434048077459422    YES
## 3             MVN               <NA>              <NA>    YES
## 
## $univariateNormality
##           Test  Variable Statistic   p value Normality
## 1 Shapiro-Wilk    x1        0.9509    0.0401    NO    
## 2 Shapiro-Wilk    x2        0.9789    0.5192    YES   
## 3 Shapiro-Wilk    x3        0.9738    0.3391    YES   
## 4 Shapiro-Wilk    x4        0.9810    0.6068    YES   
## 
## $Descriptives
##     n      Mean   Std.Dev Median   Min   Max  25th  75th        Skew
## x1 49 157.97959 3.6542772  158.0 152.0 165.0 155.0 161.0  0.14396799
## x2 49 241.32653 5.0678223  242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49  31.45918 0.7947532   31.5  30.1  33.4  30.9  32.0  0.36905148
## x4 49  18.46939 0.5642857   18.5  17.2  19.8  18.1  18.8 -0.05133353
##      Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
mvn(data = gorrion[,1:4], mvnTest = "mardia",multivariateOutlierMethod ="quan")

## $multivariateNormality
##              Test          Statistic           p value Result
## 1 Mardia Skewness   16.0590007821897 0.712957258765201    YES
## 2 Mardia Kurtosis -0.782283336778933 0.434048077459422    YES
## 3             MVN               <NA>              <NA>    YES
## 
## $univariateNormality
##           Test  Variable Statistic   p value Normality
## 1 Shapiro-Wilk    x1        0.9509    0.0401    NO    
## 2 Shapiro-Wilk    x2        0.9789    0.5192    YES   
## 3 Shapiro-Wilk    x3        0.9738    0.3391    YES   
## 4 Shapiro-Wilk    x4        0.9810    0.6068    YES   
## 
## $Descriptives
##     n      Mean   Std.Dev Median   Min   Max  25th  75th        Skew
## x1 49 157.97959 3.6542772  158.0 152.0 165.0 155.0 161.0  0.14396799
## x2 49 241.32653 5.0678223  242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49  31.45918 0.7947532   31.5  30.1  33.4  30.9  32.0  0.36905148
## x4 49  18.46939 0.5642857   18.5  17.2  19.8  18.1  18.8 -0.05133353
##      Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
mvn(data = gorrion[,1:4], mvnTest = "mardia",multivariatePlot="qq")

## $multivariateNormality
##              Test          Statistic           p value Result
## 1 Mardia Skewness   16.0590007821897 0.712957258765201    YES
## 2 Mardia Kurtosis -0.782283336778933 0.434048077459422    YES
## 3             MVN               <NA>              <NA>    YES
## 
## $univariateNormality
##           Test  Variable Statistic   p value Normality
## 1 Shapiro-Wilk    x1        0.9509    0.0401    NO    
## 2 Shapiro-Wilk    x2        0.9789    0.5192    YES   
## 3 Shapiro-Wilk    x3        0.9738    0.3391    YES   
## 4 Shapiro-Wilk    x4        0.9810    0.6068    YES   
## 
## $Descriptives
##     n      Mean   Std.Dev Median   Min   Max  25th  75th        Skew
## x1 49 157.97959 3.6542772  158.0 152.0 165.0 155.0 161.0  0.14396799
## x2 49 241.32653 5.0678223  242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49  31.45918 0.7947532   31.5  30.1  33.4  30.9  32.0  0.36905148
## x4 49  18.46939 0.5642857   18.5  17.2  19.8  18.1  18.8 -0.05133353
##      Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
#Distancia de mahalanobis (para detección de datos Perdidos)
distmaha=mahalanobis(gorrion[1:5],colMeans(gorrion[1:5]),cov(gorrion[1:5]))

Explicación

n=length(gorrion[,1])
mtdist=data.frame(distmaha,lab=1:n)
dm2=mtdist[order(mtdist[,1]),]
propEm=(1:n-0.5)/n
mah.comp=data.frame(dm2,propEm=propEm,dm=sqrt(dm2[,1]),ch2=sqrt(qchisq(propEm,5)))
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
ggplot(mah.comp,aes(dm,ch2,label=lab))+geom_point()+geom_text(vjust = 2)+xlab("distmaha")+ylab("Probabilidad Emperica")