Lectura de Base de Datos
library(readxl)
gorrion=read_excel("C:/Users/Alumno/Desktop/baseTaller/gorriones.xlsx")
gorrion=data.frame(gorrion)
head(gorrion)
## x1 x2 x3 x4 x5 sobrevi
## 1 156 245 31.6 18.5 20.5 sobrevivió
## 2 154 240 30.4 17.9 19.6 sobrevivió
## 3 153 240 31.0 18.4 20.6 sobrevivió
## 4 153 236 30.9 17.7 20.2 sobrevivió
## 5 155 243 31.5 18.6 20.3 sobrevivió
## 6 163 247 32.0 19.0 20.9 sobrevivió
El tamaño muestral Mínimo 50 casos, sugerido más de 200. Al menos 10 casos por cada variable; la cantidad de PRUEBAS DE HIPÓTESIS Matriz de Correlaciones
rcor=cor(gorrion[,1:5])
rcor
## x1 x2 x3 x4 x5
## x1 1.0000000 0.7349642 0.6618119 0.6452841 0.6051247
## x2 0.7349642 1.0000000 0.6737411 0.7685087 0.5290138
## x3 0.6618119 0.6737411 1.0000000 0.7631899 0.5262701
## x4 0.6452841 0.7685087 0.7631899 1.0000000 0.6066493
## x5 0.6051247 0.5290138 0.5262701 0.6066493 1.0000000
0,3 (baja colinealidad), 0,5 (colinealidad media), 0,7 (colinealidad alta). Supuesto de colinealidad
det(rcor)
## [1] 0.03684829
cercano a 0, indica alta multicolinealidad entre las variables; igual a 0 (matriz no singular). Supuesto de multicolinealidad test de esfericidad de Bartlett busca contrastar la hipótesis nula de que la matriz de correlaciones es igual a una matriz de identidad
library(psych)
cortest.bartlett(rcor,n=nrow(gorrion))
## $chisq
## [1] 150.193
##
## $p.value
## [1] 3.401733e-27
##
## $df
## [1] 10
KMO(rcor)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = rcor)
## Overall MSA = 0.83
## MSA for each item =
## x1 x2 x3 x4 x5
## 0.82 0.82 0.86 0.79 0.87
0,90 > KMO Muy bueno 0,90 > KMO > 0,80 Bueno 0,80 > KMO > 0,70 Aceptable 0,70 > KMO > 0,60 Mediocre o regular 0,60 > KMO > 0,50 Malo 0,50 > KMO Inaceptable o muy malo Autovalores y auto vectores de la matriz de covarianzas de la muestra
aucor=eigen(rcor)
aucor
## eigen() decomposition
## $values
## [1] 3.6159783 0.5315041 0.3864245 0.3015655 0.1645275
##
## $vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.4517989 0.05072137 0.6904702 0.42041399 -0.3739091
## [2,] -0.4616809 -0.29956355 0.3405484 -0.54786307 0.5300805
## [3,] -0.4505416 -0.32457242 -0.4544927 0.60629605 0.3427923
## [4,] -0.4707389 -0.18468403 -0.4109350 -0.38827811 -0.6516665
## [5,] -0.3976754 0.87648935 -0.1784558 -0.06887199 0.1924341
Estimación por componentes principales
aucor$values*sqrt(aucor$values)
## [1] 6.87604533 0.38748947 0.24021331 0.16560465 0.06673563
Matriz de Autovalores(egien)
mautov=matrix(diag(aucor$values),ncol=5,nrow=5)
Matriz de Cargas Factoriales
lamda=aucor$vectors%*%sqrt(mautov)
(-1)*lamda
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.8591285 -0.03697807 -0.4292174 -0.23087026 0.15166498
## [2,] 0.8779197 0.21839478 -0.2116953 0.30085890 -0.21501119
## [3,] 0.8567376 0.23662734 0.2825265 -0.33294736 -0.13904337
## [4,] 0.8951441 0.13464265 0.2554498 0.21322285 0.26432892
## [5,] 0.7562086 -0.63899865 0.1109336 0.03782104 -0.07805512
Prueba de hipótesis para m=2 factores
hi=lamda[,1:2]%*%t(lamda[,1:2])
hi
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.7394691 0.7461699 0.7272976 0.7640650 0.6733093
## [2,] 0.7461699 0.8184392 0.8038249 0.8152699 0.5243364
## [3,] 0.7272976 0.8038249 0.7899918 0.7987637 0.4966678
## [4,] 0.7640650 0.8152699 0.7987637 0.8194117 0.5908792
## [5,] 0.6733093 0.5243364 0.4966678 0.5908792 0.9801707
comunalidad
hi2=diag(hi)
hi2
## [1] 0.7394691 0.8184392 0.7899918 0.8194117 0.9801707
Especificidad
ei=1-hi2
ei
## [1] 0.26053091 0.18156080 0.21000824 0.18058834 0.01982929
matrix(diag(ei),5,5)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.2605309 0.0000000 0.0000000 0.0000000 0.00000000
## [2,] 0.0000000 0.1815608 0.0000000 0.0000000 0.00000000
## [3,] 0.0000000 0.0000000 0.2100082 0.0000000 0.00000000
## [4,] 0.0000000 0.0000000 0.0000000 0.1805883 0.00000000
## [5,] 0.0000000 0.0000000 0.0000000 0.0000000 0.01982929
Test para ajuste de m factores Es adecuado m factores para explicar la estructura de asociación
f=fa(gorrion[,1:5],nfactors=2,fm="ml",rotate="none", max.iter=100)
names(f)
## [1] "residual" "dof" "chi" "nh"
## [5] "rms" "EPVAL" "crms" "EBIC"
## [9] "ESABIC" "fit" "fit.off" "sd"
## [13] "factors" "complexity" "n.obs" "objective"
## [17] "criteria" "STATISTIC" "PVAL" "Call"
## [21] "null.model" "null.dof" "null.chisq" "TLI"
## [25] "F0" "RMSEA" "BIC" "SABIC"
## [29] "r.scores" "R2" "valid" "weights"
## [33] "rotation" "communality" "communalities" "uniquenesses"
## [37] "values" "e.values" "loadings" "model"
## [41] "fm" "Structure" "method" "scores"
## [45] "R2.scores" "r" "np.obs" "fn"
## [49] "Vaccounted"
f$communalities
## x1 x2 x3 x4 x5
## 0.9950000 0.6932702 0.6375419 0.9848457 0.4483184
Test M-BOX (1949) comparación de matrices de Covarianzas
library(biotools)
## Loading required package: rpanel
## Loading required package: tcltk
## Package `rpanel', version 1.1-4: type help(rpanel) for summary information
## Loading required package: tkrplot
## Loading required package: MASS
## Loading required package: lattice
## Loading required package: SpatialEpi
## Loading required package: sp
## ---
## biotools version 3.1
##
boxM(gorrion[,1:5],gorrion[,6])
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: gorrion[, 1:5]
## Chi-Sq (approx.) = 10.408, df = 15, p-value = 0.7933
probando la normalidad
library(MVN)
## sROC 0.1-2 loaded
mvn(data = gorrion[,1:5], mvnTest = "mardia")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 41.9757769421056 0.194182864711016 YES
## 2 Mardia Kurtosis 0.390420248948055 0.696225817061108 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9509 0.0401 NO
## 2 Shapiro-Wilk x2 0.9789 0.5192 YES
## 3 Shapiro-Wilk x3 0.9738 0.3391 YES
## 4 Shapiro-Wilk x4 0.9810 0.6068 YES
## 5 Shapiro-Wilk x5 0.9868 0.8534 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## x1 49 157.97959 3.6542772 158.0 152.0 165.0 155.0 161.0 0.14396799
## x2 49 241.32653 5.0678223 242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49 31.45918 0.7947532 31.5 30.1 33.4 30.9 32.0 0.36905148
## x4 49 18.46939 0.5642857 18.5 17.2 19.8 18.1 18.8 -0.05133353
## x5 49 20.82653 0.9913744 20.7 18.6 23.1 20.2 21.5 0.22573600
## Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
## x5 -0.3692336
#Estudio de datos Perdidos
mvn(data = gorrion[,1:4], mvnTest = "mardia",multivariateOutlierMethod="adj")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 16.0590007821897 0.712957258765201 YES
## 2 Mardia Kurtosis -0.782283336778933 0.434048077459422 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9509 0.0401 NO
## 2 Shapiro-Wilk x2 0.9789 0.5192 YES
## 3 Shapiro-Wilk x3 0.9738 0.3391 YES
## 4 Shapiro-Wilk x4 0.9810 0.6068 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## x1 49 157.97959 3.6542772 158.0 152.0 165.0 155.0 161.0 0.14396799
## x2 49 241.32653 5.0678223 242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49 31.45918 0.7947532 31.5 30.1 33.4 30.9 32.0 0.36905148
## x4 49 18.46939 0.5642857 18.5 17.2 19.8 18.1 18.8 -0.05133353
## Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
mvn(data = gorrion[,1:4], mvnTest = "mardia",multivariateOutlierMethod ="quan")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 16.0590007821897 0.712957258765201 YES
## 2 Mardia Kurtosis -0.782283336778933 0.434048077459422 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9509 0.0401 NO
## 2 Shapiro-Wilk x2 0.9789 0.5192 YES
## 3 Shapiro-Wilk x3 0.9738 0.3391 YES
## 4 Shapiro-Wilk x4 0.9810 0.6068 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## x1 49 157.97959 3.6542772 158.0 152.0 165.0 155.0 161.0 0.14396799
## x2 49 241.32653 5.0678223 242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49 31.45918 0.7947532 31.5 30.1 33.4 30.9 32.0 0.36905148
## x4 49 18.46939 0.5642857 18.5 17.2 19.8 18.1 18.8 -0.05133353
## Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
mvn(data = gorrion[,1:4], mvnTest = "mardia",multivariatePlot="qq")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 16.0590007821897 0.712957258765201 YES
## 2 Mardia Kurtosis -0.782283336778933 0.434048077459422 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9509 0.0401 NO
## 2 Shapiro-Wilk x2 0.9789 0.5192 YES
## 3 Shapiro-Wilk x3 0.9738 0.3391 YES
## 4 Shapiro-Wilk x4 0.9810 0.6068 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## x1 49 157.97959 3.6542772 158.0 152.0 165.0 155.0 161.0 0.14396799
## x2 49 241.32653 5.0678223 242.0 230.0 252.0 238.0 245.0 -0.12595919
## x3 49 31.45918 0.7947532 31.5 30.1 33.4 30.9 32.0 0.36905148
## x4 49 18.46939 0.5642857 18.5 17.2 19.8 18.1 18.8 -0.05133353
## Kurtosis
## x1 -1.2028081
## x2 -0.6907094
## x3 -0.6078152
## x4 -0.1181914
#Distancia de mahalanobis (para detección de datos Perdidos)
distmaha=mahalanobis(gorrion[1:5],colMeans(gorrion[1:5]),cov(gorrion[1:5]))
Explicación
n=length(gorrion[,1])
mtdist=data.frame(distmaha,lab=1:n)
dm2=mtdist[order(mtdist[,1]),]
propEm=(1:n-0.5)/n
mah.comp=data.frame(dm2,propEm=propEm,dm=sqrt(dm2[,1]),ch2=sqrt(qchisq(propEm,5)))
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
ggplot(mah.comp,aes(dm,ch2,label=lab))+geom_point()+geom_text(vjust = 2)+xlab("distmaha")+ylab("Probabilidad Emperica")