Lectura de Datos
library(readxl)
pulgas=read_excel("C:/Users/Alumno/Desktop/baseTaller/pulgas.xlsx")
pulgas=data.frame(pulgas)
head(pulgas)
## x1 x2 x3 x4 Grupo
## 1 189 245 137 163 Especie Haltica Oleracea
## 2 192 260 132 217 Especie Haltica Oleracea
## 3 217 276 141 192 Especie Haltica Oleracea
## 4 221 299 142 213 Especie Haltica Oleracea
## 5 171 239 128 158 Especie Haltica Oleracea
## 6 192 262 147 173 Especie Haltica Oleracea
Supuestos de ANOVA Prueba de Normalidad probando la normalidad install.packages(“MVN”)
library(MVN)
## sROC 0.1-2 loaded
mvn(data = pulgas[,1:4], mvnTest = "mardia")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 14.5033549394537 0.804085904776078 YES
## 2 Mardia Kurtosis -2.1127725094963 0.0346202436872789 NO
## 3 MVN <NA> <NA> NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9710 0.4021 YES
## 2 Shapiro-Wilk x2 0.9595 0.1714 YES
## 3 Shapiro-Wilk x3 0.9619 0.2066 YES
## 4 Shapiro-Wilk x4 0.9843 0.8510 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## x1 39 186.8205 14.03168 184 158 221 177.0 193.5 0.3790264 -0.1856496
## x2 39 279.2308 22.42116 278 237 317 262.5 299.0 -0.2462722 -1.0623505
## x3 39 147.5385 14.69845 146 121 184 137.5 161.0 0.3847308 -0.6784951
## x4 39 197.8974 18.48868 197 158 235 187.0 213.0 -0.1040536 -0.6351910
mvn(data = pulgas[,1:4], mvnTest = "hz")
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 0.856282 0.1467007 YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9710 0.4021 YES
## 2 Shapiro-Wilk x2 0.9595 0.1714 YES
## 3 Shapiro-Wilk x3 0.9619 0.2066 YES
## 4 Shapiro-Wilk x4 0.9843 0.8510 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## x1 39 186.8205 14.03168 184 158 221 177.0 193.5 0.3790264 -0.1856496
## x2 39 279.2308 22.42116 278 237 317 262.5 299.0 -0.2462722 -1.0623505
## x3 39 147.5385 14.69845 146 121 184 137.5 161.0 0.3847308 -0.6784951
## x4 39 197.8974 18.48868 197 158 235 187.0 213.0 -0.1040536 -0.6351910
mvn(data = pulgas[,1:4], mvnTest = "royston")
## $multivariateNormality
## Test H p value MVN
## 1 Royston 4.191205 0.3790543 YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9710 0.4021 YES
## 2 Shapiro-Wilk x2 0.9595 0.1714 YES
## 3 Shapiro-Wilk x3 0.9619 0.2066 YES
## 4 Shapiro-Wilk x4 0.9843 0.8510 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## x1 39 186.8205 14.03168 184 158 221 177.0 193.5 0.3790264 -0.1856496
## x2 39 279.2308 22.42116 278 237 317 262.5 299.0 -0.2462722 -1.0623505
## x3 39 147.5385 14.69845 146 121 184 137.5 161.0 0.3847308 -0.6784951
## x4 39 197.8974 18.48868 197 158 235 187.0 213.0 -0.1040536 -0.6351910
Estudio de datos Perdidos
mvn(data = pulgas[,1:4], mvnTest = "mardia",multivariateOutlierMethod="adj")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 14.5033549394537 0.804085904776078 YES
## 2 Mardia Kurtosis -2.1127725094963 0.0346202436872789 NO
## 3 MVN <NA> <NA> NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9710 0.4021 YES
## 2 Shapiro-Wilk x2 0.9595 0.1714 YES
## 3 Shapiro-Wilk x3 0.9619 0.2066 YES
## 4 Shapiro-Wilk x4 0.9843 0.8510 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## x1 39 186.8205 14.03168 184 158 221 177.0 193.5 0.3790264 -0.1856496
## x2 39 279.2308 22.42116 278 237 317 262.5 299.0 -0.2462722 -1.0623505
## x3 39 147.5385 14.69845 146 121 184 137.5 161.0 0.3847308 -0.6784951
## x4 39 197.8974 18.48868 197 158 235 187.0 213.0 -0.1040536 -0.6351910
mvn(data = pulgas[,1:4], mvnTest = "mardia",multivariatePlot="qq")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 14.5033549394537 0.804085904776078 YES
## 2 Mardia Kurtosis -2.1127725094963 0.0346202436872789 NO
## 3 MVN <NA> <NA> NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk x1 0.9710 0.4021 YES
## 2 Shapiro-Wilk x2 0.9595 0.1714 YES
## 3 Shapiro-Wilk x3 0.9619 0.2066 YES
## 4 Shapiro-Wilk x4 0.9843 0.8510 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## x1 39 186.8205 14.03168 184 158 221 177.0 193.5 0.3790264 -0.1856496
## x2 39 279.2308 22.42116 278 237 317 262.5 299.0 -0.2462722 -1.0623505
## x3 39 147.5385 14.69845 146 121 184 137.5 161.0 0.3847308 -0.6784951
## x4 39 197.8974 18.48868 197 158 235 187.0 213.0 -0.1040536 -0.6351910
Prueba de Homocedasticidad
library(car)
## Loading required package: carData
leveneTest(pulgas[,1],pulgas[,5],center=mean)
## Warning in leveneTest.default(pulgas[, 1], pulgas[, 5], center = mean):
## pulgas[, 5] coerced to factor.
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 1 1.2583 0.2692
## 37
Multivariado test que contrastan la homogeneidad de varianza (boxM) install.packages(“biotools”)
library(biotools)
## Loading required package: rpanel
## Loading required package: tcltk
## Package `rpanel', version 1.1-4: type help(rpanel) for summary information
## Loading required package: tkrplot
## Loading required package: MASS
## Loading required package: lattice
## Loading required package: SpatialEpi
## Loading required package: sp
## ---
## biotools version 3.1
##
boxM(pulgas[1:4],pulgas[,5])
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: pulgas[1:4]
## Chi-Sq (approx.) = 8.7457, df = 10, p-value = 0.5564
Calculo del p-Value
qchisq(0.95,10,ncp=0,lower.tail = TRUE,log.p = FALSE)
## [1] 18.30704
Test de esfericidad de Barltet (mide si existe relación entre las variables analizadas)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
R=cor(pulgas[1:4])
cortest.bartlett(R,n=39)
## $chisq
## [1] 54.82669
##
## $p.value
## [1] 5.024446e-10
##
## $df
## [1] 6
MANOVA
mlm=lm(cbind(x1,x2,x3,x4) ~Grupo, data =pulgas)
mnova.m=manova(mlm)
summary(mnova.m)
## Df Pillai approx F num Df den Df Pr(>F)
## Grupo 1 0.78298 30.666 4 34 7.522e-11 ***
## Residuals 37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Por variables predictoras
summary.aov(mnova.m)
## Response x1 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Grupo 1 2170.1 2170.06 15.116 0.0004049 ***
## Residuals 37 5311.7 143.56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response x2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Grupo 1 5494.8 5494.8 14.94 0.0004326 ***
## Residuals 37 13608.1 367.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response x3 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Grupo 1 3832.1 3832.1 32.389 1.645e-06 ***
## Residuals 37 4377.6 118.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response x4 :
## Df Sum Sq Mean Sq F value Pr(>F)
## Grupo 1 5290.9 5290.9 25.428 1.236e-05 ***
## Residuals 37 7698.7 208.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mnova.m,test="Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Grupo 1 0.21702 30.666 4 34 7.522e-11 ***
## Residuals 37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mnova.m,test="Roy")
## Df Roy approx F num Df den Df Pr(>F)
## Grupo 1 3.6078 30.666 4 34 7.522e-11 ***
## Residuals 37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mnova.m,test="Hotelling-Lawley")
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## Grupo 1 3.6078 30.666 4 34 7.522e-11 ***
## Residuals 37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(rrcov)
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:psych':
##
## cushny
## Scalable Robust Estimators with High Breakdown Point (version 1.4-4)
Wilks.test(pulgas[,5]~pulgas[,1]+pulgas[,2]+pulgas[,3]+pulgas[,4], data = pulgas, method = "c")
##
## One-way MANOVA (Bartlett Chi2)
##
## data: x
## Wilks' Lambda = 0.21702, Chi2-Value = 53.471, DF = 4.000, p-value
## = 6.791e-11
## sample estimates:
## pulgas[, 1] pulgas[, 2] pulgas[, 3] pulgas[, 4]
## Especie Haltica Carduorum 179.5500 290.8000 157.2000 209.2500
## Especie Haltica Oleracea 194.4737 267.0526 137.3684 185.9474
“c” para los estimadores estándar de la media y la varianza, “mcd” para los estimadores de MCD: Matriz de covarianzas de determinante mínimo “rank” para la lambda de wilks de rango según lo propuesto por Nath y Pavur (1985). —- Comparaciones múltiples Pruebas post hoc
# promedios cuadráticos
library(lsmeans)
## The 'lsmeans' package is being deprecated.
## Users are encouraged to switch to 'emmeans'.
## See help('transition') for more information, including how
## to convert 'lsmeans' objects and scripts to work with 'emmeans'.
Vlsm=lsmeans(mlm, "Grupo")
# Multiple comparisons with fdr p value adjustment
test(contrast(Vlsm, "pairwise"), side = "=", adjust = "fdr")
## contrast estimate SE
## Especie Haltica Carduorum - Especie Haltica Oleracea 12.98947 3.500922
## df t.ratio p.value
## 37 3.71 0.0007
##
## Results are averaged over the levels of: rep.meas
# With threshold spcified
test(contrast(Vlsm, "pairwise"), side = "=", adjust = "fdr", delta = 0.25)
## contrast estimate SE
## Especie Haltica Carduorum - Especie Haltica Oleracea 12.98947 3.500922
## df t.ratio p.value
## 37 3.639 0.9996
##
## Results are averaged over the levels of: rep.meas
## Statistics are tests of equivalence with a threshold of 0.25
## P values are left-tailed
Ejemplo con dos Factores
library(readxl)
merca=read_excel("C:/Users/Alumno/Desktop/baseTaller/Mercado.xlsx")
merca=data.frame(merca)
head(merca)
## semana tipsuper cola ron
## 1 S1 UC 11 5
## 2 S1 UC 8 7
## 3 S1 UC 10 5
## 4 S1 UC 14 6
## 5 S1 UC 17 9
## 6 S1 UC 18 10
Supuestos de ANOVA Prueba de Normalidad install.packages(“MVN”)
library(MVN)
mvn(data = merca[,3:4], mvnTest = "mardia")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 8.67982469368392 0.0696200200606715 YES
## 2 Mardia Kurtosis -0.440425348113378 0.659629069378468 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk cola 0.9016 0.0038 NO
## 2 Shapiro-Wilk ron 0.9636 0.2763 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## cola 36 12 5.237229 10 5 24 8 15.5 0.7912766 -0.5383030
## ron 36 7 3.577709 6 1 15 4 10.0 0.3821405 -0.7613661
mvn(data = merca[,3:4], mvnTest = "hz")
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 1.559498 0.0005021985 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk cola 0.9016 0.0038 NO
## 2 Shapiro-Wilk ron 0.9636 0.2763 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## cola 36 12 5.237229 10 5 24 8 15.5 0.7912766 -0.5383030
## ron 36 7 3.577709 6 1 15 4 10.0 0.3821405 -0.7613661
mvn(data = merca[,3:4], mvnTest = "royston")
## $multivariateNormality
## Test H p value MVN
## 1 Royston 8.66777 0.0105294 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk cola 0.9016 0.0038 NO
## 2 Shapiro-Wilk ron 0.9636 0.2763 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## cola 36 12 5.237229 10 5 24 8 15.5 0.7912766 -0.5383030
## ron 36 7 3.577709 6 1 15 4 10.0 0.3821405 -0.7613661
Estudio de datos Perdidos
mvn(data = merca[,3:4], mvnTest = "mardia",multivariateOutlierMethod="adj")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 8.67982469368392 0.0696200200606715 YES
## 2 Mardia Kurtosis -0.440425348113378 0.659629069378468 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk cola 0.9016 0.0038 NO
## 2 Shapiro-Wilk ron 0.9636 0.2763 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## cola 36 12 5.237229 10 5 24 8 15.5 0.7912766 -0.5383030
## ron 36 7 3.577709 6 1 15 4 10.0 0.3821405 -0.7613661
mvn(data = merca[,3:4], mvnTest = "mardia",multivariatePlot="qq")
## $multivariateNormality
## Test Statistic p value Result
## 1 Mardia Skewness 8.67982469368392 0.0696200200606715 YES
## 2 Mardia Kurtosis -0.440425348113378 0.659629069378468 YES
## 3 MVN <NA> <NA> YES
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk cola 0.9016 0.0038 NO
## 2 Shapiro-Wilk ron 0.9636 0.2763 YES
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## cola 36 12 5.237229 10 5 24 8 15.5 0.7912766 -0.5383030
## ron 36 7 3.577709 6 1 15 4 10.0 0.3821405 -0.7613661
Prueba de Homocedasticidad
library(car)
leveneTest(merca[,3],merca[,4],center=mean)
## Warning in leveneTest.default(merca[, 3], merca[, 4], center = mean):
## merca[, 4] coerced to factor.
## Levene's Test for Homogeneity of Variance (center = mean)
## Df F value Pr(>F)
## group 14 3.4085 0.005614 **
## 21
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Multivariado, test que contrastan la homogeneidad de varianza (boxM) install.packages(“biotools”)
library(biotools)
boxM(merca[,3:4],merca[,1])
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: merca[, 3:4]
## Chi-Sq (approx.) = 10.094, df = 3, p-value = 0.01779
boxM(merca[,3:4],merca[,2])
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: merca[, 3:4]
## Chi-Sq (approx.) = 13.049, df = 6, p-value = 0.04226
Calculo del p-Value
qchisq(0.95,10,ncp=0,lower.tail = TRUE,log.p = FALSE)
## [1] 18.30704
Test de esfericidad de Barltet (mide si existe relacion entre las variables analizadas)
library(psych)
R=cor(merca[,3:4])
cortest.bartlett(R,n=37)
## $chisq
## [1] 35.80355
##
## $p.value
## [1] 2.182494e-09
##
## $df
## [1] 1
MANOVA
mlm=lm(cbind(cola,ron)~semana*tipsuper,data=merca)
mnova.m=manova(mlm)
summary(mnova.m)
## Df Pillai approx F num Df den Df Pr(>F)
## semana 1 0.40954 10.057 2 29 0.0004812 ***
## tipsuper 2 1.00348 15.105 4 60 1.347e-08 ***
## semana:tipsuper 2 0.36521 3.351 4 60 0.0152903 *
## Residuals 30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Por variables predictors
summary.aov(mnova.m)
## Response cola :
## Df Sum Sq Mean Sq F value Pr(>F)
## semana 1 144 144 18.0 0.0001952 ***
## tipsuper 2 504 252 31.5 4.262e-08 ***
## semana:tipsuper 2 72 36 4.5 0.0195366 *
## Residuals 30 240 8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response ron :
## Df Sum Sq Mean Sq F value Pr(>F)
## semana 1 100 100.000 17.4419 0.0002347 ***
## tipsuper 2 168 84.000 14.6512 3.637e-05 ***
## semana:tipsuper 2 8 4.000 0.6977 0.5056373
## Residuals 30 172 5.733
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mnova.m,test="Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## semana 1 0.59046 10.0569 2 29 0.0004812 ***
## tipsuper 2 0.21490 16.7790 4 58 3.438e-09 ***
## semana:tipsuper 2 0.64823 3.5095 4 58 0.0123884 *
## Residuals 30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mnova.m,test="Roy")
## Df Roy approx F num Df den Df Pr(>F)
## semana 1 0.69358 10.057 2 29 0.0004812 ***
## tipsuper 2 2.16865 32.530 2 30 3.068e-08 ***
## semana:tipsuper 2 0.47858 7.179 2 30 0.0028335 **
## Residuals 30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mnova.m,test="Hotelling-Lawley")
## Df Hotelling-Lawley approx F num Df den Df Pr(>F)
## semana 1 0.69358 10.0569 2 29 0.0004812 ***
## tipsuper 2 2.63722 18.4606 4 56 1.005e-09 ***
## semana:tipsuper 2 0.52191 3.6534 4 56 0.0102934 *
## Residuals 30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Comparaciones múltiples Pruebas post hoc (semana)
# promedios cuadráticos
library(lsmeans)
Vlsm=lsmeans(mlm, "semana")
## NOTE: Results may be misleading due to involvement in interactions
# Multiple comparisons with fdr p value adjustment
test(contrast(Vlsm, "pairwise"), side = "=", adjust = "fdr")
## contrast estimate SE df t.ratio p.value
## S1 - S2 -3.666667 0.8039256 30 -4.561 0.0001
##
## Results are averaged over the levels of: tipsuper, rep.meas
# With threshold spcified
test(contrast(Vlsm, "pairwise"), side = "=", adjust = "fdr", delta = 0.25)
## contrast estimate SE df t.ratio p.value
## S1 - S2 -3.666667 0.8039256 30 4.25 0.9999
##
## Results are averaged over the levels of: tipsuper, rep.meas
## Statistics are tests of equivalence with a threshold of 0.25
## P values are left-tailed
Comparaciones múltiples Pruebas post hoc (tipsuper)
# promedios cuadráticos
library(lsmeans)
Vlsm=lsmeans(mlm, "tipsuper")
## NOTE: Results may be misleading due to involvement in interactions
# Multiple comparisons with fdr p value adjustment
test(contrast(Vlsm, "pairwise"), side = "=", adjust = "fdr")
## contrast estimate SE df t.ratio p.value
## R - UC -7.0 0.9846037 30 -7.109 <.0001
## R - UR -3.5 0.9846037 30 -3.555 0.0013
## UC - UR 3.5 0.9846037 30 3.555 0.0013
##
## Results are averaged over the levels of: semana, rep.meas
## P value adjustment: fdr method for 3 tests
# With threshold spcified
test(contrast(Vlsm, "pairwise"), side = "=", adjust = "fdr", delta = 0.25)
## contrast estimate SE df t.ratio p.value
## R - UC -7.0 0.9846037 30 6.856 1.0000
## R - UR -3.5 0.9846037 30 3.301 1.0000
## UC - UR 3.5 0.9846037 30 3.301 1.0000
##
## Results are averaged over the levels of: semana, rep.meas
## P value adjustment: fdr method for 3 tests
## Statistics are tests of equivalence with a threshold of 0.25
## P values are left-tailed