options(scipen=999) # para omitir notación científica
library(ICSNP)
## Loading required package: mvtnorm
## Loading required package: ICS
library(mvtnorm)
library(readxl)
T2_2mi <- read_excel("G:/Mi unidad/MAESTRÍA CIENCIA Y TECNOLOGÍA ALIMENTOS/Métodos Multivariados/Clases/Clase 02.03.2023/mm2023_1_hotelling_030223.xlsx")
T2_2mi
## # A tibble: 24 × 4
## L a b region
## <dbl> <dbl> <dbl> <chr>
## 1 47.8 19.8 -2.61 int
## 2 46.5 20.4 -2.63 int
## 3 47.1 20.1 -2.62 int
## 4 39.3 21.4 -2.85 int
## 5 37.0 22.1 -2.73 int
## 6 38.2 21.7 -2.79 int
## 7 41.9 20.9 -1.89 int
## 8 43.2 20.9 -1.84 int
## 9 42.5 20.9 -1.86 int
## 10 37.5 22.7 -1.28 int
## # … with 14 more rows
class(T2_2mi )
## [1] "tbl_df" "tbl" "data.frame"
cor(T2_2mi[,-4])
## L a b
## L 1.0000000 -0.4092125 0.3190057
## a -0.4092125 1.0000000 -0.9906084
## b 0.3190057 -0.9906084 1.0000000
cor(T2_2mi[,-4],method = "spearman")
## L a b
## L 1.0000000 -0.6608696 0.2695652
## a -0.6608696 1.0000000 -0.7434783
## b 0.2695652 -0.7434783 1.0000000
library(psych)
psych::corPlot(T2_2mi[,-4])

Y=as.matrix(cbind(T2_2mi[,-4]));Y
## L a b
## [1,] 47.800 19.810 -2.610
## [2,] 46.480 20.370 -2.630
## [3,] 47.140 20.090 -2.620
## [4,] 39.330 21.350 -2.850
## [5,] 37.050 22.080 -2.730
## [6,] 38.190 21.715 -2.790
## [7,] 41.860 20.880 -1.890
## [8,] 43.200 20.900 -1.840
## [9,] 42.530 20.890 -1.865
## [10,] 37.510 22.720 -1.280
## [11,] 35.390 21.670 -1.570
## [12,] 36.450 22.195 -1.425
## [13,] 39.720 7.110 14.770
## [14,] 39.770 6.880 13.490
## [15,] 39.745 6.995 14.130
## [16,] 47.280 3.840 14.830
## [17,] 45.660 4.390 17.550
## [18,] 46.470 4.115 16.190
## [19,] 42.810 4.700 18.480
## [20,] 44.480 4.840 16.560
## [21,] 43.645 4.770 17.520
## [22,] 42.050 4.250 16.660
## [23,] 45.060 3.780 16.180
## [24,] 43.550 4.015 16.420
#install.packages("rgl")
library(rgl)
mi_color = ifelse(T2_2mi$region =='int','red','blue')
plot3d(T2_2mi$L,
T2_2mi$a,
T2_2mi$b,
col=mi_color,
size=5,type='s')
g=c(T2_2mi$region);length(g)
## [1] 24
describeBy(T2_2mi[,-4],T2_2mi[,4])
##
## Descriptive statistics by group
## region: ext
## vars n mean sd median trimmed mad min max range skew kurtosis se
## L 1 12 43.35 2.63 43.60 43.32 2.68 39.72 47.28 7.56 -0.16 -1.45 0.76
## a 2 12 4.97 1.27 4.54 4.88 0.71 3.78 7.11 3.33 0.81 -1.18 0.37
## b 3 12 16.07 1.49 16.30 16.08 1.82 13.49 18.48 4.99 -0.19 -1.21 0.43
## ------------------------------------------------------------
## region: int
## vars n mean sd median trimmed mad min max range skew kurtosis se
## L 1 12 41.08 4.39 40.59 40.97 4.91 35.39 47.80 12.41 0.27 -1.60 1.27
## a 2 12 21.22 0.89 21.12 21.21 1.00 19.81 22.72 2.91 0.01 -1.31 0.26
## b 3 12 -2.17 0.58 -2.25 -2.20 0.66 -2.85 -1.28 1.57 0.20 -1.80 0.17
describeBy(Y,g)
##
## Descriptive statistics by group
## INDICES: ext
## vars n mean sd median trimmed mad min max range skew kurtosis se
## L 1 12 43.35 2.63 43.60 43.32 2.68 39.72 47.28 7.56 -0.16 -1.45 0.76
## a 2 12 4.97 1.27 4.54 4.88 0.71 3.78 7.11 3.33 0.81 -1.18 0.37
## b 3 12 16.07 1.49 16.30 16.08 1.82 13.49 18.48 4.99 -0.19 -1.21 0.43
## ------------------------------------------------------------
## INDICES: int
## vars n mean sd median trimmed mad min max range skew kurtosis se
## L 1 12 41.08 4.39 40.59 40.97 4.91 35.39 47.80 12.41 0.27 -1.60 1.27
## a 2 12 21.22 0.89 21.12 21.21 1.00 19.81 22.72 2.91 0.01 -1.31 0.26
## b 3 12 -2.17 0.58 -2.25 -2.20 0.66 -2.85 -1.28 1.57 0.20 -1.80 0.17
HotellingsT2(formula=Y~g)
##
## Hotelling's two sample T2-test
##
## data: Y by g
## T.2 = 1159.5, df1 = 3, df2 = 20, p-value < 0.00000000000000022
## alternative hypothesis: true location difference is not equal to c(0,0,0)
Yint=Y[g=='int',]
Yext=Y[g=='ext',]
dE = sqrt(
(Yint[,1] - Yext[,1])^2+
(Yint[,2] - Yext[,2])^2+
(Yint[,3] - Yext[,3])^2)
dE
## [1] 22.99219 22.06487 22.51061 26.12250 28.25503 27.17644 26.03136 24.45653
## [9] 25.23640 26.14567 26.99303 26.44554
sum(dE)
## [1] 304.4302
plot(dE,pch=16,cex=2)

#################################################
#Supuestos
#Normalidad univariante
shapiro.test(T2_2mi$L)
##
## Shapiro-Wilk normality test
##
## data: T2_2mi$L
## W = 0.95407, p-value = 0.331
shapiro.test(T2_2mi$a)
##
## Shapiro-Wilk normality test
##
## data: T2_2mi$a
## W = 0.73658, p-value = 0.00003192
shapiro.test(T2_2mi$b)
##
## Shapiro-Wilk normality test
##
## data: T2_2mi$b
## W = 0.72884, p-value = 0.00002483
plot(density(T2_2mi$L), xlim=c(-3,50), col='blue', lwd=2)
lines(density(T2_2mi$a), col='red', lwd=2)
lines(density(T2_2mi$b), col='green', lwd=2)

library(MVN)
mvn(data = T2_2mi,
subset = 'region',
mvnTest = 'royston')
## $multivariateNormality
## $multivariateNormality$ext
## Test H p value MVN
## 1 Royston 3.934984 0.07899132 YES
##
## $multivariateNormality$int
## Test H p value MVN
## 1 Royston 2.055495 0.1987468 YES
##
##
## $univariateNormality
## $univariateNormality$ext
## Test Variable Statistic p value Normality
## 1 Anderson-Darling L 0.3107 0.5073 YES
## 2 Anderson-Darling a 1.1386 0.0034 NO
## 3 Anderson-Darling b 0.2891 0.5516 YES
##
## $univariateNormality$int
## Test Variable Statistic p value Normality
## 1 Anderson-Darling L 0.3975 0.3105 YES
## 2 Anderson-Darling a 0.1804 0.8923 YES
## 3 Anderson-Darling b 0.7224 0.0432 NO
##
##
## $Descriptives
## $Descriptives$ext
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## L 12 43.35333 2.625300 43.5975 39.72 47.28 41.480 45.210 -0.1556916 -1.452462
## a 12 4.97375 1.266664 4.5450 3.78 7.11 4.090 5.350 0.8057906 -1.180957
## b 12 16.06500 1.491963 16.3050 13.49 18.48 14.815 16.875 -0.1895573 -1.205732
##
## $Descriptives$int
## n Mean Std.Dev Median Min Max 25th 75th Skew Kurtosis
## L 12 41.0775 4.3948381 40.595 35.39 47.80 37.3950 44.02000 0.26918748 -1.597383
## a 12 21.2225 0.8903000 21.125 19.81 22.72 20.7525 21.80625 0.01403846 -1.308349
## b 12 -2.1750 0.5842828 -2.250 -2.85 -1.28 -2.6550 -1.77250 0.19846034 -1.795987
# Igualdad de matrices de varianzas y covarianzas
cov(Yint)
## L a b
## L 19.314602 -3.6262386 -1.0554318
## a -3.626239 0.7926341 0.2430068
## b -1.055432 0.2430068 0.3413864
cov(Yext)
## L a b
## L 6.892202 -2.882234 1.730950
## a -2.882234 1.604437 -1.190202
## b 1.730950 -1.190202 2.225955
library(biotools)
## Loading required package: MASS
## ---
## biotools version 4.2
boxM(Y, g)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: Y
## Chi-Sq (approx.) = 21.985, df = 6, p-value = 0.001218
boxplot(Y)

library(faux)
##
## ************
## Welcome to faux. For support and examples visit:
## https://debruine.github.io/faux/
## - Get and set global package options with: faux_options()
## ************
set.seed(123)
n_sim = 1000
sim_int = rnorm_multi(
n = n_sim, vars = 3,
mu = colMeans(Yint),
sd = apply(Yint, 2, sd),
r = cor(Yint)[upper.tri(cor(Yint))]
)
sim_ext = rnorm_multi(
n = n_sim, vars = 3,
mu = colMeans(Yext),
sd = apply(Yext, 2, sd),
r = cor(Yext)[upper.tri(cor(Yext))]
)
sim_tot = data.frame(
rbind(sim_int, sim_ext),
g = gl(2,n_sim,2*n_sim, c('int','ext'))
)
head(sim_tot)
## L a b g
## 1 38.59073 21.72664 -2.597000 int
## 2 40.00038 21.22258 -2.648131 int
## 3 47.95447 20.08905 -2.605478 int
## 4 41.31624 20.78264 -2.175200 int
## 5 41.50610 20.76906 -3.533933 int
## 6 48.69954 20.10337 -2.092865 int
dim(sim_tot)
## [1] 2000 4
mi_color = ifelse(sim_tot$g == 'int',
'red', 'blue')
plot3d(sim_tot$L,
sim_tot$a,
sim_tot$b,
col = mi_color)
#################################################
T2_2mp <- read_excel("G:/Mi unidad/MAESTRÍA CIENCIA Y TECNOLOGÍA ALIMENTOS/Métodos Multivariados/Clases/Clase 02.03.2023/mm2023_1_hotelling_030223.xlsx",sheet = "EJ2")
class(T2_2mp)
## [1] "tbl_df" "tbl" "data.frame"
M1=cbind(T2_2mp$N_15,T2_2mp$P_15,T2_2mp$K_15)
M2=cbind(T2_2mp$N_30,T2_2mp$P_30,T2_2mp$K_30)
Y2=rbind(M1,M2)
rownames(Y2)=c(paste0("muestra",1:24))
g2=gl(2,dim(M1)[[1]],24,labels = c("ET1","ET2"))
describeBy(Y2,g2)
##
## Descriptive statistics by group
## INDICES: ET1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## V1 1 12 41.08 4.39 40.59 40.97 4.91 35.39 47.80 12.41 0.27 -1.60 1.27
## V2 2 12 2.12 0.09 2.11 2.12 0.10 1.98 2.27 0.29 0.01 -1.31 0.03
## V3 3 12 19.65 1.73 19.68 19.76 1.81 15.91 22.30 6.38 -0.46 -0.48 0.50
## ------------------------------------------------------------
## INDICES: ET2
## vars n mean sd median trimmed mad min max range skew kurtosis se
## V1 1 12 43.98 4.05 43.29 43.85 5.10 38.93 50.34 11.41 0.23 -1.60 1.17
## V2 2 12 5.31 0.55 5.53 5.36 0.39 4.22 5.92 1.70 -0.64 -1.13 0.16
## V3 3 12 22.50 1.66 22.52 22.62 1.56 18.96 24.80 5.83 -0.46 -0.61 0.48
mc = ifelse(g2=='ET1',
'red', 'blue')
plot3d(Y2, col=mc, type = 's')
psych::corPlot(T2_2mp)

library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ################################### WARNING ###################################
## # We noticed you have dplyr installed. The dplyr lag() function breaks how #
## # base R's lag() function is supposed to work, which breaks lag(my_xts). #
## # #
## # If you call library(dplyr) later in this session, then calls to lag(my_xts) #
## # that you enter or source() into this session won't work correctly. #
## # #
## # All package code is unaffected because it is protected by the R namespace #
## # mechanism. #
## # #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## # You can use stats::lag() to make sure you're not using dplyr::lag(), or you #
## # can add conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## ################################### WARNING ###################################
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
chart.Correlation(T2_2mp)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter

T2_2mpdif=data.frame(N_d=T2_2mp$N_15-T2_2mp$N_30,P_d=T2_2mp$P_15-T2_2mp$P_30,K_d=T2_2mp$K_15-T2_2mp$K_30)
error.bars(T2_2mpdif,bar=F,ylab="grupo de medias",xlab="respuestas",eyes=F)

HotellingsT2(T2_2mpdif)
##
## Hotelling's one sample T2-test
##
## data: T2_2mpdif
## T.2 = 280.79, df1 = 3, df2 = 9, p-value = 0.000000003307
## alternative hypothesis: true location is not equal to c(0,0,0)
#######################################################
#probando normalidad multivariante
#probando igualdad de matrices de varianzas y covarianzas
#hacer gráfico 3d para ver las respuestas en sus grupos