options(scipen=999) # para omitir notación científica
library(ICSNP)
## Loading required package: mvtnorm
## Loading required package: ICS
library(mvtnorm)
library(readxl)

T2_2mi <- read_excel("G:/Mi unidad/MAESTRÍA CIENCIA Y TECNOLOGÍA ALIMENTOS/Métodos Multivariados/Clases/Clase 02.03.2023/mm2023_1_hotelling_030223.xlsx")
T2_2mi
## # A tibble: 24 × 4
##        L     a     b region
##    <dbl> <dbl> <dbl> <chr> 
##  1  47.8  19.8 -2.61 int   
##  2  46.5  20.4 -2.63 int   
##  3  47.1  20.1 -2.62 int   
##  4  39.3  21.4 -2.85 int   
##  5  37.0  22.1 -2.73 int   
##  6  38.2  21.7 -2.79 int   
##  7  41.9  20.9 -1.89 int   
##  8  43.2  20.9 -1.84 int   
##  9  42.5  20.9 -1.86 int   
## 10  37.5  22.7 -1.28 int   
## # … with 14 more rows
class(T2_2mi )
## [1] "tbl_df"     "tbl"        "data.frame"
cor(T2_2mi[,-4])
##            L          a          b
## L  1.0000000 -0.4092125  0.3190057
## a -0.4092125  1.0000000 -0.9906084
## b  0.3190057 -0.9906084  1.0000000
cor(T2_2mi[,-4],method = "spearman")
##            L          a          b
## L  1.0000000 -0.6608696  0.2695652
## a -0.6608696  1.0000000 -0.7434783
## b  0.2695652 -0.7434783  1.0000000
library(psych)
psych::corPlot(T2_2mi[,-4])

Y=as.matrix(cbind(T2_2mi[,-4]));Y
##            L      a      b
##  [1,] 47.800 19.810 -2.610
##  [2,] 46.480 20.370 -2.630
##  [3,] 47.140 20.090 -2.620
##  [4,] 39.330 21.350 -2.850
##  [5,] 37.050 22.080 -2.730
##  [6,] 38.190 21.715 -2.790
##  [7,] 41.860 20.880 -1.890
##  [8,] 43.200 20.900 -1.840
##  [9,] 42.530 20.890 -1.865
## [10,] 37.510 22.720 -1.280
## [11,] 35.390 21.670 -1.570
## [12,] 36.450 22.195 -1.425
## [13,] 39.720  7.110 14.770
## [14,] 39.770  6.880 13.490
## [15,] 39.745  6.995 14.130
## [16,] 47.280  3.840 14.830
## [17,] 45.660  4.390 17.550
## [18,] 46.470  4.115 16.190
## [19,] 42.810  4.700 18.480
## [20,] 44.480  4.840 16.560
## [21,] 43.645  4.770 17.520
## [22,] 42.050  4.250 16.660
## [23,] 45.060  3.780 16.180
## [24,] 43.550  4.015 16.420
#install.packages("rgl")
library(rgl)
mi_color = ifelse(T2_2mi$region =='int','red','blue')
plot3d(T2_2mi$L,
       T2_2mi$a,
       T2_2mi$b,
       col=mi_color,
       size=5,type='s')

g=c(T2_2mi$region);length(g)
## [1] 24
describeBy(T2_2mi[,-4],T2_2mi[,4])
## 
##  Descriptive statistics by group 
## region: ext
##   vars  n  mean   sd median trimmed  mad   min   max range  skew kurtosis   se
## L    1 12 43.35 2.63  43.60   43.32 2.68 39.72 47.28  7.56 -0.16    -1.45 0.76
## a    2 12  4.97 1.27   4.54    4.88 0.71  3.78  7.11  3.33  0.81    -1.18 0.37
## b    3 12 16.07 1.49  16.30   16.08 1.82 13.49 18.48  4.99 -0.19    -1.21 0.43
## ------------------------------------------------------------ 
## region: int
##   vars  n  mean   sd median trimmed  mad   min   max range skew kurtosis   se
## L    1 12 41.08 4.39  40.59   40.97 4.91 35.39 47.80 12.41 0.27    -1.60 1.27
## a    2 12 21.22 0.89  21.12   21.21 1.00 19.81 22.72  2.91 0.01    -1.31 0.26
## b    3 12 -2.17 0.58  -2.25   -2.20 0.66 -2.85 -1.28  1.57 0.20    -1.80 0.17
describeBy(Y,g)
## 
##  Descriptive statistics by group 
## INDICES: ext
##   vars  n  mean   sd median trimmed  mad   min   max range  skew kurtosis   se
## L    1 12 43.35 2.63  43.60   43.32 2.68 39.72 47.28  7.56 -0.16    -1.45 0.76
## a    2 12  4.97 1.27   4.54    4.88 0.71  3.78  7.11  3.33  0.81    -1.18 0.37
## b    3 12 16.07 1.49  16.30   16.08 1.82 13.49 18.48  4.99 -0.19    -1.21 0.43
## ------------------------------------------------------------ 
## INDICES: int
##   vars  n  mean   sd median trimmed  mad   min   max range skew kurtosis   se
## L    1 12 41.08 4.39  40.59   40.97 4.91 35.39 47.80 12.41 0.27    -1.60 1.27
## a    2 12 21.22 0.89  21.12   21.21 1.00 19.81 22.72  2.91 0.01    -1.31 0.26
## b    3 12 -2.17 0.58  -2.25   -2.20 0.66 -2.85 -1.28  1.57 0.20    -1.80 0.17
HotellingsT2(formula=Y~g)
## 
##  Hotelling's two sample T2-test
## 
## data:  Y by g
## T.2 = 1159.5, df1 = 3, df2 = 20, p-value < 0.00000000000000022
## alternative hypothesis: true location difference is not equal to c(0,0,0)
Yint=Y[g=='int',]
Yext=Y[g=='ext',]

dE = sqrt(
  (Yint[,1] - Yext[,1])^2+

    (Yint[,2] - Yext[,2])^2+
    (Yint[,3] - Yext[,3])^2)
dE
##  [1] 22.99219 22.06487 22.51061 26.12250 28.25503 27.17644 26.03136 24.45653
##  [9] 25.23640 26.14567 26.99303 26.44554
sum(dE)
## [1] 304.4302
plot(dE,pch=16,cex=2)

#################################################
#Supuestos
#Normalidad univariante
shapiro.test(T2_2mi$L)
## 
##  Shapiro-Wilk normality test
## 
## data:  T2_2mi$L
## W = 0.95407, p-value = 0.331
shapiro.test(T2_2mi$a)
## 
##  Shapiro-Wilk normality test
## 
## data:  T2_2mi$a
## W = 0.73658, p-value = 0.00003192
shapiro.test(T2_2mi$b)
## 
##  Shapiro-Wilk normality test
## 
## data:  T2_2mi$b
## W = 0.72884, p-value = 0.00002483
plot(density(T2_2mi$L), xlim=c(-3,50), col='blue', lwd=2)
lines(density(T2_2mi$a), col='red', lwd=2)
lines(density(T2_2mi$b), col='green', lwd=2)

library(MVN)
mvn(data = T2_2mi,
    subset = 'region',
    mvnTest = 'royston')
## $multivariateNormality
## $multivariateNormality$ext
##      Test        H    p value MVN
## 1 Royston 3.934984 0.07899132 YES
## 
## $multivariateNormality$int
##      Test        H   p value MVN
## 1 Royston 2.055495 0.1987468 YES
## 
## 
## $univariateNormality
## $univariateNormality$ext
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling     L        0.3107    0.5073    YES   
## 2 Anderson-Darling     a        1.1386    0.0034    NO    
## 3 Anderson-Darling     b        0.2891    0.5516    YES   
## 
## $univariateNormality$int
##               Test  Variable Statistic   p value Normality
## 1 Anderson-Darling     L        0.3975    0.3105    YES   
## 2 Anderson-Darling     a        0.1804    0.8923    YES   
## 3 Anderson-Darling     b        0.7224    0.0432    NO    
## 
## 
## $Descriptives
## $Descriptives$ext
##    n     Mean  Std.Dev  Median   Min   Max   25th   75th       Skew  Kurtosis
## L 12 43.35333 2.625300 43.5975 39.72 47.28 41.480 45.210 -0.1556916 -1.452462
## a 12  4.97375 1.266664  4.5450  3.78  7.11  4.090  5.350  0.8057906 -1.180957
## b 12 16.06500 1.491963 16.3050 13.49 18.48 14.815 16.875 -0.1895573 -1.205732
## 
## $Descriptives$int
##    n    Mean   Std.Dev Median   Min   Max    25th     75th       Skew  Kurtosis
## L 12 41.0775 4.3948381 40.595 35.39 47.80 37.3950 44.02000 0.26918748 -1.597383
## a 12 21.2225 0.8903000 21.125 19.81 22.72 20.7525 21.80625 0.01403846 -1.308349
## b 12 -2.1750 0.5842828 -2.250 -2.85 -1.28 -2.6550 -1.77250 0.19846034 -1.795987
# Igualdad de matrices de varianzas y covarianzas
cov(Yint)
##           L          a          b
## L 19.314602 -3.6262386 -1.0554318
## a -3.626239  0.7926341  0.2430068
## b -1.055432  0.2430068  0.3413864
cov(Yext)
##           L         a         b
## L  6.892202 -2.882234  1.730950
## a -2.882234  1.604437 -1.190202
## b  1.730950 -1.190202  2.225955
library(biotools)
## Loading required package: MASS
## ---
## biotools version 4.2
boxM(Y, g)
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 21.985, df = 6, p-value = 0.001218
boxplot(Y)

library(faux)
## 
## ************
## Welcome to faux. For support and examples visit:
## https://debruine.github.io/faux/
## - Get and set global package options with: faux_options()
## ************
set.seed(123)
n_sim = 1000
sim_int = rnorm_multi(
  n = n_sim, vars = 3,
  mu = colMeans(Yint),
  sd = apply(Yint, 2, sd),
  r = cor(Yint)[upper.tri(cor(Yint))]
)

sim_ext = rnorm_multi(
  n = n_sim, vars = 3,
  mu = colMeans(Yext),
  sd = apply(Yext, 2, sd),
  r = cor(Yext)[upper.tri(cor(Yext))]
)

sim_tot = data.frame(
  rbind(sim_int, sim_ext),
  g = gl(2,n_sim,2*n_sim, c('int','ext'))
)
head(sim_tot)
##          L        a         b   g
## 1 38.59073 21.72664 -2.597000 int
## 2 40.00038 21.22258 -2.648131 int
## 3 47.95447 20.08905 -2.605478 int
## 4 41.31624 20.78264 -2.175200 int
## 5 41.50610 20.76906 -3.533933 int
## 6 48.69954 20.10337 -2.092865 int
dim(sim_tot)
## [1] 2000    4
mi_color = ifelse(sim_tot$g == 'int',
                  'red', 'blue')
plot3d(sim_tot$L,
       sim_tot$a,
       sim_tot$b,
       col = mi_color)
#################################################
T2_2mp <- read_excel("G:/Mi unidad/MAESTRÍA CIENCIA Y TECNOLOGÍA ALIMENTOS/Métodos Multivariados/Clases/Clase 02.03.2023/mm2023_1_hotelling_030223.xlsx",sheet = "EJ2")
class(T2_2mp)
## [1] "tbl_df"     "tbl"        "data.frame"
M1=cbind(T2_2mp$N_15,T2_2mp$P_15,T2_2mp$K_15)
M2=cbind(T2_2mp$N_30,T2_2mp$P_30,T2_2mp$K_30)
Y2=rbind(M1,M2)
rownames(Y2)=c(paste0("muestra",1:24))
g2=gl(2,dim(M1)[[1]],24,labels = c("ET1","ET2"))
describeBy(Y2,g2)
## 
##  Descriptive statistics by group 
## INDICES: ET1
##    vars  n  mean   sd median trimmed  mad   min   max range  skew kurtosis   se
## V1    1 12 41.08 4.39  40.59   40.97 4.91 35.39 47.80 12.41  0.27    -1.60 1.27
## V2    2 12  2.12 0.09   2.11    2.12 0.10  1.98  2.27  0.29  0.01    -1.31 0.03
## V3    3 12 19.65 1.73  19.68   19.76 1.81 15.91 22.30  6.38 -0.46    -0.48 0.50
## ------------------------------------------------------------ 
## INDICES: ET2
##    vars  n  mean   sd median trimmed  mad   min   max range  skew kurtosis   se
## V1    1 12 43.98 4.05  43.29   43.85 5.10 38.93 50.34 11.41  0.23    -1.60 1.17
## V2    2 12  5.31 0.55   5.53    5.36 0.39  4.22  5.92  1.70 -0.64    -1.13 0.16
## V3    3 12 22.50 1.66  22.52   22.62 1.56 18.96 24.80  5.83 -0.46    -0.61 0.48
mc = ifelse(g2=='ET1',
            'red', 'blue')
plot3d(Y2, col=mc, type = 's')


psych::corPlot(T2_2mp)

library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ################################### WARNING ###################################
## # We noticed you have dplyr installed. The dplyr lag() function breaks how    #
## # base R's lag() function is supposed to work, which breaks lag(my_xts).      #
## #                                                                             #
## # If you call library(dplyr) later in this session, then calls to lag(my_xts) #
## # that you enter or source() into this session won't work correctly.          #
## #                                                                             #
## # All package code is unaffected because it is protected by the R namespace   #
## # mechanism.                                                                  #
## #                                                                             #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## # You can use stats::lag() to make sure you're not using dplyr::lag(), or you #
## # can add conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop   #
## # dplyr from breaking base R's lag() function.                                #
## ################################### WARNING ###################################
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
chart.Correlation(T2_2mp)
## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

T2_2mpdif=data.frame(N_d=T2_2mp$N_15-T2_2mp$N_30,P_d=T2_2mp$P_15-T2_2mp$P_30,K_d=T2_2mp$K_15-T2_2mp$K_30)
error.bars(T2_2mpdif,bar=F,ylab="grupo de medias",xlab="respuestas",eyes=F)

HotellingsT2(T2_2mpdif)
## 
##  Hotelling's one sample T2-test
## 
## data:  T2_2mpdif
## T.2 = 280.79, df1 = 3, df2 = 9, p-value = 0.000000003307
## alternative hypothesis: true location is not equal to c(0,0,0)
#######################################################
#probando normalidad multivariante
#probando igualdad de matrices de varianzas y covarianzas
#hacer gráfico 3d para ver las respuestas en sus grupos