Análisis multivariante

Author

Daniel Perdomo

Published

June 3, 2026

Importar datos

Importar en R

# carga de paquetes
library(tidyverse)
Warning: package 'readr' was built under R version 4.4.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ppcor) # Correlaciones parciales
Warning: package 'ppcor' was built under R version 4.4.3
Cargando paquete requerido: MASS

Adjuntando el paquete: 'MASS'

The following object is masked from 'package:dplyr':

    select
library(MASS) # Outliers mahalanobis distance
library(ggplot2) # Gráficos
library(corrplot)
corrplot 0.94 loaded
# Alternative (disabled): choose the two files interactively.
# ruta1 <- file.choose()
# ruta2 <- file.choose()

# Read both data sets using ";" as the field separator and the first row as
# the header.
wine_red <- read.csv(
  "C:\\Users\\MINEDUCYT\\Documents\\Seminario\\wine+quality\\winequality-red.csv",
  header = TRUE,
  sep = ";"
)
wine_white <- read.csv(
  "C:\\Users\\MINEDUCYT\\Documents\\Seminario\\wine+quality\\winequality-white.csv",
  header = TRUE,
  sep = ";"
)

# Red wine: preview the first six rows
head(wine_red, n = 6L)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
1           7.4             0.70        0.00            1.9     0.076
2           7.8             0.88        0.00            2.6     0.098
3           7.8             0.76        0.04            2.3     0.092
4          11.2             0.28        0.56            1.9     0.075
5           7.4             0.70        0.00            1.9     0.076
6           7.4             0.66        0.00            1.8     0.075
  free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
1                  11                   34  0.9978 3.51      0.56     9.4
2                  25                   67  0.9968 3.20      0.68     9.8
3                  15                   54  0.9970 3.26      0.65     9.8
4                  17                   60  0.9980 3.16      0.58     9.8
5                  11                   34  0.9978 3.51      0.56     9.4
6                  13                   40  0.9978 3.51      0.56     9.4
  quality
1       5
2       5
3       5
4       6
5       5
6       5
# White wine: preview the first six rows
head(wine_white, n = 6L)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
1           7.0             0.27        0.36           20.7     0.045
2           6.3             0.30        0.34            1.6     0.049
3           8.1             0.28        0.40            6.9     0.050
4           7.2             0.23        0.32            8.5     0.058
5           7.2             0.23        0.32            8.5     0.058
6           8.1             0.28        0.40            6.9     0.050
  free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
1                  45                  170  1.0010 3.00      0.45     8.8
2                  14                  132  0.9940 3.30      0.49     9.5
3                  30                   97  0.9951 3.26      0.44    10.1
4                  47                  186  0.9956 3.19      0.40     9.9
5                  47                  186  0.9956 3.19      0.40     9.9
6                  30                   97  0.9951 3.26      0.44    10.1
  quality
1       6
2       6
3       6
4       6
5       6
6       6
# Optional down-sampling (disabled): would keep only the first 300
# observations of each data set.
# wine_red=wine_red[0:300,]
# wine_white=wine_white[0:300,]

# Remove the "quality" column from both data frames.
wine_red[["quality"]] <- NULL
wine_white[["quality"]] <- NULL

# Red wine: preview the first six rows after dropping "quality"
head(wine_red, n = 6L)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
1           7.4             0.70        0.00            1.9     0.076
2           7.8             0.88        0.00            2.6     0.098
3           7.8             0.76        0.04            2.3     0.092
4          11.2             0.28        0.56            1.9     0.075
5           7.4             0.70        0.00            1.9     0.076
6           7.4             0.66        0.00            1.8     0.075
  free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
1                  11                   34  0.9978 3.51      0.56     9.4
2                  25                   67  0.9968 3.20      0.68     9.8
3                  15                   54  0.9970 3.26      0.65     9.8
4                  17                   60  0.9980 3.16      0.58     9.8
5                  11                   34  0.9978 3.51      0.56     9.4
6                  13                   40  0.9978 3.51      0.56     9.4
# White wine: preview the first six rows after dropping "quality"
head(wine_white, n = 6L)
  fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
1           7.0             0.27        0.36           20.7     0.045
2           6.3             0.30        0.34            1.6     0.049
3           8.1             0.28        0.40            6.9     0.050
4           7.2             0.23        0.32            8.5     0.058
5           7.2             0.23        0.32            8.5     0.058
6           8.1             0.28        0.40            6.9     0.050
  free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
1                  45                  170  1.0010 3.00      0.45     8.8
2                  14                  132  0.9940 3.30      0.49     9.5
3                  30                   97  0.9951 3.26      0.44    10.1
4                  47                  186  0.9956 3.19      0.40     9.9
5                  47                  186  0.9956 3.19      0.40     9.9
6                  30                   97  0.9951 3.26      0.44    10.1

Importar en Python

# Load packages
import pandas as pd # create and manipulate DataFrames
import numpy as np # multidimensional vectors and matrices
import matplotlib.pyplot as plt # statistical plots
from scipy.spatial.distance import pdist, squareform # Euclidean distances
import pingouin as pg # partial correlations and Mahalanobis distance
from scipy.spatial.distance import mahalanobis # Mahalanobis distance
from scipy.stats import chi2 # chi-squared distribution
import seaborn as sns # plots
# Raw strings (r"...") keep the backslashes of the Windows paths literal.
import pandas as pd

# Both files are read with ";" as the field separator.
wine_red = pd.read_csv(
    r"C:\Users\MINEDUCYT\Documents\Seminario\wine+quality\winequality-red.csv",
    sep=";",
)
wine_white = pd.read_csv(
    r"C:\Users\MINEDUCYT\Documents\Seminario\wine+quality\winequality-white.csv",
    sep=";",
)

# Optional down-sampling (disabled): would keep only the first 300 rows
# (all columns) of each DataFrame.
# wine_red=wine_red.iloc[0:300,]
# wine_white=wine_white.iloc[0:300,]

# Red wine: show the first five rows
print(wine_red.head(n=5))
   fixed acidity  volatile acidity  citric acid  ...  sulphates  alcohol  quality
0            7.4              0.70         0.00  ...       0.56      9.4        5
1            7.8              0.88         0.00  ...       0.68      9.8        5
2            7.8              0.76         0.04  ...       0.65      9.8        5
3           11.2              0.28         0.56  ...       0.58      9.8        6
4            7.4              0.70         0.00  ...       0.56      9.4        5

[5 rows x 12 columns]
# White wine: show the first five rows
print(wine_white.head(n=5))
   fixed acidity  volatile acidity  citric acid  ...  sulphates  alcohol  quality
0            7.0              0.27         0.36  ...       0.45      8.8        6
1            6.3              0.30         0.34  ...       0.49      9.5        6
2            8.1              0.28         0.40  ...       0.44     10.1        6
3            7.2              0.23         0.32  ...       0.40      9.9        6
4            7.2              0.23         0.32  ...       0.40      9.9        6

[5 rows x 12 columns]
# Remove the "quality" column from both DataFrames.
wine_red = wine_red.drop(columns="quality")
wine_white = wine_white.drop(columns="quality")

# Red wine: show the first five rows
print(wine_red.head(n=5))
   fixed acidity  volatile acidity  citric acid  ...    pH  sulphates  alcohol
0            7.4              0.70         0.00  ...  3.51       0.56      9.4
1            7.8              0.88         0.00  ...  3.20       0.68      9.8
2            7.8              0.76         0.04  ...  3.26       0.65      9.8
3           11.2              0.28         0.56  ...  3.16       0.58      9.8
4            7.4              0.70         0.00  ...  3.51       0.56      9.4

[5 rows x 11 columns]
# Vino blanco
print(wine_red.head())
   fixed acidity  volatile acidity  citric acid  ...    pH  sulphates  alcohol
0            7.4              0.70         0.00  ...  3.51       0.56      9.4
1            7.8              0.88         0.00  ...  3.20       0.68      9.8
2            7.8              0.76         0.04  ...  3.26       0.65      9.8
3           11.2              0.28         0.56  ...  3.16       0.58      9.8
4            7.4              0.70         0.00  ...  3.51       0.56      9.4

[5 rows x 11 columns]

Vector de medias

Aplicación en R

# Red wine: column-wise mean of every variable
means_red <- colMeans(wine_red, na.rm = FALSE)
print(means_red)
       fixed.acidity     volatile.acidity          citric.acid 
          8.31963727           0.52782051           0.27097561 
      residual.sugar            chlorides  free.sulfur.dioxide 
          2.53880550           0.08746654          15.87492183 
total.sulfur.dioxide              density                   pH 
         46.46779237           0.99674668           3.31111320 
           sulphates              alcohol 
          0.65814884          10.42298311 
# White wine: column-wise mean of every variable
means_white <- colMeans(wine_white, na.rm = FALSE)
print(means_white)
       fixed.acidity     volatile.acidity          citric.acid 
          6.85478767           0.27824112           0.33419151 
      residual.sugar            chlorides  free.sulfur.dioxide 
          6.39141486           0.04577236          35.30808493 
total.sulfur.dioxide              density                   pH 
        138.36065741           0.99402738           3.18826664 
           sulphates              alcohol 
          0.48984688          10.51426705 

Aplicación en Python

# Red wine: mean of every column
means_red = wine_red.mean(axis=0)
print("Vector de Medias:\n", means_red, sep=" ")
Vector de Medias:
 fixed acidity            8.319637
volatile acidity         0.527821
citric acid              0.270976
residual sugar           2.538806
chlorides                0.087467
free sulfur dioxide     15.874922
total sulfur dioxide    46.467792
density                  0.996747
pH                       3.311113
sulphates                0.658149
alcohol                 10.422983
dtype: float64
# White wine: mean of every column
means_white = wine_white.mean(axis=0)
print("Vector de Medias:\n", means_white, sep=" ")
Vector de Medias:
 fixed acidity             6.854788
volatile acidity          0.278241
citric acid               0.334192
residual sugar            6.391415
chlorides                 0.045772
free sulfur dioxide      35.308085
total sulfur dioxide    138.360657
density                   0.994027
pH                        3.188267
sulphates                 0.489847
alcohol                  10.514267
dtype: float64

Matriz de varianzas y covarianzas

Aplicación en R

# Red wine: variance-covariance matrix of all columns
cov_matrix_red <- cov(wine_red, method = "pearson")
print(cov_matrix_red)
                     fixed.acidity volatile.acidity   citric.acid
fixed.acidity          3.031416389    -7.985142e-02  0.2278200037
volatile.acidity      -0.079851417     3.206238e-02 -0.0192716208
citric.acid            0.227820004    -1.927162e-02  0.0379474831
residual.sugar         0.281756262     4.841910e-04  0.0394342700
chlorides              0.007678692     5.165869e-04  0.0018687248
free.sulfur.dioxide   -2.800921493    -1.967359e-02 -0.1242521139
total.sulfur.dioxide  -6.482345858     4.504257e-01  0.2276972740
density                0.002195224     7.443665e-06  0.0001341746
pH                    -0.183585704     6.494699e-03 -0.0162975823
sulphates              0.054010092    -7.921434e-03  0.0103277145
alcohol               -0.114421153    -3.860022e-02  0.0228151729
                     residual.sugar     chlorides free.sulfur.dioxide
fixed.acidity          0.2817562623  7.678692e-03       -2.800921e+00
volatile.acidity       0.0004841910  5.165869e-04       -1.967359e-02
citric.acid            0.0394342700  1.868725e-03       -1.242521e-01
residual.sugar         1.9878971330  3.690176e-03        2.758611e+00
chlorides              0.0036901759  2.215143e-03        2.738303e-03
free.sulfur.dioxide    2.7586114522  2.738303e-03        1.094149e+02
total.sulfur.dioxide   9.4164414790  7.338675e-02        2.297375e+02
density                0.0009454109  1.782176e-05       -4.332504e-04
pH                    -0.0186442890 -1.925745e-03        1.136531e-01
sulphates              0.0013209414  2.961878e-03        9.159247e-02
alcohol                0.0632189598 -1.109152e-02       -7.736984e-01
                     total.sulfur.dioxide       density            pH
fixed.acidity               -6.482346e+00  2.195224e-03 -1.835857e-01
volatile.acidity             4.504257e-01  7.443665e-06  6.494699e-03
citric.acid                  2.276973e-01  1.341746e-04 -1.629758e-02
residual.sugar               9.416441e+00  9.454109e-04 -1.864429e-02
chlorides                    7.338675e-02  1.782176e-05 -1.925745e-03
free.sulfur.dioxide          2.297375e+02 -4.332504e-04  1.136531e-01
total.sulfur.dioxide         1.082102e+03  4.424727e-03 -3.376988e-01
density                      4.424727e-03  3.562029e-06 -9.956395e-05
pH                          -3.376988e-01 -9.956395e-05  2.383518e-02
sulphates                    2.394710e-01  4.750962e-05 -5.146186e-03
alcohol                     -7.209298e+00 -9.979518e-04  3.383162e-02
                         sulphates       alcohol
fixed.acidity         5.401009e-02 -0.1144211534
volatile.acidity     -7.921434e-03 -0.0386002214
citric.acid           1.032771e-02  0.0228151729
residual.sugar        1.320941e-03  0.0632189598
chlorides             2.961878e-03 -0.0110915178
free.sulfur.dioxide   9.159247e-02 -0.7736984004
total.sulfur.dioxide  2.394710e-01 -7.2092978950
density               4.750962e-05 -0.0009979518
pH                   -5.146186e-03  0.0338316166
sulphates             2.873262e-02  0.0169067772
alcohol               1.690678e-02  1.1356473950
# White wine: variance-covariance matrix of all columns
cov_matrix_white <- cov(wine_white, method = "pearson")
print(cov_matrix_white)
                     fixed.acidity volatile.acidity   citric.acid
fixed.acidity         0.7121135857    -1.930571e-03  0.0295325116
volatile.acidity     -0.0019305706     1.015954e-02 -0.0018232776
citric.acid           0.0295325116    -1.823278e-03  0.0146457930
residual.sugar        0.3810218137     3.286533e-02  0.0578289265
chlorides             0.0004256255     1.552775e-04  0.0003023838
free.sulfur.dioxide  -0.7089186424    -1.663005e-01  0.1936297767
total.sulfur.dioxide  3.2660133926     3.823539e-01  0.6229887081
density               0.0006696773     8.173933e-06  0.0000541138
pH                   -0.0542648260    -4.857531e-04 -0.0029923451
sulphates            -0.0016509923    -4.109902e-04  0.0008608829
alcohol              -0.1255328219     8.399723e-03 -0.0112782389
                     residual.sugar     chlorides free.sulfur.dioxide
fixed.acidity           0.381021814  4.256255e-04        -0.708918642
volatile.acidity        0.032865334  1.552775e-04        -0.166300459
citric.acid             0.057828926  3.023838e-04         0.193629777
residual.sugar         25.725770164  9.827502e-03        25.800577899
chlorides               0.009827502  4.773337e-04         0.037674498
free.sulfur.dioxide    25.800577899  3.767450e-02       289.242719999
total.sulfur.dioxide   86.531302970  1.846875e-01       444.865890947
density                 0.012727165  1.680754e-05         0.014965532
pH                     -0.148683661 -2.983649e-04        -0.001586555
sulphates              -0.015434743  4.179687e-05         0.114937934
alcohol                -2.812740332 -9.684235e-03        -5.234508674
                     total.sulfur.dioxide       density            pH
fixed.acidity                  3.26601339  6.696773e-04 -5.426483e-02
volatile.acidity               0.38235390  8.173933e-06 -4.857531e-04
citric.acid                    0.62298871  5.411380e-05 -2.992345e-03
residual.sugar                86.53130297  1.272717e-02 -1.486837e-01
chlorides                      0.18468749  1.680754e-05 -2.983649e-04
free.sulfur.dioxide          444.86589095  1.496553e-02 -1.586555e-03
total.sulfur.dioxide        1806.08549085  6.735203e-02  1.489422e-02
density                        0.06735203  8.945524e-06 -4.226861e-05
pH                             0.01489422 -4.226861e-05  2.280118e-02
sulphates                      0.65264458  2.542747e-05  2.687523e-03
alcohol                      -23.47660460 -2.871430e-03  2.256505e-02
                         sulphates       alcohol
fixed.acidity        -1.650992e-03  -0.125532822
volatile.acidity     -4.109902e-04   0.008399723
citric.acid           8.608829e-04  -0.011278239
residual.sugar       -1.543474e-02  -2.812740332
chlorides             4.179687e-05  -0.009684235
free.sulfur.dioxide   1.149379e-01  -5.234508674
total.sulfur.dioxide  6.526446e-01 -23.476604603
density               2.542747e-05  -0.002871430
pH                    2.687523e-03   0.022565052
sulphates             1.302471e-02  -0.002448356
alcohol              -2.448356e-03   1.514426982

Aplicación en Python

# Lift pandas' display limits so every row and column is printed.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# Red wine: variance-covariance matrix of all columns
cov_matrix_red = wine_red.cov(ddof=1)
print("Matriz de covarianzas:\n", cov_matrix_red, sep=" ")
Matriz de covarianzas:
                       fixed acidity  volatile acidity  citric acid  \
fixed acidity              3.031416         -0.079851     0.227820   
volatile acidity          -0.079851          0.032062    -0.019272   
citric acid                0.227820         -0.019272     0.037947   
residual sugar             0.281756          0.000484     0.039434   
chlorides                  0.007679          0.000517     0.001869   
free sulfur dioxide       -2.800921         -0.019674    -0.124252   
total sulfur dioxide      -6.482346          0.450426     0.227697   
density                    0.002195          0.000007     0.000134   
pH                        -0.183586          0.006495    -0.016298   
sulphates                  0.054010         -0.007921     0.010328   
alcohol                   -0.114421         -0.038600     0.022815   

                      residual sugar  chlorides  free sulfur dioxide  \
fixed acidity               0.281756   0.007679            -2.800921   
volatile acidity            0.000484   0.000517            -0.019674   
citric acid                 0.039434   0.001869            -0.124252   
residual sugar              1.987897   0.003690             2.758611   
chlorides                   0.003690   0.002215             0.002738   
free sulfur dioxide         2.758611   0.002738           109.414884   
total sulfur dioxide        9.416441   0.073387           229.737521   
density                     0.000945   0.000018            -0.000433   
pH                         -0.018644  -0.001926             0.113653   
sulphates                   0.001321   0.002962             0.091592   
alcohol                     0.063219  -0.011092            -0.773698   

                      total sulfur dioxide   density        pH  sulphates  \
fixed acidity                    -6.482346  0.002195 -0.183586   0.054010   
volatile acidity                  0.450426  0.000007  0.006495  -0.007921   
citric acid                       0.227697  0.000134 -0.016298   0.010328   
residual sugar                    9.416441  0.000945 -0.018644   0.001321   
chlorides                         0.073387  0.000018 -0.001926   0.002962   
free sulfur dioxide             229.737521 -0.000433  0.113653   0.091592   
total sulfur dioxide           1082.102373  0.004425 -0.337699   0.239471   
density                           0.004425  0.000004 -0.000100   0.000048   
pH                               -0.337699 -0.000100  0.023835  -0.005146   
sulphates                         0.239471  0.000048 -0.005146   0.028733   
alcohol                          -7.209298 -0.000998  0.033832   0.016907   

                       alcohol  
fixed acidity        -0.114421  
volatile acidity     -0.038600  
citric acid           0.022815  
residual sugar        0.063219  
chlorides            -0.011092  
free sulfur dioxide  -0.773698  
total sulfur dioxide -7.209298  
density              -0.000998  
pH                    0.033832  
sulphates             0.016907  
alcohol               1.135647  
# White wine: variance-covariance matrix of all columns
cov_matrix_white = wine_white.cov(ddof=1)
print("Matriz de covarianzas:\n", cov_matrix_white, sep=" ")
Matriz de covarianzas:
                       fixed acidity  volatile acidity  citric acid  \
fixed acidity              0.712114         -0.001931     0.029533   
volatile acidity          -0.001931          0.010160    -0.001823   
citric acid                0.029533         -0.001823     0.014646   
residual sugar             0.381022          0.032865     0.057829   
chlorides                  0.000426          0.000155     0.000302   
free sulfur dioxide       -0.708919         -0.166300     0.193630   
total sulfur dioxide       3.266013          0.382354     0.622989   
density                    0.000670          0.000008     0.000054   
pH                        -0.054265         -0.000486    -0.002992   
sulphates                 -0.001651         -0.000411     0.000861   
alcohol                   -0.125533          0.008400    -0.011278   

                      residual sugar  chlorides  free sulfur dioxide  \
fixed acidity               0.381022   0.000426            -0.708919   
volatile acidity            0.032865   0.000155            -0.166300   
citric acid                 0.057829   0.000302             0.193630   
residual sugar             25.725770   0.009828            25.800578   
chlorides                   0.009828   0.000477             0.037674   
free sulfur dioxide        25.800578   0.037674           289.242720   
total sulfur dioxide       86.531303   0.184687           444.865891   
density                     0.012727   0.000017             0.014966   
pH                         -0.148684  -0.000298            -0.001587   
sulphates                  -0.015435   0.000042             0.114938   
alcohol                    -2.812740  -0.009684            -5.234509   

                      total sulfur dioxide   density        pH  sulphates  \
fixed acidity                     3.266013  0.000670 -0.054265  -0.001651   
volatile acidity                  0.382354  0.000008 -0.000486  -0.000411   
citric acid                       0.622989  0.000054 -0.002992   0.000861   
residual sugar                   86.531303  0.012727 -0.148684  -0.015435   
chlorides                         0.184687  0.000017 -0.000298   0.000042   
free sulfur dioxide             444.865891  0.014966 -0.001587   0.114938   
total sulfur dioxide           1806.085491  0.067352  0.014894   0.652645   
density                           0.067352  0.000009 -0.000042   0.000025   
pH                                0.014894 -0.000042  0.022801   0.002688   
sulphates                         0.652645  0.000025  0.002688   0.013025   
alcohol                         -23.476605 -0.002871  0.022565  -0.002448   

                        alcohol  
fixed acidity         -0.125533  
volatile acidity       0.008400  
citric acid           -0.011278  
residual sugar        -2.812740  
chlorides             -0.009684  
free sulfur dioxide   -5.234509  
total sulfur dioxide -23.476605  
density               -0.002871  
pH                     0.022565  
sulphates             -0.002448  
alcohol                1.514427  

Matriz de Correlaciones

Aplicación en R

# Red wine: Pearson correlation matrix of all columns
corr_matrix_red <- cor(wine_red, method = "pearson")
print(corr_matrix_red)
                     fixed.acidity volatile.acidity citric.acid residual.sugar
fixed.acidity           1.00000000     -0.256130895  0.67170343    0.114776724
volatile.acidity       -0.25613089      1.000000000 -0.55249568    0.001917882
citric.acid             0.67170343     -0.552495685  1.00000000    0.143577162
residual.sugar          0.11477672      0.001917882  0.14357716    1.000000000
chlorides               0.09370519      0.061297772  0.20382291    0.055609535
free.sulfur.dioxide    -0.15379419     -0.010503827 -0.06097813    0.187048995
total.sulfur.dioxide   -0.11318144      0.076470005  0.03553302    0.203027882
density                 0.66804729      0.022026232  0.36494718    0.355283371
pH                     -0.68297819      0.234937294 -0.54190414   -0.085652422
sulphates               0.18300566     -0.260986685  0.31277004    0.005527121
alcohol                -0.06166827     -0.202288027  0.10990325    0.042075437
                        chlorides free.sulfur.dioxide total.sulfur.dioxide
fixed.acidity         0.093705186        -0.153794193          -0.11318144
volatile.acidity      0.061297772        -0.010503827           0.07647000
citric.acid           0.203822914        -0.060978129           0.03553302
residual.sugar        0.055609535         0.187048995           0.20302788
chlorides             1.000000000         0.005562147           0.04740047
free.sulfur.dioxide   0.005562147         1.000000000           0.66766645
total.sulfur.dioxide  0.047400468         0.667666450           1.00000000
density               0.200632327        -0.021945831           0.07126948
pH                   -0.265026131         0.070377499          -0.06649456
sulphates             0.371260481         0.051657572           0.04294684
alcohol              -0.221140545        -0.069408354          -0.20565394
                         density          pH    sulphates     alcohol
fixed.acidity         0.66804729 -0.68297819  0.183005664 -0.06166827
volatile.acidity      0.02202623  0.23493729 -0.260986685 -0.20228803
citric.acid           0.36494718 -0.54190414  0.312770044  0.10990325
residual.sugar        0.35528337 -0.08565242  0.005527121  0.04207544
chlorides             0.20063233 -0.26502613  0.371260481 -0.22114054
free.sulfur.dioxide  -0.02194583  0.07037750  0.051657572 -0.06940835
total.sulfur.dioxide  0.07126948 -0.06649456  0.042946836 -0.20565394
density               1.00000000 -0.34169933  0.148506412 -0.49617977
pH                   -0.34169933  1.00000000 -0.196647602  0.20563251
sulphates             0.14850641 -0.19664760  1.000000000  0.09359475
alcohol              -0.49617977  0.20563251  0.093594750  1.00000000
# Red wine: heat map of the correlation matrix, drawn with a 100-step
# blue-white-red palette and slightly reduced label text (tl.cex).
corrplot(
  corr_matrix_red,
  method = "color",
  col = colorRampPalette(c("blue", "white", "red"))(100),
  tl.cex = 0.8
)

# White wine: Pearson correlation matrix of all columns
corr_matrix_white <- cor(wine_white, method = "pearson")
print(corr_matrix_white)
                     fixed.acidity volatile.acidity citric.acid residual.sugar
fixed.acidity           1.00000000      -0.02269729  0.28918070     0.08902070
volatile.acidity       -0.02269729       1.00000000 -0.14947181     0.06428606
citric.acid             0.28918070      -0.14947181  1.00000000     0.09421162
residual.sugar          0.08902070       0.06428606  0.09421162     1.00000000
chlorides               0.02308564       0.07051157  0.11436445     0.08868454
free.sulfur.dioxide    -0.04939586      -0.09701194  0.09407722     0.29909835
total.sulfur.dioxide    0.09106976       0.08926050  0.12113080     0.40143931
density                 0.26533101       0.02711385  0.14950257     0.83896645
pH                     -0.42585829      -0.03191537 -0.16374821    -0.19413345
sulphates              -0.01714299      -0.03572815  0.06233094    -0.02666437
alcohol                -0.12088112       0.06771794 -0.07572873    -0.45063122
                       chlorides free.sulfur.dioxide total.sulfur.dioxide
fixed.acidity         0.02308564       -0.0493958591          0.091069756
volatile.acidity      0.07051157       -0.0970119393          0.089260504
citric.acid           0.11436445        0.0940772210          0.121130798
residual.sugar        0.08868454        0.2990983537          0.401439311
chlorides             1.00000000        0.1013923521          0.198910300
free.sulfur.dioxide   0.10139235        1.0000000000          0.615500965
total.sulfur.dioxide  0.19891030        0.6155009650          1.000000000
density               0.25721132        0.2942104109          0.529881324
pH                   -0.09043946       -0.0006177961          0.002320972
sulphates             0.01676288        0.0592172458          0.134562367
alcohol              -0.36018871       -0.2501039415         -0.448892102
                         density            pH   sulphates     alcohol
fixed.acidity         0.26533101 -0.4258582910 -0.01714299 -0.12088112
volatile.acidity      0.02711385 -0.0319153683 -0.03572815  0.06771794
citric.acid           0.14950257 -0.1637482114  0.06233094 -0.07572873
residual.sugar        0.83896645 -0.1941334540 -0.02666437 -0.45063122
chlorides             0.25721132 -0.0904394560  0.01676288 -0.36018871
free.sulfur.dioxide   0.29421041 -0.0006177961  0.05921725 -0.25010394
total.sulfur.dioxide  0.52988132  0.0023209718  0.13456237 -0.44889210
density               1.00000000 -0.0935914935  0.07449315 -0.78013762
pH                   -0.09359149  1.0000000000  0.15595150  0.12143210
sulphates             0.07449315  0.1559514973  1.00000000 -0.01743277
alcohol              -0.78013762  0.1214320987 -0.01743277  1.00000000
# White wine: heat map of the correlation matrix, drawn with a 100-step
# royalblue-white-orange palette and slightly reduced label text (tl.cex).
corrplot(
  corr_matrix_white,
  method = "color",
  col = colorRampPalette(c("royalblue", "white", "orange"))(100),
  tl.cex = 0.8
)

Aplicación en Python

# Red wine: Pearson correlation matrix of all columns
corr_matrix_red = wine_red.corr(method="pearson")
print("Matriz de Correlaciones:\n", corr_matrix_red, sep=" ")
Matriz de Correlaciones:
                       fixed acidity  volatile acidity  citric acid  \
fixed acidity              1.000000         -0.256131     0.671703   
volatile acidity          -0.256131          1.000000    -0.552496   
citric acid                0.671703         -0.552496     1.000000   
residual sugar             0.114777          0.001918     0.143577   
chlorides                  0.093705          0.061298     0.203823   
free sulfur dioxide       -0.153794         -0.010504    -0.060978   
total sulfur dioxide      -0.113181          0.076470     0.035533   
density                    0.668047          0.022026     0.364947   
pH                        -0.682978          0.234937    -0.541904   
sulphates                  0.183006         -0.260987     0.312770   
alcohol                   -0.061668         -0.202288     0.109903   

                      residual sugar  chlorides  free sulfur dioxide  \
fixed acidity               0.114777   0.093705            -0.153794   
volatile acidity            0.001918   0.061298            -0.010504   
citric acid                 0.143577   0.203823            -0.060978   
residual sugar              1.000000   0.055610             0.187049   
chlorides                   0.055610   1.000000             0.005562   
free sulfur dioxide         0.187049   0.005562             1.000000   
total sulfur dioxide        0.203028   0.047400             0.667666   
density                     0.355283   0.200632            -0.021946   
pH                         -0.085652  -0.265026             0.070377   
sulphates                   0.005527   0.371260             0.051658   
alcohol                     0.042075  -0.221141            -0.069408   

                      total sulfur dioxide   density        pH  sulphates  \
fixed acidity                    -0.113181  0.668047 -0.682978   0.183006   
volatile acidity                  0.076470  0.022026  0.234937  -0.260987   
citric acid                       0.035533  0.364947 -0.541904   0.312770   
residual sugar                    0.203028  0.355283 -0.085652   0.005527   
chlorides                         0.047400  0.200632 -0.265026   0.371260   
free sulfur dioxide               0.667666 -0.021946  0.070377   0.051658   
total sulfur dioxide              1.000000  0.071269 -0.066495   0.042947   
density                           0.071269  1.000000 -0.341699   0.148506   
pH                               -0.066495 -0.341699  1.000000  -0.196648   
sulphates                         0.042947  0.148506 -0.196648   1.000000   
alcohol                          -0.205654 -0.496180  0.205633   0.093595   

                       alcohol  
fixed acidity        -0.061668  
volatile acidity     -0.202288  
citric acid           0.109903  
residual sugar        0.042075  
chlorides            -0.221141  
free sulfur dioxide  -0.069408  
total sulfur dioxide -0.205654  
density              -0.496180  
pH                    0.205633  
sulphates             0.093595  
alcohol               1.000000  
# Heatmap of the red-wine correlation matrix, annotated with the coefficients
plt.figure(figsize=(10, 8))
sns.heatmap(
    corr_matrix_red,
    annot=True,
    cmap="coolwarm",
    center=0,
    linewidths=0.5,
)
plt.show()

# White wine: correlation matrix of the wine_white columns, printed in full
corr_matrix_white = wine_white.corr()
print("Matriz de Correlaciones:\n", corr_matrix_white)
Matriz de Correlaciones:
                       fixed acidity  volatile acidity  citric acid  \
fixed acidity              1.000000         -0.022697     0.289181   
volatile acidity          -0.022697          1.000000    -0.149472   
citric acid                0.289181         -0.149472     1.000000   
residual sugar             0.089021          0.064286     0.094212   
chlorides                  0.023086          0.070512     0.114364   
free sulfur dioxide       -0.049396         -0.097012     0.094077   
total sulfur dioxide       0.091070          0.089261     0.121131   
density                    0.265331          0.027114     0.149503   
pH                        -0.425858         -0.031915    -0.163748   
sulphates                 -0.017143         -0.035728     0.062331   
alcohol                   -0.120881          0.067718    -0.075729   

                      residual sugar  chlorides  free sulfur dioxide  \
fixed acidity               0.089021   0.023086            -0.049396   
volatile acidity            0.064286   0.070512            -0.097012   
citric acid                 0.094212   0.114364             0.094077   
residual sugar              1.000000   0.088685             0.299098   
chlorides                   0.088685   1.000000             0.101392   
free sulfur dioxide         0.299098   0.101392             1.000000   
total sulfur dioxide        0.401439   0.198910             0.615501   
density                     0.838966   0.257211             0.294210   
pH                         -0.194133  -0.090439            -0.000618   
sulphates                  -0.026664   0.016763             0.059217   
alcohol                    -0.450631  -0.360189            -0.250104   

                      total sulfur dioxide   density        pH  sulphates  \
fixed acidity                     0.091070  0.265331 -0.425858  -0.017143   
volatile acidity                  0.089261  0.027114 -0.031915  -0.035728   
citric acid                       0.121131  0.149503 -0.163748   0.062331   
residual sugar                    0.401439  0.838966 -0.194133  -0.026664   
chlorides                         0.198910  0.257211 -0.090439   0.016763   
free sulfur dioxide               0.615501  0.294210 -0.000618   0.059217   
total sulfur dioxide              1.000000  0.529881  0.002321   0.134562   
density                           0.529881  1.000000 -0.093591   0.074493   
pH                                0.002321 -0.093591  1.000000   0.155951   
sulphates                         0.134562  0.074493  0.155951   1.000000   
alcohol                          -0.448892 -0.780138  0.121432  -0.017433   

                       alcohol  
fixed acidity        -0.120881  
volatile acidity      0.067718  
citric acid          -0.075729  
residual sugar       -0.450631  
chlorides            -0.360189  
free sulfur dioxide  -0.250104  
total sulfur dioxide -0.448892  
density              -0.780138  
pH                    0.121432  
sulphates            -0.017433  
alcohol               1.000000  
# Heatmap of the white-wine correlation matrix (corr_matrix_white)
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix_white, annot=True, cmap="coolwarm", center=0, linewidths=0.5)
plt.show()

Matriz de Correlaciones Parciales

Aplicación en R

# Red wine
pcor_red <- pcor(wine_red) # partial-correlation result object returned by pcor()
pcor_matrix_red <- pcor_red$estimate # the `estimate` element holds the matrix of partial correlation coefficients
pcor_matrix_red
                     fixed.acidity volatile.acidity citric.acid residual.sugar
fixed.acidity           1.00000000       0.04973736  0.33328898    -0.43095775
volatile.acidity        0.04973736       1.00000000 -0.53546402    -0.02402241
citric.acid             0.33328898      -0.53546402  1.00000000     0.05341803
residual.sugar         -0.43095775      -0.02402241  0.05341803     1.00000000
chlorides              -0.23335902       0.26401083  0.26370924    -0.01084620
free.sulfur.dioxide     0.11013350      -0.16533973 -0.16249047     0.12649385
total.sulfur.dioxide   -0.23171731       0.21410687  0.26497412     0.03630121
density                 0.78544969       0.12842079  0.00859700     0.59519561
pH                     -0.71850174       0.02664812 -0.03423779    -0.32904958
sulphates              -0.15206062      -0.20942859  0.02655334    -0.20073068
alcohol                 0.54341241       0.07891145  0.14946708     0.50207731
                       chlorides free.sulfur.dioxide total.sulfur.dioxide
fixed.acidity        -0.23335902          0.11013350          -0.23171731
volatile.acidity      0.26401083         -0.16533973           0.21410687
citric.acid           0.26370924         -0.16249047           0.26497412
residual.sugar       -0.01084620          0.12649385           0.03630121
chlorides             1.00000000          0.05852533          -0.14017933
free.sulfur.dioxide   0.05852533          1.00000000           0.66293391
total.sulfur.dioxide -0.14017933          0.66293391           1.00000000
density               0.09380338         -0.08543551           0.07878250
pH                   -0.23105397          0.13765744          -0.19390050
sulphates             0.35050976          0.05395905           0.03306191
alcohol              -0.09234314         -0.02614219          -0.08343417
                         density          pH   sulphates     alcohol
fixed.acidity         0.78544969 -0.71850174 -0.15206062  0.54341241
volatile.acidity      0.12842079  0.02664812 -0.20942859  0.07891145
citric.acid           0.00859700 -0.03423779  0.02655334  0.14946708
residual.sugar        0.59519561 -0.32904958 -0.20073068  0.50207731
chlorides             0.09380338 -0.23105397  0.35050976 -0.09234314
free.sulfur.dioxide  -0.08543551  0.13765744  0.05395905 -0.02614219
total.sulfur.dioxide  0.07878250 -0.19390050  0.03306191 -0.08343417
density               1.00000000  0.57223228  0.24799149 -0.75581233
pH                    0.57223228  1.00000000 -0.12860181  0.52004381
sulphates             0.24799149 -0.12860181  1.00000000  0.28879013
alcohol              -0.75581233  0.52004381  0.28879013  1.00000000
# Heatmap of the red-wine partial correlations (blue-white-red palette, 100 steps)
corrplot(
  pcor_matrix_red,
  method = "color",
  col = colorRampPalette(c("blue", "white", "red"))(100),
  tl.cex = 0.8
)

# White wine
pcor_white <- pcor(wine_white) # partial-correlation result object returned by pcor()
pcor_matrix_white <- pcor_white$estimate # the `estimate` element holds the matrix of partial correlation coefficients
pcor_matrix_white
                     fixed.acidity volatile.acidity citric.acid residual.sugar
fixed.acidity         1.0000000000      -0.09177263  0.12265107    -0.67491264
volatile.acidity     -0.0917726327       1.00000000 -0.17348652    -0.06557784
citric.acid           0.1226510715      -0.17348652  1.00000000    -0.04507215
residual.sugar       -0.6749126426      -0.06557784 -0.04507215     1.00000000
chlorides            -0.1709109899       0.09904437  0.10690945    -0.19717515
free.sulfur.dioxide  -0.0006842353      -0.18930514  0.03657815     0.17843406
total.sulfur.dioxide -0.0388679308       0.19763096  0.04063530    -0.13361933
density               0.7043664073       0.10614760  0.06764097     0.94153651
pH                   -0.6661276194      -0.10955425 -0.08255467    -0.63555052
sulphates            -0.1765522452      -0.06548969  0.04088718    -0.26691222
alcohol               0.6045902659       0.18100688  0.09410972     0.78095720
                        chlorides free.sulfur.dioxide total.sulfur.dioxide
fixed.acidity        -0.170910990       -0.0006842353         -0.038867931
volatile.acidity      0.099044371       -0.1893051352          0.197630961
citric.acid           0.106909446        0.0365781543          0.040635296
residual.sugar       -0.197175153        0.1784340632         -0.133619329
chlorides             1.000000000        0.0292937057          0.009562664
free.sulfur.dioxide   0.029293706        1.0000000000          0.593206957
total.sulfur.dioxide  0.009562664        0.5932069568          1.000000000
density               0.163780328       -0.1555386433          0.196045940
pH                   -0.164066249        0.0520636863         -0.019294025
sulphates            -0.037354845        0.0104257743          0.069470082
alcohol              -0.008965184       -0.0992961359          0.041463583
                         density          pH   sulphates      alcohol
fixed.acidity         0.70436641 -0.66612762 -0.17655225  0.604590266
volatile.acidity      0.10614760 -0.10955425 -0.06548969  0.181006877
citric.acid           0.06764097 -0.08255467  0.04088718  0.094109718
residual.sugar        0.94153651 -0.63555052 -0.26691222  0.780957197
chlorides             0.16378033 -0.16406625 -0.03735485 -0.008965184
free.sulfur.dioxide  -0.15553864  0.05206369  0.01042577 -0.099296136
total.sulfur.dioxide  0.19604594 -0.01929403  0.06947008  0.041463583
density               1.00000000  0.62772454  0.26821045 -0.886725282
pH                    0.62772454  1.00000000 -0.06069657  0.565640272
sulphates             0.26821045 -0.06069657  1.00000000  0.246627014
alcohol              -0.88672528  0.56564027  0.24662701  1.000000000
# Heatmap of the white-wine partial correlations (royalblue-white-orange palette, 100 steps)
corrplot(
  pcor_matrix_white,
  method = "color",
  col = colorRampPalette(c("royalblue", "white", "orange"))(100),
  tl.cex = 0.8
)

Aplicación en Python

# Red wine: partial correlation matrix of the wine_red columns via the DataFrame's pcorr() method
# NOTE(review): pcorr() is not a core pandas method; presumably registered by pingouin — confirm against the imports
pcor_red = wine_red.pcorr()
print(pcor_red)
                      fixed acidity  volatile acidity  citric acid  \
fixed acidity              1.000000          0.049737     0.333289   
volatile acidity           0.049737          1.000000    -0.535464   
citric acid                0.333289         -0.535464     1.000000   
residual sugar            -0.430958         -0.024022     0.053418   
chlorides                 -0.233359          0.264011     0.263709   
free sulfur dioxide        0.110133         -0.165340    -0.162490   
total sulfur dioxide      -0.231717          0.214107     0.264974   
density                    0.785450          0.128421     0.008597   
pH                        -0.718502          0.026648    -0.034238   
sulphates                 -0.152061         -0.209429     0.026553   
alcohol                    0.543412          0.078911     0.149467   

                      residual sugar  chlorides  free sulfur dioxide  \
fixed acidity              -0.430958  -0.233359             0.110133   
volatile acidity           -0.024022   0.264011            -0.165340   
citric acid                 0.053418   0.263709            -0.162490   
residual sugar              1.000000  -0.010846             0.126494   
chlorides                  -0.010846   1.000000             0.058525   
free sulfur dioxide         0.126494   0.058525             1.000000   
total sulfur dioxide        0.036301  -0.140179             0.662934   
density                     0.595196   0.093803            -0.085436   
pH                         -0.329050  -0.231054             0.137657   
sulphates                  -0.200731   0.350510             0.053959   
alcohol                     0.502077  -0.092343            -0.026142   

                      total sulfur dioxide   density        pH  sulphates  \
fixed acidity                    -0.231717  0.785450 -0.718502  -0.152061   
volatile acidity                  0.214107  0.128421  0.026648  -0.209429   
citric acid                       0.264974  0.008597 -0.034238   0.026553   
residual sugar                    0.036301  0.595196 -0.329050  -0.200731   
chlorides                        -0.140179  0.093803 -0.231054   0.350510   
free sulfur dioxide               0.662934 -0.085436  0.137657   0.053959   
total sulfur dioxide              1.000000  0.078782 -0.193901   0.033062   
density                           0.078782  1.000000  0.572232   0.247991   
pH                               -0.193901  0.572232  1.000000  -0.128602   
sulphates                         0.033062  0.247991 -0.128602   1.000000   
alcohol                          -0.083434 -0.755812  0.520044   0.288790   

                       alcohol  
fixed acidity         0.543412  
volatile acidity      0.078911  
citric acid           0.149467  
residual sugar        0.502077  
chlorides            -0.092343  
free sulfur dioxide  -0.026142  
total sulfur dioxide -0.083434  
density              -0.755812  
pH                    0.520044  
sulphates             0.288790  
alcohol               1.000000  
# Heatmap of the red-wine partial correlation matrix, annotated with the coefficients
plt.figure(figsize=(10, 8))
sns.heatmap(
    pcor_red,
    annot=True,
    cmap="coolwarm",
    center=0,
    linewidths=0.5,
)
plt.show()

# White wine: partial correlation matrix of the wine_white columns via the DataFrame's pcorr() method
# NOTE(review): pcorr() is not a core pandas method; presumably registered by pingouin — confirm against the imports
pcor_white = wine_white.pcorr()
print(pcor_white)
                      fixed acidity  volatile acidity  citric acid  \
fixed acidity              1.000000         -0.091773     0.122651   
volatile acidity          -0.091773          1.000000    -0.173487   
citric acid                0.122651         -0.173487     1.000000   
residual sugar            -0.674913         -0.065578    -0.045072   
chlorides                 -0.170911          0.099044     0.106909   
free sulfur dioxide       -0.000684         -0.189305     0.036578   
total sulfur dioxide      -0.038868          0.197631     0.040635   
density                    0.704366          0.106148     0.067641   
pH                        -0.666128         -0.109554    -0.082555   
sulphates                 -0.176552         -0.065490     0.040887   
alcohol                    0.604590          0.181007     0.094110   

                      residual sugar  chlorides  free sulfur dioxide  \
fixed acidity              -0.674913  -0.170911            -0.000684   
volatile acidity           -0.065578   0.099044            -0.189305   
citric acid                -0.045072   0.106909             0.036578   
residual sugar              1.000000  -0.197175             0.178434   
chlorides                  -0.197175   1.000000             0.029294   
free sulfur dioxide         0.178434   0.029294             1.000000   
total sulfur dioxide       -0.133619   0.009563             0.593207   
density                     0.941537   0.163780            -0.155539   
pH                         -0.635551  -0.164066             0.052064   
sulphates                  -0.266912  -0.037355             0.010426   
alcohol                     0.780957  -0.008965            -0.099296   

                      total sulfur dioxide   density        pH  sulphates  \
fixed acidity                    -0.038868  0.704366 -0.666128  -0.176552   
volatile acidity                  0.197631  0.106148 -0.109554  -0.065490   
citric acid                       0.040635  0.067641 -0.082555   0.040887   
residual sugar                   -0.133619  0.941537 -0.635551  -0.266912   
chlorides                         0.009563  0.163780 -0.164066  -0.037355   
free sulfur dioxide               0.593207 -0.155539  0.052064   0.010426   
total sulfur dioxide              1.000000  0.196046 -0.019294   0.069470   
density                           0.196046  1.000000  0.627725   0.268210   
pH                               -0.019294  0.627725  1.000000  -0.060697   
sulphates                         0.069470  0.268210 -0.060697   1.000000   
alcohol                           0.041464 -0.886725  0.565640   0.246627   

                       alcohol  
fixed acidity         0.604590  
volatile acidity      0.181007  
citric acid           0.094110  
residual sugar        0.780957  
chlorides            -0.008965  
free sulfur dioxide  -0.099296  
total sulfur dioxide  0.041464  
density              -0.886725  
pH                    0.565640  
sulphates             0.246627  
alcohol               1.000000  
# Heatmap of the white-wine partial correlation matrix, annotated with the coefficients
plt.figure(figsize=(10, 8))
sns.heatmap(
    pcor_white,
    annot=True,
    cmap="coolwarm",
    center=0,
    linewidths=0.5,
)
plt.show()

Datos Atípicos (Outliers) Multivariantes

Aplicación en R

# Red wine:
mahalanobis_dist_red <- mahalanobis(wine_red, means_red, cov_matrix_red) # squared Mahalanobis distance of each observation from means_red under cov_matrix_red

# Critical value used to flag outliers (chi-square quantile)
gl <- ncol(wine_red)  # degrees of freedom (number of variables)
alpha <- 0.05    # significance level
valor_critico <- qchisq(1 - alpha, df = gl)

valor_critico # show the critical value
[1] 19.67514
atipicos_red <- which(mahalanobis_dist_red > valor_critico) # positions of the observations whose distance exceeds the critical value

head(mahalanobis_dist_red) # show the first distances
[1] 4.920027 8.335593 3.647326 7.439998 4.920027 5.051613
atipicos_red # print the flagged positions (the original note records 154 outliers)
  [1]   14   15   16   18   20   34   39   43   44   46   80   82   84   87   92
 [16]   93   95   96  107  110  127  128  143  145  152  162  170  182  199  227
 [31]  241  244  245  259  282  292  325  326  340  341  354  355  379  391  396
 [46]  397  401  410  416  443  452  456  464  481  485  494  495  500  502  503
 [61]  511  516  539  545  554  555  556  557  558  559  560  565  585  592  609
 [76]  615  634  640  650  652  653  673  685  691  693  696  701  711  724  731
 [91]  755  796  803  834  837  838  862  890  912  918  924  926  927  983  999
[106] 1018 1019 1044 1052 1072 1075 1078 1079 1080 1082 1091 1115 1132 1155 1166
[121] 1179 1187 1229 1236 1245 1257 1261 1270 1271 1289 1290 1296 1297 1300 1313
[136] 1317 1320 1322 1359 1368 1371 1373 1375 1435 1436 1475 1476 1477 1478 1509
[151] 1559 1571 1575 1590
# White wine:
mahalanobis_dist_white <- mahalanobis(wine_white, means_white, cov_matrix_white) # squared Mahalanobis distance of each observation from means_white under cov_matrix_white

valor_critico # show the critical value; it is reused from the red-wine computation (df = ncol(wine_red)), so it assumes wine_white has the same number of columns
[1] 19.67514
atipicos_white <- which(mahalanobis_dist_white > valor_critico) # positions of the observations whose distance exceeds the critical value

head(mahalanobis_dist_white) # show the first distances
[1] 9.263907 6.682467 8.293968 3.223621 3.223621 8.293968
atipicos_white # print the flagged positions (the original note records 395 outliers)
  [1]   18   21   24   32   41   42   55   97   99  100  116  148  170  179  195
 [16]  196  197  208  209  222  231  246  252  295  316  325  326  373  396  406
 [31]  434  471  485  503  509  532  542  550  601  627  647  660  663  682  684
 [46]  688  701  722  730  746  759  760  764  767  772  773  776  822  831  835
 [61]  853  855  860  867  871  874  879  914  916  927  947  949  980  981 1008
 [76] 1015 1017 1025 1028 1035 1037 1041 1052 1054 1100 1115 1127 1153 1159 1164
 [91] 1172 1179 1181 1215 1218 1221 1229 1240 1246 1251 1255 1256 1264 1273 1294
[106] 1295 1305 1308 1309 1321 1370 1373 1374 1386 1387 1395 1402 1408 1416 1418
[121] 1419 1424 1437 1456 1461 1466 1477 1483 1488 1497 1505 1527 1535 1537 1541
[136] 1542 1545 1552 1576 1578 1579 1581 1584 1591 1597 1599 1600 1639 1650 1652
[151] 1654 1664 1673 1674 1682 1689 1709 1722 1723 1728 1732 1745 1759 1760 1776
[166] 1782 1802 1808 1810 1818 1836 1837 1840 1849 1857 1863 1866 1887 1901 1926
[181] 1927 1932 1933 1941 1943 1945 1948 1952 1959 1962 1964 2015 2018 2025 2026
[196] 2027 2031 2037 2051 2064 2076 2093 2128 2155 2163 2165 2187 2207 2258 2260
[211] 2280 2287 2288 2322 2335 2337 2350 2360 2372 2379 2395 2397 2404 2409 2418
[226] 2423 2425 2441 2442 2466 2467 2476 2477 2523 2526 2560 2564 2580 2590 2595
[241] 2626 2630 2635 2638 2647 2652 2655 2669 2705 2706 2722 2731 2732 2749 2751
[256] 2772 2782 2821 2850 2873 2874 2875 2894 2927 2931 2932 3015 3023 3024 3026
[271] 3044 3051 3053 3065 3067 3073 3095 3096 3098 3140 3153 3166 3216 3219 3221
[286] 3266 3284 3289 3308 3389 3418 3462 3471 3498 3521 3524 3529 3538 3557 3561
[301] 3565 3572 3588 3589 3590 3620 3621 3624 3678 3711 3736 3738 3774 3849 3862
[316] 3863 3864 3869 3870 3872 3880 3902 3903 3905 3912 3916 3938 3973 3982 3999
[331] 4000 4001 4013 4040 4066 4108 4124 4137 4174 4214 4240 4248 4260 4300 4301
[346] 4345 4347 4402 4474 4481 4498 4504 4515 4520 4526 4527 4566 4568 4580 4581
[361] 4583 4592 4598 4610 4618 4627 4633 4649 4650 4651 4697 4699 4702 4703 4727
[376] 4746 4780 4788 4790 4793 4794 4795 4814 4816 4819 4821 4838 4842 4846 4848
[391] 4868 4878 4879 4887 4888

Aplicación en Python

# Red wine:
# Squared Mahalanobis distance of every observation:
#   d_i^2 = (x_i - mean) S^-1 (x_i - mean)'
xi_xbar_red = wine_red - means_red
inv_cov_red = np.linalg.inv(cov_matrix_red)
parte1_red = np.dot(xi_xbar_red, inv_cov_red)

# Only the diagonal of parte1_red @ xi_xbar_red.T is needed, so take the
# row-wise dot product directly instead of materialising the n x n product.
mahalanobis_dist_red = np.einsum("ij,ij->i", parte1_red, np.asarray(xi_xbar_red))
print(mahalanobis_dist_red)
[ 4.92002724  8.33559263  3.64732572 ...  7.22620103  9.03766813
 12.27097623]
# Critical value used to flag outliers (chi-square quantile)
gl = wine_red.shape[1] # degrees of freedom (number of variables)
alpha = 0.05       # significance level
valor_critico = chi2.ppf(1 - alpha, df=gl)

print("Valor crítico: ",valor_critico)
Valor crítico:  19.67513757268249
# Identify outliers: np.where returns a 1-tuple holding the 0-based positions
# of the observations whose distance exceeds the critical value
atipicos_red = np.where(mahalanobis_dist_red > valor_critico)

# Show the outliers; the count is computed from the result instead of being
# hard-coded, and the printed text keeps the same layout as before
print("Número de datos atípicos:", len(atipicos_red[0]), "\n", atipicos_red)
Número de datos atípicos: 154 
 (array([  13,   14,   15,   17,   19,   33,   38,   42,   43,   45,   79,
         81,   83,   86,   91,   92,   94,   95,  106,  109,  126,  127,
        142,  144,  151,  161,  169,  181,  198,  226,  240,  243,  244,
        258,  281,  291,  324,  325,  339,  340,  353,  354,  378,  390,
        395,  396,  400,  409,  415,  442,  451,  455,  463,  480,  484,
        493,  494,  499,  501,  502,  510,  515,  538,  544,  553,  554,
        555,  556,  557,  558,  559,  564,  584,  591,  608,  614,  633,
        639,  649,  651,  652,  672,  684,  690,  692,  695,  700,  710,
        723,  730,  754,  795,  802,  833,  836,  837,  861,  889,  911,
        917,  923,  925,  926,  982,  998, 1017, 1018, 1043, 1051, 1071,
       1074, 1077, 1078, 1079, 1081, 1090, 1114, 1131, 1154, 1165, 1178,
       1186, 1228, 1235, 1244, 1256, 1260, 1269, 1270, 1288, 1289, 1295,
       1296, 1299, 1312, 1316, 1319, 1321, 1358, 1367, 1370, 1372, 1374,
       1434, 1435, 1474, 1475, 1476, 1477, 1508, 1558, 1570, 1574, 1589]),)
# White wine:
# Squared Mahalanobis distance of every observation:
#   d_i^2 = (x_i - mean) S^-1 (x_i - mean)'
xi_xbar_white = wine_white - means_white
inv_cov_white = np.linalg.inv(cov_matrix_white)
parte1_white = np.dot(xi_xbar_white, inv_cov_white)

# Only the diagonal of parte1_white @ xi_xbar_white.T is needed, so take the
# row-wise dot product directly instead of materialising the n x n product.
mahalanobis_dist_white = np.einsum("ij,ij->i", parte1_white, np.asarray(xi_xbar_white))
print(mahalanobis_dist_white)
[9.26390697 6.68246735 8.2939681  ... 9.55520151 7.89464608 7.23836837]
# Critical value used to flag outliers (chi-square quantile); reuses the valor_critico computed earlier from wine_red
print("Valor crítico: ",valor_critico)
Valor crítico:  19.67513757268249
# Identify outliers: np.where returns a 1-tuple holding the 0-based positions
# of the observations whose distance exceeds the critical value
atipicos_white = np.where(mahalanobis_dist_white > valor_critico)

# Show the outliers; the count is computed from the result instead of being
# hard-coded, and the printed text keeps the same layout as before
print("Número de datos atípicos:", len(atipicos_white[0]), "\n", atipicos_white)
Número de datos atípicos: 395 
 (array([  17,   20,   23,   31,   40,   41,   54,   96,   98,   99,  115,
        147,  169,  178,  194,  195,  196,  207,  208,  221,  230,  245,
        251,  294,  315,  324,  325,  372,  395,  405,  433,  470,  484,
        502,  508,  531,  541,  549,  600,  626,  646,  659,  662,  681,
        683,  687,  700,  721,  729,  745,  758,  759,  763,  766,  771,
        772,  775,  821,  830,  834,  852,  854,  859,  866,  870,  873,
        878,  913,  915,  926,  946,  948,  979,  980, 1007, 1014, 1016,
       1024, 1027, 1034, 1036, 1040, 1051, 1053, 1099, 1114, 1126, 1152,
       1158, 1163, 1171, 1178, 1180, 1214, 1217, 1220, 1228, 1239, 1245,
       1250, 1254, 1255, 1263, 1272, 1293, 1294, 1304, 1307, 1308, 1320,
       1369, 1372, 1373, 1385, 1386, 1394, 1401, 1407, 1415, 1417, 1418,
       1423, 1436, 1455, 1460, 1465, 1476, 1482, 1487, 1496, 1504, 1526,
       1534, 1536, 1540, 1541, 1544, 1551, 1575, 1577, 1578, 1580, 1583,
       1590, 1596, 1598, 1599, 1638, 1649, 1651, 1653, 1663, 1672, 1673,
       1681, 1688, 1708, 1721, 1722, 1727, 1731, 1744, 1758, 1759, 1775,
       1781, 1801, 1807, 1809, 1817, 1835, 1836, 1839, 1848, 1856, 1862,
       1865, 1886, 1900, 1925, 1926, 1931, 1932, 1940, 1942, 1944, 1947,
       1951, 1958, 1961, 1963, 2014, 2017, 2024, 2025, 2026, 2030, 2036,
       2050, 2063, 2075, 2092, 2127, 2154, 2162, 2164, 2186, 2206, 2257,
       2259, 2279, 2286, 2287, 2321, 2334, 2336, 2349, 2359, 2371, 2378,
       2394, 2396, 2403, 2408, 2417, 2422, 2424, 2440, 2441, 2465, 2466,
       2475, 2476, 2522, 2525, 2559, 2563, 2579, 2589, 2594, 2625, 2629,
       2634, 2637, 2646, 2651, 2654, 2668, 2704, 2705, 2721, 2730, 2731,
       2748, 2750, 2771, 2781, 2820, 2849, 2872, 2873, 2874, 2893, 2926,
       2930, 2931, 3014, 3022, 3023, 3025, 3043, 3050, 3052, 3064, 3066,
       3072, 3094, 3095, 3097, 3139, 3152, 3165, 3215, 3218, 3220, 3265,
       3283, 3288, 3307, 3388, 3417, 3461, 3470, 3497, 3520, 3523, 3528,
       3537, 3556, 3560, 3564, 3571, 3587, 3588, 3589, 3619, 3620, 3623,
       3677, 3710, 3735, 3737, 3773, 3848, 3861, 3862, 3863, 3868, 3869,
       3871, 3879, 3901, 3902, 3904, 3911, 3915, 3937, 3972, 3981, 3998,
       3999, 4000, 4012, 4039, 4065, 4107, 4123, 4136, 4173, 4213, 4239,
       4247, 4259, 4299, 4300, 4344, 4346, 4401, 4473, 4480, 4497, 4503,
       4514, 4519, 4525, 4526, 4565, 4567, 4579, 4580, 4582, 4591, 4597,
       4609, 4617, 4626, 4632, 4648, 4649, 4650, 4696, 4698, 4701, 4702,
       4726, 4745, 4779, 4787, 4789, 4792, 4793, 4794, 4813, 4815, 4818,
       4820, 4837, 4841, 4845, 4847, 4867, 4877, 4878, 4886, 4887]),)

Matriz de Distancias Euclídeas

Aplicación en R

# Euclidean distance matrix on a small subset: the first 6 rows and
# columns 2 to 7 of each data set.
# R indexing is 1-based: the previous 0:6 only worked because a zero index
# is silently dropped, so 1:6 selects exactly the same six rows.
wine_redD <- wine_red[1:6, 2:7]
wine_whiteD <- wine_white[1:6, 2:7]

# Red wine: pairwise Euclidean distances between the six selected rows
dist_red <- dist(wine_redD, method = "euclidean")
dist_red
          1         2         3         4         5
2 35.854189                                        
3 20.400134 16.404452                              
4 26.692508 10.684762  6.376605                    
5  0.000000 35.854189 20.400134 26.692508          
6  6.325472 29.558229 14.151392 20.407548  6.325472
# Expand the red-wine dist object into a full square matrix and print it
matrix_dist_red <- as.matrix(dist_red)
matrix_dist_red
          1        2         3         4         5         6
1  0.000000 35.85419 20.400134 26.692508  0.000000  6.325472
2 35.854189  0.00000 16.404452 10.684762 35.854189 29.558229
3 20.400134 16.40445  0.000000  6.376605 20.400134 14.151392
4 26.692508 10.68476  6.376605  0.000000 26.692508 20.407548
5  0.000000 35.85419 20.400134 26.692508  0.000000  6.325472
6  6.325472 29.55823 14.151392 20.407548  6.325472  0.000000
# White wine: pairwise Euclidean distances between the rows of wine_whiteD
dist_white <- dist(wine_whiteD, method = "euclidean")
dist_white
         1        2        3        4        5
2 52.62900                                    
3 75.79210 38.84706                           
4 20.21988 63.66016 90.62323                  
5 20.21988 63.66016 90.62323  0.00000         
6 75.79210 38.84706  0.00000 90.62323 90.62323
# Expand the white-wine dist object into a full square matrix and print it.
# This must convert dist_white: converting dist_red here would just repeat
# the red-wine matrix under the white-wine name.
matrix_dist_white <- as.matrix(dist_white)
matrix_dist_white
          1        2         3         4         5         6
1  0.000000 35.85419 20.400134 26.692508  0.000000  6.325472
2 35.854189  0.00000 16.404452 10.684762 35.854189 29.558229
3 20.400134 16.40445  0.000000  6.376605 20.400134 14.151392
4 26.692508 10.68476  6.376605  0.000000 26.692508 20.407548
5  0.000000 35.85419 20.400134 26.692508  0.000000  6.325472
6  6.325472 29.55823 14.151392 20.407548  6.325472  0.000000

Aplicación en Python

# Red wine: pairwise Euclidean distances between all rows of wine_red
dist_red = pdist(wine_red, metric='euclidean')  # pairwise distances between rows
matrix_dist_red = squareform(dist_red)  # rearranged into a square matrix (see printed output)

print("Matriz de distancias euclídeas:\n", matrix_dist_red)
Matriz de distancias euclídeas:
 [[ 0.         35.86019221 20.40970496 ... 19.07949696 23.32259701
  10.9912459 ]
 [35.86019221  0.         16.40458887 ... 27.36763755 24.13167951
  26.08185696]
 [20.40970496 16.40458887  0.         ... 19.89463389 19.82371349
  12.64021468]
 ...
 [19.07949696 27.36763755 19.89463389 ...  0.          5.09238903
  11.26697302]
 [23.32259701 24.13167951 19.82371349 ...  5.09238903  0.
  14.2646307 ]
 [10.9912459  26.08185696 12.64021468 ... 11.26697302 14.2646307
   0.        ]]
# White wine: pairwise Euclidean distances between all rows of wine_white
dist_white = pdist(wine_white, metric='euclidean')
matrix_dist_white = squareform(dist_white)

# Print the white-wine matrix computed above (not the red-wine one)
print("Matriz de distancias euclídeas:\n", matrix_dist_white)
Matriz de distancias euclídeas:
 [[ 0.         35.86019221 20.40970496 ... 19.07949696 23.32259701
  10.9912459 ]
 [35.86019221  0.         16.40458887 ... 27.36763755 24.13167951
  26.08185696]
 [20.40970496 16.40458887  0.         ... 19.89463389 19.82371349
  12.64021468]
 ...
 [19.07949696 27.36763755 19.89463389 ...  0.          5.09238903
  11.26697302]
 [23.32259701 24.13167951 19.82371349 ...  5.09238903  0.
  14.2646307 ]
 [10.9912459  26.08185696 12.64021468 ... 11.26697302 14.2646307
   0.        ]]

Varianza Generalizada

Aplicación en R

# Red wine: generalized variance, computed as the determinant of the covariance matrix
vg_red <- det(cov_matrix_red)
vg_red
[1] 3.478418e-11
# White wine: generalized variance, computed as the determinant of the covariance matrix
vg_white <- det(cov_matrix_white)
vg_white
[1] 1.701408e-11

Aplicación en Python

# Red wine: generalized variance, computed as the determinant of the covariance matrix
vg_red = np.linalg.det(cov_matrix_red)
print("Varianza Generalizada\n", vg_red)
Varianza Generalizada
 3.478417646276091e-11
# White wine: generalized variance, computed as the determinant of the covariance matrix
vg_white = np.linalg.det(cov_matrix_white)
print("Varianza Generalizada\n", vg_white)
Varianza Generalizada
 1.701408009848951e-11