Importar datos

Importar en R

# carga de paquetes
library(tidyverse)
## Warning: package 'readr' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ppcor) # Correlaciones parciales
## Warning: package 'ppcor' was built under R version 4.4.3
## Cargando paquete requerido: MASS
## 
## Adjuntando el paquete: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
# NOTE: ppcor has already attached MASS (see the startup messages above), and
# MASS::select masks dplyr::select; call dplyr::select() explicitly if needed.
library(MASS) # Outliers: Mahalanobis distance
# Alternative: pick the two files interactively.
# ruta1 <- file.choose()
# ruta2 <- file.choose()

# Folder that holds both CSV files. Change this single value to run the
# script on another machine.
data_dir <- "C:\\Users\\MINEDUCYT\\Documents\\Seminario\\wine+quality"

# Both files have a header row and use ";" as the field separator.
wine_red <- read.csv(
  file.path(data_dir, "winequality-red.csv"),
  header = TRUE,
  sep = ";"
)
wine_white <- read.csv(
  file.path(data_dir, "winequality-white.csv"),
  header = TRUE,
  sep = ";"
)

# Red wine: preview the first six rows
head(wine_red, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality
## 1       5
## 2       5
## 3       5
## 4       6
## 5       5
## 6       5
# White wine: preview the first six rows
head(wine_white, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.0             0.27        0.36           20.7     0.045
## 2           6.3             0.30        0.34            1.6     0.049
## 3           8.1             0.28        0.40            6.9     0.050
## 4           7.2             0.23        0.32            8.5     0.058
## 5           7.2             0.23        0.32            8.5     0.058
## 6           8.1             0.28        0.40            6.9     0.050
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  45                  170  1.0010 3.00      0.45     8.8
## 2                  14                  132  0.9940 3.30      0.49     9.5
## 3                  30                   97  0.9951 3.26      0.44    10.1
## 4                  47                  186  0.9956 3.19      0.40     9.9
## 5                  47                  186  0.9956 3.19      0.40     9.9
## 6                  30                   97  0.9951 3.26      0.44    10.1
##   quality
## 1       6
## 2       6
## 3       6
## 4       6
## 5       6
## 6       6
# Optional down-sampling (disabled): keep only the first 300 observations.
# wine_red <- wine_red[1:300, ]
# wine_white <- wine_white[1:300, ]

# Drop the "quality" column from both data sets.
wine_red <- wine_red[, names(wine_red) != "quality", drop = FALSE]
wine_white <- wine_white[, names(wine_white) != "quality", drop = FALSE]

# Red wine: preview after removing "quality"
head(wine_red, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
# White wine: preview after removing "quality"
head(wine_white, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.0             0.27        0.36           20.7     0.045
## 2           6.3             0.30        0.34            1.6     0.049
## 3           8.1             0.28        0.40            6.9     0.050
## 4           7.2             0.23        0.32            8.5     0.058
## 5           7.2             0.23        0.32            8.5     0.058
## 6           8.1             0.28        0.40            6.9     0.050
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  45                  170  1.0010 3.00      0.45     8.8
## 2                  14                  132  0.9940 3.30      0.49     9.5
## 3                  30                   97  0.9951 3.26      0.44    10.1
## 4                  47                  186  0.9956 3.19      0.40     9.9
## 5                  47                  186  0.9956 3.19      0.40     9.9
## 6                  30                   97  0.9951 3.26      0.44    10.1

Importar en Python

# load packages
import pandas as pd # create and manipulate DataFrames
import numpy as np # multidimensional vectors and matrices
import matplotlib.pyplot as plt # statistical plots
from scipy.spatial.distance import pdist, squareform # Euclidean distances
import pingouin as pg # partial correlations (original note also lists Mahalanobis distance - TODO confirm)
from scipy.spatial.distance import mahalanobis # Mahalanobis distance
from scipy.stats import chi2 # chi-squared distribution
import pandas as pd
from pathlib import Path

# Folder that holds both CSV files. The r"" prefix makes this a raw string, so
# the backslashes of the Windows path are not read as escape sequences.
# Change this single value to run the code on another machine.
data_dir = Path(r"C:\Users\MINEDUCYT\Documents\Seminario\wine+quality")

# Both files use ";" as the field separator.
wine_red = pd.read_csv(data_dir / "winequality-red.csv", sep=";")
wine_white = pd.read_csv(data_dir / "winequality-white.csv", sep=";")

# Optional down-sampling (disabled): keep only the first 300 observations.
# wine_red = wine_red.iloc[:300]
# wine_white = wine_white.iloc[:300]

# Red wine: preview the first five rows
print(wine_red.head(n=5))
##    fixed acidity  volatile acidity  citric acid  ...  sulphates  alcohol  quality
## 0            7.4              0.70         0.00  ...       0.56      9.4        5
## 1            7.8              0.88         0.00  ...       0.68      9.8        5
## 2            7.8              0.76         0.04  ...       0.65      9.8        5
## 3           11.2              0.28         0.56  ...       0.58      9.8        6
## 4            7.4              0.70         0.00  ...       0.56      9.4        5
## 
## [5 rows x 12 columns]
# White wine: preview the first five rows
print(wine_white.head(n=5))
##    fixed acidity  volatile acidity  citric acid  ...  sulphates  alcohol  quality
## 0            7.0              0.27         0.36  ...       0.45      8.8        6
## 1            6.3              0.30         0.34  ...       0.49      9.5        6
## 2            8.1              0.28         0.40  ...       0.44     10.1        6
## 3            7.2              0.23         0.32  ...       0.40      9.9        6
## 4            7.2              0.23         0.32  ...       0.40      9.9        6
## 
## [5 rows x 12 columns]
# Drop the "quality" column (columns= is the keyword form of axis=1)
wine_red = wine_red.drop(columns="quality")

wine_white = wine_white.drop(columns="quality")

# Red wine: preview after removing "quality"
print(wine_red.head(n=5))
##    fixed acidity  volatile acidity  citric acid  ...    pH  sulphates  alcohol
## 0            7.4              0.70         0.00  ...  3.51       0.56      9.4
## 1            7.8              0.88         0.00  ...  3.20       0.68      9.8
## 2            7.8              0.76         0.04  ...  3.26       0.65      9.8
## 3           11.2              0.28         0.56  ...  3.16       0.58      9.8
## 4            7.4              0.70         0.00  ...  3.51       0.56      9.4
## 
## [5 rows x 11 columns]
# White wine: preview after removing "quality".
# (Previously this printed wine_red again, so the white-wine frame was never shown.)
print(wine_white.head())
##    fixed acidity  volatile acidity  citric acid  ...    pH  sulphates  alcohol
## 0            7.0              0.27         0.36  ...  3.00       0.45      8.8
## 1            6.3              0.30         0.34  ...  3.30       0.49      9.5
## 2            8.1              0.28         0.40  ...  3.26       0.44     10.1
## 3            7.2              0.23         0.32  ...  3.19       0.40      9.9
## 4            7.2              0.23         0.32  ...  3.19       0.40      9.9
## 
## [5 rows x 11 columns]

Vector de medias

Aplicación en R

# Red wine: vector of column means
means_red <- colMeans(wine_red, na.rm = FALSE)
print(means_red)
##        fixed.acidity     volatile.acidity          citric.acid 
##           8.31963727           0.52782051           0.27097561 
##       residual.sugar            chlorides  free.sulfur.dioxide 
##           2.53880550           0.08746654          15.87492183 
## total.sulfur.dioxide              density                   pH 
##          46.46779237           0.99674668           3.31111320 
##            sulphates              alcohol 
##           0.65814884          10.42298311
# White wine: vector of column means
means_white <- colMeans(wine_white, na.rm = FALSE)
print(means_white)
##        fixed.acidity     volatile.acidity          citric.acid 
##           6.85478767           0.27824112           0.33419151 
##       residual.sugar            chlorides  free.sulfur.dioxide 
##           6.39141486           0.04577236          35.30808493 
## total.sulfur.dioxide              density                   pH 
##         138.36065741           0.99402738           3.18826664 
##            sulphates              alcohol 
##           0.48984688          10.51426705

Aplicación en Python

# Red wine: mean of every column (axis=0 aggregates down the rows)
means_red = wine_red.mean(axis=0)
print("Vector de Medias:\n", means_red)
## Vector de Medias:
##  fixed acidity            8.319637
## volatile acidity         0.527821
## citric acid              0.270976
## residual sugar           2.538806
## chlorides                0.087467
## free sulfur dioxide     15.874922
## total sulfur dioxide    46.467792
## density                  0.996747
## pH                       3.311113
## sulphates                0.658149
## alcohol                 10.422983
## dtype: float64
# White wine: mean of every column (axis=0 aggregates down the rows)
means_white = wine_white.mean(axis=0)
print("Vector de Medias:\n", means_white)
## Vector de Medias:
##  fixed acidity             6.854788
## volatile acidity          0.278241
## citric acid               0.334192
## residual sugar            6.391415
## chlorides                 0.045772
## free sulfur dioxide      35.308085
## total sulfur dioxide    138.360657
## density                   0.994027
## pH                        3.188267
## sulphates                 0.489847
## alcohol                  10.514267
## dtype: float64

Matriz de varianzas y covarianzas

Aplicación en R

# Red wine: sample variance-covariance matrix
cov_matrix_red <- stats::cov(wine_red, method = "pearson")
print(cov_matrix_red)
##                      fixed.acidity volatile.acidity   citric.acid
## fixed.acidity          3.031416389    -7.985142e-02  0.2278200037
## volatile.acidity      -0.079851417     3.206238e-02 -0.0192716208
## citric.acid            0.227820004    -1.927162e-02  0.0379474831
## residual.sugar         0.281756262     4.841910e-04  0.0394342700
## chlorides              0.007678692     5.165869e-04  0.0018687248
## free.sulfur.dioxide   -2.800921493    -1.967359e-02 -0.1242521139
## total.sulfur.dioxide  -6.482345858     4.504257e-01  0.2276972740
## density                0.002195224     7.443665e-06  0.0001341746
## pH                    -0.183585704     6.494699e-03 -0.0162975823
## sulphates              0.054010092    -7.921434e-03  0.0103277145
## alcohol               -0.114421153    -3.860022e-02  0.0228151729
##                      residual.sugar     chlorides free.sulfur.dioxide
## fixed.acidity          0.2817562623  7.678692e-03       -2.800921e+00
## volatile.acidity       0.0004841910  5.165869e-04       -1.967359e-02
## citric.acid            0.0394342700  1.868725e-03       -1.242521e-01
## residual.sugar         1.9878971330  3.690176e-03        2.758611e+00
## chlorides              0.0036901759  2.215143e-03        2.738303e-03
## free.sulfur.dioxide    2.7586114522  2.738303e-03        1.094149e+02
## total.sulfur.dioxide   9.4164414790  7.338675e-02        2.297375e+02
## density                0.0009454109  1.782176e-05       -4.332504e-04
## pH                    -0.0186442890 -1.925745e-03        1.136531e-01
## sulphates              0.0013209414  2.961878e-03        9.159247e-02
## alcohol                0.0632189598 -1.109152e-02       -7.736984e-01
##                      total.sulfur.dioxide       density            pH
## fixed.acidity               -6.482346e+00  2.195224e-03 -1.835857e-01
## volatile.acidity             4.504257e-01  7.443665e-06  6.494699e-03
## citric.acid                  2.276973e-01  1.341746e-04 -1.629758e-02
## residual.sugar               9.416441e+00  9.454109e-04 -1.864429e-02
## chlorides                    7.338675e-02  1.782176e-05 -1.925745e-03
## free.sulfur.dioxide          2.297375e+02 -4.332504e-04  1.136531e-01
## total.sulfur.dioxide         1.082102e+03  4.424727e-03 -3.376988e-01
## density                      4.424727e-03  3.562029e-06 -9.956395e-05
## pH                          -3.376988e-01 -9.956395e-05  2.383518e-02
## sulphates                    2.394710e-01  4.750962e-05 -5.146186e-03
## alcohol                     -7.209298e+00 -9.979518e-04  3.383162e-02
##                          sulphates       alcohol
## fixed.acidity         5.401009e-02 -0.1144211534
## volatile.acidity     -7.921434e-03 -0.0386002214
## citric.acid           1.032771e-02  0.0228151729
## residual.sugar        1.320941e-03  0.0632189598
## chlorides             2.961878e-03 -0.0110915178
## free.sulfur.dioxide   9.159247e-02 -0.7736984004
## total.sulfur.dioxide  2.394710e-01 -7.2092978950
## density               4.750962e-05 -0.0009979518
## pH                   -5.146186e-03  0.0338316166
## sulphates             2.873262e-02  0.0169067772
## alcohol               1.690678e-02  1.1356473950
# White wine: sample variance-covariance matrix
cov_matrix_white <- stats::cov(wine_white, method = "pearson")
print(cov_matrix_white)
##                      fixed.acidity volatile.acidity   citric.acid
## fixed.acidity         0.7121135857    -1.930571e-03  0.0295325116
## volatile.acidity     -0.0019305706     1.015954e-02 -0.0018232776
## citric.acid           0.0295325116    -1.823278e-03  0.0146457930
## residual.sugar        0.3810218137     3.286533e-02  0.0578289265
## chlorides             0.0004256255     1.552775e-04  0.0003023838
## free.sulfur.dioxide  -0.7089186424    -1.663005e-01  0.1936297767
## total.sulfur.dioxide  3.2660133926     3.823539e-01  0.6229887081
## density               0.0006696773     8.173933e-06  0.0000541138
## pH                   -0.0542648260    -4.857531e-04 -0.0029923451
## sulphates            -0.0016509923    -4.109902e-04  0.0008608829
## alcohol              -0.1255328219     8.399723e-03 -0.0112782389
##                      residual.sugar     chlorides free.sulfur.dioxide
## fixed.acidity           0.381021814  4.256255e-04        -0.708918642
## volatile.acidity        0.032865334  1.552775e-04        -0.166300459
## citric.acid             0.057828926  3.023838e-04         0.193629777
## residual.sugar         25.725770164  9.827502e-03        25.800577899
## chlorides               0.009827502  4.773337e-04         0.037674498
## free.sulfur.dioxide    25.800577899  3.767450e-02       289.242719999
## total.sulfur.dioxide   86.531302970  1.846875e-01       444.865890947
## density                 0.012727165  1.680754e-05         0.014965532
## pH                     -0.148683661 -2.983649e-04        -0.001586555
## sulphates              -0.015434743  4.179687e-05         0.114937934
## alcohol                -2.812740332 -9.684235e-03        -5.234508674
##                      total.sulfur.dioxide       density            pH
## fixed.acidity                  3.26601339  6.696773e-04 -5.426483e-02
## volatile.acidity               0.38235390  8.173933e-06 -4.857531e-04
## citric.acid                    0.62298871  5.411380e-05 -2.992345e-03
## residual.sugar                86.53130297  1.272717e-02 -1.486837e-01
## chlorides                      0.18468749  1.680754e-05 -2.983649e-04
## free.sulfur.dioxide          444.86589095  1.496553e-02 -1.586555e-03
## total.sulfur.dioxide        1806.08549085  6.735203e-02  1.489422e-02
## density                        0.06735203  8.945524e-06 -4.226861e-05
## pH                             0.01489422 -4.226861e-05  2.280118e-02
## sulphates                      0.65264458  2.542747e-05  2.687523e-03
## alcohol                      -23.47660460 -2.871430e-03  2.256505e-02
##                          sulphates       alcohol
## fixed.acidity        -1.650992e-03  -0.125532822
## volatile.acidity     -4.109902e-04   0.008399723
## citric.acid           8.608829e-04  -0.011278239
## residual.sugar       -1.543474e-02  -2.812740332
## chlorides             4.179687e-05  -0.009684235
## free.sulfur.dioxide   1.149379e-01  -5.234508674
## total.sulfur.dioxide  6.526446e-01 -23.476604603
## density               2.542747e-05  -0.002871430
## pH                    2.687523e-03   0.022565052
## sulphates             1.302471e-02  -0.002448356
## alcohol              -2.448356e-03   1.514426982

Aplicación en Python

# Lift pandas' row and column display limits so full matrices are printed
for option in ("display.max_rows", "display.max_columns"):
    pd.set_option(option, None)

# Red wine: sample variance-covariance matrix (ddof=1 is the pandas default)
cov_matrix_red = wine_red.cov(ddof=1)
print("Matriz de covarianzas:\n", cov_matrix_red)
## Matriz de covarianzas:
##                        fixed acidity  volatile acidity  citric acid  \
## fixed acidity              3.031416         -0.079851     0.227820   
## volatile acidity          -0.079851          0.032062    -0.019272   
## citric acid                0.227820         -0.019272     0.037947   
## residual sugar             0.281756          0.000484     0.039434   
## chlorides                  0.007679          0.000517     0.001869   
## free sulfur dioxide       -2.800921         -0.019674    -0.124252   
## total sulfur dioxide      -6.482346          0.450426     0.227697   
## density                    0.002195          0.000007     0.000134   
## pH                        -0.183586          0.006495    -0.016298   
## sulphates                  0.054010         -0.007921     0.010328   
## alcohol                   -0.114421         -0.038600     0.022815   
## 
##                       residual sugar  chlorides  free sulfur dioxide  \
## fixed acidity               0.281756   0.007679            -2.800921   
## volatile acidity            0.000484   0.000517            -0.019674   
## citric acid                 0.039434   0.001869            -0.124252   
## residual sugar              1.987897   0.003690             2.758611   
## chlorides                   0.003690   0.002215             0.002738   
## free sulfur dioxide         2.758611   0.002738           109.414884   
## total sulfur dioxide        9.416441   0.073387           229.737521   
## density                     0.000945   0.000018            -0.000433   
## pH                         -0.018644  -0.001926             0.113653   
## sulphates                   0.001321   0.002962             0.091592   
## alcohol                     0.063219  -0.011092            -0.773698   
## 
##                       total sulfur dioxide   density        pH  sulphates  \
## fixed acidity                    -6.482346  0.002195 -0.183586   0.054010   
## volatile acidity                  0.450426  0.000007  0.006495  -0.007921   
## citric acid                       0.227697  0.000134 -0.016298   0.010328   
## residual sugar                    9.416441  0.000945 -0.018644   0.001321   
## chlorides                         0.073387  0.000018 -0.001926   0.002962   
## free sulfur dioxide             229.737521 -0.000433  0.113653   0.091592   
## total sulfur dioxide           1082.102373  0.004425 -0.337699   0.239471   
## density                           0.004425  0.000004 -0.000100   0.000048   
## pH                               -0.337699 -0.000100  0.023835  -0.005146   
## sulphates                         0.239471  0.000048 -0.005146   0.028733   
## alcohol                          -7.209298 -0.000998  0.033832   0.016907   
## 
##                        alcohol  
## fixed acidity        -0.114421  
## volatile acidity     -0.038600  
## citric acid           0.022815  
## residual sugar        0.063219  
## chlorides            -0.011092  
## free sulfur dioxide  -0.773698  
## total sulfur dioxide -7.209298  
## density              -0.000998  
## pH                    0.033832  
## sulphates             0.016907  
## alcohol               1.135647
# White wine: sample variance-covariance matrix (ddof=1 is the pandas default)
cov_matrix_white = wine_white.cov(ddof=1)
print("Matriz de covarianzas:\n", cov_matrix_white)
## Matriz de covarianzas:
##                        fixed acidity  volatile acidity  citric acid  \
## fixed acidity              0.712114         -0.001931     0.029533   
## volatile acidity          -0.001931          0.010160    -0.001823   
## citric acid                0.029533         -0.001823     0.014646   
## residual sugar             0.381022          0.032865     0.057829   
## chlorides                  0.000426          0.000155     0.000302   
## free sulfur dioxide       -0.708919         -0.166300     0.193630   
## total sulfur dioxide       3.266013          0.382354     0.622989   
## density                    0.000670          0.000008     0.000054   
## pH                        -0.054265         -0.000486    -0.002992   
## sulphates                 -0.001651         -0.000411     0.000861   
## alcohol                   -0.125533          0.008400    -0.011278   
## 
##                       residual sugar  chlorides  free sulfur dioxide  \
## fixed acidity               0.381022   0.000426            -0.708919   
## volatile acidity            0.032865   0.000155            -0.166300   
## citric acid                 0.057829   0.000302             0.193630   
## residual sugar             25.725770   0.009828            25.800578   
## chlorides                   0.009828   0.000477             0.037674   
## free sulfur dioxide        25.800578   0.037674           289.242720   
## total sulfur dioxide       86.531303   0.184687           444.865891   
## density                     0.012727   0.000017             0.014966   
## pH                         -0.148684  -0.000298            -0.001587   
## sulphates                  -0.015435   0.000042             0.114938   
## alcohol                    -2.812740  -0.009684            -5.234509   
## 
##                       total sulfur dioxide   density        pH  sulphates  \
## fixed acidity                     3.266013  0.000670 -0.054265  -0.001651   
## volatile acidity                  0.382354  0.000008 -0.000486  -0.000411   
## citric acid                       0.622989  0.000054 -0.002992   0.000861   
## residual sugar                   86.531303  0.012727 -0.148684  -0.015435   
## chlorides                         0.184687  0.000017 -0.000298   0.000042   
## free sulfur dioxide             444.865891  0.014966 -0.001587   0.114938   
## total sulfur dioxide           1806.085491  0.067352  0.014894   0.652645   
## density                           0.067352  0.000009 -0.000042   0.000025   
## pH                                0.014894 -0.000042  0.022801   0.002688   
## sulphates                         0.652645  0.000025  0.002688   0.013025   
## alcohol                         -23.476605 -0.002871  0.022565  -0.002448   
## 
##                         alcohol  
## fixed acidity         -0.125533  
## volatile acidity       0.008400  
## citric acid           -0.011278  
## residual sugar        -2.812740  
## chlorides             -0.009684  
## free sulfur dioxide   -5.234509  
## total sulfur dioxide -23.476605  
## density               -0.002871  
## pH                     0.022565  
## sulphates             -0.002448  
## alcohol                1.514427

Matriz de Correlaciones

Aplicación en R

# Vino tinto
corr_matrix_red <- cor(wine_red)
corr_matrix_red
##                      fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity           1.00000000     -0.256130895  0.67170343    0.114776724
## volatile.acidity       -0.25613089      1.000000000 -0.55249568    0.001917882
## citric.acid             0.67170343     -0.552495685  1.00000000    0.143577162
## residual.sugar          0.11477672      0.001917882  0.14357716    1.000000000
## chlorides               0.09370519      0.061297772  0.20382291    0.055609535
## free.sulfur.dioxide    -0.15379419     -0.010503827 -0.06097813    0.187048995
## total.sulfur.dioxide   -0.11318144      0.076470005  0.03553302    0.203027882
## density                 0.66804729      0.022026232  0.36494718    0.355283371
## pH                     -0.68297819      0.234937294 -0.54190414   -0.085652422
## sulphates               0.18300566     -0.260986685  0.31277004    0.005527121
## alcohol                -0.06166827     -0.202288027  0.10990325    0.042075437
##                         chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity         0.093705186        -0.153794193          -0.11318144
## volatile.acidity      0.061297772        -0.010503827           0.07647000
## citric.acid           0.203822914        -0.060978129           0.03553302
## residual.sugar        0.055609535         0.187048995           0.20302788
## chlorides             1.000000000         0.005562147           0.04740047
## free.sulfur.dioxide   0.005562147         1.000000000           0.66766645
## total.sulfur.dioxide  0.047400468         0.667666450           1.00000000
## density               0.200632327        -0.021945831           0.07126948
## pH                   -0.265026131         0.070377499          -0.06649456
## sulphates             0.371260481         0.051657572           0.04294684
## alcohol              -0.221140545        -0.069408354          -0.20565394
##                          density          pH    sulphates     alcohol
## fixed.acidity         0.66804729 -0.68297819  0.183005664 -0.06166827
## volatile.acidity      0.02202623  0.23493729 -0.260986685 -0.20228803
## citric.acid           0.36494718 -0.54190414  0.312770044  0.10990325
## residual.sugar        0.35528337 -0.08565242  0.005527121  0.04207544
## chlorides             0.20063233 -0.26502613  0.371260481 -0.22114054
## free.sulfur.dioxide  -0.02194583  0.07037750  0.051657572 -0.06940835
## total.sulfur.dioxide  0.07126948 -0.06649456  0.042946836 -0.20565394
## density               1.00000000 -0.34169933  0.148506412 -0.49617977
## pH                   -0.34169933  1.00000000 -0.196647602  0.20563251
## sulphates             0.14850641 -0.19664760  1.000000000  0.09359475
## alcohol              -0.49617977  0.20563251  0.093594750  1.00000000
# Vino blanco
corr_matrix_white <- cor(wine_white)
corr_matrix_white
##                      fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity           1.00000000      -0.02269729  0.28918070     0.08902070
## volatile.acidity       -0.02269729       1.00000000 -0.14947181     0.06428606
## citric.acid             0.28918070      -0.14947181  1.00000000     0.09421162
## residual.sugar          0.08902070       0.06428606  0.09421162     1.00000000
## chlorides               0.02308564       0.07051157  0.11436445     0.08868454
## free.sulfur.dioxide    -0.04939586      -0.09701194  0.09407722     0.29909835
## total.sulfur.dioxide    0.09106976       0.08926050  0.12113080     0.40143931
## density                 0.26533101       0.02711385  0.14950257     0.83896645
## pH                     -0.42585829      -0.03191537 -0.16374821    -0.19413345
## sulphates              -0.01714299      -0.03572815  0.06233094    -0.02666437
## alcohol                -0.12088112       0.06771794 -0.07572873    -0.45063122
##                        chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity         0.02308564       -0.0493958591          0.091069756
## volatile.acidity      0.07051157       -0.0970119393          0.089260504
## citric.acid           0.11436445        0.0940772210          0.121130798
## residual.sugar        0.08868454        0.2990983537          0.401439311
## chlorides             1.00000000        0.1013923521          0.198910300
## free.sulfur.dioxide   0.10139235        1.0000000000          0.615500965
## total.sulfur.dioxide  0.19891030        0.6155009650          1.000000000
## density               0.25721132        0.2942104109          0.529881324
## pH                   -0.09043946       -0.0006177961          0.002320972
## sulphates             0.01676288        0.0592172458          0.134562367
## alcohol              -0.36018871       -0.2501039415         -0.448892102
##                          density            pH   sulphates     alcohol
## fixed.acidity         0.26533101 -0.4258582910 -0.01714299 -0.12088112
## volatile.acidity      0.02711385 -0.0319153683 -0.03572815  0.06771794
## citric.acid           0.14950257 -0.1637482114  0.06233094 -0.07572873
## residual.sugar        0.83896645 -0.1941334540 -0.02666437 -0.45063122
## chlorides             0.25721132 -0.0904394560  0.01676288 -0.36018871
## free.sulfur.dioxide   0.29421041 -0.0006177961  0.05921725 -0.25010394
## total.sulfur.dioxide  0.52988132  0.0023209718  0.13456237 -0.44889210
## density               1.00000000 -0.0935914935  0.07449315 -0.78013762
## pH                   -0.09359149  1.0000000000  0.15595150  0.12143210
## sulphates             0.07449315  0.1559514973  1.00000000 -0.01743277
## alcohol              -0.78013762  0.1214320987 -0.01743277  1.00000000

Aplicación en Python

# Vino tinto
corr_matrix_red = wine_red.corr()
print("Matriz de Correlaciones:\n", corr_matrix_red)
## Matriz de Correlaciones:
##                        fixed acidity  volatile acidity  citric acid  \
## fixed acidity              1.000000         -0.256131     0.671703   
## volatile acidity          -0.256131          1.000000    -0.552496   
## citric acid                0.671703         -0.552496     1.000000   
## residual sugar             0.114777          0.001918     0.143577   
## chlorides                  0.093705          0.061298     0.203823   
## free sulfur dioxide       -0.153794         -0.010504    -0.060978   
## total sulfur dioxide      -0.113181          0.076470     0.035533   
## density                    0.668047          0.022026     0.364947   
## pH                        -0.682978          0.234937    -0.541904   
## sulphates                  0.183006         -0.260987     0.312770   
## alcohol                   -0.061668         -0.202288     0.109903   
## 
##                       residual sugar  chlorides  free sulfur dioxide  \
## fixed acidity               0.114777   0.093705            -0.153794   
## volatile acidity            0.001918   0.061298            -0.010504   
## citric acid                 0.143577   0.203823            -0.060978   
## residual sugar              1.000000   0.055610             0.187049   
## chlorides                   0.055610   1.000000             0.005562   
## free sulfur dioxide         0.187049   0.005562             1.000000   
## total sulfur dioxide        0.203028   0.047400             0.667666   
## density                     0.355283   0.200632            -0.021946   
## pH                         -0.085652  -0.265026             0.070377   
## sulphates                   0.005527   0.371260             0.051658   
## alcohol                     0.042075  -0.221141            -0.069408   
## 
##                       total sulfur dioxide   density        pH  sulphates  \
## fixed acidity                    -0.113181  0.668047 -0.682978   0.183006   
## volatile acidity                  0.076470  0.022026  0.234937  -0.260987   
## citric acid                       0.035533  0.364947 -0.541904   0.312770   
## residual sugar                    0.203028  0.355283 -0.085652   0.005527   
## chlorides                         0.047400  0.200632 -0.265026   0.371260   
## free sulfur dioxide               0.667666 -0.021946  0.070377   0.051658   
## total sulfur dioxide              1.000000  0.071269 -0.066495   0.042947   
## density                           0.071269  1.000000 -0.341699   0.148506   
## pH                               -0.066495 -0.341699  1.000000  -0.196648   
## sulphates                         0.042947  0.148506 -0.196648   1.000000   
## alcohol                          -0.205654 -0.496180  0.205633   0.093595   
## 
##                        alcohol  
## fixed acidity        -0.061668  
## volatile acidity     -0.202288  
## citric acid           0.109903  
## residual sugar        0.042075  
## chlorides            -0.221141  
## free sulfur dioxide  -0.069408  
## total sulfur dioxide -0.205654  
## density              -0.496180  
## pH                    0.205633  
## sulphates             0.093595  
## alcohol               1.000000
# White wine: pairwise Pearson correlation between every pair of columns.
corr_matrix_white = wine_white.corr(method="pearson")
print("Matriz de Correlaciones:\n", corr_matrix_white)
## Matriz de Correlaciones:
##                        fixed acidity  volatile acidity  citric acid  \
## fixed acidity              1.000000         -0.022697     0.289181   
## volatile acidity          -0.022697          1.000000    -0.149472   
## citric acid                0.289181         -0.149472     1.000000   
## residual sugar             0.089021          0.064286     0.094212   
## chlorides                  0.023086          0.070512     0.114364   
## free sulfur dioxide       -0.049396         -0.097012     0.094077   
## total sulfur dioxide       0.091070          0.089261     0.121131   
## density                    0.265331          0.027114     0.149503   
## pH                        -0.425858         -0.031915    -0.163748   
## sulphates                 -0.017143         -0.035728     0.062331   
## alcohol                   -0.120881          0.067718    -0.075729   
## 
##                       residual sugar  chlorides  free sulfur dioxide  \
## fixed acidity               0.089021   0.023086            -0.049396   
## volatile acidity            0.064286   0.070512            -0.097012   
## citric acid                 0.094212   0.114364             0.094077   
## residual sugar              1.000000   0.088685             0.299098   
## chlorides                   0.088685   1.000000             0.101392   
## free sulfur dioxide         0.299098   0.101392             1.000000   
## total sulfur dioxide        0.401439   0.198910             0.615501   
## density                     0.838966   0.257211             0.294210   
## pH                         -0.194133  -0.090439            -0.000618   
## sulphates                  -0.026664   0.016763             0.059217   
## alcohol                    -0.450631  -0.360189            -0.250104   
## 
##                       total sulfur dioxide   density        pH  sulphates  \
## fixed acidity                     0.091070  0.265331 -0.425858  -0.017143   
## volatile acidity                  0.089261  0.027114 -0.031915  -0.035728   
## citric acid                       0.121131  0.149503 -0.163748   0.062331   
## residual sugar                    0.401439  0.838966 -0.194133  -0.026664   
## chlorides                         0.198910  0.257211 -0.090439   0.016763   
## free sulfur dioxide               0.615501  0.294210 -0.000618   0.059217   
## total sulfur dioxide              1.000000  0.529881  0.002321   0.134562   
## density                           0.529881  1.000000 -0.093591   0.074493   
## pH                                0.002321 -0.093591  1.000000   0.155951   
## sulphates                         0.134562  0.074493  0.155951   1.000000   
## alcohol                          -0.448892 -0.780138  0.121432  -0.017433   
## 
##                        alcohol  
## fixed acidity        -0.120881  
## volatile acidity      0.067718  
## citric acid          -0.075729  
## residual sugar       -0.450631  
## chlorides            -0.360189  
## free sulfur dioxide  -0.250104  
## total sulfur dioxide -0.448892  
## density              -0.780138  
## pH                    0.121432  
## sulphates            -0.017433  
## alcohol               1.000000

Matriz de Correlaciones Parciales

Aplicación en R

# Red wine
# pcor() returns a list; its "estimate" element holds the matrix of
# partial correlation coefficients.
pcor_red <- pcor(wine_red)
pcor_matrix_red <- pcor_red[["estimate"]]
print(pcor_matrix_red)
##                      fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity           1.00000000       0.04973736  0.33328898    -0.43095775
## volatile.acidity        0.04973736       1.00000000 -0.53546402    -0.02402241
## citric.acid             0.33328898      -0.53546402  1.00000000     0.05341803
## residual.sugar         -0.43095775      -0.02402241  0.05341803     1.00000000
## chlorides              -0.23335902       0.26401083  0.26370924    -0.01084620
## free.sulfur.dioxide     0.11013350      -0.16533973 -0.16249047     0.12649385
## total.sulfur.dioxide   -0.23171731       0.21410687  0.26497412     0.03630121
## density                 0.78544969       0.12842079  0.00859700     0.59519561
## pH                     -0.71850174       0.02664812 -0.03423779    -0.32904958
## sulphates              -0.15206062      -0.20942859  0.02655334    -0.20073068
## alcohol                 0.54341241       0.07891145  0.14946708     0.50207731
##                        chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity        -0.23335902          0.11013350          -0.23171731
## volatile.acidity      0.26401083         -0.16533973           0.21410687
## citric.acid           0.26370924         -0.16249047           0.26497412
## residual.sugar       -0.01084620          0.12649385           0.03630121
## chlorides             1.00000000          0.05852533          -0.14017933
## free.sulfur.dioxide   0.05852533          1.00000000           0.66293391
## total.sulfur.dioxide -0.14017933          0.66293391           1.00000000
## density               0.09380338         -0.08543551           0.07878250
## pH                   -0.23105397          0.13765744          -0.19390050
## sulphates             0.35050976          0.05395905           0.03306191
## alcohol              -0.09234314         -0.02614219          -0.08343417
##                          density          pH   sulphates     alcohol
## fixed.acidity         0.78544969 -0.71850174 -0.15206062  0.54341241
## volatile.acidity      0.12842079  0.02664812 -0.20942859  0.07891145
## citric.acid           0.00859700 -0.03423779  0.02655334  0.14946708
## residual.sugar        0.59519561 -0.32904958 -0.20073068  0.50207731
## chlorides             0.09380338 -0.23105397  0.35050976 -0.09234314
## free.sulfur.dioxide  -0.08543551  0.13765744  0.05395905 -0.02614219
## total.sulfur.dioxide  0.07878250 -0.19390050  0.03306191 -0.08343417
## density               1.00000000  0.57223228  0.24799149 -0.75581233
## pH                    0.57223228  1.00000000 -0.12860181  0.52004381
## sulphates             0.24799149 -0.12860181  1.00000000  0.28879013
## alcohol              -0.75581233  0.52004381  0.28879013  1.00000000
# White wine
# pcor() returns a list; its "estimate" element holds the matrix of
# partial correlation coefficients.
pcor_white <- pcor(wine_white)
pcor_matrix_white <- pcor_white[["estimate"]]
print(pcor_matrix_white)
##                      fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity         1.0000000000      -0.09177263  0.12265107    -0.67491264
## volatile.acidity     -0.0917726327       1.00000000 -0.17348652    -0.06557784
## citric.acid           0.1226510715      -0.17348652  1.00000000    -0.04507215
## residual.sugar       -0.6749126426      -0.06557784 -0.04507215     1.00000000
## chlorides            -0.1709109899       0.09904437  0.10690945    -0.19717515
## free.sulfur.dioxide  -0.0006842353      -0.18930514  0.03657815     0.17843406
## total.sulfur.dioxide -0.0388679308       0.19763096  0.04063530    -0.13361933
## density               0.7043664073       0.10614760  0.06764097     0.94153651
## pH                   -0.6661276194      -0.10955425 -0.08255467    -0.63555052
## sulphates            -0.1765522452      -0.06548969  0.04088718    -0.26691222
## alcohol               0.6045902659       0.18100688  0.09410972     0.78095720
##                         chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity        -0.170910990       -0.0006842353         -0.038867931
## volatile.acidity      0.099044371       -0.1893051352          0.197630961
## citric.acid           0.106909446        0.0365781543          0.040635296
## residual.sugar       -0.197175153        0.1784340632         -0.133619329
## chlorides             1.000000000        0.0292937057          0.009562664
## free.sulfur.dioxide   0.029293706        1.0000000000          0.593206957
## total.sulfur.dioxide  0.009562664        0.5932069568          1.000000000
## density               0.163780328       -0.1555386433          0.196045940
## pH                   -0.164066249        0.0520636863         -0.019294025
## sulphates            -0.037354845        0.0104257743          0.069470082
## alcohol              -0.008965184       -0.0992961359          0.041463583
##                          density          pH   sulphates      alcohol
## fixed.acidity         0.70436641 -0.66612762 -0.17655225  0.604590266
## volatile.acidity      0.10614760 -0.10955425 -0.06548969  0.181006877
## citric.acid           0.06764097 -0.08255467  0.04088718  0.094109718
## residual.sugar        0.94153651 -0.63555052 -0.26691222  0.780957197
## chlorides             0.16378033 -0.16406625 -0.03735485 -0.008965184
## free.sulfur.dioxide  -0.15553864  0.05206369  0.01042577 -0.099296136
## total.sulfur.dioxide  0.19604594 -0.01929403  0.06947008  0.041463583
## density               1.00000000  0.62772454  0.26821045 -0.886725282
## pH                    0.62772454  1.00000000 -0.06069657  0.565640272
## sulphates             0.26821045 -0.06069657  1.00000000  0.246627014
## alcohol              -0.88672528  0.56564027  0.24662701  1.000000000

Aplicación en Python

# Red wine: partial correlation matrix of all columns.
# NOTE(review): .pcorr() is not a core pandas method; presumably it is the
# DataFrame method registered by importing pingouin - confirm against the imports.
pcor_red = wine_red.pcorr()
print(pcor_red)
##                       fixed acidity  volatile acidity  citric acid  \
## fixed acidity              1.000000          0.049737     0.333289   
## volatile acidity           0.049737          1.000000    -0.535464   
## citric acid                0.333289         -0.535464     1.000000   
## residual sugar            -0.430958         -0.024022     0.053418   
## chlorides                 -0.233359          0.264011     0.263709   
## free sulfur dioxide        0.110133         -0.165340    -0.162490   
## total sulfur dioxide      -0.231717          0.214107     0.264974   
## density                    0.785450          0.128421     0.008597   
## pH                        -0.718502          0.026648    -0.034238   
## sulphates                 -0.152061         -0.209429     0.026553   
## alcohol                    0.543412          0.078911     0.149467   
## 
##                       residual sugar  chlorides  free sulfur dioxide  \
## fixed acidity              -0.430958  -0.233359             0.110133   
## volatile acidity           -0.024022   0.264011            -0.165340   
## citric acid                 0.053418   0.263709            -0.162490   
## residual sugar              1.000000  -0.010846             0.126494   
## chlorides                  -0.010846   1.000000             0.058525   
## free sulfur dioxide         0.126494   0.058525             1.000000   
## total sulfur dioxide        0.036301  -0.140179             0.662934   
## density                     0.595196   0.093803            -0.085436   
## pH                         -0.329050  -0.231054             0.137657   
## sulphates                  -0.200731   0.350510             0.053959   
## alcohol                     0.502077  -0.092343            -0.026142   
## 
##                       total sulfur dioxide   density        pH  sulphates  \
## fixed acidity                    -0.231717  0.785450 -0.718502  -0.152061   
## volatile acidity                  0.214107  0.128421  0.026648  -0.209429   
## citric acid                       0.264974  0.008597 -0.034238   0.026553   
## residual sugar                    0.036301  0.595196 -0.329050  -0.200731   
## chlorides                        -0.140179  0.093803 -0.231054   0.350510   
## free sulfur dioxide               0.662934 -0.085436  0.137657   0.053959   
## total sulfur dioxide              1.000000  0.078782 -0.193901   0.033062   
## density                           0.078782  1.000000  0.572232   0.247991   
## pH                               -0.193901  0.572232  1.000000  -0.128602   
## sulphates                         0.033062  0.247991 -0.128602   1.000000   
## alcohol                          -0.083434 -0.755812  0.520044   0.288790   
## 
##                        alcohol  
## fixed acidity         0.543412  
## volatile acidity      0.078911  
## citric acid           0.149467  
## residual sugar        0.502077  
## chlorides            -0.092343  
## free sulfur dioxide  -0.026142  
## total sulfur dioxide -0.083434  
## density              -0.755812  
## pH                    0.520044  
## sulphates             0.288790  
## alcohol               1.000000
# White wine: partial correlation matrix of all columns.
# NOTE(review): .pcorr() is not a core pandas method; presumably it is the
# DataFrame method registered by importing pingouin - confirm against the imports.
pcor_white = wine_white.pcorr()
print(pcor_white)
##                       fixed acidity  volatile acidity  citric acid  \
## fixed acidity              1.000000         -0.091773     0.122651   
## volatile acidity          -0.091773          1.000000    -0.173487   
## citric acid                0.122651         -0.173487     1.000000   
## residual sugar            -0.674913         -0.065578    -0.045072   
## chlorides                 -0.170911          0.099044     0.106909   
## free sulfur dioxide       -0.000684         -0.189305     0.036578   
## total sulfur dioxide      -0.038868          0.197631     0.040635   
## density                    0.704366          0.106148     0.067641   
## pH                        -0.666128         -0.109554    -0.082555   
## sulphates                 -0.176552         -0.065490     0.040887   
## alcohol                    0.604590          0.181007     0.094110   
## 
##                       residual sugar  chlorides  free sulfur dioxide  \
## fixed acidity              -0.674913  -0.170911            -0.000684   
## volatile acidity           -0.065578   0.099044            -0.189305   
## citric acid                -0.045072   0.106909             0.036578   
## residual sugar              1.000000  -0.197175             0.178434   
## chlorides                  -0.197175   1.000000             0.029294   
## free sulfur dioxide         0.178434   0.029294             1.000000   
## total sulfur dioxide       -0.133619   0.009563             0.593207   
## density                     0.941537   0.163780            -0.155539   
## pH                         -0.635551  -0.164066             0.052064   
## sulphates                  -0.266912  -0.037355             0.010426   
## alcohol                     0.780957  -0.008965            -0.099296   
## 
##                       total sulfur dioxide   density        pH  sulphates  \
## fixed acidity                    -0.038868  0.704366 -0.666128  -0.176552   
## volatile acidity                  0.197631  0.106148 -0.109554  -0.065490   
## citric acid                       0.040635  0.067641 -0.082555   0.040887   
## residual sugar                   -0.133619  0.941537 -0.635551  -0.266912   
## chlorides                         0.009563  0.163780 -0.164066  -0.037355   
## free sulfur dioxide               0.593207 -0.155539  0.052064   0.010426   
## total sulfur dioxide              1.000000  0.196046 -0.019294   0.069470   
## density                           0.196046  1.000000  0.627725   0.268210   
## pH                               -0.019294  0.627725  1.000000  -0.060697   
## sulphates                         0.069470  0.268210 -0.060697   1.000000   
## alcohol                           0.041464 -0.886725  0.565640   0.246627   
## 
##                        alcohol  
## fixed acidity         0.604590  
## volatile acidity      0.181007  
## citric acid           0.094110  
## residual sugar        0.780957  
## chlorides            -0.008965  
## free sulfur dioxide  -0.099296  
## total sulfur dioxide  0.041464  
## density              -0.886725  
## pH                    0.565640  
## sulphates             0.246627  
## alcohol               1.000000

Datos Atípicos (Outliers) Multivariantes

Aplicación en R

# Red wine:
# Squared Mahalanobis distance of every observation from the mean vector.
mahalanobis_dist_red <- mahalanobis(
  x = wine_red,
  center = means_red,
  cov = cov_matrix_red
)

# Chi-square critical value used as the cut-off for flagging outliers.
alpha <- 0.05 # significance level
gl <- ncol(wine_red) # degrees of freedom (number of variables)
valor_critico <- qchisq(p = 1 - alpha, df = gl)

print(valor_critico) # show the critical value
## [1] 19.67514
# Positions of the observations whose distance exceeds the cut-off.
atipicos_red <- which(mahalanobis_dist_red > valor_critico)

print(head(mahalanobis_dist_red)) # show the first distances
## [1] 4.920027 8.335593 3.647326 7.439998 4.920027 5.051613
print(atipicos_red) # 154 observations are flagged
##   [1]   14   15   16   18   20   34   39   43   44   46   80   82   84   87   92
##  [16]   93   95   96  107  110  127  128  143  145  152  162  170  182  199  227
##  [31]  241  244  245  259  282  292  325  326  340  341  354  355  379  391  396
##  [46]  397  401  410  416  443  452  456  464  481  485  494  495  500  502  503
##  [61]  511  516  539  545  554  555  556  557  558  559  560  565  585  592  609
##  [76]  615  634  640  650  652  653  673  685  691  693  696  701  711  724  731
##  [91]  755  796  803  834  837  838  862  890  912  918  924  926  927  983  999
## [106] 1018 1019 1044 1052 1072 1075 1078 1079 1080 1082 1091 1115 1132 1155 1166
## [121] 1179 1187 1229 1236 1245 1257 1261 1270 1271 1289 1290 1296 1297 1300 1313
## [136] 1317 1320 1322 1359 1368 1371 1373 1375 1435 1436 1475 1476 1477 1478 1509
## [151] 1559 1571 1575 1590
# White wine:
# Squared Mahalanobis distance of every observation from the mean vector.
mahalanobis_dist_white <- mahalanobis(
  x = wine_white,
  center = means_white,
  cov = cov_matrix_white
)

# Same critical value as for red wine (valor_critico is not recomputed here).
print(valor_critico) # show the critical value
## [1] 19.67514
# Positions of the observations whose distance exceeds the cut-off.
atipicos_white <- which(mahalanobis_dist_white > valor_critico)

print(head(mahalanobis_dist_white)) # show the first distances
## [1] 9.263907 6.682467 8.293968 3.223621 3.223621 8.293968
print(atipicos_white) # 395 observations are flagged
##   [1]   18   21   24   32   41   42   55   97   99  100  116  148  170  179  195
##  [16]  196  197  208  209  222  231  246  252  295  316  325  326  373  396  406
##  [31]  434  471  485  503  509  532  542  550  601  627  647  660  663  682  684
##  [46]  688  701  722  730  746  759  760  764  767  772  773  776  822  831  835
##  [61]  853  855  860  867  871  874  879  914  916  927  947  949  980  981 1008
##  [76] 1015 1017 1025 1028 1035 1037 1041 1052 1054 1100 1115 1127 1153 1159 1164
##  [91] 1172 1179 1181 1215 1218 1221 1229 1240 1246 1251 1255 1256 1264 1273 1294
## [106] 1295 1305 1308 1309 1321 1370 1373 1374 1386 1387 1395 1402 1408 1416 1418
## [121] 1419 1424 1437 1456 1461 1466 1477 1483 1488 1497 1505 1527 1535 1537 1541
## [136] 1542 1545 1552 1576 1578 1579 1581 1584 1591 1597 1599 1600 1639 1650 1652
## [151] 1654 1664 1673 1674 1682 1689 1709 1722 1723 1728 1732 1745 1759 1760 1776
## [166] 1782 1802 1808 1810 1818 1836 1837 1840 1849 1857 1863 1866 1887 1901 1926
## [181] 1927 1932 1933 1941 1943 1945 1948 1952 1959 1962 1964 2015 2018 2025 2026
## [196] 2027 2031 2037 2051 2064 2076 2093 2128 2155 2163 2165 2187 2207 2258 2260
## [211] 2280 2287 2288 2322 2335 2337 2350 2360 2372 2379 2395 2397 2404 2409 2418
## [226] 2423 2425 2441 2442 2466 2467 2476 2477 2523 2526 2560 2564 2580 2590 2595
## [241] 2626 2630 2635 2638 2647 2652 2655 2669 2705 2706 2722 2731 2732 2749 2751
## [256] 2772 2782 2821 2850 2873 2874 2875 2894 2927 2931 2932 3015 3023 3024 3026
## [271] 3044 3051 3053 3065 3067 3073 3095 3096 3098 3140 3153 3166 3216 3219 3221
## [286] 3266 3284 3289 3308 3389 3418 3462 3471 3498 3521 3524 3529 3538 3557 3561
## [301] 3565 3572 3588 3589 3590 3620 3621 3624 3678 3711 3736 3738 3774 3849 3862
## [316] 3863 3864 3869 3870 3872 3880 3902 3903 3905 3912 3916 3938 3973 3982 3999
## [331] 4000 4001 4013 4040 4066 4108 4124 4137 4174 4214 4240 4248 4260 4300 4301
## [346] 4345 4347 4402 4474 4481 4498 4504 4515 4520 4526 4527 4566 4568 4580 4581
## [361] 4583 4592 4598 4610 4618 4627 4633 4649 4650 4651 4697 4699 4702 4703 4727
## [376] 4746 4780 4788 4790 4793 4794 4795 4814 4816 4819 4821 4838 4842 4846 4848
## [391] 4868 4878 4879 4887 4888

Aplicación en Python

# Red wine:
# Squared Mahalanobis distance of every observation:
#   d_i = (x_i - mean)' * inv(S) * (x_i - mean)
xi_xbar_red = wine_red - means_red
inv_cov_red = np.linalg.inv(cov_matrix_red)
parte1_red = np.dot(xi_xbar_red, inv_cov_red)

# Only the diagonal of parte1 @ xi_xbar.T is needed, so take the row-wise dot
# product directly instead of materialising the full n x n matrix.
mahalanobis_dist_red = np.einsum("ij,ij->i", parte1_red, np.asarray(xi_xbar_red))
print(mahalanobis_dist_red)
## [ 4.92002724  8.33559263  3.64732572 ...  7.22620103  9.03766813
##  12.27097623]
# Chi-square critical value used as the cut-off for flagging outliers
gl = wine_red.shape[1]  # degrees of freedom (number of variables)
alpha = 0.05  # significance level
valor_critico = chi2.ppf(1 - alpha, df=gl)

print("Valor crítico: ",valor_critico)
## Valor crítico:  19.67513757268249
# Flag the observations whose distance exceeds the cut-off.
# np.where returns a 1-tuple holding the array of 0-based positions.
atipicos_red = np.where(mahalanobis_dist_red > valor_critico)

# Show the outliers; the count is computed rather than hard-coded, and the
# printed text is unchanged for the current data.
print("Número de datos atípicos:", len(atipicos_red[0]), "\n", atipicos_red)
## Número de datos atípicos: 154 
##  (array([  13,   14,   15,   17,   19,   33,   38,   42,   43,   45,   79,
##          81,   83,   86,   91,   92,   94,   95,  106,  109,  126,  127,
##         142,  144,  151,  161,  169,  181,  198,  226,  240,  243,  244,
##         258,  281,  291,  324,  325,  339,  340,  353,  354,  378,  390,
##         395,  396,  400,  409,  415,  442,  451,  455,  463,  480,  484,
##         493,  494,  499,  501,  502,  510,  515,  538,  544,  553,  554,
##         555,  556,  557,  558,  559,  564,  584,  591,  608,  614,  633,
##         639,  649,  651,  652,  672,  684,  690,  692,  695,  700,  710,
##         723,  730,  754,  795,  802,  833,  836,  837,  861,  889,  911,
##         917,  923,  925,  926,  982,  998, 1017, 1018, 1043, 1051, 1071,
##        1074, 1077, 1078, 1079, 1081, 1090, 1114, 1131, 1154, 1165, 1178,
##        1186, 1228, 1235, 1244, 1256, 1260, 1269, 1270, 1288, 1289, 1295,
##        1296, 1299, 1312, 1316, 1319, 1321, 1358, 1367, 1370, 1372, 1374,
##        1434, 1435, 1474, 1475, 1476, 1477, 1508, 1558, 1570, 1574, 1589]),)
# White wine:
# Squared Mahalanobis distance of every observation:
#   d_i = (x_i - mean)' * inv(S) * (x_i - mean)
xi_xbar_white = wine_white - means_white
inv_cov_white = np.linalg.inv(cov_matrix_white)
parte1_white = np.dot(xi_xbar_white, inv_cov_white)

# Only the diagonal of parte1 @ xi_xbar.T is needed, so take the row-wise dot
# product directly instead of materialising the full n x n matrix.
mahalanobis_dist_white = np.einsum("ij,ij->i", parte1_white, np.asarray(xi_xbar_white))
print(mahalanobis_dist_white)
## [9.26390697 6.68246735 8.2939681  ... 9.55520151 7.89464608 7.23836837]
# Critical value for flagging outliers (valor_critico is reused, not recomputed)
print("Valor crítico: ",valor_critico)
## Valor crítico:  19.67513757268249
# Flag the observations whose distance exceeds the cut-off.
# np.where returns a 1-tuple holding the array of 0-based positions.
atipicos_white = np.where(mahalanobis_dist_white > valor_critico)

# Show the outliers; the count is computed rather than hard-coded, and the
# printed text is unchanged for the current data.
print("Número de datos atípicos:", len(atipicos_white[0]), "\n", atipicos_white)
## Número de datos atípicos: 395 
##  (array([  17,   20,   23,   31,   40,   41,   54,   96,   98,   99,  115,
##         147,  169,  178,  194,  195,  196,  207,  208,  221,  230,  245,
##         251,  294,  315,  324,  325,  372,  395,  405,  433,  470,  484,
##         502,  508,  531,  541,  549,  600,  626,  646,  659,  662,  681,
##         683,  687,  700,  721,  729,  745,  758,  759,  763,  766,  771,
##         772,  775,  821,  830,  834,  852,  854,  859,  866,  870,  873,
##         878,  913,  915,  926,  946,  948,  979,  980, 1007, 1014, 1016,
##        1024, 1027, 1034, 1036, 1040, 1051, 1053, 1099, 1114, 1126, 1152,
##        1158, 1163, 1171, 1178, 1180, 1214, 1217, 1220, 1228, 1239, 1245,
##        1250, 1254, 1255, 1263, 1272, 1293, 1294, 1304, 1307, 1308, 1320,
##        1369, 1372, 1373, 1385, 1386, 1394, 1401, 1407, 1415, 1417, 1418,
##        1423, 1436, 1455, 1460, 1465, 1476, 1482, 1487, 1496, 1504, 1526,
##        1534, 1536, 1540, 1541, 1544, 1551, 1575, 1577, 1578, 1580, 1583,
##        1590, 1596, 1598, 1599, 1638, 1649, 1651, 1653, 1663, 1672, 1673,
##        1681, 1688, 1708, 1721, 1722, 1727, 1731, 1744, 1758, 1759, 1775,
##        1781, 1801, 1807, 1809, 1817, 1835, 1836, 1839, 1848, 1856, 1862,
##        1865, 1886, 1900, 1925, 1926, 1931, 1932, 1940, 1942, 1944, 1947,
##        1951, 1958, 1961, 1963, 2014, 2017, 2024, 2025, 2026, 2030, 2036,
##        2050, 2063, 2075, 2092, 2127, 2154, 2162, 2164, 2186, 2206, 2257,
##        2259, 2279, 2286, 2287, 2321, 2334, 2336, 2349, 2359, 2371, 2378,
##        2394, 2396, 2403, 2408, 2417, 2422, 2424, 2440, 2441, 2465, 2466,
##        2475, 2476, 2522, 2525, 2559, 2563, 2579, 2589, 2594, 2625, 2629,
##        2634, 2637, 2646, 2651, 2654, 2668, 2704, 2705, 2721, 2730, 2731,
##        2748, 2750, 2771, 2781, 2820, 2849, 2872, 2873, 2874, 2893, 2926,
##        2930, 2931, 3014, 3022, 3023, 3025, 3043, 3050, 3052, 3064, 3066,
##        3072, 3094, 3095, 3097, 3139, 3152, 3165, 3215, 3218, 3220, 3265,
##        3283, 3288, 3307, 3388, 3417, 3461, 3470, 3497, 3520, 3523, 3528,
##        3537, 3556, 3560, 3564, 3571, 3587, 3588, 3589, 3619, 3620, 3623,
##        3677, 3710, 3735, 3737, 3773, 3848, 3861, 3862, 3863, 3868, 3869,
##        3871, 3879, 3901, 3902, 3904, 3911, 3915, 3937, 3972, 3981, 3998,
##        3999, 4000, 4012, 4039, 4065, 4107, 4123, 4136, 4173, 4213, 4239,
##        4247, 4259, 4299, 4300, 4344, 4346, 4401, 4473, 4480, 4497, 4503,
##        4514, 4519, 4525, 4526, 4565, 4567, 4579, 4580, 4582, 4591, 4597,
##        4609, 4617, 4626, 4632, 4648, 4649, 4650, 4696, 4698, 4701, 4702,
##        4726, 4745, 4779, 4787, 4789, 4792, 4793, 4794, 4813, 4815, 4818,
##        4820, 4837, 4841, 4845, 4847, 4867, 4877, 4878, 4886, 4887]),)

Matriz de Distancias Euclídeas

Aplicación en R

# Euclidean distance matrix using the first 6 rows of each data set
# (columns 2 to 7). R indexes from 1; a 0 in the row index is silently
# dropped, so the range is written as 1:6 to say what it actually selects.
wine_redD <- wine_red[1:6, 2:7]
wine_whiteD <- wine_white[1:6, 2:7]

# Red wine:
# Pairwise Euclidean distances between the selected rows ("dist" object).
dist_red <- stats::dist(wine_redD, method = "euclidean")
print(dist_red)
##           1         2         3         4         5
## 2 35.854189                                        
## 3 20.400134 16.404452                              
## 4 26.692508 10.684762  6.376605                    
## 5  0.000000 35.854189 20.400134 26.692508          
## 6  6.325472 29.558229 14.151392 20.407548  6.325472
# Expand the "dist" object into a full symmetric matrix and display it.
matrix_dist_red <- as.matrix(dist_red)
print(matrix_dist_red)
##           1        2         3         4         5         6
## 1  0.000000 35.85419 20.400134 26.692508  0.000000  6.325472
## 2 35.854189  0.00000 16.404452 10.684762 35.854189 29.558229
## 3 20.400134 16.40445  0.000000  6.376605 20.400134 14.151392
## 4 26.692508 10.68476  6.376605  0.000000 26.692508 20.407548
## 5  0.000000 35.85419 20.400134 26.692508  0.000000  6.325472
## 6  6.325472 29.55823 14.151392 20.407548  6.325472  0.000000
# White wine:
# Pairwise Euclidean distances between the selected rows ("dist" object).
dist_white <- stats::dist(wine_whiteD, method = "euclidean")
print(dist_white)
##          1        2        3        4        5
## 2 52.62900                                    
## 3 75.79210 38.84706                           
## 4 20.21988 63.66016 90.62323                  
## 5 20.21988 63.66016 90.62323  0.00000         
## 6 75.79210 38.84706  0.00000 90.62323 90.62323
# Expand the white-wine "dist" object into a full symmetric matrix.
# (Previously this converted dist_red, so the red-wine matrix was shown.)
matrix_dist_white <- as.matrix(dist_white)
matrix_dist_white
##          1        2        3        4        5        6
## 1  0.00000 52.62900 75.79210 20.21988 20.21988 75.79210
## 2 52.62900  0.00000 38.84706 63.66016 63.66016 38.84706
## 3 75.79210 38.84706  0.00000 90.62323 90.62323  0.00000
## 4 20.21988 63.66016 90.62323  0.00000  0.00000 90.62323
## 5 20.21988 63.66016 90.62323  0.00000  0.00000 90.62323
## 6 75.79210 38.84706  0.00000 90.62323 90.62323  0.00000

Aplicación en Python

# Red wine:
# Condensed pairwise Euclidean distances between all rows, then expanded
# into a square symmetric matrix.
dist_red = pdist(wine_red, "euclidean")
matrix_dist_red = squareform(dist_red)

print("Matriz de distancias euclídeas:\n", matrix_dist_red)
## Matriz de distancias euclídeas:
##  [[ 0.         35.86019221 20.40970496 ... 19.07949696 23.32259701
##   10.9912459 ]
##  [35.86019221  0.         16.40458887 ... 27.36763755 24.13167951
##   26.08185696]
##  [20.40970496 16.40458887  0.         ... 19.89463389 19.82371349
##   12.64021468]
##  ...
##  [19.07949696 27.36763755 19.89463389 ...  0.          5.09238903
##   11.26697302]
##  [23.32259701 24.13167951 19.82371349 ...  5.09238903  0.
##   14.2646307 ]
##  [10.9912459  26.08185696 12.64021468 ... 11.26697302 14.2646307
##    0.        ]]
# White wine:
# Condensed pairwise Euclidean distances between all rows, then expanded
# into a square symmetric matrix.
dist_white = pdist(wine_white, metric='euclidean')
matrix_dist_white = squareform(dist_white)

# Print the white-wine matrix. The earlier version printed matrix_dist_red by
# mistake, so the recorded output that follows still shows the red-wine values
# and must be regenerated.
print("Matriz de distancias euclídeas:\n", matrix_dist_white)
## Matriz de distancias euclídeas:
##  [[ 0.         35.86019221 20.40970496 ... 19.07949696 23.32259701
##   10.9912459 ]
##  [35.86019221  0.         16.40458887 ... 27.36763755 24.13167951
##   26.08185696]
##  [20.40970496 16.40458887  0.         ... 19.89463389 19.82371349
##   12.64021468]
##  ...
##  [19.07949696 27.36763755 19.89463389 ...  0.          5.09238903
##   11.26697302]
##  [23.32259701 24.13167951 19.82371349 ...  5.09238903  0.
##   14.2646307 ]
##  [10.9912459  26.08185696 12.64021468 ... 11.26697302 14.2646307
##    0.        ]]

Varianza Generalizada

Aplicación en R

# Red wine: generalized variance = determinant of the covariance matrix.
vg_red <- base::det(cov_matrix_red)
print(vg_red)
## [1] 3.478418e-11
# White wine: generalized variance = determinant of the covariance matrix.
vg_white <- base::det(cov_matrix_white)
print(vg_white)
## [1] 1.701408e-11

Aplicación en Python

# Red wine: generalized variance, i.e. the determinant of the covariance matrix.
vg_red = np.linalg.det(cov_matrix_red)
print("Varianza Generalizada\n", vg_red)
## Varianza Generalizada
##  3.478417646276091e-11
# White wine: generalized variance, i.e. the determinant of the covariance matrix.
vg_white = np.linalg.det(cov_matrix_white)
print("Varianza Generalizada\n", vg_white)
## Varianza Generalizada
##  1.701408009848951e-11