# carga de paquetes
library(tidyverse)
## Warning: package 'readr' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ppcor) # Correlaciones parciales
## Warning: package 'ppcor' was built under R version 4.4.3
## Cargando paquete requerido: MASS
##
## Adjuntando el paquete: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(MASS) # Outliers mahalanobis distance
# Alternative kept for reference: choose the two files interactively.
# ruta1 <- file.choose()
# ruta2 <- file.choose()
# Read both CSVs using ";" as the field separator and the first line as header.
wine_red <- read.csv(
  file = "C:\\Users\\MINEDUCYT\\Documents\\Seminario\\wine+quality\\winequality-red.csv",
  header = TRUE,
  sep = ";"
)
wine_white <- read.csv(
  file = "C:\\Users\\MINEDUCYT\\Documents\\Seminario\\wine+quality\\winequality-white.csv",
  header = TRUE,
  sep = ";"
)
# Red wine: preview the first six rows
head(wine_red, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 11 34 0.9978 3.51 0.56 9.4
## 2 25 67 0.9968 3.20 0.68 9.8
## 3 15 54 0.9970 3.26 0.65 9.8
## 4 17 60 0.9980 3.16 0.58 9.8
## 5 11 34 0.9978 3.51 0.56 9.4
## 6 13 40 0.9978 3.51 0.56 9.4
## quality
## 1 5
## 2 5
## 3 5
## 4 6
## 5 5
## 6 5
# White wine: preview the first six rows
head(wine_white, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.0 0.27 0.36 20.7 0.045
## 2 6.3 0.30 0.34 1.6 0.049
## 3 8.1 0.28 0.40 6.9 0.050
## 4 7.2 0.23 0.32 8.5 0.058
## 5 7.2 0.23 0.32 8.5 0.058
## 6 8.1 0.28 0.40 6.9 0.050
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 45 170 1.0010 3.00 0.45 8.8
## 2 14 132 0.9940 3.30 0.49 9.5
## 3 30 97 0.9951 3.26 0.44 10.1
## 4 47 186 0.9956 3.19 0.40 9.9
## 5 47 186 0.9956 3.19 0.40 9.9
## 6 30 97 0.9951 3.26 0.44 10.1
## quality
## 1 6
## 2 6
## 3 6
## 4 6
## 5 6
## 6 6
# Reducir tamaño de la base de datos:
# Se tomarán 300 observaciones
# wine_red=wine_red[0:300,]
# wine_white=wine_white[0:300,]
# Remove the "quality" column from both data frames
wine_red[["quality"]] <- NULL
wine_white[["quality"]] <- NULL
# Red wine: preview the first six rows
head(wine_red, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 11 34 0.9978 3.51 0.56 9.4
## 2 25 67 0.9968 3.20 0.68 9.8
## 3 15 54 0.9970 3.26 0.65 9.8
## 4 17 60 0.9980 3.16 0.58 9.8
## 5 11 34 0.9978 3.51 0.56 9.4
## 6 13 40 0.9978 3.51 0.56 9.4
# White wine: preview the first six rows
head(wine_white, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.0 0.27 0.36 20.7 0.045
## 2 6.3 0.30 0.34 1.6 0.049
## 3 8.1 0.28 0.40 6.9 0.050
## 4 7.2 0.23 0.32 8.5 0.058
## 5 7.2 0.23 0.32 8.5 0.058
## 6 8.1 0.28 0.40 6.9 0.050
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 45 170 1.0010 3.00 0.45 8.8
## 2 14 132 0.9940 3.30 0.49 9.5
## 3 30 97 0.9951 3.26 0.44 10.1
## 4 47 186 0.9956 3.19 0.40 9.9
## 5 47 186 0.9956 3.19 0.40 9.9
## 6 30 97 0.9951 3.26 0.44 10.1
# cargar paquetes
import pandas as pd # crear y manipular dataframes
import numpy as np # vectores y matrices multidimensionales
import matplotlib.pyplot as plt # gráficos estadísticos
from scipy.spatial.distance import pdist, squareform # distancias euclídeas
import pingouin as pg # correlaciones parciales y distancia de mahalanobis
from scipy.spatial.distance import mahalanobis # distancia de mahalanobis
from scipy.stats import chi2 # chi-cuadrado
import pandas as pd

# The r"..." prefix makes a raw string, so the backslashes in the Windows
# paths are taken literally. Both files use ";" as the field separator.
wine_red = pd.read_csv(
    r"C:\Users\MINEDUCYT\Documents\Seminario\wine+quality\winequality-red.csv",
    sep=";",
)
wine_white = pd.read_csv(
    r"C:\Users\MINEDUCYT\Documents\Seminario\wine+quality\winequality-white.csv",
    sep=";",
)
# Reducir tamaño de la base de datos:
# wine_red=wine_red.iloc[0:300,] #Se tomarán 300 observaciones y 6 variables
# wine_white=wine_white.iloc[0:300,]
# Red wine: preview the first five rows
print(wine_red.head(n=5))
## fixed acidity volatile acidity citric acid ... sulphates alcohol quality
## 0 7.4 0.70 0.00 ... 0.56 9.4 5
## 1 7.8 0.88 0.00 ... 0.68 9.8 5
## 2 7.8 0.76 0.04 ... 0.65 9.8 5
## 3 11.2 0.28 0.56 ... 0.58 9.8 6
## 4 7.4 0.70 0.00 ... 0.56 9.4 5
##
## [5 rows x 12 columns]
# White wine: preview the first five rows
print(wine_white.head(n=5))
## fixed acidity volatile acidity citric acid ... sulphates alcohol quality
## 0 7.0 0.27 0.36 ... 0.45 8.8 6
## 1 6.3 0.30 0.34 ... 0.49 9.5 6
## 2 8.1 0.28 0.40 ... 0.44 10.1 6
## 3 7.2 0.23 0.32 ... 0.40 9.9 6
## 4 7.2 0.23 0.32 ... 0.40 9.9 6
##
## [5 rows x 12 columns]
# Remove the "quality" column from both data frames
wine_red = wine_red.drop(columns="quality")
wine_white = wine_white.drop(columns="quality")
# Red wine: preview the first five rows
print(wine_red.head(n=5))
## fixed acidity volatile acidity citric acid ... pH sulphates alcohol
## 0 7.4 0.70 0.00 ... 3.51 0.56 9.4
## 1 7.8 0.88 0.00 ... 3.20 0.68 9.8
## 2 7.8 0.76 0.04 ... 3.26 0.65 9.8
## 3 11.2 0.28 0.56 ... 3.16 0.58 9.8
## 4 7.4 0.70 0.00 ... 3.51 0.56 9.4
##
## [5 rows x 11 columns]
# White wine: preview the first five rows
print(wine_white.head())
## fixed acidity volatile acidity citric acid ... pH sulphates alcohol
## 0 7.0 0.27 0.36 ... 3.00 0.45 8.8
## 1 6.3 0.30 0.34 ... 3.30 0.49 9.5
## 2 8.1 0.28 0.40 ... 3.26 0.44 10.1
## 3 7.2 0.23 0.32 ... 3.19 0.40 9.9
## 4 7.2 0.23 0.32 ... 3.19 0.40 9.9
##
## [5 rows x 11 columns]
# Red wine: mean of every column
means_red <- colMeans(x = wine_red, na.rm = FALSE)
means_red
## fixed.acidity volatile.acidity citric.acid
## 8.31963727 0.52782051 0.27097561
## residual.sugar chlorides free.sulfur.dioxide
## 2.53880550 0.08746654 15.87492183
## total.sulfur.dioxide density pH
## 46.46779237 0.99674668 3.31111320
## sulphates alcohol
## 0.65814884 10.42298311
# White wine: mean of every column
means_white <- colMeans(x = wine_white, na.rm = FALSE)
means_white
## fixed.acidity volatile.acidity citric.acid
## 6.85478767 0.27824112 0.33419151
## residual.sugar chlorides free.sulfur.dioxide
## 6.39141486 0.04577236 35.30808493
## total.sulfur.dioxide density pH
## 138.36065741 0.99402738 3.18826664
## sulphates alcohol
## 0.48984688 10.51426705
# Red wine: mean of every column
means_red = wine_red.mean(axis=0)
print("Vector de Medias:\n", means_red)
## Vector de Medias:
## fixed acidity 8.319637
## volatile acidity 0.527821
## citric acid 0.270976
## residual sugar 2.538806
## chlorides 0.087467
## free sulfur dioxide 15.874922
## total sulfur dioxide 46.467792
## density 0.996747
## pH 3.311113
## sulphates 0.658149
## alcohol 10.422983
## dtype: float64
# White wine: mean of every column
means_white = wine_white.mean(axis=0)
print("Vector de Medias:\n", means_white)
## Vector de Medias:
## fixed acidity 6.854788
## volatile acidity 0.278241
## citric acid 0.334192
## residual sugar 6.391415
## chlorides 0.045772
## free sulfur dioxide 35.308085
## total sulfur dioxide 138.360657
## density 0.994027
## pH 3.188267
## sulphates 0.489847
## alcohol 10.514267
## dtype: float64
# Red wine: covariance matrix of all columns
cov_matrix_red <- stats::cov(wine_red, method = "pearson")
cov_matrix_red
## fixed.acidity volatile.acidity citric.acid
## fixed.acidity 3.031416389 -7.985142e-02 0.2278200037
## volatile.acidity -0.079851417 3.206238e-02 -0.0192716208
## citric.acid 0.227820004 -1.927162e-02 0.0379474831
## residual.sugar 0.281756262 4.841910e-04 0.0394342700
## chlorides 0.007678692 5.165869e-04 0.0018687248
## free.sulfur.dioxide -2.800921493 -1.967359e-02 -0.1242521139
## total.sulfur.dioxide -6.482345858 4.504257e-01 0.2276972740
## density 0.002195224 7.443665e-06 0.0001341746
## pH -0.183585704 6.494699e-03 -0.0162975823
## sulphates 0.054010092 -7.921434e-03 0.0103277145
## alcohol -0.114421153 -3.860022e-02 0.0228151729
## residual.sugar chlorides free.sulfur.dioxide
## fixed.acidity 0.2817562623 7.678692e-03 -2.800921e+00
## volatile.acidity 0.0004841910 5.165869e-04 -1.967359e-02
## citric.acid 0.0394342700 1.868725e-03 -1.242521e-01
## residual.sugar 1.9878971330 3.690176e-03 2.758611e+00
## chlorides 0.0036901759 2.215143e-03 2.738303e-03
## free.sulfur.dioxide 2.7586114522 2.738303e-03 1.094149e+02
## total.sulfur.dioxide 9.4164414790 7.338675e-02 2.297375e+02
## density 0.0009454109 1.782176e-05 -4.332504e-04
## pH -0.0186442890 -1.925745e-03 1.136531e-01
## sulphates 0.0013209414 2.961878e-03 9.159247e-02
## alcohol 0.0632189598 -1.109152e-02 -7.736984e-01
## total.sulfur.dioxide density pH
## fixed.acidity -6.482346e+00 2.195224e-03 -1.835857e-01
## volatile.acidity 4.504257e-01 7.443665e-06 6.494699e-03
## citric.acid 2.276973e-01 1.341746e-04 -1.629758e-02
## residual.sugar 9.416441e+00 9.454109e-04 -1.864429e-02
## chlorides 7.338675e-02 1.782176e-05 -1.925745e-03
## free.sulfur.dioxide 2.297375e+02 -4.332504e-04 1.136531e-01
## total.sulfur.dioxide 1.082102e+03 4.424727e-03 -3.376988e-01
## density 4.424727e-03 3.562029e-06 -9.956395e-05
## pH -3.376988e-01 -9.956395e-05 2.383518e-02
## sulphates 2.394710e-01 4.750962e-05 -5.146186e-03
## alcohol -7.209298e+00 -9.979518e-04 3.383162e-02
## sulphates alcohol
## fixed.acidity 5.401009e-02 -0.1144211534
## volatile.acidity -7.921434e-03 -0.0386002214
## citric.acid 1.032771e-02 0.0228151729
## residual.sugar 1.320941e-03 0.0632189598
## chlorides 2.961878e-03 -0.0110915178
## free.sulfur.dioxide 9.159247e-02 -0.7736984004
## total.sulfur.dioxide 2.394710e-01 -7.2092978950
## density 4.750962e-05 -0.0009979518
## pH -5.146186e-03 0.0338316166
## sulphates 2.873262e-02 0.0169067772
## alcohol 1.690678e-02 1.1356473950
# White wine: covariance matrix of all columns
cov_matrix_white <- stats::cov(wine_white, method = "pearson")
cov_matrix_white
## fixed.acidity volatile.acidity citric.acid
## fixed.acidity 0.7121135857 -1.930571e-03 0.0295325116
## volatile.acidity -0.0019305706 1.015954e-02 -0.0018232776
## citric.acid 0.0295325116 -1.823278e-03 0.0146457930
## residual.sugar 0.3810218137 3.286533e-02 0.0578289265
## chlorides 0.0004256255 1.552775e-04 0.0003023838
## free.sulfur.dioxide -0.7089186424 -1.663005e-01 0.1936297767
## total.sulfur.dioxide 3.2660133926 3.823539e-01 0.6229887081
## density 0.0006696773 8.173933e-06 0.0000541138
## pH -0.0542648260 -4.857531e-04 -0.0029923451
## sulphates -0.0016509923 -4.109902e-04 0.0008608829
## alcohol -0.1255328219 8.399723e-03 -0.0112782389
## residual.sugar chlorides free.sulfur.dioxide
## fixed.acidity 0.381021814 4.256255e-04 -0.708918642
## volatile.acidity 0.032865334 1.552775e-04 -0.166300459
## citric.acid 0.057828926 3.023838e-04 0.193629777
## residual.sugar 25.725770164 9.827502e-03 25.800577899
## chlorides 0.009827502 4.773337e-04 0.037674498
## free.sulfur.dioxide 25.800577899 3.767450e-02 289.242719999
## total.sulfur.dioxide 86.531302970 1.846875e-01 444.865890947
## density 0.012727165 1.680754e-05 0.014965532
## pH -0.148683661 -2.983649e-04 -0.001586555
## sulphates -0.015434743 4.179687e-05 0.114937934
## alcohol -2.812740332 -9.684235e-03 -5.234508674
## total.sulfur.dioxide density pH
## fixed.acidity 3.26601339 6.696773e-04 -5.426483e-02
## volatile.acidity 0.38235390 8.173933e-06 -4.857531e-04
## citric.acid 0.62298871 5.411380e-05 -2.992345e-03
## residual.sugar 86.53130297 1.272717e-02 -1.486837e-01
## chlorides 0.18468749 1.680754e-05 -2.983649e-04
## free.sulfur.dioxide 444.86589095 1.496553e-02 -1.586555e-03
## total.sulfur.dioxide 1806.08549085 6.735203e-02 1.489422e-02
## density 0.06735203 8.945524e-06 -4.226861e-05
## pH 0.01489422 -4.226861e-05 2.280118e-02
## sulphates 0.65264458 2.542747e-05 2.687523e-03
## alcohol -23.47660460 -2.871430e-03 2.256505e-02
## sulphates alcohol
## fixed.acidity -1.650992e-03 -0.125532822
## volatile.acidity -4.109902e-04 0.008399723
## citric.acid 8.608829e-04 -0.011278239
## residual.sugar -1.543474e-02 -2.812740332
## chlorides 4.179687e-05 -0.009684235
## free.sulfur.dioxide 1.149379e-01 -5.234508674
## total.sulfur.dioxide 6.526446e-01 -23.476604603
## density 2.542747e-05 -0.002871430
## pH 2.687523e-03 0.022565052
## sulphates 1.302471e-02 -0.002448356
## alcohol -2.448356e-03 1.514426982
# Lift the pandas display limits so printed frames show every row and column
pd.options.display.max_rows = None
pd.options.display.max_columns = None
# Red wine: covariance matrix of all columns
cov_matrix_red = wine_red.cov(ddof=1)
print("Matriz de covarianzas:\n", cov_matrix_red)
## Matriz de covarianzas:
## fixed acidity volatile acidity citric acid \
## fixed acidity 3.031416 -0.079851 0.227820
## volatile acidity -0.079851 0.032062 -0.019272
## citric acid 0.227820 -0.019272 0.037947
## residual sugar 0.281756 0.000484 0.039434
## chlorides 0.007679 0.000517 0.001869
## free sulfur dioxide -2.800921 -0.019674 -0.124252
## total sulfur dioxide -6.482346 0.450426 0.227697
## density 0.002195 0.000007 0.000134
## pH -0.183586 0.006495 -0.016298
## sulphates 0.054010 -0.007921 0.010328
## alcohol -0.114421 -0.038600 0.022815
##
## residual sugar chlorides free sulfur dioxide \
## fixed acidity 0.281756 0.007679 -2.800921
## volatile acidity 0.000484 0.000517 -0.019674
## citric acid 0.039434 0.001869 -0.124252
## residual sugar 1.987897 0.003690 2.758611
## chlorides 0.003690 0.002215 0.002738
## free sulfur dioxide 2.758611 0.002738 109.414884
## total sulfur dioxide 9.416441 0.073387 229.737521
## density 0.000945 0.000018 -0.000433
## pH -0.018644 -0.001926 0.113653
## sulphates 0.001321 0.002962 0.091592
## alcohol 0.063219 -0.011092 -0.773698
##
## total sulfur dioxide density pH sulphates \
## fixed acidity -6.482346 0.002195 -0.183586 0.054010
## volatile acidity 0.450426 0.000007 0.006495 -0.007921
## citric acid 0.227697 0.000134 -0.016298 0.010328
## residual sugar 9.416441 0.000945 -0.018644 0.001321
## chlorides 0.073387 0.000018 -0.001926 0.002962
## free sulfur dioxide 229.737521 -0.000433 0.113653 0.091592
## total sulfur dioxide 1082.102373 0.004425 -0.337699 0.239471
## density 0.004425 0.000004 -0.000100 0.000048
## pH -0.337699 -0.000100 0.023835 -0.005146
## sulphates 0.239471 0.000048 -0.005146 0.028733
## alcohol -7.209298 -0.000998 0.033832 0.016907
##
## alcohol
## fixed acidity -0.114421
## volatile acidity -0.038600
## citric acid 0.022815
## residual sugar 0.063219
## chlorides -0.011092
## free sulfur dioxide -0.773698
## total sulfur dioxide -7.209298
## density -0.000998
## pH 0.033832
## sulphates 0.016907
## alcohol 1.135647
# White wine: covariance matrix of all columns
cov_matrix_white = wine_white.cov(ddof=1)
print("Matriz de covarianzas:\n", cov_matrix_white)
## Matriz de covarianzas:
## fixed acidity volatile acidity citric acid \
## fixed acidity 0.712114 -0.001931 0.029533
## volatile acidity -0.001931 0.010160 -0.001823
## citric acid 0.029533 -0.001823 0.014646
## residual sugar 0.381022 0.032865 0.057829
## chlorides 0.000426 0.000155 0.000302
## free sulfur dioxide -0.708919 -0.166300 0.193630
## total sulfur dioxide 3.266013 0.382354 0.622989
## density 0.000670 0.000008 0.000054
## pH -0.054265 -0.000486 -0.002992
## sulphates -0.001651 -0.000411 0.000861
## alcohol -0.125533 0.008400 -0.011278
##
## residual sugar chlorides free sulfur dioxide \
## fixed acidity 0.381022 0.000426 -0.708919
## volatile acidity 0.032865 0.000155 -0.166300
## citric acid 0.057829 0.000302 0.193630
## residual sugar 25.725770 0.009828 25.800578
## chlorides 0.009828 0.000477 0.037674
## free sulfur dioxide 25.800578 0.037674 289.242720
## total sulfur dioxide 86.531303 0.184687 444.865891
## density 0.012727 0.000017 0.014966
## pH -0.148684 -0.000298 -0.001587
## sulphates -0.015435 0.000042 0.114938
## alcohol -2.812740 -0.009684 -5.234509
##
## total sulfur dioxide density pH sulphates \
## fixed acidity 3.266013 0.000670 -0.054265 -0.001651
## volatile acidity 0.382354 0.000008 -0.000486 -0.000411
## citric acid 0.622989 0.000054 -0.002992 0.000861
## residual sugar 86.531303 0.012727 -0.148684 -0.015435
## chlorides 0.184687 0.000017 -0.000298 0.000042
## free sulfur dioxide 444.865891 0.014966 -0.001587 0.114938
## total sulfur dioxide 1806.085491 0.067352 0.014894 0.652645
## density 0.067352 0.000009 -0.000042 0.000025
## pH 0.014894 -0.000042 0.022801 0.002688
## sulphates 0.652645 0.000025 0.002688 0.013025
## alcohol -23.476605 -0.002871 0.022565 -0.002448
##
## alcohol
## fixed acidity -0.125533
## volatile acidity 0.008400
## citric acid -0.011278
## residual sugar -2.812740
## chlorides -0.009684
## free sulfur dioxide -5.234509
## total sulfur dioxide -23.476605
## density -0.002871
## pH 0.022565
## sulphates -0.002448
## alcohol 1.514427
# Red wine: Pearson correlation matrix of all columns
corr_matrix_red <- stats::cor(wine_red, method = "pearson")
corr_matrix_red
## fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity 1.00000000 -0.256130895 0.67170343 0.114776724
## volatile.acidity -0.25613089 1.000000000 -0.55249568 0.001917882
## citric.acid 0.67170343 -0.552495685 1.00000000 0.143577162
## residual.sugar 0.11477672 0.001917882 0.14357716 1.000000000
## chlorides 0.09370519 0.061297772 0.20382291 0.055609535
## free.sulfur.dioxide -0.15379419 -0.010503827 -0.06097813 0.187048995
## total.sulfur.dioxide -0.11318144 0.076470005 0.03553302 0.203027882
## density 0.66804729 0.022026232 0.36494718 0.355283371
## pH -0.68297819 0.234937294 -0.54190414 -0.085652422
## sulphates 0.18300566 -0.260986685 0.31277004 0.005527121
## alcohol -0.06166827 -0.202288027 0.10990325 0.042075437
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity 0.093705186 -0.153794193 -0.11318144
## volatile.acidity 0.061297772 -0.010503827 0.07647000
## citric.acid 0.203822914 -0.060978129 0.03553302
## residual.sugar 0.055609535 0.187048995 0.20302788
## chlorides 1.000000000 0.005562147 0.04740047
## free.sulfur.dioxide 0.005562147 1.000000000 0.66766645
## total.sulfur.dioxide 0.047400468 0.667666450 1.00000000
## density 0.200632327 -0.021945831 0.07126948
## pH -0.265026131 0.070377499 -0.06649456
## sulphates 0.371260481 0.051657572 0.04294684
## alcohol -0.221140545 -0.069408354 -0.20565394
## density pH sulphates alcohol
## fixed.acidity 0.66804729 -0.68297819 0.183005664 -0.06166827
## volatile.acidity 0.02202623 0.23493729 -0.260986685 -0.20228803
## citric.acid 0.36494718 -0.54190414 0.312770044 0.10990325
## residual.sugar 0.35528337 -0.08565242 0.005527121 0.04207544
## chlorides 0.20063233 -0.26502613 0.371260481 -0.22114054
## free.sulfur.dioxide -0.02194583 0.07037750 0.051657572 -0.06940835
## total.sulfur.dioxide 0.07126948 -0.06649456 0.042946836 -0.20565394
## density 1.00000000 -0.34169933 0.148506412 -0.49617977
## pH -0.34169933 1.00000000 -0.196647602 0.20563251
## sulphates 0.14850641 -0.19664760 1.000000000 0.09359475
## alcohol -0.49617977 0.20563251 0.093594750 1.00000000
# White wine: Pearson correlation matrix of all columns
corr_matrix_white <- stats::cor(wine_white, method = "pearson")
corr_matrix_white
## fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity 1.00000000 -0.02269729 0.28918070 0.08902070
## volatile.acidity -0.02269729 1.00000000 -0.14947181 0.06428606
## citric.acid 0.28918070 -0.14947181 1.00000000 0.09421162
## residual.sugar 0.08902070 0.06428606 0.09421162 1.00000000
## chlorides 0.02308564 0.07051157 0.11436445 0.08868454
## free.sulfur.dioxide -0.04939586 -0.09701194 0.09407722 0.29909835
## total.sulfur.dioxide 0.09106976 0.08926050 0.12113080 0.40143931
## density 0.26533101 0.02711385 0.14950257 0.83896645
## pH -0.42585829 -0.03191537 -0.16374821 -0.19413345
## sulphates -0.01714299 -0.03572815 0.06233094 -0.02666437
## alcohol -0.12088112 0.06771794 -0.07572873 -0.45063122
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity 0.02308564 -0.0493958591 0.091069756
## volatile.acidity 0.07051157 -0.0970119393 0.089260504
## citric.acid 0.11436445 0.0940772210 0.121130798
## residual.sugar 0.08868454 0.2990983537 0.401439311
## chlorides 1.00000000 0.1013923521 0.198910300
## free.sulfur.dioxide 0.10139235 1.0000000000 0.615500965
## total.sulfur.dioxide 0.19891030 0.6155009650 1.000000000
## density 0.25721132 0.2942104109 0.529881324
## pH -0.09043946 -0.0006177961 0.002320972
## sulphates 0.01676288 0.0592172458 0.134562367
## alcohol -0.36018871 -0.2501039415 -0.448892102
## density pH sulphates alcohol
## fixed.acidity 0.26533101 -0.4258582910 -0.01714299 -0.12088112
## volatile.acidity 0.02711385 -0.0319153683 -0.03572815 0.06771794
## citric.acid 0.14950257 -0.1637482114 0.06233094 -0.07572873
## residual.sugar 0.83896645 -0.1941334540 -0.02666437 -0.45063122
## chlorides 0.25721132 -0.0904394560 0.01676288 -0.36018871
## free.sulfur.dioxide 0.29421041 -0.0006177961 0.05921725 -0.25010394
## total.sulfur.dioxide 0.52988132 0.0023209718 0.13456237 -0.44889210
## density 1.00000000 -0.0935914935 0.07449315 -0.78013762
## pH -0.09359149 1.0000000000 0.15595150 0.12143210
## sulphates 0.07449315 0.1559514973 1.00000000 -0.01743277
## alcohol -0.78013762 0.1214320987 -0.01743277 1.00000000
# Red wine: Pearson correlation matrix of all columns
corr_matrix_red = wine_red.corr(method="pearson")
print("Matriz de Correlaciones:\n", corr_matrix_red)
## Matriz de Correlaciones:
## fixed acidity volatile acidity citric acid \
## fixed acidity 1.000000 -0.256131 0.671703
## volatile acidity -0.256131 1.000000 -0.552496
## citric acid 0.671703 -0.552496 1.000000
## residual sugar 0.114777 0.001918 0.143577
## chlorides 0.093705 0.061298 0.203823
## free sulfur dioxide -0.153794 -0.010504 -0.060978
## total sulfur dioxide -0.113181 0.076470 0.035533
## density 0.668047 0.022026 0.364947
## pH -0.682978 0.234937 -0.541904
## sulphates 0.183006 -0.260987 0.312770
## alcohol -0.061668 -0.202288 0.109903
##
## residual sugar chlorides free sulfur dioxide \
## fixed acidity 0.114777 0.093705 -0.153794
## volatile acidity 0.001918 0.061298 -0.010504
## citric acid 0.143577 0.203823 -0.060978
## residual sugar 1.000000 0.055610 0.187049
## chlorides 0.055610 1.000000 0.005562
## free sulfur dioxide 0.187049 0.005562 1.000000
## total sulfur dioxide 0.203028 0.047400 0.667666
## density 0.355283 0.200632 -0.021946
## pH -0.085652 -0.265026 0.070377
## sulphates 0.005527 0.371260 0.051658
## alcohol 0.042075 -0.221141 -0.069408
##
## total sulfur dioxide density pH sulphates \
## fixed acidity -0.113181 0.668047 -0.682978 0.183006
## volatile acidity 0.076470 0.022026 0.234937 -0.260987
## citric acid 0.035533 0.364947 -0.541904 0.312770
## residual sugar 0.203028 0.355283 -0.085652 0.005527
## chlorides 0.047400 0.200632 -0.265026 0.371260
## free sulfur dioxide 0.667666 -0.021946 0.070377 0.051658
## total sulfur dioxide 1.000000 0.071269 -0.066495 0.042947
## density 0.071269 1.000000 -0.341699 0.148506
## pH -0.066495 -0.341699 1.000000 -0.196648
## sulphates 0.042947 0.148506 -0.196648 1.000000
## alcohol -0.205654 -0.496180 0.205633 0.093595
##
## alcohol
## fixed acidity -0.061668
## volatile acidity -0.202288
## citric acid 0.109903
## residual sugar 0.042075
## chlorides -0.221141
## free sulfur dioxide -0.069408
## total sulfur dioxide -0.205654
## density -0.496180
## pH 0.205633
## sulphates 0.093595
## alcohol 1.000000
# White wine: Pearson correlation matrix of all columns
corr_matrix_white = wine_white.corr(method="pearson")
print("Matriz de Correlaciones:\n", corr_matrix_white)
## Matriz de Correlaciones:
## fixed acidity volatile acidity citric acid \
## fixed acidity 1.000000 -0.022697 0.289181
## volatile acidity -0.022697 1.000000 -0.149472
## citric acid 0.289181 -0.149472 1.000000
## residual sugar 0.089021 0.064286 0.094212
## chlorides 0.023086 0.070512 0.114364
## free sulfur dioxide -0.049396 -0.097012 0.094077
## total sulfur dioxide 0.091070 0.089261 0.121131
## density 0.265331 0.027114 0.149503
## pH -0.425858 -0.031915 -0.163748
## sulphates -0.017143 -0.035728 0.062331
## alcohol -0.120881 0.067718 -0.075729
##
## residual sugar chlorides free sulfur dioxide \
## fixed acidity 0.089021 0.023086 -0.049396
## volatile acidity 0.064286 0.070512 -0.097012
## citric acid 0.094212 0.114364 0.094077
## residual sugar 1.000000 0.088685 0.299098
## chlorides 0.088685 1.000000 0.101392
## free sulfur dioxide 0.299098 0.101392 1.000000
## total sulfur dioxide 0.401439 0.198910 0.615501
## density 0.838966 0.257211 0.294210
## pH -0.194133 -0.090439 -0.000618
## sulphates -0.026664 0.016763 0.059217
## alcohol -0.450631 -0.360189 -0.250104
##
## total sulfur dioxide density pH sulphates \
## fixed acidity 0.091070 0.265331 -0.425858 -0.017143
## volatile acidity 0.089261 0.027114 -0.031915 -0.035728
## citric acid 0.121131 0.149503 -0.163748 0.062331
## residual sugar 0.401439 0.838966 -0.194133 -0.026664
## chlorides 0.198910 0.257211 -0.090439 0.016763
## free sulfur dioxide 0.615501 0.294210 -0.000618 0.059217
## total sulfur dioxide 1.000000 0.529881 0.002321 0.134562
## density 0.529881 1.000000 -0.093591 0.074493
## pH 0.002321 -0.093591 1.000000 0.155951
## sulphates 0.134562 0.074493 0.155951 1.000000
## alcohol -0.448892 -0.780138 0.121432 -0.017433
##
## alcohol
## fixed acidity -0.120881
## volatile acidity 0.067718
## citric acid -0.075729
## residual sugar -0.450631
## chlorides -0.360189
## free sulfur dioxide -0.250104
## total sulfur dioxide -0.448892
## density -0.780138
## pH 0.121432
## sulphates -0.017433
## alcohol 1.000000
# Red wine: partial correlations via ppcor::pcor()
pcor_red <- pcor(wine_red)
# The "estimate" element holds the partial-correlation matrix itself
pcor_matrix_red <- pcor_red[["estimate"]]
pcor_matrix_red
## fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity 1.00000000 0.04973736 0.33328898 -0.43095775
## volatile.acidity 0.04973736 1.00000000 -0.53546402 -0.02402241
## citric.acid 0.33328898 -0.53546402 1.00000000 0.05341803
## residual.sugar -0.43095775 -0.02402241 0.05341803 1.00000000
## chlorides -0.23335902 0.26401083 0.26370924 -0.01084620
## free.sulfur.dioxide 0.11013350 -0.16533973 -0.16249047 0.12649385
## total.sulfur.dioxide -0.23171731 0.21410687 0.26497412 0.03630121
## density 0.78544969 0.12842079 0.00859700 0.59519561
## pH -0.71850174 0.02664812 -0.03423779 -0.32904958
## sulphates -0.15206062 -0.20942859 0.02655334 -0.20073068
## alcohol 0.54341241 0.07891145 0.14946708 0.50207731
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity -0.23335902 0.11013350 -0.23171731
## volatile.acidity 0.26401083 -0.16533973 0.21410687
## citric.acid 0.26370924 -0.16249047 0.26497412
## residual.sugar -0.01084620 0.12649385 0.03630121
## chlorides 1.00000000 0.05852533 -0.14017933
## free.sulfur.dioxide 0.05852533 1.00000000 0.66293391
## total.sulfur.dioxide -0.14017933 0.66293391 1.00000000
## density 0.09380338 -0.08543551 0.07878250
## pH -0.23105397 0.13765744 -0.19390050
## sulphates 0.35050976 0.05395905 0.03306191
## alcohol -0.09234314 -0.02614219 -0.08343417
## density pH sulphates alcohol
## fixed.acidity 0.78544969 -0.71850174 -0.15206062 0.54341241
## volatile.acidity 0.12842079 0.02664812 -0.20942859 0.07891145
## citric.acid 0.00859700 -0.03423779 0.02655334 0.14946708
## residual.sugar 0.59519561 -0.32904958 -0.20073068 0.50207731
## chlorides 0.09380338 -0.23105397 0.35050976 -0.09234314
## free.sulfur.dioxide -0.08543551 0.13765744 0.05395905 -0.02614219
## total.sulfur.dioxide 0.07878250 -0.19390050 0.03306191 -0.08343417
## density 1.00000000 0.57223228 0.24799149 -0.75581233
## pH 0.57223228 1.00000000 -0.12860181 0.52004381
## sulphates 0.24799149 -0.12860181 1.00000000 0.28879013
## alcohol -0.75581233 0.52004381 0.28879013 1.00000000
# White wine: partial correlations via ppcor::pcor()
pcor_white <- pcor(wine_white)
# The "estimate" element holds the partial-correlation matrix itself
pcor_matrix_white <- pcor_white[["estimate"]]
pcor_matrix_white
## fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity 1.0000000000 -0.09177263 0.12265107 -0.67491264
## volatile.acidity -0.0917726327 1.00000000 -0.17348652 -0.06557784
## citric.acid 0.1226510715 -0.17348652 1.00000000 -0.04507215
## residual.sugar -0.6749126426 -0.06557784 -0.04507215 1.00000000
## chlorides -0.1709109899 0.09904437 0.10690945 -0.19717515
## free.sulfur.dioxide -0.0006842353 -0.18930514 0.03657815 0.17843406
## total.sulfur.dioxide -0.0388679308 0.19763096 0.04063530 -0.13361933
## density 0.7043664073 0.10614760 0.06764097 0.94153651
## pH -0.6661276194 -0.10955425 -0.08255467 -0.63555052
## sulphates -0.1765522452 -0.06548969 0.04088718 -0.26691222
## alcohol 0.6045902659 0.18100688 0.09410972 0.78095720
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity -0.170910990 -0.0006842353 -0.038867931
## volatile.acidity 0.099044371 -0.1893051352 0.197630961
## citric.acid 0.106909446 0.0365781543 0.040635296
## residual.sugar -0.197175153 0.1784340632 -0.133619329
## chlorides 1.000000000 0.0292937057 0.009562664
## free.sulfur.dioxide 0.029293706 1.0000000000 0.593206957
## total.sulfur.dioxide 0.009562664 0.5932069568 1.000000000
## density 0.163780328 -0.1555386433 0.196045940
## pH -0.164066249 0.0520636863 -0.019294025
## sulphates -0.037354845 0.0104257743 0.069470082
## alcohol -0.008965184 -0.0992961359 0.041463583
## density pH sulphates alcohol
## fixed.acidity 0.70436641 -0.66612762 -0.17655225 0.604590266
## volatile.acidity 0.10614760 -0.10955425 -0.06548969 0.181006877
## citric.acid 0.06764097 -0.08255467 0.04088718 0.094109718
## residual.sugar 0.94153651 -0.63555052 -0.26691222 0.780957197
## chlorides 0.16378033 -0.16406625 -0.03735485 -0.008965184
## free.sulfur.dioxide -0.15553864 0.05206369 0.01042577 -0.099296136
## total.sulfur.dioxide 0.19604594 -0.01929403 0.06947008 0.041463583
## density 1.00000000 0.62772454 0.26821045 -0.886725282
## pH 0.62772454 1.00000000 -0.06069657 0.565640272
## sulphates 0.26821045 -0.06069657 1.00000000 0.246627014
## alcohol -0.88672528 0.56564027 0.24662701 1.000000000
# Red wine: pairwise partial correlations (pingouin)
pcor_red = pg.pcorr(wine_red)
print(pcor_red)
## fixed acidity volatile acidity citric acid \
## fixed acidity 1.000000 0.049737 0.333289
## volatile acidity 0.049737 1.000000 -0.535464
## citric acid 0.333289 -0.535464 1.000000
## residual sugar -0.430958 -0.024022 0.053418
## chlorides -0.233359 0.264011 0.263709
## free sulfur dioxide 0.110133 -0.165340 -0.162490
## total sulfur dioxide -0.231717 0.214107 0.264974
## density 0.785450 0.128421 0.008597
## pH -0.718502 0.026648 -0.034238
## sulphates -0.152061 -0.209429 0.026553
## alcohol 0.543412 0.078911 0.149467
##
## residual sugar chlorides free sulfur dioxide \
## fixed acidity -0.430958 -0.233359 0.110133
## volatile acidity -0.024022 0.264011 -0.165340
## citric acid 0.053418 0.263709 -0.162490
## residual sugar 1.000000 -0.010846 0.126494
## chlorides -0.010846 1.000000 0.058525
## free sulfur dioxide 0.126494 0.058525 1.000000
## total sulfur dioxide 0.036301 -0.140179 0.662934
## density 0.595196 0.093803 -0.085436
## pH -0.329050 -0.231054 0.137657
## sulphates -0.200731 0.350510 0.053959
## alcohol 0.502077 -0.092343 -0.026142
##
## total sulfur dioxide density pH sulphates \
## fixed acidity -0.231717 0.785450 -0.718502 -0.152061
## volatile acidity 0.214107 0.128421 0.026648 -0.209429
## citric acid 0.264974 0.008597 -0.034238 0.026553
## residual sugar 0.036301 0.595196 -0.329050 -0.200731
## chlorides -0.140179 0.093803 -0.231054 0.350510
## free sulfur dioxide 0.662934 -0.085436 0.137657 0.053959
## total sulfur dioxide 1.000000 0.078782 -0.193901 0.033062
## density 0.078782 1.000000 0.572232 0.247991
## pH -0.193901 0.572232 1.000000 -0.128602
## sulphates 0.033062 0.247991 -0.128602 1.000000
## alcohol -0.083434 -0.755812 0.520044 0.288790
##
## alcohol
## fixed acidity 0.543412
## volatile acidity 0.078911
## citric acid 0.149467
## residual sugar 0.502077
## chlorides -0.092343
## free sulfur dioxide -0.026142
## total sulfur dioxide -0.083434
## density -0.755812
## pH 0.520044
## sulphates 0.288790
## alcohol 1.000000
# White wine: pairwise partial correlations (pingouin)
pcor_white = pg.pcorr(wine_white)
print(pcor_white)
## fixed acidity volatile acidity citric acid \
## fixed acidity 1.000000 -0.091773 0.122651
## volatile acidity -0.091773 1.000000 -0.173487
## citric acid 0.122651 -0.173487 1.000000
## residual sugar -0.674913 -0.065578 -0.045072
## chlorides -0.170911 0.099044 0.106909
## free sulfur dioxide -0.000684 -0.189305 0.036578
## total sulfur dioxide -0.038868 0.197631 0.040635
## density 0.704366 0.106148 0.067641
## pH -0.666128 -0.109554 -0.082555
## sulphates -0.176552 -0.065490 0.040887
## alcohol 0.604590 0.181007 0.094110
##
## residual sugar chlorides free sulfur dioxide \
## fixed acidity -0.674913 -0.170911 -0.000684
## volatile acidity -0.065578 0.099044 -0.189305
## citric acid -0.045072 0.106909 0.036578
## residual sugar 1.000000 -0.197175 0.178434
## chlorides -0.197175 1.000000 0.029294
## free sulfur dioxide 0.178434 0.029294 1.000000
## total sulfur dioxide -0.133619 0.009563 0.593207
## density 0.941537 0.163780 -0.155539
## pH -0.635551 -0.164066 0.052064
## sulphates -0.266912 -0.037355 0.010426
## alcohol 0.780957 -0.008965 -0.099296
##
## total sulfur dioxide density pH sulphates \
## fixed acidity -0.038868 0.704366 -0.666128 -0.176552
## volatile acidity 0.197631 0.106148 -0.109554 -0.065490
## citric acid 0.040635 0.067641 -0.082555 0.040887
## residual sugar -0.133619 0.941537 -0.635551 -0.266912
## chlorides 0.009563 0.163780 -0.164066 -0.037355
## free sulfur dioxide 0.593207 -0.155539 0.052064 0.010426
## total sulfur dioxide 1.000000 0.196046 -0.019294 0.069470
## density 0.196046 1.000000 0.627725 0.268210
## pH -0.019294 0.627725 1.000000 -0.060697
## sulphates 0.069470 0.268210 -0.060697 1.000000
## alcohol 0.041464 -0.886725 0.565640 0.246627
##
## alcohol
## fixed acidity 0.604590
## volatile acidity 0.181007
## citric acid 0.094110
## residual sugar 0.780957
## chlorides -0.008965
## free sulfur dioxide -0.099296
## total sulfur dioxide 0.041464
## density -0.886725
## pH 0.565640
## sulphates 0.246627
## alcohol 1.000000
# Red wine:
# Squared Mahalanobis distance of every observation from the centroid.
mahalanobis_dist_red <- mahalanobis(
  x = wine_red,
  center = means_red,
  cov = cov_matrix_red
)
# Chi-square cut-off used to flag outliers
alpha <- 0.05 # significance level
gl <- ncol(wine_red) # degrees of freedom (number of variables)
valor_critico <- qchisq(p = 1 - alpha, df = gl)
print(valor_critico) # show the critical value
## [1] 19.67514
atipicos_red <- which(valor_critico < mahalanobis_dist_red) # outlier positions
head(mahalanobis_dist_red) # first few distances
## [1] 4.920027 8.335593 3.647326 7.439998 4.920027 5.051613
atipicos_red # row positions of the outliers; 154 are listed below
## [1] 14 15 16 18 20 34 39 43 44 46 80 82 84 87 92
## [16] 93 95 96 107 110 127 128 143 145 152 162 170 182 199 227
## [31] 241 244 245 259 282 292 325 326 340 341 354 355 379 391 396
## [46] 397 401 410 416 443 452 456 464 481 485 494 495 500 502 503
## [61] 511 516 539 545 554 555 556 557 558 559 560 565 585 592 609
## [76] 615 634 640 650 652 653 673 685 691 693 696 701 711 724 731
## [91] 755 796 803 834 837 838 862 890 912 918 924 926 927 983 999
## [106] 1018 1019 1044 1052 1072 1075 1078 1079 1080 1082 1091 1115 1132 1155 1166
## [121] 1179 1187 1229 1236 1245 1257 1261 1270 1271 1289 1290 1296 1297 1300 1313
## [136] 1317 1320 1322 1359 1368 1371 1373 1375 1435 1436 1475 1476 1477 1478 1509
## [151] 1559 1571 1575 1590
# White wine:
# Squared Mahalanobis distance of every observation from the centroid.
mahalanobis_dist_white <- mahalanobis(
  x = wine_white,
  center = means_white,
  cov = cov_matrix_white
)
# Same cut-off as for red wine: valor_critico is not recomputed here.
print(valor_critico) # show the critical value
## [1] 19.67514
atipicos_white <- which(valor_critico < mahalanobis_dist_white) # outlier positions
head(mahalanobis_dist_white) # first few distances
## [1] 9.263907 6.682467 8.293968 3.223621 3.223621 8.293968
atipicos_white # row positions of the outliers; 395 are listed below
## [1] 18 21 24 32 41 42 55 97 99 100 116 148 170 179 195
## [16] 196 197 208 209 222 231 246 252 295 316 325 326 373 396 406
## [31] 434 471 485 503 509 532 542 550 601 627 647 660 663 682 684
## [46] 688 701 722 730 746 759 760 764 767 772 773 776 822 831 835
## [61] 853 855 860 867 871 874 879 914 916 927 947 949 980 981 1008
## [76] 1015 1017 1025 1028 1035 1037 1041 1052 1054 1100 1115 1127 1153 1159 1164
## [91] 1172 1179 1181 1215 1218 1221 1229 1240 1246 1251 1255 1256 1264 1273 1294
## [106] 1295 1305 1308 1309 1321 1370 1373 1374 1386 1387 1395 1402 1408 1416 1418
## [121] 1419 1424 1437 1456 1461 1466 1477 1483 1488 1497 1505 1527 1535 1537 1541
## [136] 1542 1545 1552 1576 1578 1579 1581 1584 1591 1597 1599 1600 1639 1650 1652
## [151] 1654 1664 1673 1674 1682 1689 1709 1722 1723 1728 1732 1745 1759 1760 1776
## [166] 1782 1802 1808 1810 1818 1836 1837 1840 1849 1857 1863 1866 1887 1901 1926
## [181] 1927 1932 1933 1941 1943 1945 1948 1952 1959 1962 1964 2015 2018 2025 2026
## [196] 2027 2031 2037 2051 2064 2076 2093 2128 2155 2163 2165 2187 2207 2258 2260
## [211] 2280 2287 2288 2322 2335 2337 2350 2360 2372 2379 2395 2397 2404 2409 2418
## [226] 2423 2425 2441 2442 2466 2467 2476 2477 2523 2526 2560 2564 2580 2590 2595
## [241] 2626 2630 2635 2638 2647 2652 2655 2669 2705 2706 2722 2731 2732 2749 2751
## [256] 2772 2782 2821 2850 2873 2874 2875 2894 2927 2931 2932 3015 3023 3024 3026
## [271] 3044 3051 3053 3065 3067 3073 3095 3096 3098 3140 3153 3166 3216 3219 3221
## [286] 3266 3284 3289 3308 3389 3418 3462 3471 3498 3521 3524 3529 3538 3557 3561
## [301] 3565 3572 3588 3589 3590 3620 3621 3624 3678 3711 3736 3738 3774 3849 3862
## [316] 3863 3864 3869 3870 3872 3880 3902 3903 3905 3912 3916 3938 3973 3982 3999
## [331] 4000 4001 4013 4040 4066 4108 4124 4137 4174 4214 4240 4248 4260 4300 4301
## [346] 4345 4347 4402 4474 4481 4498 4504 4515 4520 4526 4527 4566 4568 4580 4581
## [361] 4583 4592 4598 4610 4618 4627 4633 4649 4650 4651 4697 4699 4702 4703 4727
## [376] 4746 4780 4788 4790 4793 4794 4795 4814 4816 4819 4821 4838 4842 4846 4848
## [391] 4868 4878 4879 4887 4888
# Red wine:
# Squared Mahalanobis distance of every observation:
#   d_i = (x_i - mean)' S^-1 (x_i - mean)
xi_xbar_red = wine_red - means_red
inv_cov_red = np.linalg.inv(cov_matrix_red)
parte1_red = np.dot(xi_xbar_red, inv_cov_red)
# Row-wise dot product: this is the diagonal of parte1_red @ xi_xbar_red.T,
# computed without materialising the full n x n matrix.
mahalanobis_dist_red = np.einsum("ij,ij->i", parte1_red, np.asarray(xi_xbar_red))
print(mahalanobis_dist_red)
## [ 4.92002724 8.33559263 3.64732572 ... 7.22620103 9.03766813
## 12.27097623]
# Critical value used to flag outliers (chi-square quantile)
gl = wine_red.shape[1] # degrees of freedom (number of variables)
alpha = 0.05 # significance level
valor_critico = chi2.ppf(1 - alpha, df=gl)
print("Valor crítico: ",valor_critico)
## Valor crítico: 19.67513757268249
# Identify the outliers; np.where returns a 1-tuple holding the 0-based positions
atipicos_red = np.where(mahalanobis_dist_red > valor_critico)
# Show the outliers; the count is computed instead of being typed in by hand
print("Número de datos atípicos:", atipicos_red[0].size, "\n", atipicos_red)
## Número de datos atípicos: 154
## (array([ 13, 14, 15, 17, 19, 33, 38, 42, 43, 45, 79,
## 81, 83, 86, 91, 92, 94, 95, 106, 109, 126, 127,
## 142, 144, 151, 161, 169, 181, 198, 226, 240, 243, 244,
## 258, 281, 291, 324, 325, 339, 340, 353, 354, 378, 390,
## 395, 396, 400, 409, 415, 442, 451, 455, 463, 480, 484,
## 493, 494, 499, 501, 502, 510, 515, 538, 544, 553, 554,
## 555, 556, 557, 558, 559, 564, 584, 591, 608, 614, 633,
## 639, 649, 651, 652, 672, 684, 690, 692, 695, 700, 710,
## 723, 730, 754, 795, 802, 833, 836, 837, 861, 889, 911,
## 917, 923, 925, 926, 982, 998, 1017, 1018, 1043, 1051, 1071,
## 1074, 1077, 1078, 1079, 1081, 1090, 1114, 1131, 1154, 1165, 1178,
## 1186, 1228, 1235, 1244, 1256, 1260, 1269, 1270, 1288, 1289, 1295,
## 1296, 1299, 1312, 1316, 1319, 1321, 1358, 1367, 1370, 1372, 1374,
## 1434, 1435, 1474, 1475, 1476, 1477, 1508, 1558, 1570, 1574, 1589]),)
# White wine:
# Squared Mahalanobis distance of every observation:
#   d_i = (x_i - mean)' S^-1 (x_i - mean)
xi_xbar_white = wine_white - means_white
inv_cov_white = np.linalg.inv(cov_matrix_white)
parte1_white = np.dot(xi_xbar_white, inv_cov_white)
# Row-wise dot product: this is the diagonal of parte1_white @ xi_xbar_white.T,
# computed without materialising the full n x n matrix.
mahalanobis_dist_white = np.einsum("ij,ij->i", parte1_white, np.asarray(xi_xbar_white))
print(mahalanobis_dist_white)
## [9.26390697 6.68246735 8.2939681 ... 9.55520151 7.89464608 7.23836837]
# Critical value used to flag outliers (chi-square quantile);
# valor_critico is not recomputed in this chunk.
print("Valor crítico: ",valor_critico)
## Valor crítico: 19.67513757268249
# Identify the outliers; np.where returns a 1-tuple holding the 0-based positions
atipicos_white = np.where(mahalanobis_dist_white > valor_critico)
# Show the outliers; the count is computed instead of being typed in by hand
print("Número de datos atípicos:", atipicos_white[0].size, "\n", atipicos_white)
## Número de datos atípicos: 395
## (array([ 17, 20, 23, 31, 40, 41, 54, 96, 98, 99, 115,
## 147, 169, 178, 194, 195, 196, 207, 208, 221, 230, 245,
## 251, 294, 315, 324, 325, 372, 395, 405, 433, 470, 484,
## 502, 508, 531, 541, 549, 600, 626, 646, 659, 662, 681,
## 683, 687, 700, 721, 729, 745, 758, 759, 763, 766, 771,
## 772, 775, 821, 830, 834, 852, 854, 859, 866, 870, 873,
## 878, 913, 915, 926, 946, 948, 979, 980, 1007, 1014, 1016,
## 1024, 1027, 1034, 1036, 1040, 1051, 1053, 1099, 1114, 1126, 1152,
## 1158, 1163, 1171, 1178, 1180, 1214, 1217, 1220, 1228, 1239, 1245,
## 1250, 1254, 1255, 1263, 1272, 1293, 1294, 1304, 1307, 1308, 1320,
## 1369, 1372, 1373, 1385, 1386, 1394, 1401, 1407, 1415, 1417, 1418,
## 1423, 1436, 1455, 1460, 1465, 1476, 1482, 1487, 1496, 1504, 1526,
## 1534, 1536, 1540, 1541, 1544, 1551, 1575, 1577, 1578, 1580, 1583,
## 1590, 1596, 1598, 1599, 1638, 1649, 1651, 1653, 1663, 1672, 1673,
## 1681, 1688, 1708, 1721, 1722, 1727, 1731, 1744, 1758, 1759, 1775,
## 1781, 1801, 1807, 1809, 1817, 1835, 1836, 1839, 1848, 1856, 1862,
## 1865, 1886, 1900, 1925, 1926, 1931, 1932, 1940, 1942, 1944, 1947,
## 1951, 1958, 1961, 1963, 2014, 2017, 2024, 2025, 2026, 2030, 2036,
## 2050, 2063, 2075, 2092, 2127, 2154, 2162, 2164, 2186, 2206, 2257,
## 2259, 2279, 2286, 2287, 2321, 2334, 2336, 2349, 2359, 2371, 2378,
## 2394, 2396, 2403, 2408, 2417, 2422, 2424, 2440, 2441, 2465, 2466,
## 2475, 2476, 2522, 2525, 2559, 2563, 2579, 2589, 2594, 2625, 2629,
## 2634, 2637, 2646, 2651, 2654, 2668, 2704, 2705, 2721, 2730, 2731,
## 2748, 2750, 2771, 2781, 2820, 2849, 2872, 2873, 2874, 2893, 2926,
## 2930, 2931, 3014, 3022, 3023, 3025, 3043, 3050, 3052, 3064, 3066,
## 3072, 3094, 3095, 3097, 3139, 3152, 3165, 3215, 3218, 3220, 3265,
## 3283, 3288, 3307, 3388, 3417, 3461, 3470, 3497, 3520, 3523, 3528,
## 3537, 3556, 3560, 3564, 3571, 3587, 3588, 3589, 3619, 3620, 3623,
## 3677, 3710, 3735, 3737, 3773, 3848, 3861, 3862, 3863, 3868, 3869,
## 3871, 3879, 3901, 3902, 3904, 3911, 3915, 3937, 3972, 3981, 3998,
## 3999, 4000, 4012, 4039, 4065, 4107, 4123, 4136, 4173, 4213, 4239,
## 4247, 4259, 4299, 4300, 4344, 4346, 4401, 4473, 4480, 4497, 4503,
## 4514, 4519, 4525, 4526, 4565, 4567, 4579, 4580, 4582, 4591, 4597,
## 4609, 4617, 4626, 4632, 4648, 4649, 4650, 4696, 4698, 4701, 4702,
## 4726, 4745, 4779, 4787, 4789, 4792, 4793, 4794, 4813, 4815, 4818,
## 4820, 4837, 4841, 4845, 4847, 4867, 4877, 4878, 4886, 4887]),)
# Euclidean distance matrix using the first 6 rows of each data set
# (columns 2 to 7). R indexing is 1-based and a 0 index is silently dropped,
# so the rows are selected explicitly with 1:6.
wine_redD <- wine_red[1:6, 2:7]
wine_whiteD <- wine_white[1:6, 2:7]
# Red wine:
# dist() returns the lower triangle of the pairwise distances
dist_red <- dist(wine_redD, method = "euclidean")
dist_red
## 1 2 3 4 5
## 2 35.854189
## 3 20.400134 16.404452
## 4 26.692508 10.684762 6.376605
## 5 0.000000 35.854189 20.400134 26.692508
## 6 6.325472 29.558229 14.151392 20.407548 6.325472
# Expand into the full symmetric matrix
matrix_dist_red <- as.matrix(dist_red)
matrix_dist_red
## 1 2 3 4 5 6
## 1 0.000000 35.85419 20.400134 26.692508 0.000000 6.325472
## 2 35.854189 0.00000 16.404452 10.684762 35.854189 29.558229
## 3 20.400134 16.40445 0.000000 6.376605 20.400134 14.151392
## 4 26.692508 10.68476 6.376605 0.000000 26.692508 20.407548
## 5 0.000000 35.85419 20.400134 26.692508 0.000000 6.325472
## 6 6.325472 29.55823 14.151392 20.407548 6.325472 0.000000
# White wine:
# dist() returns the lower triangle of the pairwise distances
dist_white <- dist(wine_whiteD, method = "euclidean")
dist_white
## 1 2 3 4 5
## 2 52.62900
## 3 75.79210 38.84706
## 4 20.21988 63.66016 90.62323
## 5 20.21988 63.66016 90.62323 0.00000
## 6 75.79210 38.84706 0.00000 90.62323 90.62323
# Expand into the full symmetric matrix; it must be built from dist_white
# (using dist_red here would just reprint the red-wine matrix).
matrix_dist_white <- as.matrix(dist_white)
matrix_dist_white
## 1 2 3 4 5 6
## 1 0.00000 52.62900 75.79210 20.21988 20.21988 75.79210
## 2 52.62900 0.00000 38.84706 63.66016 63.66016 38.84706
## 3 75.79210 38.84706 0.00000 90.62323 90.62323 0.00000
## 4 20.21988 63.66016 90.62323 0.00000 0.00000 90.62323
## 5 20.21988 63.66016 90.62323 0.00000 0.00000 90.62323
## 6 75.79210 38.84706 0.00000 90.62323 90.62323 0.00000
# Red wine:
# Pairwise Euclidean distances between all rows of wine_red (the whole table is
# passed, not a 6-row subset), then expanded into a square matrix for printing.
# NOTE(review): pdist/squareform are presumably scipy.spatial.distance - the
# import is not visible in this section.
dist_red = pdist(wine_red, metric='euclidean')
matrix_dist_red = squareform(dist_red)
print("Matriz de distancias euclídeas:\n", matrix_dist_red)
## Matriz de distancias euclídeas:
## [[ 0. 35.86019221 20.40970496 ... 19.07949696 23.32259701
## 10.9912459 ]
## [35.86019221 0. 16.40458887 ... 27.36763755 24.13167951
## 26.08185696]
## [20.40970496 16.40458887 0. ... 19.89463389 19.82371349
## 12.64021468]
## ...
## [19.07949696 27.36763755 19.89463389 ... 0. 5.09238903
## 11.26697302]
## [23.32259701 24.13167951 19.82371349 ... 5.09238903 0.
## 14.2646307 ]
## [10.9912459 26.08185696 12.64021468 ... 11.26697302 14.2646307
## 0. ]]
# White wine:
# Pairwise Euclidean distances between all rows of wine_white, expanded into a
# square matrix for printing.
dist_white = pdist(wine_white, metric='euclidean')
matrix_dist_white = squareform(dist_white)
# Print the white-wine matrix (matrix_dist_red was printed here by mistake).
# NOTE(review): the output recorded below this chunk is the red-wine matrix
# from that mistake; re-run the chunk to refresh it.
print("Matriz de distancias euclídeas:\n", matrix_dist_white)
## Matriz de distancias euclídeas:
## [[ 0. 35.86019221 20.40970496 ... 19.07949696 23.32259701
## 10.9912459 ]
## [35.86019221 0. 16.40458887 ... 27.36763755 24.13167951
## 26.08185696]
## [20.40970496 16.40458887 0. ... 19.89463389 19.82371349
## 12.64021468]
## ...
## [19.07949696 27.36763755 19.89463389 ... 0. 5.09238903
## 11.26697302]
## [23.32259701 24.13167951 19.82371349 ... 5.09238903 0.
## 14.2646307 ]
## [10.9912459 26.08185696 12.64021468 ... 11.26697302 14.2646307
## 0. ]]
# Red wine: determinant of the covariance matrix
vg_red <- det(cov_matrix_red)
print(vg_red)
## [1] 3.478418e-11
# White wine: determinant of the covariance matrix
vg_white <- det(cov_matrix_white)
print(vg_white)
## [1] 1.701408e-11
# Red wine
# Determinant of the covariance matrix, printed under the label
# "Varianza Generalizada" (generalized variance).
vg_red = np.linalg.det(cov_matrix_red)
print("Varianza Generalizada\n", vg_red)
## Varianza Generalizada
## 3.478417646276091e-11
# White wine
# Same computation for the white-wine covariance matrix.
vg_white = np.linalg.det(cov_matrix_white)
print("Varianza Generalizada\n", vg_white)
## Varianza Generalizada
## 1.701408009848951e-11