library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(corrplot)
## corrplot 0.92 loaded
# Download the UCI Wine data set. The file is read with header = FALSE, so
# the column names are assigned explicitly below. HTTPS is used so the
# download is not fetched over an unencrypted connection.
data <- read.csv(
  url("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"),
  header = FALSE
)

# Column names, in the order the columns appear in the downloaded file.
nuevos_nombres <- c(
  "Class", "Alcohol", "MalicAcid", "Ash", "AlcalinityOfAsh", "Magnesium",
  "Total phenols", "Flavanoids", "NonflavanoidPhenols", "Proanthocyanins",
  "ColorIntensity", "Hue", "OD280/OD315", "Proline"
)

# Fail fast if the download does not have one column per name; assigning a
# shorter name vector would otherwise silently produce NA column names.
stopifnot(ncol(data) == length(nuevos_nombres))
colnames(data) <- nuevos_nombres
names(data)
##  [1] "Class"               "Alcohol"             "MalicAcid"          
##  [4] "Ash"                 "AlcalinityOfAsh"     "Magnesium"          
##  [7] "Total phenols"       "Flavanoids"          "NonflavanoidPhenols"
## [10] "Proanthocyanins"     "ColorIntensity"      "Hue"                
## [13] "OD280/OD315"         "Proline"
# Treat the class label as a categorical variable (factor).
data[["Class"]] <- as.factor(data[["Class"]])

Analisis del dataset para su entendimiento

Explorar el dataset

# Show the first six rows of the data set.
head(data, n = 6L)
##   Class Alcohol MalicAcid  Ash AlcalinityOfAsh Magnesium Total phenols
## 1     1   14.23      1.71 2.43            15.6       127          2.80
## 2     1   13.20      1.78 2.14            11.2       100          2.65
## 3     1   13.16      2.36 2.67            18.6       101          2.80
## 4     1   14.37      1.95 2.50            16.8       113          3.85
## 5     1   13.24      2.59 2.87            21.0       118          2.80
## 6     1   14.20      1.76 2.45            15.2       112          3.27
##   Flavanoids NonflavanoidPhenols Proanthocyanins ColorIntensity  Hue
## 1       3.06                0.28            2.29           5.64 1.04
## 2       2.76                0.26            1.28           4.38 1.05
## 3       3.24                0.30            2.81           5.68 1.03
## 4       3.49                0.24            2.18           7.80 0.86
## 5       2.69                0.39            1.82           4.32 1.04
## 6       3.39                0.34            1.97           6.75 1.05
##   OD280/OD315 Proline
## 1        3.92    1065
## 2        3.40    1050
## 3        3.17    1185
## 4        3.45    1480
## 5        2.93     735
## 6        2.85    1450

En este caso podemos observar que solo hay una variable cualitativa (de tipo categórico), “Class”; por este motivo solo se realizará un gráfico para variables cualitativas.

## Explorar el tipo de variables para realizar los gráficos

Variable Name Type
Class Categorical
Alcohol Continuous
Malic_acid Continuous
Ash Continuous
Alcalinity_of_ash Continuous
Magnesium Integer
Total_phenols Continuous
Flavanoids Continuous
Nonflavanoid_phenols Continuous
Proanthocyanins Continuous
Color_intensity Continuous
Hue Continuous
OD280/OD315 Continuous
Proline Integer

Graficos

Estadística descriptiva univariante

# Frequency table: number of rows per value of Class.
class_counts <- count(data, Class)

# Column chart of the Class frequencies.
class_counts %>%
  hchart("column", hcaes(x = Class, y = n)) %>%
  hc_title(text = "Distribución de la variable Class") %>%
  hc_xAxis(title = list(text = "Class")) %>%
  hc_yAxis(title = list(text = "Frecuencia")) %>%
  hc_tooltip(pointFormat = "Frecuencia: {point.y}")

Estadística descriptiva bivariante

Grafico de dispersion 1: Alcohol vs Malic Acid

# Scatter plot of Alcohol (x axis) against MalicAcid (y axis), flat theme.
data %>%
  hchart("scatter", hcaes(x = Alcohol, y = MalicAcid)) %>%
  hc_title(text = "Diagrama de Dispersión: Alcohol vs. Malic Acid") %>%
  hc_xAxis(title = list(text = "Alcohol")) %>%
  hc_yAxis(title = list(text = "Malic Acid")) %>%
  hc_tooltip(pointFormat = "Alcohol: {point.x}<br>Malic Acid: {point.y}") %>%
  hc_add_theme(hc_theme_flat())

Grafico de dispersion 2: Ash vs Alcalinity Of Ash

# Scatter plot of Ash (x axis) against AlcalinityOfAsh (y axis), flat theme.
data %>%
  hchart("scatter", hcaes(x = Ash, y = AlcalinityOfAsh)) %>%
  hc_title(text = "Diagrama de Dispersión: Ash vs. Alcalinity Of Ash") %>%
  hc_xAxis(title = list(text = "Ash")) %>%
  hc_yAxis(title = list(text = "Alcalinity Of Ash")) %>%
  hc_tooltip(pointFormat = "Ash: {point.x}<br>Alcalinity Of Ash: {point.y}") %>%
  # Change the colour of the points.
  hc_plotOptions(series = list(color = "red")) %>%
  hc_add_theme(hc_theme_flat())

Estadística descriptiva multivariante

Diagrama de dispersion de 5 variables

# Scatter-plot matrix of columns 6 to 10 of the data set, drawn in red.
plot(
  data[, c("Magnesium", "Total phenols", "Flavanoids",
           "NonflavanoidPhenols", "Proanthocyanins")],
  col = "red"
)

Diagrama de correlaciones de 5 variables

# Select the five variables to correlate by name rather than by position,
# so the selection does not depend on the column order of the data frame.
data_subset <- data[, c("Magnesium", "Total phenols", "Flavanoids",
                        "NonflavanoidPhenols", "Proanthocyanins")]

# Pairwise correlation matrix (cor() default method).
corr_matrix_subset <- cor(data_subset)

# Correlation plot with the coefficients printed on each cell.
# `mar` reserves space at the top of the figure; without it the title
# passed through `title` is clipped by the plot margins.
corrplot(corr_matrix_subset,
         method = "circle",
         type = "full",
         title = "Correlaciones entre 'Magnesium', 'Total phenols', 'Flavanoids', 'NonflavanoidPhenols', 'Proanthocyanins' ",
         addCoef.col = "black",
         tl.col = "black",
         tl.srt = 45,
         mar = c(0, 0, 2, 0))

Análisis e interpretación de los resultados