Potabilidad del Agua

Contexto:

Se carga la base de datos

library(readr)
## Warning: package 'readr' was built under R version 4.4.1
# Load the water-potability CSV into `df` with base R's CSV reader.
df <- read.csv(file = "C:\\Users\\ACER\\Downloads\\water_potability.csv")

Se analizan las dimensiones

# Report the size of the dataset as (rows, columns).
c(nrow(df), ncol(df))
## [1] 3276   10

Se visualizan las primeras 6 filas de la base de datos

 # Preview the first six rows (head()'s default count, written out).
 head(df, n = 6L)
##         ph Hardness   Solids Chloramines  Sulfate Conductivity Organic_carbon
## 1       NA 204.8905 20791.32    7.300212 368.5164     564.3087      10.379783
## 2 3.716080 129.4229 18630.06    6.635246       NA     592.8854      15.180013
## 3 8.099124 224.2363 19909.54    9.275884       NA     418.6062      16.868637
## 4 8.316766 214.3734 22018.42    8.059332 356.8861     363.2665      18.436524
## 5 9.092223 181.1015 17978.99    6.546600 310.1357     398.4108      11.558279
## 6 5.584087 188.3133 28748.69    7.544869 326.6784     280.4679       8.399735
##   Trihalomethanes Turbidity Potability
## 1        86.99097  2.963135          0
## 2        56.32908  4.500656          0
## 3        66.42009  3.055934          0
## 4       100.34167  4.628771          0
## 5        31.99799  4.075075          0
## 6        54.91786  2.559708          0

Se observa la estructura de la base de datos (tipo y primeros valores de cada variable)

 # Show each column's type and its first few values.
 utils::str(df)
## 'data.frame':    3276 obs. of  10 variables:
##  $ ph             : num  NA 3.72 8.1 8.32 9.09 ...
##  $ Hardness       : num  205 129 224 214 181 ...
##  $ Solids         : num  20791 18630 19910 22018 17979 ...
##  $ Chloramines    : num  7.3 6.64 9.28 8.06 6.55 ...
##  $ Sulfate        : num  369 NA NA 357 310 ...
##  $ Conductivity   : num  564 593 419 363 398 ...
##  $ Organic_carbon : num  10.4 15.2 16.9 18.4 11.6 ...
##  $ Trihalomethanes: num  87 56.3 66.4 100.3 32 ...
##  $ Turbidity      : num  2.96 4.5 3.06 4.63 4.08 ...
##  $ Potability     : int  0 0 0 0 0 0 0 0 0 0 ...
# Class balance of the target: one bar per Potability value, ordered by count.
barplot(
  sort(table(df$Potability)),
  las = 2,
  main = "Frecuencia por Potability",
  ylab = "Frecuencia"
)

# Histograms of Hardness and Chloramines. hist() bins values regardless of
# their order and discards non-finite values itself, so wrapping the column
# in sort() added nothing except a "sort(df$...)" default title and axis
# label. Explicit labels are given instead.
hist(df$Hardness, main = "Histograma de Hardness", xlab = "Hardness")

hist(df$Chloramines, main = "Histograma de Chloramines", xlab = "Chloramines")

# Boxplots of Sulfate and Trihalomethanes; titled so the two otherwise
# identical-looking figures can be told apart.
boxplot(df$Sulfate, main = "Sulfate")

boxplot(df$Trihalomethanes, main = "Trihalomethanes")

 library(Amelia)
## Warning: package 'Amelia' was built under R version 4.4.1
## Cargando paquete requerido: Rcpp
## Warning: package 'Rcpp' was built under R version 4.4.1
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.2, built: 2024-04-10)
## ## Copyright (C) 2005-2024 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# Draw Amelia's missingness map for the dataset.
Amelia::missmap(df)

 library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.1

Instalar la librería si no está instalada

# Install plotly only when it is missing, so re-running the script does not
# reinstall it every time. Straight quotes are required: typographic quotes
# are not string delimiters in R.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}

library(plotly)

Gráfico de dispersión interactivo entre pH y Conductividad

# Interactive scatter plot of pH (x) against Conductivity (y).
plot_ly(
  data = df,
  x = ~ph,
  y = ~Conductivity,
  type = "scatter",
  mode = "markers",
  marker = list(
    size = 10,
    color = "rgba(255, 182, 193, .9)",
    line = list(color = "rgba(152, 0, 0, .8)", width = 2)
  )
) %>%
  layout(
    title = "Relación entre pH y Conductividad",
    xaxis = list(title = "pH"),
    yaxis = list(title = "Conductividad")
  )

# Install lattice if it is not already installed.
# install.packages("lattice")
# The library() call sits on its own line: when it shared a line with the
# comments above, it was itself commented out and lattice was never attached.
library(lattice)

Gráfico de dispersión múltiple entre diferentes variables

# Scatter-plot matrix of five of the measured variables. The call is
# namespace-qualified so it works whether or not lattice has been attached.
lattice::splom(
  df[c("ph", "Hardness", "Conductivity", "Sulfate", "Turbidity")],
  main = "Matriz de Dispersión entre Variables"
)

Instalar la librería si no está instalada

# Install highcharter only when it is missing, so re-running the script does
# not reinstall it every time. Straight quotes are required: typographic
# quotes are not string delimiters in R.
if (!requireNamespace("highcharter", quietly = TRUE)) {
  install.packages("highcharter")
}

library(highcharter)

Gráfico interactivo de líneas para ver la distribución de la dureza

# Interactive line chart of Hardness against row position.
# seq_len(nrow(df)) is used for the x index instead of 1:nrow(df), which
# would yield c(1, 0) on an empty data frame.
hchart(
  df,
  type = "line",
  hcaes(x = seq_len(nrow(df)), y = Hardness),
  color = "blue"
) %>%
  hc_title(text = "Distribución de la Dureza del Agua")

Instalar la librería si no está instalada

# Install ggridges only when it is missing, so re-running the script does not
# reinstall it every time. Straight quotes are required: typographic quotes
# are not string delimiters in R.
if (!requireNamespace("ggridges", quietly = TRUE)) {
  install.packages("ggridges")
}

library(ggridges)

Visualización de densidad para el pH

# Ridge-line density of pH, one ridge per Potability value.
# Potability is stored as an integer; it is converted to a factor for the
# fill as well as the y axis so the legend is discrete rather than a
# continuous colour bar.
ggplot(df, aes(x = ph, y = factor(Potability), fill = factor(Potability))) +
  geom_density_ridges() +
  labs(
    title = "Distribución del pH según Potabilidad",
    x = "pH",
    y = "Potabilidad",
    fill = "Potabilidad"
  )

Instalar la librería si no está instalada

# Install cowplot only when it is missing, so re-running the script does not
# reinstall it every time. Straight quotes are required: typographic quotes
# are not string delimiters in R.
if (!requireNamespace("cowplot", quietly = TRUE)) {
  install.packages("cowplot")
}

library(cowplot)

Combinar gráficos

# Two histograms placed side by side in one figure.
# Each statement is on its own line: fused onto a single line without
# separators they do not parse.
# bins = 30 is ggplot2's default, stated explicitly to avoid the
# "pick better value" message.
p1 <- ggplot(df, aes(x = Chloramines)) +
  geom_histogram(fill = "purple", bins = 30)
p2 <- ggplot(df, aes(x = Trihalomethanes)) +
  geom_histogram(fill = "green", bins = 30)

# labels = "AUTO" lets cowplot label the panels automatically.
plot_grid(p1, p2, labels = "AUTO")