library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Read the water-potability CSV (path is relative to the working directory).
water_potability_1_ <- readr::read_csv(file = "water_potability (1).csv")
## Rows: 3276 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): ph, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the loaded data.
head(x = water_potability_1_, n = 6L)
## # A tibble: 6 × 10
##      ph Hardness Solids Chloramines Sulfate Conductivity Organic_carbon
##   <dbl>    <dbl>  <dbl>       <dbl>   <dbl>        <dbl>          <dbl>
## 1 NA        205. 20791.        7.30    369.         564.          10.4 
## 2  3.72     129. 18630.        6.64     NA          593.          15.2 
## 3  8.10     224. 19910.        9.28     NA          419.          16.9 
## 4  8.32     214. 22018.        8.06    357.         363.          18.4 
## 5  9.09     181. 17979.        6.55    310.         398.          11.6 
## 6  5.58     188. 28749.        7.54    327.         280.           8.40
## # ℹ 3 more variables: Trihalomethanes <dbl>, Turbidity <dbl>, Potability <dbl>
library(dplyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#' Histogram of one column annotated with per-value row counts
#'
#' Builds a plotly histogram of `data[[x_var]]` and overlays one text
#' annotation per distinct non-missing value, showing how many rows hold
#' that value. Each annotation is placed at x = the value, y = its count.
#'
#' @param data A data frame (or tibble) containing the column to plot.
#' @param x_var Name of the column to plot, as a length-one character string.
#' @return A plotly object.
histogram_with_text <- function(data, x_var) {
  stopifnot(
    is.character(x_var),
    length(x_var) == 1L,
    x_var %in% names(data)
  )

  # Pass the column itself rather than `~get(x_var)`: get() can silently
  # resolve to a non-column object, and the axis would be titled after the
  # expression instead of the column.
  plot <- plot_ly(data, x = data[[x_var]]) %>%
    add_histogram() %>%
    layout(xaxis = list(title = x_var))

  # Values and counts come from the SAME table so that each label is paired
  # with its own x position (count() sorts by value, whereas unique() keeps
  # first-appearance order, so mixing the two misplaces the labels).
  value_counts <- dplyr::count(
    data,
    value = .data[[x_var]],
    name = "n_rows"
  )
  # A missing value has no position on the axis, so it gets no label.
  value_counts <- value_counts[!is.na(value_counts$value), ]

  if (nrow(value_counts) == 0L) {
    return(plot)
  }

  # NOTE(review): the label sits exactly on a bar top only when every
  # histogram bin holds a single distinct value -- confirm for continuous
  # columns.
  plot %>%
    layout(
      annotations = list(
        x = value_counts$value,
        y = value_counts$n_rows,
        text = value_counts$n_rows,
        showarrow = FALSE
      )
    )
}

# One annotated histogram per variable of interest.
plots <- lapply(
  c("Potability", "Hardness", "Chloramines", "Sulfate", "Turbidity"),
  function(column) histogram_with_text(water_potability_1_, column)
)

# Stack the histograms vertically, one row each.
subplot(plots, nrows = length(plots))
## Warning: Ignoring 781 observations
library(ggplot2)

# Scatter plot of pH against Hardness.
# Keep only the two plotted columns and drop rows where either is missing.
water_potability_clean <- water_potability_1_[c("ph", "Hardness")] %>%
  na.omit()

# Build the scatter plot from the complete rows.
scatter_plot <- ggplot(
  data = water_potability_clean,
  mapping = aes(x = ph, y = Hardness)
) +
  geom_point() +
  ggtitle("Tabla de Dispersión de pH vs Hardness") +
  xlab("pH") +
  ylab("Hardness")
scatter_plot

# Candlestick chart: observed range of four measurements per Potability class.
library(dplyr)

# The min/max must be computed per Potability class before plotting.
# Evaluating `min(Hardness)` inside plot_ly() yields one scalar for the whole
# column while x stays a per-row vector, so nothing is aggregated per class.
# `na.rm = TRUE` is required because Sulfate contains missing values, which
# would otherwise turn its min and max into NA.
water_potability_1_ %>%
  group_by(Potability) %>%
  summarise(
    across(
      c(Hardness, Turbidity, Conductivity, Sulfate),
      list(
        min = ~ min(.x, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE)
      )
    ),
    .groups = "drop"
  ) %>%
  # As in the original mapping, open = low = minimum and close = high =
  # maximum, so each candle body spans the full observed range.
  plot_ly(
    x = ~Potability,
    open = ~Hardness_min,
    high = ~Hardness_max,
    low = ~Hardness_min,
    close = ~Hardness_max,
    type = 'candlestick',
    name = 'Hardness'
  ) %>%
  add_trace(
    x = ~Potability,
    open = ~Turbidity_min,
    high = ~Turbidity_max,
    low = ~Turbidity_min,
    close = ~Turbidity_max,
    type = 'candlestick',
    name = 'Turbidity'
  ) %>%
  add_trace(
    x = ~Potability,
    open = ~Conductivity_min,
    high = ~Conductivity_max,
    low = ~Conductivity_min,
    close = ~Conductivity_max,
    type = 'candlestick',
    name = 'Conductivity'
  ) %>%
  add_trace(
    x = ~Potability,
    open = ~Sulfate_min,
    high = ~Sulfate_max,
    low = ~Sulfate_min,
    close = ~Sulfate_max,
    type = 'candlestick',
    name = 'Sulfate'
  )
# Diagramas de barras e histogramas
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
## 
##     stamp
# Bar chart counting the rows at each Potability value.
bar_hist_plot <- ggplot(
  data = water_potability_1_,
  mapping = aes(x = Potability)
) +
  geom_bar() +
  ggtitle("Diagrama de Barras de Potabilidad") +
  xlab("Potabilidad") +
  ylab("Frecuencia")

# Histogram of pH using bins 0.5 units wide.
hist_plot <- ggplot(
  data = water_potability_1_,
  mapping = aes(x = ph)
) +
  geom_histogram(binwidth = 0.5, fill = "blue", color = "black") +
  ggtitle("Histograma de pH") +
  xlab("pH") +
  ylab("Frecuencia")

# Place both charts side by side in a two-column grid.
cowplot::plot_grid(bar_hist_plot, hist_plot, ncol = 2)
## Warning: Removed 491 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Box plot of Turbidity, one box per Potability value.
boxplot <- ggplot(
  data = water_potability_1_,
  mapping = aes(
    x = factor(Potability),
    y = Turbidity,
    group = factor(Potability)
  )
) +
  geom_boxplot() +
  ggtitle("Boxplot de Potabilidad vs Turbidez") +
  xlab("Potabilidad") +
  ylab("Turbidez")

boxplot

# Load plotly.
library(plotly)

# Base plotly object mapping ph to the x axis and Hardness to the y axis.
p <- water_potability_1_ %>%
  plot_ly(x = ~ph, y = ~Hardness)

# Three 2D-histogram (rectangular binning) variants of the same data, shown
# side by side with a shared y axis: default settings, smoothed cells, and a
# 60 x 60 bin request. Each panel's x-axis title and colorbar title name the
# variant.
subplot(
  p %>%
    add_histogram2d() %>%
    colorbar(title = "default") %>%
    layout(xaxis = list(title = "default")),
  p %>%
    add_histogram2d(zsmooth = "best") %>%
    colorbar(title = "zsmooth") %>%
    layout(xaxis = list(title = "zsmooth")),
  p %>%
    add_histogram2d(nbinsx = 60, nbinsy = 60) %>%
    colorbar(title = "nbins") %>%
    layout(xaxis = list(title = "nbins")),
  shareY = TRUE,
  titleX = TRUE
)
# Load plotly.
library(plotly)

# 3D line trace with ph on x, Hardness on y and Chloramines on z; the line
# colour is mapped to Solids.
# NOTE(review): a line trace joins the observations into a path -- confirm a
# connected path (rather than markers) is what is intended here.
water_potability_1_ %>%
  plot_ly(x = ~ph, y = ~Hardness, z = ~Chloramines) %>%
  add_lines(color = ~Solids) %>%
  layout(scene = list(
    xaxis = list(title = "pH"),
    yaxis = list(title = "Hardness"),
    zaxis = list(title = "Chloramines")
  ))