library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Read the water-potability measurements from the CSV file in the working
# directory; readr guesses the column types and reports them below.
water_potability_1_ <- readr::read_csv(file = "water_potability (1).csv")
## Rows: 3276 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): ph, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Preview the first six rows to sanity-check the import.
head(water_potability_1_, n = 6L)
## # A tibble: 6 × 10
## ph Hardness Solids Chloramines Sulfate Conductivity Organic_carbon
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA 205. 20791. 7.30 369. 564. 10.4
## 2 3.72 129. 18630. 6.64 NA 593. 15.2
## 3 8.10 224. 19910. 9.28 NA 419. 16.9
## 4 8.32 214. 22018. 8.06 357. 363. 18.4
## 5 9.09 181. 17979. 6.55 310. 398. 11.6
## 6 5.58 188. 28749. 7.54 327. 280. 8.40
## # ℹ 3 more variables: Trihalomethanes <dbl>, Turbidity <dbl>, Potability <dbl>
library(dplyr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#' Build a plotly histogram of one column, annotated with per-value counts
#'
#' @param data A data frame (or tibble) that contains the column to plot.
#' @param x_var Name of the column to plot, given as a single string.
#' @return A plotly object: a histogram of `data[[x_var]]` carrying one text
#'   annotation per distinct non-missing value, showing how many rows hold
#'   that value.
histogram_with_text <- function(data, x_var) {
  # Fail early on a bad column name instead of letting a lookup silently pick
  # up an unrelated object with the same name.
  stopifnot(
    is.character(x_var),
    length(x_var) == 1L,
    x_var %in% names(data)
  )

  plot <- data %>%
    plot_ly(x = data[[x_var]]) %>%
    add_histogram() %>%
    layout(xaxis = list(title = x_var))

  # Tabulate each distinct value once. Missing values are dropped because an
  # annotation needs an x position to be placed at.
  n_summary <- data %>%
    dplyr::count(value = .data[[x_var]], name = "n") %>%
    dplyr::filter(!is.na(.data[["value"]]))

  if (nrow(n_summary) == 0L) {
    return(plot)
  }

  # Positions and labels come from the same summary table, so each count is
  # guaranteed to sit on the value it belongs to.
  plot %>%
    layout(
      annotations = list(
        x = n_summary$value,
        y = n_summary$n,
        text = n_summary$n,
        showarrow = FALSE
      )
    )
}
# One annotated histogram per column of interest, in top-to-bottom panel order.
plots <- lapply(
  c("Potability", "Hardness", "Chloramines", "Sulfate", "Turbidity"),
  function(column) histogram_with_text(water_potability_1_, column)
)

# Stack the histograms vertically, one panel per row.
subplot(plots, nrows = length(plots))
## Warning: Ignoring 781 observations
library(ggplot2)
# Scatter plot of pH against hardness.
# Keep only the rows where both variables are present, so every remaining
# observation can be drawn as a point.
water_potability_clean <- na.omit(water_potability_1_[, c("ph", "Hardness")])

# Build the scatter plot from the rows without missing values.
scatter_plot <- ggplot(water_potability_clean, aes(x = ph, y = Hardness)) +
  geom_point() +
  labs(
    title = "Tabla de Dispersión de pH vs Hardness",
    x = "pH",
    y = "Hardness"
  )
scatter_plot

# Candlestick chart showing the observed range of several variables.
# For every trace, open/low are the column minimum and close/high are the
# column maximum. `na.rm = TRUE` is required because a single missing value
# (Sulfate has many) would otherwise turn the whole summary into NA and leave
# that trace without drawable values.
library(dplyr)
water_potability_1_ %>%
  plot_ly(
    x = ~Potability,
    open = ~min(Hardness, na.rm = TRUE),
    high = ~max(Hardness, na.rm = TRUE),
    low = ~min(Hardness, na.rm = TRUE),
    close = ~max(Hardness, na.rm = TRUE),
    type = "candlestick",
    name = "Hardness"
  ) %>%
  add_trace(
    x = ~Potability,
    open = ~min(Turbidity, na.rm = TRUE),
    high = ~max(Turbidity, na.rm = TRUE),
    low = ~min(Turbidity, na.rm = TRUE),
    close = ~max(Turbidity, na.rm = TRUE),
    type = "candlestick",
    name = "Turbidity"
  ) %>%
  add_trace(
    x = ~Potability,
    open = ~min(Conductivity, na.rm = TRUE),
    high = ~max(Conductivity, na.rm = TRUE),
    low = ~min(Conductivity, na.rm = TRUE),
    close = ~max(Conductivity, na.rm = TRUE),
    type = "candlestick",
    name = "Conductivity"
  ) %>%
  add_trace(
    x = ~Potability,
    open = ~min(Sulfate, na.rm = TRUE),
    high = ~max(Sulfate, na.rm = TRUE),
    low = ~min(Sulfate, na.rm = TRUE),
    close = ~max(Sulfate, na.rm = TRUE),
    type = "candlestick",
    name = "Sulfate"
  )
# Diagramas de barras e histogramas
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
# Histogram of pH in bins 0.5 units wide (blue bars with black outlines).
hist_plot <- ggplot(data = water_potability_1_, mapping = aes(x = ph)) +
  geom_histogram(fill = "blue", color = "black", binwidth = 0.5) +
  ggtitle("Histograma de pH") +
  xlab("pH") +
  ylab("Frecuencia")

# Bar chart counting the rows at each Potability value.
bar_hist_plot <- ggplot(data = water_potability_1_, mapping = aes(x = Potability)) +
  geom_bar() +
  ggtitle("Diagrama de Barras de Potabilidad") +
  xlab("Potabilidad") +
  ylab("Frecuencia")

# Place the bar chart and the histogram side by side in a two-column grid.
cowplot::plot_grid(plotlist = list(bar_hist_plot, hist_plot), ncol = 2)
## Warning: Removed 491 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Box plot of turbidity, one box per potability class.
boxplot <- ggplot(
  data = water_potability_1_,
  mapping = aes(
    x = as.factor(Potability),
    y = Turbidity,
    group = as.factor(Potability)
  )
) +
  geom_boxplot() +
  ggtitle("Boxplot de Potabilidad vs Turbidez") +
  xlab("Potabilidad") +
  ylab("Turbidez")

# Print the plot.
boxplot

# Load the plotly library
library(plotly)
# Base plot shared by all panels: pH on the x axis, hardness on the y axis.
p <- plot_ly(data = water_potability_1_, x = ~ph, y = ~Hardness)

# Three rectangular-binning (2D histogram) variants of the same data, shown
# side by side with a shared y axis and per-panel x-axis titles:
#   1. default binning,
#   2. default binning with smoothing ("best"),
#   3. a finer 60 x 60 grid of bins.
subplot(
  list(
    p %>%
      add_histogram2d() %>%
      colorbar(title = "default") %>%
      layout(xaxis = list(title = "default")),
    p %>%
      add_histogram2d(zsmooth = "best") %>%
      colorbar(title = "zsmooth") %>%
      layout(xaxis = list(title = "zsmooth")),
    p %>%
      add_histogram2d(nbinsx = 60, nbinsy = 60) %>%
      colorbar(title = "nbins") %>%
      layout(xaxis = list(title = "nbins"))
  ),
  shareY = TRUE,
  titleX = TRUE
)
# Load the plotly library
library(plotly)
# 3D line plot of the water-potability variables: pH (x), hardness (y) and
# chloramines (z), with the line coloured by the Solids column.
water_potability_1_ %>%
  plot_ly(x = ~ph, y = ~Hardness, z = ~Chloramines) %>%
  add_lines(color = ~Solids) %>%
  layout(
    # Human-readable titles for the three axes of the 3D scene.
    scene = list(
      xaxis = list(title = "pH"),
      yaxis = list(title = "Hardness"),
      zaxis = list(title = "Chloramines")
    )
  )