## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 2795 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): Accident Date/Time, Operator Name, Pipeline/Facility Name, Pipelin...
## dbl (18): Report Number, Supplemental Number, Accident Year, Operator ID, Ac...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot of the original (x_lineal, y_lineal) pairs.
plot(x_lineal, y_lineal,
     main = "Nube de Puntos Original",
     xlab = "Liberados (X)", ylab = "Recuperados (Y)",
     pch = 16, col = "steelblue")

Basado en la visualización, planteamos la relación lineal: Y = mX + b. Se asume una relación lineal donde la recuperación depende de la magnitud del derrame.
## Intercepto original: 32.60681
## Pendiente original: 0.204805
# Redraw the scatter plot of the original pairs and overlay the line described
# by modelo_lineal (defined outside this view).
plot(x_lineal, y_lineal,
     main = "Nube de Puntos Original",
     xlab = "Liberados (X)", ylab = "Recuperados (Y)",
     pch = 16, col = "steelblue")
abline(modelo_lineal, col = "red", lwd = 2)

# Pearson correlation of the original pairs; use = "complete.obs" drops any
# pair with a missing value before computing the coefficient.
r_inicial <- cor(x_lineal, y_lineal, use = "complete.obs")
# Accept the original data when its Pearson coefficient reaches 0.80;
# otherwise filter the pairs and recompute the coefficient on what remains.
if (r_inicial >= 0.80) {
  r_final <- r_inicial
  print("Aprobado con datos originales.")
} else {
  print(paste(
    "Pearson inicial insuficiente:",
    round(r_inicial, 4),
    ". Eliminando outliers..."
  ))

  # Keep only rows with an unintentional release under 500 barrels and a
  # liquid recovery strictly above 0.
  data_filtrada <- subset(
    pares_lineal,
    `Liquid Recovery (Barrels)` > 0 &
      `Unintentional Release (Barrels)` < 500
  )

  # Recompute the correlation on the filtered columns.
  x_final <- data_filtrada[["Unintentional Release (Barrels)"]]
  y_final <- data_filtrada[["Liquid Recovery (Barrels)"]]
  r_final <- cor(x_final, y_final)
  print(paste("Nuevo Coeficiente de Pearson (r):", round(r_final, 4)))
}
## [1] "Pearson inicial insuficiente: 0.5132 . Eliminando outliers..."
## [1] "Nuevo Coeficiente de Pearson (r): 0.9461"