##==============================================================###
## UNIVERSIDAD CENTRAL DEL ECUADOR ###
##Facultad de Ingeniería en Geología, Minas, Petróleos y Ambiental###
##Materia: Estadística — Docente: Ing. Christian Mejía E. MSc. ###
##==============================================================###
suppressPackageStartupMessages({
library(dplyr)
library(ggplot2)
library(ggfortify)
library(tidyverse)
library(fdth)
library(lattice)
library(MASS)
library(PASWR)
library(magick)
library(readxl)
library(plotly)
library(psych)
library(car)
library(ggpmisc)
library(scatterplot3d)
library(corrplot)
library(GGally)
library(RSNNS)
})
#############################
#### Directorio de trabajo ##
#############################
# Set the working directory to the folder that holds the dataset.
# NOTE(review): this absolute path is machine-specific; the script only runs
# as-is on a computer that has this exact folder.
setwd("D:/Personal/Escritorio/ESTADISTICA JIM/Kaggle")
# Read the cleaned CSV: semicolon-separated, "." as decimal mark, Latin-1
# encoded. `header = TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
Datos <- read.csv(
  "market_pipe_thickness_loss_dataset_LIMPIO.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  fileEncoding = "ISO-8859-1"
)
# Drop every row that contains at least one missing value
Datos <- na.omit(Datos)
# Print the structure of the data frame (column names, types, first values)
str(Datos)
## 'data.frame': 1000 obs. of 11 variables:
## $ Pipe_Size : int 800 800 400 1500 1500 600 200 300 150 800 ...
## $ Thickness : num 15.5 22 12.1 38.7 24.3 ...
## $ Material : chr "Carbon Steel" "PVC" "Carbon Steel" "Carbon Steel" ...
## $ Grade : chr "ASTM A333 Grade 6" "ASTM A106 Grade B" "API 5L X52" "API 5L X42" ...
## $ Max_Pressure : int 300 150 2500 1500 1500 600 1500 900 300 150 ...
## $ Temperature : num 84.9 14.1 0.6 52.7 11.7 67.3 89.6 40.8 3.2 11.6 ...
## $ Corrosion_Impact: num 16.04 7.38 2.12 5.58 12.29 ...
## $ Thickness_Loss : num 4.91 7.32 6.32 6.2 8.58 5.21 5.86 3.02 2.47 0.53 ...
## $ Material_Loss : num 31.7 33.3 52.5 16 35.3 ...
## $ Time : int 2 4 7 19 20 11 6 21 19 1 ...
## $ Condition : chr "Moderate" "Critical" "Critical" "Critical" ...
names(Datos)
## [1] "Pipe_Size" "Thickness" "Material" "Grade"
## [5] "Max_Pressure" "Temperature" "Corrosion_Impact" "Thickness_Loss"
## [9] "Material_Loss" "Time" "Condition"
# Normalization of Material_Loss to the 0-1 range with normalizeData()
# (type = "0_1"). The raw result is kept in `normalizacion`.
normalizacion <- normalizeData(Datos$Material_Loss, type = "0_1")
# Store the normalized values as a plain numeric vector. A single
# as.numeric() call is enough, so the duplicated conversion was removed.
Datos$Material_Loss_norm <- as.numeric(normalizacion)
# Quick look at the data frame, now including Material_Loss_norm
dplyr::glimpse(Datos)
## Rows: 1,000
## Columns: 12
## $ Pipe_Size <int> 800, 800, 400, 1500, 1500, 600, 200, 300, 150, 800,…
## $ Thickness <dbl> 15.48, 22.00, 12.05, 38.72, 24.32, 16.75, 9.94, 13.…
## $ Material <chr> "Carbon Steel", "PVC", "Carbon Steel", "Carbon Stee…
## $ Grade <chr> "ASTM A333 Grade 6", "ASTM A106 Grade B", "API 5L X…
## $ Max_Pressure <int> 300, 150, 2500, 1500, 1500, 600, 1500, 900, 300, 15…
## $ Temperature <dbl> 84.9, 14.1, 0.6, 52.7, 11.7, 67.3, 89.6, 40.8, 3.2,…
## $ Corrosion_Impact <dbl> 16.04, 7.38, 2.12, 5.58, 12.29, 2.06, 1.34, 5.57, 1…
## $ Thickness_Loss <dbl> 4.91, 7.32, 6.32, 6.20, 8.58, 5.21, 5.86, 3.02, 2.4…
## $ Material_Loss <dbl> 31.72, 33.27, 52.45, 16.01, 35.28, 31.10, 58.95, 21…
## $ Time <int> 2, 4, 7, 19, 20, 11, 6, 21, 19, 1, 13, 6, 6, 13, 7,…
## $ Condition <chr> "Moderate", "Critical", "Critical", "Critical", "Cr…
## $ Material_Loss_norm <dbl> 0.0992876644, 0.1041516302, 0.1643392852, 0.0499890…
# Frequency table for Material: absolute count (n), relative frequency (hi),
# percentage, and an "n (p%)" label used on the bar chart.
TDF_Material <- dplyr::mutate(
  dplyr::count(Datos, Material),
  hi = round(prop.table(n), 4),
  Porcentaje = round(100 * hi, 2),
  Etiqueta = paste0(n, " (", Porcentaje, "%)")
)
knitr::kable(TDF_Material, caption = "Tabla de frecuencias — Material")
| Material | n | hi | Porcentaje | Etiqueta |
|---|---|---|---|---|
| Carbon Steel | 210 | 0.210 | 21.0 | 210 (21%) |
| Fiberglass | 219 | 0.219 | 21.9 | 219 (21.9%) |
| HDPE | 184 | 0.184 | 18.4 | 184 (18.4%) |
| PVC | 186 | 0.186 | 18.6 | 186 (18.6%) |
| Stainless Steel | 201 | 0.201 | 20.1 | 201 (20.1%) |
# Bar chart: Material (absolute frequency, bars sorted from most to least
# frequent, each labelled with "n (p%)")
ggplot(data = TDF_Material, mapping = aes(x = reorder(Material, -n), y = n)) +
  geom_col(colour = "black", fill = c("red", "blue", "green", "yellow", "orange")) +
  geom_text(mapping = aes(label = Etiqueta), size = 3.4, vjust = -0.35) +
  ggtitle("Distribución del material") +
  xlab("Material") +
  ylab("Frecuencia (n)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(axis.text.x = element_text(hjust = 1, angle = 25)) +
  # Extend the y axis 12% above the tallest bar so its label has room
  expand_limits(y = 1.12 * max(TDF_Material$n))
# Pie chart: Material (percentage); one stacked bar wrapped into polar
# coordinates, with the percentage printed in the middle of each slice
ggplot(data = TDF_Material, mapping = aes(x = "", y = Porcentaje, fill = Material)) +
  geom_col(colour = "black", width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  geom_text(
    mapping = aes(label = paste0(Porcentaje, "%")),
    position = position_stack(vjust = 0.5),
    colour = "black"
  ) +
  ggtitle("Distribución del material (porcentaje)")
Interpretación breve.
La variable Material está bastante equilibrada: Fiberglass (21.9%),
Carbon Steel (21.0%), Stainless Steel (20.1%), PVC (18.6%) y HDPE
(18.4%). Esto sugiere que el dataset no está dominado por un solo
material, lo cual es positivo para comparaciones y para evitar sesgos
fuertes por sobrerrepresentación de una categoría. —
# Frequency table for Grade: absolute count (n), relative frequency (hi),
# percentage, and an "n (p%)" label used on the bar chart.
TDF_Grade <- dplyr::mutate(
  dplyr::count(Datos, Grade),
  hi = round(prop.table(n), 4),
  Porcentaje = round(100 * hi, 2),
  Etiqueta = paste0(n, " (", Porcentaje, "%)")
)
knitr::kable(TDF_Grade, caption = "Tabla de frecuencias — Grado del material (Grade)")
| Grade | n | hi | Porcentaje | Etiqueta |
|---|---|---|---|---|
| API 5L X42 | 186 | 0.186 | 18.6 | 186 (18.6%) |
| API 5L X52 | 191 | 0.191 | 19.1 | 191 (19.1%) |
| API 5L X65 | 183 | 0.183 | 18.3 | 183 (18.3%) |
| ASTM A106 Grade B | 212 | 0.212 | 21.2 | 212 (21.2%) |
| ASTM A333 Grade 6 | 228 | 0.228 | 22.8 | 228 (22.8%) |
# Bar chart: Grade (absolute frequency, bars sorted from most to least
# frequent, each labelled with "n (p%)")
ggplot(data = TDF_Grade, mapping = aes(x = reorder(Grade, -n), y = n)) +
  geom_col(colour = "black", fill = c("red", "blue", "green", "yellow", "orange")) +
  geom_text(mapping = aes(label = Etiqueta), size = 3.2, vjust = -0.35) +
  ggtitle("Distribución del grado del material (Grade)") +
  xlab("Grado (Grade)") +
  ylab("Frecuencia (n)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(axis.text.x = element_text(hjust = 1, angle = 25)) +
  # Extend the y axis 12% above the tallest bar so its label has room
  expand_limits(y = 1.12 * max(TDF_Grade$n))
# Pie chart: Grade (percentage); one stacked bar wrapped into polar
# coordinates, with the percentage printed in the middle of each slice
ggplot(data = TDF_Grade, mapping = aes(x = "", y = Porcentaje, fill = Grade)) +
  geom_col(colour = "black", width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  geom_text(
    mapping = aes(label = paste0(Porcentaje, "%")),
    position = position_stack(vjust = 0.5),
    colour = "black"
  ) +
  ggtitle("Distribución del grado del material (porcentaje)")
Interpretación breve.
Grade también presenta una distribución relativamente balanceada, con un
leve predominio de ASTM A333 Grade 6 (22.8%) y ASTM A106 Grade B
(21.2%), mientras que API 5L X52 (19.1%), API 5L X42 (18.6%) y API 5L
X65 (18.3%) quedan cerca. En términos descriptivos, esto permite evaluar
el comportamiento del deterioro sin que un único grado domine la muestra y
reduzca la fiabilidad del análisis. —
# Frequency table for Condition: absolute count (n), relative frequency (hi),
# percentage, and an "n (p%)" label used on the bar chart.
TDF_Condition <- dplyr::mutate(
  dplyr::count(Datos, Condition),
  hi = round(prop.table(n), 4),
  Porcentaje = round(100 * hi, 2),
  Etiqueta = paste0(n, " (", Porcentaje, "%)")
)
knitr::kable(TDF_Condition, caption = "Tabla de frecuencias — Condición (Condition)")
| Condition | n | hi | Porcentaje | Etiqueta |
|---|---|---|---|---|
| Critical | 487 | 0.487 | 48.7 | 487 (48.7%) |
| Moderate | 299 | 0.299 | 29.9 | 299 (29.9%) |
| Normal | 214 | 0.214 | 21.4 | 214 (21.4%) |
# Bar chart: Condition (absolute frequency, bars sorted from most to least
# frequent, each labelled with "n (p%)")
ggplot(data = TDF_Condition, mapping = aes(x = reorder(Condition, -n), y = n)) +
  geom_col(colour = "black", fill = c("red", "blue", "green")) +
  geom_text(mapping = aes(label = Etiqueta), size = 3.4, vjust = -0.35) +
  ggtitle("Distribución de la condición (Condition)") +
  xlab("Condición") +
  ylab("Frecuencia (n)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  # Extend the y axis 12% above the tallest bar so its label has room
  expand_limits(y = 1.12 * max(TDF_Condition$n))
# Pie chart: Condition (percentage); one stacked bar wrapped into polar
# coordinates, with the percentage printed in the middle of each slice
ggplot(data = TDF_Condition, mapping = aes(x = "", y = Porcentaje, fill = Condition)) +
  geom_col(colour = "black", width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  geom_text(
    mapping = aes(label = paste0(Porcentaje, "%")),
    position = position_stack(vjust = 0.5),
    colour = "black"
  ) +
  ggtitle("Distribución de la condición (porcentaje)")
Interpretación breve.
En Condition sí hay un desbalance claro: Critical representa 48.7%,
Moderate 29.9% y Normal 21.4%. Esto indica que el dataset está orientado
hacia condiciones más severas, por lo que los promedios globales de
deterioro tenderán a reflejar un entorno de mayor riesgo. —
# Frequency table for Max_Pressure (treated as a discrete variable): absolute
# count (n), relative frequency (hi), percentage, and an "n (p%)" label used
# on the bar chart.
TDF_Max_Pressure <- dplyr::mutate(
  dplyr::count(Datos, Max_Pressure),
  hi = round(prop.table(n), 4),
  Porcentaje = round(100 * hi, 2),
  Etiqueta = paste0(n, " (", Porcentaje, "%)")
)
knitr::kable(TDF_Max_Pressure, caption = "Tabla de frecuencias — Presión máxima (Max_Pressure)")
| Max_Pressure | n | hi | Porcentaje | Etiqueta |
|---|---|---|---|---|
| 150 | 168 | 0.168 | 16.8 | 168 (16.8%) |
| 300 | 161 | 0.161 | 16.1 | 161 (16.1%) |
| 600 | 163 | 0.163 | 16.3 | 163 (16.3%) |
| 900 | 172 | 0.172 | 17.2 | 172 (17.2%) |
| 1500 | 162 | 0.162 | 16.2 | 162 (16.2%) |
| 2500 | 174 | 0.174 | 17.4 | 174 (17.4%) |
# Bar chart: Max_Pressure (absolute frequency, bars sorted from most to least
# frequent, each labelled with "n (p%)")
ggplot(data = TDF_Max_Pressure, mapping = aes(x = reorder(Max_Pressure, -n), y = n)) +
  geom_col(
    colour = "black",
    fill = c("red", "blue", "green", "yellow", "orange", "purple")
  ) +
  geom_text(mapping = aes(label = Etiqueta), size = 3.2, vjust = -0.35) +
  ggtitle("Distribución de la presión máxima (Max_Pressure)") +
  xlab("Presión máxima") +
  ylab("Frecuencia (n)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  # Extend the y axis 12% above the tallest bar so its label has room
  expand_limits(y = 1.12 * max(TDF_Max_Pressure$n))
# Pie chart: Max_Pressure (percentage); the pressure is converted to a factor
# so that every level gets its own discrete fill colour
ggplot(
  data = TDF_Max_Pressure,
  mapping = aes(x = "", y = Porcentaje, fill = factor(Max_Pressure))
) +
  geom_col(colour = "black", width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  geom_text(
    mapping = aes(label = paste0(Porcentaje, "%")),
    position = position_stack(vjust = 0.5),
    colour = "black"
  ) +
  ggtitle("Distribución de la presión máxima (porcentaje)") +
  labs(fill = "Max_Pressure")
Interpretación breve.
Max_Pressure se distribuye de forma casi uniforme entre sus seis niveles:
2500 (17.4%), 900 (17.2%), 150 (16.8%), 600 (16.3%), 1500 (16.2%) y 300
(16.1%). Ninguna clase de presión domina la muestra, por lo que las
comparaciones entre niveles de presión no quedan sesgadas por el tamaño
de los grupos. —
## Thickness Loss ####
#####################
# Non-missing values of Thickness_Loss and their count
x <- Datos$Thickness_Loss
x <- x[!is.na(x)]
n <- length(x)
# Number of classes (Sturges' rule)
k <- ceiling(1 + 3.322 * log10(n))
# Class limits: "round" break points covering the data range, about k classes
breaks <- pretty(range(x), n = k)
# Frequency table by class interval (right-closed intervals; the first one
# also includes its lower limit)
clase <- cut(x, breaks = breaks, include.lowest = TRUE, right = TRUE)
tabla <- as.data.frame(table(clase))
names(tabla) <- c("Intervalo", "fi")
# Relative and cumulative frequencies
tabla$hi <- round(tabla$fi / n, 4)
tabla$Fi <- cumsum(tabla$fi)
# Hi comes from the exact cumulative counts (Fi / n) instead of summing the
# already-rounded hi values, so rounding error cannot accumulate and the last
# class always reaches exactly 1.
tabla$Hi <- round(tabla$Fi / n, 4)
# Class mark (xi): midpoint of each interval
tabla$xi <- (head(breaks, -1) + tail(breaks, -1)) / 2
# Final table
TDF_Thickness_Loss <- tabla[, c("Intervalo", "xi", "fi", "hi", "Fi", "Hi")]
knitr::kable(TDF_Thickness_Loss, caption = "Tabla de distribución de frecuencias — Pérdida de espesor (Thickness_Loss)")
| Intervalo | xi | fi | hi | Fi | Hi |
|---|---|---|---|---|---|
| [0,1] | 0.5 | 124 | 0.124 | 124 | 0.124 |
| (1,2] | 1.5 | 90 | 0.090 | 214 | 0.214 |
| (2,3] | 2.5 | 100 | 0.100 | 314 | 0.314 |
| (3,4] | 3.5 | 88 | 0.088 | 402 | 0.402 |
| (4,5] | 4.5 | 111 | 0.111 | 513 | 0.513 |
| (5,6] | 5.5 | 101 | 0.101 | 614 | 0.614 |
| (6,7] | 6.5 | 91 | 0.091 | 705 | 0.705 |
| (7,8] | 7.5 | 105 | 0.105 | 810 | 0.810 |
| (8,9] | 8.5 | 105 | 0.105 | 915 | 0.915 |
| (9,10] | 9.5 | 85 | 0.085 | 1000 | 1.000 |
## Histogram: Thickness_Loss, binned on the global `breaks` vector, with the
## count printed above every bar and the frequency polygon drawn on top
Histo_Thickness_loss <- ggplot(data = Datos, mapping = aes(x = Thickness_Loss)) +
  geom_histogram(breaks = breaks, colour = "black", fill = "orange") +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "bin",
    breaks = breaks,
    size = 3.2,
    vjust = -0.35
  ) +
  geom_freqpoly(breaks = breaks, colour = "black", linewidth = 1) +
  ggtitle("Histograma de pérdida de espesor (Thickness_Loss)") +
  xlab("Pérdida de espesor") +
  ylab("Frecuencia (n)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.15))
  )
Histo_Thickness_loss
#############################
## OGIVES: Thickness_loss ###
#############################
ojiva_TL <- TDF_Thickness_Loss
# Descending cumulative frequency (absolute): total minus everything
# accumulated before each class
ojiva_TL$F_desc <- with(ojiva_TL, sum(fi) - cumsum(fi) + fi)
# Descending cumulative frequency (relative)
ojiva_TL$H_desc <- round(ojiva_TL$F_desc / sum(ojiva_TL$fi), 4)
# ABSOLUTE ogive: ascending curve in blue, descending curve in red, both
# plotted against the class mark
Ojiva_TL_abs <- ggplot(data = ojiva_TL, mapping = aes(x = xi)) +
  geom_line(mapping = aes(y = Fi), colour = "blue", linewidth = 1) +
  geom_point(mapping = aes(y = Fi), colour = "blue", size = 2.5) +
  geom_text(mapping = aes(y = Fi, label = Fi), colour = "blue", size = 3, vjust = -0.7) +
  geom_line(mapping = aes(y = F_desc), colour = "red", linewidth = 1) +
  geom_point(mapping = aes(y = F_desc), colour = "red", size = 2.5) +
  geom_text(mapping = aes(y = F_desc, label = F_desc), colour = "red", size = 3, vjust = 1.3) +
  ggtitle("Ojiva absoluta — Pérdida de espesor (Thickness_Loss)") +
  xlab("Marca de clase (xi)") +
  ylab("Frecuencia acumulada (Ni)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  ) +
  # Make the y axis span from 0 to the total number of observations
  expand_limits(y = c(0, sum(ojiva_TL$fi)))
Ojiva_TL_abs
# RELATIVE ogive: same curves on the 0-1 scale, with the axis and the point
# labels shown as whole percentages
Ojiva_TL_rel <- ggplot(data = ojiva_TL, mapping = aes(x = xi)) +
  geom_line(mapping = aes(y = Hi), colour = "green3", linewidth = 1) +
  geom_point(mapping = aes(y = Hi), colour = "green3", size = 2.5) +
  geom_text(
    mapping = aes(y = Hi, label = paste0(round(Hi * 100, 0), "%")),
    colour = "green4", size = 3, vjust = -0.7
  ) +
  geom_line(mapping = aes(y = H_desc), colour = "red", linewidth = 1) +
  geom_point(mapping = aes(y = H_desc), colour = "red", size = 2.5) +
  geom_text(
    mapping = aes(y = H_desc, label = paste0(round(H_desc * 100, 0), "%")),
    colour = "red", size = 3, vjust = 1.3
  ) +
  ggtitle("Ojiva relativa — Pérdida de espesor (Thickness_Loss)") +
  xlab("Marca de clase (xi)") +
  ylab("Frecuencia acumulada (Hi)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  scale_y_continuous(labels = function(prop) paste0(round(prop * 100, 0), "%")) +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  ) +
  expand_limits(y = c(0, 1))
Ojiva_TL_rel
############################################################
####### Box-and-whisker plot (with labels) #################
############################################################
# Quartiles used only for the text labels (the box plot statistics
# themselves are computed by ggplot2)
q1 <- quantile(Datos$Thickness_Loss, probs = 0.25, na.rm = TRUE)
med <- median(Datos$Thickness_Loss, na.rm = TRUE)
q3 <- quantile(Datos$Thickness_Loss, probs = 0.75, na.rm = TRUE)
Caja_Thickness_loss <- ggplot(data = Datos, mapping = aes(x = Thickness_Loss, y = "")) +
  # Error-bar layer adds the end caps to the whiskers
  stat_boxplot(geom = "errorbar", width = 0.3) +
  geom_boxplot(
    colour = "black", fill = "pink",
    outlier.colour = "black", outlier.shape = 1
  ) +
  # Q1, median and Q3 labels, placed at their own x position; the median is
  # in bold and slightly higher than the other two
  annotate(
    "text",
    x = c(q1, med, q3),
    y = c(1.12, 1.18, 1.12),
    label = c(
      paste0("Q1=", round(q1, 3)),
      paste0("Mediana=", round(med, 3)),
      paste0("Q3=", round(q3, 3))
    ),
    fontface = c("plain", "bold", "plain"),
    size = 3.2
  ) +
  ggtitle("Diagrama de caja y bigotes — Pérdida de espesor (Thickness_Loss)") +
  xlab("Pérdida de espesor") +
  ylab("") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  )
Caja_Thickness_loss
Interpretación breve.
Thickness_Loss se distribuye en todo su rango (0.01 a 9.99) con
tendencia casi simétrica: media ≈ 4.89 y mediana ≈ 4.92. En el boxplot
se esperaría poca presencia de outliers, lo que sugiere variación amplia
pero controlada dentro del rango típico del dataset. —
## Material_Loss_norm
#####################
# Non-missing values of Material_Loss_norm and their count
x <- Datos$Material_Loss_norm
x <- x[!is.na(x)]
n <- length(x)
# Class limits: fixed-width intervals over the 0-1 range
breaks <- seq(0, 1, by = 0.1) # 10 classes
# Frequency table by class interval (right-closed intervals; the first one
# also includes its lower limit)
clase <- cut(x, breaks = breaks, include.lowest = TRUE, right = TRUE)
tabla <- as.data.frame(table(clase))
names(tabla) <- c("Intervalo", "fi")
# Relative and cumulative frequencies
tabla$hi <- round(tabla$fi / n, 4)
tabla$Fi <- cumsum(tabla$fi)
# Hi comes from the exact cumulative counts (Fi / n) instead of summing the
# already-rounded hi values, so rounding error cannot accumulate and the last
# class always reaches exactly 1.
tabla$Hi <- round(tabla$Fi / n, 4)
# Class mark (xi): midpoint of each interval
tabla$xi <- (head(breaks, -1) + tail(breaks, -1)) / 2
# Final table
TDF_Material_Loss_norm <- tabla[, c("Intervalo", "xi", "fi", "hi", "Fi", "Hi")]
knitr::kable(TDF_Material_Loss_norm, caption = "Tabla de distribución de frecuencias — Pérdida de material normalizada (Material_Loss_norm)")
| Intervalo | xi | fi | hi | Fi | Hi |
|---|---|---|---|---|---|
| [0,0.1] | 0.05 | 503 | 0.503 | 503 | 0.503 |
| (0.1,0.2] | 0.15 | 264 | 0.264 | 767 | 0.767 |
| (0.2,0.3] | 0.25 | 101 | 0.101 | 868 | 0.868 |
| (0.3,0.4] | 0.35 | 61 | 0.061 | 929 | 0.929 |
| (0.4,0.5] | 0.45 | 32 | 0.032 | 961 | 0.961 |
| (0.5,0.6] | 0.55 | 21 | 0.021 | 982 | 0.982 |
| (0.6,0.7] | 0.65 | 10 | 0.010 | 992 | 0.992 |
| (0.7,0.8] | 0.75 | 5 | 0.005 | 997 | 0.997 |
| (0.8,0.9] | 0.85 | 0 | 0.000 | 997 | 0.997 |
| (0.9,1] | 0.95 | 3 | 0.003 | 1000 | 1.000 |
# Histogram: Material_Loss_norm, built from the global vector `x` and binned
# on the global `breaks` vector, with the count printed above every bar and
# the frequency polygon drawn on top
Histo_Material_Loss_norm <- ggplot(
  data = data.frame(Material_Loss_norm = x),
  mapping = aes(x = Material_Loss_norm)
) +
  geom_histogram(breaks = breaks, colour = "black", fill = "red") +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "bin",
    breaks = breaks,
    size = 3.2,
    vjust = -0.35
  ) +
  geom_freqpoly(breaks = breaks, colour = "black", linewidth = 1) +
  ggtitle("Histograma de pérdida de material normalizada (Material_Loss_norm)") +
  xlab("Pérdida de material") +
  ylab("Frecuencia (n)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.15))
  )
Histo_Material_Loss_norm
# Ogives for Material_Loss_norm, built from its frequency table
ojiva_MLN <- TDF_Material_Loss_norm
# Descending cumulative frequency: absolute, then relative
ojiva_MLN$F_desc <- with(ojiva_MLN, sum(fi) - cumsum(fi) + fi)
ojiva_MLN$H_desc <- round(ojiva_MLN$F_desc / sum(ojiva_MLN$fi), 4)
# ABSOLUTE ogive: ascending curve in blue, descending curve in red, both
# plotted against the class mark
Ojiva_MLN_abs <- ggplot(data = ojiva_MLN, mapping = aes(x = xi)) +
  geom_line(mapping = aes(y = Fi), colour = "blue", linewidth = 1) +
  geom_point(mapping = aes(y = Fi), colour = "blue", size = 2.5) +
  geom_text(mapping = aes(y = Fi, label = Fi), colour = "blue", size = 3, vjust = -0.7) +
  geom_line(mapping = aes(y = F_desc), colour = "red", linewidth = 1) +
  geom_point(mapping = aes(y = F_desc), colour = "red", size = 2.5) +
  geom_text(mapping = aes(y = F_desc, label = F_desc), colour = "red", size = 3, vjust = 1.3) +
  ggtitle("Ojiva absoluta — Pérdida de material normalizada (Material_Loss_norm)") +
  xlab("Marca de clase (xi)") +
  ylab("Frecuencia acumulada (Ni)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  ) +
  # Make the y axis span from 0 to the total number of observations
  expand_limits(y = c(0, sum(ojiva_MLN$fi)))
Ojiva_MLN_abs
# RELATIVE ogive: same curves on the 0-1 scale, with the axis and the point
# labels shown as whole percentages
Ojiva_MLN_rel <- ggplot(data = ojiva_MLN, mapping = aes(x = xi)) +
  geom_line(mapping = aes(y = Hi), colour = "green3", linewidth = 1) +
  geom_point(mapping = aes(y = Hi), colour = "green3", size = 2.5) +
  geom_text(
    mapping = aes(y = Hi, label = paste0(round(Hi * 100, 0), "%")),
    colour = "green4", size = 3, vjust = -0.7
  ) +
  geom_line(mapping = aes(y = H_desc), colour = "red", linewidth = 1) +
  geom_point(mapping = aes(y = H_desc), colour = "red", size = 2.5) +
  geom_text(
    mapping = aes(y = H_desc, label = paste0(round(H_desc * 100, 0), "%")),
    colour = "red", size = 3, vjust = 1.3
  ) +
  ggtitle("Ojiva relativa — Pérdida de material normalizada (Material_Loss_norm)") +
  xlab("Marca de clase (xi)") +
  ylab("Frecuencia acumulada (Hi)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  scale_y_continuous(labels = function(prop) paste0(round(prop * 100, 0), "%")) +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  ) +
  expand_limits(y = c(0, 1))
Ojiva_MLN_rel
# Quartiles used only for the text labels (the box plot statistics
# themselves are computed by ggplot2)
q1 <- quantile(Datos$Material_Loss_norm, probs = 0.25, na.rm = TRUE)
med <- median(Datos$Material_Loss_norm, na.rm = TRUE)
q3 <- quantile(Datos$Material_Loss_norm, probs = 0.75, na.rm = TRUE)
Caja_Material_Loss_norm <- ggplot(
  data = Datos,
  mapping = aes(x = Material_Loss_norm, y = "")
) +
  # Error-bar layer adds the end caps to the whiskers
  stat_boxplot(geom = "errorbar", width = 0.3) +
  geom_boxplot(
    colour = "black", fill = "pink",
    outlier.colour = "black", outlier.shape = 1
  ) +
  # Q1, median and Q3 labels, placed at their own x position; the median is
  # in bold and slightly higher than the other two
  annotate(
    "text",
    x = c(q1, med, q3),
    y = c(1.12, 1.18, 1.12),
    label = c(
      paste0("Q1=", round(q1, 3)),
      paste0("Mediana=", round(med, 3)),
      paste0("Q3=", round(q3, 3))
    ),
    fontface = c("plain", "bold", "plain"),
    size = 3.2
  ) +
  ggtitle("Diagrama de caja y bigotes — Pérdida de material normalizada (Material_Loss_norm)") +
  xlab("Pérdida de material") +
  ylab("") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  )
Caja_Material_Loss_norm
Interpretación breve.
La normalización conserva la forma de la distribución original pero en
escala 0–1, facilitando comparación visual y lectura rápida. La variable
Material Loss es la que muestra el patrón estadístico más “crítico”:
presenta un sesgo fuerte a la derecha y cola pesada (curtosis alta), lo
cual se evidencia en la diferencia entre media (46.75) y mediana
(31.66). Se identifican outliers altos, indicando un subconjunto de
casos severos que inflan la media. Por lo tanto, la interpretación
descriptiva para esta variable debe basarse en la mediana y los percentiles,
porque el promedio queda fuertemente afectado por pérdidas extremas. La
versión normalizada Material_Loss_norm conserva exactamente la forma
estadística de Material_Loss (misma asimetría y cola), pero en escala
0–1, lo cual facilita la comparación y la interpretación visual. —
## Corrosion_Impact ##
#####################
# Non-missing values of Corrosion_Impact and their count
x <- Datos$Corrosion_Impact
x <- x[!is.na(x)]
n <- length(x)
# Number of classes (Sturges' rule)
k <- ceiling(1 + 3.322 * log10(n))
# Class limits: "round" break points covering the data range, about k classes
breaks <- pretty(range(x), n = k)
# Frequency table by class interval (right-closed intervals; the first one
# also includes its lower limit)
clase <- cut(x, breaks = breaks, include.lowest = TRUE, right = TRUE)
tabla <- as.data.frame(table(clase))
names(tabla) <- c("Intervalo", "fi")
# Relative and cumulative frequencies
tabla$hi <- round(tabla$fi / n, 4)
tabla$Fi <- cumsum(tabla$fi)
# Hi comes from the exact cumulative counts (Fi / n) instead of summing the
# already-rounded hi values, so rounding error cannot accumulate and the last
# class always reaches exactly 1.
tabla$Hi <- round(tabla$Fi / n, 4)
# Class mark (xi): midpoint of each interval
tabla$xi <- (head(breaks, -1) + tail(breaks, -1)) / 2
# Final table
TDF_Corrosion_Impact <- tabla[, c("Intervalo", "xi", "fi", "hi", "Fi", "Hi")]
knitr::kable(TDF_Corrosion_Impact, caption = "Tabla de distribución de frecuencias — Impacto de corrosión (Corrosion_Impact)")
| Intervalo | xi | fi | hi | Fi | Hi |
|---|---|---|---|---|---|
| [0,2] | 1 | 102 | 0.102 | 102 | 0.102 |
| (2,4] | 3 | 124 | 0.124 | 226 | 0.226 |
| (4,6] | 5 | 99 | 0.099 | 325 | 0.325 |
| (6,8] | 7 | 87 | 0.087 | 412 | 0.412 |
| (8,10] | 9 | 105 | 0.105 | 517 | 0.517 |
| (10,12] | 11 | 92 | 0.092 | 609 | 0.609 |
| (12,14] | 13 | 105 | 0.105 | 714 | 0.714 |
| (14,16] | 15 | 90 | 0.090 | 804 | 0.804 |
| (16,18] | 17 | 99 | 0.099 | 903 | 0.903 |
| (18,20] | 19 | 97 | 0.097 | 1000 | 1.000 |
# Histogram: Corrosion_Impact, binned on the global `breaks` vector, with the
# count printed above every bar and the frequency polygon drawn on top
Histo_Corrosion_Impact <- ggplot(data = Datos, mapping = aes(x = Corrosion_Impact)) +
  geom_histogram(breaks = breaks, colour = "black", fill = "purple") +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "bin",
    breaks = breaks,
    size = 3.2,
    vjust = -0.35
  ) +
  geom_freqpoly(breaks = breaks, colour = "black", linewidth = 1) +
  ggtitle("Histograma de impacto de corrosión (Corrosion_Impact)") +
  xlab("Impacto de corrosión") +
  ylab("Frecuencia (n)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.15))
  )
Histo_Corrosion_Impact
# Ogives for Corrosion_Impact, built from its frequency table
ojiva_CI <- TDF_Corrosion_Impact
# Descending cumulative frequency: absolute, then relative
ojiva_CI$F_desc <- with(ojiva_CI, sum(fi) - cumsum(fi) + fi)
ojiva_CI$H_desc <- round(ojiva_CI$F_desc / sum(ojiva_CI$fi), 4)
# ABSOLUTE ogive: ascending curve in blue, descending curve in red, both
# plotted against the class mark
Ojiva_CI_abs <- ggplot(data = ojiva_CI, mapping = aes(x = xi)) +
  geom_line(mapping = aes(y = Fi), colour = "blue", linewidth = 1) +
  geom_point(mapping = aes(y = Fi), colour = "blue", size = 2.5) +
  geom_text(mapping = aes(y = Fi, label = Fi), colour = "blue", size = 3, vjust = -0.7) +
  geom_line(mapping = aes(y = F_desc), colour = "red", linewidth = 1) +
  geom_point(mapping = aes(y = F_desc), colour = "red", size = 2.5) +
  geom_text(mapping = aes(y = F_desc, label = F_desc), colour = "red", size = 3, vjust = 1.3) +
  ggtitle("Ojiva absoluta — Impacto de corrosión (Corrosion_Impact)") +
  xlab("Marca de clase (xi)") +
  ylab("Frecuencia acumulada (Ni)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  ) +
  # Make the y axis span from 0 to the total number of observations
  expand_limits(y = c(0, sum(ojiva_CI$fi)))
Ojiva_CI_abs
# RELATIVE ogive: same curves on the 0-1 scale, with the axis and the point
# labels shown as whole percentages
Ojiva_CI_rel <- ggplot(data = ojiva_CI, mapping = aes(x = xi)) +
  geom_line(mapping = aes(y = Hi), colour = "green3", linewidth = 1) +
  geom_point(mapping = aes(y = Hi), colour = "green3", size = 2.5) +
  geom_text(
    mapping = aes(y = Hi, label = paste0(round(Hi * 100, 0), "%")),
    colour = "green4", size = 3, vjust = -0.7
  ) +
  geom_line(mapping = aes(y = H_desc), colour = "red", linewidth = 1) +
  geom_point(mapping = aes(y = H_desc), colour = "red", size = 2.5) +
  geom_text(
    mapping = aes(y = H_desc, label = paste0(round(H_desc * 100, 0), "%")),
    colour = "red", size = 3, vjust = 1.3
  ) +
  ggtitle("Ojiva relativa — Impacto de corrosión (Corrosion_Impact)") +
  xlab("Marca de clase (xi)") +
  ylab("Frecuencia acumulada (Hi)") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  scale_y_continuous(labels = function(prop) paste0(round(prop * 100, 0), "%")) +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  ) +
  expand_limits(y = c(0, 1))
Ojiva_CI_rel
# Quartiles used only for the text labels (the box plot statistics
# themselves are computed by ggplot2)
q1 <- quantile(Datos$Corrosion_Impact, probs = 0.25, na.rm = TRUE)
med <- median(Datos$Corrosion_Impact, na.rm = TRUE)
q3 <- quantile(Datos$Corrosion_Impact, probs = 0.75, na.rm = TRUE)
Caja_Corrosion_Impact <- ggplot(
  data = Datos,
  mapping = aes(x = Corrosion_Impact, y = "")
) +
  # Error-bar layer adds the end caps to the whiskers
  stat_boxplot(geom = "errorbar", width = 0.3) +
  geom_boxplot(
    colour = "black", fill = "pink",
    outlier.colour = "black", outlier.shape = 1
  ) +
  # Q1, median and Q3 labels, placed at their own x position; the median is
  # in bold and slightly higher than the other two
  annotate(
    "text",
    x = c(q1, med, q3),
    y = c(1.12, 1.18, 1.12),
    label = c(
      paste0("Q1=", round(q1, 3)),
      paste0("Mediana=", round(med, 3)),
      paste0("Q3=", round(q3, 3))
    ),
    fontface = c("plain", "bold", "plain"),
    size = 3.2
  ) +
  ggtitle("Diagrama de caja y bigotes — Impacto de corrosión (Corrosion_Impact)") +
  xlab("Impacto de corrosión") +
  ylab("") +
  labs(caption = "Jim Acuña y Davis Piguave") +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5),
    plot.title = element_text(face = "bold", hjust = 0.5, size = rel(1.1))
  )
Caja_Corrosion_Impact
Interpretación breve.
Corrosion_Impact presenta un comportamiento prácticamente simétrico:
media 9.75 y mediana 9.72, no aparecen outliers. Esto sugiere que el
impacto de corrosión está distribuido de forma extendida a lo largo de
su dominio, sin valores atípicos que distorsionen el resumen
central.