#==============================ENCABEZADO=======================================
# TEMA: ED Variables Cuantitativas - Vida Estimada (Estimated lifetime)
# AUTOR: GRUPO 4
# FECHA: 18-12-2025
#==============================CARGA DE DATOS===================================
# Session setup: set the `encoding` option to UTF-8 and attach the packages
# used below.
options(encoding = "UTF-8")
# gt renders the tables; e1071 supplies skewness() and kurtosis().
# dplyr is attached after gt, so its masking of stats/base functions applies
# (see the captured console output kept below as `##` comments).
library(gt)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(e1071)

# Point the session at the project folder, then load the data set.
# read.csv2() already defaults to header = TRUE, sep = ";" and dec = ",",
# so only the file encoding has to be stated explicitly.
# NOTE(review): the absolute path is machine-specific - confirm before
# running this script on another computer.
setwd("C:/Users/HP/Documents/PROYECTO ESTADISTICA/RStudio")
datos <- read.csv2(file = "tablap.csv", fileEncoding = "UTF-8")

# Mode function.
#
# Returns the most frequent value of `v`. When several values share the
# highest count, the one that appears first in `v` is returned.
#
# Args:
#   v:     an atomic vector.
#   na.rm: if TRUE, missing values are dropped before counting. The default
#          (FALSE) keeps the previous behaviour, in which NA is counted like
#          any other value and can therefore be returned as the mode.
#
# Returns:
#   A length-one vector of the same type as `v`.
get_mode <- function(v, na.rm = FALSE) {
  if (na.rm) {
    v <- v[!is.na(v)]
  }
  uniqv <- unique(v)
  # match() maps every element to its position in `uniqv`; tabulate() counts
  # those positions and which.max() picks the first position with the top count.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

#===============PROCESAMIENTO DE LA VARIABLE: ESTIMATED LIFETIME================

# Extract the variable; the column name must match the CSV header.
v_lifetime <- datos$Estimated.lifetime
# `$` returns NULL for a missing column, and a column that was not parsed as
# numeric cannot be binned: stop here with a clear message instead of failing
# later inside seq()/cut().
if (!is.numeric(v_lifetime) || all(is.na(v_lifetime))) {
  stop(
    "Column 'Estimated.lifetime' is missing, non-numeric or entirely NA.",
    call. = FALSE
  )
}

# Observed range, ignoring missing values.
min_life <- min(v_lifetime, na.rm = TRUE)
max_life <- max(v_lifetime, na.rm = TRUE)

# Class width, and the multiples of that width that enclose the data.
ancho <- 5
start_val <- floor(min_life / ancho) * ancho
end_val <- ceiling(max_life / ancho) * ancho

# Classes are left-closed [a, b) (right = FALSE below). One extra class is
# needed only when the maximum lies exactly on a class boundary; adding it
# unconditionally produces an empty trailing class beyond the data range.
last_break_life <- if (end_val > max_life) end_val else end_val + ancho
breaks_life <- seq(start_val, last_break_life, by = ancho)
labels_life <- paste0(head(breaks_life, -1), " - ", tail(breaks_life, -1))

# Group the observations into classes and count them (table() drops NA).
life_categorizada <- cut(v_lifetime, breaks = breaks_life, labels = labels_life, right = FALSE)
TDF_life_base <- as.data.frame(table(life_categorizada))
names(TDF_life_base) <- c("Rango_Vida", "ni")

# Frequencies: relative (%), ascending cumulative and descending cumulative.
hi_life <- (TDF_life_base$ni / sum(TDF_life_base$ni)) * 100
Niasc_life <- cumsum(TDF_life_base$ni)
Hiasc_life <- cumsum(hi_life)
Nidsc_life <- rev(cumsum(rev(TDF_life_base$ni)))
Hidsc_life <- rev(cumsum(rev(hi_life)))
#==============================TABLA DE DATOS===================================
# Frequency distribution table: one row per class with absolute (ni),
# relative (hi, %) and cumulative frequencies, percentages rounded to 2 places.
Tabla_Life_Final <- data.frame(
  # seq_len() is safe for a zero-row input, where 1:nrow() would give c(1, 0).
  N = seq_len(nrow(TDF_life_base)),
  Rango_Vida = as.character(TDF_life_base$Rango_Vida),
  ni = TDF_life_base$ni,
  hi = round(hi_life, 2),
  Niasc = Niasc_life,
  Hiasc = round(Hiasc_life, 2),
  Nidsc = Nidsc_life,
  Hidsc = round(Hidsc_life, 2)
)

# Total row: only ni and hi carry a value; the cumulative columns are left NA
# and are blanked out when the table is rendered.
Fila_Total_Life <- data.frame(
  N = NA, Rango_Vida = "TOTAL", ni = sum(Tabla_Life_Final$ni), hi = 100,
  Niasc = NA, Hiasc = NA, Nidsc = NA, Hidsc = NA
)
TDF_Life_Completa <- rbind(Tabla_Life_Final, Fila_Total_Life)

#=====================TABLA PROFESIONAL (GT)====================================
# Build the presentation table step by step from the completed frequency table.
tabla_gt_life <- gt(TDF_Life_Completa)

# Title.
tabla_gt_life <- tab_header(
  tabla_gt_life,
  title = "DISTRIBUCION DE VIDA ESTIMADA "
)

# Column headings shown to the reader.
tabla_gt_life <- cols_label(
  tabla_gt_life,
  N = "N",
  Rango_Vida = "Rangos",
  ni = "ni",
  hi = "hi",
  Niasc = "Ni_asc",
  Hiasc = "Hi_asc",
  Nidsc = "Ni_des",
  Hidsc = "Hi_des"
)

# Show NA cells (the unused cells of the TOTAL row) as blanks.
tabla_gt_life <- sub_missing(
  tabla_gt_life,
  columns = everything(),
  missing_text = ""
)

# Highlight the TOTAL row with a light fill and bold text.
tabla_gt_life <- tab_style(
  tabla_gt_life,
  style = list(cell_fill(color = "gray95"), cell_text(weight = "bold")),
  locations = cells_body(rows = Rango_Vida == "TOTAL")
)

# Centre every column.
tabla_gt_life <- cols_align(
  tabla_gt_life,
  align = "center",
  columns = everything()
)

tabla_gt_life
## DISTRIBUCION DE VIDA ESTIMADA
## N Rangos ni hi Ni_asc Hi_asc Ni_des Hi_des
## 1 25 - 30 178 1.42 178 1.42 12561 100.00
## 2 30 - 35 0 0.00 178 1.42 12383 98.58
## 3 35 - 40 0 0.00 178 1.42 12383 98.58
## 4 40 - 45 0 0.00 178 1.42 12383 98.58
## 5 45 - 50 0 0.00 178 1.42 12383 98.58
## 6 50 - 55 10460 83.27 10638 84.69 12383 98.58
## 7 55 - 60 394 3.14 11032 87.83 1923 15.31
## 8 60 - 65 549 4.37 11581 92.20 1529 12.17
## 9 65 - 70 699 5.56 12280 97.76 980 7.80
## 10 70 - 75 281 2.24 12561 100.00 281 2.24
## 11 75 - 80 0 0.00 12561 100.00 0 0.00
## 
## TOTAL 12561 100.00



#===============================GRAFICO DE BARRAS==============================
# Widen the bottom margin so the rotated class labels fit.
par(mar = c(8, 4, 4, 2) + 0.1)

# Draws one bar chart of the class frequencies. All four charts share the
# class labels, the title, the label rotation and the label size; only the
# bar heights, fill colour, y-axis label and y-axis range differ.
# `y_range = NULL` lets barplot() pick the range from the data.
draw_life_bars <- function(heights, fill, y_label, y_range = NULL) {
  barplot(
    heights,
    names.arg = Tabla_Life_Final$Rango_Vida,
    las = 2,
    main = "DISTRIBUCION DE VIDA ESTIMADA ",
    col = fill,
    ylab = y_label,
    ylim = y_range,
    cex.names = 0.7
  )
  invisible(NULL)
}

# Absolute frequency, local scale (y axis fitted to the data).
draw_life_bars(Tabla_Life_Final$ni, fill = "skyblue", y_label = "ni")

# Absolute frequency, global scale (y axis from 0 to the total count).
draw_life_bars(
  Tabla_Life_Final$ni,
  fill = "darkgrey",
  y_label = "ni",
  y_range = c(0, sum(Tabla_Life_Final$ni))
)

# Relative frequency, local scale (y axis fitted to the data).
draw_life_bars(Tabla_Life_Final$hi, fill = "lightgreen", y_label = "hi (%)")

# Relative frequency, global scale (y axis from 0 to 100 %).
draw_life_bars(
  Tabla_Life_Final$hi,
  fill = "grey50",
  y_label = "hi (%)",
  y_range = c(0, 100)
)

# ===================Grafico Ojivas Absoluta (Ni)==============================
# One x position per class; seq_len() stays empty for a zero-row table,
# where 1:nrow() would yield c(1, 0).
x_life <- seq_len(nrow(Tabla_Life_Final))

# Absolute ogives: ascending (blue) and descending (dark red) cumulative
# counts, drawn as points. The default x axis is suppressed and replaced by
# the class labels, rotated to stay readable.
plot(x_life, Tabla_Life_Final$Niasc, type = "p", pch = 19, col = "blue",
     main = "DISTRIBUCION DE VIDA ESTIMADA",
     xlab = "Rangos", ylab = "Cantidad Acumulada", xaxt = 'n')
axis(side = 1, at = x_life, labels = Tabla_Life_Final$Rango_Vida, las = 2, cex.axis = 0.7)
points(x_life, Tabla_Life_Final$Nidsc, col = "darkred", pch = 18)
legend("topright", legend = c("Ni Asc", "Ni Des"), col = c("blue", "darkred"), pch = c(19, 18), bty = "n")

# Relative ogives: the same two curves as cumulative percentages, with the
# y axis fixed to 0-100 %.
plot(x_life, Tabla_Life_Final$Hiasc, type = "p", pch = 19, col = "blue",
     main = "DISTRIBUCION DE VIDA ESTIMADA ",
     xlab = "Rangos", ylab = "Porcentaje Acumulado (%)", xaxt = 'n', ylim = c(0, 100))
axis(side = 1, at = x_life, labels = Tabla_Life_Final$Rango_Vida, las = 2, cex.axis = 0.7)
points(x_life, Tabla_Life_Final$Hidsc, col = "darkred", pch = 18)
legend("right", legend = c("Hi Asc", "Hi Des"), col = c("blue", "darkred"), pch = c(19, 18), bty = "n")

#===============================BOXPLOT=======================================
# Horizontal box-and-whisker plot of the raw (ungrouped) lifetime values.
boxplot(
  x = v_lifetime,
  main = "DISTRIBUCION DE VIDA ESTIMADA",
  xlab = "Unidades de Tiempo",
  col = "orange",
  horizontal = TRUE
)

# ========================INDICADORES ESTADISTICOS===========================

# Mean and standard deviation are reused below for the coefficient of variation.
mean_life <- mean(v_lifetime, na.rm = TRUE)
sd_life <- sd(v_lifetime, na.rm = TRUE)

# Summary indicators of the raw variable, each rounded to two decimals.
# Every statistic ignores missing values. The mode gets the NA-free vector
# explicitly because get_mode() would otherwise count NA as a candidate value.
# NOTE(review): e1071::kurtosis() with its default `type` reports excess
# kurtosis (a normal distribution scores 0) - confirm this is the definition
# intended for the "Curtosis" row.
indicadores_life_df <- data.frame(
  Indicador = c("Moda", "Mediana", "Media", "Desviacion Estandar", 
                "Varianza", "Coef. Variacion (%)", "Asimetria", "Curtosis"),
  Valor = c(
    round(get_mode(v_lifetime[!is.na(v_lifetime)]), 2),
    round(median(v_lifetime, na.rm = TRUE), 2),
    round(mean_life, 2),
    round(sd_life, 2),
    round(var(v_lifetime, na.rm = TRUE), 2),
    round((sd_life / mean_life) * 100, 2),
    round(skewness(v_lifetime, na.rm = TRUE), 2),
    round(kurtosis(v_lifetime, na.rm = TRUE), 2)
  )
)

# Render the indicators as a titled table.
gt(indicadores_life_df) %>% 
  tab_header(title = "INDICADORES: ESTIMATED LIFETIME")
## INDICADORES: ESTIMATED LIFETIME
## Indicador Valor
## Moda 50.00
## Mediana 50.00
## Media 51.86
## Desviacion Estandar 6.30
## Varianza 39.64
## Coef. Variacion (%) 12.14
## Asimetria 0.51
## Curtosis 5.89