library(readr)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
## 
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Load the raw data set.
datasetf <- read_csv("datasetf.csv", show_col_types = FALSE)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Take the "Restart Date/Time" column and discard missing or empty entries.
raw_dates <- datasetf$`Restart Date/Time`
raw_dates <- raw_dates[!is.na(raw_dates) & raw_dates != ""]

# Parse the text as date-times; entries that do not match the format become NA.
fechas_obj <- as.POSIXct(raw_dates, format = "%m/%d/%Y %H:%M")

# Drop unparseable entries from the date vector itself. The mask has to be
# computed on the unfiltered vector: filtering the numeric copy first and then
# using it as a mask would produce an all-TRUE mask and remove nothing.
fechas_obj <- fechas_obj[!is.na(fechas_obj)]

# Numeric representation (seconds since 1970-01-01), kept element-for-element
# aligned with `fechas_obj`.
amperaje <- as.numeric(fechas_obj)

# --- CLASS INTERVAL COMPUTATION ---
# Fail early with a clear message: every quantity below is undefined without data.
stopifnot(length(amperaje) > 0)

# Number of classes: k = floor(1 + 3.322 * log10(n)).
k <- floor(1 + 3.322 * log10(length(amperaje)))

min_val <- min(amperaje)
max_val <- max(amperaje)
R <- max_val - min_val  # range, in seconds
A <- R / k              # class width, in seconds

# Lower limits: exactly k equally spaced values starting at the minimum.
# Computing them directly (instead of seq(..., to = max_val - A) followed by
# patch-up appends) keeps the class count independent of floating-point
# rounding in the end-point comparison.
Li_num <- min_val + A * (seq_len(k) - 1)

# Upper limits and class marks (interval midpoints).
Ls_num <- Li_num + A
MC_num <- (Li_num + Ls_num) / 2

# Absolute frequencies per class.
# findInterval() assigns each value to the class i with
# Li_num[i] <= value < Li_num[i + 1]; values at or above the last lower limit
# (including the maximum) fall in the last class. Using the lower limits as the
# only cut points guarantees that every observation is counted exactly once,
# with no magic upper bound for the last class.
ni <- as.numeric(
  tabulate(findInterval(amperaje, Li_num), nbins = length(Li_num))
)

# Relative and cumulative frequencies.
# hi: relative frequency of each class, as a percentage with 2 decimals.
hi <- round(ni / sum(ni) * 100, 2)

# Cumulative absolute frequencies, ascending and descending.
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))

# Cumulative percentages are derived from the exact cumulative counts and
# rounded once. Summing the already-rounded `hi` values would accumulate
# rounding error, so the last value would not be guaranteed to be 100.
Hiasc <- round(Niasc / sum(ni) * 100, 2)
Hidsc <- round(Nidsc / sum(ni) * 100, 2)

# Render the numeric limits and class marks as date-time text.
# .POSIXct() only tags the second counts as POSIXct; the values are untouched.
Li_fecha <- format(.POSIXct(Li_num), format = "%m/%d/%Y %H:%M")
Ls_fecha <- format(.POSIXct(Ls_num), format = "%m/%d/%Y %H:%M")
MC_fecha <- format(.POSIXct(MC_num), format = "%m/%d/%Y %H:%M")

# Frequency distribution table:
#   Li / Ls - lower / upper class limit (text)
#   MC      - class mark (text)
#   ni / hi - absolute / relative (%) frequency
#   Ni / Hi - ascending cumulative absolute / relative (%) frequency
TDFRestart <- data.frame(
  Li = Li_fecha, Ls = Ls_fecha, MC = MC_fecha,
  ni = ni, hi = hi,
  Ni = Niasc, Hi = Hiasc
)

# TABLE 1: frequency distribution rendered as HTML with kableExtra.
# Built step by step inside local() so that only `tabla1_sturges` is created
# in the calling environment.
tabla1_sturges <- local({
  titulo <- "<b>Tabla 1: Distribución de Frecuencias</b><br><span style='font-size:14px'><b>Variable: Restart Date/Time</b></span>"
  encabezados <- c("Li", "Ls", "MC", "ni", "hi", "Ni", "Hi")

  tabla <- kable(
    TDFRestart,
    format = "html",
    caption = titulo,
    align = c(rep("l", 3), rep("r", 4)),
    col.names = encabezados
  )
  tabla <- kable_styling(
    tabla,
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    font_size = 13,
    position = "center"
  )

  # Interval columns share a fixed width; ni and hi are highlighted.
  tabla <- column_spec(tabla, 1:3, width = "20%")
  tabla <- column_spec(tabla, 4, bold = TRUE, background = "#e6f3ff")
  tabla <- column_spec(tabla, 5, bold = TRUE, background = "#fff0e6")

  # Grouping header above the column names.
  add_header_above(tabla, c("Intervalos" = 3, "Frecuencias" = 4))
})

# Display Table 1.
tabla1_sturges
Tabla 1: Distribución de Frecuencias
Variable: Restart Date/Time
Intervalos
Frecuencias
Li Ls MC ni hi Ni Hi
01/11/2010 22:08 08/31/2010 21:41 05/07/2010 21:54 98 7.31 98 7.31
08/31/2010 21:41 04/20/2011 21:14 12/25/2010 21:28 94 7.01 192 14.32
04/20/2011 21:14 12/08/2011 20:48 08/14/2011 21:01 99 7.38 291 21.70
12/08/2011 20:48 07/27/2012 20:21 04/02/2012 20:35 97 7.23 388 28.93
07/27/2012 20:21 03/16/2013 19:55 11/20/2012 20:08 133 9.92 521 38.85
03/16/2013 19:55 11/03/2013 19:28 07/10/2013 19:42 120 8.95 641 47.80
11/03/2013 19:28 06/23/2014 19:02 02/27/2014 19:15 131 9.77 772 57.57
06/23/2014 19:02 02/10/2015 18:35 10/17/2014 18:48 130 9.69 902 67.26
02/10/2015 18:35 09/30/2015 18:09 06/06/2015 18:22 173 12.90 1075 80.16
09/30/2015 18:09 05/19/2016 17:42 01/24/2016 17:55 138 10.29 1213 90.45
05/19/2016 17:42 01/06/2017 17:16 09/12/2016 17:29 128 9.55 1341 100.00
# Totals are reported only for ni and hi.
total_ni <- sum(TDFRestart$ni)
total_hi <- sum(TDFRestart$hi)

# Append a TOTAL row. The cumulative columns are left blank in that row, which
# forces Ni and Hi to be character columns. They are formatted explicitly here
# (whole numbers for Ni, two decimals for Hi) instead of relying on rbind()'s
# implicit coercion, which drops trailing zeros ("21.7", "100") and makes the
# table inconsistent with the numeric rendering of the same values.
TDF_Completo <- rbind(
  data.frame(
    Li = TDFRestart$Li,
    Ls = TDFRestart$Ls,
    MC = TDFRestart$MC,
    ni = TDFRestart$ni,
    hi = TDFRestart$hi,
    Ni = formatC(TDFRestart$Ni, format = "d"),
    Hi = formatC(TDFRestart$Hi, format = "f", digits = 2)
  ),
  data.frame(
    Li = "TOTAL",
    Ls = "",
    MC = "",
    ni = total_ni,
    hi = total_hi,
    Ni = "",          # blank: no cumulative value for the total row
    Hi = ""           # blank: no cumulative value for the total row
  )
)

# TABLE 2: full distribution including the TOTAL row, rendered with kableExtra.
# Built step by step inside local() so that only `tabla2_total` is created in
# the calling environment.
tabla2_total <- local({
  titulo <- "<b>Tabla 2: Distribución Completa</b><br><span style='font-size:14px'><b>Con Totales</b></span>"
  encabezados <- c("Li", "Ls", "MC", "ni", "hi", "Ni", "Hi")

  tabla <- kable(
    TDF_Completo,
    format = "html",
    caption = titulo,
    align = c(rep("l", 3), rep("r", 4)),
    col.names = encabezados
  )
  tabla <- kable_styling(
    tabla,
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE,
    font_size = 13,
    position = "center"
  )

  # The last row holds the totals: bold white text on a blue background.
  tabla <- row_spec(
    tabla,
    nrow(TDF_Completo),
    bold = TRUE,
    color = "white",
    background = "#2E86AB",
    align = "c"
  )

  # Interval columns share a fixed width; ni and hi get a background tint.
  tabla <- column_spec(tabla, 1:3, width = "20%")
  tabla <- column_spec(tabla, 4, background = "#e6f3ff")
  tabla <- column_spec(tabla, 5, background = "#fff0e6")

  # Grouping header above the column names.
  add_header_above(tabla, c("Intervalos" = 3, "Frecuencias" = 4))
})

# Display Table 2.
tabla2_total
Tabla 2: Distribución Completa
Con Totales
Intervalos
Frecuencias
Li Ls MC ni hi Ni Hi
01/11/2010 22:08 08/31/2010 21:41 05/07/2010 21:54 98 7.31 98 7.31
08/31/2010 21:41 04/20/2011 21:14 12/25/2010 21:28 94 7.01 192 14.32
04/20/2011 21:14 12/08/2011 20:48 08/14/2011 21:01 99 7.38 291 21.7
12/08/2011 20:48 07/27/2012 20:21 04/02/2012 20:35 97 7.23 388 28.93
07/27/2012 20:21 03/16/2013 19:55 11/20/2012 20:08 133 9.92 521 38.85
03/16/2013 19:55 11/03/2013 19:28 07/10/2013 19:42 120 8.95 641 47.8
11/03/2013 19:28 06/23/2014 19:02 02/27/2014 19:15 131 9.77 772 57.57
06/23/2014 19:02 02/10/2015 18:35 10/17/2014 18:48 130 9.69 902 67.26
02/10/2015 18:35 09/30/2015 18:09 06/06/2015 18:22 173 12.90 1075 80.16
09/30/2015 18:09 05/19/2016 17:42 01/24/2016 17:55 138 10.29 1213 90.45
05/19/2016 17:42 01/06/2017 17:16 09/12/2016 17:29 128 9.55 1341 100
TOTAL 1341 100.00
# Write both rendered tables to HTML files in the working directory.
save_kable(x = tabla1_sturges, file = "tabla1_restart_li_ls.html")
save_kable(x = tabla2_total, file = "tabla2_restart_total.html")

# --- PLOTS ---

# 1. Yearly histogram: the date-time vector is binned by calendar year and the
#    axis is labelled with the year only.
hist(
  fechas_obj,
  breaks = "years",
  freq = TRUE,
  format = "%Y",
  col = "lightblue",
  las = 1,
  main = "Distribución Anual - Restart Date/Time",
  xlab = "Año"
)

# 2. Histogram on the computed class intervals: the breaks are all lower limits
#    plus the upper limit of the last class. The default numeric x axis is
#    suppressed so that date labels can be drawn instead.
hist(
  amperaje,
  breaks = c(Li_num, Ls_num[length(Ls_num)]),
  col = "blue",
  xaxt = "n",
  main = "Histograma - Restart Date/Time",
  xlab = "Fecha",
  ylab = "Frecuencia"
)

# X axis: one tick per lower class limit, labelled as a date and drawn
# perpendicular to the axis in a smaller font.
axis(
  side = 1,
  at = Li_num,
  labels = format(.POSIXct(Li_num), "%m/%d/%Y"),
  las = 2,
  cex.axis = 0.7
)

# 3. Ogives (cumulative frequency curves)
# Nidsc[i] counts the observations at or above the lower limit of class i, so
# the descending ogive is drawn at the lower limits.
ejex_fecha <- structure(Li_num, class = c("POSIXct", "POSIXt"))

# The axis limits must cover both curves: x spans from the first lower limit to
# the last upper limit, and y spans both cumulative series. Otherwise points of
# the ascending ogive fall outside the plotting region and are clipped.
plot(ejex_fecha, Nidsc,
     type = "o",
     pch = 16,
     col = "blue",
     xlim = range(Li_num, Ls_num),
     ylim = range(Nidsc, Niasc),
     main = "Ojivas - Restart Date/Time",
     xlab = "Fecha",
     ylab = "Frecuencia Acumulada")

# Niasc[i] counts the observations up to the end of class i, so the ascending
# ogive is drawn at the upper limits rather than the lower ones.
lines(structure(Ls_num, class = c("POSIXct", "POSIXt")), Niasc,
      type = "b",
      pch = 16,
      col = "black")

legend("topright",
       legend = c("Descendente", "Ascendente"),
       col = c("blue", "black"),
       lty = 1)

# 4. Boxplot
# boxplot() computes its statistics on the underlying numeric values and would
# label the value axis with raw second counts. The default axis is suppressed
# and replaced by a date-time axis so the scale is readable.
boxplot(fechas_obj,
        horizontal = TRUE,
        col = "blue",
        xaxt = "n",
        main = "Boxplot - Restart Date/Time",
        xlab = "Fecha")
axis.POSIXct(1, x = fechas_obj)

# --- STATISTICAL SUMMARY ---
# Prints the sample size, number of classes, range and class width (converted
# from seconds to days), the totals of ni and hi, and the first/last date.
# Lines starting with "##" are the console output recorded for each call.
cat("\n")
cat("============================================================\n")
## ============================================================
cat("RESUMEN ESTADÍSTICO - Restart Date/Time\n")
## RESUMEN ESTADÍSTICO - Restart Date/Time
cat("============================================================\n")
## ============================================================
cat("n =", length(amperaje), "\n")
## n = 1341
cat("k =", k, "\n")
## k = 11
cat("R (rango) =", round(R/86400, 2), "días\n")
## R (rango) = 2551.8 días
cat("A (amplitud) =", round(A/86400, 2), "días\n")
## A (amplitud) = 231.98 días
cat("∑ni =", total_ni, "\n")
## ∑ni = 1341
cat("∑hi =", total_hi, "%\n")
## ∑hi = 100 %
cat("Fecha mínima =", format(min(fechas_obj), "%m/%d/%Y %H:%M"), "\n")
## Fecha mínima = 01/11/2010 22:08
cat("Fecha máxima =", format(max(fechas_obj), "%m/%d/%Y %H:%M"), "\n")
## Fecha máxima = 01/06/2017 17:16
cat("============================================================\n")
## ============================================================
# Open the full table in the data viewer. View() needs an interactive session,
# so it is skipped when the script is knitted or run in batch mode.
if (interactive()) {
  View(TDF_Completo)
}