#==============================CARGA DE DATOS===================================
library(gt)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# NOTE(review): the working directory is pinned to an absolute,
# machine-specific path; the relative file name below is resolved against it.
setwd("C:/Users/HP/Documents/PROYECTO ESTADISTICA/RStudio")

# Load the raw table; the first row of the file holds the column names.
datos <- read.csv2(file = "tablap.csv", header = TRUE)

# VARIABLE: GAS ENERGY
# Swap any decimal comma for a decimal point, coerce the column to numeric,
# and drop the entries that could not be converted (NA).
energia_gas <- na.omit(
  as.numeric(
    gsub(pattern = ",", replacement = ".", x = datos$Energy.of.gas)
  )
)

# 2. Frequency parameters (Sturges' rule)
n <- length(energia_gas)            # sample size
R <- diff(range(energia_gas))       # range of the data: max - min
k <- floor(3.322 * log10(n) + 1)    # number of classes (Sturges constant 3.322)
A <- R / k                          # class width

# 3. Class limits and class marks
# Lower limits start at the minimum and advance one class width at a time.
liminf <- seq(length.out = k, from = min(energia_gas), by = A)

# Upper limits sit one width above each lower limit, except the last one,
# which is pinned to the observed maximum.
limsup <- c(head(liminf + A, -1), max(energia_gas))

# Class mark: midpoint of each interval.
MC <- 0.5 * (liminf + limsup)

# 4. Absolute frequencies
# Each observation is assigned to exactly one class using a single vector of
# break points: the k lower limits plus the final upper limit. Classes are
# left-closed / right-open, [liminf[i], liminf[i + 1]), and the last class is
# closed on the right so the maximum is counted (rightmost.closed = TRUE).
# Sharing one break vector avoids gaps or overlaps between `limsup[i]` and
# `liminf[i + 1]`, which are computed separately and may differ by
# floating-point rounding. It also replaces the loop that grew `ni`
# element by element.
ni <- tabulate(
  findInterval(energia_gas, c(liminf, limsup[k]), rightmost.closed = TRUE),
  nbins = k
)

# Relative frequency of each class, expressed as a percentage of n.
hi <- 100 * (ni / n)

# Ascending cumulative frequencies (absolute and percentage): running totals
# from the first class to the last.
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)

# Descending cumulative frequencies: accumulate from the last class backwards,
# then restore the original class order.
Nidsc <- rev(cumsum(ni[rev(seq_along(ni))]))
Hidsc <- rev(cumsum(hi[rev(seq_along(hi))]))

# 5. Final frequency table
# One row per class. Limits, class marks and percentage columns are rounded
# to two decimals; the count columns are stored as computed.
TDFcu_energia_gas <- local({
  to_2dp <- function(x) round(x, digits = 2)
  data.frame(
    liminf = to_2dp(liminf),
    limsup = to_2dp(limsup),
    MC = to_2dp(MC),
    ni = ni,
    hi_perc = to_2dp(hi),
    Niasc = Niasc,
    Nidsc = Nidsc,
    Hiasc_perc = to_2dp(Hiasc),
    Hidsc_perc = to_2dp(Hidsc)
  )
})

#============================== DATA TABLE =====================================
# Print the title, then the full frequency table.
print(x = "DISTRIBUCION DEL POTENCIAL ENERGETICO DEL PRODUCTO")
## [1] "DISTRIBUCION DEL POTENCIAL ENERGETICO DEL PRODUCTO"
print(x = TDFcu_energia_gas)
##         liminf      limsup          MC    ni hi_perc Niasc Nidsc Hiasc_perc
## 1     28139305  5449555728  2738847516 11815   94.06 11815 12561      94.06
## 2   5449555728 10870972151  8160263939   552    4.39 12367   746      98.46
## 3  10870972151 16292388574 13581680362   107    0.85 12474   194      99.31
## 4  16292388574 21713804997 19003096785    38    0.30 12512    87      99.61
## 5  21713804997 27135221420 24424513209    23    0.18 12535    49      99.79
## 6  27135221420 32556637843 29845929632    13    0.10 12548    26      99.90
## 7  32556637843 37978054266 35267346055     4    0.03 12552    13      99.93
## 8  37978054266 43399470689 40688762478     1    0.01 12553     9      99.94
## 9  43399470689 48820887113 46110178901     1    0.01 12554     8      99.94
## 10 48820887113 54242303536 51531595324     1    0.01 12555     7      99.95
## 11 54242303536 59663719959 56953011747     3    0.02 12558     6      99.98
## 12 59663719959 65085136382 62374428170     2    0.02 12560     3      99.99
## 13 65085136382 70506552805 67795844593     0    0.00 12560     1      99.99
## 14 70506552805 75927969228 73217261016     1    0.01 12561     1     100.00
##    Hidsc_perc
## 1      100.00
## 2        5.94
## 3        1.54
## 4        0.69
## 5        0.39
## 6        0.21
## 7        0.10
## 8        0.07
## 9        0.06
## 10       0.06
## 11       0.05
## 12       0.02
## 13       0.01
## 14       0.01
#============= histogram ================
# Draw the histogram on the same class limits used for the frequency table
# (the k lower limits plus the final upper limit) instead of recomputing the
# breaks with a separate seq() call. `right = FALSE` makes the bars
# left-closed / right-open, matching how `ni` is counted, and
# `include.lowest = TRUE` keeps the maximum inside the last class. With these
# settings the bar heights agree with the `ni` column of the table.
Histostur_energia <- hist(
  energia_gas,
  breaks = c(liminf, limsup[k]),
  right = FALSE,
  include.lowest = TRUE,
  main = "DISTRIBUCION DEL POTENCIAL ENERGETICO DEL PRODUCTO",
  xlab = "ENERGIA DEL GAS",
  ylab = "CANTIDAD",
  col = "darkgreen",
  plot = TRUE
)

# Ascending ogive: zero at the smallest lower limit, then the ascending
# cumulative count at each upper limit.
x_asc_energia <- with(TDFcu_energia_gas, c(min(liminf), limsup))
y_asc_energia <- c(0, TDFcu_energia_gas[["Niasc"]])

# Descending ogive: the descending cumulative count at each lower limit,
# falling to zero at the largest upper limit.
x_desc_energia <- with(TDFcu_energia_gas, c(liminf, max(limsup)))
y_desc_energia <- c(TDFcu_energia_gas[["Nidsc"]], 0)

# Shared axis ranges so both curves fit in the same plotting region.
y_plot_range_energia <- c(
  0,
  max(y_asc_energia, y_desc_energia, na.rm = TRUE)
)
x_plot_range_energia <- range(x_asc_energia, x_desc_energia, na.rm = TRUE)

# Draw the ascending curve first, then overlay the descending one.
plot(
  x = x_asc_energia,
  y = y_asc_energia,
  type = "o",
  col = "darkred",
  xlim = x_plot_range_energia,
  ylim = y_plot_range_energia,
  main = "DISTRIBUCION DEL POTENCIAL ENERGETICO DEL PRODUCTO",
  xlab = "ENERGIA DEL GAS",
  ylab = "CANTIDAD"
)
lines(x = x_desc_energia, y = y_desc_energia, type = "o", col = "darkblue")

#============================ boxplot ========================
# Horizontal box-and-whisker plot of the cleaned gas-energy values.
boxplot(
  x = energia_gas,
  main = "DISTRIBUCION DEL POTENCIAL ENERGETICO DEL PRODUCTO",
  xlab = "ENERGIA DEL GAS",
  col = "purple",
  horizontal = TRUE
)