#==============================CARGA DE DATOS Y LIBRERIAS========================
library(gt)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Intentar cargar e1071, si no existe, se instala automaticamente
if(!require(e1071)) {
install.packages("e1071")
library(e1071)
}
## Cargando paquete requerido: e1071
# Funcion personalizada para calcular la moda
get_mode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
# Configuracion de ruta
setwd("C:/Users/HP/Documents/PROYECTO ESTADISTICA/RStudio")
# Carga de datos
datos <- read.csv2("tablap.csv", header = TRUE)
# Limpieza de datos
gas_production <- datos$Total.gas.production.by.2023
gas_production <- na.omit(gas_production)
#==============================REGLA DE STURGES=================================
R <- max(gas_production) - min(gas_production)
k <- 1 + (3.3 * log10(length(gas_production)))
k <- floor(k)
A <- R / k
liminf <- seq(from = min(gas_production), by = A, length.out = k)
limsup <- liminf + A
limsup[k] <- max(gas_production)
MC <- (liminf + limsup) / 2
# Conteo de frecuencias (ni)
ni <- c()
for (i in 1:k) {
if (i == k)
ni[i] <- length((subset(gas_production, gas_production >= liminf[i] & gas_production <= limsup[i])))
else
ni[i] <- length(subset(gas_production, gas_production >= liminf[i] & gas_production < limsup[i]))
}
# Calculos de frecuencias relativas y acumuladas
hi <- (ni / length(gas_production)) * 100
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- cumsum(hi)
Hidsc <- rev(cumsum(rev(hi)))
# Creacion de la Tabla de Distribucion de Frecuencias (TDF)
TDFcu_gas_production <- data.frame(
liminf = round(liminf, 2),
limsup = round(limsup, 2),
MC = round(MC, 2),
ni = ni,
hi_perc = round(hi, 2),
Niasc = Niasc,
Nidsc = Nidsc,
Hiasc_perc = round(Hiasc, 2),
Hidsc_perc = round(Hidsc, 2)
)
print("Distribucion de la Produccion Total de Gas hacia 2023")
## [1] "Distribucion de la Produccion Total de Gas hacia 2023"
print(TDFcu_gas_production)
## liminf limsup MC ni hi_perc Niasc Nidsc Hiasc_perc Hidsc_perc
## 1 25946 5024793 2525369 11815 94.06 11815 12561 94.06 100.00
## 2 5024793 10023639 7524216 552 4.39 12367 746 98.46 5.94
## 3 10023639 15022486 12523063 107 0.85 12474 194 99.31 1.54
## 4 15022486 20021333 17521910 38 0.30 12512 87 99.61 0.69
## 5 20021333 25020180 22520756 23 0.18 12535 49 99.79 0.39
## 6 25020180 30019026 27519603 13 0.10 12548 26 99.90 0.21
## 7 30019026 35017873 32518450 4 0.03 12552 13 99.93 0.10
## 8 35017873 40016720 37517296 1 0.01 12553 9 99.94 0.07
## 9 40016720 45015566 42516143 1 0.01 12554 8 99.94 0.06
## 10 45015566 50014413 47514990 1 0.01 12555 7 99.95 0.06
## 11 50014413 55013260 52513837 3 0.02 12558 6 99.98 0.05
## 12 55013260 60012107 57512683 2 0.02 12560 3 99.99 0.02
## 13 60012107 65010953 62511530 0 0.00 12560 1 99.99 0.01
## 14 65010953 70009800 67510377 1 0.01 12561 1 100.00 0.01
#=========================GRAFICAS==========================
# 1. Histograma
Histostur_gas <- hist(gas_production,
main = "Distribucion de la Produccion Total de Gas hacia 2023",
breaks = seq(min(gas_production), max(gas_production), length.out = k + 1),
xlab = "PRODUCCION DE GAS (M3)",
ylab = "CANTIDAD",
col = "darkgoldenrod",
plot = TRUE)

# 2. Ojivas
x_asc_gas <- c(min(TDFcu_gas_production$liminf), TDFcu_gas_production$limsup)
y_asc_gas <- c(0, TDFcu_gas_production$Niasc)
x_desc_gas <- c(TDFcu_gas_production$liminf, max(TDFcu_gas_production$limsup))
y_desc_gas <- c(TDFcu_gas_production$Nidsc, 0)
y_plot_range_gas <- c(0, max(c(y_asc_gas, y_desc_gas), na.rm = TRUE))
x_plot_range_gas <- range(c(x_asc_gas, x_desc_gas), na.rm = TRUE)
plot(x_asc_gas, y_asc_gas, type = "o",
main = "Distribucion de la Produccion Total de Gas hacia 2023",
xlab = "PRODUCCION DE GAS", ylab = "CANTIDAD",
col = "darkgreen",
xlim = x_plot_range_gas,
ylim = y_plot_range_gas)
lines(x_desc_gas, y_desc_gas, col = "darkred", type = "o")

# 3. Boxplot
boxplot(gas_production, horizontal = TRUE, col = "darkorchid",
main = "Distribucion de la Produccion Total de Gas hacia 2023",
xlab = "PRODUCCION DE GAS")

#======================INDICADORES ESTADISTICOS===============================
# Convertir a numerico para evitar errores de tipo de dato
gas_num <- as.numeric(gas_production)
mean_val <- mean(gas_num)
median_val <- median(gas_num)
sd_val <- sd(gas_num)
var_val <- var(gas_num)
skew_val <- skewness(gas_num)
kurt_val <- kurtosis(gas_num)
cv_val <- (sd_val / mean_val) * 100
indicadores_gas_production <- data.frame(
Indicador = c("Moda","Mediana", "Media(x)", "Desviacion Estandar", "Varianza", "Coef Variacion (%)", "Asimetria", "Curtosis"),
Valor = c(
round(get_mode(gas_num), 2),
round(median_val, 2),
round(mean_val, 2),
round(sd_val, 2),
round(var_val, 2),
round(cv_val, 2),
round(skew_val, 2),
round(kurt_val, 2)
)
)
print("Distribucion de la Produccion Total de Gas hacia 2023")
## [1] "Distribucion de la Produccion Total de Gas hacia 2023"
print(indicadores_gas_production)
## Indicador Valor
## 1 Moda 1.725565e+06
## 2 Mediana 1.001777e+06
## 3 Media(x) 1.740013e+06
## 4 Desviacion Estandar 2.784581e+06
## 5 Varianza 7.753889e+12
## 6 Coef Variacion (%) 1.600300e+02
## 7 Asimetria 7.960000e+00
## 8 Curtosis 1.124600e+02