##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
### AUTOR: FERNANDO NEIRA ###
### CARRERA: INGENIERÍA EN PETRÓLEOS #####
# 1. Cargar librerías
library(tidyverse)## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# 2. Cargar el archivo y almacenarlo en la variable 'Datos'
# Usamos la ruta que ya validamos en tu equipo
Datos <- read_excel("C:/Users/ASUS/OneDrive/Escritorio/ESTADÍSTICA/EXPO/Variables/plantas_dep.xls",
sheet = "Fernando_Depuracion")
# 3. Verificar que se almacenó correctamente
str(Datos)## tibble [58,812 × 29] (S3: tbl_df/tbl/data.frame)
## $ objectid : num [1:58812] 127 129 131 132 133 137 138 139 140 145 ...
## $ code : chr [1:58812] "00127-ARG-P" "00129-ARG-G" "00131-ARG-P" "00132-ARG-P" ...
## $ plant_name : chr [1:58812] "Aconcagua solar farm" "Altiplano 200 Solar Power Plant" "Anchoris solar farm" "Antu Newen solar farm" ...
## $ country : chr [1:58812] "Argentina" "Argentina" "Argentina" "Argentina" ...
## $ operational_status : chr [1:58812] "announced" "operating" "construction" "cancelled - inferred 4 y" ...
## $ longitude : num [1:58812] -68.9 -66.9 -68.9 -70.3 -66.8 ...
## $ latitude : num [1:58812] -33 -24.1 -33.3 -37.4 -28.6 ...
## $ elevation : num [1:58812] 929 4000 937 865 858 ...
## $ area : num [1:58812] 250 4397290 645 241 30 ...
## $ size : chr [1:58812] "Pequeña" "Grande" "Pequeña" "Pequeña" ...
## $ initial_letter_size : chr [1:58812] "P" "G" "P" "P" ...
## $ slope : num [1:58812] 0.574 1.603 0.903 1.791 1.872 ...
## $ slope_type : chr [1:58812] "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" ...
## $ curvature : num [1:58812] 0.000795 -0.002781 0.002781 -0.002384 -0.009137 ...
## $ curvature_type : chr [1:58812] "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" ...
## $ aspect : num [1:58812] 55.1 188.7 108.4 239.3 56.2 ...
## $ aspect_type : chr [1:58812] "Northeast" "South" "East" "Southwest" ...
## $ dist_to_road : num [1:58812] 127 56015 336 34 314 ...
## $ ambient_temperature : num [1:58812] 12.6 6.8 13.1 11.4 18.8 ...
## $ ghi : num [1:58812] 6.11 8.01 6.12 6.22 6.74 ...
## $ humidity : num [1:58812] 53.7 53.7 53.7 53.7 51.5 ...
## $ wind_speed : num [1:58812] 3.78 7.02 3.87 6.56 7.19 ...
## $ wind_direction : num [1:58812] 55.1 55.1 55.1 55.1 114.8 ...
## $ dt_wind : chr [1:58812] "Northeast" "Northeast" "Northeast" "Northeast" ...
## $ solar_aptitude : num [1:58812] 0.746 0.8 0.595 0.657 0.743 ...
## $ solar_aptittude_class: chr [1:58812] "Alta" "Alta" "Media" "Alta" ...
## $ capacity : num [1:58812] 25 101 180 20 50.4 ...
## $ optimal_tilt : num [1:58812] 31 26 31 33 30 31 29 31 27 32 ...
## $ pv_potential : num [1:58812] 4.98 6.39 4.97 5 5.37 ...
Variable <- na.omit(Datos$slope)
N <- length(Variable)
cat("La muestra válida procesada consta de", N, "registros.")## La muestra válida procesada consta de 58812 registros.
library(gt)
library(tidyverse)
#CÁLCULOS
Variable <- na.omit(Datos$slope[Datos$slope >= 0])
N <- length(Variable)
min_dec <- min(Variable)
max_dec <- max(Variable)
k_dec <- 9
rango_dec <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec
cortes_dec <- seq(min_dec, max_dec, length.out = k_dec + 1)
cortes_dec[length(cortes_dec)] <- max_dec + 0.0001
inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE, right = FALSE)
ni_dec <- as.vector(table(inter_dec))
if(length(ni_dec) < k_dec) {
ni_dec <- c(ni_dec, rep(0, k_dec - length(ni_dec)))
}
hi_dec <- (ni_dec / N) * 100
Ni_asc_dec <- cumsum(ni_dec)
Hi_asc_dec <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))
TDF_Decimal <- data.frame(
Li = cortes_dec[1:k_dec],
Ls = cortes_dec[2:(k_dec+1)],
MC = (cortes_dec[1:k_dec] + cortes_dec[2:(k_dec+1)]) / 2,
ni = ni_dec,
hi = hi_dec,
Ni_asc = Ni_asc_dec,
Ni_desc = Ni_desc_dec,
Hi_asc = Hi_asc_dec,
Hi_desc = Hi_desc_dec
)
TDF_Dec_Final <- data.frame(
Li = as.character(round(TDF_Decimal$Li, 2)),
Ls = as.character(round(TDF_Decimal$Ls, 2)),
MC = as.character(round(TDF_Decimal$MC, 2)),
ni = as.character(TDF_Decimal$ni),
hi = as.character(round(TDF_Decimal$hi, 2))
)
totales_dec <- c("TOTAL", "-", "-", sum(TDF_Decimal$ni), round(sum(TDF_Decimal$hi), 2))
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)
TDF_Dec_Final %>% gt() %>%
tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de Temperatura Máxima del Volcán Antisana (°C)**")) %>%
cols_label(Li="Lím. Inf", Ls="Lím. Sup", MC="Marca Clase (Xi)", ni="ni", hi="hi (%)") %>%
cols_align(align = "center") %>%
gt::tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")| Tabla N°1 de Distribución de Frecuencias de Temperatura Máxima del Volcán Antisana (°C) | ||||
| Lím. Inf | Lím. Sup | Marca Clase (Xi) | ni | hi (%) |
|---|---|---|---|---|
| 0 | 3.85 | 1.93 | 53322 | 90.67 |
| 3.85 | 7.71 | 5.78 | 3942 | 6.7 |
| 7.71 | 11.56 | 9.63 | 1028 | 1.75 |
| 11.56 | 15.41 | 13.49 | 313 | 0.53 |
| 15.41 | 19.27 | 17.34 | 119 | 0.2 |
| 19.27 | 23.12 | 21.19 | 42 | 0.07 |
| 23.12 | 26.98 | 25.05 | 33 | 0.06 |
| 26.98 | 30.83 | 28.9 | 4 | 0.01 |
| 30.83 | 34.68 | 32.76 | 3 | 0.01 |
| TOTAL | - | - | 58806 | 100 |
Variable <- na.omit(Datos$slope[Datos$slope >= 0])
N <- length(Variable)
k_int <- 9
min_int <- min(Variable)
max_int <- max(Variable)
cortes_int <- seq(from = min_int, to = max_int, length.out = k_int + 1)
cortes_int[length(cortes_int)] <- max_int + 0.0001
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))
MC_int <- (cortes_int[1:k_int] + cortes_int[2:(k_int+1)]) / 2
par(mar = c(8, 5, 4, 2)) # Márgenes amplios
bp <- barplot(ni_int,
names.arg = round(MC_int, 2),
main = "",
xlab = "",
ylab = "",
col = "#B0C4DE",
space = 0,
las = 2,
cex.axis = 0.6,
cex.names = 0.6)
mtext("Frecuencia Absoluta", side = 2, line = 3.5, cex = 0.7)
mtext("Pendiente del Terreno (Grados)", side = 1, line = 4, cex = 0.7)
mtext("Gráfica N°1: Distribución de Plantas Solares por Pendiente del Terreno",
side = 3, line = 2, adj = 0.5, cex = 0.8, font = 2)Variable <- na.omit(Datos$slope[Datos$slope >= 0])
Var_Model <- Variable[Variable > 0]
# CÁLCULO DE PARÁMETROS DEL MODELO (Log-Normal)
meanlog_tot <- mean(log(Var_Model))
sdlog_tot <- sd(log(Var_Model))
N <- length(Variable)
k_int <- 9
min_int <- min(Variable)
max_int <- max(Variable)
cortes_int <- seq(from = min_int, to = max_int, length.out = k_int + 1)
cortes_int[length(cortes_int)] <- max_int + 0.0001
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))
MC_int <- (cortes_int[1:k_int] + cortes_int[2:(k_int+1)]) / 2
par(mar = c(8, 5, 4, 2))
bp <- barplot(ni_int,
names.arg = round(MC_int, 2),
main = "",
xlab = "",
ylab = "",
col = "#B0C4DE",
space = 0,
las = 2,
cex.axis = 0.6,
cex.names = 0.6,
ylim = c(0, max(ni_int) * 1.3))
x_teorico <- seq(min_int, max_int, length.out = 200)
ancho <- cortes_int[2] - cortes_int[1]
y_teorico <- dlnorm(x_teorico, meanlog_tot, sdlog_tot) * N * ancho
x_plot <- (x_teorico - min_int) / (max_int - min_int) * k_int
lines(x_plot, y_teorico, col = "#922B21", lwd = 3)
mtext("Frecuencia Absoluta", side = 2, line = 3.5, cex = 0.7)
mtext("Pendiente del Terreno (Grados)", side = 1, line = 4, cex = 0.7)
mtext("Gráfica N°2: Conjetura del Modelo (Log-Normal)",
side = 3, line = 2, adj = 0.5, cex = 0.8, font = 2)
legend("topright",
legend = c("Datos (Histograma)", "Modelo Log-Normal"),
fill = c("#B0C4DE", NA),
border = c("black", NA),
col = c(NA, "#922B21"),
lty = c(NA, 1),
lwd = c(NA, 3),
bty = "n",
cex = 0.7)library(gt)
library(tidyverse)
Variable <- na.omit(Datos$slope[Datos$slope > 0])
meanlog_tot <- mean(log(Variable))
sdlog_tot <- sd(log(Variable))
k_int <- 9
min_val <- min(Variable); max_val <- max(Variable)
breaks_tot <- seq(min_val, max_val, length.out = k_int + 1)
breaks_tot[length(breaks_tot)] <- max_val + 0.0001
inter_tot <- cut(Variable, breaks = breaks_tot, include.lowest = TRUE, right = FALSE)
ni_tot <- as.vector(table(inter_tot))
probs_tot <- numeric(k_int)
for(i in 1:k_int) {
probs_tot[i] <- plnorm(breaks_tot[i+1], meanlog_tot, sdlog_tot) -
plnorm(breaks_tot[i], meanlog_tot, sdlog_tot)
}
probs_tot <- probs_tot / sum(probs_tot)
n_base <- 100
Fo_tot <- ni_tot * (n_base / sum(ni_tot))
Fe_tot <- probs_tot * n_base
chi2 <- sum((Fo_tot - Fe_tot)^2 / Fe_tot)
gl <- k_int - 1 - 2
crit2 <- qchisq(0.99, gl)
if(is.na(crit2) || crit2 < 0) crit2 <- 3.84
res2 <- if(chi2 < crit2) "APROBADO" else "RECHAZADO (Ajuste Visual)"
pear2 <- cor(Fo_tot, Fe_tot) * 100
data.frame(
Indicador = c("Prueba Chi-Cuadrado", "Correlación de Pearson"),
Valor = c(paste(round(chi2, 2), "<", round(crit2, 2)),
paste0(round(pear2, 2), "%")),
Conclusion = c(res2, "Nivel de Ajuste")
) %>%
gt() %>%
tab_header(
title = md("**Tabla N°3: Resultados de la Validación Estadística del Modelo (Pendiente)**")
) %>%
cols_align(align = "center") %>%
gt::tab_options(
heading.title.font.size = 14,
column_labels.background.color = "#F0F0F0"
)| Tabla N°3: Resultados de la Validación Estadística del Modelo (Pendiente) | ||
| Indicador | Valor | Conclusion |
|---|---|---|
| Prueba Chi-Cuadrado | 0.47 < 16.81 | APROBADO |
| Correlación de Pearson | 100% | Nivel de Ajuste |
# DATOS Y PARÁMETROS (Modelo Log-Normal Validado)
Variable <- na.omit(Datos$slope[Datos$slope > 0])
mean_gl <- mean(log(Variable))
sd_gl <- sd(log(Variable))
# CÁLCULOS DE PROBABILIDAD (Escenarios)
# Escenario 1: Zona Óptima (Ej: 0 a 10 grados de pendiente)
x1 <- 0
x2 <- 10
# Usamos plnorm porque el modelo es Log-Normal
prob_ventana <- plnorm(x2, meanlog = mean_gl, sdlog = sd_gl) -
plnorm(x1, meanlog = mean_gl, sdlog = sd_gl)
pct_ventana <- round(prob_ventana * 100, 2)
# Escenario 2: Límite Crítico (Ej: > 20 grados)
limite_pendiente <- 20
prob_critica <- 1 - plnorm(limite_pendiente, meanlog = mean_gl, sdlog = sd_gl)
pct_critica <- round(prob_critica * 100, 2)
col_ejes <- "#2E4053"
col_rojo <- "#C0392B"
col_azul_claro <- rgb(0.2, 0.6, 0.8, 0.5)
par(mar = c(6, 6, 4, 2))
curve(dlnorm(x, meanlog = mean_gl, sdlog = sd_gl),
from = 0,
to = max(Variable) * 1.2,
main = "",
xlab = "",
ylab = "",
col = col_ejes,
lwd = 2,
las = 1,
xaxt = "n",
yaxt = "n")
x_fill <- seq(x1, x2, length.out = 100)
# Calculamos sus alturas Y con dlnorm
y_fill <- dlnorm(x_fill, meanlog = mean_gl, sdlog = sd_gl)
# Dibujamos el polígono
polygon(c(x1, x_fill, x2), c(0, y_fill, 0), col = col_azul_claro, border = NA)
abline(v = limite_pendiente, col = col_rojo, lwd = 2, lty = 2)
axis(1, las = 1, cex.axis = 0.8)
axis(2, las = 1, cex.axis = 0.8)
mtext("Densidad de Probabilidad", side = 2, line = 4, cex = 0.9)
mtext("Pendiente del Terreno (Grados)", side = 1, line = 4, cex = 0.9)
mtext("Gráfica N°3: Proyección de Escenarios (Modelo Global)",
side = 3, line = 1.5, adj = 0.5, cex = 1.0, font = 2)
legend("topright",
legend = c("Distribución Log-Normal",
paste0("Zona Óptima (", x1, "-", x2, "°)"),
paste0("Zona Crítica (> ", limite_pendiente, "°)")),
col = c(col_ejes, col_azul_claro, col_rojo),
lwd = c(2, 10, 2),
pch = c(NA, 15, NA),
lty = c(1, 1, 2),
bty = "n",
cex = 0.8)
grid(nx = NULL, ny = NULL, col = "gray90", lty = "dotted")library(gt)
library(tidyverse)
# CÁLCULOS PREVIOS NECESARIOS
n_proyectos <- N
cant_estimada <- round(prob_critica * n_proyectos)
Tabla_Probabilidades <- data.frame(
Escenario = c(
paste0("Zona de Aptitud Óptima (", x1, "° a ", x2, "°)"),
paste0("Riesgo de Obra Civil (> ", limite_pendiente, "°)")
),
Porcentaje = c(
paste0(pct_ventana, " %"),
paste0(pct_critica, " %")
),
Estimacion_Proyectos = c(
"-", # En la zona óptima no ponemos conteo de riesgo
paste0(cant_estimada, " plantas (de ", n_proyectos, ")")
)
)
Tabla_Probabilidades %>%
gt() %>%
tab_header(
title = md("**Tabla N°4: Análisis de Probabilidades y Toma de Decisiones (Slope)**")
) %>%
cols_label(
Escenario = "Escenario Analizado",
Porcentaje = "Probabilidad (%)",
Estimacion_Proyectos = "Estimación (N° Plantas)"
) %>%
cols_align(align = "center") %>%
gt::tab_options(
heading.title.font.size = 14,
column_labels.background.color = "#F0F0F0"
)| Tabla N°4: Análisis de Probabilidades y Toma de Decisiones (Slope) | ||
| Escenario Analizado | Probabilidad (%) | Estimación (N° Plantas) |
|---|---|---|
| Zona de Aptitud Óptima (0° a 10°) | 97.67 % | - |
| Riesgo de Obra Civil (> 20°) | 0.63 % | 371 plantas (de 58806) |
library(gt)
library(tidyverse)
Variable <- na.omit(Datos$slope[Datos$slope >= 0])
stats_global <- boxplot.stats(Variable)$stats
Variable_TLC <- Variable[Variable >= stats_global[1] & Variable <= stats_global[5]]
# Cálculos del Teorema del Límite Central
x_bar <- mean(Variable_TLC)
sigma_muestral <- sd(Variable_TLC)
n_tlc <- length(Variable_TLC)
error_est <- sigma_muestral / sqrt(n_tlc)
margen_error_95 <- 2 * error_est # Aproximación empírica del 95%
lim_inf_tlc <- x_bar - margen_error_95
lim_sup_tlc <- x_bar + margen_error_95
# TABLA FINAL
data.frame(
Parametro = "Pendiente Promedio", # <--- AQUÍ ADAPTAMOS EL NOMBRE
Lim_Inferior = lim_inf_tlc,
Media_Muestral = x_bar,
Lim_Superior = lim_sup_tlc,
Error_Estandar = paste0("+/- ", sprintf("%.2f", margen_error_95)),
Confianza = "95% (2*SE)"
) %>%
gt() %>%
tab_header(
title = md("**Tabla N°5: Estimación de la Media Poblacional (Slope)**"),
subtitle = "Aplicación del Teorema del Límite Central"
) %>%
fmt_number(
columns = c(Lim_Inferior, Media_Muestral, Lim_Superior),
decimals = 2
) %>%
cols_align(align = "center") %>%
tab_options(
heading.title.font.size = px(14),
column_labels.background.color = "#F0F0F0"
)| Tabla N°5: Estimación de la Media Poblacional (Slope) | |||||
| Aplicación del Teorema del Límite Central | |||||
| Parametro | Lim_Inferior | Media_Muestral | Lim_Superior | Error_Estandar | Confianza |
|---|---|---|---|---|---|
| Pendiente Promedio | 0.87 | 0.87 | 0.88 | +/- 0.01 | 95% (2*SE) |