1 Configuración y Carga de Datos

##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####


#### VARIABLE ELEVACION ####
## DATASET ##
# Load the dataset.
# The full path is built explicitly instead of calling setwd(), so reading
# the file does not alter the working directory of the session.
# The CSV uses ";" as field separator, "," as decimal mark and latin1 text.
Datos <- read.csv(
  file.path("~/R/ELEVATION", "Data_Mundial_Final.csv"),
  sep = ";",
  dec = ",",
  fileEncoding = "latin1"
)
# Structure of the data
str(Datos)
## 'data.frame':    58771 obs. of  29 variables:
##  $ OBJECTID              : int  127 129 131 132 133 137 138 139 140 145 ...
##  $ code                  : chr  "00127-ARG-P" "00129-ARG-G" "00131-ARG-P" "00132-ARG-P" ...
##  $ plant_name            : chr  "Aconcagua solar farm" "Altiplano 200 Solar Power Plant" "Anchoris solar farm" "Antu Newen solar farm" ...
##  $ country               : chr  "Argentina" "Argentina" "Argentina" "Argentina" ...
##  $ operational_status    : chr  "announced" "operating" "construction" "cancelled - inferred 4 y" ...
##  $ longitude             : num  -68.9 -66.9 -68.9 -70.3 -66.8 ...
##  $ latitude              : num  -33 -24.1 -33.3 -37.4 -28.6 ...
##  $ elevation             : int  929 4000 937 865 858 570 1612 665 3989 2640 ...
##  $ area                  : num  250 4397290 645 241 30 ...
##  $ size                  : chr  "Small" "Big" "Small" "Small" ...
##  $ slope                 : num  0.574 1.603 0.903 1.791 1.872 ...
##  $ slope_type            : chr  "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" ...
##  $ curvature             : num  0.000795 -0.002781 0.002781 -0.002384 -0.009137 ...
##  $ curvature_type        : chr  "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" ...
##  $ aspect                : num  55.1 188.7 108.4 239.3 56.2 ...
##  $ aspect_type           : chr  "Northeast" "South" "East" "Southwest" ...
##  $ dist_to_road          : num  127 56015 336 34 314 ...
##  $ ambient_temperature   : num  12.6 6.8 13.1 11.4 18.8 ...
##  $ ghi                   : num  6.11 8.01 6.12 6.22 6.74 ...
##  $ humidity              : num  53.7 53.7 53.7 53.7 51.5 ...
##  $ wind_speed            : num  3.78 7.02 3.87 6.56 7.19 ...
##  $ wind_direction        : num  55.1 55.1 55.1 55.1 114.8 ...
##  $ dt_wind               : chr  "Northeast" "Northeast" "Northeast" "Northeast" ...
##  $ solar_aptitude        : num  0.746 0.8 0.595 0.657 0.743 ...
##  $ solar_aptitude_rounded: int  7 8 6 7 7 7 8 7 8 6 ...
##  $ solar_aptittude_class : chr  "Alta" "Alta" "Media" "Alta" ...
##  $ capacity              : num  25 101 180 20 50.4 ...
##  $ optimal_tilt          : num  31 26 31 33 30 31 29 31 27 32 ...
##  $ pv_potential          : num  4.98 6.39 4.97 5 5.37 ...
# Cargamos las librerias
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(e1071)

2 Cálculo de Intervalos y Frecuencias

# Extract the elevation column, dropping missing values
Variable <- na.omit(Datos$elevation)
N <- length(Variable)

# DECIMAL LIMITS
# Number of classes from Sturges' rule; equal-width classes spanning the
# observed range exactly.
min_dec <- min(Variable)
max_dec <- max(Variable)
k_dec <- floor(1 + 3.322 * log10(N))
rango_dec <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec

# Exact break points: k_dec + 1 evenly spaced values from min to max.
# The last break is nudged slightly above the maximum.
cortes_dec <- seq(min_dec, max_dec, length.out = k_dec + 1)
cortes_dec[k_dec + 1] <- max_dec + 0.0001

# Absolute frequencies: left-closed classes, counted per factor level
inter_dec <- cut(Variable, breaks = cortes_dec, right = FALSE, include.lowest = TRUE)
ni_dec <- tabulate(as.integer(inter_dec), nbins = nlevels(inter_dec))

# Relative frequencies (%) and ascending / descending cumulative series
hi_dec <- (ni_dec / N) * 100
Ni_asc_dec <- cumsum(ni_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_asc_dec <- cumsum(hi_dec)
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# Decimal-limits frequency table: lower limit, upper limit, class mark and
# the frequency columns computed above.
TDF_Decimal <- data.frame(
  Li = cortes_dec[-(k_dec + 1)],
  Ls = cortes_dec[-1],
  MC = (cortes_dec[-(k_dec + 1)] + cortes_dec[-1]) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec
)


# INTEGER LIMITS
# Class limits are rounded to multiples of BASE so the table reads cleanly.
BASE <- 10
min_int <- floor(min(Variable) / BASE) * BASE
max_int <- ceiling(max(Variable) / BASE) * BASE
k_int_sug <- floor(1 + 3.322 * log10(N))
Rango_int <- max_int - min_int
Amplitud_raw <- Rango_int / k_int_sug

# Round the class width up to a multiple of BASE (previously a literal 10,
# which would disagree with the limits if BASE were ever changed).
Amplitud_int <- ceiling(Amplitud_raw / BASE) * BASE
if (Amplitud_int == 0) {
  # All observations fall on the same rounded value: use one class of width BASE
  Amplitud_int <- BASE
}

# Use exactly as many classes as are needed to reach max_int. Because
# max_int >= max(Variable), the last break always covers the data, so no
# extension loop is required, and no empty trailing class is produced when
# the range is an exact multiple of the class width.
n_clases_int <- max(1, ceiling(Rango_int / Amplitud_int))
cortes_int <- min_int + Amplitud_int * (0:n_clases_int)

K_real <- length(cortes_int) - 1
lim_inf_int <- cortes_int[seq_len(K_real)]
lim_sup_int <- cortes_int[seq_len(K_real) + 1]

# Absolute frequencies: left-closed classes; include.lowest = TRUE makes the
# last class closed on the right so a value equal to the top break is kept.
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))

# Relative frequencies (%) and ascending / descending cumulative series
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

# Integer-limits frequency table
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int
)

3 Tabla de Distribución de Frecuencias

3.1 Tabla con Límites Decimales

# Display version of the decimal-limits table: every column is converted to
# text (numeric ones rounded to 2 decimals) so that a "TOTAL" row can be
# appended underneath the classes.
TDF_Dec_Final <- with(TDF_Decimal, data.frame(
  Li      = as.character(round(Li, 2)),
  Ls      = as.character(round(Ls, 2)),
  MC      = as.character(round(MC, 2)),
  ni      = as.character(ni),
  hi      = as.character(round(hi, 2)),
  Ni_asc  = as.character(Ni_asc),
  Ni_desc = as.character(Ni_desc),
  Hi_asc  = as.character(round(Hi_asc, 2)),
  Hi_desc = as.character(round(Hi_desc, 2))
))

# Totals row: only the absolute and relative frequency columns carry a sum;
# the remaining cells are filled with a dash.
totales_dec <- c(
  "TOTAL",
  rep("-", 2),
  as.character(sum(TDF_Decimal$ni)),
  as.character(round(sum(TDF_Decimal$hi), 2)),
  rep("-", 4)
)
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)

# Render the table with gt: title, readable column labels, centered cells
TDF_Dec_Final %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°1 de Distribución de Frecuencias de Elevación (m.s.n.m.) de las Plantas Solares**")
  ) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(
    heading.title.font.size = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°1 de Distribución de Frecuencias de Elevación (m.s.n.m.) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-378 -0.88 -189.44 255 0.43 255 58771 0.43 100
-0.88 376.25 187.69 41914 71.32 42169 58516 71.75 99.57
376.25 753.38 564.81 8716 14.83 50885 16602 86.58 28.25
753.38 1130.5 941.94 3216 5.47 54101 7886 92.05 13.42
1130.5 1507.62 1319.06 2245 3.82 56346 4670 95.87 7.95
1507.62 1884.75 1696.19 1095 1.86 57441 2425 97.74 4.13
1884.75 2261.88 2073.31 461 0.78 57902 1330 98.52 2.26
2261.88 2639 2450.44 270 0.46 58172 869 98.98 1.48
2639 3016.12 2827.56 269 0.46 58441 599 99.44 1.02
3016.12 3393.25 3204.69 124 0.21 58565 330 99.65 0.56
3393.25 3770.38 3581.81 72 0.12 58637 206 99.77 0.35
3770.38 4147.5 3958.94 59 0.1 58696 134 99.87 0.23
4147.5 4524.62 4336.06 39 0.07 58735 75 99.94 0.13
4524.62 4901.75 4713.19 25 0.04 58760 36 99.98 0.06
4901.75 5278.88 5090.31 10 0.02 58770 11 100 0.02
5278.88 5656 5467.44 1 0 58771 1 100 0
TOTAL - - 58771 100 - - - -

3.2 Tabla con Límites Enteros

# Display version of the integer-limits table: every column is converted to
# text (percentages rounded to 2 decimals) so that a "TOTAL" row can be
# appended underneath the classes.
TDF_Int_Final <- with(TDF_Enteros, data.frame(
  Li      = as.character(Li),
  Ls      = as.character(Ls),
  MC      = as.character(MC),
  ni      = as.character(ni),
  hi      = as.character(round(hi, 2)),
  Ni_asc  = as.character(Ni_asc),
  Ni_desc = as.character(Ni_desc),
  Hi_asc  = as.character(round(Hi_asc, 2)),
  Hi_desc = as.character(round(Hi_desc, 2))
))

# Totals row: only the absolute and relative frequency columns carry a sum;
# the remaining cells are filled with a dash.
totales_int <- c(
  "TOTAL",
  rep("-", 2),
  as.character(sum(TDF_Enteros$ni)),
  as.character(round(sum(TDF_Enteros$hi), 2)),
  rep("-", 4)
)
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)

# Render the table with gt: title, readable column labels, centered cells
TDF_Int_Final %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°2 de Distribución de Frecuencias de Elevación (m.s.n.m.) de las Plantas Solares**")
  ) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(
    heading.title.font.size = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°2 de Distribución de Frecuencias de Elevación (m.s.n.m.) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-380 0 -190 255 0.43 255 58771 0.43 100
0 380 190 42033 71.52 42288 58516 71.95 99.57
380 760 570 8676 14.76 50964 16483 86.72 28.05
760 1140 950 3185 5.42 54149 7807 92.14 13.28
1140 1520 1330 2241 3.81 56390 4622 95.95 7.86
1520 1900 1710 1076 1.83 57466 2381 97.78 4.05
1900 2280 2090 456 0.78 57922 1305 98.56 2.22
2280 2660 2470 256 0.44 58178 849 98.99 1.44
2660 3040 2850 287 0.49 58465 593 99.48 1.01
3040 3420 3230 103 0.18 58568 306 99.65 0.52
3420 3800 3610 70 0.12 58638 203 99.77 0.35
3800 4180 3990 62 0.11 58700 133 99.88 0.23
4180 4560 4370 37 0.06 58737 71 99.94 0.12
4560 4940 4750 25 0.04 58762 34 99.98 0.06
4940 5320 5130 8 0.01 58770 9 100 0.02
5320 5700 5510 1 0 58771 1 100 0
TOTAL - - 58771 100 - - - -

4 Análisis Gráfico

4.1 Histogramas de Cantidad

# Plot N°1: absolute frequency per integer-limit class, labelled with the
# class marks. The y axis stops at the tallest bar plus 20% headroom.
# Margins are widened (bottom/left) to fit rotated labels and axis titles.
par(mar = c(8, 7, 5, 2))
barplot(
  height = TDF_Enteros[["ni"]],
  names.arg = TDF_Enteros[["MC"]],
  space = 0,
  col = "#FFCC99",
  ylim = c(0, 1.2 * max(TDF_Enteros[["ni"]])),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = ""
)

# Axis titles and plot title are drawn in the margins with mtext()
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Elevación (m.s.n.m.)", side = 1, line = 4)
mtext(
  "Gráfica N°1: Distribución de Cantidad de Plantas Solares por Elevación",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Plot N°2: same absolute frequencies as plot N°1, but with the y axis
# running up to the total number of observations so bars are read against
# the whole sample. The total is taken from the table itself rather than a
# hard-coded count, so the plot stays correct if the dataset changes.
par(mar = c(8, 7, 4, 2))
barplot(TDF_Enteros$ni,
        main = "",
        xlab = "",
        ylab = "",
        names.arg = TDF_Enteros$MC,
        col = "#FFCC99",
        ylim = c(0, sum(TDF_Enteros$ni)),
        space = 0,
        cex.names = 0.7,
        las = 2)
# Axis titles and plot title are drawn in the margins with mtext()
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Elevación (m.s.n.m.)", side = 1, line = 4)

mtext("Gráfica N°2: Distribución de Cantidad de Plantas Solares por Elevación",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)

4.2 Histogramas Porcentuales

# Plot N°3: relative frequency (%) per integer-limit class. The y axis stops
# at the tallest bar plus 20% headroom. barplot() returns the bar midpoints,
# which are kept in bp3 to position the percentage labels.
par(mar = c(8, 5, 5, 2))
bp3 <- barplot(
  height = TDF_Enteros[["hi"]],
  names.arg = TDF_Enteros[["MC"]],
  space = 0,
  col = "#FFCC99",
  ylim = c(0, 1.2 * max(TDF_Enteros[["hi"]])),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)"
)

# x-axis title and plot title go in the margins
mtext("Elevación (m.s.n.m.)", side = 1, line = 4)
mtext(
  "Gráfica N°3: Distribución Porcentual de las Plantas Solares por Elevación",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Percentage label (1 decimal) just above each bar
text(
  x = bp3,
  y = TDF_Enteros[["hi"]],
  labels = paste0(round(TDF_Enteros[["hi"]], 1), "%"),
  pos = 3,
  cex = 0.6,
  col = "black"
)

# Plot N°4: relative frequency (%) per integer-limit class on a fixed
# 0-100 % axis. barplot() returns the bar midpoints, which are kept in bp4
# to position the percentage labels.
par(mar = c(8, 5, 5, 2))
bp4 <- barplot(
  height = TDF_Enteros[["hi"]],
  names.arg = TDF_Enteros[["MC"]],
  space = 0,
  col = "#FFCC99",
  ylim = c(0, 100),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)"
)

# x-axis title and plot title go in the margins
mtext("Elevación (m.s.n.m.)", side = 1, line = 4)
mtext(
  "Gráfica N°4: Distribución Porcentual de las Plantas Solares por Elevación",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Percentage label (1 decimal) just above each bar
text(
  x = bp4,
  y = TDF_Enteros[["hi"]],
  labels = paste0(round(TDF_Enteros[["hi"]], 1), "%"),
  pos = 3,
  cex = 0.6,
  col = "black"
)

4.3 Diagrama de Cajas (Boxplot)

# Plot N°5: horizontal boxplot of the raw elevation values
par(mar = c(5, 5, 4, 2))
boxplot(
  Variable,
  main = "Gráfica N°5: Distribución de la Elevación en las Plantas Solares",
  cex.main = 0.9,
  xlab = "Elevación (m.s.n.m.)",
  col = "#FFCC99",
  horizontal = TRUE
)

4.4 Ojivas

# Plot N°6: ascending and descending ogives (cumulative frequency curves).
# xpd is no longer switched on globally: with par(xpd = TRUE) in effect the
# grid() lines are not clipped to the plot region and run into the margins.
# It is enabled only for the legend call instead.
par(mar = c(5, 5, 7, 10))

# Coordinates: the ascending curve is plotted against upper class limits,
# the descending curve against lower class limits.
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc

# 1. Draw the ascending ogive; the axes span all class limits and the full
#    sample size so the descending curve fits in the same frame.
plot(x_asc, y_asc,
     type = "b",
     main = "",
     xlab = "Elevación (m.s.n.m.)",
     ylab = "Frecuencia acumulada",
     col = "black",
     pch = 19,
     xlim = c(min(x_desc), max(x_asc)),
     ylim = c(0, sum(TDF_Enteros$ni)),
     bty = "l")

# 2. Add the descending ogive
lines(x_desc, y_desc, col = "orange", type = "b", pch = 19)

grid()
mtext("Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Elevación en las Plantas Solares",
      side = 3,
      line = 3,
      adj = 0.5,
      cex = 0.9,
      font = 2)

# Legend symbols use pch = 19 so they match the filled points of both curves
# (previously pch = 1, an open circle that appears nowhere in the plot).
legend("right",
       legend = c("Ascendente", "Descendente"),
       col = c("black", "orange"),
       lty = 1,
       pch = 19,
       cex = 0.6,
       inset = c(0.05, 0.05),
       bty = "n",
       xpd = TRUE)

5 Indicadores Estadísticos

## MEASURES OF CENTRAL TENDENCY
# Arithmetic mean (rounded for display)
media <- round(mean(Variable), 2)

# Median
mediana <- round(median(Variable), 2)

# Mode: class mark(s) of the integer-limit class(es) with the highest
# absolute frequency; ties are reported as a comma-separated list.
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

## MEASURES OF DISPERSION
# Variance
varianza <- var(Variable)

# Standard deviation
sd_val <- sd(Variable)

# Coefficient of variation (%). Computed from the unrounded mean so the
# display rounding of `media` does not leak into the result.
cv <- round((sd_val / abs(mean(Variable))) * 100, 2)

## MEASURES OF SHAPE
# Skewness and kurtosis use the same estimator (type = 2) so the two shape
# indicators are comparable; kurtosis previously fell back to e1071's
# default type = 3.
asimetria <- skewness(Variable, type = 2)

# Kurtosis
curtosis <- kurtosis(Variable, type = 2)

# Outliers: values beyond 1.5 * IQR from the first / third quartile
Q1 <- quantile(Variable, 0.25)
Q3 <- quantile(Variable, 0.75)
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val

outliers_data <- Variable[Variable < lim_inf | Variable > lim_sup]
num_outliers <- length(outliers_data)

# Summary text: "<count> [<min outlier>; <max outlier>]"
if (num_outliers > 0) {
  rango_outliers <- paste0(num_outliers, " [", round(min(outliers_data), 2), "; ", round(max(outliers_data), 2), "]")
} else {
  rango_outliers <- "0 [Sin Outliers]"
}

# One-row summary of every indicator computed for the elevation variable
tabla_indicadores <- data.frame(
  Variable = "Elevación (m.s.n.m.)",
  Rango_MinMax = paste0(
    "[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"
  ),
  X = media,
  Me = mediana,
  Mo = moda_txt,
  V = varianza,
  Sd = sd_val,
  Cv = cv,
  As = asimetria,
  K = curtosis,
  Outliers = rango_outliers
)

# Build the gt table: title, author note and readable column labels
tabla_conclusiones_gt <- tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°3 de Conclusiones de Elevación de las Plantas Solares**")
  ) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Variable = "Variable",
    Rango_MinMax = "Rango",
    X = "Media (X)",
    Me = "Mediana (Me)",
    Mo = "Moda (Mo)",
    V = "Varianza (V)",
    Sd = "Desv. Est. (Sd)",
    Cv = "C.V. (%)",
    As = "Asimetría (As)",
    K = "Curtosis (K)",
    Outliers = "Outliers [Intervalo]"
  ) %>%
  tab_options(
    heading.title.font.size = px(16),
    column_labels.background.color = "#F0F0F0"
  )

# Print the table
tabla_conclusiones_gt
Tabla N°3 de Conclusiones de Elevación de las Plantas Solares
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Elevación (m.s.n.m.) [-378; 5656] 354.18 156 190 278058.1 527.3121 148.88 3.176571 13.94829 5463 [1015; 5656]
Autor: Martin Sarmiento

6 Conclusiones

La variable “Elevación” fluctúa entre -378 y 5656 m.s.n.m. y sus valores se encuentran alrededor de la mediana de 156 m.s.n.m., con una desviación estándar de 527.3121 m.s.n.m., por lo que es una variable muy heterogénea (C.V. = 148.88 %). Sus valores se concentran en la parte media baja de la variable, con la presencia de 5463 valores atípicos (outliers); por todo lo anterior, el comportamiento de la variable es regular.