1 Configuración y Carga de Datos

##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####


#### VARIABLE PENDIENTE ####
## DATASET ##
# Cargar dataset
Datos <- read.csv("Dataset_Mundial_Final.csv", sep = ";", dec = ",", fileEncoding = "latin1")
# Estructura de los datos
str(Datos)
## 'data.frame':    58978 obs. of  29 variables:
##  $ ï..OBJECTID           : int  2 3 4 5 6 7 8 9 10 11 ...
##  $ code                  : chr  "00001-AFG-P" "00002-AFG-P" "00003-AFG-P" "00004-AFG-P" ...
##  $ plant_name            : chr  "Badghis Solar Power Plant" "Balkh solar farm" "Behsood solar farm" "Dab Pal 4 solar farm" ...
##  $ country               : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ operational_status    : chr  "cancelled - inferred 4 y" "cancelled - inferred 4 y" "cancelled - inferred 4 y" "shelved - inferred 2 y" ...
##  $ longitude             : num  62.9 67.1 70.4 66.2 65.7 ...
##  $ latitude              : num  35.1 36.7 34.4 33.8 31.7 ...
##  $ elevation             : int  918 359 629 2288 1060 1060 1392 398 410 1012 ...
##  $ area                  : num  6.74 10.72 487.73 111.8 1929.96 ...
##  $ size                  : chr  "Small" "Small" "Small" "Small" ...
##  $ slope                 : num  7.38 0.49 1.1 6.16 1.23 ...
##  $ slope_type            : chr  "Moderado" "Plano o casi plano" "Plano o casi plano" "Moderado" ...
##  $ curvature             : num  -0.024 0 0 0.045 -0.005 -0.005 -0.015 0 0 -0.009 ...
##  $ curvature_type        : chr  "Superficies cóncavas / Valles" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies convexas / Crestas" ...
##  $ aspect                : num  96.8 358.5 36.2 305.8 248.4 ...
##  $ aspect_type           : chr  "East" "North" "Northeast" "Northwest" ...
##  $ dist_to_road          : num  7037.1 92.7 112.1 1705.3 115.8 ...
##  $ ambient_temperature   : num  14.4 17.88 21.32 8.86 19.64 ...
##  $ ghi                   : num  5.82 5.58 5.8 6.75 6.62 ...
##  $ humidity              : num  47.7 42.3 36.4 37.3 24.2 ...
##  $ wind_speed            : num  0.039 0.954 0.234 0.943 0.37 ...
##  $ wind_direction        : num  187.5 207.4 255.6 160.3 97.7 ...
##  $ dt_wind               : chr  "South" "Southwest" "West" "South" ...
##  $ solar_aptitude        : num  0.72 0.635 0.685 0.659 0.819 0.819 0.818 0.642 0.63 0.374 ...
##  $ solar_aptitude_rounded: int  7 6 7 7 8 8 8 6 6 4 ...
##  $ solar_aptittude_class : chr  "Alta" "Alta" "Alta" "Alta" ...
##  $ capacity              : num  32 40 60 3000 100 100 36 50 25 100 ...
##  $ optimal_tilt          : num  30 31 31.1 33 31 ...
##  $ pv_potential          : num  4.61 4.41 4.57 5.42 5.17 ...
# Cargamos las librerias
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(e1071)

2 Cálculo de Intervalos y Frecuencias

# Extraer variable
Variable <- na.omit(Datos$slope)
N <- length(Variable)

# --- INTERVALOS PERSONALIZADOS PARA PENDIENTE ---
# Basado en la distribución: alta concentración en valores bajos
cortes_personalizados <- c(
  0,    # Inicio
  2,    # Para capturar el pico de valores (0-2°)
  4,    
  6,
  8,
  10,
  12,
  14,
  16,
  18,
  20,
  22,
  24,
  26,
  28,
  30,
  32,
  35    # Un poco más del máximo (34.68)
)

# Verificar que el último corte sea mayor que el valor máximo
if (max(cortes_personalizados) <= max(Variable)) {
    cortes_personalizados[length(cortes_personalizados)] <- max(Variable) + 0.1
}

K_real <- length(cortes_personalizados) - 1
lim_inf_int <- cortes_personalizados[1:K_real]
lim_sup_int <- cortes_personalizados[2:(K_real+1)]

# Frecuencias
inter_int <- cut(Variable, breaks = cortes_personalizados, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))

# Cálculos
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

# Dataframe Entero
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int)

# Misma lógica para decimales (usando los mismos cortes)
cortes_dec <- cortes_personalizados
cortes_dec[length(cortes_dec)] <- max(Variable) + 0.0001

inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE, right = FALSE)
ni_dec <- as.vector(table(inter_dec))

hi_dec <- (ni_dec / N) * 100
Ni_asc_dec <- cumsum(ni_dec)
Hi_asc_dec <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

TDF_Decimal <- data.frame(
  Li = cortes_dec[1:K_real],
  Ls = cortes_dec[2:(K_real+1)],
  MC = (cortes_dec[1:K_real] + cortes_dec[2:(K_real+1)]) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec)

3 Tabla de Distribución de Frecuencias

3.1 Tabla con Límites Decimales

# Crear Dataframe 
TDF_Dec_Final <- data.frame(
  Li      = format(round(TDF_Decimal$Li, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  Ls      = format(round(TDF_Decimal$Ls, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  MC      = format(round(TDF_Decimal$MC, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  ni      = format(TDF_Decimal$ni, scientific = FALSE, trim = TRUE),
  hi      = format(round(TDF_Decimal$hi, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  Ni_asc  = format(TDF_Decimal$Ni_asc, scientific = FALSE, trim = TRUE),
  Ni_desc = format(TDF_Decimal$Ni_desc, scientific = FALSE, trim = TRUE),
  Hi_asc  = format(round(TDF_Decimal$Hi_asc, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  Hi_desc = format(round(TDF_Decimal$Hi_desc, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  stringsAsFactors = FALSE
)

# Calcular Totales
totales_dec <- c("TOTAL", "-", "-", sum(TDF_Decimal$ni), round(sum(TDF_Decimal$hi), 2), "-", "-", "-", "-")
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)

# Generar GT
TDF_Dec_Final %>%
 gt() %>%
 tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de la Pendiente (°) de las Plantas Solares**")) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
 cols_label(
  Li = "Lim. Inf", 
  Ls = "Lim. Sup", 
  MC = "Marca Clase",
  ni = "Frec. Abs (ni)", 
  hi = "Frec. Rel (%)",
  Ni_asc = "Ni (Asc)", 
  Ni_desc = "Ni (Desc)",
  Hi_asc = "Hi Asc (%)", 
  Hi_desc = "Hi Desc (%)"
 ) %>%
 cols_align(align = "center", columns = everything()) %>%
 tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")
Tabla N°1 de Distribución de Frecuencias de la Pendiente (°) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
0.00 2.00 1.00 46765 79.29 46765 58978 79.29 100.00
2.00 4.00 3.00 6970 11.82 53735 12213 91.11 20.71
4.00 6.00 5.00 2591 4.39 56326 5243 95.50 8.89
6.00 8.00 7.00 1218 2.07 57544 2652 97.57 4.50
8.00 10.00 9.00 651 1.10 58195 1434 98.67 2.43
10.00 12.00 11.00 327 0.55 58522 783 99.23 1.33
12.00 14.00 13.00 172 0.29 58694 456 99.52 0.77
14.00 16.00 15.00 110 0.19 58804 284 99.70 0.48
16.00 18.00 17.00 56 0.09 58860 174 99.80 0.30
18.00 20.00 19.00 44 0.07 58904 118 99.87 0.20
20.00 22.00 21.00 26 0.04 58930 74 99.92 0.13
22.00 24.00 23.00 17 0.03 58947 48 99.95 0.08
24.00 26.00 25.00 11 0.02 58958 31 99.97 0.05
26.00 28.00 27.00 14 0.02 58972 20 99.99 0.03
28.00 30.00 29.00 2 0.00 58974 6 99.99 0.01
30.00 32.00 31.00 1 0.00 58975 4 99.99 0.01
32.00 34.68 33.34 3 0.01 58978 3 100.00 0.01
TOTAL - - 58978 100 - - - -
Autor: Martin Sarmiento

3.2 Tabla con Límites Enteros

# Crear Dataframe Visual
TDF_Int_Final <- data.frame(
  Li      = format(round(TDF_Enteros$Li, 1), nsmall = 1, scientific = FALSE, trim = TRUE),
  Ls      = format(round(TDF_Enteros$Ls, 1), nsmall = 1, scientific = FALSE, trim = TRUE),
  MC      = format(round(TDF_Enteros$MC, 1), nsmall = 1, scientific = FALSE, trim = TRUE),
  ni      = format(TDF_Enteros$ni, scientific = FALSE, trim = TRUE),
  hi      = format(round(TDF_Enteros$hi, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  Ni_asc  = format(TDF_Enteros$Ni_asc, scientific = FALSE, trim = TRUE),
  Ni_desc = format(TDF_Enteros$Ni_desc, scientific = FALSE, trim = TRUE),
  Hi_asc  = format(round(TDF_Enteros$Hi_asc, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  Hi_desc = format(round(TDF_Enteros$Hi_desc, 2), nsmall = 2, scientific = FALSE, trim = TRUE),
  stringsAsFactors = FALSE
)

# Calcular Totales
totales_int <- c("TOTAL", "-", "-", sum(TDF_Enteros$ni), round(sum(TDF_Enteros$hi), 2), "-", "-", "-", "-")
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)

# Generar GT
TDF_Int_Final %>%
 gt() %>%
 tab_header(title = md("**Tabla N°2 de Distribución de Frecuencias de la Pendiente (°) de las Plantas Solares**")) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
 cols_label(
  Li = "Lim. Inf", 
  Ls = "Lim. Sup", 
  MC = "Marca Clase",
  ni = "Frec. Abs (ni)", 
  hi = "Frec. Rel (%)",
  Ni_asc = "Ni (Asc)", 
  Ni_desc = "Ni (Desc)",
  Hi_asc = "Hi Asc (%)", 
  Hi_desc = "Hi Desc (%)"
 ) %>%
 cols_align(align = "center", columns = everything()) %>%
 tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")
Tabla N°2 de Distribución de Frecuencias de la Pendiente (°) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
0.0 2.0 1.0 46765 79.29 46765 58978 79.29 100.00
2.0 4.0 3.0 6970 11.82 53735 12213 91.11 20.71
4.0 6.0 5.0 2591 4.39 56326 5243 95.50 8.89
6.0 8.0 7.0 1218 2.07 57544 2652 97.57 4.50
8.0 10.0 9.0 651 1.10 58195 1434 98.67 2.43
10.0 12.0 11.0 327 0.55 58522 783 99.23 1.33
12.0 14.0 13.0 172 0.29 58694 456 99.52 0.77
14.0 16.0 15.0 110 0.19 58804 284 99.70 0.48
16.0 18.0 17.0 56 0.09 58860 174 99.80 0.30
18.0 20.0 19.0 44 0.07 58904 118 99.87 0.20
20.0 22.0 21.0 26 0.04 58930 74 99.92 0.13
22.0 24.0 23.0 17 0.03 58947 48 99.95 0.08
24.0 26.0 25.0 11 0.02 58958 31 99.97 0.05
26.0 28.0 27.0 14 0.02 58972 20 99.99 0.03
28.0 30.0 29.0 2 0.00 58974 6 99.99 0.01
30.0 32.0 31.0 1 0.00 58975 4 99.99 0.01
32.0 35.0 33.5 3 0.01 58978 3 100.00 0.01
TOTAL - - 58978 100 - - - -
Autor: Martin Sarmiento

4 Análisis Gráfico

4.1 Histogramas de Cantidad

par(mar = c(8, 7, 5, 2)) 
barplot(TDF_Enteros$ni, 
        names.arg = TDF_Enteros$MC,
        main = "",
        xlab = "", 
        ylab = "",
        col = "#C1E1C1",
        ylim = c(0, max(TDF_Enteros$ni) * 1.2),
        space = 0, 
        las = 2, 
        cex.names = 0.7)
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Pendiente (°)", side = 1, line = 4)

mtext("Gráfica N°1: Distribución de Cantidad de Plantas Solares por Pendiente", 
      side = 3, 
      line = 2, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni, 
        main = "",
        xlab = "",
        ylab = "",
        names.arg = TDF_Enteros$MC,
        col = "#C1E1C1",
        space = 0,
        cex.names = 0.7,
        las = 2,
        ylim = c(0, 58771))
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Pendiente (°)", side = 1, line = 4)

mtext("Gráfica N°2: Distribución de Cantidad de Plantas Solares por Pendiente", 
      side = 3, 
      line = 2, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

4.2 Histogramas Porcentuales

par(mar = c(8, 5, 5, 2))
bp3 <- barplot(TDF_Enteros$hi, 
        main = "",
        xlab = "",
        ylab = "Porcentaje (%)",
        col = "#C1E1C1",
        space = 0,
        names.arg = TDF_Enteros$MC,
        cex.names = 0.7,
        las = 2,
        ylim = c(0, max(TDF_Enteros$hi) * 1.2))
mtext("Pendiente (°)", side = 1, line = 4)

mtext("Gráfica N°3: Distribución Porcentual de las Plantas Solares por Pendiente", 
      side = 3, 
      line = 2, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

text(x = bp3, 
     y = TDF_Enteros$hi, 
     labels = paste0(round(TDF_Enteros$hi, 2), "%"), 
     pos = 3, cex = 0.6, col = "black")

par(mar = c(8, 5, 5, 2))
bp4 <- barplot(TDF_Enteros$hi, 
        main = "",
        xlab = "",
        ylab = "Porcentaje (%)",
        col = "#C1E1C1",
        space = 0,
        names.arg = TDF_Enteros$MC,
        las = 2,
        cex.names = 0.7,
        ylim = c(0, 110)) 
mtext("Pendiente (°)", side = 1, line = 4)

mtext("Gráfica N°4: Distribución Porcentual de las Plantas Solares por Pendiente", 
      side = 3, 
      line = 2, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

text(x = bp4, 
     y = TDF_Enteros$hi, 
     labels = paste0(round(TDF_Enteros$hi, 2), "%"), 
     pos = 3, cex = 0.6, col = "black")

4.3 Diagrama de Cajas (Boxplot)

par(mar = c(5, 5, 4, 2))
boxplot(Variable, 
        horizontal = TRUE,
        col = "#C1E1C1",
        xlab = "Pendiente (°)",
        cex.main = 0.9,
        main = "Gráfica N°5: Distribución de la Pendiente en las Plantas Solares")

4.4 Ojivas

par(mar = c(5, 5, 7, 10), xpd = TRUE)

# Coordenadas
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc

# 1. Dibujar la Ascendente 
plot(x_asc, y_asc,
     type = "b", 
     main = "",
     xlab = "Pendiente (°)",
     ylab = "Frecuencia acumulada",
     col = "black",
     pch = 19, 
     xlim = c(min(TDF_Enteros$Li), max(x_asc)), 
     ylim = c(0, sum(TDF_Enteros$ni)),
     bty = "l"
)

# 2. Agregar la Descendente 
lines(x_desc, y_desc, col = "green", type = "b", pch = 19)

grid()
mtext("Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Pendiente en las Plantas Solares", 
      side = 3, 
      line = 3, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

legend("right", 
       legend = c("Ascendente", "Descendente"), 
       col = c("black", "green"), 
       lty = 1, 
       pch = 1, 
       cex = 0.6, 
       inset = c(0.05, 0.05),
       bty = "n")

5 Indicadores Estadísticos

## INDICADORES DE TENDENCIA CENTRAL
# Media aritmética
media <- round(mean(Variable), 2)

# Mediana
mediana <- round(median(Variable), 2)

# Moda
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

## INDICADORES DE DISPERSIÓN
# Varianza
varianza <- var(Variable)

# Desviación Estándar
sd_val <- sd(Variable)

# Coeficiente de Variación
cv <- round((sd_val / abs(media)) * 100, 2)

## INDICADORES DE FORMA
# Coeficiente de Asimetría
asimetria <- skewness(Variable, type = 2)

# Curtosis
curtosis <- kurtosis(Variable)

# Outliers
Q1 <- quantile(Variable, 0.25)
Q3 <- quantile(Variable, 0.75)
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val

outliers_data <- Variable[Variable < lim_inf | Variable > lim_sup]
num_outliers <- length(outliers_data)

if(num_outliers > 0){
  rango_outliers <- paste0(num_outliers, " [", round(min(outliers_data), 2), "; ", round(max(outliers_data), 2), "]")
} else {
  rango_outliers <- "0 [Sin Outliers]"
}


tabla_indicadores <- data.frame(
 "Variable" = c("Pendiente (°)"),
 "Rango_MinMax" = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
 "X" = c(media),
 "Me" = c(mediana),
 "Mo" = c(moda_txt),
 "V" = c(varianza),
 "Sd" = c(sd_val),
 "Cv" = c(cv),
 "As" = c(asimetria),
 "K" = c(curtosis),
 "Outliers" = rango_outliers)

# Generar Tabla GT
tabla_conclusiones_gt <- tabla_indicadores %>%
 gt() %>%
 tab_header(title = md("**Tabla N°3 de Conclusiones de Pendiente de las Plantas Solares**")) %>%
 tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
 cols_label(
  Variable = "Variable",
  Rango_MinMax = "Rango",
  X = "Media (X)",
  Me = "Mediana (Me)",
  Mo = "Moda (Mo)",
  V = "Varianza (V)",
  Sd = "Desv. Est. (Sd)",
  Cv = "C.V. (%)",
  As = "Asimetría (As)",
  K = "Curtosis (K)",
  Outliers = "Outliers [Intervalo]"
 ) %>%
 tab_options(
  heading.title.font.size = px(16),
  column_labels.background.color = "#F0F0F0"
 )

tabla_conclusiones_gt
Tabla N°3 de Conclusiones de Pendiente de las Plantas Solares
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Pendiente (°) [0; 34.68] 1.45 0.67 1 5.099909 2.258298 155.74 3.982554 24.32453 5633 [3.79; 34.68]
Autor: Martin Sarmiento

6 Conclusiones

La variable “Pendiente” fluctúa entre y 34.68° y sus valores se encuentran alrededor de 0.67°, con una desviación estándar de 2.258298, siendo una variable muy heterogénea, cuyos valores se concentran en la parte media baja de la variable con la agregación de presencia de valores atípicos de 5633 outliers; por todo lo anterior, el comportamiento de la variable es muy perjudicial.