1 Configuración y Carga de Datos

##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####


#### VARIABLE VELOCIDAD DEL VIENTO ####
## DATASET ##
setwd("~/R/WIND_SPEED")
# Cargar dataset
Datos <- read.csv("Dataset_Mundial_Final.csv", sep = ";", dec = ",", fileEncoding = "latin1")
# Estructura de los datos
str(Datos)
## 'data.frame':    58978 obs. of  29 variables:
##  $ ï..OBJECTID           : int  2 3 4 5 6 7 8 9 10 11 ...
##  $ code                  : chr  "00001-AFG-P" "00002-AFG-P" "00003-AFG-P" "00004-AFG-P" ...
##  $ plant_name            : chr  "Badghis Solar Power Plant" "Balkh solar farm" "Behsood solar farm" "Dab Pal 4 solar farm" ...
##  $ country               : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ operational_status    : chr  "cancelled - inferred 4 y" "cancelled - inferred 4 y" "cancelled - inferred 4 y" "shelved - inferred 2 y" ...
##  $ longitude             : num  62.9 67.1 70.4 66.2 65.7 ...
##  $ latitude              : num  35.1 36.7 34.4 33.8 31.7 ...
##  $ elevation             : int  918 359 629 2288 1060 1060 1392 398 410 1012 ...
##  $ area                  : num  6.74 10.72 487.73 111.8 1929.96 ...
##  $ size                  : chr  "Small" "Small" "Small" "Small" ...
##  $ slope                 : num  7.38 0.49 1.1 6.16 1.23 ...
##  $ slope_type            : chr  "Moderado" "Plano o casi plano" "Plano o casi plano" "Moderado" ...
##  $ curvature             : num  -0.024 0 0 0.045 -0.005 -0.005 -0.015 0 0 -0.009 ...
##  $ curvature_type        : chr  "Superficies cóncavas / Valles" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies convexas / Crestas" ...
##  $ aspect                : num  96.8 358.5 36.2 305.8 248.4 ...
##  $ aspect_type           : chr  "East" "North" "Northeast" "Northwest" ...
##  $ dist_to_road          : num  7037.1 92.7 112.1 1705.3 115.8 ...
##  $ ambient_temperature   : num  14.4 17.88 21.32 8.86 19.64 ...
##  $ ghi                   : num  5.82 5.58 5.8 6.75 6.62 ...
##  $ humidity              : num  47.7 42.3 36.4 37.3 24.2 ...
##  $ wind_speed            : num  0.039 0.954 0.234 0.943 0.37 ...
##  $ wind_direction        : num  187.5 207.4 255.6 160.3 97.7 ...
##  $ dt_wind               : chr  "South" "Southwest" "West" "South" ...
##  $ solar_aptitude        : num  0.72 0.635 0.685 0.659 0.819 0.819 0.818 0.642 0.63 0.374 ...
##  $ solar_aptitude_rounded: int  7 6 7 7 8 8 8 6 6 4 ...
##  $ solar_aptittude_class : chr  "Alta" "Alta" "Alta" "Alta" ...
##  $ capacity              : num  32 40 60 3000 100 100 36 50 25 100 ...
##  $ optimal_tilt          : num  30 31 31.1 33 31 ...
##  $ pv_potential          : num  4.61 4.41 4.57 5.42 5.17 ...
# Cargamos las librerias
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(e1071)

2 Cálculo de Intervalos y Frecuencias

# Extraer variable
Variable <- na.omit(Datos$wind_speed)
N <- length(Variable)

# CÁLCULO LÍMITES DECIMALES
min_dec <- min(Variable)
max_dec <- max(Variable)
k_dec <- floor(1 + 3.322 * log10(N))
rango_dec <- max(Variable) - min(Variable)
amplitud_dec <- rango_dec / k_dec

# Cortes exactos
cortes_dec <- seq(min(Variable), max(Variable), length.out = k_dec + 1)
cortes_dec[length(cortes_dec)] <- max(Variable) + 0.0001

# Frecuencias
inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE, right = FALSE)
ni_dec <- as.vector(table(inter_dec))

# CÁLCULOS MATEMÁTICOS 
hi_dec <- (ni_dec / N) * 100
Ni_asc_dec <- cumsum(ni_dec)
Hi_asc_dec <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# Dataframe Decimal
TDF_Decimal <- data.frame(
  Li = cortes_dec[1:k_dec],
  Ls = cortes_dec[2:(k_dec+1)],
  MC = (cortes_dec[1:k_dec] + cortes_dec[2:(k_dec+1)]) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec)


# CÁLCULO LÍMITES ENTEROS
BASE <- 1
min_int <- floor(min(Variable) / BASE) * BASE
max_int <- ceiling(max(Variable) / BASE) * BASE
k_int_sug <- floor(1 + 3.322 * log10(N))
Rango_int <- max_int - min_int
Amplitud_raw <- Rango_int / k_int_sug

Amplitud_int <- ceiling(Amplitud_raw) 
if(Amplitud_int == 0) Amplitud_int <- 1

cortes_int <- seq(from = min_int, by = Amplitud_int, length.out = k_int_sug + 5) 
cortes_int <- cortes_int[cortes_int <= (max(Variable) + Amplitud_int)]

if(max(cortes_int) < max(Variable)) {
  cortes_int <- c(cortes_int, max(cortes_int) + Amplitud_int)
}

K_real <- length(cortes_int) - 1
lim_inf_int <- cortes_int[1:K_real]
lim_sup_int <- cortes_int[2:(K_real+1)]

# Frecuencias
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))

# CÁLCULOS MATEMÁTICOS
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

# Dataframe Entero
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int)

3 Tabla de Distribución de Frecuencias

3.1 Tabla con Límites Decimales

# Crear Dataframe
TDF_Dec_Final <- data.frame(
  Li      = as.character(round(TDF_Decimal$Li, 2)),
  Ls      = as.character(round(TDF_Decimal$Ls, 2)),
  MC      = as.character(round(TDF_Decimal$MC, 2)),
  ni      = as.character(TDF_Decimal$ni),
  hi      = as.character(round(TDF_Decimal$hi, 2)),
  Ni_asc  = as.character(TDF_Decimal$Ni_asc),
  Ni_desc = as.character(TDF_Decimal$Ni_desc),
  Hi_asc  = as.character(round(TDF_Decimal$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Decimal$Hi_desc, 2))
)

# Calcular Totales
totales_dec <- c("TOTAL", "-", "-", sum(TDF_Decimal$ni), round(sum(TDF_Decimal$hi), 2), "-", "-", "-", "-")
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)

# Generar GT
TDF_Dec_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de Velocidad del Viento (m/s)**")) %>%
  cols_label(
    Li = "Lim. Inf", 
    Ls = "Lim. Sup", 
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)", 
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)", 
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)", 
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")
Tabla N°1 de Distribución de Frecuencias de Velocidad del Viento (m/s)
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
0 0.8 0.4 30491 51.7 30491 58978 51.7 100
0.8 1.59 1.2 22792 38.64 53283 28487 90.34 48.3
1.59 2.39 1.99 2810 4.76 56093 5695 95.11 9.66
2.39 3.19 2.79 668 1.13 56761 2885 96.24 4.89
3.19 3.99 3.59 627 1.06 57388 2217 97.3 3.76
3.99 4.78 4.38 518 0.88 57906 1590 98.18 2.7
4.78 5.58 5.18 489 0.83 58395 1072 99.01 1.82
5.58 6.38 5.98 331 0.56 58726 583 99.57 0.99
6.38 7.17 6.78 160 0.27 58886 252 99.84 0.43
7.17 7.97 7.57 53 0.09 58939 92 99.93 0.16
7.97 8.77 8.37 27 0.05 58966 39 99.98 0.07
8.77 9.57 9.17 9 0.02 58975 12 99.99 0.02
9.57 10.36 9.96 0 0 58975 3 99.99 0.01
10.36 11.16 10.76 2 0 58977 3 100 0.01
11.16 11.96 11.56 0 0 58977 1 100 0
11.96 12.76 12.36 1 0 58978 1 100 0
TOTAL - - 58978 100 - - - -

3.2 Tabla con Límites Enteros

# Crear Dataframe
TDF_Int_Final <- data.frame(
  Li      = as.character(TDF_Enteros$Li),
  Ls      = as.character(TDF_Enteros$Ls),
  MC      = as.character(TDF_Enteros$MC),
  ni      = as.character(TDF_Enteros$ni),
  hi      = as.character(round(TDF_Enteros$hi, 2)),
  Ni_asc  = as.character(TDF_Enteros$Ni_asc),
  Ni_desc = as.character(TDF_Enteros$Ni_desc),
  Hi_asc  = as.character(round(TDF_Enteros$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Enteros$Hi_desc, 2))
)

# Calcular Totales
totales_int <- c("TOTAL", "-", "-", sum(TDF_Enteros$ni), round(sum(TDF_Enteros$hi), 2), "-", "-", "-", "-")
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)

# Generar GT
TDF_Int_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°2 de Distribución de Frecuencias de Velocidad del Viento (m/s)**")) %>%
  cols_label(
    Li = "Lim. Inf", 
    Ls = "Lim. Sup", 
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)", 
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)", 
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)", 
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")
Tabla N°2 de Distribución de Frecuencias de Velocidad del Viento (m/s)
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
0 1 0.5 38480 65.24 38480 58978 65.24 100
1 2 1.5 16934 28.71 55414 20498 93.96 34.76
2 3 2.5 1196 2.03 56610 3564 95.98 6.04
3 4 3.5 784 1.33 57394 2368 97.31 4.02
4 5 4.5 636 1.08 58030 1584 98.39 2.69
5 6 5.5 553 0.94 58583 948 99.33 1.61
6 7 6.5 279 0.47 58862 395 99.8 0.67
7 8 7.5 78 0.13 58940 116 99.94 0.2
8 9 8.5 33 0.06 58973 38 99.99 0.06
9 10 9.5 2 0 58975 5 99.99 0.01
10 11 10.5 2 0 58977 3 100 0.01
11 12 11.5 0 0 58977 1 100 0
12 13 12.5 1 0 58978 1 100 0
TOTAL - - 58978 100 - - - -

4 Análisis Gráfico

4.1 Histogramas de Cantidad

par(mar = c(8, 7, 5, 2)) 
barplot(TDF_Enteros$ni, 
        names.arg = TDF_Enteros$MC,
        main = "",
        xlab = "", 
        ylab = "",
        col = "#87CEEB",
        ylim = c(0, max(TDF_Enteros$ni) * 1.2),
        space = 0, 
        las = 2, 
        cex.names = 0.7)
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Velocidad del Viento (m/s)", side = 1, line = 4)

mtext("Gráfica N°1: Distribución de Cantidad de Plantas Solares por Velocidad del Viento", 
      side = 3, 
      line = 2, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni, 
        main="",
        xlab = "",
        ylab = "",
        names.arg = TDF_Enteros$MC,
        col = "#87CEEB",
        space = 0,
        cex.names = 0.7,
        las = 2,
        ylim = c(0, 58978)) 
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Velocidad del Viento (m/s)", side = 1, line = 4)

mtext("Gráfica N°2: Distribución de Cantidad de Plantas Solares por Velocidad del Viento", 
      side = 3, 
      line = 2, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

4.2 Histogramas Porcentuales

par(mar = c(8, 5, 4, 2))
bp3 <- barplot(TDF_Enteros$hi, 
        main = "", 
        xlab = "", 
        ylab = "Porcentaje (%)", 
        col = "#87CEEB", 
        space = 0, 
        names.arg = TDF_Enteros$MC, 
        cex.names = 0.7, 
        las = 2, 
        ylim = c(0, max(TDF_Enteros$hi) * 1.2))
mtext("Velocidad del Viento (m/s)", side = 1, line = 4)

mtext("Gráfica N°3: Distribución Porcentual de las Plantas Solares por Velocidad del Viento", 
      side = 3, 
      line = 2, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

text(x = bp3, 
     y = TDF_Enteros$hi, 
     labels = paste0(round(TDF_Enteros$hi, 2), "%"), 
     pos = 3, cex = 0.6, col = "black")

par(mar = c(8, 5, 4, 2))
bp4 <- barplot(TDF_Enteros$hi, 
        main = "", 
        xlab = "", 
        ylab = "Porcentaje (%)", 
        col = "#87CEEB", 
        space = 0, 
        names.arg = TDF_Enteros$MC, 
        las = 2, 
        cex.names = 0.7, 
        ylim = c(0, 100))
mtext("Velocidad del Viento (m/s)", side = 1, line = 4)

mtext("Gráfica N°4: Distribución Porcentual Global de las Plantas Solares por Velocidad del Viento", 
      side = 3, 
      line = 2, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

text(x = bp4, 
     y = TDF_Enteros$hi, 
     labels = paste0(round(TDF_Enteros$hi, 2), "%"), 
     pos = 3, cex = 0.6, col = "black")

4.3 Diagrama de Cajas (Boxplot)

par(mar = c(5, 5, 4, 2))
boxplot(Variable, 
        horizontal = TRUE,
        col = "#87CEEB",
        xlab = "Velocidad del Viento (m/s)",
        cex.main = 0.9,
        main = "Gráfica N°5: Distribución de la Velocidad del Viento en las Plantas Solares")

4.4 Ojivas

par(mar = c(5, 5, 4, 10), xpd = TRUE)

# Coordenadas
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc

# 1. Dibujar la Ascendente 
plot(x_asc, y_asc,
     type = "b", 
     main = "",
     xlab = "Velocidad del Viento (m/s)",
     ylab = "Frecuencia acumulada",
     col = "black",
     pch = 19, 
     xlim = c(min(TDF_Enteros$Li), max(x_asc)), 
     ylim = c(0, sum(TDF_Enteros$ni)),
     bty = "l"
)

# 2. Agregar la Descendente 
lines(x_desc, y_desc, col = "#5DADE2", type = "b", pch = 19)

grid()
mtext("Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Velocidad del Viento en las Plantas Solares", 
      side = 3, 
      line = 3, 
      adj = 0.5, 
      cex = 0.9, 
      font = 2)

legend("right", 
       legend = c("Ascendente", "Descendente"), 
       col = c("black", "#5DADE2"), 
       lty = 1, 
       pch = 1, 
       cex = 0.6, 
       inset = c(0.05, 0.05),
       bty = "n")

5 Indicadores Estadísticos

## INDICADORES DE TENDENCIA CENTRAL
# Media aritmética
media <- round(mean(Variable), 2)

# Mediana
mediana <- round(median(Variable), 2)

# Moda
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

## INDICADORES DE DISPERSIÓN
# Varianza
varianza <- var(Variable)

# Desviación Estándar
sd_val <- sd(Variable)

# Coeficiente de Variación
cv <- round((sd_val / abs(media)) * 100, 2)

## INDICADORES DE FORMA
# Coeficiente de Asimetría
asimetria <- skewness(Variable, type = 2)

# Curtosis
curtosis <- kurtosis(Variable)

# Outliers
Q1 <- quantile(Variable, 0.25)
Q3 <- quantile(Variable, 0.75)
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val

outliers_data <- Variable[Variable < lim_inf | Variable > lim_sup]
num_outliers <- length(outliers_data)

if(num_outliers > 0){
  rango_outliers <- paste0(num_outliers, " [", round(min(outliers_data), 2), "; ", round(max(outliers_data), 2), "]")
} else {
  rango_outliers <- "0 [Sin Outliers]"
}

tabla_indicadores <- data.frame(
 "Variable" = c("Velocidad del Viento (m/s)"),
 "Rango_MinMax" = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
 "X" = c(media),
 "Me" = c(mediana),
 "Mo" = c(moda_txt),
 "V" = c(varianza),
 "Sd" = c(sd_val),
 "Cv" = c(cv),
 "As" = c(asimetria),
 "K" = c(curtosis),
 "Outliers" = rango_outliers)

# Generar Tabla GT
tabla_conclusiones_gt <- tabla_indicadores %>%
 gt() %>%
 tab_header(title = md("**Tabla N°3 de Conclusiones de Velocidad del Viento de las Plantas Solares**")) %>%
 tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
 cols_label(
  Variable = "Variable",
  Rango_MinMax = "Rango",
  X = "Media (X)",
  Me = "Mediana (Me)",
  Mo = "Moda (Mo)",
  V = "Varianza (V)",
  Sd = "Desv. Est. (Sd)",
  Cv = "C.V. (%)",
  As = "Asimetría (As)",
  K = "Curtosis (K)",
  Outliers = "Outliers [Intervalo]"
 ) %>%
 tab_options(
  heading.title.font.size = px(16),
  column_labels.background.color = "#F0F0F0"
 )

tabla_conclusiones_gt
Tabla N°3 de Conclusiones de Velocidad del Viento de las Plantas Solares
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Velocidad del Viento (m/s) [0; 12.76] 0.99 0.78 0.5 0.8739566 0.9348565 94.43 3.563565 16.58633 3099 [2.26; 12.76]
Autor: Martin Sarmiento

6 Conclusiones

La variable “Velocidad del Viento” fluctúa entre 0 y 12.76 m/s y sus valores se encuentran alrededor de 0.78 m/s, con una desviación estándar de 0.9348565, siendo una variable altamente heterogénea, cuyos valores se concentran en la parte baja de la variable con la agregación de valores atípicos de 3099 outliers; por todo lo anterior, el comportamiento de la variable es irregular.