1 Configuración y Carga de Datos

##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####


#### VARIABLE HUMEDAD ####
## DATASET ##
# Load the dataset by its full path instead of calling setwd(), so that
# running this script does not change the session's working directory.
# The file is read as a semicolon-separated CSV with decimal commas and
# latin1 encoding.
ruta_datos <- file.path("~/R/HUMIDITY", "DataSet_prov.csv")
Datos <- read.csv(ruta_datos, sep = ";", dec = ",", fileEncoding = "latin1")
# Show the structure of the loaded data
str(Datos)
## 'data.frame':    5075 obs. of  30 variables:
##  $ FID_                  : int  0 2 3 4 5 6 10 11 12 13 ...
##  $ OBJECTID              : int  127 129 130 131 132 133 137 138 139 140 ...
##  $ code                  : chr  "00127-ARG-P" "00129-ARG-G" "00130-ARG-P" "00131-ARG-P" ...
##  $ plant_name            : chr  "Aconcagua solar farm" "Altiplano 200 Solar Power Plant" "Altiplano 200 Solar Power Plant" "Anchoris solar farm" ...
##  $ country               : chr  "Argentina" "Argentina" "Argentina" "Argentina" ...
##  $ operational_status    : chr  "announced" "operating" "operating" "construction" ...
##  $ longitude             : num  -68.9 -66.9 -66.9 -68.9 -70.3 ...
##  $ latitude              : num  -33 -24.1 -24.1 -33.3 -37.4 ...
##  $ elevation             : int  929 4000 4000 937 865 858 570 1612 665 3989 ...
##  $ area                  : num  250 4397290 5774 645 241 ...
##  $ size                  : chr  "Pequeña" "Grande" "Pequeña" "Pequeña" ...
##  $ slope                 : num  0.574 1.603 6.243 0.903 1.791 ...
##  $ slope_type            : chr  "Plano o casi plano" "Plano o casi plano" "Moderado" "Plano o casi plano" ...
##  $ curvature             : num  0.000795 -0.002781 -0.043699 0.002781 -0.002384 ...
##  $ curvature_type        : chr  "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies cóncavas / Valles" "Superficies planas o intermedias" ...
##  $ aspect                : num  55.1 188.7 270.9 108.4 239.3 ...
##  $ aspect_type           : chr  "Northeast" "South" "West" "East" ...
##  $ dist_to_road          : num  127 56015 52697 336 34 ...
##  $ ambient_temperature   : num  12.6 6.8 6.8 13.1 11.4 ...
##  $ ghi                   : num  6.11 8.01 7.88 6.12 6.22 ...
##  $ humidity              : num  53.7 53.7 53.7 53.7 53.7 ...
##  $ wind_speed            : num  3.78 7.02 8.33 3.87 6.56 ...
##  $ wind_direction        : num  55.1 55.1 55.1 55.1 55.1 ...
##  $ dt_wind               : chr  "Northeast" "Northeast" "Northeast" "Northeast" ...
##  $ solar_aptitude        : num  0.746 0.8 0.727 0.595 0.657 ...
##  $ solar_aptitude_rounded: int  7 8 7 6 7 7 7 8 7 8 ...
##  $ solar_aptittude_class : chr  "Alta" "Alta" "Alta" "Media" ...
##  $ capacity              : num  25 101 107 180 20 ...
##  $ optimal_tilt          : int  31 26 26 31 33 30 31 29 31 27 ...
##  $ pv_potential          : num  4.98 6.39 6.39 4.97 5 ...
# Cargamos las librerías
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(e1071)

2 Cálculo de Intervalos y Frecuencias

# Extract the humidity column, dropping missing values
Variable <- na.omit(Datos$humidity)
N <- length(Variable)
stopifnot(N > 0)

# DECIMAL LIMITS
# Number of classes from Sturges' rule: floor(1 + 3.322 * log10(N))
min_dec <- min(Variable)
max_dec <- max(Variable)
k_dec <- floor(1 + 3.322 * log10(N))
rango_dec <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec

# k_dec + 1 equally spaced breaks from the minimum to the maximum.
# The last break is not padded: with right = FALSE, include.lowest = TRUE
# makes cut() close the last class on the right, so the maximum is counted
# and the reported upper limit stays exactly equal to max_dec.
cortes_dec <- seq(min_dec, max_dec, length.out = k_dec + 1)

# Absolute frequency of each class
inter_dec <- cut(
  Variable,
  breaks = cortes_dec,
  include.lowest = TRUE,
  right = FALSE
)
ni_dec <- as.vector(table(inter_dec))

# Relative frequency (%) and ascending/descending cumulative frequencies
hi_dec <- (ni_dec / N) * 100
Ni_asc_dec <- cumsum(ni_dec)
Hi_asc_dec <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# Frequency table with decimal limits: lower limit (Li), upper limit (Ls),
# class mark (MC = midpoint of the class) and the frequencies above
TDF_Decimal <- data.frame(
  Li = cortes_dec[-length(cortes_dec)],
  Ls = cortes_dec[-1],
  MC = (cortes_dec[-length(cortes_dec)] + cortes_dec[-1]) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec
)


# INTEGER LIMITS
# Class limits are multiples of BASE that enclose the observed range.
BASE <- 10
min_int <- floor(min(Variable) / BASE) * BASE
max_int <- ceiling(max(Variable) / BASE) * BASE
# If every value equals the same multiple of BASE the range would be empty;
# widen it by one BASE so at least one class exists.
if (max_int == min_int) {
  max_int <- min_int + BASE
}

# Suggested number of classes from Sturges' rule
k_int_sug <- floor(1 + 3.322 * log10(N))
Rango_int <- max_int - min_int
Amplitud_raw <- Rango_int / k_int_sug

# Class width rounded up to the next multiple of BASE
Amplitud_int <- ceiling(Amplitud_raw / BASE) * BASE
if (Amplitud_int == 0) {
  Amplitud_int <- BASE
}

# Breaks run from min_int to max_int (not a hard-coded 100), so there are no
# trailing empty classes and no values left above the last break.
cortes_int <- seq(from = min_int, to = max_int, by = Amplitud_int)

# When the width does not divide the range, close the last class at max_int
if (max(cortes_int) < max_int) {
  cortes_int <- c(cortes_int, max_int)
}

K_real <- length(cortes_int) - 1
lim_inf_int <- cortes_int[-length(cortes_int)]
lim_sup_int <- cortes_int[-1]

# Absolute frequency of each class; the last class also includes its upper
# limit because include.lowest = TRUE is combined with right = FALSE
inter_int <- cut(
  Variable,
  breaks = cortes_int,
  include.lowest = TRUE,
  right = FALSE
)
ni_int <- as.vector(table(inter_int))

# Relative frequency (%) and ascending/descending cumulative frequencies
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

# Frequency table with integer limits
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int
)

3 Tabla de Distribución de Frecuencias

3.1 Tabla con Límites Decimales

# Display copy of the decimal-limit table. Limits, class marks and percentage
# columns are rounded to 2 decimals, then every column is converted to text
# so that a "TOTAL" row can be appended underneath.
cols_2dec <- c("Li", "Ls", "MC", "hi", "Hi_asc", "Hi_desc")
TDF_Dec_Final <- TDF_Decimal
TDF_Dec_Final[cols_2dec] <- lapply(TDF_Dec_Final[cols_2dec], round, digits = 2)
TDF_Dec_Final[] <- lapply(TDF_Dec_Final, as.character)

# Totals row: only the absolute and relative frequency columns have a sum
totales_dec <- c(
  "TOTAL", rep("-", 2),
  sum(TDF_Decimal$ni),
  round(sum(TDF_Decimal$hi), 2),
  rep("-", 4)
)
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)

# Render the table with gt
gt(TDF_Dec_Final) %>%
  tab_header(
    title = md("**Tabla N°1 de Distribución de Frecuencias de Humedad (%) de las Plantas Solares**")
  ) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(
    heading.title.font.size = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°1 de Distribución de Frecuencias de Humedad (%) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
0 7.31 3.65 2 0.04 2 5075 0.04 100
7.31 14.62 10.96 18 0.35 20 5073 0.39 99.96
14.62 21.92 18.27 58 1.14 78 5055 1.54 99.61
21.92 29.23 25.58 33 0.65 111 4997 2.19 98.46
29.23 36.54 32.88 16 0.32 127 4964 2.5 97.81
36.54 43.85 40.19 7 0.14 134 4948 2.64 97.5
43.85 51.15 47.5 102 2.01 236 4941 4.65 97.36
51.15 58.46 54.81 964 19 1200 4839 23.65 95.35
58.46 65.77 62.12 1259 24.81 2459 3875 48.45 76.35
65.77 73.08 69.42 1293 25.48 3752 2616 73.93 51.55
73.08 80.38 76.73 656 12.93 4408 1323 86.86 26.07
80.38 87.69 84.04 446 8.79 4854 667 95.65 13.14
87.69 95 91.35 221 4.35 5075 221 100 4.35
TOTAL - - 5075 100 - - - -

3.2 Tabla con Límites Enteros

# Display copy of the integer-limit table. Only the percentage columns are
# rounded (to 2 decimals); every column is then converted to text so that a
# "TOTAL" row can be appended underneath.
cols_2dec <- c("hi", "Hi_asc", "Hi_desc")
TDF_Int_Final <- TDF_Enteros
TDF_Int_Final[cols_2dec] <- lapply(TDF_Int_Final[cols_2dec], round, digits = 2)
TDF_Int_Final[] <- lapply(TDF_Int_Final, as.character)

# Totals row: only the absolute and relative frequency columns have a sum
totales_int <- c(
  "TOTAL", rep("-", 2),
  sum(TDF_Enteros$ni),
  round(sum(TDF_Enteros$hi), 2),
  rep("-", 4)
)
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)

# Render the table with gt
gt(TDF_Int_Final) %>%
  tab_header(
    title = md("**Tabla N°2 de Distribución de Frecuencias de Humedad (%) de las Plantas Solares**")
  ) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(
    heading.title.font.size = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°2 de Distribución de Frecuencias de Humedad (%) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
0 10 5 2 0.04 2 5075 0.04 100
10 20 15 56 1.1 58 5073 1.14 99.96
20 30 25 53 1.04 111 5017 2.19 98.86
30 40 35 22 0.43 133 4964 2.62 97.81
40 50 45 14 0.28 147 4942 2.9 97.38
50 60 55 1054 20.77 1201 4928 23.67 97.1
60 70 65 1619 31.9 2820 3874 55.57 76.33
70 80 75 1390 27.39 4210 2255 82.96 44.43
80 90 85 859 16.93 5069 865 99.88 17.04
90 100 95 6 0.12 5075 6 100 0.12
TOTAL - - 5075 100 - - - -

4 Análisis Gráfico

4.1 Histogramas de Cantidad

# Figure 1: absolute frequency per class (integer limits), with the y axis
# left at its default range. Bars touch (space = 0) and are labelled with the
# class marks; the x-axis title is written in the enlarged bottom margin.
par(mar = c(8, 5, 5, 2))
barplot(
  height = TDF_Enteros$ni,
  names.arg = TDF_Enteros$MC,
  space = 0,
  col = "#7FFFD4",
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Cantidad"
)
mtext(text = "Humedad (%)", side = 1, line = 4)
mtext(
  text = "Gráfica N°1: Distribución de Cantidad de Plantas Solares por Humedad",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Figure 2: same bars, but the y axis runs from 0 to the total number of
# observations.
par(mar = c(8, 5, 5, 2))
barplot(
  height = TDF_Enteros$ni,
  names.arg = TDF_Enteros$MC,
  space = 0,
  col = "#7FFFD4",
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Cantidad",
  ylim = c(0, sum(TDF_Enteros$ni))
)
mtext(text = "Humedad (%)", side = 1, line = 4)
mtext(
  text = "Gráfica N°2: Distribución de Cantidad de Plantas Solares por Humedad",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

4.2 Histogramas Porcentuales

# Figure 3: relative frequency (%) per class, with 20% headroom above the
# tallest bar so the value labels fit. barplot() returns the bar midpoints,
# which are reused as the x positions of the labels.
par(mar = c(8, 5, 5, 2))
bp3 <- barplot(
  height = TDF_Enteros$hi,
  names.arg = TDF_Enteros$MC,
  space = 0,
  col = "#7FFFD4",
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)",
  ylim = c(0, max(TDF_Enteros$hi) * 1.2)
)
mtext(text = "Humedad (%)", side = 1, line = 4)
mtext(
  text = "Gráfica N°3: Distribución Porcentual de las Plantas Solares por Humedad",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)
# Percentage label just above each bar
text(
  x = bp3,
  y = TDF_Enteros$hi,
  labels = paste0(round(TDF_Enteros$hi, 1), "%"),
  pos = 3,
  cex = 0.6,
  col = "black"
)

# Figure 4: same bars on a fixed 0-100 % axis
par(mar = c(8, 5, 4, 2))
bp4 <- barplot(
  height = TDF_Enteros$hi,
  names.arg = TDF_Enteros$MC,
  space = 0,
  col = "#7FFFD4",
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)",
  ylim = c(0, 100)
)
mtext(text = "Humedad (%)", side = 1, line = 4)
mtext(
  text = "Gráfica N°4: Distribución Porcentual de las Plantas Solares por Humedad",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)
# Percentage label just above each bar
text(
  x = bp4,
  y = TDF_Enteros$hi,
  labels = paste0(round(TDF_Enteros$hi, 1), "%"),
  pos = 3,
  cex = 0.6,
  col = "black"
)

4.3 Diagrama de Cajas (Boxplot)

# Figure 5: horizontal boxplot of the raw humidity values
par(mar = c(5, 5, 4, 2))
boxplot(
  x = Variable,
  main = "Gráfica N°5: Distribución de la Humedad en las Plantas Solares",
  cex.main = 0.9,
  xlab = "Humedad (%)",
  col = "#7FFFD4",
  horizontal = TRUE
)

4.4 Ojivas

# Figure 6: ascending and descending ogives (cumulative absolute frequency)
# built from the integer-limit table.
par(mar = c(5, 5, 7, 10), xpd = TRUE)

# Coordinates: the ascending curve is plotted at the upper class limits,
# the descending curve at the lower class limits
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc

# 1. Ascending ogive; the x axis starts at the first lower limit so that
#    the descending curve fits in the same plot
plot(x_asc, y_asc,
     type = "b",
     main = "",
     xlab = "Humedad (%)",
     ylab = "Frecuencia acumulada",
     col = "black",
     pch = 19,
     xlim = c(min(TDF_Enteros$Li), max(x_asc)),
     ylim = c(0, sum(TDF_Enteros$ni)),
     bty = "l"
)

# 2. Descending ogive
lines(x_desc, y_desc, col = "#66CDAA", type = "b", pch = 19)

# grid() draws its lines with abline(), which is not clipped to the plot
# region while xpd = TRUE; switch clipping back on just for the grid so the
# lines do not run into the margins.
par(xpd = FALSE)
grid()
par(xpd = TRUE)

mtext("Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Humedad en las Plantas Solares",
      side = 3,
      line = 3,
      adj = 0.5,
      cex = 0.9,
      font = 2)

# Legend symbols use pch = 19 (solid) to match the points drawn above
legend("right",
       legend = c("Ascendente", "Descendente"),
       col = c("black", "#66CDAA"),
       lty = 1,
       pch = 19,
       cex = 0.6,
       inset = c(0.05, 0.05),
       bty = "n")

5 Indicadores Estadísticos

## CENTRAL TENDENCY
# Arithmetic mean (rounded for display)
media <- round(mean(Variable), 2)

# Median (rounded for display)
mediana <- round(median(Variable), 2)

# Mode: class mark(s) of the class(es) with the highest absolute frequency
# in the integer-limit table
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

## DISPERSION
# Variance
varianza <- var(Variable)

# Standard deviation
sd_val <- sd(Variable)

# Coefficient of variation (%), computed from the unrounded mean so the
# display rounding of `media` does not leak into the result
cv <- round((sd_val / abs(mean(Variable))) * 100, 2)

## SHAPE
# Skewness (e1071, estimator type 2)
asimetria <- skewness(Variable, type = 2)

# Kurtosis (e1071), using the same estimator type as the skewness so both
# shape indicators follow one convention
curtosis <- kurtosis(Variable, type = 2)

# Outliers: values beyond 1.5 * IQR from the first or third quartile
Q1 <- quantile(Variable, 0.25)
Q3 <- quantile(Variable, 0.75)
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val

outliers_data <- Variable[Variable < lim_inf | Variable > lim_sup]
num_outliers <- length(outliers_data)

# Text summary: "<count> [<min outlier>; <max outlier>]"
if (num_outliers > 0) {
  rango_outliers <- paste0(
    num_outliers,
    " [", round(min(outliers_data), 2), "; ", round(max(outliers_data), 2), "]"
  )
} else {
  rango_outliers <- "0 [Sin Outliers]"
}

# One-row summary table; numeric indicators are shown with 2 decimals,
# consistent with the mean, median and CV
tabla_indicadores <- data.frame(
  "Variable" = c("Humedad (%)"),
  "Rango_MinMax" = paste0(
    "[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"
  ),
  "X" = c(media),
  "Me" = c(mediana),
  "Mo" = c(moda_txt),
  "V" = c(round(varianza, 2)),
  "Sd" = c(round(sd_val, 2)),
  "Cv" = c(cv),
  "As" = c(round(asimetria, 2)),
  "K" = c(round(curtosis, 2)),
  "Outliers" = rango_outliers
)

# Render the summary table with gt
tabla_conclusiones_gt <- tabla_indicadores %>%
  gt() %>%
  tab_header(title = md("**Tabla N°3 de Conclusiones de Humedad de las Plantas Solares**")) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Variable = "Variable",
    Rango_MinMax = "Rango",
    X = "Media (X)",
    Me = "Mediana (Me)",
    Mo = "Moda (Mo)",
    V = "Varianza (V)",
    Sd = "Desv. Est. (Sd)",
    Cv = "C.V. (%)",
    As = "Asimetría (As)",
    K = "Curtosis (K)",
    Outliers = "Outliers [Intervalo]"
  ) %>%
  tab_options(
    heading.title.font.size = px(16),
    column_labels.background.color = "#F0F0F0"
  )

tabla_conclusiones_gt
Tabla N°3 de Conclusiones de Humedad de las Plantas Solares
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Humedad (%) [0; 95] 66.71 67 65 151.7699 12.31949 18.47 -1.178637 4.002398 127 [0; 35]
Autor: Martin Sarmiento

6 Conclusiones

La variable “Humedad” fluctúa entre 0 y 95 % y sus valores se concentran alrededor de 67 %, con una desviación estándar de 12.32 (coeficiente de variación de 18.47 %), por lo que es una variable muy homogénea. Sus valores se agrupan en la parte media-alta del rango y existen 127 valores atípicos, todos en el extremo inferior (entre 0 y 35 %); por todo lo anterior, el comportamiento de la variable es muy regular.