1 Configuración y Carga de Datos

##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####


#### VARIABLE LATITUD ####
## DATASET ##
# Point the session at the project folder so the relative CSV path below resolves.
# NOTE(review): setwd() ties the script to this machine's directory layout.
setwd("~/R/LATITUD")
# Load the dataset: semicolon-separated fields, comma as decimal mark, file read as Latin-1
Datos <- read.csv("Data_Mundial_Final.csv", sep = ";", dec = ",", fileEncoding = "latin1")
# Print the structure of the loaded data frame (column names, types, first values)
str(Datos)
## 'data.frame':    58771 obs. of  29 variables:
##  $ OBJECTID              : int  127 129 131 132 133 137 138 139 140 145 ...
##  $ code                  : chr  "00127-ARG-P" "00129-ARG-G" "00131-ARG-P" "00132-ARG-P" ...
##  $ plant_name            : chr  "Aconcagua solar farm" "Altiplano 200 Solar Power Plant" "Anchoris solar farm" "Antu Newen solar farm" ...
##  $ country               : chr  "Argentina" "Argentina" "Argentina" "Argentina" ...
##  $ operational_status    : chr  "announced" "operating" "construction" "cancelled - inferred 4 y" ...
##  $ longitude             : num  -68.9 -66.9 -68.9 -70.3 -66.8 ...
##  $ latitude              : num  -33 -24.1 -33.3 -37.4 -28.6 ...
##  $ elevation             : int  929 4000 937 865 858 570 1612 665 3989 2640 ...
##  $ area                  : num  250 4397290 645 241 30 ...
##  $ size                  : chr  "Small" "Big" "Small" "Small" ...
##  $ slope                 : num  0.574 1.603 0.903 1.791 1.872 ...
##  $ slope_type            : chr  "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" ...
##  $ curvature             : num  0.000795 -0.002781 0.002781 -0.002384 -0.009137 ...
##  $ curvature_type        : chr  "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" ...
##  $ aspect                : num  55.1 188.7 108.4 239.3 56.2 ...
##  $ aspect_type           : chr  "Northeast" "South" "East" "Southwest" ...
##  $ dist_to_road          : num  127 56015 336 34 314 ...
##  $ ambient_temperature   : num  12.6 6.8 13.1 11.4 18.8 ...
##  $ ghi                   : num  6.11 8.01 6.12 6.22 6.74 ...
##  $ humidity              : num  53.7 53.7 53.7 53.7 51.5 ...
##  $ wind_speed            : num  3.78 7.02 3.87 6.56 7.19 ...
##  $ wind_direction        : num  55.1 55.1 55.1 55.1 114.8 ...
##  $ dt_wind               : chr  "Northeast" "Northeast" "Northeast" "Northeast" ...
##  $ solar_aptitude        : num  0.746 0.8 0.595 0.657 0.743 ...
##  $ solar_aptitude_rounded: int  7 8 6 7 7 7 8 7 8 6 ...
##  $ solar_aptittude_class : chr  "Alta" "Alta" "Media" "Alta" ...
##  $ capacity              : num  25 101 180 20 50.4 ...
##  $ optimal_tilt          : num  31 26 31 33 30 31 29 31 27 32 ...
##  $ pv_potential          : num  4.98 6.39 4.97 5 5.37 ...
# Cargamos las librerias
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(e1071)

2 Cálculo de Intervalos y Frecuencias

# Latitude column with missing values dropped, and its sample size
Variable <- na.omit(Datos$latitude)
N <- length(Variable)

# DECIMAL CLASS LIMITS
# Observed extremes and the spread between them
min_dec <- min(Variable)
max_dec <- max(Variable)
rango_dec <- max_dec - min_dec
# Number of classes from the 1 + 3.322 * log10(N) rule, truncated to a whole number
k_dec <- floor(1 + 3.322 * log10(N))
amplitud_dec <- rango_dec / k_dec

# k_dec + 1 equally spaced breaks from the minimum to the maximum
cortes_dec <- seq(from = min_dec, to = max_dec, length.out = k_dec + 1)
# Nudge the last break just past the maximum observation
cortes_dec[k_dec + 1] <- max_dec + 0.0001

# Absolute frequency per left-closed class
inter_dec <- cut(Variable, breaks = cortes_dec, right = FALSE, include.lowest = TRUE)
ni_dec <- as.vector(table(inter_dec))

# Relative frequency in percent
hi_dec <- 100 * (ni_dec / N)

# Cumulative frequencies: ascending first, then descending (reverse, accumulate, reverse back)
Ni_asc_dec <- cumsum(ni_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_asc_dec <- cumsum(hi_dec)
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# Frequency table with decimal limits; MC is the midpoint of each class
TDF_Decimal <- data.frame(
  Li = cortes_dec[-length(cortes_dec)],
  Ls = cortes_dec[-1],
  MC = (cortes_dec[-length(cortes_dec)] + cortes_dec[-1]) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec
)


# INTEGER CLASS LIMITS
# Multiple to which the outer limits and the class width are rounded
BASE <- 10
min_int <- floor(min(Variable) / BASE) * BASE
max_int <- ceiling(max(Variable) / BASE) * BASE
# Suggested number of classes from the 1 + 3.322 * log10(N) rule
k_int_sug <- floor(1 + 3.322 * log10(N))
Rango_int <- max_int - min_int
Amplitud_raw <- Rango_int / k_int_sug

# Round the class width up to a multiple of BASE. This used a literal 10,
# so editing BASE changed the outer limits but not the class width.
Amplitud_int <- ceiling(Amplitud_raw / BASE) * BASE
if (Amplitud_int == 0) {
  Amplitud_int <- BASE
}

# Candidate breaks starting at min_int, one class width apart
cortes_int <- seq(from = min_int, by = Amplitud_int, length.out = k_int_sug + 2)
# NOTE(review): the filter keeps breaks up to max_int + Amplitud_int, so when a
# break lands exactly on max_int one further break survives and the table ends
# with an empty class. Kept as-is to preserve the published table.
cortes_int <- cortes_int[cortes_int <= (max_int + Amplitud_int)]

# Safety net: keep appending breaks until the largest observation is covered
while (max(cortes_int) < max(Variable)) {
  cortes_int <- c(cortes_int, max(cortes_int) + Amplitud_int)
}

# Actual number of classes and their lower/upper limits
K_real <- length(cortes_int) - 1
lim_inf_int <- cortes_int[seq_len(K_real)]
lim_sup_int <- cortes_int[seq_len(K_real) + 1]

# Absolute frequency per left-closed class
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))

# Relative (%) and cumulative frequencies, ascending and descending
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

# Frequency table with integer limits; MC is the midpoint of each class
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int
)

3 Tabla de Distribución de Frecuencias

3.1 Tabla con Límites Decimales

# Display copy of the decimal table: every column is converted to text
# (numeric columns rounded to 2 decimals) so a "TOTAL" row can be appended
TDF_Dec_Final <- with(TDF_Decimal, data.frame(
  Li      = as.character(round(Li, 2)),
  Ls      = as.character(round(Ls, 2)),
  MC      = as.character(round(MC, 2)),
  ni      = as.character(ni),
  hi      = as.character(round(hi, 2)),
  Ni_asc  = as.character(Ni_asc),
  Ni_desc = as.character(Ni_desc),
  Hi_asc  = as.character(round(Hi_asc, 2)),
  Hi_desc = as.character(round(Hi_desc, 2))
))

# Totals row: only the absolute and relative frequency columns carry a sum
totales_dec <- c(
  "TOTAL", "-", "-",
  sum(TDF_Decimal$ni),
  round(sum(TDF_Decimal$hi), 2),
  rep("-", 4)
)
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)

# Render with gt: title, readable column labels, centred cells, grey header
gt(TDF_Dec_Final) %>%
  tab_header(
    title = md("**Tabla N°1 de Distribución de Frecuencias de Latitud (°) de las Plantas Solares**")
  ) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(columns = everything(), align = "center") %>%
  tab_options(
    heading.title.font.size = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°1 de Distribución de Frecuencias de Latitud (°) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-53.15 -46.08 -49.62 1 0 1 58771 0 100
-46.08 -39.01 -42.55 0 0 1 58770 0 100
-39.01 -31.95 -35.48 238 0.4 239 58770 0.41 100
-31.95 -24.88 -28.41 361 0.61 600 58532 1.02 99.59
-24.88 -17.81 -21.34 782 1.33 1382 58171 2.35 98.98
-17.81 -10.74 -14.28 606 1.03 1988 57389 3.38 97.65
-10.74 -3.67 -7.21 431 0.73 2419 56783 4.12 96.62
-3.67 3.39 -0.14 278 0.47 2697 56352 4.59 95.88
3.39 10.46 6.93 915 1.56 3612 56074 6.15 95.41
10.46 17.53 14 2432 4.14 6044 55159 10.28 93.85
17.53 24.6 21.06 2872 4.89 8916 52727 15.17 89.72
24.6 31.67 28.13 5633 9.58 14549 49855 24.76 84.83
31.67 38.73 35.2 19070 32.45 33619 44222 57.2 75.24
38.73 45.8 42.27 12087 20.57 45706 25152 77.77 42.8
45.8 52.87 49.34 10780 18.34 56486 13065 96.11 22.23
52.87 59.94 56.41 2285 3.89 58771 2285 100 3.89
TOTAL - - 58771 100 - - - -

3.2 Tabla con Límites Enteros

# Display copy of the integer-limit table: every column is converted to text
# (percentages rounded to 2 decimals) so a "TOTAL" row can be appended
TDF_Int_Final <- with(TDF_Enteros, data.frame(
  Li      = as.character(Li),
  Ls      = as.character(Ls),
  MC      = as.character(MC),
  ni      = as.character(ni),
  hi      = as.character(round(hi, 2)),
  Ni_asc  = as.character(Ni_asc),
  Ni_desc = as.character(Ni_desc),
  Hi_asc  = as.character(round(Hi_asc, 2)),
  Hi_desc = as.character(round(Hi_desc, 2))
))

# Totals row: only the absolute and relative frequency columns carry a sum
totales_int <- c(
  "TOTAL", "-", "-",
  sum(TDF_Enteros$ni),
  round(sum(TDF_Enteros$hi), 2),
  rep("-", 4)
)
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)

# Render with gt: title, readable column labels, centred cells, grey header
gt(TDF_Int_Final) %>%
  tab_header(
    title = md("**Tabla N°2 de Distribución de Frecuencias de Latitud (°) de las Plantas Solares**")
  ) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(columns = everything(), align = "center") %>%
  tab_options(
    heading.title.font.size = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°2 de Distribución de Frecuencias de Latitud (°) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-60 -50 -55 1 0 1 58771 0 100
-50 -40 -45 0 0 1 58770 0 100
-40 -30 -35 308 0.52 309 58770 0.53 100
-30 -20 -25 866 1.47 1175 58462 2 99.47
-20 -10 -15 842 1.43 2017 57596 3.43 98
-10 0 -5 495 0.84 2512 56754 4.27 96.57
0 10 5 1019 1.73 3531 56259 6.01 95.73
10 20 15 3223 5.48 6754 55240 11.49 93.99
20 30 25 6087 10.36 12841 52017 21.85 88.51
30 40 35 23597 40.15 36438 45930 62 78.15
40 50 45 14351 24.42 50789 22333 86.42 38
50 60 55 7982 13.58 58771 7982 100 13.58
60 70 65 0 0 58771 0 100 0
TOTAL - - 58771 100 - - - -

4 Análisis Gráfico

4.1 Histogramas de Cantidad

# Enlarged bottom/left margins make room for the rotated tick labels and the
# axis titles that are written with mtext() below
par(mar = c(8, 7, 5, 2))

# Histogram-style bar chart of absolute frequencies (bars touch: space = 0);
# the y axis ends 20% above the tallest bar
with(TDF_Enteros, barplot(
  height = ni,
  names.arg = MC,
  space = 0,
  col = "#B0C4DE",
  ylim = c(0, max(ni) * 1.2),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = ""
))

# Axis titles placed manually in the margins
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Latitud (°)", side = 1, line = 4)

# Centred bold chart title
mtext(
  "Gráfica N°1: Distribución de Cantidad de Plantas Solares por Latitud",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Enlarged bottom/left margins make room for the rotated tick labels and the
# axis titles that are written with mtext() below
par(mar = c(8, 7, 5, 2))

# Same bar chart as the previous one, but on a global scale: the y axis runs
# up to the total number of observations. The total is derived from the table
# instead of the previously hard-coded 58771, so it follows the data.
barplot(TDF_Enteros$ni,
        main = "",
        xlab = "",
        ylab = "",
        names.arg = TDF_Enteros$MC,
        col = "#B0C4DE",
        ylim = c(0, sum(TDF_Enteros$ni)),
        space = 0,
        cex.names = 0.7,
        las = 2)

# Axis titles placed manually in the margins
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Latitud (°)", side = 1, line = 4)

# Centred bold chart title
mtext("Gráfica N°2: Distribución de Cantidad de Plantas Solares por Latitud",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)

4.2 Histogramas Porcentuales

# Enlarged bottom margin for the rotated tick labels and the x-axis title
par(mar = c(8, 5, 5, 2))

# Bar chart of relative frequencies (%); the y axis ends 30% above the tallest
# bar so the value labels fit. bp3 keeps the bar midpoints returned by barplot().
bp3 <- with(TDF_Enteros, barplot(
  height = hi,
  names.arg = MC,
  space = 0,
  col = "#B0C4DE",
  ylim = c(0, max(hi) * 1.3),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)"
))

# x-axis title placed manually in the bottom margin
mtext("Latitud (°)", side = 1, line = 4)

# Centred bold chart title
mtext(
  "Gráfica N°3: Distribución Porcentual de las Plantas Solares por Latitud",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Percentage label above each bar (pos = 3), rounded to one decimal
text(
  x = bp3,
  y = TDF_Enteros$hi,
  labels = paste0(round(TDF_Enteros$hi, 1), "%"),
  pos = 3,
  cex = 0.6,
  col = "black"
)

# Enlarged bottom margin for the rotated tick labels and the x-axis title
par(mar = c(8, 5, 5, 2))

# Bar chart of relative frequencies (%) on the full 0-100 scale.
# bp4 keeps the bar midpoints returned by barplot().
bp4 <- with(TDF_Enteros, barplot(
  height = hi,
  names.arg = MC,
  space = 0,
  col = "#B0C4DE",
  ylim = c(0, 100),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)"
))

# x-axis title placed manually in the bottom margin
mtext("Latitud (°)", side = 1, line = 4)

# Centred bold chart title
mtext(
  "Gráfica N°4: Distribución Porcentual de las Plantas Solares por Latitud",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Percentage label above each bar (pos = 3), rounded to one decimal
text(
  x = bp4,
  y = TDF_Enteros$hi,
  labels = paste0(round(TDF_Enteros$hi, 1), "%"),
  pos = 3,
  cex = 0.6,
  col = "black"
)

4.3 Diagrama de Cajas (Boxplot)

# Margins for a single horizontal boxplot
par(mar = c(5, 5, 4, 2))

# Horizontal boxplot of the raw latitude values
boxplot(
  Variable,
  main = "Gráfica N°5: Distribución de la Latitud en las Plantas Solares",
  cex.main = 0.9,
  xlab = "Latitud (°)",
  col = "#B0C4DE",
  horizontal = TRUE
)

4.4 Ojivas

# Wide top/right margins; xpd = TRUE allows drawing outside the plot region
par(mar = c(5, 5, 7, 10), xpd = TRUE)

# Coordinates: the ascending ogive is plotted against upper class limits,
# the descending ogive against lower class limits
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc

# 1. Ascending ogive; axes sized to hold both curves
plot(x_asc, y_asc,
     type = "b",
     main = "",
     xlab = "Latitud (°)",
     ylab = "Frecuencia Acumulada",
     col = "black",
     pch = 19,
     xlim = c(min(x_desc), max(x_asc)),
     ylim = c(0, sum(TDF_Enteros$ni)))

# 2. Descending ogive on the same axes
lines(x_desc, y_desc, col = "blue", type = "b", pch = 19)

# Draw the grid with clipping switched back on: under xpd = TRUE the grid
# lines would run past the axes into the margins
par(xpd = FALSE)
grid()
par(xpd = TRUE)

# Two-line bold title in the top margin
mtext("Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Latitud en las Plantas Solares",
      side = 3,
      line = 3,
      adj = 0.5,
      cex = 0.9,
      font = 2)

# Legend symbols use pch = 19 so they match the filled points of both curves
# (the legend previously showed open circles, pch = 1)
legend("left",
       legend = c("Ascendente", "Descendente"),
       col = c("black", "blue"),
       lty = 1,
       pch = 19,
       cex = 0.6,
       inset = c(0.05, 0.05),
       bty = "n")

5 Indicadores Estadísticos

## CENTRAL TENDENCY
# Arithmetic mean, rounded to 2 decimals for display
media <- round(mean(Variable), 2)

# Median, rounded to 2 decimals for display
mediana <- round(median(Variable), 2)

# Mode: class mark(s) of the integer-limit class(es) with the highest
# absolute frequency, joined into one string
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

## DISPERSION
# Sample variance
varianza <- var(Variable)

# Sample standard deviation
sd_val <- sd(Variable)

# Coefficient of variation (%). Computed from the unrounded mean: dividing by
# the display-rounded `media` carried its rounding error into the ratio.
cv <- round((sd_val / abs(mean(Variable))) * 100, 2)

## SHAPE
# Skewness coefficient (e1071, estimator type 2)
asimetria <- skewness(Variable, type = 2)

# Kurtosis (e1071, default estimator type)
# NOTE(review): skewness requests type = 2 while kurtosis relies on the package
# default; confirm whether both should use the same estimator type.
curtosis <- kurtosis(Variable)

## OUTLIERS
# Fences at 1.5 * IQR below the first and above the third quartile
Q1 <- quantile(Variable, 0.25)
Q3 <- quantile(Variable, 0.75)
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val

# Observations outside the fences and how many there are
outliers_data <- Variable[Variable < lim_inf | Variable > lim_sup]
num_outliers <- length(outliers_data)

# Text summary: "<count> [<min>; <max>]" of the outliers, or a fixed message
if (num_outliers > 0) {
  rango_outliers <- paste0(num_outliers, " [", round(min(outliers_data), 2), "; ", round(max(outliers_data), 2), "]")
} else {
  rango_outliers <- "0 [Sin Outliers]"
}

# One-row summary of every indicator computed above
tabla_indicadores <- data.frame(
  Variable = "Latitud (°)",
  Rango_MinMax = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
  X = media,
  Me = mediana,
  Mo = moda_txt,
  V = varianza,
  Sd = sd_val,
  Cv = cv,
  As = asimetria,
  K = curtosis,
  Outliers = rango_outliers
)

# Render with gt: title, author note, readable column labels, grey header
tabla_conclusiones_gt <- gt(tabla_indicadores) %>%
  tab_header(
    title = md("**Tabla N°3 de Conclusiones de Latitud de las Plantas Solares**")
  ) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Variable = "Variable",
    Rango_MinMax = "Rango",
    X = "Media (X)",
    Me = "Mediana (Me)",
    Mo = "Moda (Mo)",
    V = "Varianza (V)",
    Sd = "Desv. Est. (Sd)",
    Cv = "C.V. (%)",
    As = "Asimetría (As)",
    K = "Curtosis (K)",
    Outliers = "Outliers [Intervalo]"
  ) %>%
  tab_options(
    column_labels.background.color = "#F0F0F0",
    heading.title.font.size = px(16)
  )

# Print the finished table
tabla_conclusiones_gt
Tabla N°3 de Conclusiones de Latitud de las Plantas Solares
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Latitud (°) [-53.15; 59.94] 35.09 37.05 35 242.417 15.56975 44.37 -1.922878 4.821545 4498 [-53.15; 12.94]
Autor: Martin Sarmiento

6 Conclusiones

La variable “Latitud” fluctúa entre -53.15° y 59.94° y sus valores se encuentran alrededor de 37.05° (mediana), con una desviación estándar de 15.56975°, siendo una variable heterogénea (C.V. = 44.37 %), cuyos valores se concentran en la parte media alta del rango, con la presencia de 4498 valores atípicos (outliers) comprendidos entre -53.15° y 12.94°; por todo lo anterior, el comportamiento de la variable es regular.