1 Configuración y Carga de Datos

##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####


#### VARIABLE LATITUD ####
## DATASET ##
# Point the session at the project folder so the relative CSV path resolves.
setwd("~/R/LATITUD")

# Load the dataset. read.csv2() is read.csv() with sep = ";" and dec = ","
# as defaults, which is exactly the layout of this file.
Datos <- read.csv2(
  file = "Dataset_Mundial_Final.csv",
  fileEncoding = "latin1"
)

# Print a compact overview of the columns and their types.
str(Datos)
## 'data.frame':    58978 obs. of  29 variables:
##  $ ï..OBJECTID           : int  2 3 4 5 6 7 8 9 10 11 ...
##  $ code                  : chr  "00001-AFG-P" "00002-AFG-P" "00003-AFG-P" "00004-AFG-P" ...
##  $ plant_name            : chr  "Badghis Solar Power Plant" "Balkh solar farm" "Behsood solar farm" "Dab Pal 4 solar farm" ...
##  $ country               : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ operational_status    : chr  "cancelled - inferred 4 y" "cancelled - inferred 4 y" "cancelled - inferred 4 y" "shelved - inferred 2 y" ...
##  $ longitude             : num  62.9 67.1 70.4 66.2 65.7 ...
##  $ latitude              : num  35.1 36.7 34.4 33.8 31.7 ...
##  $ elevation             : int  918 359 629 2288 1060 1060 1392 398 410 1012 ...
##  $ area                  : num  6.74 10.72 487.73 111.8 1929.96 ...
##  $ size                  : chr  "Small" "Small" "Small" "Small" ...
##  $ slope                 : num  7.38 0.49 1.1 6.16 1.23 ...
##  $ slope_type            : chr  "Moderado" "Plano o casi plano" "Plano o casi plano" "Moderado" ...
##  $ curvature             : num  -0.024 0 0 0.045 -0.005 -0.005 -0.015 0 0 -0.009 ...
##  $ curvature_type        : chr  "Superficies cóncavas / Valles" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies convexas / Crestas" ...
##  $ aspect                : num  96.8 358.5 36.2 305.8 248.4 ...
##  $ aspect_type           : chr  "East" "North" "Northeast" "Northwest" ...
##  $ dist_to_road          : num  7037.1 92.7 112.1 1705.3 115.8 ...
##  $ ambient_temperature   : num  14.4 17.88 21.32 8.86 19.64 ...
##  $ ghi                   : num  5.82 5.58 5.8 6.75 6.62 ...
##  $ humidity              : num  47.7 42.3 36.4 37.3 24.2 ...
##  $ wind_speed            : num  0.039 0.954 0.234 0.943 0.37 ...
##  $ wind_direction        : num  187.5 207.4 255.6 160.3 97.7 ...
##  $ dt_wind               : chr  "South" "Southwest" "West" "South" ...
##  $ solar_aptitude        : num  0.72 0.635 0.685 0.659 0.819 0.819 0.818 0.642 0.63 0.374 ...
##  $ solar_aptitude_rounded: int  7 6 7 7 8 8 8 6 6 4 ...
##  $ solar_aptittude_class : chr  "Alta" "Alta" "Alta" "Alta" ...
##  $ capacity              : num  32 40 60 3000 100 100 36 50 25 100 ...
##  $ optimal_tilt          : num  30 31 31.1 33 31 ...
##  $ pv_potential          : num  4.61 4.41 4.57 5.42 5.17 ...
# Cargamos las librerias
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(e1071)

2 Cálculo de Intervalos y Frecuencias

# Extract the latitude column, dropping missing values.
Variable <- na.omit(Datos$latitude)
N <- length(Variable)

# ---- Class limits with decimal (exact) boundaries ----
min_dec <- min(Variable)
max_dec <- max(Variable)
# Number of classes from Sturges' formula, truncated to an integer.
k_dec <- floor(1 + 3.322 * log10(N))
rango_dec <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec

# k_dec + 1 equally spaced breaks from the observed minimum to the maximum.
cortes_dec <- seq(min_dec, max_dec, length.out = k_dec + 1)
# Pin the last break to the observed maximum itself. No epsilon is needed:
# with right = FALSE, include.lowest = TRUE makes cut() close the last
# interval on the right, so the maximum is counted without shifting the
# upper limit (and midpoint) of the last class.
cortes_dec[length(cortes_dec)] <- max_dec

# Absolute frequencies: classes are [a, b), except the last one which is [a, b].
inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE, right = FALSE)
ni_dec <- as.vector(table(inter_dec))

# Every observation must fall in exactly one class.
stopifnot(sum(ni_dec) == N)

# ---- Derived frequencies ----
# Relative frequency in percent, plus ascending/descending cumulative versions.
hi_dec <- (ni_dec / N) * 100
Ni_asc_dec <- cumsum(ni_dec)
Hi_asc_dec <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# ---- Frequency table (decimal limits) ----
# Li/Ls are the lower/upper class limits and MC is the class midpoint.
TDF_Decimal <- data.frame(
  Li = head(cortes_dec, -1),
  Ls = tail(cortes_dec, -1),
  MC = (head(cortes_dec, -1) + tail(cortes_dec, -1)) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec
)


# ---- Class limits with integer boundaries (multiples of BASE) ----
# BASE is the rounding unit for both the outer limits and the class width.
BASE <- 10
min_int <- floor(min(Variable) / BASE) * BASE
max_int <- ceiling(max(Variable) / BASE) * BASE
# Suggested number of classes from Sturges' formula, truncated to an integer.
k_int_sug <- floor(1 + 3.322 * log10(N))
Rango_int <- max_int - min_int

# Class width: the raw width rounded UP to the next multiple of BASE, so that
# k_int_sug classes are always enough to cover the rounded range.
# (Uses BASE rather than a literal so the limits stay consistent if BASE changes.)
Amplitud_int <- ceiling((Rango_int / k_int_sug) / BASE) * BASE
if (Amplitud_int == 0) {
  # Degenerate case (zero range): fall back to one BASE-wide class.
  Amplitud_int <- BASE
}

# ---- Break generation ----
cortes_int <- seq(from = min_int, by = Amplitud_int, length.out = k_int_sug + 1)

# Safety net: extend by one class if the breaks stop short of the maximum.
if (max(cortes_int) < max(Variable)) {
  cortes_int <- c(cortes_int, max(cortes_int) + Amplitud_int)
}

# Rounding the width up usually overshoots; drop trailing classes whose lower
# limit is already at or beyond the observed maximum.
while (length(cortes_int) > 2 && cortes_int[length(cortes_int) - 1] >= max(Variable)) {
  cortes_int <- cortes_int[-length(cortes_int)]
}

# Number of classes actually used, and their lower/upper limits.
K_real <- length(cortes_int) - 1
lim_inf_int <- cortes_int[seq_len(K_real)]
lim_sup_int <- cortes_int[seq_len(K_real) + 1]

# ---- Frequencies ----
# Classes are [a, b), except the last one which is closed on the right.
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))

# Relative frequency in percent, plus ascending/descending cumulative versions.
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

# ---- Frequency table (integer limits) ----
# Li/Ls are the lower/upper class limits and MC is the class midpoint.
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int
)

3 Tabla de Distribución de Frecuencias

3.1 Tabla con Límites Decimales

# Presentation copy of the decimal-limit table. Every column is converted to
# text (numeric ones rounded to 2 decimals first) so that a "TOTAL" row mixing
# labels and numbers can be appended below.
TDF_Dec_Final <- with(TDF_Decimal, data.frame(
  Li      = as.character(round(Li, 2)),
  Ls      = as.character(round(Ls, 2)),
  MC      = as.character(round(MC, 2)),
  ni      = as.character(ni),
  hi      = as.character(round(hi, 2)),
  Ni_asc  = as.character(Ni_asc),
  Ni_desc = as.character(Ni_desc),
  Hi_asc  = as.character(round(Hi_asc, 2)),
  Hi_desc = as.character(round(Hi_desc, 2))
))

# Totals row: only the absolute and relative frequencies have a meaningful sum.
totales_dec <- c(
  "TOTAL", "-", "-",
  sum(TDF_Decimal$ni),
  round(sum(TDF_Decimal$hi), 2),
  "-", "-", "-", "-"
)
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)

# Render the table with gt: column labels and alignment first, then the
# title, the source note and the styling options.
TDF_Dec_Final %>%
  gt() %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_header(
    title = md("**Tabla N°1 de Distribución de Frecuencias de Latitud (°) de las Plantas Solares**")
  ) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  tab_options(
    heading.title.font.size = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°1 de Distribución de Frecuencias de Latitud (°) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-41.61 -35.26 -38.44 87 0.15 87 58978 0.15 100
-35.26 -28.92 -32.09 400 0.68 487 58891 0.83 99.85
-28.92 -22.57 -25.74 455 0.77 942 58491 1.6 99.17
-22.57 -16.22 -19.4 854 1.45 1796 58036 3.05 98.4
-16.22 -9.88 -13.05 431 0.73 2227 57182 3.78 96.95
-9.88 -3.53 -6.7 408 0.69 2635 56751 4.47 96.22
-3.53 2.82 -0.36 239 0.41 2874 56343 4.87 95.53
2.82 9.16 5.99 682 1.16 3556 56104 6.03 95.13
9.16 15.51 12.34 2134 3.62 5690 55422 9.65 93.97
15.51 21.86 18.69 1739 2.95 7429 53288 12.6 90.35
21.86 28.21 25.03 4214 7.15 11643 51549 19.74 87.4
28.21 34.55 31.38 8323 14.11 19966 47335 33.85 80.26
34.55 40.9 37.73 18588 31.52 38554 39012 65.37 66.15
40.9 47.25 44.07 8299 14.07 46853 20424 79.44 34.63
47.25 53.59 50.42 10768 18.26 57621 12125 97.7 20.56
53.59 59.94 56.77 1357 2.3 58978 1357 100 2.3
TOTAL - - 58978 100 - - - -
Autor: Martin Sarmiento

3.2 Tabla con Límites Enteros

# Presentation copy of the integer-limit table. Every column is converted to
# text (percentages rounded to 2 decimals first) so that a "TOTAL" row mixing
# labels and numbers can be appended below.
TDF_Int_Final <- with(TDF_Enteros, data.frame(
  Li      = as.character(Li),
  Ls      = as.character(Ls),
  MC      = as.character(MC),
  ni      = as.character(ni),
  hi      = as.character(round(hi, 2)),
  Ni_asc  = as.character(Ni_asc),
  Ni_desc = as.character(Ni_desc),
  Hi_asc  = as.character(round(Hi_asc, 2)),
  Hi_desc = as.character(round(Hi_desc, 2))
))

# Totals row: only the absolute and relative frequencies have a meaningful sum.
totales_int <- c(
  "TOTAL", "-", "-",
  sum(TDF_Enteros$ni),
  round(sum(TDF_Enteros$hi), 2),
  "-", "-", "-", "-"
)
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)

# Render the table with gt: column labels and alignment first, then the
# title, the source note and the styling options.
TDF_Int_Final %>%
  gt() %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_header(
    title = md("**Tabla N°2 de Distribución de Frecuencias de Latitud (°) de las Plantas Solares**")
  ) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  tab_options(
    heading.title.font.size = px(14),
    column_labels.background.color = "#F0F0F0"
  )
Tabla N°2 de Distribución de Frecuencias de Latitud (°) de las Plantas Solares
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
-50 -40 -45 2 0 2 58978 0 100
-40 -30 -35 428 0.73 430 58976 0.73 100
-30 -20 -25 931 1.58 1361 58548 2.31 99.27
-20 -10 -15 863 1.46 2224 57617 3.77 97.69
-10 0 -5 495 0.84 2719 56754 4.61 96.23
0 10 5 1019 1.73 3738 56259 6.34 95.39
10 20 15 3223 5.46 6961 55240 11.8 93.66
20 30 25 6086 10.32 13047 52017 22.12 88.2
30 40 35 23597 40.01 36644 45931 62.13 77.88
40 50 45 14351 24.33 50995 22334 86.46 37.87
50 60 55 7983 13.54 58978 7983 100 13.54
TOTAL - - 58978 100 - - - -
Autor: Martin Sarmiento

4 Análisis Gráfico

4.1 Histogramas de Cantidad

# Histogram of absolute frequencies, y axis scaled to the tallest class
# (with 20% headroom). Wide margins leave room for the mtext() labels.
par(mar = c(8, 7, 5, 2))
barplot(
  height = TDF_Enteros$ni,
  names.arg = TDF_Enteros$MC,  # bars are labelled with the class midpoints
  space = 0,                   # adjacent bars, histogram-like
  col = "#B0C4DE",
  ylim = c(0, 1.2 * max(TDF_Enteros$ni)),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = ""
)

# Axis titles and plot title are drawn in the margins instead of through
# main/xlab/ylab, which are left empty above.
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Latitud (°)", side = 1, line = 4)
mtext(
  "Gráfica N°1: Distribución de Cantidad de Plantas Solares por Latitud",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Histogram of absolute frequencies on a "global" scale: the y axis runs from
# 0 to the total number of observations, so bar heights read as a share of
# the whole sample.
par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni,
        main = "",
        xlab = "",
        ylab = "",
        names.arg = TDF_Enteros$MC,
        col = "#B0C4DE",
        # Upper limit derived from the data instead of a hard-coded count,
        # so it always matches the actual sample size.
        ylim = c(0, sum(TDF_Enteros$ni)),
        space = 0,
        cex.names = 0.7,
        las = 2)
# Axis titles and plot title are drawn in the margins.
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Latitud (°)", side = 1, line = 4)

mtext("Gráfica N°2: Distribución de Cantidad de Plantas Solares por Latitud",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)

4.2 Histogramas Porcentuales

# Histogram of relative frequencies (%), y axis scaled to the tallest class
# with 30% headroom so the value labels above the bars fit.
par(mar = c(8, 5, 5, 2))
bp3 <- barplot(
  height = TDF_Enteros$hi,
  names.arg = TDF_Enteros$MC,  # bars are labelled with the class midpoints
  space = 0,
  col = "#B0C4DE",
  ylim = c(0, 1.3 * max(TDF_Enteros$hi)),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)"
)

# x-axis title and plot title are drawn in the margins.
mtext("Latitud (°)", side = 1, line = 4)
mtext(
  "Gráfica N°3: Distribución Porcentual de las Plantas Solares por Latitud",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Percentage label just above each bar; bp3 holds the bar centres
# returned by barplot().
text(
  x = bp3,
  y = TDF_Enteros$hi,
  labels = paste0(round(TDF_Enteros$hi, 2), "%"),
  pos = 3,
  cex = 0.6,
  col = "black"
)

# Histogram of relative frequencies (%) on a fixed 0-100 scale.
par(mar = c(8, 5, 5, 2))
bp4 <- barplot(
  height = TDF_Enteros$hi,
  names.arg = TDF_Enteros$MC,  # bars are labelled with the class midpoints
  space = 0,
  col = "#B0C4DE",
  ylim = c(0, 100),
  las = 2,
  cex.names = 0.7,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)"
)

# x-axis title and plot title are drawn in the margins.
mtext("Latitud (°)", side = 1, line = 4)
mtext(
  "Gráfica N°4: Distribución Porcentual de las Plantas Solares por Latitud",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)

# Percentage label just above each bar; bp4 holds the bar centres
# returned by barplot().
text(
  x = bp4,
  y = TDF_Enteros$hi,
  labels = paste0(round(TDF_Enteros$hi, 2), "%"),
  pos = 3,
  cex = 0.6,
  col = "black"
)

4.3 Diagrama de Cajas (Boxplot)

# Horizontal box-and-whisker plot of the raw latitude values.
par(mar = c(5, 5, 4, 2))
boxplot(
  x = Variable,
  horizontal = TRUE,
  main = "Gráfica N°5: Distribución de la Latitud en las Plantas Solares",
  cex.main = 0.9,
  xlab = "Latitud (°)",
  col = "#B0C4DE"
)

4.4 Ojivas

# Ascending and descending ogives (cumulative absolute frequencies) drawn on
# the same axes. xpd = TRUE allows drawing outside the plot region.
par(mar = c(5, 5, 7, 10), xpd = TRUE)

# Coordinates: the ascending curve is plotted against the upper class limits,
# the descending curve against the lower class limits.
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc

# 1. Ascending ogive. The axes span all class limits and the full sample size
#    so the descending curve added next fits in the same frame.
plot(x_asc, y_asc,
     type = "b",
     main = "",
     xlab = "Latitud (°)",
     ylab = "Frecuencia Acumulada",
     col = "black",
     pch = 19,
     xlim = c(min(x_desc), max(x_asc)),
     ylim = c(0, sum(TDF_Enteros$ni)))

# 2. Descending ogive on top of the existing plot.
lines(x_desc, y_desc, col = "blue", type = "b", pch = 19)

grid()
mtext("Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Latitud en las Plantas Solares",
      side = 3,
      line = 3,
      adj = 0.5,
      cex = 0.9,
      font = 2)

# Legend keys use the same solid point symbol (pch = 19) as the plotted
# curves, so the legend matches what is drawn.
legend("left",
       legend = c("Ascendente", "Descendente"),
       col = c("black", "blue"),
       lty = 1,
       pch = 19,
       cex = 0.6,
       inset = c(0.05, 0.05),
       bty = "n")

5 Indicadores Estadísticos

## CENTRAL TENDENCY
# Arithmetic mean (rounded for display)
media <- round(mean(Variable), 2)

# Median (rounded for display)
mediana <- round(median(Variable), 2)

# Mode: midpoint(s) of the integer-limit class(es) with the highest absolute
# frequency; ties are reported as a comma-separated list.
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

## DISPERSION
# Variance
varianza <- var(Variable)

# Standard deviation
sd_val <- sd(Variable)

# Coefficient of variation (%). Computed from the unrounded mean: dividing by
# the display-rounded `media` would inject rounding error into the ratio
# before it is itself rounded.
cv <- round((sd_val / abs(mean(Variable))) * 100, 2)

## SHAPE
# Skewness coefficient
asimetria <- skewness(Variable, type = 2)

# Kurtosis
# NOTE(review): kurtosis() is left at its default `type` while skewness()
# above is called with type = 2; confirm both are meant to use the same
# estimator.
curtosis <- kurtosis(Variable)

## OUTLIERS
# Tukey fences: values beyond 1.5 * IQR from the quartiles are flagged.
Q1 <- quantile(Variable, 0.25)
Q3 <- quantile(Variable, 0.75)
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val

outliers_data <- Variable[Variable < lim_inf | Variable > lim_sup]
num_outliers <- length(outliers_data)

# Text summary: "<count> [<smallest outlier>; <largest outlier>]".
if (num_outliers > 0) {
  rango_outliers <- paste0(num_outliers, " [", round(min(outliers_data), 2), "; ", round(max(outliers_data), 2), "]")
} else {
  rango_outliers <- "0 [Sin Outliers]"
}

# One-row summary table with all indicators.
tabla_indicadores <- data.frame(
  "Variable" = "Latitud (°)",
  "Rango_MinMax" = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
  "X" = media,
  "Me" = mediana,
  "Mo" = moda_txt,
  "V" = varianza,
  "Sd" = sd_val,
  "Cv" = cv,
  "As" = asimetria,
  "K" = curtosis,
  "Outliers" = rango_outliers
)

# Build the gt summary table from the one-row indicator data frame:
# styling options and column labels first, then the source note and title.
tabla_conclusiones_gt <- gt(tabla_indicadores) %>%
  tab_options(
    heading.title.font.size = px(16),
    column_labels.background.color = "#F0F0F0"
  ) %>%
  cols_label(
    Variable = "Variable",
    Rango_MinMax = "Rango",
    X = "Media (X)",
    Me = "Mediana (Me)",
    Mo = "Moda (Mo)",
    V = "Varianza (V)",
    Sd = "Desv. Est. (Sd)",
    Cv = "C.V. (%)",
    As = "Asimetría (As)",
    K = "Curtosis (K)",
    Outliers = "Outliers [Intervalo]"
  ) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  tab_header(
    title = md("**Tabla N°3 de Conclusiones de Latitud de las Plantas Solares**")
  )

# Display the finished table.
tabla_conclusiones_gt
Tabla N°3 de Conclusiones de Latitud de las Plantas Solares
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Outliers [Intervalo]
Latitud (°) [-41.61; 59.94] 34.86 37.01 35 256.2386 16.00746 45.92 -1.956953 4.842448 4632 [-41.61; 12.71]
Autor: Martin Sarmiento

6 Conclusiones

La variable “Latitud” fluctúa entre -41.61° y 59.94°, y sus valores se concentran alrededor de 37.01° (mediana), con una desviación estándar de 16.00746°. Es una variable heterogénea (C.V. = 45.92 %), cuyos valores se agrupan en la parte media-alta del rango, y presenta 4632 valores atípicos (outliers); por todo lo anterior, el comportamiento de la variable es irregular.