1 Configuración y Carga de Datos

##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####


#### VARIABLE AREA ####
## DATASET ##
# Switch to the project folder; the CSV below is read by relative path.
setwd("~/R/AREA")
# Load the dataset (semicolon-separated, Latin-1 encoded).
Datos <- read.csv(
  file = "DataSet_.csv",
  sep = ";",
  fileEncoding = "latin1"
)
# Show the structure (columns and types) of the loaded data.
str(Datos)
## 'data.frame':    7142 obs. of  26 variables:
##  $ fid                  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ objectid             : int  127 128 129 130 131 132 133 134 135 136 ...
##  $ code                 : chr  "Arg-00001" "Arg-00002" "Arg-00003" "Arg-00004" ...
##  $ country              : chr  "Argentina" "Argentina" "Argentina" "Argentina" ...
##  $ plant_name           : chr  "Aconcagua solar farm" "Aconcagua solar farm" "Altiplano 200 Solar Power Plant" "Altiplano 200 Solar Power Plant" ...
##  $ operational_status   : chr  "announced" "announced" "operating" "operating" ...
##  $ longitude            : num  -68.9 -68.9 -66.9 -66.9 -68.9 ...
##  $ latitude             : num  -33 -33 -24.1 -24.1 -33.3 ...
##  $ elevation            : int  929 929 4000 4000 937 865 858 858 858 858 ...
##  $ area                 : num  0 0 4397290 5774 0 ...
##  $ slope                : num  0.574 0.574 1.603 6.243 0.903 ...
##  $ slope_type           : chr  "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" "Moderado" ...
##  $ curvature            : num  0.000795 0.000795 -0.002781 -0.043699 0.002781 ...
##  $ curvature_type       : chr  "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies cóncavas / Valles" ...
##  $ aspect               : num  55.1 55.1 188.7 270.9 108.4 ...
##  $ aspect_type          : chr  "Northeast" "Northeast" "South" "West" ...
##  $ ghi                  : num  6.11 6.11 8.01 7.88 6.12 ...
##  $ solar_aptitude       : num  0.746 0.746 0.8 0.727 0.595 ...
##  $ solar_aptittude_class: chr  "Alta" "Alta" "Alta" "Alta" ...
##  $ humidity             : num  0 0 53.7 53.7 0 ...
##  $ wind_speed           : num  3.78 3.78 7.02 8.33 3.87 ...
##  $ wind_direction       : num  0 0 55.1 55.1 0 ...
##  $ ambient_temperature  : num  12.6 12.6 6.8 6.8 13.1 ...
##  $ optimal_tilt         : int  31 31 26 26 31 33 30 30 30 30 ...
##  $ peak_power_per_hour  : num  4.98 4.98 6.39 6.39 4.97 ...
##  $ total_power          : num  25 66.2 101 107 180 ...
# Cargamos las librerias
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(e1071)

2 Cálculo de Intervalos y Frecuencias

# Extract the area column, discarding missing values.
Variable <- na.omit(Datos$area)
N <- length(Variable)

# Extremes and spread of the observed values.
extremos <- range(Variable)
min_val <- extremos[1]
max_val <- extremos[2]
Rango <- max_val - min_val

# Number of classes (Sturges' rule, truncated to a whole number) and the
# common width of each class.
K <- floor(1 + 3.322 * log10(N))
Amplitud <- Rango / K

# Class limits, class marks and absolute frequencies.
#
# Reads:   Variable, min_val, max_val, K, Amplitud
# Creates: lim_inf, lim_sup (limits rounded to 2 decimals, length K),
#          MC (class marks), ni (absolute frequency per class)

# Fail early with a clear message if the inputs cannot define K classes
# (e.g. an empty variable makes K non-finite).
stopifnot(is.finite(K), K >= 1, is.finite(Amplitud))

# All K + 1 cut points come from a single vector, so the upper limit of a
# class is exactly the lower limit of the next one and the number of limits
# is always K. The last cut point is pinned to the observed maximum so that
# floating-point error can never leave the maximum outside the last class.
cortes <- min_val + (0:K) * Amplitud
cortes[K + 1] <- max_val

# Limits rounded to two decimals; used for display and for the class marks.
lim_inf <- round(cortes[seq_len(K)], 2)
lim_sup <- round(cortes[seq_len(K) + 1], 2)

# Class mark: midpoint of each (rounded) interval.
MC <- (lim_inf + lim_sup) / 2

# Absolute frequencies (ni).
# Counting uses the unrounded cut points: comparing against limits rounded
# to two decimals could exclude a minimum or maximum that lies within the
# rounding error of the first or last limit. Classes are [Li, Ls), except
# the last one, which is closed on the right (rightmost.closed = TRUE).
clase <- findInterval(Variable, cortes, rightmost.closed = TRUE)
ni <- as.numeric(tabulate(clase, nbins = K))

# Relative frequencies, expressed as a percentage of the total count.
sum_ni <- sum(ni)
hi <- prop.table(ni) * 100

# Ascending cumulative frequencies: running totals from the first class.
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)

# Descending cumulative frequencies: running totals from the last class
# back to the first.
acumular_desc <- function(x) rev(cumsum(rev(x)))
Ni_desc <- acumular_desc(ni)
Hi_desc <- acumular_desc(hi)

# Assemble the frequency distribution table, one row per class.
TDF_Area <- data.frame(
  Li = lim_inf, Ls = lim_sup, MC = MC,
  ni = ni, hi = hi,
  Ni_asc = Ni_asc, Ni_desc = Ni_desc,
  Hi_asc = Hi_asc, Hi_desc = Hi_desc
)

# Percentage columns are stored rounded to two decimals.
cols_pct <- c("hi", "Hi_asc", "Hi_desc")
TDF_Area[cols_pct] <- lapply(TDF_Area[cols_pct], round, digits = 2)

3 Tabla de Distribución de Frecuencias

# Totals row: only the absolute and relative frequency columns carry a sum;
# every other column shows a dash.
totales <- c("TOTAL", rep("-", 2), sum(ni), sum(hi), rep("-", 4))

# Convert every column to text so the totals row can be appended below.
TDF_Area_Char <- mutate(TDF_Area, across(everything(), as.character))
TDF_Final <- rbind(TDF_Area_Char, totales)

# Frequency table rendered with gt, built step by step.

# Base table with title, subtitle and author note.
tabla_gt <- gt(TDF_Final)
tabla_gt <- tab_header(
  tabla_gt,
  title = md("**TABLA DE DISTRIBUCIÓN DE FRECUENCIAS**"),
  subtitle = "Variable: Área (m²)"
)
tabla_gt <- tab_source_note(tabla_gt, source_note = "Autor: Martin Sarmiento")

# Human-readable column headers.
tabla_gt <- cols_label(
  tabla_gt,
  Li = "Lim. Inf",
  Ls = "Lim. Sup",
  MC = "Marca Clase",
  ni = "Frec. Abs (ni)",
  hi = "Frec. Rel (%)",
  Ni_asc = "Ni (Asc)",
  Ni_desc = "Ni (Desc)",
  Hi_asc = "Hi Asc (%)",
  Hi_desc = "Hi Desc (%)"
)

# Font sizes and header background.
tabla_gt <- tab_options(
  tabla_gt,
  heading.title.font.size = px(16),
  column_labels.background.color = "#f0f0f0",
  table.font.size = px(14)
)

# Display the table.
tabla_gt
TABLA DE DISTRIBUCIÓN DE FRECUENCIAS
Variable: Área (m²)
Lim. Inf Lim. Sup Marca Clase Frec. Abs (ni) Frec. Rel (%) Ni (Asc) Ni (Desc) Hi Asc (%) Hi Desc (%)
0 1311615.38 655807.69 7044 98.64 7044 7141 98.64 100
1311615.38 2623230.77 1967423.075 27 0.38 7071 97 99.02 1.36
2623230.77 3934846.15 3279038.46 20 0.28 7091 70 99.3 0.98
3934846.15 5246461.54 4590653.845 20 0.28 7111 50 99.58 0.7
5246461.54 6558076.92 5902269.23 10 0.14 7121 30 99.72 0.42
6558076.92 7869692.31 7213884.615 8 0.11 7129 20 99.83 0.28
7869692.31 9181307.69 8525500 0 0 7129 12 99.83 0.17
9181307.69 10492923.08 9837115.385 9 0.13 7138 12 99.96 0.17
10492923.08 11804538.46 11148730.77 2 0.03 7140 3 99.99 0.04
11804538.46 13116153.85 12460346.155 0 0 7140 1 99.99 0.01
13116153.85 14427769.23 13771961.54 0 0 7140 1 99.99 0.01
14427769.23 15739384.62 15083576.925 0 0 7140 1 99.99 0.01
15739384.62 17051000 16395192.31 1 0.01 7141 1 100 0.01
TOTAL - - 7141 100 - - - -
Autor: Martin Sarmiento

4 Gráfico 1 – Frecuencia Local

# Fill colour for the bars.
color_sutil <- "#E3E0AC"

# Chart 1: absolute frequency per class, y axis scaled to the data.
frecuencias <- TDF_Area$ni
marcas <- TDF_Area$MC

# Enlarged bottom margin to leave room for the perpendicular (las = 2)
# class-mark labels.
par(mar = c(8, 5, 4, 2))
barplot(
  height = frecuencias,
  names.arg = marcas,
  col = color_sutil,
  space = 0,
  las = 2,
  cex.names = 0.7,
  main = "Gráfica N°1: Frecuencia de Área en Proyectos",
  cex.main = 0.9,
  xlab = "",
  ylab = "Cantidad"
)
# The x-axis title is written in the margin, below the rotated labels.
mtext("Área (m²)", side = 1, line = 4)

5 Gráfico 2 – Frecuencia Global

# Fill colour for the bars.
color_sutil <- "#E3E0AC"

# Chart 2: absolute frequency per class, with the y axis running up to the
# total number of observations.
frecuencias <- TDF_Area$ni
total_obs <- sum(frecuencias)

# Enlarged bottom margin to leave room for the perpendicular (las = 2)
# class-mark labels.
par(mar = c(8, 5, 4, 2))
barplot(
  height = frecuencias,
  names.arg = TDF_Area$MC,
  ylim = c(0, total_obs),
  col = color_sutil,
  space = 0,
  las = 2,
  cex.axis = 0.7,
  cex.names = 0.7,
  main = "Gráfica N°2: Frecuencia Global de Área",
  cex.main = 0.9,
  xlab = "",
  ylab = "Cantidad"
)
# The x-axis title is written in the margin, below the rotated labels.
mtext("Área (m²)", side = 1, line = 4)

6 Gráfico 3 – Porcentaje Local

# Fill colour for the bars.
color_sutil <- "#E3E0AC"

# Chart 3: relative frequency (%) per class, with 10 % headroom above the
# tallest bar.
porcentajes <- TDF_Area$hi
tope_y <- max(porcentajes) * 1.1

# Enlarged bottom margin to leave room for the perpendicular (las = 2)
# class-mark labels.
par(mar = c(8, 5, 4, 2))
barplot(
  height = porcentajes,
  names.arg = TDF_Area$MC,
  ylim = c(0, tope_y),
  col = color_sutil,
  space = 0,
  las = 2,
  cex.axis = 0.7,
  cex.names = 0.7,
  main = "Gráfica N°3: Porcentaje de Frecuencia de Área",
  cex.main = 0.9,
  xlab = "",
  ylab = "Porcentaje (%)"
)
# The x-axis title is written in the margin, below the rotated labels.
mtext("Área (m²)", side = 1, line = 4)

7 Gráfico 4 – Porcentaje Global

# Fill colour for the bars.
color_sutil <- "#E3E0AC"

# Chart 4: relative frequency (%) per class on a fixed 0-100 scale.
porcentajes <- TDF_Area$hi

# Enlarged bottom margin to leave room for the perpendicular (las = 2)
# class-mark labels.
par(mar = c(8, 5, 4, 2))
barplot(
  height = porcentajes,
  names.arg = TDF_Area$MC,
  ylim = c(0, 100),
  col = color_sutil,
  space = 0,
  las = 2,
  cex.names = 0.7,
  main = "Gráfica N°4: Porcentaje Global de Área",
  cex.main = 0.9,
  xlab = "",
  ylab = "Porcentaje (%)"
)
# The x-axis title is written in the margin, below the rotated labels.
mtext("Área (m²)", side = 1, line = 4)

8 Gráfico 5 – Diagrama de Cajas (Boxplot)

# Chart 5: horizontal box plot of the raw area values.
par(mar = c(5, 5, 4, 2))
boxplot(
  x = Variable,
  main = "Gráfica N°5: Variabilidad del Área de los Proyectos",
  cex.main = 0.9,
  xlab = "Área (m²)",
  col = color_sutil,
  horizontal = TRUE
)

9 Gráfico 6 – Ojivas de Frecuencia Acumulada

# Chart 6: ascending and descending cumulative-frequency ogives.

# Wide right margin; xpd = TRUE allows drawing outside the plot region.
par(mar = c(5, 5, 4, 10), xpd = TRUE)

# Coordinates: the ascending curve is plotted at the upper class limits and
# the descending curve at the lower class limits.
x_asc <- TDF_Area$Ls
x_desc <- TDF_Area$Li
y_asc <- TDF_Area$Ni_asc
y_desc <- TDF_Area$Ni_desc

# 1. Ascending ogive; the axes span all limits and the total count.
plot(x_asc, y_asc,
     type = "b",
     main = "Gráfica N°6: Ojiva de Frecuencia Acumulada (Área)",
     cex.main = 0.9,
     xlab = "Área (m²)",
     ylab = "Frecuencia acumulada",
     col = "black",
     pch = 19,
     xlim = c(min(x_desc), max(x_asc)),
     ylim = c(0, sum(ni)),
     bty = "l"
)

# 2. Descending ogive on the same axes.
lines(x_desc, y_desc, col = "#BDB76B", type = "b", pch = 19)

# Grid lines are not clipped to the plot region while xpd = TRUE, so
# clipping is switched back on just for the grid and then restored.
par(xpd = FALSE)
grid()
par(xpd = TRUE)

# Legend symbols use pch = 19 so they match the filled points drawn above.
legend("right",
       legend = c("Ascendente", "Descendente"),
       col = c("black", "#BDB76B"),
       lty = 1,
       pch = 19,
       cex = 0.6,
       inset = c(0.05, 0.05),
       bty = "n")

10 Indicadores Estadísticos

## CENTRAL TENDENCY
# Arithmetic mean (rounded for reporting).
media <- round(mean(Variable), 2)

# Mode: class mark(s) of the class(es) with the highest absolute frequency
# in the frequency table; several marks are joined with commas on a tie.
max_frecuencia <- max(TDF_Area$ni)
moda_vals <- TDF_Area$MC[TDF_Area$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

# Median of the raw values (rounded for reporting).
mediana <- round(median(Variable), 2)

## DISPERSION
# Sample variance.
varianza <- var(Variable)

# Sample standard deviation.
sd_val <- sd(Variable)

# Coefficient of variation (%). It is computed from the unrounded mean so
# that rounding error is not carried into the ratio, and it is reported as
# NA when the mean is exactly zero (the ratio is undefined there).
media_exacta <- mean(Variable)
cv <- if (media_exacta == 0) {
  NA_real_
} else {
  round((sd_val / abs(media_exacta)) * 100, 2)
}

## SHAPE
# Skewness and kurtosis use the same estimator (e1071 type = 2) so both
# shape indicators are computed under one convention.
asimetria <- skewness(Variable, type = 2)
curtosis <- kurtosis(Variable, type = 2)

# Outliers as flagged by the box-plot rule (boxplot.stats).
outliers <- boxplot.stats(Variable)$out
msg_atipicos <- if (length(outliers) > 0) {
  "Presencia de valores atípicos"
} else {
  "No hay presencia de valores atípicos"
}


# One-row summary table with every indicator computed for the variable.
# The range is shown as "[min; max]", both rounded to two decimals.
rango_txt <- paste0(
  "[", paste(round(range(Variable), 2), collapse = "; "), "]"
)

tabla_indicadores <- data.frame(
  Variable = "Área (m²)",
  Rango = rango_txt,
  X = media,
  Me = round(mediana, 2),
  Mo = moda_txt,
  V = round(varianza, 2),
  Sd = round(sd_val, 2),
  Cv = cv,
  As = round(asimetria, 4),
  K = round(curtosis, 2),
  Valores_Atipicos = msg_atipicos
)

# Summary table rendered with gt, built step by step.

# Base table with title, subtitle and author note.
tabla_conclusiones_gt <- gt(tabla_indicadores)
tabla_conclusiones_gt <- tab_header(
  tabla_conclusiones_gt,
  title = md("**CONCLUSIONES ESTADÍSTICAS**"),
  subtitle = "Resumen de la variable Área"
)
tabla_conclusiones_gt <- tab_source_note(
  tabla_conclusiones_gt,
  source_note = "Autor: Martin Sarmiento"
)

# Human-readable column headers.
tabla_conclusiones_gt <- cols_label(
  tabla_conclusiones_gt,
  Variable = "Variable",
  Rango = "Rango",
  X = "Media (X)",
  Me = "Mediana (Me)",
  Mo = "Moda (Mo)",
  V = "Varianza (V)",
  Sd = "Desv. Est. (Sd)",
  Cv = "C.V. (%)",
  As = "Asimetría (As)",
  K = "Curtosis (K)",
  Valores_Atipicos = "Valores Atípicos"
)

# Title size and header background.
tabla_conclusiones_gt <- tab_options(
  tabla_conclusiones_gt,
  heading.title.font.size = px(16),
  column_labels.background.color = "#f0f0f0"
)

# Display the table.
tabla_conclusiones_gt
CONCLUSIONES ESTADÍSTICAS
Resumen de la variable Área
Variable Rango Media (X) Mediana (Me) Moda (Mo) Varianza (V) Desv. Est. (Sd) C.V. (%) Asimetría (As) Curtosis (K) Valores Atípicos
Área (m²) [0; 17051000] 75491.56 0 655807.69 4.04474e+11 635982.7 842.46 12.5384 193.92 Presencia de valores atípicos
Autor: Martin Sarmiento