##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####
#### VARIABLE APTITUD SOLAR ####
## DATASET ##
setwd("~/R/SOLAR_APTITUDE")
# Cargar dataset
Datos <- read.csv("DataSet_.csv", sep = ";", fileEncoding = "latin1")
# Estructura de los datos
str(Datos)## 'data.frame': 7142 obs. of 26 variables:
## $ fid : int 1 2 3 4 5 6 7 8 9 10 ...
## $ objectid : int 127 128 129 130 131 132 133 134 135 136 ...
## $ code : chr "Arg-00001" "Arg-00002" "Arg-00003" "Arg-00004" ...
## $ country : chr "Argentina" "Argentina" "Argentina" "Argentina" ...
## $ plant_name : chr "Aconcagua solar farm" "Aconcagua solar farm" "Altiplano 200 Solar Power Plant" "Altiplano 200 Solar Power Plant" ...
## $ operational_status : chr "announced" "announced" "operating" "operating" ...
## $ longitude : num -68.9 -68.9 -66.9 -66.9 -68.9 ...
## $ latitude : num -33 -33 -24.1 -24.1 -33.3 ...
## $ elevation : int 929 929 4000 4000 937 865 858 858 858 858 ...
## $ area : num 0 0 4397290 5774 0 ...
## $ slope : num 0.574 0.574 1.603 6.243 0.903 ...
## $ slope_type : chr "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" "Moderado" ...
## $ curvature : num 0.000795 0.000795 -0.002781 -0.043699 0.002781 ...
## $ curvature_type : chr "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies cóncavas / Valles" ...
## $ aspect : num 55.1 55.1 188.7 270.9 108.4 ...
## $ aspect_type : chr "Northeast" "Northeast" "South" "West" ...
## $ ghi : num 6.11 6.11 8.01 7.88 6.12 ...
## $ solar_aptitude : num 0.746 0.746 0.8 0.727 0.595 ...
## $ solar_aptittude_class: chr "Alta" "Alta" "Alta" "Alta" ...
## $ humidity : num 0 0 53.7 53.7 0 ...
## $ wind_speed : num 3.78 3.78 7.02 8.33 3.87 ...
## $ wind_direction : num 0 0 55.1 55.1 0 ...
## $ ambient_temperature : num 12.6 12.6 6.8 6.8 13.1 ...
## $ optimal_tilt : int 31 31 26 26 31 33 30 30 30 30 ...
## $ peak_power_per_hour : num 4.98 4.98 6.39 6.39 4.97 ...
## $ total_power : num 25 66.2 101 107 180 ...
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Extraer variable
Variable <- na.omit(Datos$solar_aptitude)
N <- length(Variable)
# Cálculo Límites Decimales #
# Cálculos básicos
min_dec <- min(Variable)
max_dec <- max(Variable)
k_dec <- floor(1 + 3.322 * log10(N))
rango_dec <- max(Variable) - min(Variable)
amplitud_dec <- rango_dec / k_dec
# Generamos los cortes exactos
cortes_dec <- seq(min(Variable), max(Variable), length.out = k_dec + 1)
cortes_dec[length(cortes_dec)] <- max(Variable) + 0.0001
# Frecuencias
inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE, right = FALSE)
ni_dec <- as.vector(table(inter_dec))
hi_dec <- (ni_dec/N)*100
# Cálculos de Frecuencias
sum_ni <- sum(ni_dec)
hi_dec <- (ni_dec / sum_ni) * 100
Ni_asc_dec <- cumsum(ni_dec)
Hi_asc_dec <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))
# Construcción del Dataframe Decimal
TDF_Decimal <- data.frame(
Li = round(cortes_dec[1:k_dec], 2),
Ls = round(cortes_dec[2:(k_dec+1)], 2),
MC = round((cortes_dec[1:k_dec] + cortes_dec[2:(k_dec+1)]) / 2, 2),
ni = ni_dec,
hi = round(hi_dec, 2),
Ni_asc = cumsum(ni_dec),
Ni_desc = rev(cumsum(rev(ni_dec))),
Hi_asc = cumsum(round(hi_dec, 2)),
Hi_desc = rev(cumsum(rev(round(hi_dec, 2)))))
# Cálculo Límites Enteros #
BASE <- 10
# Cálculos básicos
min_int <- floor(min(Variable) / BASE) * BASE
max_int <- ceiling(max(Variable) / BASE) * BASE
k_int_sug <- floor(1 + 3.322 * log10(N))
Rango_int <- max_int - min_int
Amplitud_raw <- Rango_int / k_int_sug
Amplitud_int <- ceiling(Amplitud_raw / 10) * 10
if(Amplitud_int == 0) Amplitud_int <- 10
# Generar cortes enteros
cortes_int <- seq(from = min_int, by = Amplitud_int, length.out = k_int_sug + 2)
cortes_int <- cortes_int[cortes_int <= (max_int + Amplitud_int)]
# Asegurar cobertura del máximo
while(max(cortes_int) < max(Variable)) {
cortes_int <- c(cortes_int, max(cortes_int) + Amplitud_int)
}
K_real <- length(cortes_int) - 1
lim_inf_int <- cortes_int[1:K_real]
lim_sup_int <- cortes_int[2:(K_real+1)]
# Frecuencias
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))
# Cálculos de Frecuencias
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_asc_int <- cumsum(hi_int)
Hi_desc_int <- rev(cumsum(rev(hi_int)))
# Construcción del Dataframe Entero
TDF_Enteros <- data.frame(
Li = lim_inf_int,
Ls = lim_sup_int,
MC = (lim_inf_int + lim_sup_int) / 2,
ni = ni_int,
hi = round(hi_int, 2),
Ni_asc = Ni_asc_int,
Ni_desc = Ni_desc_int,
Hi_asc = round(Hi_asc_int, 2),
Hi_desc = round(Hi_desc_int, 2))#### Crear de fila de totales ####
totales_dec <- c("TOTAL", "-", "-", sum(TDF_Decimal$ni), 100, "-", "-", "-", "-")
TDF_Dec_Final <- rbind(mutate(TDF_Decimal, across(everything(), as.character)), totales_dec)
# Generar GT Decimal
TDF_Dec_Final %>%
gt() %>%
tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de Aptitud Solar**")) %>%
cols_label(
Li = "Lim. Inf",
Ls = "Lim. Sup",
MC = "Marca Clase",
ni = "Frec. Abs (ni)",
hi = "Frec. Rel (%)",
Ni_asc = "Ni (Asc)",
Ni_desc = "Ni (Desc)",
Hi_asc = "Hi Asc (%)",
Hi_desc = "Hi Desc (%)"
) %>%
tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")| Tabla N°1 de Distribución de Frecuencias de Aptitud Solar | ||||||||
| Lim. Inf | Lim. Sup | Marca Clase | Frec. Abs (ni) | Frec. Rel (%) | Ni (Asc) | Ni (Desc) | Hi Asc (%) | Hi Desc (%) |
|---|---|---|---|---|---|---|---|---|
| -9999 | -9229.78 | -9614.39 | 60 | 0.84 | 60 | 7141 | 0.84 | 100 |
| -9229.78 | -8460.57 | -8845.18 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -8460.57 | -7691.35 | -8075.96 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -7691.35 | -6922.14 | -7306.75 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -6922.14 | -6152.92 | -6537.53 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -6152.92 | -5383.71 | -5768.32 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -5383.71 | -4614.49 | -4999.1 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -4614.49 | -3845.28 | -4229.88 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -3845.28 | -3076.06 | -3460.67 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -3076.06 | -2306.85 | -2691.45 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -2306.85 | -1537.63 | -1922.24 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -1537.63 | -768.42 | -1153.02 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -768.42 | 0.8 | -383.81 | 7081 | 99.16 | 7141 | 7081 | 100 | 99.16 |
| TOTAL | - | - | 7141 | 100 | - | - | - | - |
#### Crear de fila de totales ####
totales_int <- c("TOTAL", "-", "-", sum(TDF_Enteros$ni), 100, "-", "-", "-", "-")
TDF_Int_Final <- rbind(mutate(TDF_Enteros, across(everything(), as.character)), totales_int)
# Generar GT Enteros
TDF_Int_Final %>%
gt() %>%
tab_header(
title = md("**Tabla N°2 de Distribución de Frecuencias de Aptitud Solar**")) %>%
cols_label(
Li = "Lim. Inf",
Ls = "Lim. Sup",
MC = "Marca Clase",
ni = "Frec. Abs (ni)",
hi = "Frec. Rel (%)",
Ni_asc = "Ni (Asc)",
Ni_desc = "Ni (Desc)",
Hi_asc = "Hi Asc (%)",
Hi_desc = "Hi Desc (%)"
) %>%
fmt_number(columns = c(Li, Ls), decimals = 0) %>%
fmt_number(columns = c(hi, Hi_asc, Hi_desc), decimals = 2) %>%
tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")| Tabla N°2 de Distribución de Frecuencias de Aptitud Solar | ||||||||
| Lim. Inf | Lim. Sup | Marca Clase | Frec. Abs (ni) | Frec. Rel (%) | Ni (Asc) | Ni (Desc) | Hi Asc (%) | Hi Desc (%) |
|---|---|---|---|---|---|---|---|---|
| -10000 | -9230 | -9615 | 60 | 0.84 | 60 | 7141 | 0.84 | 100 |
| -9230 | -8460 | -8845 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -8460 | -7690 | -8075 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -7690 | -6920 | -7305 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -6920 | -6150 | -6535 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -6150 | -5380 | -5765 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -5380 | -4610 | -4995 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -4610 | -3840 | -4225 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -3840 | -3070 | -3455 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -3070 | -2300 | -2685 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -2300 | -1530 | -1915 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -1530 | -760 | -1145 | 0 | 0 | 60 | 7081 | 0.84 | 99.16 |
| -760 | 10 | -375 | 7081 | 99.16 | 7141 | 7081 | 100 | 99.16 |
| 10 | 780 | 395 | 0 | 0 | 7141 | 0 | 100 | 0 |
| TOTAL | - | - | 7141 | 100 | - | - | - | - |
color_sutil <- "#FFA07A"
par(mar = c(8, 5, 4, 2))
barplot(TDF_Enteros$ni,
names.arg = round(TDF_Enteros$MC, 3),
main = "Gráfica N°1: Distribución de Cantidad de Plantas Solares por Aptitud Solar",
cex.main = 1,
xlab = "",
ylab = "Cantidad",
col = color_sutil,
space = 0,
las = 2,
cex.names = 0.7)
mtext("Aptitud Solar", side = 1, line = 4)color_grafico <- "#FFA07A"
par(mar = c(8, 5, 4, 2))
barplot(TDF_Enteros$ni,
main="Gráfica N°2: Distribución de Cantidades Globales de las Plantas Solares por Aptitud Solar",
cex.main = 0.8,
xlab = "",
ylab = "Cantidad",
names.arg = round(TDF_Enteros$MC, 3),
col = color_sutil,
space = 0,
cex.names = 0.7,
las = 2,
ylim = c(0, sum(TDF_Enteros$ni)))
mtext("Aptitud Solar", side = 1, line = 4)color_grafico <- "#FFA07A"
par(mar = c(8, 5, 4, 2))
barplot(TDF_Enteros$hi,
main="Gráfica N°3: Distribución Porcentual de las Plantas Solares por Aptitud Solar",
cex.main = 1,
xlab = "",
ylab = "Porcentaje (%)",
col = color_sutil,
space = 0,
names.arg = round(TDF_Enteros$MC, 3),
cex.names = 0.7,
las = 2,
ylim = c(0, max(TDF_Enteros$hi) * 1.1))
mtext("Aptitud Solar", side = 1, line = 4)color_grafico <- "#FFA07A"
par(mar = c(8, 5, 4, 2))
barplot(TDF_Enteros$hi,
main="Gráfica N°4: Distribución Porcentual Global de las Plantas Solares por Aptitud Solar",
cex.main = 0.9,
xlab = "",
ylab = "Porcentaje (%)",
col = color_sutil,
space = 0,
names.arg = round(TDF_Enteros$MC, 3),
las = 2,
cex.names = 0.7,
ylim = c(0, 100))
mtext("Aptitud Solar", side = 1, line = 4)par(mar = c(5, 5, 4, 2))
boxplot(Variable,
horizontal = TRUE,
col = color_sutil,
xlab = "Aptitud Solar",
cex.main = 0.9,
main = "Gráfica N°5: Distribución de la Aptitud Solar en las Plantas Solares")par(mar = c(5, 5, 4, 10), xpd = TRUE)
# Coordenadas
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc
# 1. Dibujar la Ascendente
plot(x_asc, y_asc,
type = "b",
main = "Gráfica N°6: Ojivas Ascendentes y Descendentes de la Distribución de la Aptitud Solar en las Plantas Solares",
cex.main = 0.7,
xlab = "Aptitud Solar",
ylab = "Frecuencia acumulada",
col = "black",
pch = 19,
xlim = c(min(TDF_Enteros$Li), max(x_asc)),
ylim = c(0, sum(TDF_Enteros$ni)),
bty = "l"
)
# 2. Agregar la Descendente
lines(x_desc, y_desc, col = "#D35400", type = "b", pch = 19)
grid()
legend("left",
legend = c("Ascendente", "Descendente"),
col = c("black", "#D35400"),
lty = 1,
pch = 1,
cex = 0.6,
inset = c(0.05, 0.05),
bty = "n")## INDICADORES DE TENDENCIA CENTRAL
# Media aritmética
media <- round(mean(Variable), 2)
# Mediana
mediana <- round(median(Variable), 2)
# Moda
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")
## INDICADORES DE DISPERSIÓN
# Varianza
varianza <- var(Variable)
# Desviación Estándar
sd_val <- sd(Variable)
# Coeficiente de Variación
cv <- round((sd_val / abs(media)) * 100, 2)
## INDICADORES DE FORMA
# Coeficiente de Asimetría
asimetria <- skewness(Variable, type = 2)
# Curtosis
curtosis <- kurtosis(Variable)
# Outliers
outliers_data <- boxplot.stats(Variable)$out
if(length(outliers_data) > 0) {
num_out <- length(outliers_data)
min_out <- round(min(outliers_data), 2)
max_out <- round(max(outliers_data), 2)
# Formato Total [Min; Max]
msg_atipicos <- paste0(" ", num_out, " [", min_out, " ; ", max_out, "]")
} else {
msg_atipicos <- "No hay presencia de valores atípicos"
}
tabla_indicadores <- data.frame(
"Variable" = c("Aptitud Solar"),
"Rango_MinMax" = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
"X" = c(media),
"Me" = c(mediana),
"Mo" = c(moda_txt),
"V" = c(varianza),
"Sd" = c(sd_val),
"Cv" = c(cv),
"As" = c(asimetria),
"K" = c(curtosis),
"Outliers" = msg_atipicos
)
# Generar Tabla GT
tabla_conclusiones_gt <- tabla_indicadores %>%
gt() %>%
tab_header(title = md("**Tabla N°3 de Conclusiones**")) %>%
tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
cols_label(
Variable = "Variable",
Rango_MinMax = "Rango",
X = "Media (X)",
Me = "Mediana (Me)",
Mo = "Moda (Mo)",
V = "Varianza (V)",
Sd = "Desv. Est. (Sd)",
Cv = "C.V. (%)",
As = "Asimetría (As)",
K = "Curtosis (K)",
Outliers = "Outliers"
) %>%
tab_options(
heading.title.font.size = px(16),
column_labels.background.color = "#f0f0f0"
)
tabla_conclusiones_gt| Tabla N°3 de Conclusiones | ||||||||||
| Variable | Rango | Media (X) | Mediana (Me) | Moda (Mo) | Varianza (V) | Desv. Est. (Sd) | C.V. (%) | Asimetría (As) | Curtosis (K) | Outliers |
|---|---|---|---|---|---|---|---|---|---|---|
| Aptitud Solar | [-9999; 0.8] | -83.34 | 0.67 | -375 | 833222.3 | 912.8101 | 1095.28 | -10.77376 | 113.9924 | 60 [-9999 ; -9999] |
| Autor: Martin Sarmiento | ||||||||||
La variable “Aptitud Solar” fluctúa entre -9999 y 0.8 y sus valores se encuentran alrededor de 0.67, con una desviación estándar de 912.8101, siendo una variable muy heterogénea, cuyos valores se concentran en la parte media alta de la variable con la agregación de valores atípicos de 60 outliers; por todo lo anterior, el comportamiento de la variable es muy perjudicial.