##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####
#### VARIABLE APTITUD SOLAR ####
## DATASET ##
# Attach the packages the rest of the script calls into: dplyr re-exports the
# `%>%` pipe, and gt provides gt(), tab_header(), cols_label(), cols_align(),
# tab_options(), tab_source_note(), md() and px().
library(dplyr)
library(gt)
# NOTE(review): skewness(x, type = 2) and kurtosis(x) used further down match
# the signatures in e1071 -- confirm this is the package the original session
# attached before relying on it.
library(e1071)
# NOTE(review): setwd() ties the script to one machine's folder layout; kept
# because the relative CSV path below depends on it.
setwd("~/R/SOLAR_APTITUDE")
# Load the dataset (semicolon-separated, decimal comma, latin1-encoded file)
Datos <- read.csv("DataSet_Mundial_Final.csv", sep = ";", dec = ",", fileEncoding = "latin1")
# Structure of the data
str(Datos)
## 'data.frame': 58978 obs. of 29 variables:
## $ ï..OBJECTID : int 2 3 4 5 6 7 8 9 10 11 ...
## $ code : chr "00001-AFG-P" "00002-AFG-P" "00003-AFG-P" "00004-AFG-P" ...
## $ plant_name : chr "Badghis Solar Power Plant" "Balkh solar farm" "Behsood solar farm" "Dab Pal 4 solar farm" ...
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ operational_status : chr "cancelled - inferred 4 y" "cancelled - inferred 4 y" "cancelled - inferred 4 y" "shelved - inferred 2 y" ...
## $ longitude : num 62.9 67.1 70.4 66.2 65.7 ...
## $ latitude : num 35.1 36.7 34.4 33.8 31.7 ...
## $ elevation : int 918 359 629 2288 1060 1060 1392 398 410 1012 ...
## $ area : num 6.74 10.72 487.73 111.8 1929.96 ...
## $ size : chr "Small" "Small" "Small" "Small" ...
## $ slope : num 7.38 0.49 1.1 6.16 1.23 ...
## $ slope_type : chr "Moderado" "Plano o casi plano" "Plano o casi plano" "Moderado" ...
## $ curvature : num -0.024 0 0 0.045 -0.005 -0.005 -0.015 0 0 -0.009 ...
## $ curvature_type : chr "Superficies cóncavas / Valles" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies convexas / Crestas" ...
## $ aspect : num 96.8 358.5 36.2 305.8 248.4 ...
## $ aspect_type : chr "East" "North" "Northeast" "Northwest" ...
## $ dist_to_road : num 7037.1 92.7 112.1 1705.3 115.8 ...
## $ ambient_temperature : num 14.4 17.88 21.32 8.86 19.64 ...
## $ ghi : num 5.82 5.58 5.8 6.75 6.62 ...
## $ humidity : num 47.7 42.3 36.4 37.3 24.2 ...
## $ wind_speed : num 0.039 0.954 0.234 0.943 0.37 ...
## $ wind_direction : num 187.5 207.4 255.6 160.3 97.7 ...
## $ dt_wind : chr "South" "Southwest" "West" "South" ...
## $ solar_aptitude : num 0.72 0.635 0.685 0.659 0.819 0.819 0.818 0.642 0.63 0.374 ...
## $ solar_aptitude_rounded: int 7 6 7 7 8 8 8 6 6 4 ...
## $ solar_aptittude_class : chr "Alta" "Alta" "Alta" "Alta" ...
## $ capacity : num 32 40 60 3000 100 100 36 50 25 100 ...
## $ optimal_tilt : num 30 31 31.1 33 31 ...
## $ pv_potential : num 4.61 4.41 4.57 5.42 5.17 ...
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Extract the variable under study, dropping missing values
Variable <- na.omit(Datos$solar_aptitude)
N <- length(Variable)
# Every statistic below is undefined for an empty sample, so fail early.
stopifnot(N > 0)
# DECIMAL CLASS LIMITS
min_dec <- min(Variable)
max_dec <- max(Variable)
# Number of classes: 1 + 3.322 * log10(N), truncated to an integer
k_dec <- floor(1 + 3.322 * log10(N))
rango_dec <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec
# k_dec + 1 equally spaced breaks from the minimum to the maximum.
# No padding of the last break is needed: with right = FALSE,
# include.lowest = TRUE makes cut() close the last interval on the right,
# so the maximum is counted and the last class keeps its true upper limit.
cortes_dec <- seq(min_dec, max_dec, length.out = k_dec + 1)
# Absolute frequencies per class
inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE, right = FALSE)
ni_dec <- as.vector(table(inter_dec))
# Relative (%) and cumulative frequencies, ascending and descending
hi_dec <- (ni_dec / N) * 100
Ni_asc_dec <- cumsum(ni_dec)
Hi_asc_dec <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))
# Lower/upper limit of each class: all breaks but the last / but the first
lim_inf_dec <- head(cortes_dec, -1)
lim_sup_dec <- tail(cortes_dec, -1)
# Frequency table with decimal limits (MC = class mark, the interval midpoint)
TDF_Decimal <- data.frame(
  Li = lim_inf_dec,
  Ls = lim_sup_dec,
  MC = (lim_inf_dec + lim_sup_dec) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec
)
# ROUNDED CLASS LIMITS (fixed class width instead of a fixed number of classes)
Amplitud_int <- 0.05
# First break: the minimum snapped down to a multiple of the class width
min_int <- floor(min(Variable) / Amplitud_int) * Amplitud_int
# Count classes until the last break reaches the maximum. A scalar counter
# replaces rebuilding the whole break vector with seq() on every pass; the
# resulting breaks are the same min_int + i * Amplitud_int values.
max_int <- max(Variable)
K_real <- 1L
while (min_int + K_real * Amplitud_int < max_int) {
  K_real <- K_real + 1L
}
cortes_int <- min_int + (0:K_real) * Amplitud_int
lim_inf_int <- cortes_int[seq_len(K_real)]
lim_sup_int <- cortes_int[-1]
# Absolute frequencies per class; intervals are [a, b) and include.lowest = TRUE
# closes the last one on the right
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))
# Relative (%) and cumulative frequencies, ascending and descending
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))
# Frequency table with rounded limits (MC = class mark, the interval midpoint)
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int
)
# Build the display data frame
# Character copy of TDF_Decimal for display: every column is converted to text
# so that a "TOTAL" row containing "-" placeholders can be appended.
TDF_Dec_Final <- data.frame(
  Li = as.character(round(TDF_Decimal$Li, 2)),
  Ls = as.character(round(TDF_Decimal$Ls, 2)),
  MC = as.character(round(TDF_Decimal$MC, 2)),
  ni = as.character(TDF_Decimal$ni),
  hi = as.character(round(TDF_Decimal$hi, 2)),
  Ni_asc = as.character(TDF_Decimal$Ni_asc),
  Ni_desc = as.character(TDF_Decimal$Ni_desc),
  Hi_asc = as.character(round(TDF_Decimal$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Decimal$Hi_desc, 2))
)
# Totals row: only ni and hi have a meaningful column sum
totales_dec <- c("TOTAL", "-", "-", sum(TDF_Decimal$ni), round(sum(TDF_Decimal$hi), 2), "-", "-", "-", "-")
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)
# Render the table with gt. The rendered table title that was fused onto the
# last line of the pipeline is moved to its own line so the call parses.
TDF_Dec_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de Aptitud Solar**")) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")
| Tabla N°1 de Distribución de Frecuencias de Aptitud Solar | ||||||||
| Lim. Inf | Lim. Sup | Marca Clase | Frec. Abs (ni) | Frec. Rel (%) | Ni (Asc) | Ni (Desc) | Hi Asc (%) | Hi Desc (%) |
|---|---|---|---|---|---|---|---|---|
| 0 | 0.06 | 0.03 | 76 | 0.13 | 76 | 58978 | 0.13 | 100 |
| 0.06 | 0.12 | 0.09 | 81 | 0.14 | 157 | 58902 | 0.27 | 99.87 |
| 0.12 | 0.18 | 0.15 | 182 | 0.31 | 339 | 58821 | 0.57 | 99.73 |
| 0.18 | 0.24 | 0.21 | 276 | 0.47 | 615 | 58639 | 1.04 | 99.43 |
| 0.24 | 0.29 | 0.26 | 352 | 0.6 | 967 | 58363 | 1.64 | 98.96 |
| 0.29 | 0.35 | 0.32 | 517 | 0.88 | 1484 | 58011 | 2.52 | 98.36 |
| 0.35 | 0.41 | 0.38 | 487 | 0.83 | 1971 | 57494 | 3.34 | 97.48 |
| 0.41 | 0.47 | 0.44 | 865 | 1.47 | 2836 | 57007 | 4.81 | 96.66 |
| 0.47 | 0.53 | 0.5 | 3502 | 5.94 | 6338 | 56142 | 10.75 | 95.19 |
| 0.53 | 0.59 | 0.56 | 5404 | 9.16 | 11742 | 52640 | 19.91 | 89.25 |
| 0.59 | 0.65 | 0.62 | 8835 | 14.98 | 20577 | 47236 | 34.89 | 80.09 |
| 0.65 | 0.71 | 0.68 | 10817 | 18.34 | 31394 | 38401 | 53.23 | 65.11 |
| 0.71 | 0.77 | 0.74 | 11337 | 19.22 | 42731 | 27584 | 72.45 | 46.77 |
| 0.77 | 0.82 | 0.79 | 8140 | 13.8 | 50871 | 16247 | 86.25 | 27.55 |
| 0.82 | 0.88 | 0.85 | 5630 | 9.55 | 56501 | 8107 | 95.8 | 13.75 |
| 0.88 | 0.94 | 0.91 | 2477 | 4.2 | 58978 | 2477 | 100 | 4.2 |
| TOTAL | - | - | 58978 | 100 | - | - | - | - |
# Character copy of TDF_Enteros for display: every column is converted to text
# so that a "TOTAL" row containing "-" placeholders can be appended.
TDF_Int_Final <- data.frame(
  Li = as.character(round(TDF_Enteros$Li, 2)),
  Ls = as.character(round(TDF_Enteros$Ls, 2)),
  # Class marks of 0.05-wide classes end in a third decimal (0.025, 0.075, ...);
  # rounding them to 2 places displayed unevenly spaced values.
  MC = as.character(round(TDF_Enteros$MC, 3)),
  ni = as.character(TDF_Enteros$ni),
  hi = as.character(round(TDF_Enteros$hi, 2)),
  Ni_asc = as.character(TDF_Enteros$Ni_asc),
  Ni_desc = as.character(TDF_Enteros$Ni_desc),
  Hi_asc = as.character(round(TDF_Enteros$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Enteros$Hi_desc, 2))
)
# Totals row: only ni and hi have a meaningful column sum
totales_int <- c("TOTAL", "-", "-", sum(TDF_Enteros$ni), round(sum(TDF_Enteros$hi), 2), "-", "-", "-", "-")
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)
# Render the table with gt. The rendered table title that was fused onto the
# last line of the pipeline is moved to its own line so the call parses.
TDF_Int_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°2 de Distribución de Frecuencias de Aptitud Solar**")) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")
| Tabla N°2 de Distribución de Frecuencias de Aptitud Solar | ||||||||
| Lim. Inf | Lim. Sup | Marca Clase | Frec. Abs (ni) | Frec. Rel (%) | Ni (Asc) | Ni (Desc) | Hi Asc (%) | Hi Desc (%) |
|---|---|---|---|---|---|---|---|---|
| 0 | 0.05 | 0.03 | 71 | 0.12 | 71 | 58978 | 0.12 | 100 |
| 0.05 | 0.1 | 0.08 | 41 | 0.07 | 112 | 58907 | 0.19 | 99.88 |
| 0.1 | 0.15 | 0.12 | 147 | 0.25 | 259 | 58866 | 0.44 | 99.81 |
| 0.15 | 0.2 | 0.18 | 168 | 0.28 | 427 | 58719 | 0.72 | 99.56 |
| 0.2 | 0.25 | 0.22 | 267 | 0.45 | 694 | 58551 | 1.18 | 99.28 |
| 0.25 | 0.3 | 0.28 | 315 | 0.53 | 1009 | 58284 | 1.71 | 98.82 |
| 0.3 | 0.35 | 0.33 | 443 | 0.75 | 1452 | 57969 | 2.46 | 98.29 |
| 0.35 | 0.4 | 0.38 | 436 | 0.74 | 1888 | 57526 | 3.2 | 97.54 |
| 0.4 | 0.45 | 0.43 | 370 | 0.63 | 2258 | 57090 | 3.83 | 96.8 |
| 0.45 | 0.5 | 0.48 | 2370 | 4.02 | 4628 | 56720 | 7.85 | 96.17 |
| 0.5 | 0.55 | 0.52 | 3985 | 6.76 | 8613 | 54350 | 14.6 | 92.15 |
| 0.55 | 0.6 | 0.58 | 6337 | 10.74 | 14950 | 50365 | 25.35 | 85.4 |
| 0.6 | 0.65 | 0.62 | 5889 | 9.99 | 20839 | 44028 | 35.33 | 74.65 |
| 0.65 | 0.7 | 0.68 | 9612 | 16.3 | 30451 | 38139 | 51.63 | 64.67 |
| 0.7 | 0.75 | 0.73 | 9762 | 16.55 | 40213 | 28527 | 68.18 | 48.37 |
| 0.75 | 0.8 | 0.78 | 7606 | 12.9 | 47819 | 18765 | 81.08 | 31.82 |
| 0.8 | 0.85 | 0.83 | 5696 | 9.66 | 53515 | 11159 | 90.74 | 18.92 |
| 0.85 | 0.9 | 0.88 | 4154 | 7.04 | 57669 | 5463 | 97.78 | 9.26 |
| 0.9 | 0.95 | 0.92 | 1309 | 2.22 | 58978 | 1309 | 100 | 2.22 |
| TOTAL | - | - | 58978 | 100 | - | - | - | - |
# Figure 1: absolute frequencies per class, y axis scaled to the tallest bar
# (20 % headroom). Titles and axis labels are drawn with mtext() so their
# distance from the axes can be set line by line.
par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni,
        # 3 decimals: class marks of 0.05-wide classes are 0.025, 0.075, ...
        names.arg = round(TDF_Enteros$MC, 3),
        main = "",
        xlab = "",
        ylab = "",
        col = "#FFA07A",
        space = 0,
        las = 2,
        cex.names = 0.7,
        ylim = c(0, max(TDF_Enteros$ni) * 1.2))
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Aptitud Solar", side = 1, line = 4)
mtext("Gráfica N°1: Distribución de Cantidad de Plantas Solares por Aptitud Solar",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Margins for the next figure (this call was fused onto the previous line)
par(mar = c(8, 7, 5, 2))
# Figure 2: same absolute frequencies, but with the y axis running up to the
# total number of observations so each bar is seen against the whole sample.
barplot(TDF_Enteros$ni,
        main = "",
        xlab = "",
        ylab = "",
        # 3 decimals: class marks of 0.05-wide classes are 0.025, 0.075, ...
        names.arg = round(TDF_Enteros$MC, 3),
        col = "#FFA07A",
        space = 0,
        cex.names = 0.7,
        las = 2,
        # Total taken from the table instead of the hard-coded 58978, so the
        # axis stays correct if the dataset changes.
        ylim = c(0, sum(TDF_Enteros$ni)))
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Aptitud Solar", side = 1, line = 4)
mtext("Gráfica N°2: Distribución de Cantidad Global de Plantas Solares por Aptitud Solar",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Margins for the next figure (this call was fused onto the previous line)
par(mar = c(8, 5, 5, 2))
# Figure 3: relative frequencies (%) per class, y axis scaled to the tallest
# bar (20 % headroom). barplot() returns the bar midpoints, kept in bp3 to
# place the percentage labels.
bp3 <- barplot(TDF_Enteros$hi,
               main = "",
               xlab = "",
               ylab = "Porcentaje (%)",
               col = "#FFA07A",
               space = 0,
               # 3 decimals: class marks of 0.05-wide classes are 0.025, 0.075, ...
               names.arg = round(TDF_Enteros$MC, 3),
               cex.names = 0.7,
               las = 2,
               ylim = c(0, max(TDF_Enteros$hi) * 1.2))
mtext("Aptitud Solar", side = 1, line = 4)
mtext("Gráfica N°3: Distribución Porcentual de las Plantas Solares por Aptitud Solar",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Percentage label above each bar
text(x = bp3,
     y = TDF_Enteros$hi,
     labels = paste0(round(TDF_Enteros$hi, 2), "%"),
     pos = 3, cex = 0.6, col = "black")
# Margins for the next figure (this call was fused onto the previous line)
par(mar = c(8, 5, 5, 2))
# Figure 4: relative frequencies (%) per class on a fixed 0-100 % axis.
# barplot() returns the bar midpoints, kept in bp4 to place the labels.
bp4 <- barplot(TDF_Enteros$hi,
               main = "",
               xlab = "",
               ylab = "Porcentaje (%)",
               col = "#FFA07A",
               space = 0,
               # 3 decimals: class marks of 0.05-wide classes are 0.025, 0.075, ...
               names.arg = round(TDF_Enteros$MC, 3),
               las = 2,
               cex.names = 0.7,
               ylim = c(0, 100))
mtext("Aptitud Solar", side = 1, line = 4)
mtext("Gráfica N°4: Distribución Porcentual de las Plantas Solares por Aptitud Solar",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Percentage label above each bar
text(x = bp4,
     y = TDF_Enteros$hi,
     labels = paste0(round(TDF_Enteros$hi, 2), "%"),
     pos = 3, cex = 0.6, col = "black")
# Margins for the next figure (this call was fused onto the previous line)
par(mar = c(5, 5, 4, 2))
# Figure 5: horizontal box plot of the raw (ungrouped) values
boxplot(Variable,
        horizontal = TRUE,
        col = "#FFA07A",
        xlab = "Aptitud Solar",
        cex.main = 0.9,
        main = "Gráfica N°5: Distribución de la Aptitud Solar en las Plantas Solares")
# Margins for the next figure (this call was fused onto the previous line);
# xpd = TRUE allows drawing outside the plot region.
par(mar = c(5, 5, 7, 10), xpd = TRUE)
# Figure 6: ascending and descending ogives (cumulative frequency curves).
# The ascending curve is plotted against the upper class limits and the
# descending curve against the lower class limits.
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc
# 1. Draw the ascending ogive; the axes span every class limit and run from 0
#    up to the total count so both curves fit.
plot(x_asc, y_asc,
     type = "b",
     main = "",
     xlab = "Aptitud Solar",
     ylab = "Frecuencia acumulada",
     col = "black",
     pch = 19,
     xlim = c(min(TDF_Enteros$Li), max(x_asc)),
     ylim = c(0, sum(TDF_Enteros$ni)),
     bty = "l"
)
# 2. Add the descending ogive
lines(x_desc, y_desc, col = "#D35400", type = "b", pch = 19)
grid()
mtext("Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Aptitud Solar en las Plantas Solares",
      side = 3,
      line = 3,
      adj = 0.5,
      cex = 0.9,
      font = 2)
legend("left",
       legend = c("Ascendente", "Descendente"),
       col = c("black", "#D35400"),
       lty = 1,
       # Same filled symbol as the curves (they are drawn with pch = 19);
       # the legend previously showed open circles.
       pch = 19,
       cex = 0.6,
       inset = c(0.05, 0.05),
       bty = "n")
## CENTRAL TENDENCY INDICATORS
# Arithmetic mean (rounded for display)
media <- round(mean(Variable), 2)
# Median (rounded for display)
mediana <- round(median(Variable), 2)
# Mode: class mark(s) of the most frequent class(es) in TDF_Enteros, joined
# with ", " when several classes tie
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
# 3 decimals: class marks of 0.05-wide classes are 0.025, 0.075, ...
moda_txt <- paste(round(moda_vals, 3), collapse = ", ")
## DISPERSION INDICATORS
# Variance
varianza <- var(Variable)
# Standard deviation
sd_val <- sd(Variable)
# Coefficient of variation (%). Uses the unrounded mean: dividing by the
# 2-decimal display value `media` carried its rounding error into the result.
cv <- round((sd_val / abs(mean(Variable))) * 100, 2)
## SHAPE INDICATORS
# Skewness coefficient
asimetria <- skewness(Variable, type = 2)
# Kurtosis
curtosis <- kurtosis(Variable)
# Outliers: values beyond 1.5 * IQR from the quartiles
Q1 <- quantile(Variable, 0.25)
Q3 <- quantile(Variable, 0.75)
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val
outliers_data <- Variable[Variable < lim_inf | Variable > lim_sup]
num_outliers <- length(outliers_data)
# Summary text: "<count> [<smallest outlier>; <largest outlier>]"
if (num_outliers > 0) {
  rango_outliers <- paste0(num_outliers, " [", round(min(outliers_data), 2), "; ", round(max(outliers_data), 2), "]")
} else {
  rango_outliers <- "0 [Sin Outliers]"
}
# One-row summary of the indicators computed above
tabla_indicadores <- data.frame(
  Variable = "Aptitud Solar",
  Rango_MinMax = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
  X = media,
  Me = mediana,
  Mo = moda_txt,
  V = varianza,
  Sd = sd_val,
  Cv = cv,
  As = asimetria,
  K = curtosis,
  Outliers = rango_outliers
)
# Build the gt table
tabla_conclusiones_gt <- tabla_indicadores %>%
  gt() %>%
  tab_header(title = md("**Tabla N°3 de Conclusiones de Aptitud Solar de las Plantas Solares**")) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Variable = "Variable",
    Rango_MinMax = "Rango",
    X = "Media (X)",
    Me = "Mediana (Me)",
    Mo = "Moda (Mo)",
    V = "Varianza (V)",
    Sd = "Desv. Est. (Sd)",
    Cv = "C.V. (%)",
    As = "Asimetría (As)",
    K = "Curtosis (K)",
    Outliers = "Outliers [Intervalo]"
  ) %>%
  tab_options(
    heading.title.font.size = px(16),
    column_labels.background.color = "#F0F0F0"
  )
# Print the table. The rendered table title that was fused onto this line is
# moved to its own line so the statement parses.
tabla_conclusiones_gt
| Tabla N°3 de Conclusiones de Aptitud Solar de las Plantas Solares | ||||||||||
| Variable | Rango | Media (X) | Mediana (Me) | Moda (Mo) | Varianza (V) | Desv. Est. (Sd) | C.V. (%) | Asimetría (As) | Curtosis (K) | Outliers [Intervalo] |
|---|---|---|---|---|---|---|---|---|---|---|
| Aptitud Solar | [0; 0.94] | 0.68 | 0.69 | 0.73 | 0.01822424 | 0.1349972 | 19.85 | -0.9593403 | 2.061074 | 1317 [0; 0.34] |
| Autor: Martin Sarmiento | ||||||||||
La variable “Aptitud Solar” fluctúa entre 0 y 0.94 y sus valores se encuentran alrededor de 0.69, con una desviación estándar de 0.13 (varianza de 0.0182), siendo una variable muy homogénea, cuyos valores se concentran en la parte media alta de la variable, con la presencia de 1317 valores atípicos (outliers) en el intervalo [0; 0.34]; por todo lo anterior, el comportamiento de la variable es muy bueno.