##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####
#### VARIABLE ELEVACION ####
## DATASET ##
# Set the working directory so the relative CSV path below resolves.
# NOTE(review): a user-specific setwd() makes the script non-portable —
# consider a project-relative path instead.
setwd("~/R/ELEVATION")
# Load the dataset: ";" field separator, "," decimal mark, Latin-1 encoded file
Datos <- read.csv("Data_Mundial_Final.csv", sep = ";", dec = ",", fileEncoding = "latin1")
# Inspect the structure of the data (the rendered output follows as `##` lines)
str(Datos)## 'data.frame': 58771 obs. of 29 variables:
## $ OBJECTID : int 127 129 131 132 133 137 138 139 140 145 ...
## $ code : chr "00127-ARG-P" "00129-ARG-G" "00131-ARG-P" "00132-ARG-P" ...
## $ plant_name : chr "Aconcagua solar farm" "Altiplano 200 Solar Power Plant" "Anchoris solar farm" "Antu Newen solar farm" ...
## $ country : chr "Argentina" "Argentina" "Argentina" "Argentina" ...
## $ operational_status : chr "announced" "operating" "construction" "cancelled - inferred 4 y" ...
## $ longitude : num -68.9 -66.9 -68.9 -70.3 -66.8 ...
## $ latitude : num -33 -24.1 -33.3 -37.4 -28.6 ...
## $ elevation : int 929 4000 937 865 858 570 1612 665 3989 2640 ...
## $ area : num 250 4397290 645 241 30 ...
## $ size : chr "Small" "Big" "Small" "Small" ...
## $ slope : num 0.574 1.603 0.903 1.791 1.872 ...
## $ slope_type : chr "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" ...
## $ curvature : num 0.000795 -0.002781 0.002781 -0.002384 -0.009137 ...
## $ curvature_type : chr "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" ...
## $ aspect : num 55.1 188.7 108.4 239.3 56.2 ...
## $ aspect_type : chr "Northeast" "South" "East" "Southwest" ...
## $ dist_to_road : num 127 56015 336 34 314 ...
## $ ambient_temperature : num 12.6 6.8 13.1 11.4 18.8 ...
## $ ghi : num 6.11 8.01 6.12 6.22 6.74 ...
## $ humidity : num 53.7 53.7 53.7 53.7 51.5 ...
## $ wind_speed : num 3.78 7.02 3.87 6.56 7.19 ...
## $ wind_direction : num 55.1 55.1 55.1 55.1 114.8 ...
## $ dt_wind : chr "Northeast" "Northeast" "Northeast" "Northeast" ...
## $ solar_aptitude : num 0.746 0.8 0.595 0.657 0.743 ...
## $ solar_aptitude_rounded: int 7 8 6 7 7 7 8 7 8 6 ...
## $ solar_aptittude_class : chr "Alta" "Alta" "Media" "Alta" ...
## $ capacity : num 25 101 180 20 50.4 ...
## $ optimal_tilt : num 31 26 31 33 30 31 29 31 27 32 ...
## $ pv_potential : num 4.98 6.39 4.97 5 5.37 ...
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Extract the variable under study, dropping missing elevations
Variable <- na.omit(Datos$elevation)
N <- length(Variable)

# DECIMAL CLASS LIMITS
# Number of classes from Sturges' rule: k = 1 + 3.322 * log10(N)
k_dec <- floor(1 + 3.322 * log10(N))
min_dec <- min(Variable)
max_dec <- max(Variable)
rango_dec <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec

# Exact breaks: k_dec + 1 equally spaced points over [min, max]. The last
# break is nudged up by 0.0001 so the maximum falls inside the last class.
cortes_dec <- seq(min_dec, max_dec, length.out = k_dec + 1)
cortes_dec[k_dec + 1] <- max_dec + 0.0001

# Absolute frequencies per left-closed class [a, b)
inter_dec <- cut(
  Variable,
  breaks = cortes_dec,
  right = FALSE,
  include.lowest = TRUE
)
ni_dec <- as.vector(table(inter_dec))

# Relative (%) and cumulative frequencies, ascending and descending
hi_dec <- ni_dec / N * 100
Ni_asc_dec <- cumsum(ni_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_asc_dec <- cumsum(hi_dec)
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# Frequency distribution table with decimal limits
# (Li = lower limit, Ls = upper limit, MC = class mark)
TDF_Decimal <- data.frame(
  Li = cortes_dec[-(k_dec + 1)],
  Ls = cortes_dec[-1],
  MC = (cortes_dec[-(k_dec + 1)] + cortes_dec[-1]) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec
)
# INTEGER CLASS LIMITS
# Class limits and the class width are all rounded to multiples of BASE.
BASE <- 10
min_int <- floor(min(Variable) / BASE) * BASE
max_int <- ceiling(max(Variable) / BASE) * BASE
# Suggested number of classes from Sturges' rule
k_int_sug <- floor(1 + 3.322 * log10(N))
Rango_int <- max_int - min_int
Amplitud_raw <- Rango_int / k_int_sug
# Round the width up to a multiple of BASE. This uses BASE rather than a
# repeated literal 10, so changing BASE keeps limits and width consistent.
Amplitud_int <- ceiling(Amplitud_raw / BASE) * BASE
if (Amplitud_int == 0) {
  # Degenerate case (zero range): fall back to one BASE-wide class
  Amplitud_int <- BASE
}
# Candidate breaks starting at min_int; drop those more than one class width
# beyond max_int
cortes_int <- seq(from = min_int, by = Amplitud_int, length.out = k_int_sug + 2)
cortes_int <- cortes_int[cortes_int <= (max_int + Amplitud_int)]
# Safety net: extend the breaks until they cover the observed maximum
while (max(cortes_int) < max(Variable)) {
  cortes_int <- c(cortes_int, max(cortes_int) + Amplitud_int)
}
# Number of classes actually produced
K_real <- length(cortes_int) - 1
lim_inf_int <- cortes_int[seq_len(K_real)]
lim_sup_int <- cortes_int[seq_len(K_real) + 1]
# Absolute frequencies per left-closed class [a, b)
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE, right = FALSE)
ni_int <- as.vector(table(inter_int))
# Relative (%) and cumulative frequencies, ascending and descending
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))
# Frequency distribution table with integer limits
# (Li = lower limit, Ls = upper limit, MC = class mark)
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int
)
# Create the data frame
# Display version of the decimal-limits table: every column is converted to
# character so that a textual TOTAL row can be appended underneath.
TDF_Dec_Final <- with(TDF_Decimal, data.frame(
  Li = as.character(round(Li, 2)),
  Ls = as.character(round(Ls, 2)),
  MC = as.character(round(MC, 2)),
  ni = as.character(ni),
  hi = as.character(round(hi, 2)),
  Ni_asc = as.character(Ni_asc),
  Ni_desc = as.character(Ni_desc),
  Hi_asc = as.character(round(Hi_asc, 2)),
  Hi_desc = as.character(round(Hi_desc, 2))
))
# Totals row: only ni and hi have a meaningful total; other cells show "-"
totales_dec <- c(
  "TOTAL", "-", "-",
  sum(TDF_Decimal$ni), round(sum(TDF_Decimal$hi), 2),
  "-", "-", "-", "-"
)
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)
# Render the table with gt. The rendered table row that was fused onto the
# last call is moved to its own line so the pipeline parses.
TDF_Dec_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de Elevación (m.s.n.m.) de las Plantas Solares**")) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")
| Tabla N°1 de Distribución de Frecuencias de Elevación (m.s.n.m.) de las Plantas Solares | ||||||||
| Lim. Inf | Lim. Sup | Marca Clase | Frec. Abs (ni) | Frec. Rel (%) | Ni (Asc) | Ni (Desc) | Hi Asc (%) | Hi Desc (%) |
|---|---|---|---|---|---|---|---|---|
| -378 | -0.88 | -189.44 | 255 | 0.43 | 255 | 58771 | 0.43 | 100 |
| -0.88 | 376.25 | 187.69 | 41914 | 71.32 | 42169 | 58516 | 71.75 | 99.57 |
| 376.25 | 753.38 | 564.81 | 8716 | 14.83 | 50885 | 16602 | 86.58 | 28.25 |
| 753.38 | 1130.5 | 941.94 | 3216 | 5.47 | 54101 | 7886 | 92.05 | 13.42 |
| 1130.5 | 1507.62 | 1319.06 | 2245 | 3.82 | 56346 | 4670 | 95.87 | 7.95 |
| 1507.62 | 1884.75 | 1696.19 | 1095 | 1.86 | 57441 | 2425 | 97.74 | 4.13 |
| 1884.75 | 2261.88 | 2073.31 | 461 | 0.78 | 57902 | 1330 | 98.52 | 2.26 |
| 2261.88 | 2639 | 2450.44 | 270 | 0.46 | 58172 | 869 | 98.98 | 1.48 |
| 2639 | 3016.12 | 2827.56 | 269 | 0.46 | 58441 | 599 | 99.44 | 1.02 |
| 3016.12 | 3393.25 | 3204.69 | 124 | 0.21 | 58565 | 330 | 99.65 | 0.56 |
| 3393.25 | 3770.38 | 3581.81 | 72 | 0.12 | 58637 | 206 | 99.77 | 0.35 |
| 3770.38 | 4147.5 | 3958.94 | 59 | 0.1 | 58696 | 134 | 99.87 | 0.23 |
| 4147.5 | 4524.62 | 4336.06 | 39 | 0.07 | 58735 | 75 | 99.94 | 0.13 |
| 4524.62 | 4901.75 | 4713.19 | 25 | 0.04 | 58760 | 36 | 99.98 | 0.06 |
| 4901.75 | 5278.88 | 5090.31 | 10 | 0.02 | 58770 | 11 | 100 | 0.02 |
| 5278.88 | 5656 | 5467.44 | 1 | 0 | 58771 | 1 | 100 | 0 |
| TOTAL | - | - | 58771 | 100 | - | - | - | - |
# Display version of the integer-limits table: every column is converted to
# character so that a textual TOTAL row can be appended underneath.
TDF_Int_Final <- with(TDF_Enteros, data.frame(
  Li = as.character(Li),
  Ls = as.character(Ls),
  MC = as.character(MC),
  ni = as.character(ni),
  hi = as.character(round(hi, 2)),
  Ni_asc = as.character(Ni_asc),
  Ni_desc = as.character(Ni_desc),
  Hi_asc = as.character(round(Hi_asc, 2)),
  Hi_desc = as.character(round(Hi_desc, 2))
))
# Totals row: only ni and hi have a meaningful total; other cells show "-"
totales_int <- c(
  "TOTAL", "-", "-",
  sum(TDF_Enteros$ni), round(sum(TDF_Enteros$hi), 2),
  "-", "-", "-", "-"
)
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)
# Render the table with gt. The rendered table row that was fused onto the
# last call is moved to its own line so the pipeline parses.
TDF_Int_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°2 de Distribución de Frecuencias de Elevación (m.s.n.m.) de las Plantas Solares**")) %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")
| Tabla N°2 de Distribución de Frecuencias de Elevación (m.s.n.m.) de las Plantas Solares | ||||||||
| Lim. Inf | Lim. Sup | Marca Clase | Frec. Abs (ni) | Frec. Rel (%) | Ni (Asc) | Ni (Desc) | Hi Asc (%) | Hi Desc (%) |
|---|---|---|---|---|---|---|---|---|
| -380 | 0 | -190 | 255 | 0.43 | 255 | 58771 | 0.43 | 100 |
| 0 | 380 | 190 | 42033 | 71.52 | 42288 | 58516 | 71.95 | 99.57 |
| 380 | 760 | 570 | 8676 | 14.76 | 50964 | 16483 | 86.72 | 28.05 |
| 760 | 1140 | 950 | 3185 | 5.42 | 54149 | 7807 | 92.14 | 13.28 |
| 1140 | 1520 | 1330 | 2241 | 3.81 | 56390 | 4622 | 95.95 | 7.86 |
| 1520 | 1900 | 1710 | 1076 | 1.83 | 57466 | 2381 | 97.78 | 4.05 |
| 1900 | 2280 | 2090 | 456 | 0.78 | 57922 | 1305 | 98.56 | 2.22 |
| 2280 | 2660 | 2470 | 256 | 0.44 | 58178 | 849 | 98.99 | 1.44 |
| 2660 | 3040 | 2850 | 287 | 0.49 | 58465 | 593 | 99.48 | 1.01 |
| 3040 | 3420 | 3230 | 103 | 0.18 | 58568 | 306 | 99.65 | 0.52 |
| 3420 | 3800 | 3610 | 70 | 0.12 | 58638 | 203 | 99.77 | 0.35 |
| 3800 | 4180 | 3990 | 62 | 0.11 | 58700 | 133 | 99.88 | 0.23 |
| 4180 | 4560 | 4370 | 37 | 0.06 | 58737 | 71 | 99.94 | 0.12 |
| 4560 | 4940 | 4750 | 25 | 0.04 | 58762 | 34 | 99.98 | 0.06 |
| 4940 | 5320 | 5130 | 8 | 0.01 | 58770 | 9 | 100 | 0.02 |
| 5320 | 5700 | 5510 | 1 | 0 | 58771 | 1 | 100 | 0 |
| TOTAL | - | - | 58771 | 100 | - | - | - | - |
# Chart 1: absolute frequencies per class, y axis scaled to the data
# (tallest bar plus 20% headroom). Wide bottom/left margins leave room for
# the rotated class-mark labels and the axis titles.
par(mar = c(8, 7, 5, 2))
barplot(
  TDF_Enteros$ni,
  names.arg = TDF_Enteros$MC,
  main = "",
  xlab = "",
  ylab = "",
  col = "#FFCC99",
  ylim = c(0, max(TDF_Enteros$ni) * 1.2),
  space = 0,
  las = 2,
  cex.names = 0.7
)
# Axis titles and chart title are placed manually in the margins
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Elevación (m.s.n.m.)", side = 1, line = 4)
mtext(
  "Gráfica N°1: Distribución de Cantidad de Plantas Solares por Elevación",
  side = 3,
  line = 2,
  adj = 0.5,
  cex = 0.9,
  font = 2
)
# Margins for the next chart (this call was fused onto the previous
# statement, which is a syntax error)
par(mar = c(8, 7, 4, 2))
# Chart 2: absolute frequencies per class on a global scale, with the y axis
# running up to the total number of observations. The total is computed from
# the table rather than hard-coded, so the axis stays right if the data change.
barplot(
  TDF_Enteros$ni,
  main = "",
  xlab = "",
  ylab = "",
  names.arg = TDF_Enteros$MC,
  col = "#FFCC99",
  ylim = c(0, sum(TDF_Enteros$ni)),
  space = 0,
  cex.names = 0.7,
  las = 2
)
# Axis titles and chart title are placed manually in the margins
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Elevación (m.s.n.m.)", side = 1, line = 4)
mtext(
  "Gráfica N°2: Distribución de Cantidad de Plantas Solares por Elevación",
  side = 3,
  line = 2,
  adj = 0.5,
  cex = 0.9,
  font = 2
)
# Margins for the next chart (this call was fused onto the previous
# statement, which is a syntax error)
par(mar = c(8, 5, 5, 2))
# Chart 3: relative frequencies (%) per class, y axis scaled to the data
# (tallest bar plus 20% headroom). barplot() returns the bar midpoints, which
# are used below to centre the percentage labels.
bp3 <- barplot(
  TDF_Enteros$hi,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)",
  col = "#FFCC99",
  space = 0,
  names.arg = TDF_Enteros$MC,
  cex.names = 0.7,
  las = 2,
  ylim = c(0, max(TDF_Enteros$hi) * 1.2)
)
mtext("Elevación (m.s.n.m.)", side = 1, line = 4)
mtext(
  "Gráfica N°3: Distribución Porcentual de las Plantas Solares por Elevación",
  side = 3,
  line = 2,
  adj = 0.5,
  cex = 0.9,
  font = 2
)
# Percentage label just above each bar
text(
  x = bp3,
  y = TDF_Enteros$hi,
  labels = paste0(round(TDF_Enteros$hi, 1), "%"),
  pos = 3, cex = 0.6, col = "black"
)
# Margins for the next chart (this call was fused onto the previous
# statement, which is a syntax error)
par(mar = c(8, 5, 5, 2))
# Chart 4: relative frequencies (%) per class on a global 0-100 scale.
# barplot() returns the bar midpoints, which are used below to centre the
# percentage labels.
bp4 <- barplot(
  TDF_Enteros$hi,
  main = "",
  xlab = "",
  ylab = "Porcentaje (%)",
  col = "#FFCC99",
  space = 0,
  names.arg = TDF_Enteros$MC,
  las = 2,
  cex.names = 0.7,
  ylim = c(0, 100)
)
mtext("Elevación (m.s.n.m.)", side = 1, line = 4)
mtext(
  "Gráfica N°4: Distribución Porcentual de las Plantas Solares por Elevación",
  side = 3,
  line = 2,
  adj = 0.5,
  cex = 0.9,
  font = 2
)
# Percentage label just above each bar
text(
  x = bp4,
  y = TDF_Enteros$hi,
  labels = paste0(round(TDF_Enteros$hi, 1), "%"),
  pos = 3, cex = 0.6, col = "black"
)
# Margins for the next chart (this call was fused onto the previous
# statement, which is a syntax error)
par(mar = c(5, 5, 4, 2))
# Chart 5: horizontal box plot of the raw elevation values
boxplot(
  Variable,
  horizontal = TRUE,
  col = "#FFCC99",
  xlab = "Elevación (m.s.n.m.)",
  cex.main = 0.9,
  main = "Gráfica N°5: Distribución de la Elevación en las Plantas Solares"
)
# Margins for the next chart; xpd = TRUE allows drawing outside the plot
# region. This call was fused onto the previous statement, which is a
# syntax error.
par(mar = c(5, 5, 7, 10), xpd = TRUE)
# Coordinates: the ascending ogive is plotted against the upper class limits
# and the descending ogive against the lower class limits
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc
# 1. Draw the ascending ogive; the axes span all class limits and run from
#    0 to the total count so both curves fit
plot(
  x_asc, y_asc,
  type = "b",
  main = "",
  xlab = "Elevación (m.s.n.m.)",
  ylab = "Frecuencia acumulada",
  col = "black",
  pch = 19,
  xlim = c(min(x_desc), max(x_asc)),
  ylim = c(0, sum(TDF_Enteros$ni)),
  bty = "l"
)
# 2. Add the descending ogive
lines(x_desc, y_desc, col = "orange", type = "b", pch = 19)
# grid() draws with abline(), which is only clipped to the plot region when
# xpd is FALSE. Clipping is switched on just for the grid so its lines do not
# spill into the margins, then the previous setting is restored.
old_xpd <- par(xpd = FALSE)
grid()
par(old_xpd)
mtext(
  "Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Elevación en las Plantas Solares",
  side = 3,
  line = 3,
  adj = 0.5,
  cex = 0.9,
  font = 2
)
# The legend uses the same filled symbol (pch = 19) as the plotted points;
# it previously showed hollow circles (pch = 1) that matched neither curve.
legend(
  "right",
  legend = c("Ascendente", "Descendente"),
  col = c("black", "orange"),
  lty = 1,
  pch = 19,
  cex = 0.6,
  inset = c(0.05, 0.05),
  bty = "n"
)
## CENTRAL TENDENCY INDICATORS
# Arithmetic mean (rounded for display)
media <- round(mean(Variable), 2)
# Median (rounded for display)
mediana <- round(median(Variable), 2)
# Mode: class mark(s) of the integer-limit class(es) with the highest
# absolute frequency; ties are reported as a comma-separated list
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- TDF_Enteros$MC[TDF_Enteros$ni == max_frecuencia]
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

## DISPERSION INDICATORS
# Variance
varianza <- var(Variable)
# Standard deviation
sd_val <- sd(Variable)
# Coefficient of variation (%). It is computed from the unrounded mean so
# the display rounding of `media` does not leak into the ratio; abs() keeps
# the coefficient positive when the mean is negative.
cv <- round((sd_val / abs(mean(Variable))) * 100, 2)

## SHAPE INDICATORS
# Skewness coefficient
# NOTE(review): skewness is requested with type = 2 while kurtosis below uses
# the function's default type — confirm that mixing estimators is intended.
asimetria <- skewness(Variable, type = 2)
# Kurtosis
curtosis <- kurtosis(Variable)

# Outliers: values beyond the 1.5 * IQR fences around the quartiles
Q1 <- quantile(Variable, 0.25)
Q3 <- quantile(Variable, 0.75)
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val
outliers_data <- Variable[Variable < lim_inf | Variable > lim_sup]
num_outliers <- length(outliers_data)
# Summary text: "<count> [<smallest outlier>; <largest outlier>]"
if (num_outliers > 0) {
  rango_outliers <- paste0(
    num_outliers,
    " [", round(min(outliers_data), 2), "; ", round(max(outliers_data), 2), "]"
  )
} else {
  rango_outliers <- "0 [Sin Outliers]"
}

# One-row summary table with all indicators
tabla_indicadores <- data.frame(
  Variable = "Elevación (m.s.n.m.)",
  Rango_MinMax = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
  X = media,
  Me = mediana,
  Mo = moda_txt,
  V = varianza,
  Sd = sd_val,
  Cv = cv,
  As = asimetria,
  K = curtosis,
  Outliers = rango_outliers
)
# Build the gt summary table of indicators
tabla_conclusiones_gt <- tabla_indicadores %>%
  gt() %>%
  tab_header(title = md("**Tabla N°3 de Conclusiones de Elevación de las Plantas Solares**")) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Variable = "Variable",
    Rango_MinMax = "Rango",
    X = "Media (X)",
    Me = "Mediana (Me)",
    Mo = "Moda (Mo)",
    V = "Varianza (V)",
    Sd = "Desv. Est. (Sd)",
    Cv = "C.V. (%)",
    As = "Asimetría (As)",
    K = "Curtosis (K)",
    Outliers = "Outliers [Intervalo]"
  ) %>%
  tab_options(
    heading.title.font.size = px(16),
    column_labels.background.color = "#F0F0F0"
  )
# Print the table. The rendered table row that was fused onto this statement
# is moved to its own line so the statement parses.
tabla_conclusiones_gt
| Tabla N°3 de Conclusiones de Elevación de las Plantas Solares | ||||||||||
| Variable | Rango | Media (X) | Mediana (Me) | Moda (Mo) | Varianza (V) | Desv. Est. (Sd) | C.V. (%) | Asimetría (As) | Curtosis (K) | Outliers [Intervalo] |
|---|---|---|---|---|---|---|---|---|---|---|
| Elevación (m.s.n.m.) | [-378; 5656] | 354.18 | 156 | 190 | 278058.1 | 527.3121 | 148.88 | 3.176571 | 13.94829 | 5463 [1015; 5656] |
| Autor: Martin Sarmiento | ||||||||||
La variable “Elevación” fluctúa entre -378 y 5656 m.s.n.m. y sus valores se encuentran alrededor de 156 m.s.n.m. (mediana), con una desviación estándar de 527.31 m y un coeficiente de variación de 148.88 %, por lo que es una variable muy heterogénea. Sus valores se concentran en la parte media baja de la variable y presenta 5463 valores atípicos (outliers), comprendidos entre 1015 y 5656 m.s.n.m.; por todo lo anterior, el comportamiento de la variable es regular.