##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####
#### VARIABLE LONGITUD ####
## DATASET ##
# Point the session at the project folder that holds the dataset
setwd("~/R/LONGITUD")
# Load the dataset. read.csv2() already defaults to ";" as the field
# separator and "," as the decimal mark, so only the encoding is spelled out.
Datos <- read.csv2("Dataset_Mundial_Final.csv", fileEncoding = "latin1")
# Inspect the structure of the loaded data
str(Datos)
## 'data.frame': 58978 obs. of 29 variables:
## $ ï..OBJECTID : int 2 3 4 5 6 7 8 9 10 11 ...
## $ code : chr "00001-AFG-P" "00002-AFG-P" "00003-AFG-P" "00004-AFG-P" ...
## $ plant_name : chr "Badghis Solar Power Plant" "Balkh solar farm" "Behsood solar farm" "Dab Pal 4 solar farm" ...
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ operational_status : chr "cancelled - inferred 4 y" "cancelled - inferred 4 y" "cancelled - inferred 4 y" "shelved - inferred 2 y" ...
## $ longitude : num 62.9 67.1 70.4 66.2 65.7 ...
## $ latitude : num 35.1 36.7 34.4 33.8 31.7 ...
## $ elevation : int 918 359 629 2288 1060 1060 1392 398 410 1012 ...
## $ area : num 6.74 10.72 487.73 111.8 1929.96 ...
## $ size : chr "Small" "Small" "Small" "Small" ...
## $ slope : num 7.38 0.49 1.1 6.16 1.23 ...
## $ slope_type : chr "Moderado" "Plano o casi plano" "Plano o casi plano" "Moderado" ...
## $ curvature : num -0.024 0 0 0.045 -0.005 -0.005 -0.015 0 0 -0.009 ...
## $ curvature_type : chr "Superficies cóncavas / Valles" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies convexas / Crestas" ...
## $ aspect : num 96.8 358.5 36.2 305.8 248.4 ...
## $ aspect_type : chr "East" "North" "Northeast" "Northwest" ...
## $ dist_to_road : num 7037.1 92.7 112.1 1705.3 115.8 ...
## $ ambient_temperature : num 14.4 17.88 21.32 8.86 19.64 ...
## $ ghi : num 5.82 5.58 5.8 6.75 6.62 ...
## $ humidity : num 47.7 42.3 36.4 37.3 24.2 ...
## $ wind_speed : num 0.039 0.954 0.234 0.943 0.37 ...
## $ wind_direction : num 187.5 207.4 255.6 160.3 97.7 ...
## $ dt_wind : chr "South" "Southwest" "West" "South" ...
## $ solar_aptitude : num 0.72 0.635 0.685 0.659 0.819 0.819 0.818 0.642 0.63 0.374 ...
## $ solar_aptitude_rounded: int 7 6 7 7 8 8 8 6 6 4 ...
## $ solar_aptittude_class : chr "Alta" "Alta" "Alta" "Alta" ...
## $ capacity : num 32 40 60 3000 100 100 36 50 25 100 ...
## $ optimal_tilt : num 30 31 31.1 33 31 ...
## $ pv_potential : num 4.61 4.41 4.57 5.42 5.17 ...
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Extract the variable under study (longitude), dropping missing values
Variable <- na.omit(Datos$longitude)
N <- length(Variable)
# Sturges' rule below takes log10(N); fail early and clearly on an empty sample
stopifnot(N > 0)

# DECIMAL CLASS LIMITS
min_dec <- min(Variable)
max_dec <- max(Variable)
# Number of classes (Sturges' rule, rounded down)
k_dec <- floor(1 + 3.322 * log10(N))
rango_dec <- max_dec - min_dec
amplitud_dec <- rango_dec / k_dec
# k_dec + 1 equally spaced cut points running exactly from min to max
cortes_dec <- seq(min_dec, max_dec, length.out = k_dec + 1)

# Frequencies. Classes are [Li, Ls); with right = FALSE, include.lowest = TRUE
# closes the LAST class on the right, so the maximum is counted without
# padding the top break (padding would distort the last Ls and class mark).
inter_dec <- cut(Variable, breaks = cortes_dec, include.lowest = TRUE,
                 right = FALSE)
ni_dec <- as.vector(table(inter_dec))

# Relative frequency (%) and cumulative frequencies in both directions
hi_dec <- (ni_dec / N) * 100
Ni_asc_dec <- cumsum(ni_dec)
Hi_asc_dec <- cumsum(hi_dec)
Ni_desc_dec <- rev(cumsum(rev(ni_dec)))
Hi_desc_dec <- rev(cumsum(rev(hi_dec)))

# Frequency table with decimal limits: lower limit, upper limit, class mark,
# absolute / relative frequency and the four cumulative columns
TDF_Decimal <- data.frame(
  Li = cortes_dec[seq_len(k_dec)],
  Ls = cortes_dec[-1],
  MC = (cortes_dec[seq_len(k_dec)] + cortes_dec[-1]) / 2,
  ni = ni_dec,
  hi = hi_dec,
  Ni_asc = Ni_asc_dec,
  Ni_desc = Ni_desc_dec,
  Hi_asc = Hi_asc_dec,
  Hi_desc = Hi_desc_dec)
# INTEGER CLASS LIMITS
# Class limits and class width are kept at multiples of BASE
BASE <- 10
min_int <- floor(min(Variable) / BASE) * BASE
max_int <- ceiling(max(Variable) / BASE) * BASE
# Suggested number of classes (Sturges' rule, rounded down)
k_int_sug <- floor(1 + 3.322 * log10(N))
Rango_int <- max_int - min_int
# Class width needed to cover the rounded range with k_int_sug classes,
# rounded up to a multiple of BASE (BASE itself if the range is zero)
Amplitud_int <- ceiling((Rango_int / k_int_sug) / BASE) * BASE
if (Amplitud_int == 0) {
  Amplitud_int <- BASE
}

# CUT POINTS
cortes_int <- seq(from = min_int, by = Amplitud_int,
                  length.out = k_int_sug + 1)
# Add one more class if the last cut point falls short of the maximum
if (max(cortes_int) < max(Variable)) {
  cortes_int <- c(cortes_int, max(cortes_int) + Amplitud_int)
}
# Drop trailing classes whose lower limit is already at or above the maximum
while (length(cortes_int) > 2 &&
       cortes_int[length(cortes_int) - 1] >= max(Variable)) {
  cortes_int <- cortes_int[-length(cortes_int)]
}
K_real <- length(cortes_int) - 1
lim_inf_int <- cortes_int[seq_len(K_real)]
lim_sup_int <- cortes_int[-1]

# Frequencies: classes are [Li, Ls), the last one closed on the right
inter_int <- cut(Variable, breaks = cortes_int, include.lowest = TRUE,
                 right = FALSE)
ni_int <- as.vector(table(inter_int))

# Relative frequency (%) and cumulative frequencies in both directions
hi_int <- (ni_int / N) * 100
Ni_asc_int <- cumsum(ni_int)
Hi_asc_int <- cumsum(hi_int)
Ni_desc_int <- rev(cumsum(rev(ni_int)))
Hi_desc_int <- rev(cumsum(rev(hi_int)))

# Frequency table with integer limits
TDF_Enteros <- data.frame(
  Li = lim_inf_int,
  Ls = lim_sup_int,
  MC = (lim_inf_int + lim_sup_int) / 2,
  ni = ni_int,
  hi = hi_int,
  Ni_asc = Ni_asc_int,
  Ni_desc = Ni_desc_int,
  Hi_asc = Hi_asc_int,
  Hi_desc = Hi_desc_int)
# Create data frame
# Display version of the decimal table: every column is converted to text so
# that a TOTAL row mixing numbers and "-" placeholders can be appended
TDF_Dec_Final <- data.frame(
  Li = as.character(round(TDF_Decimal$Li, 2)),
  Ls = as.character(round(TDF_Decimal$Ls, 2)),
  MC = as.character(round(TDF_Decimal$MC, 2)),
  ni = as.character(TDF_Decimal$ni),
  hi = as.character(round(TDF_Decimal$hi, 2)),
  Ni_asc = as.character(TDF_Decimal$Ni_asc),
  Ni_desc = as.character(TDF_Decimal$Ni_desc),
  Hi_asc = as.character(round(TDF_Decimal$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Decimal$Hi_desc, 2))
)
# Totals row: only ni and hi have a meaningful total
totales_dec <- c("TOTAL", "-", "-", sum(TDF_Decimal$ni), round(sum(TDF_Decimal$hi), 2), "-", "-", "-", "-")
TDF_Dec_Final <- rbind(TDF_Dec_Final, totales_dec)
# Render the table with gt
# NOTE(review): no library() call is visible in this file; gt and a package
# providing %>% are assumed to be attached already - confirm
TDF_Dec_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°1 de Distribución de Frecuencias de Longitud (°) de las Plantas Solares**")) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")

| Tabla N°1 de Distribución de Frecuencias de Longitud (°) de las Plantas Solares | ||||||||
| Lim. Inf | Lim. Sup | Marca Clase | Frec. Abs (ni) | Frec. Rel (%) | Ni (Asc) | Ni (Desc) | Hi Asc (%) | Hi Desc (%) |
|---|---|---|---|---|---|---|---|---|
| -124.1 | -105.22 | -114.66 | 1693 | 2.87 | 1693 | 58978 | 2.87 | 100 |
| -105.22 | -86.34 | -95.78 | 2014 | 3.41 | 3707 | 57285 | 6.29 | 97.13 |
| -86.34 | -67.46 | -76.9 | 5219 | 8.85 | 8926 | 55271 | 15.13 | 93.71 |
| -67.46 | -48.58 | -58.02 | 592 | 1 | 9518 | 50052 | 16.14 | 84.87 |
| -48.58 | -29.7 | -39.14 | 990 | 1.68 | 10508 | 49460 | 17.82 | 83.86 |
| -29.7 | -10.82 | -20.26 | 74 | 0.13 | 10582 | 48470 | 17.94 | 82.18 |
| -10.82 | 8.06 | -1.38 | 7990 | 13.55 | 18572 | 48396 | 31.49 | 82.06 |
| 8.06 | 26.94 | 17.5 | 10953 | 18.57 | 29525 | 40406 | 50.06 | 68.51 |
| 26.94 | 45.82 | 36.38 | 1535 | 2.6 | 31060 | 29453 | 52.66 | 49.94 |
| 45.82 | 64.7 | 55.26 | 370 | 0.63 | 31430 | 27918 | 53.29 | 47.34 |
| 64.7 | 83.58 | 74.14 | 3271 | 5.55 | 34701 | 27548 | 58.84 | 46.71 |
| 83.58 | 102.46 | 93.02 | 2401 | 4.07 | 37102 | 24277 | 62.91 | 41.16 |
| 102.46 | 121.34 | 111.9 | 10336 | 17.53 | 47438 | 21876 | 80.43 | 37.09 |
| 121.34 | 140.22 | 130.78 | 9300 | 15.77 | 56738 | 11540 | 96.2 | 19.57 |
| 140.22 | 159.1 | 149.66 | 2216 | 3.76 | 58954 | 2240 | 99.96 | 3.8 |
| 159.1 | 177.98 | 168.54 | 24 | 0.04 | 58978 | 24 | 100 | 0.04 |
| TOTAL | - | - | 58978 | 100 | - | - | - | - |
| Autor: Martin Sarmiento | ||||||||
# Display version of the integer-limit table: every column is converted to
# text so that a TOTAL row mixing numbers and "-" placeholders can be appended
TDF_Int_Final <- data.frame(
  Li = as.character(TDF_Enteros$Li),
  Ls = as.character(TDF_Enteros$Ls),
  MC = as.character(TDF_Enteros$MC),
  ni = as.character(TDF_Enteros$ni),
  hi = as.character(round(TDF_Enteros$hi, 2)),
  Ni_asc = as.character(TDF_Enteros$Ni_asc),
  Ni_desc = as.character(TDF_Enteros$Ni_desc),
  Hi_asc = as.character(round(TDF_Enteros$Hi_asc, 2)),
  Hi_desc = as.character(round(TDF_Enteros$Hi_desc, 2))
)
# Totals row: only ni and hi have a meaningful total
totales_int <- c("TOTAL", "-", "-", sum(TDF_Enteros$ni), round(sum(TDF_Enteros$hi), 2), "-", "-", "-", "-")
TDF_Int_Final <- rbind(TDF_Int_Final, totales_int)
# Render the table with gt
# NOTE(review): no library() call is visible in this file; gt and a package
# providing %>% are assumed to be attached already - confirm
TDF_Int_Final %>%
  gt() %>%
  tab_header(title = md("**Tabla N°2 de Distribución de Frecuencias de Longitud (°) de las Plantas Solares**")) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Li = "Lim. Inf",
    Ls = "Lim. Sup",
    MC = "Marca Clase",
    ni = "Frec. Abs (ni)",
    hi = "Frec. Rel (%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi Asc (%)",
    Hi_desc = "Hi Desc (%)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  tab_options(heading.title.font.size = px(14), column_labels.background.color = "#F0F0F0")

| Tabla N°2 de Distribución de Frecuencias de Longitud (°) de las Plantas Solares | ||||||||
| Lim. Inf | Lim. Sup | Marca Clase | Frec. Abs (ni) | Frec. Rel (%) | Ni (Asc) | Ni (Desc) | Hi Asc (%) | Hi Desc (%) |
|---|---|---|---|---|---|---|---|---|
| -130 | -110 | -120 | 1501 | 2.55 | 1501 | 58978 | 2.55 | 100 |
| -110 | -90 | -100 | 1645 | 2.79 | 3146 | 57477 | 5.33 | 97.45 |
| -90 | -70 | -80 | 5523 | 9.36 | 8669 | 55832 | 14.7 | 94.67 |
| -70 | -50 | -60 | 740 | 1.25 | 9409 | 50309 | 15.95 | 85.3 |
| -50 | -30 | -40 | 1099 | 1.86 | 10508 | 49569 | 17.82 | 84.05 |
| -30 | -10 | -20 | 79 | 0.13 | 10587 | 48470 | 17.95 | 82.18 |
| -10 | 10 | 0 | 9571 | 16.23 | 20158 | 48391 | 34.18 | 82.05 |
| 10 | 30 | 20 | 9681 | 16.41 | 29839 | 38820 | 50.59 | 65.82 |
| 30 | 50 | 40 | 1316 | 2.23 | 31155 | 29139 | 52.82 | 49.41 |
| 50 | 70 | 60 | 434 | 0.74 | 31589 | 27823 | 53.56 | 47.18 |
| 70 | 90 | 80 | 3598 | 6.1 | 35187 | 27389 | 59.66 | 46.44 |
| 90 | 110 | 100 | 4780 | 8.1 | 39967 | 23791 | 67.77 | 40.34 |
| 110 | 130 | 120 | 10391 | 17.62 | 50358 | 19011 | 85.38 | 32.23 |
| 130 | 150 | 140 | 8568 | 14.53 | 58926 | 8620 | 99.91 | 14.62 |
| 150 | 170 | 160 | 47 | 0.08 | 58973 | 52 | 99.99 | 0.09 |
| 170 | 190 | 180 | 5 | 0.01 | 58978 | 5 | 100 | 0.01 |
| TOTAL | - | - | 58978 | 100 | - | - | - | - |
| Autor: Martin Sarmiento | ||||||||
# Plot N°1: absolute frequency per integer class, y axis scaled to the
# tallest bar (plus 20 % headroom). Axis titles and the main title are
# drawn with mtext() so their distance from the axes can be set per side.
par(mar = c(8, 7, 5, 2))
barplot(TDF_Enteros$ni,
        names.arg = TDF_Enteros$MC,
        main = "",
        xlab = "",
        ylab = "",
        col = "#FF6961",
        ylim = c(0, max(TDF_Enteros$ni) * 1.2),
        space = 0,   # adjacent bars, histogram-like
        las = 2,     # axis labels perpendicular to the axis
        cex.names = 0.7)
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Longitud (°)", side = 1, line = 4)
mtext("Gráfica N°1: Distribución de Cantidad de Plantas Solares por Longitud",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Margins for the next plot
par(mar = c(8, 7, 5, 2))
# Plot N°2: absolute frequency per integer class on a global scale, i.e. the
# y axis runs up to the total number of observations. The limit is derived
# from the table instead of a hard-coded count so it always matches the data.
barplot(TDF_Enteros$ni,
        main = "",
        xlab = "",
        ylab = "",
        names.arg = TDF_Enteros$MC,
        col = "#FF6961",
        ylim = c(0, sum(TDF_Enteros$ni)),
        space = 0,   # adjacent bars, histogram-like
        cex.names = 0.7,
        las = 2)     # axis labels perpendicular to the axis
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Longitud (°)", side = 1, line = 4)
mtext("Gráfica N°2: Distribución de Cantidad de Plantas Solares por Longitud",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Margins for the next plot
par(mar = c(8, 5, 5, 2))
# Plot N°3: relative frequency (%) per integer class, y axis scaled to the
# tallest bar (plus 30 % headroom for the value labels).
# barplot() returns the bar midpoints, reused below to place the labels.
bp3 <- barplot(TDF_Enteros$hi,
               main = "",
               xlab = "",
               ylab = "Porcentaje (%)",
               col = "#FF6961",
               ylim = c(0, max(TDF_Enteros$hi) * 1.3),
               space = 0,   # adjacent bars, histogram-like
               names.arg = TDF_Enteros$MC,
               cex.names = 0.7,
               las = 2)     # axis labels perpendicular to the axis
mtext("Longitud (°)", side = 1, line = 4)
mtext("Gráfica N°3: Distribución Porcentual de las Plantas Solares por Longitud",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Percentage label just above each bar
text(x = bp3,
     y = TDF_Enteros$hi,
     labels = paste0(round(TDF_Enteros$hi, 2), "%"),
     pos = 3, cex = 0.6, col = "black")
# Margins for the next plot
par(mar = c(8, 5, 5, 2))
# Plot N°4: relative frequency (%) per integer class on a global scale
# (y axis fixed at 0-100 %).
# barplot() returns the bar midpoints, reused below to place the labels.
bp4 <- barplot(TDF_Enteros$hi,
               main = "",
               xlab = "",
               ylab = "Porcentaje (%)",
               col = "#FF6961",
               space = 0,   # adjacent bars, histogram-like
               names.arg = TDF_Enteros$MC,
               las = 2,     # axis labels perpendicular to the axis
               cex.names = 0.7,
               ylim = c(0, 100))
mtext("Longitud (°)", side = 1, line = 4)
mtext("Gráfica N°4: Distribución Porcentual de las Plantas Solares por Longitud",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Percentage label just above each bar
text(x = bp4,
     y = TDF_Enteros$hi,
     labels = paste0(round(TDF_Enteros$hi, 2), "%"),
     pos = 3, cex = 0.6, col = "black")
# Margins for the next plot
par(mar = c(5, 5, 4, 2))
# Plot N°5: horizontal box plot of the raw longitude values
boxplot(Variable,
        horizontal = TRUE,
        col = "#FF6961",
        xlab = "Longitud (°)",
        cex.main = 0.9,
        main = "Gráfica N°5: Distribución de la Longitud en las Plantas Solares")
# Margins for the next plot; xpd = TRUE allows drawing outside the plot region
par(mar = c(5, 5, 7, 10), xpd = TRUE)
# Plot N°6: ascending and descending ogives (cumulative absolute frequency).
# Coordinates: the ascending curve is plotted at the upper class limits,
# the descending curve at the lower class limits.
x_asc <- TDF_Enteros$Ls
x_desc <- TDF_Enteros$Li
y_asc <- TDF_Enteros$Ni_asc
y_desc <- TDF_Enteros$Ni_desc
# 1. Draw the ascending ogive; axes span all class limits and the full count
plot(x_asc, y_asc,
     type = "b",
     main = "",
     xlab = "Longitud (°)",
     ylab = "Frecuencia Acumulada",
     col = "black",
     pch = 19,
     xlim = c(min(TDF_Enteros$Li), max(x_asc)),
     ylim = c(0, sum(TDF_Enteros$ni)))
# 2. Add the descending ogive
lines(x_desc, y_desc, col = "red", type = "b", pch = 19)
grid()
mtext("Gráfica N°6: Ojivas Ascendentes y Descendentes de la\nDistribución de la Longitud en las Plantas Solares",
      side = 3,
      line = 3,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Legend symbols use the same filled point (pch = 19) as the plotted series
legend("left",
       legend = c("Ascendente", "Descendente"),
       col = c("black", "red"),
       lty = 1,
       pch = 19,
       cex = 0.6,
       inset = c(0.05, 0.05),
       bty = "n")
## CENTRAL TENDENCY INDICATORS
# Arithmetic mean and median, both rounded to two decimals
media <- round(mean(Variable), 2)
mediana <- round(median(Variable), 2)
# Mode: class mark(s) of the most frequent integer class, joined as text
max_frecuencia <- max(TDF_Enteros$ni)
moda_vals <- with(TDF_Enteros, MC[ni == max_frecuencia])
moda_txt <- paste(round(moda_vals, 2), collapse = ", ")

## DISPERSION INDICATORS
# Sample variance and standard deviation
varianza <- var(Variable)
sd_val <- sd(Variable)
# Coefficient of variation (%), relative to the absolute value of the mean
cv <- round(100 * (sd_val / abs(media)), 2)

## SHAPE INDICATORS
# Skewness and kurtosis
# NOTE(review): skewness()/kurtosis() are not base R; the package providing
# them is assumed to be attached elsewhere - confirm
asimetria <- skewness(Variable, type = 2)
curtosis <- kurtosis(Variable)

# Outliers: values beyond 1.5 * IQR from the quartiles
cuartiles <- quantile(Variable, c(0.25, 0.75))
Q1 <- cuartiles[1]
Q3 <- cuartiles[2]
IQR_val <- Q3 - Q1
lim_inf <- Q1 - 1.5 * IQR_val
lim_sup <- Q3 + 1.5 * IQR_val
es_atipico <- Variable < lim_inf | Variable > lim_sup
outliers_data <- Variable[es_atipico]
num_outliers <- length(outliers_data)
# Text summary: "<count> [<min>; <max>]" or a fixed label when there are none
rango_outliers <- if (num_outliers > 0) {
  extremos <- round(range(outliers_data), 2)
  paste0(num_outliers, " [", extremos[1], "; ", extremos[2], "]")
} else {
  "0 [Sin Outliers]"
}
# One-row summary table gathering the indicators computed above
tabla_indicadores <- data.frame(
  "Variable" = c("Longitud (°)"),
  "Rango_MinMax" = paste0("[", round(min(Variable), 2), "; ", round(max(Variable), 2), "]"),
  "X" = c(media),
  "Me" = c(mediana),
  "Mo" = c(moda_txt),
  "V" = c(varianza),
  "Sd" = c(sd_val),
  "Cv" = c(cv),
  "As" = c(asimetria),
  "K" = c(curtosis),
  "Outliers" = rango_outliers)
# Build the gt table
# NOTE(review): no library() call is visible in this file; gt and a package
# providing %>% are assumed to be attached already - confirm
tabla_conclusiones_gt <- tabla_indicadores %>%
  gt() %>%
  tab_header(title = md("**Tabla N°3 de Conclusiones de Longitud de las Plantas Solares**")) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Variable = "Variable",
    Rango_MinMax = "Rango",
    X = "Media (X)",
    Me = "Mediana (Me)",
    Mo = "Moda (Mo)",
    V = "Varianza (V)",
    Sd = "Desv. Est. (Sd)",
    Cv = "C.V. (%)",
    As = "Asimetría (As)",
    K = "Curtosis (K)",
    Outliers = "Outliers [Intervalo]"
  ) %>%
  tab_options(
    heading.title.font.size = px(16),
    column_labels.background.color = "#f0f0f0"
  )
# Print the table
tabla_conclusiones_gt

| Tabla N°3 de Conclusiones de Longitud de las Plantas Solares | ||||||||||
| Variable | Rango | Media (X) | Mediana (Me) | Moda (Mo) | Varianza (V) | Desv. Est. (Sd) | C.V. (%) | Asimetría (As) | Curtosis (K) | Outliers [Intervalo] |
|---|---|---|---|---|---|---|---|---|---|---|
| Longitud (°) | [-124.1; 177.98] | 43.68 | 26.76 | 120 | 5977.611 | 77.31501 | 177 | -0.3934365 | -1.015861 | 0 [Sin Outliers] |
| Autor: Martin Sarmiento | ||||||||||
La variable “Longitud” fluctúa entre -124.1° y 177.98°, y sus valores se encuentran alrededor de 26.76° (mediana), con una desviación estándar de 77.31501°. Es una variable muy heterogénea (C.V. = 177 %), cuyos valores se concentran en la parte media-alta de su rango (asimetría negativa, As = -0.39), sin presencia de valores atípicos; por todo lo anterior, el comportamiento de la variable es regular.