##### UNIVERSIDAD CENTRAL DEL ECUADOR #####
#### AUTOR: MARTIN SARMIENTO ####
### CARRERA: INGENIERÍA EN PETRÓLEOS #####
#### VARIABLE PAISES (CONTINENTES) ####
## DATASET ##
# Attach the packages this script relies on further down: dplyr supplies
# `%>%`, case_when(), group_by(), summarise(), arrange() and desc(); gt
# supplies gt(), tab_header(), md(), px() and the other table helpers.
library(dplyr)
library(gt)
# NOTE(review): setwd() ties the script to one machine's folder layout;
# consider an RStudio project or a path relative to the script instead.
setwd("~/R/COUNTRY")
# Load the dataset (semicolon-separated, read as Latin-1)
Datos <- read.csv("Data_Mundial_Final.csv", sep = ";", fileEncoding = "latin1")
## Structure of the data
str(Datos)
## 'data.frame': 58771 obs. of 29 variables:
## $ OBJECTID : int 127 129 131 132 133 137 138 139 140 145 ...
## $ code : chr "00127-ARG-P" "00129-ARG-G" "00131-ARG-P" "00132-ARG-P" ...
## $ plant_name : chr "Aconcagua solar farm" "Altiplano 200 Solar Power Plant" "Anchoris solar farm" "Antu Newen solar farm" ...
## $ country : chr "Argentina" "Argentina" "Argentina" "Argentina" ...
## $ operational_status : chr "announced" "operating" "construction" "cancelled - inferred 4 y" ...
## $ longitude : chr "-68,8713" "-66,895798" "-68,915001" "-70,269897" ...
## $ latitude : chr "-32,998501" "-24,1392" "-33,330101" "-37,375801" ...
## $ elevation : int 929 4000 937 865 858 570 1612 665 3989 2640 ...
## $ area : chr "250,337006" "4397290" "645,163025" "241,276001" ...
## $ size : chr "Small" "Big" "Small" "Small" ...
## $ slope : chr "0,574179" "1,60257" "0,902748" "1,79147" ...
## $ slope_type : chr "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" "Plano o casi plano" ...
## $ curvature : chr "0,000795" "-0,002781" "0,002781" "-0,002384" ...
## $ curvature_type : chr "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" "Superficies planas o intermedias" ...
## $ aspect : chr "55,124672" "188,707367" "108,434952" "239,349335" ...
## $ aspect_type : chr "Northeast" "South" "East" "Southwest" ...
## $ dist_to_road : chr "127,2827045" "56014,95403" "335,9280031" "34,00973342" ...
## $ ambient_temperature : chr "12,6" "6,8" "13,1" "11,4" ...
## $ ghi : chr "6,11" "8,012" "6,119" "6,223" ...
## $ humidity : chr "53,74" "53,74" "53,74" "53,74" ...
## $ wind_speed : chr "3,7789" "7,02062" "3,87037" "6,55962" ...
## $ wind_direction : chr "55,099998" "55,099998" "55,099998" "55,099998" ...
## $ dt_wind : chr "Northeast" "Northeast" "Northeast" "Northeast" ...
## $ solar_aptitude : chr "0,746197" "0,8" "0,595309" "0,657269" ...
## $ solar_aptitude_rounded: int 7 8 6 7 7 7 8 7 8 6 ...
## $ solar_aptittude_class : chr "Alta" "Alta" "Media" "Alta" ...
## $ capacity : chr "25" "101" "180" "20" ...
## $ optimal_tilt : chr "31" "26" "31" "33" ...
## $ pv_potential : chr "4,983" "6,389" "4,969" "5,002" ...
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Extract the variable under study (country of each plant)
Pais <- Datos$country
# EDA of a nominal variable: absolute frequency of every country
TDF_pais <- table(Pais)
tabla_pais <- as.data.frame(TDF_pais)
# Relative frequency, as a proportion and as a percentage
hi <- tabla_pais$Freq / sum(tabla_pais$Freq)
hi_porc <- hi * 100
# Sanity check: the percentages must add up to 100
sum(hi_porc)
## [1] 100
# The grouping and summary steps below read `tabla_PAIS` (upper case) and
# expect the columns Pais, Freq and hi_porc. R is case-sensitive, so
# `tabla_pais` alone does not satisfy them; build the object they need here.
tabla_PAIS <- data.frame(
  Pais = as.character(tabla_pais$Pais),
  Freq = tabla_pais$Freq,
  hi_porc = hi_porc,
  stringsAsFactors = FALSE
)
### Grouping ###
# Assign every country to a continent label. case_when() keeps the FIRST
# branch whose condition is TRUE, and each condition is a case-insensitive
# substring match, so branch order matters:
#   * Oceanía is tested first; otherwise "Papua New Guinea" is captured by
#     "Guinea" (África) and "British Indian Ocean Territory" by "India" (Asia).
#   * Europa must stay ahead of Asia, because "Romania" contains "oman".
tabla_PAIS$grupo <- case_when(
  # Oceania and other territories
  grepl("Australia|New Zealand|Fiji|Papua|British Indian Ocean Territory",
        tabla_PAIS$Pais, ignore.case = TRUE) ~ "Oceanía",
  # South America
  grepl("Argentina|Bolivia|Brazil|Chile|Colombia|Ecuador|Guyana|Paraguay|Peru|Suriname|Uruguay|Venezuela",
        tabla_PAIS$Pais, ignore.case = TRUE) ~ "América del Sur",
  # North America
  grepl("United States|USA|Canada|Mexico|México",
        tabla_PAIS$Pais, ignore.case = TRUE) ~ "América del Norte",
  # Central America and the Caribbean
  grepl("Guatemala|Belize|Honduras|El Salvador|Nicaragua|Costa Rica|Panama|Panamá|Cuba|Dominican Republic|Haiti|Jamaica|Puerto Rico|Antigua and Barbuda|Cayman Islands|Bonaire|Virgin Islands|Saint Kitts and Nevis|Guadeloupe",
        tabla_PAIS$Pais, ignore.case = TRUE) ~ "América Central y Caribe",
  # Europe
  grepl("Spain|España|France|Germany|Alemania|Italy|Italia|United Kingdom|UK|Portugal|Netherlands|Belgium|Poland|Sweden|Norway|Denmark|Finland|Greece|Russia|Ukraine|Albania|Andorra|Austria|Belarus|Bosnia and Herzegovina|Bulgaria|Croatia|Cyprus|Czech Republic|Estonia|Holy See|Hungary|Ireland|Isle of Man|Kosovo|Latvia|Lithuania|Luxembourg|Malta|Moldova|Montenegro|North Macedonia|Romania|Serbia|Slovakia|Slovenia|Switzerland",
        tabla_PAIS$Pais, ignore.case = TRUE) ~ "Europa",
  # Asia
  grepl("China|India|Japan|Japón|Korea|Vietnam|Thailand|Indonesia|Malaysia|Philippines|Saudi Arabia|UAE|United Arab Emirates|Israel|Turkey|Turquía|Iran|Iraq|Afghanistan|Armenia|Azerbaijan|Bahrain|Bangladesh|Bhutan|Brunei|Cambodia|Georgia|Hong Kong|Jordan|Kazakhstan|Kuwait|Kyrgyzstan|Laos|Lebanon|Mongolia|Myanmar|Nepal|Oman|Pakistan|Palestine|Qatar|Singapore|Sri Lanka|Syria|Taiwan|Tajikistan|Timor-Leste|Türkiye|Uzbekistan|Yemen",
        tabla_PAIS$Pais, ignore.case = TRUE) ~ "Asia",
  # Africa
  grepl("South Africa|Egypt|Egipto|Nigeria|Morocco|Marruecos|Algeria|Kenya|Ethiopia|Ghana|Senegal|Ivory Coast|Angola|Benin|Botswana|Burkina Faso|Burundi|Cabo Verde|Cameroon|Central African Republic|Chad|Comoros|Côte d'Ivoire|DR Congo|Republic of the Congo|Djibouti|Eritrea|Eswatini|Gabon|Guinea|Guinea-Bissau|Lesotho|Liberia|Libya|Madagascar|Malawi|Mali|Mauritania|Mauritius|Mayotte|Mozambique|Namibia|Niger|Réunion|Rwanda|Sao Tome and Principe|Seychelles|Sierra Leone|Somalia|South Sudan|Sudan|Tanzania|The Gambia|Togo|Tunisia|Uganda|Western Sahara|Zambia|Zimbabwe",
        tabla_PAIS$Pais, ignore.case = TRUE) ~ "África",
  # Anything that matched none of the lists above
  TRUE ~ "Otros / No Especificado"
)
# Collapse the per-country table into one row per continent, largest first.
# The percentage is derived from the grouped counts rather than from
# sum(hi_porc): if `tabla_PAIS` has no `hi_porc` column, dplyr's data masking
# falls back to the global `hi_porc` vector and every group would report the
# grand total (100) instead of its own share.
tabla_resumen <- tabla_PAIS %>%
  group_by(grupo) %>%
  summarise(Frecuencia = sum(Freq)) %>%
  mutate(Porcentaje = 100 * Frecuencia / sum(Frecuencia)) %>%
  arrange(desc(Frecuencia))
# Rename the columns to the labels used by the tables and plots below
colnames(tabla_resumen) <- c("Continente", "ni", "hi (%)")
# Intermediate gt table: continents with their count and percentage
tabla_resumen_gt <- tabla_resumen %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°1 de Agrupación por Continentes de las Plantas Solares**")
  ) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Continente = "Continentes",
    ni = "Frecuencia (ni)",
    `hi (%)` = "Porcentaje (hi%)"
  ) %>%
  # Show the percentage column with two decimals
  fmt_number(columns = c(`hi (%)`), decimals = 2) %>%
  tab_options(
    heading.title.font.size = px(16),
    column_labels.background.color = "#F0F0F0"
  )
# Display the table
tabla_resumen_gt
# Rendered output captured from the report, kept as `##` lines so that the
# script still parses:
## | Tabla N°1 de Agrupación por Continentes de las Plantas Solares | ||
## | Continentes | Frecuencia (ni) | Porcentaje (hi%) |
## |---|---|---|
## | Asia | 28403 | 48.33 |
## | Europa | 18787 | 31.97 |
## | América del Norte | 7783 | 13.24 |
## | América del Sur | 2347 | 3.99 |
## | África | 1072 | 1.82 |
## | América Central y Caribe | 379 | 0.64 |
## | Autor: Martin Sarmiento | ||
#### Build the totals row ####
# Build the row as a typed one-row data frame. Using c("TOTAL", n, p) would
# coerce both totals to character, and binding that vector would turn the
# whole `ni` and `hi (%)` columns into character as well.
totales <- data.frame(
  "Continente" = "TOTAL",
  "ni" = sum(tabla_resumen$ni),
  "hi (%)" = sum(tabla_resumen$`hi (%)`),
  check.names = FALSE
)
tabla_Continente_Final <- bind_rows(tabla_resumen, totales)
# Keep both measure columns as doubles for gt
tabla_Continente_Final$ni <- as.numeric(tabla_Continente_Final$ni)
tabla_Continente_Final$`hi (%)` <- as.numeric(tabla_Continente_Final$`hi (%)`)
# Final gt table (Tabla N°2): frequency distribution including the TOTAL row
tabla_final_gt <- tabla_Continente_Final %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°2 de Distribución de Frecuencias por Continentes de las Plantas Solares**")
  ) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  cols_label(
    Continente = "Continentes",
    ni = "Frecuencia (ni)",
    `hi (%)` = "Porcentaje (hi%)"
  ) %>%
  # Show the percentage column with two decimals
  fmt_number(columns = c(`hi (%)`), decimals = 2) %>%
  tab_options(
    heading.title.font.size = px(16),
    column_labels.background.color = "#F0F0F0"
  )
# Display the table
tabla_final_gt
# Rendered output captured from the report, kept as `##` lines so that the
# script still parses:
## | Tabla N°2 de Distribución de Frecuencias por Continentes de las Plantas Solares | ||
## | Continentes | Frecuencia (ni) | Porcentaje (hi%) |
## |---|---|---|
## | Asia | 28403 | 48.33 |
## | Europa | 18787 | 31.97 |
## | América del Norte | 7783 | 13.24 |
## | América del Sur | 2347 | 3.99 |
## | África | 1072 | 1.82 |
## | América Central y Caribe | 379 | 0.64 |
## | TOTAL | 58771 | 100.00 |
## | Autor: Martin Sarmiento | ||
# Figure 1: absolute frequency per continent, y axis scaled to 110 % of the
# tallest bar. The large bottom margin leaves room for the vertical
# (las = 2) continent labels.
par(mar = c(13, 6, 4, 2))
barplot(tabla_resumen$ni,
        main = "",
        xlab = "",
        ylab = "",
        col = "skyblue",
        ylim = c(0, max(tabla_resumen$ni) * 1.1),
        names.arg = tabla_resumen$Continente,
        cex.names = 0.8,
        las = 2)
# Axis titles and figure caption are drawn in the margins with mtext()
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Continentes", side = 1, line = 9)
mtext("Gráfica N°1: Distribución de Cantidad de Plantas Solares por Continentes",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Margins for the next figure (Gráfica N°2). This must be its own statement:
# glued to the closing parenthesis above it is a syntax error.
par(mar = c(13, 6, 4, 2))
# Figure 2: same counts as Figure 1, but with the y axis running up to the
# total number of plants so each bar reads as a share of the whole sample.
# The limit is computed from the data (58771 for the current dataset) instead
# of being hard-coded.
barplot(tabla_resumen$ni,
        main = "",
        xlab = "",
        ylab = "",
        col = "skyblue",
        ylim = c(0, sum(tabla_resumen$ni)),
        names.arg = tabla_resumen$Continente,
        cex.names = 0.8,
        las = 2)
# Axis titles and figure caption are drawn in the margins with mtext()
mtext("Cantidad", side = 2, line = 4.5, cex = 1, font = 1)
mtext("Continentes", side = 1, line = 11)
mtext("Gráfica N°2: Distribución de Cantidad de Plantas Solares por Continentes",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Margins for the next figure (Gráfica N°3). This must be its own statement:
# glued to the closing parenthesis above it is a syntax error.
par(mar = c(13, 4, 4, 2))
# Figure 3: percentage per continent, y axis scaled to 130 % of the tallest
# bar so the value labels fit above it. barplot() returns the bar midpoints,
# which are reused as x positions for the labels.
bp3 <- barplot(tabla_resumen$`hi (%)`,
               main = "",
               xlab = "",
               ylab = "Porcentaje %",
               col = "skyblue",
               ylim = c(0, max(tabla_resumen$`hi (%)`) * 1.3),
               names.arg = tabla_resumen$Continente,
               cex.names = 0.8,
               las = 2)
mtext("Continentes", side = 1, line = 11)
mtext("Gráfica N°3: Distribución Porcentual de las Plantas Solares por Continentes",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Percentage label above each bar (pos = 3 places the text above the point)
text(x = bp3, y = tabla_resumen$`hi (%)`,
     labels = paste0(round(tabla_resumen$`hi (%)`, 2), "%"),
     pos = 3, cex = 0.8, col = "black")
# Margins for the next figure (Gráfica N°4). This must be its own statement:
# glued to the closing parenthesis above it is a syntax error.
par(mar = c(13, 4, 4, 2))
# Figure 4: percentage per continent on a fixed 0-100 scale. barplot()
# returns the bar midpoints, which are reused as x positions for the labels.
bp4 <- barplot(tabla_resumen$`hi (%)`,
               main = "",
               xlab = "",
               ylab = "Porcentaje %",
               col = "skyblue",
               ylim = c(0, 100),
               names.arg = tabla_resumen$Continente,
               cex.names = 0.8,
               las = 2)
mtext("Continentes", side = 1, line = 11)
mtext("Gráfica N°4: Distribución Porcentual de las Plantas Solares por Continentes",
      side = 3,
      line = 2,
      adj = 0.5,
      cex = 0.9,
      font = 2)
# Percentage label above each bar (pos = 3 places the text above the point)
text(x = bp4, y = tabla_resumen$`hi (%)`,
     labels = paste0(round(tabla_resumen$`hi (%)`, 2), "%"),
     pos = 3, cex = 0.8, col = "black")
# Margins for the next figure (Gráfica N°5, pie chart): wide right margin and
# xpd = TRUE so the legend can be drawn outside the plot region. This must be
# its own statement: glued to the closing parenthesis above it is a syntax
# error.
par(mar = c(5, 6, 4, 10), xpd = TRUE)
# Figure 5: pie chart of the percentage per continent.
# Blue palette, ordered from the lightest to the darkest shade.
mis_colores_azules <- c(
  "#E1F5FE", "#B3E5FC", "#81D4FA", "#4FC3F7",
  "#29B6F6", "#039BE5", "#0277BD"
)
# Recycle the palette so there is exactly one colour per continent row
colores_finales <- rep_len(mis_colores_azules, nrow(tabla_resumen))
pie(
  tabla_resumen$`hi (%)`,
  labels = paste0(round(tabla_resumen$`hi (%)`, 1), "%"),
  col = colores_finales,
  radius = 0.9,
  cex = 0.7,
  main = ""
)
# Caption drawn in the top margin
mtext(
  "Gráfica N°5: Distribución Porcentual de las Plantas Solares por Continentes",
  side = 3, line = 2, adj = 0.5, cex = 0.9, font = 2
)
# Legend placed to the right of the pie, using the same colours as the slices
legend(
  x = 1.3, y = 1.1,
  legend = tabla_resumen$Continente,
  fill = colores_finales,
  title = "Continentes",
  cex = 0.6,
  bty = "n"
)
# Mode calculation
# Mode of the nominal variable: the continent with the largest absolute
# frequency (which.max() returns the first maximum).
moda_continentes <- with(tabla_resumen, Continente[which.max(ni)])
# Indicator table: one row, one column per statistic. Only the mode is
# filled in; every other indicator is shown as "-".
tabla_indicadores <- data.frame(
  "País", "Continentes", "-", "-", moda_continentes,
  "-", "-", "-", "-", "-"
)
# Set the display headers afterwards so spaces, dots and parentheses are kept
names(tabla_indicadores) <- c(
  "Variable", "Rango", "Media (X)", "Mediana (Me)", "Moda (Mo)",
  "Varianza (V)", "Desv. Est. (Sd)", "C.V. (%)", "Asimetría (As)",
  "Curtosis (K)"
)
# Render the indicator table with gt (Tabla N°3)
tabla_conclusiones_gt <- tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°3 de Conclusiones por Continentes de las Plantas Solares**")
  ) %>%
  tab_source_note(source_note = "Autor: Martin Sarmiento") %>%
  tab_options(column_labels.background.color = "#F0F0F0")
# Display the table
tabla_conclusiones_gt
# Rendered output captured from the report, kept as `##` lines so that the
# script still parses:
## | Tabla N°3 de Conclusiones por Continentes de las Plantas Solares | |||||||||
## | Variable | Rango | Media (X) | Mediana (Me) | Moda (Mo) | Varianza (V) | Desv. Est. (Sd) | C.V. (%) | Asimetría (As) | Curtosis (K) |
## |---|---|---|---|---|---|---|---|---|---|
## | País | Continentes | - | - | Asia | - | - | - | - | - |
## | Autor: Martin Sarmiento | |||||||||
La variable “País”, agrupada por continentes, presenta como valor más frecuente (moda) a Asia, con 28403 plantas solares, equivalentes al 48,33 % de la muestra.