ANÁLISIS ESTADÍSTICO INTEGRAL DE REGISTROS TEMPORALES - ESTACIÓN ANTISANA
library(readxl)
# Load the Antisana daily weather workbook.
# The data folder is kept in a variable and joined to the file name with
# file.path(), so the script no longer changes the session's working
# directory with setwd() just to resolve one file.
# NOTE(review): nothing visible in this section reads another file through a
# relative path; if a later section does, build that path from `dir_datos`.
dir_datos <- "C:/Users/Usuario/Desktop/TRABAJO DE ESTADISTICA/PDF-EXCEL-QGIS"
Datos <- read_excel(file.path(dir_datos, "weatherdataANTISANA.csv.xlsx"))
# Print column names and types as a quick check on the import.
str(Datos)
## tibble [366 × 10] (S3: tbl_df/tbl/data.frame)
## $ Date : POSIXct[1:366], format: "2012-01-01" "2012-01-02" ...
## $ Longitude : num [1:366] -78.1 -78.1 -78.1 -78.1 -78.1 ...
## $ Latitude : num [1:366] -0.468 -0.468 -0.468 -0.468 -0.468 ...
## $ Elevation : num [1:366] 4048 4048 4048 4048 4048 ...
## $ Max Temperature : num [1:366] 16.1 15.5 11.6 12 11.7 ...
## $ Min Temperature : num [1:366] 6.91 9.23 8.69 9.53 7.9 ...
## $ Precipitation : num [1:366] 8.49 35.44 41.53 15.48 28.71 ...
## $ Wind : num [1:366] 1.76 1.86 1.74 1.48 1.49 1.51 1.81 1.68 1.23 1.61 ...
## $ Relative Humidity: num [1:366] 0.93 0.96 0.98 0.99 0.98 0.97 0.98 0.99 0.99 0.98 ...
## $ Solar : num [1:366] 15.98 12.25 4.58 4.32 3.86 ...
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(dplyr)
library(gt)
# Absolute frequency of records per calendar month.
# count() evaluates the month label inline, so no intermediate Date/Mes
# columns are materialised; `ni` is the number of rows falling in each month.
TDFnombres_meses <- count(
  Datos,
  Mes = month(as.Date(Date), label = TRUE, abbr = FALSE),
  name = "ni"
) %>%
  arrange(Mes)
print(TDFnombres_meses)
## # A tibble: 12 × 2
## Mes ni
## <ord> <int>
## 1 enero 31
## 2 febrero 29
## 3 marzo 31
## 4 abril 30
## 5 mayo 31
## 6 junio 30
## 7 julio 31
## 8 agosto 31
## 9 septiembre 30
## 10 octubre 31
## 11 noviembre 30
## 12 diciembre 31
# Chronological order of the (Spanish, upper-case) month labels
orden_meses <- c(
  "ENERO", "FEBRERO", "MARZO", "ABRIL", "MAYO", "JUNIO",
  "JULIO", "AGOSTO", "SEPTIEMBRE", "OCTUBRE", "NOVIEMBRE", "DICIEMBRE"
)
# The month column must be the 12-level factor produced by
# month(label = TRUE); its level position is the month number.
stopifnot(
  is.factor(TDFnombres_meses$Mes),
  nlevels(TDFnombres_meses$Mes) == 12L
)
# Frequency distribution by month:
#   ni      absolute frequency (days recorded in the month)
#   hi      relative frequency in percent
#   Ni_asc  / Ni_desc  ascending / descending cumulative frequency
#   Hi_asc  / Hi_desc  the same cumulative frequencies in percent
TDFnombres_mes <- TDFnombres_meses %>%
  # month(label = TRUE) spells the month names in the session locale, so
  # matching upper-cased names against the Spanish labels turns every month
  # into NA on a non-Spanish machine. Look the label up by month number
  # instead, which does not depend on the locale.
  mutate(Mes = factor(orden_meses[as.integer(Mes)], levels = orden_meses)) %>%
  arrange(Mes) %>%
  mutate(
    hi = 100 * ni / sum(ni),
    Ni_asc = cumsum(ni),
    # reverse, accumulate, reverse back: running total from the last month
    Ni_desc = rev(cumsum(rev(ni))),
    Hi_asc = 100 * Ni_asc / sum(ni),
    Hi_desc = 100 * Ni_desc / sum(ni)
  ) %>%
  mutate(across(c(hi, Hi_asc, Hi_desc), ~ round(.x, 4)))
print(TDFnombres_mes)
## # A tibble: 12 × 7
## Mes ni hi Ni_asc Ni_desc Hi_asc Hi_desc
## <fct> <int> <dbl> <int> <int> <dbl> <dbl>
## 1 ENERO 31 8.47 31 366 8.47 100
## 2 FEBRERO 29 7.92 60 335 16.4 91.5
## 3 MARZO 31 8.47 91 306 24.9 83.6
## 4 ABRIL 30 8.20 121 275 33.1 75.1
## 5 MAYO 31 8.47 152 245 41.5 66.9
## 6 JUNIO 30 8.20 182 214 49.7 58.5
## 7 JULIO 31 8.47 213 184 58.2 50.3
## 8 AGOSTO 31 8.47 244 153 66.7 41.8
## 9 SEPTIEMBRE 30 8.20 274 122 74.9 33.3
## 10 OCTUBRE 31 8.47 305 92 83.3 25.1
## 11 NOVIEMBRE 30 8.20 335 61 91.5 16.7
## 12 DICIEMBRE 31 8.47 366 31 100 8.47
# Totals row: sums of the absolute and relative frequencies. The cumulative
# columns have no total, so they are filled with missing values.
totales <- TDFnombres_mes %>%
  summarise(
    Mes = "TOTAL",
    ni = sum(ni),
    hi = round(sum(hi), 4)
  ) %>%
  mutate(
    Ni_asc = NA_real_,
    Ni_desc = NA_real_,
    Hi_asc = NA_real_,
    Hi_desc = NA_real_
  )
# Monthly rows followed by the totals row
TDF_final <- TDFnombres_mes %>%
  bind_rows(totales)
# Render the monthly frequency table (monthly rows plus TOTAL) with gt.
gt(TDF_final) %>%
  tab_header(
    title = md("**DISTRIBUCIÓN POR MESES**"),
    subtitle = md("Variable: **FECHA**")
  ) %>%
  tab_source_note(source_note = "Fuente: weatherdataANTISANA.csv.xlsx") %>%
  cols_label(
    Mes = "Periodo (Mes)",
    ni = "Cant. Días (ni)",
    hi = "Porcentaje (hi%)",
    Ni_asc = "Ni (Asc)",
    Ni_desc = "Ni (Desc)",
    Hi_asc = "Hi (Asc)",
    Hi_desc = "Hi (Desc)"
  ) %>%
  cols_align(align = "center", columns = everything()) %>%
  # counts without decimals, percentages with two
  fmt_number(columns = c(ni, Ni_asc, Ni_desc), decimals = 0) %>%
  fmt_number(columns = c(hi, Hi_asc, Hi_desc), decimals = 2) %>%
  # the TOTAL row has no cumulative values; show a dash instead of NA
  sub_missing(
    columns = c(Ni_asc, Ni_desc, Hi_asc, Hi_desc),
    missing_text = "-"
  ) %>%
  # light header row for the column labels
  tab_style(
    style = list(
      cell_fill(color = "#F2F3F4"),
      cell_text(weight = "bold", color = "#2E4053")
    ),
    locations = cells_column_labels()
  ) %>%
  # dark banner for title and subtitle
  tab_style(
    style = list(
      cell_fill(color = "#2E4053"),
      cell_text(color = "white", weight = "bold")
    ),
    locations = cells_title()
  ) %>%
  tab_options(
    data_row.padding = px(6),
    column_labels.border.bottom.color = "#2E4053",
    table.border.top.color = "#2E4053",
    table.border.bottom.color = "#2E4053"
  )
| DISTRIBUCIÓN POR MESES | ||||||
| Variable: FECHA | ||||||
| Periodo (Mes) | Cant. Días (ni) | Porcentaje (hi%) | Ni (Asc) | Ni (Desc) | Hi (Asc) | Hi (Desc) |
|---|---|---|---|---|---|---|
| ENERO | 31 | 8.47 | 31 | 366 | 8.47 | 100.00 |
| FEBRERO | 29 | 7.92 | 60 | 335 | 16.39 | 91.53 |
| MARZO | 31 | 8.47 | 91 | 306 | 24.86 | 83.61 |
| ABRIL | 30 | 8.20 | 121 | 275 | 33.06 | 75.14 |
| MAYO | 31 | 8.47 | 152 | 245 | 41.53 | 66.94 |
| JUNIO | 30 | 8.20 | 182 | 214 | 49.73 | 58.47 |
| JULIO | 31 | 8.47 | 213 | 184 | 58.20 | 50.27 |
| AGOSTO | 31 | 8.47 | 244 | 153 | 66.67 | 41.80 |
| SEPTIEMBRE | 30 | 8.20 | 274 | 122 | 74.86 | 33.33 |
| OCTUBRE | 31 | 8.47 | 305 | 92 | 83.33 | 25.14 |
| NOVIEMBRE | 30 | 8.20 | 335 | 61 | 91.53 | 16.67 |
| DICIEMBRE | 31 | 8.47 | 366 | 31 | 100.00 | 8.47 |
| TOTAL | 366 | 100.00 | - | - | - | - |
| Fuente: weatherdataANTISANA.csv.xlsx | ||||||
# Bar chart N°1: days recorded per month, default y-axis range.
# Keep only the monthly rows and coerce the plotted columns to plain types.
TDF_plot <- TDF_final[TDF_final$Mes != "TOTAL", ] %>%
  mutate(Mes = as.character(Mes), ni = as.numeric(ni))
# Re-encode text from UTF-8 to the session's native encoding; bytes that
# cannot be converted are dropped (sub = "").
to_native <- function(x) {
  iconv(x, from = "UTF-8", to = "", sub = "")
}
names_mes <- to_native(TDF_plot$Mes)
xlab_txt <- to_native("MES")
ylab_txt <- to_native("Cantidad de días ")
main_txt <- to_native("Gráfica N°1: Distribución mensual de registros de la Estación Antisana")
# Large bottom margin leaves room for the vertical month labels.
par(mar = c(9, 4, 4, 2))
barplot(
  TDF_plot$ni,
  names.arg = names_mes,
  col = "#2E4053",
  las = 2,
  main = main_txt,
  ylab = ylab_txt,
  cex.main = 0.9,
  cex.axis = 0.8,
  cex.names = 0.8
)
# x-axis title written in the margin, below the rotated labels
mtext(xlab_txt, side = 1, line = 7, adj = 0.5, cex = 1)
# Bar chart N°2: days recorded per month on a fixed 0-366 y-axis, so each
# bar is seen against the total number of records.
# Drop the TOTAL row
TDF_plot <- TDF_final[TDF_final$Mes != "TOTAL", ]
# Make sure the plotted columns have plain types
TDF_plot$Mes <- as.character(TDF_plot$Mes)
TDF_plot$ni <- as.numeric(TDF_plot$ni)
# Re-encode text from UTF-8 to the session's native encoding; bytes that
# cannot be converted are dropped (sub = "").
to_native <- function(x) iconv(x, from = "UTF-8", to = "", sub = "")
# Fix: the title used to end with a stray, unmatched ")".
main_txt <- to_native("Gráfica N°2: Distribución mensual de registros en la Estación Antisana")
ylab_txt <- to_native("Cantidad de días")
xlab_txt <- to_native("MES")
names_mes <- to_native(TDF_plot$Mes)
# Large bottom margin leaves room for the vertical month labels.
par(mar = c(9, 4, 4, 2))
barplot(
  height = TDF_plot$ni,
  names.arg = names_mes,
  main = main_txt,
  ylab = ylab_txt,
  col = "#2E4053",
  las = 2,
  cex.names = 0.8,
  cex.axis = 0.8,
  cex.main = 0.9,
  ylim = c(0, 366)
)
# x-axis title written in the margin, below the rotated labels
mtext(xlab_txt, side = 1, line = 7, adj = 0.5, cex = 1)
# Bar chart N°3: percentage of records per month, default y-axis range.
# Keep only the monthly rows and coerce the plotted columns to plain types.
TDF_plot <- TDF_final[TDF_final$Mes != "TOTAL", ] %>%
  mutate(Mes = as.character(Mes), hi = as.numeric(hi))
# Re-encode text from UTF-8 to the session's native encoding; bytes that
# cannot be converted are dropped (sub = "").
to_native <- function(x) {
  iconv(x, from = "UTF-8", to = "", sub = "")
}
names_mes <- to_native(TDF_plot$Mes)
xlab_txt <- to_native("MES")
ylab_txt <- to_native("Porcentaje (%)")
main_txt <- to_native("Gráfica N°3: Distribución porcentual en meses de registros de la Estación Antisana")
# Large bottom margin leaves room for the vertical month labels.
par(mar = c(9, 4, 4, 2))
barplot(
  TDF_plot$hi,
  names.arg = names_mes,
  col = "#2E4053",
  las = 2,
  main = main_txt,
  ylab = ylab_txt,
  cex.main = 0.9,
  cex.axis = 0.8,
  cex.names = 0.8
)
# x-axis title written in the margin, below the rotated labels
mtext(xlab_txt, side = 1, line = 7, adj = 0.5, cex = 1)
# Bar chart N°4: percentage of records per month on a fixed 0-100 y-axis,
# with the value printed above each bar.
# Drop the TOTAL row
TDF_plot <- TDF_final[TDF_final$Mes != "TOTAL", ]
# Make sure the plotted columns have plain types
TDF_plot$Mes <- as.character(TDF_plot$Mes)
TDF_plot$hi <- as.numeric(TDF_plot$hi)
# Re-encode text from UTF-8 to the session's native encoding; bytes that
# cannot be converted are dropped (sub = "").
to_native <- function(x) iconv(x, from = "UTF-8", to = "", sub = "")
main_txt <- to_native("Gráfica N°4: Distribución porcentual en meses de registros de la Estación Antisana")
ylab_txt <- to_native("Porcentaje (%)")
xlab_txt <- to_native("MES")
names_mes <- to_native(TDF_plot$Mes)
# Large bottom margin leaves room for the vertical month labels.
par(mar = c(9, 4, 4, 2))
# barplot() returns the bar midpoints, reused to centre the value labels.
bp <- barplot(
  height = TDF_plot$hi,
  names.arg = names_mes,
  main = main_txt,
  ylab = ylab_txt,
  col = "#2E4053",
  las = 2,
  cex.names = 0.8,
  cex.axis = 0.8,
  cex.main = 0.9,
  ylim = c(0, 100)
)
text(
  x = bp,
  y = TDF_plot$hi,
  # sprintf keeps two decimals on every label; paste0(round(x, 2), "%")
  # dropped trailing zeros and printed "8.2%" next to "8.47%".
  labels = sprintf("%.2f%%", TDF_plot$hi),
  pos = 3,
  cex = 0.8
)
# x-axis title written in the margin, below the rotated labels
mtext(xlab_txt, side = 1, line = 7, adj = 0.5, cex = 1)
# Box plot N°5: spread of the monthly day counts (TOTAL row excluded).
TDF_box <- TDF_final[TDF_final$Mes != "TOTAL", ] %>%
  mutate(ni = as.numeric(ni))
# Re-encode text from UTF-8 to the session's native encoding; bytes that
# cannot be converted are dropped (sub = "").
to_native <- function(x) {
  iconv(x, from = "UTF-8", to = "", sub = "")
}
par(mar = c(4, 9, 4, 2))
boxplot(
  TDF_box$ni,
  main = to_native("Gráfica N°5: Diagrama de caja de los registros de la Estación Antisana"),
  xlab = to_native("Cantidad de días"),
  horizontal = TRUE,
  border = "#2E4053",
  col = "#F2F3F4"
)
# Chart N°6: overlaid ogives (ascending and descending cumulative frequency)
# Drop the TOTAL row
TDF_oj <- TDF_final[TDF_final$Mes != "TOTAL", ]
# Put the months in chronological order before accumulating
TDF_oj$Mes <- toupper(as.character(TDF_oj$Mes))
TDF_oj$Mes <- factor(TDF_oj$Mes, levels = orden_meses)
TDF_oj$ni <- as.numeric(TDF_oj$ni)
TDF_oj <- TDF_oj[order(TDF_oj$Mes), ]
# Cumulative frequencies
Ni_asc <- cumsum(TDF_oj$ni)
Ni_desc <- rev(cumsum(rev(TDF_oj$ni)))
# Months are plotted at positions 1..n and labelled by hand on the axis
x <- seq_along(TDF_oj$Mes)
# Re-encode text from UTF-8 to the session's native encoding; bytes that
# cannot be converted are dropped (sub = "").
to_native <- function(x) iconv(x, from = "UTF-8", to = "", sub = "")
main_txt <- to_native("Gráfica N°6: Ojivas de frecuencia acumulada de datos de la Estación Antisana")
ylab_txt <- to_native("Frecuencia acumulada")
xlab_txt <- to_native("MES")
names_mes <- to_native(as.character(TDF_oj$Mes))
# Bottom margin for the vertical month labels, right margin for the legend.
# xpd is no longer switched on globally: with par(xpd = TRUE) the grid lines
# are not clipped to the plot region and run into the margins.
par(mar = c(9, 4, 4, 6))
ylim_max <- max(c(Ni_asc, Ni_desc))
# Ascending ogive
plot(
  x, Ni_asc,
  type = "o",
  pch = 16,
  col = "#2E5990",
  lwd = 2,
  xaxt = "n",
  ylim = c(0, ylim_max),
  main = main_txt,
  ylab = ylab_txt,
  xlab = "",
  # draw the grid first so it stays behind the curves instead of being
  # painted over them at the end
  panel.first = grid()
)
axis(1, at = x, labels = names_mes, las = 2, cex.axis = 0.8)
mtext(xlab_txt, side = 1, line = 7)
# Descending ogive
lines(
  x, Ni_desc,
  type = "o",
  pch = 16,
  col = "#C0392B",
  lwd = 2
)
# Legend placed in the right margin; only this call is allowed to draw
# outside the plot region.
legend(
  "topright",
  inset = c(-0.25, 0),
  legend = c("Ascendente", "Descendente"),
  col = c("#2E5990", "#C0392B"),
  lty = 1,
  lwd = 2,
  pch = 16,
  bty = "n",
  xpd = TRUE
)
library(gt)
library(tidyverse)
# One-row summary for the date variable. Every statistic is stored as the
# text "N/A"; only the covered date range is reported.
df_manual <- tibble(
  Variable = "Fecha",
  Rango = "[1/1/2012-31/12/2012]",
  Media = "N/A",
  Mediana = "N/A",
  Moda = "N/A",
  Varianza = "N/A",
  Desv_Std = "N/A",
  CV_Porc = "N/A",
  Asimetria = "N/A",
  Curtosis = "N/A",
  Atipicos = "N/A"
)
# Render the summary with gt.
# The former fmt_number() calls were removed: every targeted column is
# character, so there was nothing numeric to format (the rendered table shows
# the "N/A" text unchanged).
# NOTE(review): some gt versions appear to reject fmt_number() on non-numeric
# columns instead of skipping them - confirm against the gt reference.
df_manual %>%
  gt() %>%
  tab_header(
    title = md("**ESTADÍSTICOS DESCRIPTIVOS**"),
    subtitle = "Resumen de Indicadores - Estación Antisana"
  ) %>%
  tab_source_note(source_note = "Autor: Grupo 3") %>%
  cols_label(
    Variable = "Variable",
    Rango = "Rango [Min; Max]",
    Media = "Media (X̄)",
    Mediana = "Mediana (Me)",
    Moda = "Moda (Mo)",
    Varianza = "Varianza (S²)",
    Desv_Std = "Desv. Est. (S)",
    CV_Porc = "C.V. (%)",
    Asimetria = "Asimetría (As)",
    Curtosis = "Curtosis (K)",
    Atipicos = "Outliers"
  ) %>%
  tab_options(
    column_labels.background.color = "#2E4053",
    table.border.top.color = "black",
    table.border.bottom.color = "#2E4053",
    column_labels.border.bottom.color = "#2E4053",
    data_row.padding = px(8)
  ) %>%
  # white bold text on the dark column-label background
  tab_style(
    style = list(cell_text(weight = "bold", color = "white")),
    locations = cells_column_labels()
  ) %>%
  cols_align(align = "center", columns = everything())
| ESTADÍSTICOS DESCRIPTIVOS | ||||||||||
| Resumen de Indicadores - Estación Antisana | ||||||||||
| Variable | Rango [Min; Max] | Media (X̄) | Mediana (Me) | Moda (Mo) | Varianza (S²) | Desv. Est. (S) | C.V. (%) | Asimetría (As) | Curtosis (K) | Outliers |
|---|---|---|---|---|---|---|---|---|---|---|
| Fecha | [1/1/2012-31/12/2012] | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
| Autor: Grupo 3 | ||||||||||
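El análisis cronológico de la variable Fecha en la Estación Antisana se centra exclusivamente en delimitar el periodo de estudio, el cual comprende un ciclo anual completo desde el 1 de enero de 2012 hasta el 31 de diciembre de 2012 (366 registros diarios). Aunque esta variable es de naturaleza cuantitativa-discreta, en este conjunto de datos cumple la función de índice temporal de los registros y no la de una magnitud medida; por ello, descriptores como la Media, la Moda o la Asimetría carecen de interpretación estadística y se reportan como no aplicables (N/A).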