library(ggplot2)
library(readr)
library(tidyverse)
library(dplyr)
library(janitor)
library(cowplot)
theme_set(theme_cowplot())
library(reshape2)
library(lubridate)
library(forecast)
library(tseries)
library(tsibble)
library(stats)
library(ggfortify)
library(ggplot2)
library(dplyr)
library(cowbell)
library(xtable)
library(viridis)
library(ggsci)BasesPerdidas
Base de datos
Base de datos del clima
The data was obtained from the National Aeronautics and Space Administration (NASA) Langley Research Center (LaRC) Prediction of Worldwide Energy Resource (POWER) Project funded through the NASA Earth Science/Applied Science Program.
The data was obtained from the POWER Project’s Hourly 2.x.x version on 2023/09/03.
https://power.larc.nasa.gov/data-access-viewer/
https://power.larc.nasa.gov/docs/referencing/
clima <- read_csv("POWER_Point_Monthly_Timeseries_1981_2021_040d7732N_073d9721W_LST.csv", skip = 25)Limpieza de datos
clima1 <- clima%>%pivot_longer(cols = JAN:ANN,
names_to = "MES",
values_to = "VALUE")%>%
filter(MES!="ANN")%>%
mutate(DATE=ym(paste0(YEAR,"-",MES ))) %>%
select(-c(YEAR, MES)) %>%
pivot_wider(names_from = PARAMETER, values_from = VALUE)%>%
clean_names()%>%
remove_empty()%>%
mutate(year=year(date))%>%
select_if(~all(!is.na(.)))
ggplot(clima1, aes(x=date, y=t2m))+geom_line()+geom_hline(yintercept = 24, linetype = "dashed", color = "red")+theme(aspect.ratio = .4)+geom_hline(yintercept = -5, linetype = "dashed", color = "blue")train<-clima1%>%filter(year<=2010)
test<-clima1%>%filter(year>2010)
train_tseries <- ts(train$t2m, start = c(min(year(train$date)), min(month(train$date))) , frequency = 12)
test_tseries <- ts(test$t2m, start = c(min(year(test$date)), min(month(test$date))) , frequency = 12)
full_tseries<-ts(clima1$t2m, start = c(min(year(test$date)), min(month(test$date))) , frequency = 12)
melted_data <- melt(clima1, id.vars = "date")
# ggplot(melted_data, aes(x = date, y = log10(value), color = variable)) + geom_line()
#DE ACA NO VAMOS A USAR NINGUN GRAFtabla<-function(t_series, origen){
mean_temp <- mean(t_series)
median_temp <- median(t_series)
min_temp <- min(t_series)
max_temp <- max(t_series)
std_dev_temp <- sd(t_series)
summary_df <- data.frame(
Statistic = c("Media", "Mediana", "Mínimo", "Máximo", "Desviación estándar"),
Value = c(mean_temp, median_temp, min_temp, max_temp, std_dev_temp)
)
colnames(summary_df)<-c("Estadístico", origen)
return(summary_df)
}
resumen_train<-tabla(train_tseries, "Train")
resumen_test<-tabla(test_tseries, "Test")
resumen_completo<-tabla(full_tseries, "Base completa")
resumen <- resumen_train%>%
left_join( resumen_test, by = "Estadístico") %>%
left_join(resumen_completo, by = "Estadístico")
# Create the LaTeX table
table1<-xtable(resumen, digits = c(0,3,3,3, 3), align = c("l", "r", "r", "r", "r"))
# Print the LaTeX table
print(table1, include.rownames = FALSE)% latex table generated in R 4.3.1 by xtable 1.8-4 package
% Mon Nov 20 02:56:21 2023
\begin{table}[ht]
\centering
\begin{tabular}{rrrr}
\hline
Estadístico & Train & Test & Base completa \\
\hline
Media & 10.419 & 11.144 & 10.613 \\
Mediana & 10.075 & 11.480 & 10.390 \\
Mínimo & -7.100 & -8.590 & -8.590 \\
Máximo & 26.230 & 25.620 & 26.230 \\
Desviación estándar & 9.195 & 9.356 & 9.234 \\
\hline
\end{tabular}
\end{table}
Ajuste del modelo
train_tseries %>% ggseasonplot() +
labs(x = "Mes",
y = "Temperatura",
title = NULL,
fill = "Año",
caption = "Fuente: Elaboración propia con datos de NASA")+
theme(plot.caption = element_text(hjust = 0)) +
theme_cowplot()pdf("trainSPlot.pdf", width = 10)
train_tseries %>% ggseasonplot() +
labs(x = "Mes",
y = "Temperatura",
title = NULL,
fill = "Año",
caption = "Fuente: Elaboración propia con datos de NASA")+
theme(plot.caption = element_text(hjust = 0)) +
theme_cowplot()
# scale_color_viridis(discrete = TRUE, option = "D")+
# scale_fill_viridis(discrete = TRUE)
dev.off()png
2
test_tseries %>% ggseasonplot() +
labs(x = "Mes",
y = "Temperatura",
title = NULL,
caption = "Fuente: Elaboración propia con datos de NASA")+
theme(plot.caption = element_text(hjust = 0)) +
theme_cowplot()pdf("testSPlot.pdf", width = 10)
test_tseries %>% ggseasonplot() +
labs(x = "Mes",
y = "Temperatura",
title = NULL,
caption = "Fuente: Elaboración propia con datos de NASA")+
theme(plot.caption = element_text(hjust = 0)) +
theme_cowplot()
dev.off()png
2
plot(train_tseries, xlab = ("Año"))
title(sub = "Fuente: Elaboración propia con datos de NASA", adj = 0)
modeloLineal = lm(train_tseries ~ time(train_tseries))
abline(reg = modeloLineal) ylab("Temperatura")$y
[1] "Temperatura"
attr(,"class")
[1] "labels"
xlab("Año")$x
[1] "Año"
attr(,"class")
[1] "labels"
pdf("trainLM.pdf", width = 10)
plot(train_tseries, xlab = ("Año"))
title(sub = "Fuente: Elaboración propia con datos de NASA", adj = 0)
modeloLineal = lm(train_tseries ~ time(train_tseries))
abline(reg = modeloLineal)
ylab("Temperatura")$y
[1] "Temperatura"
attr(,"class")
[1] "labels"
xlab("Año")$x
[1] "Año"
attr(,"class")
[1] "labels"
dev.off()png
2
media = aggregate(train_tseries,FUN=mean)
plot(media, xlab = ("Año"), ylab = ("Temperatura promedio"))
title(sub = "Fuente: Elaboración propia con datos de NASA", adj = 0)pdf("trainMA.pdf", width = 10)
media = aggregate(train_tseries,FUN=mean)
plot(media, xlab = ("Año"), ylab = ("Temperatura promedio"))
title(sub = "Fuente: Elaboración propia con datos de NASA", adj = 0)
dev.off()png
2
boxplot(train_tseries ~ cycle(train_tseries), xlab = ("Mes"), ylab = "Temperatura", col = plasma(12))
title(sub = "Fuente: Elaboración propia con datos de NASA", adj = 0)pdf("boxplot.pdf", width = 10)
boxplot(train_tseries ~ cycle(train_tseries), xlab = ("Mes"), ylab = "Temperatura", col = plasma(12))
title(sub = "Fuente: Elaboración propia con datos de NASA", adj = 0)
dev.off()png
2
fit_temp <- auto.arima(train$t2m, ic="bic")
prediccion_temp <- forecast(fit_temp, h = length(test$t2m))
summary(fit_temp)Series: train$t2m
ARIMA(2,0,1) with non-zero mean
Coefficients:
ar1 ar2 ma1 mean
1.7144 -0.9765 -0.7729 10.4039
s.e. 0.0113 0.0109 0.0245 0.0956
sigma^2 = 4.324: log likelihood = -775.22
AIC=1560.45 AICc=1560.62 BIC=1579.88
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set -0.02546485 2.067822 1.605699 19.81736 73.14933 0.3545635
ACF1
Training set -0.3299556
plot(prediccion_temp)adf.test(train_tseries)
Augmented Dickey-Fuller Test
data: train_tseries
Dickey-Fuller = -10.646, Lag order = 7, p-value = 0.01
alternative hypothesis: stationary
sarima_model <- auto.arima(train_tseries, seasonal = TRUE)# Make predictions
# sarima_forecast <- forecast(sarima_model, h = length(test_tseries))
sarima_interval95 <-forecast(sarima_model, h = length(test_tseries), level=c(95))
esperanza <- sarima_interval95$mean
lower <- sarima_interval95$lower
upper <- sarima_interval95$upper
pruebaModelo<-data.frame(real=test_tseries,
predicho=esperanza,
intervalAbajo=lower,
intervalArriba=upper,
Fecha=seq(from=as.Date("2011-01-01"), to=as.Date("2021-12-01"), by = "month"))
colnames(pruebaModelo)<-c("Temperatura real",
"Temperatura predicha",
"Límite inferior del intervalo (95%)",
"Límite superior del intervalo (95%)",
"Fecha"
)
pruebaModelo<-pruebaModelo%>%pivot_longer(cols=-Fecha,
names_to = "Temperatura",
values_to = "Observación")
accuracy_metrics <- accuracy(esperanza, test_tseries)
accuracy_table <- xtable(accuracy_metrics)
print(accuracy_table, digits = c(0,3,3, 3, 3, 3, 3, 3))% latex table generated in R 4.3.1 by xtable 1.8-4 package
% Mon Nov 20 02:56:39 2023
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrrrr}
\hline
& ME & RMSE & MAE & MPE & MAPE & ACF1 & Theil's U \\
\hline
Test set & -0.01 & 2.04 & 1.54 & -263.96 & 359.57 & 0.24 & 1.40 \\
\hline
\end{tabular}
\end{table}
# plot(test_tseries, main = NULL, xlab = ("Año"), ylab = "Temperaturas esperadas del conjunto de prueba")# "Actual vs. Predicted Temperatures")
# lines(sarima_forecast$mean, col = "red")
# legend("topleft", legend = c("Actual", "Predicción"), col = c("black", "red"), lty = 1)
# pdf("predTemp.pdf", width = 20)
# pdf("predTemp2.pdf", width = 20)
(ggplot(data = pruebaModelo, aes(x = Fecha, y = Observación, color = Temperatura)) +
geom_line() +
labs(caption = "Fuente: Elaboración propia con datos de NASA") +
scale_color_manual(values = c("Temperatura real" = "black",
"Temperatura predicha"= "red",
"Límite inferior del intervalo (95%)"= "cyan",
"Límite superior del intervalo (95%)"= "pink"))+
theme_cowplot() +
theme(plot.caption = element_text(hjust = 0)))# dev.off()library(xtable)
resultados <- data.frame(
Prueba = c(
# "Prueba de normalidad",
"Prueba de Ljung-Box",
"Prueba de heterocedasticidad",
"Prueba de estacionariedad"
),
Resultado = c(
# shapiro.test(sarima_model$residuals)$p.value,
Box.test(sarima_model$residuals, lag = 10, type = "Ljung-Box")$p.value,
white.test(sarima_model$residuals)$p.value,
adf.test(sarima_model$residuals)$p.value
)
)
tabla_latex <- xtable(resultados,
caption = "Resultados de las pruebas en los residuos del modelo SARIMA")
print(tabla_latex, type = "latex")% latex table generated in R 4.3.1 by xtable 1.8-4 package
% Mon Nov 20 02:56:39 2023
\begin{table}[ht]
\centering
\begin{tabular}{rlr}
\hline
& Prueba & Resultado \\
\hline
1 & Prueba de Ljung-Box & 0.86 \\
2 & Prueba de heterocedasticidad & 0.47 \\
3 & Prueba de estacionariedad & 0.01 \\
\hline
\end{tabular}
\caption{Resultados de las pruebas en los residuos del modelo SARIMA}
\end{table}
resumen<-summary(sarima_model)
coefficients <- resumen$coef
std_errors <- sqrt(diag(resumen$var.coef))
aic <- resumen$aic
bic <- resumen$bic
resumen_sarimaModel <- data.frame(
Coefficients = coefficients,
StandardErrors = std_errors
# PValues = p_values
)
resumen_sarimaModel <- rbind(
resumen_sarimaModel,
c("AIC", aic, NA),
c("BIC", bic, NA)
)
resumen_latex <- xtable(resumen_sarimaModel, caption = "Resumen del modelo ajustado")
print(resumen_latex, digits = c(0,3,3), align = c("l", "r", "r"))% latex table generated in R 4.3.1 by xtable 1.8-4 package
% Mon Nov 20 02:56:39 2023
\begin{table}[ht]
\centering
\begin{tabular}{rll}
\hline
& Coefficients & StandardErrors \\
\hline
ar1 & -0.216620226258099 & 0.289795046931894 \\
ar2 & 0.372864986197777 & 0.242383365423148 \\
ma1 & 0.424676067184716 & 0.305839459534777 \\
ma2 & -0.16623733419521 & 0.25189477889407 \\
sar1 & -0.687475433816859 & 0.0502949821880856 \\
sar2 & -0.369423878628048 & 0.0514046999109033 \\
drift & 0.00332001884099555 & 0.00582992935573155 \\
8 & AIC & 1404.02492735219 \\
9 & BIC & 1434.84254719039 \\
\hline
\end{tabular}
\caption{Resumen del modelo ajustado}
\end{table}
Box.test(sarima_model$resid,type="Ljung-Box")
Box-Ljung test
data: sarima_model$resid
X-squared = 0.0096337, df = 1, p-value = 0.9218
(acfPlot <- ggAcf(train_tseries, lag.max = 36)+
labs(title="",caption = "Fuente: Elaboración propia con datos de NASA") +
theme_cowplot()+
theme(plot.caption = element_text(hjust = 0)) +
xlab("Rezago")
)# pdf("acf.pdf", width = 10)
acfPlot# dev.off()
(pacfPlot <- ggPacf(train_tseries, lag.max = 36)+
labs(title="",caption = "Fuente: Elaboración propia con datos de NASA") +
theme_cowplot()+
theme(plot.caption = element_text(hjust = 0)) +
xlab("Rezago")
)# pdf("pacf.pdf", width = 10)
pacfPlot# dev.off()