library(haven)
# Load the Stata export that holds the PurpleAir (ATE1-ATE3) and SENAMHI series.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path so the script runs on other machines.
data <- read_dta("~/Desktop/Doctorado/Calibración de purple air/Ate nuevo 12 octubre/ATE123_12 Octubre.dta")
# View() opens a viewer window, so it is only called in an interactive
# session; Rscript / knitr runs skip it.
if (interactive()) {
  View(data)
}
# Derived meteorological variables for each PurpleAir monitor (ATE1-ATE3):
# temperature in Celsius, actual vapour pressure and dew point.
# Columns are added in the same order as before (all temperatures, then all
# vapour pressures, then all dew points).

# Constants of the Magnus-type dew point inversion
#   Td = A3 * ln(e / A1) / (A2 - ln(e / A1))
# A1 is a pressure expressed in kPa (0.61078 kPa = 6.1078 hPa).
A1 <- 0.61078
A2 <- 17.558
A3 <- 241.88
HPA_POR_KPA <- 10

# Fahrenheit -> Celsius.
fahrenheit_a_celsius <- function(temp_f) {
  (temp_f - 32) * 5 / 9
}

# Actual vapour pressure in hPa from relative humidity (%) and temperature (C).
presion_vapor_hpa <- function(hr_pct, temp_c) {
  (hr_pct / 100) * 6.11 * 10^((7.5 * temp_c) / (237.3 + temp_c))
}

# Dew point (C) from vapour pressure in hPa.
# The vapour pressure is converted to kPa first so that it is in the same unit
# as A1. Dividing the hPa value directly by A1 inflates the ratio tenfold and
# produces dew points far above the air temperature.
punto_rocio_c <- function(vp_hpa) {
  ln_ratio <- log((vp_hpa / HPA_POR_KPA) / A1)
  (A3 * ln_ratio) / (A2 - ln_ratio)
}

monitores <- c("ATE1", "ATE2", "ATE3")

for (monitor in monitores) {
  data[[paste0("temperatura_c_", monitor)]] <-
    fahrenheit_a_celsius(data[[paste0("temperatura_", monitor)]])
}
for (monitor in monitores) {
  data[[paste0("Vp_", monitor)]] <- presion_vapor_hpa(
    data[[paste0("humidity_", monitor)]],
    data[[paste0("temperatura_c_", monitor)]]
  )
}
# NOTE: every model further down that uses Dewpoint_* (and any coefficient
# copied by hand from its output) must be re-run after this unit correction.
for (monitor in monitores) {
  data[[paste0("Dewpoint_", monitor)]] <-
    punto_rocio_c(data[[paste0("Vp_", monitor)]])
}
Los monitores PurpleAir tienen datos desde el 18 de julio de 2023 hasta el 7 de mayo de 2024; sin embargo, SENAMHI no tiene datos desde febrero.
# Column inventory after adding the derived variables.
names(data)
## [1] "fecha" "temperatura_ATE3" "humidity_ATE3"
## [4] "pressure_ATE3" "pm2_5_atm_a_ATE3" "pm2_5_atm_b_ATE3"
## [7] "temperatura_ATE2" "humidity_ATE2" "pressure_ATE2"
## [10] "pm2_5_atm_b_ATE2" "pm2_5_atm_a_ATE2" "temperatura_ATE1"
## [13] "humidity_ATE1" "pressure_ATE1" "pm2_5_atm_a_ATE1"
## [16] "pm2_5_atm_b_ATE1" "ATE_Senamhi" "temperatura_c_ATE1"
## [19] "temperatura_c_ATE2" "temperatura_c_ATE3" "Vp_ATE1"
## [22] "Vp_ATE2" "Vp_ATE3" "Dewpoint_ATE1"
## [25] "Dewpoint_ATE2" "Dewpoint_ATE3"
# First and last date present in the data set (missing dates are ignored).
min_fecha <- min(data[["fecha"]], na.rm = TRUE)
max_fecha <- max(data[["fecha"]], na.rm = TRUE)
print(min_fecha)
## [1] "2023-07-18"
print(max_fecha)
## [1] "2024-05-07"
# Percentage of missing values per column: NA count of each column divided by
# the number of rows.
faltantes_por_columna <- vapply(
  data,
  function(columna) sum(is.na(columna)),
  numeric(1)
)
porcentaje_faltantes <- faltantes_por_columna / nrow(data) * 100
# Show the result
print(porcentaje_faltantes)
## fecha temperatura_ATE3 humidity_ATE3 pressure_ATE3
## 0.000000 6.896552 6.896552 6.896552
## pm2_5_atm_a_ATE3 pm2_5_atm_b_ATE3 temperatura_ATE2 humidity_ATE2
## 6.896552 6.896552 0.000000 0.000000
## pressure_ATE2 pm2_5_atm_b_ATE2 pm2_5_atm_a_ATE2 temperatura_ATE1
## 0.000000 0.000000 0.000000 0.000000
## humidity_ATE1 pressure_ATE1 pm2_5_atm_a_ATE1 pm2_5_atm_b_ATE1
## 0.000000 0.000000 0.000000 0.000000
## ATE_Senamhi temperatura_c_ATE1 temperatura_c_ATE2 temperatura_c_ATE3
## 35.517241 0.000000 0.000000 6.896552
## Vp_ATE1 Vp_ATE2 Vp_ATE3 Dewpoint_ATE1
## 0.000000 0.000000 6.896552 0.000000
## Dewpoint_ATE2 Dewpoint_ATE3
## 0.000000 6.896552
Cada monitor PurpleAir tiene dos sensores (A y B). Según la literatura, para que un monitor PurpleAir sea considerado en el análisis, las lecturas de ambos sensores deben coincidir en al menos el 30% de sus mediciones, con un umbral de precisión ≤ 0.130. La precisión se calcula con la fórmula: $\text{precisión} = \dfrac{|A - B|}{A + B}$.
Los resultados preliminares indican un 100% de concordancia entre los sensores de los monitores PurpleAir ATE1, ATE2 y ATE3, de acuerdo con los criterios de validación establecidos.
# Channel agreement per monitor: precision = |A - B| / (A + B), followed by
# the number of rows whose precision exceeds the 0.130 threshold.
# ATE1
data$precision_ATE1 <- with(
  data,
  abs(pm2_5_atm_a_ATE1 - pm2_5_atm_b_ATE1) / (pm2_5_atm_a_ATE1 + pm2_5_atm_b_ATE1)
)
num_precision_mayor_013_ATE1 <- length(which(data$precision_ATE1 > 0.130))
print(num_precision_mayor_013_ATE1)
## [1] 0
# ATE2
data$precision_ATE2 <- with(
  data,
  abs(pm2_5_atm_a_ATE2 - pm2_5_atm_b_ATE2) / (pm2_5_atm_a_ATE2 + pm2_5_atm_b_ATE2)
)
num_precision_mayor_013_ATE2 <- length(which(data$precision_ATE2 > 0.130))
print(num_precision_mayor_013_ATE2)
## [1] 0
# ATE3
data$precision_ATE3 <- with(
  data,
  abs(pm2_5_atm_a_ATE3 - pm2_5_atm_b_ATE3) / (pm2_5_atm_a_ATE3 + pm2_5_atm_b_ATE3)
)
num_precision_mayor_013_ATE3 <- length(which(data$precision_ATE3 > 0.130))
print(num_precision_mayor_013_ATE3)
## [1] 0
# Method from Laura's paper: "Data points were excluded if the deviation
# between sensors A and B exceeded either 70% or 5 μg/m³".
# NOTE(review): the relative deviation is computed here as |A - B| / (A + B);
# confirm this matches the paper's definition of the 70% criterion.
# ATE1
# Rows whose relative A/B deviation exceeds 70%
superan_70_ATE1 <- with(
  data,
  sum(abs(pm2_5_atm_a_ATE1 - pm2_5_atm_b_ATE1) / (pm2_5_atm_a_ATE1 + pm2_5_atm_b_ATE1) > 0.70, na.rm = TRUE)
)
# Rows whose absolute A/B deviation exceeds 5 μg/m³
superan_5_ATE1 <- with(
  data,
  sum(abs(pm2_5_atm_a_ATE1 - pm2_5_atm_b_ATE1) > 5, na.rm = TRUE)
)
# Show the results
print(superan_70_ATE1)
## [1] 0
print(superan_5_ATE1)
## [1] 0
# ATE2
# Rows whose relative A/B deviation exceeds 70%
superan_70_ATE2 <- with(
  data,
  sum(abs(pm2_5_atm_a_ATE2 - pm2_5_atm_b_ATE2) / (pm2_5_atm_a_ATE2 + pm2_5_atm_b_ATE2) > 0.70, na.rm = TRUE)
)
# Rows whose absolute A/B deviation exceeds 5 μg/m³
superan_5_ATE2 <- with(
  data,
  sum(abs(pm2_5_atm_a_ATE2 - pm2_5_atm_b_ATE2) > 5, na.rm = TRUE)
)
# Show the results
print(superan_70_ATE2)
## [1] 0
print(superan_5_ATE2)
## [1] 0
# ATE3
# Rows whose relative A/B deviation exceeds 70%
superan_70_ATE3 <- with(
  data,
  sum(abs(pm2_5_atm_a_ATE3 - pm2_5_atm_b_ATE3) / (pm2_5_atm_a_ATE3 + pm2_5_atm_b_ATE3) > 0.70, na.rm = TRUE)
)
# Rows whose absolute A/B deviation exceeds 5 μg/m³
superan_5_ATE3 <- with(
  data,
  sum(abs(pm2_5_atm_a_ATE3 - pm2_5_atm_b_ATE3) > 5, na.rm = TRUE)
)
# Show the results
print(superan_70_ATE3)
## [1] 0
print(superan_5_ATE3)
## [1] 1
# Keep only the rows where the ATE3 channels differ by at most 5 μg/m³.
# Rows where the comparison is NA (a missing ATE3 reading) are dropped too,
# exactly as subset() does.
dentro_de_5_ATE3 <- abs(data$pm2_5_atm_a_ATE3 - data$pm2_5_atm_b_ATE3) <= 5
data_filtrada2 <- data[which(dentro_de_5_ATE3), ]
# Mean and standard deviation of the precision metric for each monitor
# (missing values are ignored).
# ATE1
precision_ate1 <- data[["precision_ATE1"]]
media_precision_ATE1 <- mean(precision_ate1, na.rm = TRUE)
desviacion_std_precision_ATE1 <- sd(precision_ate1, na.rm = TRUE)
print(media_precision_ATE1)
## [1] 0.0144325
print(desviacion_std_precision_ATE1)
## [1] 0.008915939
# ATE2
precision_ate2 <- data[["precision_ATE2"]]
media_precision_ATE2 <- mean(precision_ate2, na.rm = TRUE)
desviacion_std_precision_ATE2 <- sd(precision_ate2, na.rm = TRUE)
print(media_precision_ATE2)
## [1] 0.01205336
print(desviacion_std_precision_ATE2)
## [1] 0.006523589
# ATE3
precision_ate3 <- data[["precision_ATE3"]]
media_precision_ATE3 <- mean(precision_ate3, na.rm = TRUE)
desviacion_std_precision_ATE3 <- sd(precision_ate3, na.rm = TRUE)
print(media_precision_ATE3)
## [1] 0.01992125
print(desviacion_std_precision_ATE3)
## [1] 0.01226382
library(ggplot2)
# Channel A vs channel B scatter plots, one per monitor.
# ATE1 (all rows)
ggplot(data, aes(x = pm2_5_atm_a_ATE1, y = pm2_5_atm_b_ATE1)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE1 ",
    x = "pm2_5_atm_a_ATE1",
    y = "pm2_5_atm_b_ATE1"
  )
# ATE2 (all rows)
ggplot(data, aes(x = pm2_5_atm_a_ATE2, y = pm2_5_atm_b_ATE2)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE2 ",
    x = "pm2_5_atm_a_ATE2",
    y = "pm2_5_atm_b_ATE2"
  )
# ATE3 (rows kept by the 5 μg/m³ channel-difference filter)
ggplot(data_filtrada2, aes(x = pm2_5_atm_a_ATE3, y = pm2_5_atm_b_ATE3)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE3 ",
    x = "pm2_5_atm_a_ATE3",
    y = "pm2_5_atm_b_ATE3"
  )
# Per-monitor PM2.5: row-wise mean of channels A and B. With na.rm = TRUE a
# row with a single available channel keeps that channel's value.
canales_ate1 <- c("pm2_5_atm_a_ATE1", "pm2_5_atm_b_ATE1")
canales_ate2 <- c("pm2_5_atm_a_ATE2", "pm2_5_atm_b_ATE2")
canales_ate3 <- c("pm2_5_atm_a_ATE3", "pm2_5_atm_b_ATE3")
data$ATE1_mean <- rowMeans(data[, canales_ate1], na.rm = TRUE)
data$ATE2_mean <- rowMeans(data[, canales_ate2], na.rm = TRUE)
data$ATE3_mean <- rowMeans(data[, canales_ate3], na.rm = TRUE)
# Analysis subset: only the rows that have a SENAMHI value.
data_filtrada <- data[!is.na(data$ATE_Senamhi), ]
# PurpleAir monitor mean vs SENAMHI scatter plots (rows with a SENAMHI value).
# ATE1
ggplot(data_filtrada, aes(x = ATE1_mean, y = ATE_Senamhi)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE1 vs Senamhi",
    x = "ATE1_mean",
    y = "ATE_Senamhi"
  )
# ATE2
ggplot(data_filtrada, aes(x = ATE2_mean, y = ATE_Senamhi)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE2 vs Senamhi",
    x = "ATE2_mean",
    y = "ATE_Senamhi"
  )
# ATE3
ggplot(data_filtrada, aes(x = ATE3_mean, y = ATE_Senamhi)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE3 vs Senamhi",
    x = "ATE3_mean",
    y = "ATE_Senamhi"
  )
## Warning: Removed 17 rows containing missing values (`geom_point()`).
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Long format (one row per original row and monitor) for the box plot.
# pivot_longer() replaces the superseded gather(); it is the same reshaping
# function this script already uses for the time-series data. All other
# columns are kept, as gather() did. The stacked rows come out in a different
# order, which does not affect the box plot.
data_long <- data_filtrada %>%
  pivot_longer(
    cols = c(ATE_Senamhi, ATE1_mean, ATE2_mean, ATE3_mean),
    names_to = "Monitor",
    values_to = "PM2_5"
  )
# Distribution of PM2.5 per monitor
ggplot(data_long, aes(x = Monitor, y = PM2_5)) +
  geom_boxplot() +
  ggtitle("Boxplot of PM2.5 Measurements for ATE_Senamhi, ATE1, ATE2, and ATE3") +
  xlab("Monitor") +
  ylab("PM2.5 Concentration")
## Warning: Removed 17 rows containing non-finite values (`stat_boxplot()`).
# Time series
# Make sure the date column is a Date before plotting.
data_filtrada$fecha <- as.Date(data_filtrada$fecha)
# Keep the date plus the four PM2.5 series, then stack the series into
# Monitor / PM2_5 pairs.
columnas_serie <- c("fecha", "ATE_Senamhi", "ATE1_mean", "ATE2_mean", "ATE3_mean")
data_long_filtrada <- data_filtrada[, columnas_serie] %>%
  pivot_longer(cols = -fecha, names_to = "Monitor", values_to = "PM2_5")
# One line per monitor over time
ggplot(data_long_filtrada, aes(x = fecha, y = PM2_5, color = Monitor)) +
  geom_line() +
  labs(
    title = "Time Series of PM2.5 Measurements (Filtered Data)",
    x = "Date",
    y = "PM2.5 Concentration (µg/m³)"
  ) +
  theme_minimal()
# ATE1 - MODEL 1: PurpleAir ATE1 mean regressed on the SENAMHI reference
#   ATE1_mean = b0 + b1 * ATE_Senamhi + error
model1 <- lm(ATE1_mean ~ ATE_Senamhi, data = data_filtrada)
summary(model1)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi, data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.890 -4.753 1.650 5.811 18.714
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.30153 2.76791 9.502 < 2e-16 ***
## ATE_Senamhi 0.23233 0.04167 5.576 8.63e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.707 on 185 degrees of freedom
## Multiple R-squared: 0.1439, Adjusted R-squared: 0.1392
## F-statistic: 31.09 on 1 and 185 DF, p-value: 8.627e-08
coeficientes_model1 <- coef(model1)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model1[1], "+", coeficientes_model1[2], "* ATE_Senamhi\n")
## La ecuación es: ATE1_mean = 26.30153 + 0.2323253 * ATE_Senamhi
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model1 <- predict(model1)
residuos_model1 <- residuals(model1)
observados_model1 <- model.response(model.frame(model1))
# RMSE
RMSE_model1 <- sqrt(mean(residuos_model1^2))
RMSE_model1
## [1] 8.660591
# MAE
MAE_model1 <- mean(abs(residuos_model1))
MAE_model1
## [1] 6.861055
# MAPE (%)
MAPE_model1 <- mean(abs(residuos_model1 / observados_model1)) * 100
MAPE_model1
## [1] 20.42548
# R squared
R2_model1 <- summary(model1)$r.squared
R2_model1
## [1] 0.1438689
# ATE1 - MODEL 2: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# the monitor temperature (T)
#   ATE1_mean = b0 + b1 * ATE_Senamhi + b2 * temperatura_ATE1 + error
model2 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1, data = data_filtrada)
summary(model2)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1, data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.4231 -3.4810 -0.2644 3.4202 17.2410
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.28907 6.44626 21.30 <2e-16 ***
## ATE_Senamhi 0.34799 0.02611 13.33 <2e-16 ***
## temperatura_ATE1 -1.54851 0.08683 -17.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.286 on 184 degrees of freedom
## Multiple R-squared: 0.6862, Adjusted R-squared: 0.6828
## F-statistic: 201.2 on 2 and 184 DF, p-value: < 2.2e-16
coeficientes_model2 <- coef(model2)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model2[1], "+", coeficientes_model2[2], "* ATE_Senamhi +", coeficientes_model2[3], "* temperatura_ATE1\n")
## La ecuación es: ATE1_mean = 137.2891 + 0.347989 * ATE_Senamhi + -1.548506 * temperatura_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model2 <- predict(model2)
residuos_model2 <- residuals(model2)
observados_model2 <- model.response(model.frame(model2))
# RMSE
RMSE_model2 <- sqrt(mean(residuos_model2^2))
RMSE_model2
## [1] 5.243063
# MAE
MAE_model2 <- mean(abs(residuos_model2))
MAE_model2
## [1] 4.059582
# MAPE (%)
MAPE_model2 <- mean(abs(residuos_model2 / observados_model2)) * 100
MAPE_model2
## [1] 10.80804
# R squared
R2_model2 <- summary(model2)$r.squared
R2_model2
## [1] 0.6862271
# ATE1 - MODEL 3: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# the monitor relative humidity (RH)
#   ATE1_mean = b0 + b1 * ATE_Senamhi + b2 * humidity_ATE1 + error
model3 <- lm(ATE1_mean ~ ATE_Senamhi + humidity_ATE1, data = data_filtrada)
summary(model3)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + humidity_ATE1, data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.645 -4.548 -0.512 4.166 16.236
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -54.36656 6.46283 -8.412 1.1e-14 ***
## ATE_Senamhi 0.36064 0.03158 11.420 < 2e-16 ***
## humidity_ATE1 1.21292 0.09243 13.123 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.275 on 184 degrees of freedom
## Multiple R-squared: 0.5578, Adjusted R-squared: 0.5529
## F-statistic: 116 on 2 and 184 DF, p-value: < 2.2e-16
coeficientes_model3 <- coef(model3)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model3[1], "+", coeficientes_model3[2], "* ATE_Senamhi +", coeficientes_model3[3], "* humidity_ATE1\n")
## La ecuación es: ATE1_mean = -54.36656 + 0.3606421 * ATE_Senamhi + 1.212918 * humidity_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model3 <- predict(model3)
residuos_model3 <- residuals(model3)
observados_model3 <- model.response(model.frame(model3))
# RMSE
RMSE_model3 <- sqrt(mean(residuos_model3^2))
RMSE_model3
## [1] 6.224561
# MAE
MAE_model3 <- mean(abs(residuos_model3))
MAE_model3
## [1] 5.081805
# MAPE (%)
MAPE_model3 <- mean(abs(residuos_model3 / observados_model3)) * 100
MAPE_model3
## [1] 13.71706
# R squared
R2_model3 <- summary(model3)$r.squared
R2_model3
## [1] 0.5577552
# ATE1 - MODEL 4: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# the monitor dew point (DP)
#   ATE1_mean = b0 + b1 * ATE_Senamhi + b2 * Dewpoint_ATE1 + error
model4 <- lm(ATE1_mean ~ ATE_Senamhi + Dewpoint_ATE1, data = data_filtrada)
summary(model4)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + Dewpoint_ATE1, data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.4927 -3.1003 -0.0368 4.2558 23.1783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 218.78474 15.18858 14.405 <2e-16 ***
## ATE_Senamhi 0.29090 0.03075 9.461 <2e-16 ***
## Dewpoint_ATE1 -3.36519 0.26318 -12.786 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.353 on 184 degrees of freedom
## Multiple R-squared: 0.5467, Adjusted R-squared: 0.5417
## F-statistic: 110.9 on 2 and 184 DF, p-value: < 2.2e-16
coeficientes_model4 <- coef(model4)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model4[1], "+", coeficientes_model4[2], "* ATE_Senamhi +", coeficientes_model4[3], "* Dewpoint_ATE1\n")
## La ecuación es: ATE1_mean = 218.7847 + 0.2908989 * ATE_Senamhi + -3.365194 * Dewpoint_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model4 <- predict(model4)
residuos_model4 <- residuals(model4)
observados_model4 <- model.response(model.frame(model4))
# RMSE
RMSE_model4 <- sqrt(mean(residuos_model4^2))
RMSE_model4
## [1] 6.302075
# MAE
MAE_model4 <- mean(abs(residuos_model4))
MAE_model4
## [1] 4.699129
# MAPE (%)
MAPE_model4 <- mean(abs(residuos_model4 / observados_model4)) * 100
MAPE_model4
## [1] 13.3265
# R squared
R2_model4 <- summary(model4)$r.squared
R2_model4
## [1] 0.5466722
# ATE1 - MODEL 5: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# the monitor pressure (P)
#   ATE1_mean = b0 + b1 * ATE_Senamhi + b2 * pressure_ATE1 + error
model5 <- lm(ATE1_mean ~ ATE_Senamhi + pressure_ATE1, data = data_filtrada)
summary(model5)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + pressure_ATE1, data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.148 -3.533 1.460 5.265 18.387
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.602e+03 4.482e+02 -5.806 2.76e-08 ***
## ATE_Senamhi 3.037e-01 4.023e-02 7.548 1.98e-12 ***
## pressure_ATE1 2.691e+00 4.588e-01 5.865 2.05e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.014 on 184 degrees of freedom
## Multiple R-squared: 0.2787, Adjusted R-squared: 0.2709
## F-statistic: 35.55 on 2 and 184 DF, p-value: 8.843e-14
coeficientes_model5 <- coef(model5)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model5[1], "+", coeficientes_model5[2], "* ATE_Senamhi +", coeficientes_model5[3], "* pressure_ATE1\n")
## La ecuación es: ATE1_mean = -2602.154 + 0.303688 * ATE_Senamhi + 2.690868 * pressure_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model5 <- predict(model5)
residuos_model5 <- residuals(model5)
observados_model5 <- model.response(model.frame(model5))
# RMSE
RMSE_model5 <- sqrt(mean(residuos_model5^2))
RMSE_model5
## [1] 7.94941
# MAE
MAE_model5 <- mean(abs(residuos_model5))
MAE_model5
## [1] 6.149123
# MAPE (%)
MAPE_model5 <- mean(abs(residuos_model5 / observados_model5)) * 100
MAPE_model5
## [1] 18.21754
# R squared
R2_model5 <- summary(model5)$r.squared
R2_model5
## [1] 0.2787014
# ATE1 - MODEL 6: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# temperature (T), relative humidity (RH) and their interaction (T x RH).
# In the formula, temperatura_ATE1 * humidity_ATE1 expands to both main
# effects and the product term.
model6 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * humidity_ATE1, data = data_filtrada)
summary(model6)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * humidity_ATE1,
## data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.6419 -3.5361 -0.2023 3.1832 17.2357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 246.40479 60.57958 4.067 7.07e-05 ***
## ATE_Senamhi 0.32971 0.02813 11.721 < 2e-16 ***
## temperatura_ATE1 -2.98028 0.75997 -3.922 0.000125 ***
## humidity_ATE1 -1.93185 1.01027 -1.912 0.057421 .
## temperatura_ATE1:humidity_ATE1 0.02572 0.01331 1.932 0.054892 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.261 on 182 degrees of freedom
## Multiple R-squared: 0.6925, Adjusted R-squared: 0.6858
## F-statistic: 102.5 on 4 and 182 DF, p-value: < 2.2e-16
coeficientes_model6 <- coef(model6)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model6[1], "+", coeficientes_model6[2], "* ATE_Senamhi +", coeficientes_model6[3], "* temperatura_ATE1 +", coeficientes_model6[4], "* humidity_ATE1 +", coeficientes_model6[5], "* temperatura_ATE1 * humidity_ATE1\n")
## La ecuación es: ATE1_mean = 246.4048 + 0.3297074 * ATE_Senamhi + -2.980275 * temperatura_ATE1 + -1.931848 * humidity_ATE1 + 0.02572188 * temperatura_ATE1 * humidity_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model6 <- predict(model6)
residuos_model6 <- residuals(model6)
observados_model6 <- model.response(model.frame(model6))
# RMSE
RMSE_model6 <- sqrt(mean(residuos_model6^2))
RMSE_model6
## [1] 5.190092
# MAE
MAE_model6 <- mean(abs(residuos_model6))
MAE_model6
## [1] 3.979757
# MAPE (%)
MAPE_model6 <- mean(abs(residuos_model6 / observados_model6)) * 100
MAPE_model6
## [1] 10.54223
# R squared
R2_model6 <- summary(model6)$r.squared
R2_model6
## [1] 0.6925352
# ATE1 - MODEL 7: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# temperature (T), dew point (DP) and their interaction (T x DP).
# In the formula, temperatura_ATE1 * Dewpoint_ATE1 expands to both main
# effects and the product term.
model7 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * Dewpoint_ATE1, data = data_filtrada)
summary(model7)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * Dewpoint_ATE1,
## data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.5102 -3.5857 -0.2776 3.4032 16.3258
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -175.95225 278.66942 -0.631 0.529
## ATE_Senamhi 0.33670 0.02857 11.785 <2e-16 ***
## temperatura_ATE1 2.29339 3.48365 0.658 0.511
## Dewpoint_ATE1 5.43485 4.80845 1.130 0.260
## temperatura_ATE1:Dewpoint_ATE1 -0.06644 0.05963 -1.114 0.267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.296 on 182 degrees of freedom
## Multiple R-squared: 0.6884, Adjusted R-squared: 0.6816
## F-statistic: 100.5 on 4 and 182 DF, p-value: < 2.2e-16
coeficientes_model7 <- coef(model7)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model7[1], "+", coeficientes_model7[2], "* ATE_Senamhi +",
coeficientes_model7[3], "* temperatura_ATE1 +", coeficientes_model7[4], "* Dewpoint_ATE1 +",
coeficientes_model7[5], "* temperatura_ATE1 * Dewpoint_ATE1\n")
## La ecuación es: ATE1_mean = -175.9522 + 0.336705 * ATE_Senamhi + 2.29339 * temperatura_ATE1 + 5.434853 * Dewpoint_ATE1 + -0.06644041 * temperatura_ATE1 * Dewpoint_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model7 <- predict(model7)
residuos_model7 <- residuals(model7)
observados_model7 <- model.response(model.frame(model7))
# RMSE
RMSE_model7 <- sqrt(mean(residuos_model7^2))
RMSE_model7
## [1] 5.224541
# MAE
MAE_model7 <- mean(abs(residuos_model7))
MAE_model7
## [1] 4.065191
# MAPE (%)
MAPE_model7 <- mean(abs(residuos_model7 / observados_model7)) * 100
MAPE_model7
## [1] 10.79909
# R squared
R2_model7 <- summary(model7)$r.squared
R2_model7
## [1] 0.6884401
# ATE1 - MODEL 8: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# temperature (T), pressure (P) and their interaction (T x P).
# In the formula, temperatura_ATE1 * pressure_ATE1 expands to both main
# effects and the product term.
model8 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * pressure_ATE1, data = data_filtrada)
summary(model8)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * pressure_ATE1,
## data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.2200 -3.0878 -0.0064 3.1863 16.6073
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.108e+04 4.875e+03 -2.272 0.0243 *
## ATE_Senamhi 3.736e-01 2.700e-02 13.833 <2e-16 ***
## temperatura_ATE1 1.333e+02 6.215e+01 2.145 0.0332 *
## pressure_ATE1 1.150e+01 5.000e+00 2.299 0.0226 *
## temperatura_ATE1:pressure_ATE1 -1.383e-01 6.376e-02 -2.169 0.0314 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.188 on 182 degrees of freedom
## Multiple R-squared: 0.701, Adjusted R-squared: 0.6945
## F-statistic: 106.7 on 4 and 182 DF, p-value: < 2.2e-16
coeficientes_model8 <- coef(model8)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model8[1], "+", coeficientes_model8[2], "* ATE_Senamhi +",
coeficientes_model8[3], "* temperatura_ATE1 +", coeficientes_model8[4], "* pressure_ATE1 +",
coeficientes_model8[5], "* temperatura_ATE1 * pressure_ATE1\n")
## La ecuación es: ATE1_mean = -11075.49 + 0.373563 * ATE_Senamhi + 133.3407 * temperatura_ATE1 + 11.49519 * pressure_ATE1 + -0.1383096 * temperatura_ATE1 * pressure_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model8 <- predict(model8)
residuos_model8 <- residuals(model8)
observados_model8 <- model.response(model.frame(model8))
# RMSE
RMSE_model8 <- sqrt(mean(residuos_model8^2))
RMSE_model8
## [1] 5.117722
# MAE
MAE_model8 <- mean(abs(residuos_model8))
MAE_model8
## [1] 3.879176
# MAPE (%)
MAPE_model8 <- mean(abs(residuos_model8 / observados_model8)) * 100
MAPE_model8
## [1] 10.3265
# R squared
R2_model8 <- summary(model8)$r.squared
R2_model8
## [1] 0.70105
# ATE1 - MODEL 9: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# relative humidity (RH), dew point (DP) and their interaction (RH x DP).
# In the formula, humidity_ATE1 * Dewpoint_ATE1 expands to both main effects
# and the product term.
model9 <- lm(ATE1_mean ~ ATE_Senamhi + humidity_ATE1 * Dewpoint_ATE1, data = data_filtrada)
summary(model9)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + humidity_ATE1 * Dewpoint_ATE1,
## data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.5127 -3.2806 -0.2193 3.2081 16.2131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 759.45497 189.10979 4.016 8.65e-05 ***
## ATE_Senamhi 0.31876 0.02816 11.318 < 2e-16 ***
## humidity_ATE1 -10.75931 3.28816 -3.272 0.001277 **
## Dewpoint_ATE1 -13.50078 3.22751 -4.183 4.47e-05 ***
## humidity_ATE1:Dewpoint_ATE1 0.19875 0.05647 3.519 0.000547 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.191 on 182 degrees of freedom
## Multiple R-squared: 0.7007, Adjusted R-squared: 0.6941
## F-statistic: 106.5 on 4 and 182 DF, p-value: < 2.2e-16
coeficientes_model9 <- coef(model9)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model9[1], "+", coeficientes_model9[2], "* ATE_Senamhi +",
coeficientes_model9[3], "* humidity_ATE1 +", coeficientes_model9[4], "* Dewpoint_ATE1 +",
coeficientes_model9[5], "* humidity_ATE1 * Dewpoint_ATE1\n")
## La ecuación es: ATE1_mean = 759.455 + 0.3187585 * ATE_Senamhi + -10.75931 * humidity_ATE1 + -13.50078 * Dewpoint_ATE1 + 0.1987484 * humidity_ATE1 * Dewpoint_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model9 <- predict(model9)
residuos_model9 <- residuals(model9)
observados_model9 <- model.response(model.frame(model9))
# RMSE
RMSE_model9 <- sqrt(mean(residuos_model9^2))
RMSE_model9
## [1] 5.121129
# MAE
MAE_model9 <- mean(abs(residuos_model9))
MAE_model9
## [1] 3.957181
# MAPE (%)
MAPE_model9 <- mean(abs(residuos_model9 / observados_model9)) * 100
MAPE_model9
## [1] 10.41229
# R squared
R2_model9 <- summary(model9)$r.squared
R2_model9
## [1] 0.7006518
# ATE1 - MODEL 10: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# relative humidity (RH), pressure (P) and their interaction (RH x P).
# In the formula, humidity_ATE1 * pressure_ATE1 expands to both main effects
# and the product term.
model10 <- lm(ATE1_mean ~ ATE_Senamhi + humidity_ATE1 * pressure_ATE1, data = data_filtrada)
summary(model10)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + humidity_ATE1 * pressure_ATE1,
## data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.8389 -3.8537 -0.6157 4.1557 15.5600
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2722.14009 3137.47745 0.868 0.387
## ATE_Senamhi 0.39306 0.03160 12.438 <2e-16 ***
## humidity_ATE1 -70.71007 54.31459 -1.302 0.195
## pressure_ATE1 -2.84466 3.21941 -0.884 0.378
## humidity_ATE1:pressure_ATE1 0.07367 0.05572 1.322 0.188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.045 on 182 degrees of freedom
## Multiple R-squared: 0.5941, Adjusted R-squared: 0.5851
## F-statistic: 66.59 on 4 and 182 DF, p-value: < 2.2e-16
coeficientes_model10 <- coef(model10)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model10[1], "+", coeficientes_model10[2], "* ATE_Senamhi +",
coeficientes_model10[3], "* humidity_ATE1 +", coeficientes_model10[4], "* pressure_ATE1 +",
coeficientes_model10[5], "* humidity_ATE1 * pressure_ATE1\n")
## La ecuación es: ATE1_mean = 2722.14 + 0.3930594 * ATE_Senamhi + -70.71007 * humidity_ATE1 + -2.84466 * pressure_ATE1 + 0.0736745 * humidity_ATE1 * pressure_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model10 <- predict(model10)
residuos_model10 <- residuals(model10)
observados_model10 <- model.response(model.frame(model10))
# RMSE
RMSE_model10 <- sqrt(mean(residuos_model10^2))
RMSE_model10
## [1] 5.963586
# MAE
MAE_model10 <- mean(abs(residuos_model10))
MAE_model10
## [1] 4.838999
# MAPE (%)
MAPE_model10 <- mean(abs(residuos_model10 / observados_model10)) * 100
MAPE_model10
## [1] 13.04146
# R squared
R2_model10 <- summary(model10)$r.squared
R2_model10
## [1] 0.5940615
# ATE1 - MODEL 11: PurpleAir ATE1 mean regressed on the SENAMHI reference plus
# dew point (DP), pressure (P) and their interaction (DP x P).
# In the formula, Dewpoint_ATE1 * pressure_ATE1 expands to both main effects
# and the product term.
model11 <- lm(ATE1_mean ~ ATE_Senamhi + Dewpoint_ATE1 * pressure_ATE1, data = data_filtrada)
summary(model11)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + Dewpoint_ATE1 * pressure_ATE1,
## data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.5397 -3.0031 0.0865 3.6558 21.7951
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.471e+04 1.163e+04 -1.265 0.207
## ATE_Senamhi 3.240e-01 3.179e-02 10.192 <2e-16 ***
## Dewpoint_ATE1 2.314e+02 1.979e+02 1.169 0.244
## pressure_ATE1 1.529e+01 1.193e+01 1.282 0.201
## Dewpoint_ATE1:pressure_ATE1 -2.406e-01 2.030e-01 -1.185 0.238
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.206 on 182 degrees of freedom
## Multiple R-squared: 0.5722, Adjusted R-squared: 0.5628
## F-statistic: 60.86 on 4 and 182 DF, p-value: < 2.2e-16
coeficientes_model11 <- coef(model11)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model11[1], "+", coeficientes_model11[2], "* ATE_Senamhi +",
coeficientes_model11[3], "* Dewpoint_ATE1 +", coeficientes_model11[4], "* pressure_ATE1 +",
coeficientes_model11[5], "* Dewpoint_ATE1 * pressure_ATE1\n")
## La ecuación es: ATE1_mean = -14710.87 + 0.3240184 * ATE_Senamhi + 231.4222 * Dewpoint_ATE1 + 15.29472 * pressure_ATE1 + -0.240547 * Dewpoint_ATE1 * pressure_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model11 <- predict(model11)
residuos_model11 <- residuals(model11)
observados_model11 <- model.response(model.frame(model11))
# RMSE
RMSE_model11 <- sqrt(mean(residuos_model11^2))
RMSE_model11
## [1] 6.122052
# MAE
MAE_model11 <- mean(abs(residuos_model11))
MAE_model11
## [1] 4.573354
# MAPE (%)
MAPE_model11 <- mean(abs(residuos_model11 / observados_model11)) * 100
MAPE_model11
## [1] 12.94117
# R squared
R2_model11 <- summary(model11)$r.squared
R2_model11
## [1] 0.5722015
# ATE1 - MODEL 12 (no interaction): PurpleAir ATE1 mean regressed on the
# SENAMHI reference plus temperature (T) and relative humidity (RH)
#   ATE1_mean = b0 + b1 * ATE_Senamhi + b2 * temperatura_ATE1
#               + b3 * humidity_ATE1 + error
model12 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 + humidity_ATE1, data = data_filtrada)
summary(model12)
##
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 + humidity_ATE1,
## data = data_filtrada)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.4432 -3.4791 -0.2587 3.4308 17.2637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.884935 22.870765 6.029 8.91e-09 ***
## ATE_Senamhi 0.347845 0.026714 13.021 < 2e-16 ***
## temperatura_ATE1 -1.552766 0.179383 -8.656 2.48e-15 ***
## humidity_ATE1 -0.004369 0.160843 -0.027 0.978
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.3 on 183 degrees of freedom
## Multiple R-squared: 0.6862, Adjusted R-squared: 0.6811
## F-statistic: 133.4 on 3 and 183 DF, p-value: < 2.2e-16
coeficientes_model12 <- coef(model12)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model12[1], "+", coeficientes_model12[2], "* ATE_Senamhi +",
coeficientes_model12[3], "* temperatura_ATE1 +", coeficientes_model12[4], "* humidity_ATE1\n")
## La ecuación es: ATE1_mean = 137.8849 + 0.347845 * ATE_Senamhi + -1.552766 * temperatura_ATE1 + -0.004368608 * humidity_ATE1
# In-sample errors are taken from the fitted object (residuals and the model
# frame response) so they stay aligned with the rows lm() actually used, even
# if rows with missing values were dropped during fitting.
predicciones_model12 <- predict(model12)
residuos_model12 <- residuals(model12)
observados_model12 <- model.response(model.frame(model12))
# RMSE
RMSE_model12 <- sqrt(mean(residuos_model12^2))
RMSE_model12
## [1] 5.243052
# MAE
MAE_model12 <- mean(abs(residuos_model12))
MAE_model12
## [1] 4.05879
# MAPE (%)
MAPE_model12 <- mean(abs(residuos_model12 / observados_model12)) * 100
MAPE_model12
## [1] 10.80741
# R squared
R2_model12 <- summary(model12)$r.squared
R2_model12
## [1] 0.6862284
# El bias es la diferencia promedio entre los valores reales (y_real) y los valores predichos (y_predicho)
# bias = (sum(estimados-observados))/length(observados) o
# bias <- mean(estimados - observados)
# https://search.r-project.org/CRAN/refmans/Fgmutils/html/bias.html
#Modelo 1: ATE1_mean = 26.30153 + 0.2323253 * ATE_Senamhi
data$ATE1_C_model1 <- 26.30153 + 0.2323253 * data$ATE1_mean
bias_model1 <- mean(data$ATE1_C_model1 - data$ATE1_mean)
print(bias_model1)
## [1] -2.850812
# Bias of calibration models 2-12 for sensor ATE1.
# Each section stores the model output in data$ATE1_C_model<k>, then reports
# the mean difference between that output and ATE1_mean as bias_model<k>.
# "Recorded fit" is the equation noted by the author for that model; the
# `## [1]` lines appear to be the console output captured when the script ran.
# NOTE(review): every recorded fit uses ATE_Senamhi as the PM2.5 predictor,
# while the code applies that coefficient to ATE1_mean -- confirm intended.

# Model 2. Recorded fit:
#   ATE1_mean = 137.2891 + 0.347989 * ATE_Senamhi + -1.548506 * temperatura_ATE1
data[["ATE1_C_model2"]] <- with(
  data,
  137.2891 + 0.347989 * ATE1_mean - 1.548506 * temperatura_ATE1
)
bias_model2 <- with(data, mean(ATE1_C_model2 - ATE1_mean))
print(bias_model2)
## [1] -9.858341

# Model 3. Recorded fit:
#   ATE1_mean = -54.36656 + 0.3606421 * ATE_Senamhi + 1.212918 * humidity_ATE1
data[["ATE1_C_model3"]] <- with(
  data,
  -54.36656 + 0.3606421 * ATE1_mean + 1.212918 * humidity_ATE1
)
bias_model3 <- with(data, mean(ATE1_C_model3 - ATE1_mean))
print(bias_model3)
## [1] -8.966434

# Model 4. Recorded fit:
#   ATE1_mean = 218.7847 + 0.2908989 * ATE_Senamhi + -3.365194 * Dewpoint_ATE1
data[["ATE1_C_model4"]] <- with(
  data,
  218.7847 + 0.2908989 * ATE1_mean - 3.365194 * Dewpoint_ATE1
)
bias_model4 <- with(data, mean(ATE1_C_model4 - ATE1_mean))
print(bias_model4)
## [1] -7.527148

# Model 5. Recorded fit:
#   ATE1_mean = -2602.154 + 0.303688 * ATE_Senamhi + 2.690868 * pressure_ATE1
data[["ATE1_C_model5"]] <- with(
  data,
  -2602.154 + 0.303688 * ATE1_mean + 2.690868 * pressure_ATE1
)
bias_model5 <- with(data, mean(ATE1_C_model5 - ATE1_mean))
print(bias_model5)
## [1] -5.83383

# Model 6 (temperature x humidity interaction). Recorded fit:
#   ATE1_mean = 246.4048 + 0.3297074 * ATE_Senamhi
#               + -2.980275 * temperatura_ATE1 + -1.931848 * humidity_ATE1
#               + 0.02572188 * temperatura_ATE1 * humidity_ATE1
data[["ATE1_C_model6"]] <- with(
  data,
  246.4048 + 0.3297074 * ATE1_mean -
    2.980275 * temperatura_ATE1 -
    1.931848 * humidity_ATE1 +
    0.02572188 * temperatura_ATE1 * humidity_ATE1
)
bias_model6 <- with(data, mean(ATE1_C_model6 - ATE1_mean))
print(bias_model6)
## [1] -9.581249

# Model 7 (temperature x dew point interaction). Recorded fit:
#   ATE1_mean = -175.9522 + 0.336705 * ATE_Senamhi
#               + 2.29339 * temperatura_ATE1 + 5.434853 * Dewpoint_ATE1
#               + -0.06644041 * temperatura_ATE1 * Dewpoint_ATE1
data[["ATE1_C_model7"]] <- with(
  data,
  -175.9522 + 0.336705 * ATE1_mean +
    2.29339 * temperatura_ATE1 +
    5.434853 * Dewpoint_ATE1 -
    0.06644041 * temperatura_ATE1 * Dewpoint_ATE1
)
bias_model7 <- with(data, mean(ATE1_C_model7 - ATE1_mean))
print(bias_model7)
## [1] -9.713183

# Model 8 (temperature x pressure interaction). Recorded fit:
#   ATE1_mean = -11075.49 + 0.373563 * ATE_Senamhi
#               + 133.3407 * temperatura_ATE1 + 11.49519 * pressure_ATE1
#               + -0.1383096 * temperatura_ATE1 * pressure_ATE1
data[["ATE1_C_model8"]] <- with(
  data,
  -11075.49 + 0.373563 * ATE1_mean +
    133.3407 * temperatura_ATE1 +
    11.49519 * pressure_ATE1 -
    0.1383096 * temperatura_ATE1 * pressure_ATE1
)
bias_model8 <- with(data, mean(ATE1_C_model8 - ATE1_mean))
print(bias_model8)
## [1] -10.52448

# Model 9 (humidity x dew point interaction). Recorded fit:
#   ATE1_mean = 759.455 + 0.3187585 * ATE_Senamhi
#               + -10.75931 * humidity_ATE1 + -13.50078 * Dewpoint_ATE1
#               + 0.1987484 * humidity_ATE1 * Dewpoint_ATE1
data[["ATE1_C_model9"]] <- with(
  data,
  759.455 + 0.3187585 * ATE1_mean -
    10.75931 * humidity_ATE1 -
    13.50078 * Dewpoint_ATE1 +
    0.1987484 * humidity_ATE1 * Dewpoint_ATE1
)
bias_model9 <- with(data, mean(ATE1_C_model9 - ATE1_mean))
print(bias_model9)
## [1] -9.531247

# Model 10 (humidity x pressure interaction). Recorded fit:
#   ATE1_mean = 2722.14 + 0.3930594 * ATE_Senamhi
#               + -70.71007 * humidity_ATE1 + -2.84466 * pressure_ATE1
#               + 0.0736745 * humidity_ATE1 * pressure_ATE1
data[["ATE1_C_model10"]] <- with(
  data,
  2722.14 + 0.3930594 * ATE1_mean -
    70.71007 * humidity_ATE1 -
    2.84466 * pressure_ATE1 +
    0.0736745 * humidity_ATE1 * pressure_ATE1
)
bias_model10 <- with(data, mean(ATE1_C_model10 - ATE1_mean))
print(bias_model10)
## [1] -10.12704

# Model 11 (dew point x pressure interaction). Recorded fit:
#   ATE1_mean = -14710.87 + 0.3240184 * ATE_Senamhi
#               + 231.4222 * Dewpoint_ATE1 + 15.29472 * pressure_ATE1
#               + -0.240547 * Dewpoint_ATE1 * pressure_ATE1
data[["ATE1_C_model11"]] <- with(
  data,
  -14710.87 + 0.3240184 * ATE1_mean +
    231.4222 * Dewpoint_ATE1 +
    15.29472 * pressure_ATE1 -
    0.240547 * Dewpoint_ATE1 * pressure_ATE1
)
bias_model11 <- with(data, mean(ATE1_C_model11 - ATE1_mean))
print(bias_model11)
## [1] -8.497702

# Model 12 (temperature and humidity, no interaction term). Recorded fit:
#   ATE1_mean = 137.8849 + 0.347845 * ATE_Senamhi
#               + -1.552766 * temperatura_ATE1 + -0.004368608 * humidity_ATE1
data[["ATE1_C_model12"]] <- with(
  data,
  137.8849 + 0.347845 * ATE1_mean -
    1.552766 * temperatura_ATE1 -
    0.004368608 * humidity_ATE1
)
bias_model12 <- with(data, mean(ATE1_C_model12 - ATE1_mean))
print(bias_model12)
## [1] -9.85567
# Attach dplyr and knitr; kable() below is still called through its namespace.
library(dplyr)
library(knitr)

# Summary table with one row per model (1-12). The metric values come from the
# scalars RMSE_model<k>, bias_model<k>, MAE_model<k>, MAPE_model<k> and
# R2_model<k>, which are defined elsewhere in the script.
resultados <- local({
  # Gather <prefix>_model1 ... <prefix>_model12 into one unnamed vector,
  # in model order.
  metric_column <- function(prefix) {
    unlist(
      mget(paste0(prefix, "_model", seq_len(12)), inherits = TRUE),
      use.names = FALSE
    )
  }

  # Row labels, in model order (model 1 first).
  equation_labels <- c(
    "PM2.5 = PM2.5_S + i",
    "ATE1 = PM2.5_S + T + i",
    "ATE1 = PM2.5_S + RH + i",
    "ATE1 = PM2.5_S + Dewpoint_ATE1 + i",
    "ATE1 = PM2.5_S + pressure_ATE1 + i",
    "ATE1 = PM2.5_S + T + RH + T*RH + i",
    "ATE1 = PM2.5_S + T + DP + T*DP + i",
    "ATE1 = PM2.5_S + T + P + T*P + i",
    "ATE1 = PM2.5_S + RH + DP + RH*DP + i",
    "ATE1 = PM2.5_S + RH + P + RH*P + i",
    "ATE1 = PM2.5_S + DP + P + DP*P + i",
    "ATE1 = PM2.5_S + T + RH (sin interacción)"
  )

  data.frame(
    Equation = equation_labels,
    RMSE = metric_column("RMSE"),
    Bias = metric_column("bias"),
    MAE = metric_column("MAE"),
    MAPE = metric_column("MAPE"),
    R2 = metric_column("R2")
  )
})

# Render the comparison table, rounding numeric columns to six digits.
knitr::kable(
  resultados,
  digits = 6,
  caption = "Resultados de Modelos 1 al 12"
)
| Equation | RMSE | Bias | MAE | MAPE | R2 |
|---|---|---|---|---|---|
| PM2.5 = PM2.5_S + i | 8.660591 | -2.850812 | 6.861055 | 20.42548 | 0.143869 |
| ATE1 = PM2.5_S + T + i | 5.243063 | -9.858341 | 4.059582 | 10.80804 | 0.686227 |
| ATE1 = PM2.5_S + RH + i | 6.224561 | -8.966434 | 5.081805 | 13.71706 | 0.557755 |
| ATE1 = PM2.5_S + Dewpoint_ATE1 + i | 6.302075 | -7.527148 | 4.699129 | 13.32650 | 0.546672 |
| ATE1 = PM2.5_S + pressure_ATE1 + i | 7.949410 | -5.833830 | 6.149123 | 18.21754 | 0.278701 |
| ATE1 = PM2.5_S + T + RH + T*RH + i | 5.190092 | -9.581249 | 3.979757 | 10.54223 | 0.692535 |
| ATE1 = PM2.5_S + T + DP + T*DP + i | 5.224541 | -9.713183 | 4.065191 | 10.79909 | 0.688440 |
| ATE1 = PM2.5_S + T + P + T*P + i | 5.117722 | -10.524482 | 3.879176 | 10.32650 | 0.701050 |
| ATE1 = PM2.5_S + RH + DP + RH*DP + i | 5.121129 | -9.531247 | 3.957181 | 10.41229 | 0.700652 |
| ATE1 = PM2.5_S + RH + P + RH*P + i | 5.963586 | -10.127042 | 4.838999 | 13.04146 | 0.594062 |
| ATE1 = PM2.5_S + DP + P + DP*P + i | 6.122052 | -8.497702 | 4.573354 | 12.94117 | 0.572201 |
| ATE1 = PM2.5_S + T + RH (sin interacción) | 5.243052 | -9.855670 | 4.058790 | 10.80741 | 0.686228 |
# Time series
# NOTE(review): the heading says "time series", but this section only draws a
# box plot; data_filtrada is defined elsewhere and is not used by the plot.
data_filtrada$fecha <- as.Date(data_filtrada$fecha)
library(dplyr)
library(tidyr)
# Plotting library
library(ggplot2)

# Long-format table: one row per (observation, series), where the series are
# ATE1_mean, ATE_Senamhi and the model columns ATE1_C_model1..ATE1_C_model12.
# The series name goes to `Modelos` and its value to `PM2_5`.
data_long <- data %>%
  select(ATE1_mean, ATE_Senamhi, ATE1_C_model1:ATE1_C_model12) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Modelos",
    values_to = "PM2_5"
  )

# Box-and-whisker plot, one box per series.
# `Modelos` is a character column, so ggplot2 would sort the x axis
# alphabetically and place ATE1_C_model10-12 before ATE1_C_model2. Pinning the
# scale limits to the order of first appearance (the selection order above)
# keeps the models in numeric order. Rows with missing PM2_5 are left in, so
# ggplot2 still reports them in the warning shown below.
ggplot(data_long, aes(x = Modelos, y = PM2_5)) +
  geom_boxplot() +
  scale_x_discrete(limits = unique(data_long$Modelos)) +
  labs(
    title = "Comparación de ATE1_mean, ATE_Senamhi y Modelos de ATE1_C_model1 a ATE1_C_model12",
    x = "Modelos",
    y = "PM2.5"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels
## Warning: Removed 103 rows containing non-finite values (`stat_boxplot()`).
```