1. La base de datos ha sido unida en Stata.
  2. Se está trabajando con los datos de los monitores PurpleAir Ate 1, Ate 2 y Ate 3, y con los de SENAMHI Ate.
library(haven)
# Load the merged Stata data set (PurpleAir ATE1-ATE3 plus the SENAMHI Ate series).
data <- read_dta("~/Desktop/Doctorado/Calibración de purple air/Ate nuevo 12 octubre/ATE123_12 Octubre.dta")
# View() opens a data viewer window; only call it in an interactive session so
# that scripted or knitted runs do not try to open one.
if (interactive()) {
  View(data)
}

Crear variable dew point

# Convert the PurpleAir temperatures to Celsius (the raw columns are treated as
# Fahrenheit: subtract 32, then scale by 5/9).
data$temperatura_c_ATE1 <- (data$temperatura_ATE1 - 32) * 5 / 9
data$temperatura_c_ATE2 <- (data$temperatura_ATE2 - 32) * 5 / 9
data$temperatura_c_ATE3 <- (data$temperatura_ATE3 - 32) * 5 / 9

# Actual vapour pressure: relative humidity (as a fraction of 100) times the
# saturation vapour pressure from the base-10 Magnus form. The 6.11 factor is
# the saturation pressure at 0 °C in hPa, so Vp_* is expressed in hPa.
data$Vp_ATE1 <- (data$humidity_ATE1 / 100) * 6.11 * 10^((7.5 * data$temperatura_c_ATE1) / (237.3 + data$temperatura_c_ATE1))
data$Vp_ATE2 <- (data$humidity_ATE2 / 100) * 6.11 * 10^((7.5 * data$temperatura_c_ATE2) / (237.3 + data$temperatura_c_ATE2))
data$Vp_ATE3 <- (data$humidity_ATE3 / 100) * 6.11 * 10^((7.5 * data$temperatura_c_ATE3) / (237.3 + data$temperatura_c_ATE3))

# Constants for inverting the Magnus equation to obtain the dew point.
# A1 is the saturation vapour pressure at 0 °C in kPa (0.61078 kPa = 6.1078 hPa).
# NOTE(review): the saturation curve above uses the pair 7.5 / 237.3 (base 10,
# equivalent to 17.27 in base e) while the inversion uses 17.558 / 241.88;
# confirm whether a single coefficient pair should be used throughout.
A1 <- 0.61078
A2 <- 17.558
A3 <- 241.88

# Dew point in °C from a vapour pressure given in hPa.
# The vapour pressure must be in the same unit as A1 before taking the log
# ratio; dividing by 10 converts hPa to kPa. Without this conversion the ratio
# is 10 times too large and the dew point comes out far above air temperature.
dewpoint_from_vp_hpa <- function(vp_hpa) {
  log_ratio <- log((vp_hpa / 10) / A1)
  (A3 * log_ratio) / (A2 - log_ratio)
}

data$Dewpoint_ATE1 <- dewpoint_from_vp_hpa(data$Vp_ATE1)
data$Dewpoint_ATE2 <- dewpoint_from_vp_hpa(data$Vp_ATE2)
data$Dewpoint_ATE3 <- dewpoint_from_vp_hpa(data$Vp_ATE3)
# NOTE: this changes every Dewpoint_* value, so any model output or
# hand-copied coefficient that involves Dewpoint_* has to be regenerated.

Missing data

En los monitores PurpleAir se tienen datos desde el 18 de julio de 2023 hasta el 07 de mayo de 2024; sin embargo, SENAMHI no tiene datos desde febrero.

# Column inventory of the merged data set
colnames(data)
##  [1] "fecha"              "temperatura_ATE3"   "humidity_ATE3"     
##  [4] "pressure_ATE3"      "pm2_5_atm_a_ATE3"   "pm2_5_atm_b_ATE3"  
##  [7] "temperatura_ATE2"   "humidity_ATE2"      "pressure_ATE2"     
## [10] "pm2_5_atm_b_ATE2"   "pm2_5_atm_a_ATE2"   "temperatura_ATE1"  
## [13] "humidity_ATE1"      "pressure_ATE1"      "pm2_5_atm_a_ATE1"  
## [16] "pm2_5_atm_b_ATE1"   "ATE_Senamhi"        "temperatura_c_ATE1"
## [19] "temperatura_c_ATE2" "temperatura_c_ATE3" "Vp_ATE1"           
## [22] "Vp_ATE2"            "Vp_ATE3"            "Dewpoint_ATE1"     
## [25] "Dewpoint_ATE2"      "Dewpoint_ATE3"
# Last and first dates present in the data, ignoring missing dates
max_fecha <- max(data[["fecha"]], na.rm = TRUE)
min_fecha <- min(data[["fecha"]], na.rm = TRUE)

print(min_fecha)
## [1] "2023-07-18"
print(max_fecha)
## [1] "2024-05-07"
# Percentage of missing values per column: count the NAs column by column and
# divide by the number of rows
porcentaje_faltantes <- vapply(data, function(columna) sum(is.na(columna)), numeric(1)) / nrow(data) * 100

# Show the result
print(porcentaje_faltantes)
##              fecha   temperatura_ATE3      humidity_ATE3      pressure_ATE3 
##           0.000000           6.896552           6.896552           6.896552 
##   pm2_5_atm_a_ATE3   pm2_5_atm_b_ATE3   temperatura_ATE2      humidity_ATE2 
##           6.896552           6.896552           0.000000           0.000000 
##      pressure_ATE2   pm2_5_atm_b_ATE2   pm2_5_atm_a_ATE2   temperatura_ATE1 
##           0.000000           0.000000           0.000000           0.000000 
##      humidity_ATE1      pressure_ATE1   pm2_5_atm_a_ATE1   pm2_5_atm_b_ATE1 
##           0.000000           0.000000           0.000000           0.000000 
##        ATE_Senamhi temperatura_c_ATE1 temperatura_c_ATE2 temperatura_c_ATE3 
##          35.517241           0.000000           0.000000           6.896552 
##            Vp_ATE1            Vp_ATE2            Vp_ATE3      Dewpoint_ATE1 
##           0.000000           0.000000           6.896552           0.000000 
##      Dewpoint_ATE2      Dewpoint_ATE3 
##           0.000000           6.896552

Preanálisis de los sensores

Cada monitor PurpleAir tiene dos sensores (A y B). Según la literatura, para que un monitor PurpleAir sea considerado en el análisis, las lecturas de sus dos sensores deben coincidir en al menos el 30% de las mediciones, con un umbral de precisión ≤ 0.130. La precisión se calcula con la fórmula $\lvert A - B \rvert / (A + B)$.

Los resultados preliminares indican un 100% de concordancia entre los sensores de los monitores PurpleAir ATE1, ATE2 y ATE3, de acuerdo con los criterios de validación establecidos.

# ATE1: per-row relative disagreement between channels A and B,
# |A - B| / (A + B), followed by the number of rows above the 0.130 threshold
data$precision_ATE1 <- with(
  data,
  abs(pm2_5_atm_a_ATE1 - pm2_5_atm_b_ATE1) / (pm2_5_atm_a_ATE1 + pm2_5_atm_b_ATE1)
)
num_precision_mayor_013_ATE1 <- length(which(data$precision_ATE1 > 0.130))
print(num_precision_mayor_013_ATE1)
## [1] 0
# ATE2: same ratio and count
data$precision_ATE2 <- with(
  data,
  abs(pm2_5_atm_a_ATE2 - pm2_5_atm_b_ATE2) / (pm2_5_atm_a_ATE2 + pm2_5_atm_b_ATE2)
)
num_precision_mayor_013_ATE2 <- length(which(data$precision_ATE2 > 0.130))
print(num_precision_mayor_013_ATE2)
## [1] 0
# ATE3: same ratio and count (rows with missing ATE3 readings are not counted)
data$precision_ATE3 <- with(
  data,
  abs(pm2_5_atm_a_ATE3 - pm2_5_atm_b_ATE3) / (pm2_5_atm_a_ATE3 + pm2_5_atm_b_ATE3)
)
num_precision_mayor_013_ATE3 <- length(which(data$precision_ATE3 > 0.130))
print(num_precision_mayor_013_ATE3)
# Methodology from Laura's paper: "Data points were excluded if the deviation between sensors A and B exceeded either 70% or 5 μg/m³"

# ATE 1
# Number of rows whose relative A/B deviation, |A - B| / (A + B), exceeds 0.70.
# NOTE(review): a percent difference is often defined relative to the mean of
# the two channels, i.e. 2 * |A - B| / (A + B); confirm which definition the
# cited paper uses, since the form below is half of that.
superan_70_ATE1 <- with(data, sum(
  abs(pm2_5_atm_a_ATE1 - pm2_5_atm_b_ATE1) / (pm2_5_atm_a_ATE1 + pm2_5_atm_b_ATE1) > 0.70,
  na.rm = TRUE
))
# Number of rows whose absolute A/B difference exceeds 5 µg/m³
superan_5_ATE1 <- with(data, sum(abs(pm2_5_atm_a_ATE1 - pm2_5_atm_b_ATE1) > 5, na.rm = TRUE))
# Show both counts
print(superan_70_ATE1)
## [1] 0
print(superan_5_ATE1)
## [1] 0
# ATE 2
# Rows above the 0.70 relative deviation
superan_70_ATE2 <- with(data, sum(
  abs(pm2_5_atm_a_ATE2 - pm2_5_atm_b_ATE2) / (pm2_5_atm_a_ATE2 + pm2_5_atm_b_ATE2) > 0.70,
  na.rm = TRUE
))
# Rows above the 5 µg/m³ absolute difference
superan_5_ATE2 <- with(data, sum(abs(pm2_5_atm_a_ATE2 - pm2_5_atm_b_ATE2) > 5, na.rm = TRUE))
# Show both counts
print(superan_70_ATE2)
## [1] 0
print(superan_5_ATE2)
## [1] 0
# ATE 3
# Rows above the 0.70 relative deviation
superan_70_ATE3 <- with(data, sum(
  abs(pm2_5_atm_a_ATE3 - pm2_5_atm_b_ATE3) / (pm2_5_atm_a_ATE3 + pm2_5_atm_b_ATE3) > 0.70,
  na.rm = TRUE
))
# Rows above the 5 µg/m³ absolute difference
superan_5_ATE3 <- with(data, sum(abs(pm2_5_atm_a_ATE3 - pm2_5_atm_b_ATE3) > 5, na.rm = TRUE))
# Show both counts
print(superan_70_ATE3)
## [1] 0
print(superan_5_ATE3)
## [1] 1
# Copy of the data without the ATE3 row(s) whose A/B difference exceeds 5 µg/m³.
# subset() also drops rows where the condition is NA, i.e. rows with a missing
# ATE3 channel.
# NOTE(review): in the part of the script visible here this filtered copy is
# only used for the ATE3 A-vs-B scatter plot; the A/B means are computed from
# the unfiltered `data` — confirm that is intended.
data_filtrada2 <- subset(data, abs(pm2_5_atm_b_ATE3 - pm2_5_atm_a_ATE3) <= 5)

# ATE1: mean and standard deviation of the A/B precision ratio (NAs ignored)
desviacion_std_precision_ATE1 <- sd(data[["precision_ATE1"]], na.rm = TRUE)
media_precision_ATE1 <- mean(data[["precision_ATE1"]], na.rm = TRUE)
print(media_precision_ATE1)
## [1] 0.0144325
print(desviacion_std_precision_ATE1)
## [1] 0.008915939
# ATE2: mean and standard deviation of the A/B precision ratio
desviacion_std_precision_ATE2 <- sd(data[["precision_ATE2"]], na.rm = TRUE)
media_precision_ATE2 <- mean(data[["precision_ATE2"]], na.rm = TRUE)
print(media_precision_ATE2)
## [1] 0.01205336
print(desviacion_std_precision_ATE2)
## [1] 0.006523589
# ATE3: mean and standard deviation of the A/B precision ratio
desviacion_std_precision_ATE3 <- sd(data[["precision_ATE3"]], na.rm = TRUE)
media_precision_ATE3 <- mean(data[["precision_ATE3"]], na.rm = TRUE)
print(media_precision_ATE3)
## [1] 0.01992125
print(desviacion_std_precision_ATE3)
## [1] 0.01226382

Scatter Plot de sensor A vs sensor B

library(ggplot2)

# Channel A against channel B for ATE1 (all rows)
ggplot(data, aes(x = pm2_5_atm_a_ATE1, y = pm2_5_atm_b_ATE1)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE1  ",
    x = "pm2_5_atm_a_ATE1",
    y = "pm2_5_atm_b_ATE1"
  )

# Channel A against channel B for ATE2 (all rows)
ggplot(data, aes(x = pm2_5_atm_a_ATE2, y = pm2_5_atm_b_ATE2)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE2  ",
    x = "pm2_5_atm_a_ATE2",
    y = "pm2_5_atm_b_ATE2"
  )

# Channel A against channel B for ATE3, drawn from the copy that excludes rows
# with an A/B difference above 5 µg/m³
ggplot(data_filtrada2, aes(x = pm2_5_atm_a_ATE3, y = pm2_5_atm_b_ATE3)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE3  ",
    x = "pm2_5_atm_a_ATE3",
    y = "pm2_5_atm_b_ATE3"
  )

Sacar el promedio de sensor A y B

# Row-wise average of channels A and B for each monitor. With na.rm = TRUE a
# row with one missing channel keeps the other channel's value, and a row with
# both channels missing yields NaN.
data[["ATE1_mean"]] <- rowMeans(data[c("pm2_5_atm_a_ATE1", "pm2_5_atm_b_ATE1")], na.rm = TRUE)
data[["ATE2_mean"]] <- rowMeans(data[c("pm2_5_atm_a_ATE2", "pm2_5_atm_b_ATE2")], na.rm = TRUE)
data[["ATE3_mean"]] <- rowMeans(data[c("pm2_5_atm_a_ATE3", "pm2_5_atm_b_ATE3")], na.rm = TRUE)

Scatter Plot y box plot de Purple Air vs Senamhi

# Keep only the rows that have a SENAMHI value
data_filtrada <- data[!is.na(data$ATE_Senamhi), ]

# PurpleAir ATE1 A/B mean against SENAMHI
ggplot(data_filtrada, aes(x = ATE1_mean, y = ATE_Senamhi)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE1 vs Senamhi",
    x = "ATE1_mean",
    y = "ATE_Senamhi"
  )

# PurpleAir ATE2 A/B mean against SENAMHI
ggplot(data_filtrada, aes(x = ATE2_mean, y = ATE_Senamhi)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE2 vs Senamhi",
    x = "ATE2_mean",
    y = "ATE_Senamhi"
  )

# PurpleAir ATE3 A/B mean against SENAMHI (rows with missing ATE3 are dropped
# by ggplot with a warning)
ggplot(data_filtrada, aes(x = ATE3_mean, y = ATE_Senamhi)) +
  geom_point(color = "blue") +
  labs(
    title = "Scatter Plot of PM2.5 Measurements Purple Air monitors ATE3 vs Senamhi",
    x = "ATE3_mean",
    y = "ATE_Senamhi"
  )
## Warning: Removed 17 rows containing missing values (`geom_point()`).

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

# Reshape the four PM2.5 series to long format (one row per date and monitor)
# for the boxplot. pivot_longer() replaces the superseded gather() and matches
# the reshape used for the time-series plot; all other columns are kept.
data_long <- data_filtrada %>%
  pivot_longer(
    cols = c(ATE_Senamhi, ATE1_mean, ATE2_mean, ATE3_mean),
    names_to = "Monitor",
    values_to = "PM2_5"
  )
## Warning: attributes are not identical across measure variables;
## they will be dropped
# One box per monitor, comparing the PM2.5 distributions
ggplot(data_long, aes(x = Monitor, y = PM2_5)) +
  geom_boxplot() +
  labs(
    title = "Boxplot of PM2.5 Measurements for ATE_Senamhi, ATE1, ATE2, and ATE3",
    x = "Monitor",
    y = "PM2.5 Concentration"
  )
## Warning: Removed 17 rows containing non-finite values (`stat_boxplot()`).

# Time series
# Make sure the date column is a Date before plotting it on the x axis
data_filtrada[["fecha"]] <- as.Date(data_filtrada[["fecha"]])

# Keep the date plus the four PM2.5 series, then stack every series except the
# date into Monitor / PM2_5 pairs
data_long_filtrada <- data_filtrada %>%
  select(fecha, ATE_Senamhi, ATE1_mean, ATE2_mean, ATE3_mean) %>%
  pivot_longer(cols = -fecha, names_to = "Monitor", values_to = "PM2_5")

# One line per monitor
ggplot(data_long_filtrada, aes(x = fecha, y = PM2_5, color = Monitor)) +
  geom_line() +
  labs(
    title = "Time Series of PM2.5 Measurements (Filtered Data)",
    x = "Date",
    y = "PM2.5 Concentration (µg/m³)"
  ) +
  theme_minimal()

REGRESIONES LINEALES

# ATE1 - MODEL 1: ATE1_mean = ATE_Senamhi + i
# Simple linear regression: the PurpleAir ATE1 A/B mean is the response and the
# SENAMHI series is the only predictor. Fitted on data_filtrada, i.e. only the
# rows that have a SENAMHI value.

model1 <- lm(ATE1_mean ~ ATE_Senamhi, data = data_filtrada)
summary(model1)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi, data = data_filtrada)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.890  -4.753   1.650   5.811  18.714 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 26.30153    2.76791   9.502  < 2e-16 ***
## ATE_Senamhi  0.23233    0.04167   5.576 8.63e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.707 on 185 degrees of freedom
## Multiple R-squared:  0.1439, Adjusted R-squared:  0.1392 
## F-statistic: 31.09 on 1 and 185 DF,  p-value: 8.627e-08
# Fitted coefficients, in the order (Intercept), ATE_Senamhi
coeficientes_model1 <- coef(model1)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model1[1], "+", coeficientes_model1[2], "* ATE_Senamhi\n")
## La ecuación es: ATE1_mean = 26.30153 + 0.2323253 * ATE_Senamhi
# In-sample error metrics. The observed response is taken from the model frame
# so that it stays aligned with the predictions even when lm() drops rows with
# missing values (subtracting from data_filtrada$ATE1_mean only works when no
# row was dropped).
predicciones_model1 <- predict(model1)
observados_model1 <- model.response(model.frame(model1))
errores_model1 <- observados_model1 - predicciones_model1
# RMSE
RMSE_model1 <- sqrt(mean(errores_model1^2))
RMSE_model1
## [1] 8.660591
# MAE
MAE_model1 <- mean(abs(errores_model1))
MAE_model1
## [1] 6.861055
# MAPE (percent of the observed value)
MAPE_model1 <- mean(abs(errores_model1 / observados_model1)) * 100
MAPE_model1
## [1] 20.42548
# R-squared
R2_model1 <- summary(model1)$r.squared
R2_model1
## [1] 0.1438689
# ATE1 - MODEL 2:  PM2.5 = PAcf_1 + T + i
# Adds temperature as a second predictor. temperatura_ATE1 is the raw column
# (the one converted to Celsius elsewhere as temperatura_c_ATE1), not the
# Celsius one.
# NOTE(review): the header equation presumably puts the reference PM2.5 on the
# left, but the fitted formula has the PurpleAir mean as the response and
# ATE_Senamhi as a predictor — confirm the intended direction.

model2 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1, data = data_filtrada)
summary(model2)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1, data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.4231  -3.4810  -0.2644   3.4202  17.2410 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      137.28907    6.44626   21.30   <2e-16 ***
## ATE_Senamhi        0.34799    0.02611   13.33   <2e-16 ***
## temperatura_ATE1  -1.54851    0.08683  -17.83   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.286 on 184 degrees of freedom
## Multiple R-squared:  0.6862, Adjusted R-squared:  0.6828 
## F-statistic: 201.2 on 2 and 184 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, temperatura_ATE1
coeficientes_model2 <- coef(model2)
# Print the fitted equation
cat("La ecuación es: ATE1_mean =", coeficientes_model2[1], "+", coeficientes_model2[2], "* ATE_Senamhi +", coeficientes_model2[3], "* temperatura_ATE1\n")
## La ecuación es: ATE1_mean = 137.2891 + 0.347989 * ATE_Senamhi + -1.548506 * temperatura_ATE1
# In-sample error metrics for model 2. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model2 <- predict(model2)
observados_model2 <- model.response(model.frame(model2))
errores_model2 <- observados_model2 - predicciones_model2
# RMSE
RMSE_model2 <- sqrt(mean(errores_model2^2))
RMSE_model2
## [1] 5.243063
# MAE
MAE_model2 <- mean(abs(errores_model2))
MAE_model2
## [1] 4.059582
# MAPE (percent of the observed value)
MAPE_model2 <- mean(abs(errores_model2 / observados_model2)) * 100
MAPE_model2
## [1] 10.80804
# R-squared
R2_model2 <- summary(model2)$r.squared
R2_model2
## [1] 0.6862271
# ATE1 - MODEL 3:  PM2.5 = PAcf_1 + RH + i
# Adds the ATE1 humidity column as a second predictor.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.

model3 <- lm(ATE1_mean ~ ATE_Senamhi + humidity_ATE1, data = data_filtrada)
summary(model3)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + humidity_ATE1, data = data_filtrada)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.645  -4.548  -0.512   4.166  16.236 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -54.36656    6.46283  -8.412  1.1e-14 ***
## ATE_Senamhi     0.36064    0.03158  11.420  < 2e-16 ***
## humidity_ATE1   1.21292    0.09243  13.123  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.275 on 184 degrees of freedom
## Multiple R-squared:  0.5578, Adjusted R-squared:  0.5529 
## F-statistic:   116 on 2 and 184 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, humidity_ATE1,
# and the fitted equation
coeficientes_model3 <- coef(model3)
cat("La ecuación es: ATE1_mean =", coeficientes_model3[1], "+", coeficientes_model3[2], "* ATE_Senamhi +", coeficientes_model3[3], "* humidity_ATE1\n")
## La ecuación es: ATE1_mean = -54.36656 + 0.3606421 * ATE_Senamhi + 1.212918 * humidity_ATE1
# In-sample error metrics for model 3. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model3 <- predict(model3)
observados_model3 <- model.response(model.frame(model3))
errores_model3 <- observados_model3 - predicciones_model3
# RMSE
RMSE_model3 <- sqrt(mean(errores_model3^2))
RMSE_model3
## [1] 6.224561
# MAE
MAE_model3 <- mean(abs(errores_model3))
MAE_model3
## [1] 5.081805
# MAPE (percent of the observed value)
MAPE_model3 <- mean(abs(errores_model3 / observados_model3)) * 100
MAPE_model3
## [1] 13.71706
# R-squared
R2_model3 <- summary(model3)$r.squared
R2_model3
## [1] 0.5577552
# ATE1 - MODEL 4:  PM2.5 = PAcf_1 + DP + i
# Adds the derived dew point column (Dewpoint_ATE1) as a second predictor.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.

model4 <- lm(ATE1_mean ~ ATE_Senamhi + Dewpoint_ATE1, data = data_filtrada)
summary(model4)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + Dewpoint_ATE1, data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.4927  -3.1003  -0.0368   4.2558  23.1783 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   218.78474   15.18858  14.405   <2e-16 ***
## ATE_Senamhi     0.29090    0.03075   9.461   <2e-16 ***
## Dewpoint_ATE1  -3.36519    0.26318 -12.786   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.353 on 184 degrees of freedom
## Multiple R-squared:  0.5467, Adjusted R-squared:  0.5417 
## F-statistic: 110.9 on 2 and 184 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, Dewpoint_ATE1,
# and the fitted equation
coeficientes_model4 <- coef(model4)
cat("La ecuación es: ATE1_mean =", coeficientes_model4[1], "+", coeficientes_model4[2], "* ATE_Senamhi +", coeficientes_model4[3], "* Dewpoint_ATE1\n")
## La ecuación es: ATE1_mean = 218.7847 + 0.2908989 * ATE_Senamhi + -3.365194 * Dewpoint_ATE1
# In-sample error metrics for model 4. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model4 <- predict(model4)
observados_model4 <- model.response(model.frame(model4))
errores_model4 <- observados_model4 - predicciones_model4
# RMSE
RMSE_model4 <- sqrt(mean(errores_model4^2))
RMSE_model4
## [1] 6.302075
# MAE
MAE_model4 <- mean(abs(errores_model4))
MAE_model4
## [1] 4.699129
# MAPE (percent of the observed value)
MAPE_model4 <- mean(abs(errores_model4 / observados_model4)) * 100
MAPE_model4
## [1] 13.3265
# R-squared
R2_model4 <- summary(model4)$r.squared
R2_model4
## [1] 0.5466722
# ATE1 - MODEL 5:  PM2.5 = PAcf_1 + P + i
# Adds the ATE1 pressure column as a second predictor.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.


model5 <- lm(ATE1_mean ~ ATE_Senamhi + pressure_ATE1, data = data_filtrada)
summary(model5)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + pressure_ATE1, data = data_filtrada)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.148  -3.533   1.460   5.265  18.387 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -2.602e+03  4.482e+02  -5.806 2.76e-08 ***
## ATE_Senamhi    3.037e-01  4.023e-02   7.548 1.98e-12 ***
## pressure_ATE1  2.691e+00  4.588e-01   5.865 2.05e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.014 on 184 degrees of freedom
## Multiple R-squared:  0.2787, Adjusted R-squared:  0.2709 
## F-statistic: 35.55 on 2 and 184 DF,  p-value: 8.843e-14
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, pressure_ATE1,
# and the fitted equation
coeficientes_model5 <- coef(model5)
cat("La ecuación es: ATE1_mean =", coeficientes_model5[1], "+", coeficientes_model5[2], "* ATE_Senamhi +", coeficientes_model5[3], "* pressure_ATE1\n")
## La ecuación es: ATE1_mean = -2602.154 + 0.303688 * ATE_Senamhi + 2.690868 * pressure_ATE1
# In-sample error metrics for model 5. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model5 <- predict(model5)
observados_model5 <- model.response(model.frame(model5))
errores_model5 <- observados_model5 - predicciones_model5
# RMSE
RMSE_model5 <- sqrt(mean(errores_model5^2))
RMSE_model5
## [1] 7.94941
# MAE
MAE_model5 <- mean(abs(errores_model5))
MAE_model5
## [1] 6.149123
# MAPE (percent of the observed value)
MAPE_model5 <- mean(abs(errores_model5 / observados_model5)) * 100
MAPE_model5
## [1] 18.21754
# R-squared
R2_model5 <- summary(model5)$r.squared
R2_model5
## [1] 0.2787014
# ATE1 - MODEL 6: PM2.5 = PAcf_1 + T + RH + T*RH + i
# `temperatura_ATE1 * humidity_ATE1` expands to both main effects plus their
# interaction term.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.

model6 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * humidity_ATE1, data = data_filtrada)
summary(model6)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * humidity_ATE1, 
##     data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.6419  -3.5361  -0.2023   3.1832  17.2357 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    246.40479   60.57958   4.067 7.07e-05 ***
## ATE_Senamhi                      0.32971    0.02813  11.721  < 2e-16 ***
## temperatura_ATE1                -2.98028    0.75997  -3.922 0.000125 ***
## humidity_ATE1                   -1.93185    1.01027  -1.912 0.057421 .  
## temperatura_ATE1:humidity_ATE1   0.02572    0.01331   1.932 0.054892 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.261 on 182 degrees of freedom
## Multiple R-squared:  0.6925, Adjusted R-squared:  0.6858 
## F-statistic: 102.5 on 4 and 182 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, temperatura_ATE1,
# humidity_ATE1, temperatura_ATE1:humidity_ATE1
coeficientes_model6 <- coef(model6)

cat("La ecuación es: ATE1_mean =", coeficientes_model6[1], "+", coeficientes_model6[2], "* ATE_Senamhi +", coeficientes_model6[3], "* temperatura_ATE1 +", coeficientes_model6[4], "* humidity_ATE1 +", coeficientes_model6[5], "* temperatura_ATE1 * humidity_ATE1\n")
## La ecuación es: ATE1_mean = 246.4048 + 0.3297074 * ATE_Senamhi + -2.980275 * temperatura_ATE1 + -1.931848 * humidity_ATE1 + 0.02572188 * temperatura_ATE1 * humidity_ATE1
# In-sample error metrics for model 6. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model6 <- predict(model6)
observados_model6 <- model.response(model.frame(model6))
errores_model6 <- observados_model6 - predicciones_model6
# RMSE
RMSE_model6 <- sqrt(mean(errores_model6^2))
RMSE_model6
## [1] 5.190092
# MAE
MAE_model6 <- mean(abs(errores_model6))
MAE_model6
## [1] 3.979757
# MAPE (percent of the observed value)
MAPE_model6 <- mean(abs(errores_model6 / observados_model6)) * 100
MAPE_model6
## [1] 10.54223
# R-squared
R2_model6 <- summary(model6)$r.squared
R2_model6
## [1] 0.6925352
# ATE1 - MODEL 7: PM2.5 = PAcf_1 + T + DP + T*DP + i
# `temperatura_ATE1 * Dewpoint_ATE1` expands to both main effects plus their
# interaction term.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.

model7 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * Dewpoint_ATE1, data = data_filtrada)
summary(model7)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * Dewpoint_ATE1, 
##     data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.5102  -3.5857  -0.2776   3.4032  16.3258 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -175.95225  278.66942  -0.631    0.529    
## ATE_Senamhi                       0.33670    0.02857  11.785   <2e-16 ***
## temperatura_ATE1                  2.29339    3.48365   0.658    0.511    
## Dewpoint_ATE1                     5.43485    4.80845   1.130    0.260    
## temperatura_ATE1:Dewpoint_ATE1   -0.06644    0.05963  -1.114    0.267    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.296 on 182 degrees of freedom
## Multiple R-squared:  0.6884, Adjusted R-squared:  0.6816 
## F-statistic: 100.5 on 4 and 182 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, temperatura_ATE1,
# Dewpoint_ATE1, temperatura_ATE1:Dewpoint_ATE1
coeficientes_model7 <- coef(model7)

cat("La ecuación es: ATE1_mean =", coeficientes_model7[1], "+", coeficientes_model7[2], "* ATE_Senamhi +", 
    coeficientes_model7[3], "* temperatura_ATE1 +", coeficientes_model7[4], "* Dewpoint_ATE1 +", 
    coeficientes_model7[5], "* temperatura_ATE1 * Dewpoint_ATE1\n")
## La ecuación es: ATE1_mean = -175.9522 + 0.336705 * ATE_Senamhi + 2.29339 * temperatura_ATE1 + 5.434853 * Dewpoint_ATE1 + -0.06644041 * temperatura_ATE1 * Dewpoint_ATE1
# In-sample error metrics for model 7. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model7 <- predict(model7)
observados_model7 <- model.response(model.frame(model7))
errores_model7 <- observados_model7 - predicciones_model7
# RMSE
RMSE_model7 <- sqrt(mean(errores_model7^2))
RMSE_model7
## [1] 5.224541
# MAE
MAE_model7 <- mean(abs(errores_model7))
MAE_model7
## [1] 4.065191
# MAPE (percent of the observed value)
MAPE_model7 <- mean(abs(errores_model7 / observados_model7)) * 100
MAPE_model7
## [1] 10.79909
# R-squared
R2_model7 <- summary(model7)$r.squared
R2_model7
## [1] 0.6884401
# ATE1 - MODEL 8: PM2.5 = PAcf_1 + T + P + T*P + i
# `temperatura_ATE1 * pressure_ATE1` expands to both main effects plus their
# interaction term.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.


model8 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * pressure_ATE1, data = data_filtrada)
summary(model8)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 * pressure_ATE1, 
##     data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.2200  -3.0878  -0.0064   3.1863  16.6073 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -1.108e+04  4.875e+03  -2.272   0.0243 *  
## ATE_Senamhi                     3.736e-01  2.700e-02  13.833   <2e-16 ***
## temperatura_ATE1                1.333e+02  6.215e+01   2.145   0.0332 *  
## pressure_ATE1                   1.150e+01  5.000e+00   2.299   0.0226 *  
## temperatura_ATE1:pressure_ATE1 -1.383e-01  6.376e-02  -2.169   0.0314 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.188 on 182 degrees of freedom
## Multiple R-squared:  0.701,  Adjusted R-squared:  0.6945 
## F-statistic: 106.7 on 4 and 182 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, temperatura_ATE1,
# pressure_ATE1, temperatura_ATE1:pressure_ATE1
coeficientes_model8 <- coef(model8)

cat("La ecuación es: ATE1_mean =", coeficientes_model8[1], "+", coeficientes_model8[2], "* ATE_Senamhi +", 
    coeficientes_model8[3], "* temperatura_ATE1 +", coeficientes_model8[4], "* pressure_ATE1 +", 
    coeficientes_model8[5], "* temperatura_ATE1 * pressure_ATE1\n")
## La ecuación es: ATE1_mean = -11075.49 + 0.373563 * ATE_Senamhi + 133.3407 * temperatura_ATE1 + 11.49519 * pressure_ATE1 + -0.1383096 * temperatura_ATE1 * pressure_ATE1
# In-sample error metrics for model 8. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model8 <- predict(model8)
observados_model8 <- model.response(model.frame(model8))
errores_model8 <- observados_model8 - predicciones_model8
# RMSE
RMSE_model8 <- sqrt(mean(errores_model8^2))
RMSE_model8
## [1] 5.117722
# MAE
MAE_model8 <- mean(abs(errores_model8))
MAE_model8
## [1] 3.879176
# MAPE (percent of the observed value)
MAPE_model8 <- mean(abs(errores_model8 / observados_model8)) * 100
MAPE_model8
## [1] 10.3265
# R-squared
R2_model8 <- summary(model8)$r.squared
R2_model8
## [1] 0.70105
# ATE1 - MODEL 9: PM2.5 = PAcf_1 + RH + DP + RH*DP + i
# `humidity_ATE1 * Dewpoint_ATE1` expands to both main effects plus their
# interaction term.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.


model9 <- lm(ATE1_mean ~ ATE_Senamhi + humidity_ATE1 * Dewpoint_ATE1, data = data_filtrada)
summary(model9)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + humidity_ATE1 * Dewpoint_ATE1, 
##     data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.5127  -3.2806  -0.2193   3.2081  16.2131 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 759.45497  189.10979   4.016 8.65e-05 ***
## ATE_Senamhi                   0.31876    0.02816  11.318  < 2e-16 ***
## humidity_ATE1               -10.75931    3.28816  -3.272 0.001277 ** 
## Dewpoint_ATE1               -13.50078    3.22751  -4.183 4.47e-05 ***
## humidity_ATE1:Dewpoint_ATE1   0.19875    0.05647   3.519 0.000547 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.191 on 182 degrees of freedom
## Multiple R-squared:  0.7007, Adjusted R-squared:  0.6941 
## F-statistic: 106.5 on 4 and 182 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, humidity_ATE1,
# Dewpoint_ATE1, humidity_ATE1:Dewpoint_ATE1
coeficientes_model9 <- coef(model9)

cat("La ecuación es: ATE1_mean =", coeficientes_model9[1], "+", coeficientes_model9[2], "* ATE_Senamhi +", 
    coeficientes_model9[3], "* humidity_ATE1 +", coeficientes_model9[4], "* Dewpoint_ATE1 +", 
    coeficientes_model9[5], "* humidity_ATE1 * Dewpoint_ATE1\n")
## La ecuación es: ATE1_mean = 759.455 + 0.3187585 * ATE_Senamhi + -10.75931 * humidity_ATE1 + -13.50078 * Dewpoint_ATE1 + 0.1987484 * humidity_ATE1 * Dewpoint_ATE1
# In-sample error metrics for model 9. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model9 <- predict(model9)
observados_model9 <- model.response(model.frame(model9))
errores_model9 <- observados_model9 - predicciones_model9
# RMSE
RMSE_model9 <- sqrt(mean(errores_model9^2))
RMSE_model9
## [1] 5.121129
# MAE
MAE_model9 <- mean(abs(errores_model9))
MAE_model9
## [1] 3.957181
# MAPE (percent of the observed value)
MAPE_model9 <- mean(abs(errores_model9 / observados_model9)) * 100
MAPE_model9
## [1] 10.41229
# R-squared
R2_model9 <- summary(model9)$r.squared
R2_model9
## [1] 0.7006518
# ATE1 - MODEL 10:   PM2.5 = PAcf_1 + RH + P + RH*P + i
# `humidity_ATE1 * pressure_ATE1` expands to both main effects plus their
# interaction term.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.

model10 <- lm(ATE1_mean ~ ATE_Senamhi + humidity_ATE1 * pressure_ATE1, data = data_filtrada)
summary(model10)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + humidity_ATE1 * pressure_ATE1, 
##     data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.8389  -3.8537  -0.6157   4.1557  15.5600 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 2722.14009 3137.47745   0.868    0.387    
## ATE_Senamhi                    0.39306    0.03160  12.438   <2e-16 ***
## humidity_ATE1                -70.71007   54.31459  -1.302    0.195    
## pressure_ATE1                 -2.84466    3.21941  -0.884    0.378    
## humidity_ATE1:pressure_ATE1    0.07367    0.05572   1.322    0.188    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.045 on 182 degrees of freedom
## Multiple R-squared:  0.5941, Adjusted R-squared:  0.5851 
## F-statistic: 66.59 on 4 and 182 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, humidity_ATE1,
# pressure_ATE1, humidity_ATE1:pressure_ATE1
coeficientes_model10 <- coef(model10)

cat("La ecuación es: ATE1_mean =", coeficientes_model10[1], "+", coeficientes_model10[2], "* ATE_Senamhi +", 
    coeficientes_model10[3], "* humidity_ATE1 +", coeficientes_model10[4], "* pressure_ATE1 +", 
    coeficientes_model10[5], "* humidity_ATE1 * pressure_ATE1\n")
## La ecuación es: ATE1_mean = 2722.14 + 0.3930594 * ATE_Senamhi + -70.71007 * humidity_ATE1 + -2.84466 * pressure_ATE1 + 0.0736745 * humidity_ATE1 * pressure_ATE1
# In-sample error metrics for model 10. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model10 <- predict(model10)
observados_model10 <- model.response(model.frame(model10))
errores_model10 <- observados_model10 - predicciones_model10
# RMSE
RMSE_model10 <- sqrt(mean(errores_model10^2))
RMSE_model10
## [1] 5.963586
# MAE
MAE_model10 <- mean(abs(errores_model10))
MAE_model10
## [1] 4.838999
# MAPE (percent of the observed value)
MAPE_model10 <- mean(abs(errores_model10 / observados_model10)) * 100
MAPE_model10
## [1] 13.04146
# R-squared
R2_model10 <- summary(model10)$r.squared
R2_model10
## [1] 0.5940615
# ATE1 - MODEL 11:    PM2.5 = PAcf_1 + DP + P + DP*P + i
# `Dewpoint_ATE1 * pressure_ATE1` expands to both main effects plus their
# interaction term.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.

model11 <- lm(ATE1_mean ~ ATE_Senamhi + Dewpoint_ATE1 * pressure_ATE1, data = data_filtrada)
summary(model11)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + Dewpoint_ATE1 * pressure_ATE1, 
##     data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5397  -3.0031   0.0865   3.6558  21.7951 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -1.471e+04  1.163e+04  -1.265    0.207    
## ATE_Senamhi                  3.240e-01  3.179e-02  10.192   <2e-16 ***
## Dewpoint_ATE1                2.314e+02  1.979e+02   1.169    0.244    
## pressure_ATE1                1.529e+01  1.193e+01   1.282    0.201    
## Dewpoint_ATE1:pressure_ATE1 -2.406e-01  2.030e-01  -1.185    0.238    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.206 on 182 degrees of freedom
## Multiple R-squared:  0.5722, Adjusted R-squared:  0.5628 
## F-statistic: 60.86 on 4 and 182 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, Dewpoint_ATE1,
# pressure_ATE1, Dewpoint_ATE1:pressure_ATE1
coeficientes_model11 <- coef(model11)

cat("La ecuación es: ATE1_mean =", coeficientes_model11[1], "+", coeficientes_model11[2], "* ATE_Senamhi +", 
    coeficientes_model11[3], "* Dewpoint_ATE1 +", coeficientes_model11[4], "* pressure_ATE1 +", 
    coeficientes_model11[5], "* Dewpoint_ATE1 * pressure_ATE1\n")
## La ecuación es: ATE1_mean = -14710.87 + 0.3240184 * ATE_Senamhi + 231.4222 * Dewpoint_ATE1 + 15.29472 * pressure_ATE1 + -0.240547 * Dewpoint_ATE1 * pressure_ATE1
# In-sample error metrics for model 11. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model11 <- predict(model11)
observados_model11 <- model.response(model.frame(model11))
errores_model11 <- observados_model11 - predicciones_model11
# RMSE
RMSE_model11 <- sqrt(mean(errores_model11^2))
RMSE_model11
## [1] 6.122052
# MAE
MAE_model11 <- mean(abs(errores_model11))
MAE_model11
## [1] 4.573354
# MAPE (percent of the observed value)
MAPE_model11 <- mean(abs(errores_model11 / observados_model11)) * 100
MAPE_model11
## [1] 12.94117
# R-squared
R2_model11 <- summary(model11)$r.squared
R2_model11
## [1] 0.5722015
# ATE1 - MODEL 12 (no interaction):    PM2.5 = PAcf_1 + T + RH
# Temperature and humidity enter as separate additive predictors, without an
# interaction term.
# NOTE(review): as fitted, the PurpleAir mean is the response and ATE_Senamhi a
# predictor, which does not match the left-hand side of the header equation —
# confirm the intended direction.


model12 <- lm(ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 + humidity_ATE1, data = data_filtrada)
summary(model12)
## 
## Call:
## lm(formula = ATE1_mean ~ ATE_Senamhi + temperatura_ATE1 + humidity_ATE1, 
##     data = data_filtrada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.4432  -3.4791  -0.2587   3.4308  17.2637 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      137.884935  22.870765   6.029 8.91e-09 ***
## ATE_Senamhi        0.347845   0.026714  13.021  < 2e-16 ***
## temperatura_ATE1  -1.552766   0.179383  -8.656 2.48e-15 ***
## humidity_ATE1     -0.004369   0.160843  -0.027    0.978    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.3 on 183 degrees of freedom
## Multiple R-squared:  0.6862, Adjusted R-squared:  0.6811 
## F-statistic: 133.4 on 3 and 183 DF,  p-value: < 2.2e-16
# Fitted coefficients, in the order (Intercept), ATE_Senamhi, temperatura_ATE1,
# humidity_ATE1, and the fitted equation
coeficientes_model12 <- coef(model12)
cat("La ecuación es: ATE1_mean =", coeficientes_model12[1], "+", coeficientes_model12[2], "* ATE_Senamhi +", 
    coeficientes_model12[3], "* temperatura_ATE1 +", coeficientes_model12[4], "* humidity_ATE1\n")
## La ecuación es: ATE1_mean = 137.8849 + 0.347845 * ATE_Senamhi + -1.552766 * temperatura_ATE1 + -0.004368608 * humidity_ATE1
# In-sample error metrics for model 12. The observed response comes from the
# model frame so it stays aligned with the predictions even when lm() drops
# rows with missing values.
predicciones_model12 <- predict(model12)
observados_model12 <- model.response(model.frame(model12))
errores_model12 <- observados_model12 - predicciones_model12
# RMSE
RMSE_model12 <- sqrt(mean(errores_model12^2))
RMSE_model12
## [1] 5.243052
# MAE
MAE_model12 <- mean(abs(errores_model12))
MAE_model12
## [1] 4.05879
# MAPE (percent of the observed value)
MAPE_model12 <- mean(abs(errores_model12 / observados_model12)) * 100
MAPE_model12
## [1] 10.80741
# R-squared
R2_model12 <- summary(model12)$r.squared
R2_model12
## [1] 0.6862284

BIAS

# Bias: mean difference between the values estimated by each calibration
# equation and the observed values:
#   bias <- mean(estimados - observados)
# https://search.r-project.org/CRAN/refmans/Fgmutils/html/bias.html
#
# Every equation below was fitted as ATE1_mean ~ ATE_Senamhi + ..., so the
# PM2.5 coefficient must multiply ATE_Senamhi. A previous version multiplied
# it by ATE1_mean (the response itself), which does not evaluate the fitted
# equation and produced meaningless bias values.
#
# SENAMHI has no data for part of the study period (see the missing-data
# notes at the top of the file), so the estimates contain NA and the mean is
# taken with na.rm = TRUE.
#
# NOTE: outputs rendered before this fix (e.g. -2.850812 for model 1) and the
# Bias column of the results table are stale; re-knit to regenerate them.
# NOTE(review): coefficients are hard-coded, rounded copies of the fitted
# values; predict(<model>, newdata = data) would avoid rounding error.

# Mean of (estimated - observed), ignoring rows where either value is missing.
calc_bias <- function(estimados, observados) {
  mean(estimados - observados, na.rm = TRUE)
}

# Modelo 1: ATE1_mean = 26.30153 + 0.2323253 * ATE_Senamhi
data$ATE1_C_model1 <- 26.30153 + 0.2323253 * data$ATE_Senamhi
bias_model1 <- calc_bias(data$ATE1_C_model1, data$ATE1_mean)
print(bias_model1)

# Modelo 2: ATE1_mean = 137.2891 + 0.347989 * ATE_Senamhi
#           - 1.548506 * temperatura_ATE1
data$ATE1_C_model2 <- 137.2891 + 0.347989 * data$ATE_Senamhi -
  1.548506 * data$temperatura_ATE1
bias_model2 <- calc_bias(data$ATE1_C_model2, data$ATE1_mean)
print(bias_model2)

# Modelo 3: ATE1_mean = -54.36656 + 0.3606421 * ATE_Senamhi
#           + 1.212918 * humidity_ATE1
data$ATE1_C_model3 <- -54.36656 + 0.3606421 * data$ATE_Senamhi +
  1.212918 * data$humidity_ATE1
bias_model3 <- calc_bias(data$ATE1_C_model3, data$ATE1_mean)
print(bias_model3)

# Modelo 4: ATE1_mean = 218.7847 + 0.2908989 * ATE_Senamhi
#           - 3.365194 * Dewpoint_ATE1
data$ATE1_C_model4 <- 218.7847 + 0.2908989 * data$ATE_Senamhi -
  3.365194 * data$Dewpoint_ATE1
bias_model4 <- calc_bias(data$ATE1_C_model4, data$ATE1_mean)
print(bias_model4)

# Modelo 5: ATE1_mean = -2602.154 + 0.303688 * ATE_Senamhi
#           + 2.690868 * pressure_ATE1
data$ATE1_C_model5 <- -2602.154 + 0.303688 * data$ATE_Senamhi +
  2.690868 * data$pressure_ATE1
bias_model5 <- calc_bias(data$ATE1_C_model5, data$ATE1_mean)
print(bias_model5)

# Modelo 6: ATE1_mean = 246.4048 + 0.3297074 * ATE_Senamhi
#           - 2.980275 * temperatura_ATE1 - 1.931848 * humidity_ATE1
#           + 0.02572188 * temperatura_ATE1 * humidity_ATE1
data$ATE1_C_model6 <- 246.4048 + 0.3297074 * data$ATE_Senamhi -
  2.980275 * data$temperatura_ATE1 -
  1.931848 * data$humidity_ATE1 +
  0.02572188 * data$temperatura_ATE1 * data$humidity_ATE1
bias_model6 <- calc_bias(data$ATE1_C_model6, data$ATE1_mean)
print(bias_model6)

# Modelo 7: ATE1_mean = -175.9522 + 0.336705 * ATE_Senamhi
#           + 2.29339 * temperatura_ATE1 + 5.434853 * Dewpoint_ATE1
#           - 0.06644041 * temperatura_ATE1 * Dewpoint_ATE1
data$ATE1_C_model7 <- -175.9522 + 0.336705 * data$ATE_Senamhi +
  2.29339 * data$temperatura_ATE1 +
  5.434853 * data$Dewpoint_ATE1 -
  0.06644041 * data$temperatura_ATE1 * data$Dewpoint_ATE1
bias_model7 <- calc_bias(data$ATE1_C_model7, data$ATE1_mean)
print(bias_model7)

# Modelo 8: ATE1_mean = -11075.49 + 0.373563 * ATE_Senamhi
#           + 133.3407 * temperatura_ATE1 + 11.49519 * pressure_ATE1
#           - 0.1383096 * temperatura_ATE1 * pressure_ATE1
data$ATE1_C_model8 <- -11075.49 + 0.373563 * data$ATE_Senamhi +
  133.3407 * data$temperatura_ATE1 +
  11.49519 * data$pressure_ATE1 -
  0.1383096 * data$temperatura_ATE1 * data$pressure_ATE1
bias_model8 <- calc_bias(data$ATE1_C_model8, data$ATE1_mean)
print(bias_model8)

# Modelo 9: ATE1_mean = 759.455 + 0.3187585 * ATE_Senamhi
#           - 10.75931 * humidity_ATE1 - 13.50078 * Dewpoint_ATE1
#           + 0.1987484 * humidity_ATE1 * Dewpoint_ATE1
data$ATE1_C_model9 <- 759.455 + 0.3187585 * data$ATE_Senamhi -
  10.75931 * data$humidity_ATE1 -
  13.50078 * data$Dewpoint_ATE1 +
  0.1987484 * data$humidity_ATE1 * data$Dewpoint_ATE1
bias_model9 <- calc_bias(data$ATE1_C_model9, data$ATE1_mean)
print(bias_model9)

# Modelo 10: ATE1_mean = 2722.14 + 0.3930594 * ATE_Senamhi
#            - 70.71007 * humidity_ATE1 - 2.84466 * pressure_ATE1
#            + 0.0736745 * humidity_ATE1 * pressure_ATE1
data$ATE1_C_model10 <- 2722.14 + 0.3930594 * data$ATE_Senamhi -
  70.71007 * data$humidity_ATE1 -
  2.84466 * data$pressure_ATE1 +
  0.0736745 * data$humidity_ATE1 * data$pressure_ATE1
bias_model10 <- calc_bias(data$ATE1_C_model10, data$ATE1_mean)
print(bias_model10)

# Modelo 11: ATE1_mean = -14710.87 + 0.3240184 * ATE_Senamhi
#            + 231.4222 * Dewpoint_ATE1 + 15.29472 * pressure_ATE1
#            - 0.240547 * Dewpoint_ATE1 * pressure_ATE1
data$ATE1_C_model11 <- -14710.87 + 0.3240184 * data$ATE_Senamhi +
  231.4222 * data$Dewpoint_ATE1 +
  15.29472 * data$pressure_ATE1 -
  0.240547 * data$Dewpoint_ATE1 * data$pressure_ATE1
bias_model11 <- calc_bias(data$ATE1_C_model11, data$ATE1_mean)
print(bias_model11)

# Modelo 12: ATE1_mean = 137.8849 + 0.347845 * ATE_Senamhi
#            - 1.552766 * temperatura_ATE1 - 0.004368608 * humidity_ATE1
data$ATE1_C_model12 <- 137.8849 + 0.347845 * data$ATE_Senamhi -
  1.552766 * data$temperatura_ATE1 -
  0.004368608 * data$humidity_ATE1
bias_model12 <- calc_bias(data$ATE1_C_model12, data$ATE1_mean)
print(bias_model12)

TABLA DE RESULTADOS

library (dplyr)
library(knitr)

# Summary table: one row per model (1 to 12), one column per metric.
# Built column by column; the order of each vector follows the model number.
resultados <- data.frame(
  Equation = c(
    "PM2.5 = PM2.5_S + i",
    "ATE1 = PM2.5_S + T + i",
    "ATE1 = PM2.5_S + RH + i",
    "ATE1 = PM2.5_S + Dewpoint_ATE1 + i",
    "ATE1 = PM2.5_S + pressure_ATE1 + i",
    "ATE1 = PM2.5_S + T + RH + T*RH + i",
    "ATE1 = PM2.5_S + T + DP + T*DP + i",
    "ATE1 = PM2.5_S + T + P + T*P + i",
    "ATE1 = PM2.5_S + RH + DP + RH*DP + i",
    "ATE1 = PM2.5_S + RH + P + RH*P + i",
    "ATE1 = PM2.5_S + DP + P + DP*P + i",
    "ATE1 = PM2.5_S + T + RH (sin interacción)"
  )
)

# Root mean squared error of each model
resultados$RMSE <- c(
  RMSE_model1, RMSE_model2, RMSE_model3, RMSE_model4,
  RMSE_model5, RMSE_model6, RMSE_model7, RMSE_model8,
  RMSE_model9, RMSE_model10, RMSE_model11, RMSE_model12
)

# Mean (estimated - observed) difference of each model
resultados$Bias <- c(
  bias_model1, bias_model2, bias_model3, bias_model4,
  bias_model5, bias_model6, bias_model7, bias_model8,
  bias_model9, bias_model10, bias_model11, bias_model12
)

# Mean absolute error of each model
resultados$MAE <- c(
  MAE_model1, MAE_model2, MAE_model3, MAE_model4,
  MAE_model5, MAE_model6, MAE_model7, MAE_model8,
  MAE_model9, MAE_model10, MAE_model11, MAE_model12
)

# Mean absolute percentage error of each model
resultados$MAPE <- c(
  MAPE_model1, MAPE_model2, MAPE_model3, MAPE_model4,
  MAPE_model5, MAPE_model6, MAPE_model7, MAPE_model8,
  MAPE_model9, MAPE_model10, MAPE_model11, MAPE_model12
)

# Coefficient of determination of each model
resultados$R2 <- c(
  R2_model1, R2_model2, R2_model3, R2_model4,
  R2_model5, R2_model6, R2_model7, R2_model8,
  R2_model9, R2_model10, R2_model11, R2_model12
)

# Render the table with six decimal places.
knitr::kable(
  resultados,
  digits = 6,
  caption = "Resultados de Modelos 1 al 12"
)
Resultados de Modelos 1 al 12
Equation RMSE Bias MAE MAPE R2
PM2.5 = PM2.5_S + i 8.660591 -2.850812 6.861055 20.42548 0.143869
ATE1 = PM2.5_S + T + i 5.243063 -9.858341 4.059582 10.80804 0.686227
ATE1 = PM2.5_S + RH + i 6.224561 -8.966434 5.081805 13.71706 0.557755
ATE1 = PM2.5_S + Dewpoint_ATE1 + i 6.302075 -7.527148 4.699129 13.32650 0.546672
ATE1 = PM2.5_S + pressure_ATE1 + i 7.949410 -5.833830 6.149123 18.21754 0.278701
ATE1 = PM2.5_S + T + RH + T*RH + i 5.190092 -9.581249 3.979757 10.54223 0.692535
ATE1 = PM2.5_S + T + DP + T*DP + i 5.224541 -9.713183 4.065191 10.79909 0.688440
ATE1 = PM2.5_S + T + P + T*P + i 5.117722 -10.524482 3.879176 10.32650 0.701050
ATE1 = PM2.5_S + RH + DP + RH*DP + i 5.121129 -9.531247 3.957181 10.41229 0.700652
ATE1 = PM2.5_S + RH + P + RH*P + i 5.963586 -10.127042 4.838999 13.04146 0.594062
ATE1 = PM2.5_S + DP + P + DP*P + i 6.122052 -8.497702 4.573354 12.94117 0.572201
ATE1 = PM2.5_S + T + RH (sin interacción) 5.243052 -9.855670 4.058790 10.80741 0.686228
# Section originally labelled "time series": the code below converts the date
# column of data_filtrada and then draws a box plot from `data`.
data_filtrada[["fecha"]] <- as.Date(data_filtrada[["fecha"]])

library(dplyr)
library(tidyr)

# Plotting library
library(ggplot2)

# Long-format table with one row per (series, value) pair for ATE1_mean,
# ATE_Senamhi and the model columns ATE1_C_model1 to ATE1_C_model12.
# The range selection relies on the model columns being contiguous in `data`.
data_long <- pivot_longer(
  select(data, ATE1_mean, ATE_Senamhi, ATE1_C_model1:ATE1_C_model12),
  cols = c(ATE1_mean, ATE_Senamhi, ATE1_C_model1:ATE1_C_model12),
  names_to = "Modelos",
  values_to = "PM2_5"
)

# Box-and-whisker plot, one box per series
ggplot(data = data_long, mapping = aes(x = Modelos, y = PM2_5)) +
  geom_boxplot() +
  labs(
    title = "Comparación de ATE1_mean, ATE_Senamhi y Modelos de ATE1_C_model1 a ATE1_C_model12",
    x = "Modelos",
    y = "PM2.5"
  ) +
  theme_minimal() +
  # Rotate the x-axis labels so the series names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 103 rows containing non-finite values (`stat_boxplot()`).

```