Calibración de sensores de O3 en base a mediciones de referencia.

Datasets

MACA 1: macaconb

MACA 2: macasinb

Datos: https://github.com/nanocastro/Repo_maca/tree/master/Datos/Para%20analizar

Notas: https://github.com/nanocastro/Repo_maca/blob/master/Datos/Para%20analizar/calibracion.md

# libs
library(ggplot2)
library(dplyr)
library(tidyr)
library(lubridate)
library(gridExtra)
library(GGally)

reference <- read.csv("INV2018.csv", skip = 2)
colnames(reference) <- c("Dia", "Mes", "Anio", "Hora", "Minuto", "NO", "NO2", "NOx", "PM10", "O3ref", "Temperatura", "DV", "Velocidad", "HR", "CO", "SO2")
reference <- reference %>% unite(Timestamp, Dia, Mes, Anio, Hora, Minuto) %>% select(Timestamp, O3ref)

maca1 <- read.csv("macaconb.csv") %>% 
  filter(Minuto %% 15 == 0) %>% 
  unite(Timestamp, Dia, Mes, Anio, Hora, Minuto)
## Warning: package 'bindrcpp' was built under R version 3.4.4
maca2 <- read.csv("macasinb.csv") %>% 
  filter(Minuto %% 15 == 0) %>% 
  unite(Timestamp, Dia, Mes, Anio, Hora, Minuto)

maca1ref <- reference %>% inner_join(maca1, by = "Timestamp")
maca2ref <- reference %>% inner_join(maca2, by = "Timestamp")

str(maca1ref)
## 'data.frame':    601 obs. of  12 variables:
##  $ Timestamp: chr  "5_1_2018_13_45" "5_1_2018_14_0" "5_1_2018_14_15" "5_1_2018_14_30" ...
##  $ O3ref    : num  7.3 8.8 9.8 9.4 11.4 12.3 12 6.4 5.5 2.2 ...
##  $ O3       : num  44783 42120 41219 41219 60440 ...
##  $ NO2      : num  1819 1707 1674 1595 2384 ...
##  $ CO       : num  142140 125278 121193 113323 211670 ...
##  $ dO3      : int  686 672 667 667 750 793 715 713 795 788 ...
##  $ dNO2     : int  463 447 442 430 532 600 522 576 595 617 ...
##  $ dCO      : int  425 394 386 370 526 592 469 538 567 577 ...
##  $ HR       : num  25.9 24.6 23.6 23.3 19.2 16 12.4 13.8 20.4 20.8 ...
##  $ Temp     : num  30.9 31.8 32.2 32.2 35.7 39.2 43.5 42.1 31.3 29 ...
##  $ PM2.5    : int  85 5082 5001 5044 114 128 121 116 108 136 ...
##  $ PM10     : int  372 10261 10121 10236 394 389 377 378 373 539 ...
str(maca2ref)
## 'data.frame':    595 obs. of  10 variables:
##  $ Timestamp: chr  "5_1_2018_13_15" "5_1_2018_13_30" "5_1_2018_13_45" "5_1_2018_14_0" ...
##  $ O3ref    : num  6.7 8.2 7.3 8.8 9.8 9.4 11.4 12.3 12 6.4 ...
##  $ O3       : int  77584 95832 100315 101659 100984 100315 95219 94010 94611 95832 ...
##  $ NO2      : int  7178 7062 8672 8466 8219 8887 3646 2725 2846 2196 ...
##  $ CO       : int  93750 122876 122876 167102 204464 206287 83662 88051 95229 88398 ...
##  $ dO3      : int  797 832 839 841 840 839 831 829 830 832 ...
##  $ dNO2     : int  783 780 816 812 807 820 638 566 577 511 ...
##  $ dCO      : int  495 564 564 640 687 689 466 479 499 480 ...
##  $ HR       : num  35 31.6 30.2 28.7 27.9 27.9 19.6 19.7 16.5 21.2 ...
##  $ Temp     : num  28.8 28.2 29.8 29.9 30 30 34.9 35.2 39.4 33.7 ...

Análisis

O3 en el tiempo

refColor = '#5760AB'
m1Color = '#099DD9'
m2Color = '#F79420'

ggplot(data = reference, 
       aes(x = dmy_hm(Timestamp), y = O3ref)) +
  geom_line(group = 1, color = refColor) +
  ggtitle("O3 referencia en el tiempo")

ggsave("lines-ref.pdf")
## Saving 7 x 5 in image

O3 en el tiempo vs. MACA

Los datos de referencia y de MACA estaán estandarizados ya que están en unidades diferentes.

ggplot(data = head(maca1ref, n = 200), 
       aes(x = dmy_hm(Timestamp), y = (O3-mean(O3))/(max(O3)-min(O3))  )) +
  geom_line(group = 1, color = m1Color) +
  geom_line(aes(y = (O3ref-mean(O3ref))/(max(O3ref)-min(O3ref)), group = 1, color = I(refColor))) +
  ggtitle("O3 MACA 1 en el tiempo (ref en violeta - 200 mediciones)")

ggsave("lines-s1.pdf")
## Saving 7 x 5 in image
ggplot(data = head(maca2ref, n = 200), 
       aes(x = dmy_hm(Timestamp), y = (O3-mean(O3))/(max(O3)-min(O3))  )) +
  geom_line(group = 1, color = m2Color) +
  geom_line(aes(y = (O3ref-mean(O3ref))/(max(O3ref)-min(O3ref)), group = 1, color = I(refColor))) +
  ggtitle("O3 MACA 2 en el tiempo (ref en violeta - 200 mediciones)")

ggsave("lines-s2.pdf")
## Saving 7 x 5 in image

Autocorrelación

acf(reference %>% select("O3ref"), lag.max = 10, plot = TRUE)

Histogramas

ggplot(data = maca1, 
       aes(x = O3)) +
  geom_histogram(color = 'black', fill = m1Color) +
  ggtitle("Histograma O3 MACA 1")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("hist-s1.pdf")
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = maca2, 
       aes(x = O3)) +
  geom_histogram(color = 'black', fill = m2Color) +
  ggtitle("Histograma O3 MACA 2")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("hist-s2.pdf")
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Scatter plots

p1 <- ggplot(data = maca1ref, 
       aes(x = O3ref, y = O3)) +
  geom_point(color = m1Color) + 
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("O3 referencia vs. MACA 1")

ggsave("scatter-s1_vs_ref.pdf")
## Saving 7 x 5 in image
p2 <- ggplot(data = maca2ref, 
       aes(x = O3ref, y = O3)) +
  geom_point(color = m2Color) + 
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("O3 referencia vs. MACA 2")

ggsave("scatter-s2_vs_ref.pdf")
## Saving 7 x 5 in image
grid.arrange(p1, p2, ncol = 2)

Correlaciones

MACA 1

ggpairs(maca1ref %>% 
          select(-Timestamp) %>% 
          select(-starts_with("d")) %>% 
          select(-starts_with("PM")))

ggsave("pairs-s1.pdf")
## Saving 7 x 5 in image

MACA 2

ggpairs(maca2ref %>% 
          select(-Timestamp) %>% 
          select(-starts_with("d")))

ggsave("pairs-s2.pdf")
## Saving 7 x 5 in image