library(ggplot2)

# Load the saved workspace; it is expected to provide the `with.ecan` data
# frame that everything below reads from.
load("with_ecan.RData")

# ODIN unit ids and, position by position, the site number used to select
# each unit's measurement column (`pm2.5.odin.<id>.site.<site>`).
ids <- c(107, 113, 109, 102, 105)
sites <- c(15, 9, 18, 10, 1)
stopifnot(length(ids) == length(sites))

# Build `data`: one row per date, one calibrated column `pm2.5.odin.<id>` per
# unit. Each unit gets its own linear model, fitted on that unit's site-18
# column against the `pm2.5` column, and then applied to the unit's column at
# its own site.
# NOTE(review): presumably site 18 is where the units were co-located with the
# reference instrument -- confirm.
data <- data.frame(date = with.ecan$date)
for (i in seq_along(ids)) {
  id <- ids[i]
  site <- sites[i]

  train.data <- data.frame(
    x = with.ecan[, paste0("pm2.5.odin.", id, ".site.18")],
    y = with.ecan[, "pm2.5"]
  )
  # lm() has no `na.rm` argument (it is ignored with a warning); incomplete
  # rows are already dropped by the default `na.action`.
  linear.model <- lm(y ~ x, data = train.data)

  deployed.data <- data.frame(
    date = with.ecan$date,
    x = with.ecan[, paste0("pm2.5.odin.", id, ".site.", site)]
  )
  # Predictions are NA wherever the unit's raw reading is NA; those rows are
  # kept here so the merge below still lines up on `date`.
  deployed.data[, paste0("pm2.5.odin.", id)] <-
    predict(linear.model, newdata = deployed.data)
  # Drop the raw reading so only `date` and the calibrated column are merged.
  deployed.data$x <- NULL
  data <- merge(data, deployed.data, by = "date")
}
# Keep only the rows in which every column (date and all calibrated units)
# is non-missing, so all units are compared over the same set of rows.
data <- na.omit(data)

# Draw one histogram of calibrated PM2.5 per ODIN unit, using the same 5-wide
# bins from -20 to 150 for every unit so the plots are directly comparable.
for (id in ids) {
  column <- paste0("pm2.5.odin.", id)

  # `.data[[column]]` selects the column by name from the plot's own data,
  # instead of indexing the outer data frame inside aes().
  plt <- ggplot(data, aes(x = .data[[column]])) +
    geom_histogram(breaks = seq(-20, 150, 5)) +
    xlab("PM2.5") +
    ggtitle(paste("ODIN", id))

  print(plt)
}

# Two-sample Kolmogorov-Smirnov test for every unordered pair of ODIN units,
# comparing their calibrated PM2.5 columns in `data`. Pairs are derived from
# `ids` itself, so the loop stays correct if units are added or removed.
n.units <- length(ids)
if (n.units >= 2) {
  # Each column of `index.pairs` is one (i, j) pair with i < j, in the order
  # (1,2), (1,3), ..., (1,n), (2,3), ...
  index.pairs <- combn(n.units, 2)
  for (k in seq_len(ncol(index.pairs))) {
    id.1 <- ids[index.pairs[1, k]]
    id.2 <- ids[index.pairs[2, k]]
    print(paste("ODIN", id.1, "vs ODIN", id.2))
    # Keep the names `x` and `y`: ks.test() echoes them in its printed
    # "data:" line.
    x <- data[, paste0("pm2.5.odin.", id.1)]
    y <- data[, paste0("pm2.5.odin.", id.2)]
    print(ks.test(x, y))
  }
}
## [1] "ODIN 107 vs ODIN 113"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.20852, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 107 vs ODIN 109"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.18991, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 107 vs ODIN 102"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.25663, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 107 vs ODIN 105"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.34833, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 113 vs ODIN 109"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.046558, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 113 vs ODIN 102"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.10957, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 113 vs ODIN 105"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.2321, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 109 vs ODIN 102"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.080069, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 109 vs ODIN 105"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.19635, p-value < 2.2e-16
## alternative hypothesis: two-sided
## 
## [1] "ODIN 102 vs ODIN 105"
## Warning in ks.test(x, y): p-value will be approximate in the presence of
## ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  x and y
## D = 0.13388, p-value < 2.2e-16
## alternative hypothesis: two-sided