Linear Interpolation with Small Wind

library(RPostgreSQL)

## Loading required package: DBI

# Database login details (not in GitHub repo)
load("access.Rda")
# Connect to database
p <- dbDriver("PostgreSQL")
con <- dbConnect(p,
                 user=access$user,
                 password=access$pwd,
                 host='penap-data.dyndns.org',
                 dbname='cona',
                 port=5432)

locs <- dbGetQuery(con,"SELECT fs.id,
  ST_X(ST_TRANSFORM(fs.geom::geometry,4326)) as lon,
  ST_Y(ST_TRANSFORM(fs.geom::geometry,4326)) as lat
  FROM admin.fixedsites as fs
  ORDER BY fs.id;")

library(ggplot2)

load("with_ecan.RData")

ids <- c(102, 109, 108)
sites <- c(10, 18, 16)

data <- data.frame(date = with.ecan$date)
for (i in 1:3) {
  id <- ids[i]
  site <- sites[i]
  train.data <- data.frame(x = with.ecan[, paste0('pm2.5.odin.', id, '.site.18')],
                           y = with.ecan[, 'pm2.5'])
  linear.model <- lm(y ~ x, train.data, na.rm=TRUE)
  deployed.data <- data.frame(date = with.ecan$date, 
                              x = with.ecan[, paste0('pm2.5.odin.', id, '.site.', site)])
  results <- predict(linear.model, deployed.data)
  deployed.data[, paste0('pm2.5.odin.', id)] <- predict(linear.model, deployed.data)
  deployed.data$x <- NULL
  data <- merge(data, deployed.data, by='date')
}

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

# Remove rows with NAs
data <- na.omit(data)
nrow(data)

## [1] 93291

data.backup <- data

dist <- function(x1, y1, x2, y2)
  sqrt((x1-x2)^2 + (y1-y2)^2)

linear.interpolate <- function(x.1, y.1, x.2, y.2, x) {
  y.1 + (x - x.1) * (y.2 - y.1) / (x.2 - x.1)
}

trial.1 <- data.frame(ids = c(102, 109, 108),
                     sites = c(10, 18, 16))
trial.2 <- data.frame(ids = c(107, 115, 102),
                     sites = c(15, 2, 10))
trial.3 <- data.frame(ids = c(105, 113, 114),
                     sites = c(1, 9, 8))

trials <- list(trial.1, trial.2, trial.3)

experiment <- function(low.wind.data) {
  
  for (trial in trials) {
      
    for (i in 1:3) {
      trial[i, 'lat'] <- locs[which(locs$id==trial[i, 'sites']), 'lat']
      trial[i, 'lon'] <- locs[which(locs$id==trial[i, 'sites']), 'lon']
    }
    
    ids <- trial$ids
    
    data <- data.frame(date = with.ecan$date)
    for (i in 1:3) {
      id <- trial$ids[i]
      site <- trial$sites[i]
      train.data <- data.frame(x = with.ecan[, paste0('pm2.5.odin.', id, '.site.18')],
                               y = with.ecan[, 'pm2.5'])
      linear.model <- lm(y ~ x, train.data, na.rm=TRUE)
      deployed.data <- data.frame(date = low.wind.data$date, 
                                  x = low.wind.data[, paste0('pm2.5.odin.', id, '.site.', site)])
      results <- predict(linear.model, deployed.data)
      deployed.data[, paste0('pm2.5.odin.', id)] <- predict(linear.model, deployed.data)
      deployed.data$x <- NULL
      data <- merge(data, deployed.data, by='date')
    }
    
    # Remove rows with NAs
    data <- na.omit(data)
    print(trial$ids)
    print('nrow')
    print(nrow(data))
    print('date')
    print(data$date[1])
    
    d1 <- dist(trial[1, 'lat'], trial[1, 'lon'], trial[2, 'lat'], trial[2, 'lon'])
    d2 <- dist(trial[2, 'lat'], trial[2, 'lon'], trial[3, 'lat'], trial[3, 'lon'])
    odin.x <- c(0, d1, d1+d2)
    
    y.1 <- data[, paste0('pm2.5.odin.', ids[1])]
    y.2 <- data[, paste0('pm2.5.odin.',ids[3])]
    y <- data[, paste0('pm2.5.odin.', ids[2])]
    y.hat <- linear.interpolate(odin.x[1], y.1, odin.x[3], y.2, odin.x[2])
    
    # Absolute errors
    sqr.err <- (y - y.hat)^2
    mean(sqr.err, na.rm=TRUE)
    
    # Relative errors
    print('mrae')
    print(mean(abs((y-y.hat)/y), na.rm=TRUE))
    
    print('percentage length')
    print(odin.x[2] / odin.x[3])
  }  
  
#   divs <- 100
# 
#   offset.df <- data.frame(percentage = NA, rel.err = NA)
#   
#   for (i in 1:divs) {
#     
#     odin.x[2] <- odin.x[3] * i / divs
#     
#     y.1 <- data[, paste0('pm2.5.odin.', ids[1])]
#     y.2 <- data[, paste0('pm2.5.odin.',ids[3])]
#     y <- data[, paste0('pm2.5.odin.', ids[2])]
#     y.hat <- linear.interpolate(odin.x[1], y.1, odin.x[3], y.2, odin.x[2])
#     
#     # Absolute errors
#     abs.rel.err <- abs( (y - y.hat)/y )
#     
#     
#     # Relative errors
#     offset.df[i, 1:2] <- c(i / divs * 100, mean(abs.rel.err))
#     
# #    to.append <- data.frame(rel.err = abs.rel.err)
# #    to.append[, 'percentage'] <- rep(i/divs*100, nrow(to.append))
#  #   offset.df.2 <- rbind(offset.df.2, to.append)
#     
#   }
#   
  # plt <- ggplot(offset.df) +
  #   geom_point(aes(percentage, rel.err)) +
  #   ggtitle(paste(trial$ids[1], trial$ids[2], trial$ids[3]))
  # 
  # print(plt)
  # # 
  # # plt <- ggplot(offset.df.2) +
  # #   geom_boxplot(aes(percentage, rel.err)) +
  # #   ggtitle(paste(trial$ids[1], trial$ids[2], trial$ids[3]))
  # # 
  # # print(plt)
  # 
  # print('best length percentage')
  # print(offset.df$percentage[ which.min(offset.df$rel.err) ])
  
  print('############')
  

}

# first low wind day
date.lt <- as.POSIXlt(with.ecan$date)

start.date <- as.POSIXct('2016-08-09 12:00', format='%Y-%m-%d %H:%M', tz='Etc/GMT-12')
end.date <- as.POSIXct('2016-08-10 12:00', format='%Y-%m-%d %H:%M', tz='Etc/GMT-12')
low.wind.1 <- with.ecan[which(start.date < with.ecan$date & with.ecan$date < end.date), ]
experiment(low.wind.1)

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 102 109 108
## [1] "nrow"
## [1] 1439
## [1] "date"
## [1] "2016-08-09 12:01:00 +12"
## [1] "mrae"
## [1] 0.2552555
## [1] "percentage length"
## [1] 0.39391

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 107 115 102
## [1] "nrow"
## [1] 0
## [1] "date"
## [1] NA
## [1] "mrae"
## [1] NaN
## [1] "percentage length"
## [1] 0.3595254

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 105 113 114
## [1] "nrow"
## [1] 1439
## [1] "date"
## [1] "2016-08-09 12:01:00 +12"
## [1] "mrae"
## [1] 0.4087769
## [1] "percentage length"
## [1] 0.5324599
## [1] "############"

start.date <- as.POSIXct('2016-08-24 12:00', format='%Y-%m-%d %H:%M', tz='Etc/GMT-12')
end.date <- as.POSIXct('2016-08-25 12:00', format='%Y-%m-%d %H:%M', tz='Etc/GMT-12')
low.wind.2 <- with.ecan[which(start.date < with.ecan$date & with.ecan$date < end.date), ]
experiment(low.wind.2)

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 102 109 108
## [1] "nrow"
## [1] 1439
## [1] "date"
## [1] "2016-08-24 12:01:00 +12"
## [1] "mrae"
## [1] 2.172239
## [1] "percentage length"
## [1] 0.39391

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 107 115 102
## [1] "nrow"
## [1] 1439
## [1] "date"
## [1] "2016-08-24 12:01:00 +12"
## [1] "mrae"
## [1] 0.2045531
## [1] "percentage length"
## [1] 0.3595254

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 105 113 114
## [1] "nrow"
## [1] 1439
## [1] "date"
## [1] "2016-08-24 12:01:00 +12"
## [1] "mrae"
## [1] 0.927987
## [1] "percentage length"
## [1] 0.5324599
## [1] "############"

start.date <- as.POSIXct('2016-08-14 12:00', format='%Y-%m-%d %H:%M', tz='Etc/GMT-12')
end.date <- as.POSIXct('2016-08-15 12:00', format='%Y-%m-%d %H:%M', tz='Etc/GMT-12')
low.wind.3 <- with.ecan[which(start.date < with.ecan$date & with.ecan$date < end.date), ]
experiment(low.wind.3)

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 102 109 108
## [1] "nrow"
## [1] 1439
## [1] "date"
## [1] "2016-08-14 12:01:00 +12"
## [1] "mrae"
## [1] 0.5024468
## [1] "percentage length"
## [1] 0.39391

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 107 115 102
## [1] "nrow"
## [1] 1439
## [1] "date"
## [1] "2016-08-14 12:01:00 +12"
## [1] "mrae"
## [1] 0.8113813
## [1] "percentage length"
## [1] 0.3595254

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'na.rm' will be disregarded

## [1] 105 113 114
## [1] "nrow"
## [1] 1439
## [1] "date"
## [1] "2016-08-14 12:01:00 +12"
## [1] "mrae"
## [1] 0.6333785
## [1] "percentage length"
## [1] 0.5324599
## [1] "############"