1. Advanced interpolation of vessel locations in place and time.
  1. Spatial reasoning: involve port and “open sea” relationships with vessel paths.
  2. Develop “on demand” calculations for interpolated points

Environment setup

#https://rpubs.com/dgolicher/postgis_example
library(rpostgis)
library(sp)
library(sf)
library(raster)
library(rgeos)
library(dplyr, warn.conflicts = F)
library(ggplot2)
library(knitr)
library(rnaturalearth)
library(rnaturalearthdata)

Data initialization

SELECT b.* 
INTO TABLE crimea_firehose_test
from hotspots a, firehose_test b
where a.name = 'Crimea' AND ST_Contains( a.geom, b.geom);

CREATE INDEX crimea_firehose_test_geom_idx
  ON crimea_firehose_test
  USING GIST (geom);
  
CREATE INDEX crimea_firehose_test_mmsi_idx ON crimea_firehose_test (mmsi);
Time range for sampled AIS signal capture
min max
2020-02-27 16:18:31 2020-02-28 13:55:43
  random_mmsi <- readRDS('random_mmsi.rds')
  #random_mmsi_list_query <- "SELECT b.* from firehose_test b WHERE b.mmsi = ANY((SELECT DISTINCT mmsi FROM crimea_firehose_test LIMIT 100));"
  #start_time <- Sys.time()
  #random_mmsi <- pgGetGeom(conn, query=random_mmsi_list_query)
  #end_time <- Sys.time()
  #end_time - start_time
  firehose_df <- random_mmsi@data
  #saveRDS(random_mmsi, 'random_mmsi.rds')
  random_mmsi_sf <- st_as_sf(random_mmsi) #for ggplotting
  random_mmsi_sf$x <- as.data.frame(random_mmsi@coords)$x
  random_mmsi_sf$y <- as.data.frame(random_mmsi@coords)$y

Data Exploration

world <- ne_countries(scale = "medium", returnclass = "sf")
ggplot(data = world) +
  geom_sf()+ 
  geom_point(data=random_mmsi_sf, aes(x=x, y=y, colour=factor(mmsi)), size = 1, 
  shape = 23, fill = "darkred") +
  coord_sf(xlim = c( 35,  40), ylim = c( 42,48), expand = FALSE) +            theme(legend.position = "none") + 
  ggtitle("AIS Signals grouped by MMSI")

random_mmsi@data %>% 
  group_by(mmsi) %>% 
  tally() %>% 
  ggplot(aes(n)) +
    geom_histogram(bins = 100) + 
    ggtitle("Signal count density per route") +
    xlab("Number (Bin)") + ylab("Number of routes")

date_min <- min(random_mmsi@data$signal_time)
date_max <- max(random_mmsi@data$signal_time)

Construct a time difference variable

TODO:

  • add satellite vs terrestrial source data and visualize both
  • consider a model (random forest?) to determine “state” of vessel (i.e. en route, in port, etc). visualize these against other data. determine outliers from nuanced categorization.
  • add ports data from GIS collection. Ports should join with known “to” and “from” shipping data. TBD.
  • checkout https://towardsdatascience.com/creating-sea-routes-from-the-sea-of-ais-data-30bc68d8530e
# get time difference between samples

firehose_df<-firehose_df %>%
    group_by(mmsi) %>%
    #arrange(signal_time) %>%
    mutate(diff = signal_time - lag(signal_time, default = first(signal_time)))
# summary stats
sd_diff <- sd(firehose_df$diff)
mean_diff <- mean(firehose_df$diff)
var_diff <- var(firehose_df$diff)

# outlier analysis
#firehose_df$sds <- firehose_df$diff/sd_diff
random_signalTime <- sample(firehose_df$signal_time,1)

firehose_df %>% 
  mutate(x_new  = ifelse(diff > 500, 500, diff)) %>% 
  ggplot(aes(x_new)) +
  geom_histogram(binwidth = 1, col = "black", fill = "cornflowerblue") + 
  ggtitle("AIS Signal frequency density histogram (500+ binned)")

Analysis 1

Determining midpoint for a single mmsi

TODO: - Check if time_diff is within “normal” range. 1 SD. Qualify against all data - Quantify “position_accuracy” within distance of existing signal and against all samples

#get a negative value
set.seed(11)
# get estimated location at random time
random_signalTime <- sample(seq(firehose_df$signal_time[1], firehose_df$signal_time[nrow(firehose_df)], by="sec"), 1)
# get closest vector value
signalTime_idx <- which.min(abs(firehose_df$signal_time-random_signalTime))

midpoint <- function(lat1, long1, lat2, long2, per) {
  a <- lat1 + (lat2 - lat1) * per
  b <- long1 + (long2 - long1) * per
     return (c(b, a))
}

#check if below or above
if(as.numeric(sign(firehose_df$signal_time[signalTime_idx]-random_signalTime))== -1){
  #is negative so take upper and current
  cur_coords <- random_mmsi@coords[signalTime_idx,]
  alt_coords <- random_mmsi@coords[signalTime_idx+1,]
  discrete_diff <- firehose_df[signalTime_idx+1,]$diff
  distance <- pointDistance(c(cur_coords[1], cur_coords[2]), c(alt_coords[1], alt_coords[2]), lonlat=TRUE)
  per <- as.numeric(abs(firehose_df$signal_time[signalTime_idx]-random_signalTime)/as.numeric(discrete_diff))
  mdpt <- midpoint(cur_coords[2], cur_coords[1],alt_coords[2],alt_coords[1],per)
}else if(as.numeric(sign(firehose_df$signal_time[signalTime_idx]-random_signalTime))== 1) {
  #else positive so take lower and current
  cur_coords <- random_mmsi@coords[signalTime_idx,]
  alt_coords <- random_mmsi@coords[signalTime_idx-1,]
  discrete_diff <- firehose_df[signalTime_idx-1,]$diff
  distance <- pointDistance(c(cur_coords[1], cur_coords[2]), c(alt_coords[1], alt_coords[2]), lonlat=TRUE)
  per <- as.numeric(abs(firehose_df$signal_time[signalTime_idx]-random_signalTime)/as.numeric(discrete_diff))
  mdpt <- midpoint(cur_coords[2], cur_coords[1],alt_coords[2],alt_coords[1],per)
}else{
  mdpt <- random_mmsi@coords[signalTime_idx,]
}

Estimated point at random time

#mmsi_hardcode
mdpt
##        x        y 
## 37.30863 44.40327
random_signalTime
## [1] "2020-02-28 10:33:41 EST"

Estimated adjacent vessels at place and time

query2 <- "SELECT *
FROM firehose_test
WHERE ST_Contains(ST_Buffer(
 ST_GeomFromText('POINT(37.30863 44.40327)',4326),
 0.10, 'quad_segs=2'), firehose_test.geom)"
adjacent_vessels <- pgGetGeom(conn, query=query2)
## Returning Point types in SpatialPoints*-class.
adjacent_vessels_sf <- st_as_sf(adjacent_vessels) #for ggplotting
adjacent_vessels_sf$x <- as.data.frame(adjacent_vessels@coords)$x
adjacent_vessels_sf$y <- as.data.frame(adjacent_vessels@coords)$y

ggplot(data = world) +
  geom_sf()+ 
  geom_point(data=adjacent_vessels_sf, aes(x=x, y=y, colour=factor(mmsi)), size = 1, 
  shape = 23, fill = "darkred") +
  coord_sf(xlim = c( 37,  38), ylim = c( 44,45), expand = FALSE) +            theme(legend.position = "none") + 
  ggtitle("AIS Signals within range of location")