#https://rpubs.com/dgolicher/postgis_example
library(rpostgis)
library(sp)
library(sf)
library(raster)
library(rgeos)
library(dplyr, warn.conflicts = F)
library(ggplot2)
library(knitr)
library(rnaturalearth)
library(rnaturalearthdata)
SELECT b.*
INTO TABLE crimea_firehose_test
from hotspots a, firehose_test b
where a.name = 'Crimea' AND ST_Contains( a.geom, b.geom);
CREATE INDEX crimea_firehose_test_geom_idx
ON crimea_firehose_test
USING GIST (geom);
CREATE INDEX crimea_firehose_test_mmsi_idx ON crimea_firehose_test (mmsi);
| min | max |
|---|---|
| 2020-02-27 16:18:31 | 2020-02-28 13:55:43 |
random_mmsi <- readRDS('random_mmsi.rds')
#random_mmsi_list_query <- "SELECT b.* from firehose_test b WHERE b.mmsi = ANY((SELECT DISTINCT mmsi FROM crimea_firehose_test LIMIT 100));"
#start_time <- Sys.time()
#random_mmsi <- pgGetGeom(conn, query=random_mmsi_list_query)
#end_time <- Sys.time()
#end_time - start_time
firehose_df <- random_mmsi@data
#saveRDS(random_mmsi, 'random_mmsi.rds')
random_mmsi_sf <- st_as_sf(random_mmsi) #for ggplotting
random_mmsi_sf$x <- as.data.frame(random_mmsi@coords)$x
random_mmsi_sf$y <- as.data.frame(random_mmsi@coords)$y
world <- ne_countries(scale = "medium", returnclass = "sf")
ggplot(data = world) +
geom_sf()+
geom_point(data=random_mmsi_sf, aes(x=x, y=y, colour=factor(mmsi)), size = 1,
shape = 23, fill = "darkred") +
coord_sf(xlim = c( 35, 40), ylim = c( 42,48), expand = FALSE) + theme(legend.position = "none") +
ggtitle("AIS Signals grouped by MMSI")
random_mmsi@data %>%
group_by(mmsi) %>%
tally() %>%
ggplot(aes(n)) +
geom_histogram(bins = 100) +
ggtitle("Signal count density per route") +
xlab("Number (Bin)") + ylab("Number of routes")
date_min <- min(random_mmsi@data$signal_time)
date_max <- max(random_mmsi@data$signal_time)
TODO:
# get time difference between samples
firehose_df<-firehose_df %>%
group_by(mmsi) %>%
#arrange(signal_time) %>%
mutate(diff = signal_time - lag(signal_time, default = first(signal_time)))
# summary stats
sd_diff <- sd(firehose_df$diff)
mean_diff <- mean(firehose_df$diff)
var_diff <- var(firehose_df$diff)
# outlier analysis
#firehose_df$sds <- firehose_df$diff/sd_diff
random_signalTime <- sample(firehose_df$signal_time,1)
firehose_df %>%
mutate(x_new = ifelse(diff > 500, 500, diff)) %>%
ggplot(aes(x_new)) +
geom_histogram(binwidth = 1, col = "black", fill = "cornflowerblue") +
ggtitle("AIS Signal frequency density histogram (500+ binned)")
Determining midpoint for a single mmsi
TODO: - Check if time_diff is within “normal” range. 1 SD. Qualify against all data - Quantify “position_accuracy” within distance of existing signal and against all samples
#get a negative value
set.seed(11)
# get estimated location at random time
random_signalTime <- sample(seq(firehose_df$signal_time[1], firehose_df$signal_time[nrow(firehose_df)], by="sec"), 1)
# get closest vector value
signalTime_idx <- which.min(abs(firehose_df$signal_time-random_signalTime))
midpoint <- function(lat1, long1, lat2, long2, per) {
a <- lat1 + (lat2 - lat1) * per
b <- long1 + (long2 - long1) * per
return (c(b, a))
}
#check if below or above
if(as.numeric(sign(firehose_df$signal_time[signalTime_idx]-random_signalTime))== -1){
#is negative so take upper and current
cur_coords <- random_mmsi@coords[signalTime_idx,]
alt_coords <- random_mmsi@coords[signalTime_idx+1,]
discrete_diff <- firehose_df[signalTime_idx+1,]$diff
distance <- pointDistance(c(cur_coords[1], cur_coords[2]), c(alt_coords[1], alt_coords[2]), lonlat=TRUE)
per <- as.numeric(abs(firehose_df$signal_time[signalTime_idx]-random_signalTime)/as.numeric(discrete_diff))
mdpt <- midpoint(cur_coords[2], cur_coords[1],alt_coords[2],alt_coords[1],per)
}else if(as.numeric(sign(firehose_df$signal_time[signalTime_idx]-random_signalTime))== 1) {
#else positive so take lower and current
cur_coords <- random_mmsi@coords[signalTime_idx,]
alt_coords <- random_mmsi@coords[signalTime_idx-1,]
discrete_diff <- firehose_df[signalTime_idx-1,]$diff
distance <- pointDistance(c(cur_coords[1], cur_coords[2]), c(alt_coords[1], alt_coords[2]), lonlat=TRUE)
per <- as.numeric(abs(firehose_df$signal_time[signalTime_idx]-random_signalTime)/as.numeric(discrete_diff))
mdpt <- midpoint(cur_coords[2], cur_coords[1],alt_coords[2],alt_coords[1],per)
}else{
mdpt <- random_mmsi@coords[signalTime_idx,]
}
Estimated point at random time
#mmsi_hardcode
mdpt
## x y
## 37.30863 44.40327
random_signalTime
## [1] "2020-02-28 10:33:41 EST"
Estimated adjacent vessels at place and time
query2 <- "SELECT *
FROM firehose_test
WHERE ST_Contains(ST_Buffer(
ST_GeomFromText('POINT(37.30863 44.40327)',4326),
0.10, 'quad_segs=2'), firehose_test.geom)"
adjacent_vessels <- pgGetGeom(conn, query=query2)
## Returning Point types in SpatialPoints*-class.
adjacent_vessels_sf <- st_as_sf(adjacent_vessels) #for ggplotting
adjacent_vessels_sf$x <- as.data.frame(adjacent_vessels@coords)$x
adjacent_vessels_sf$y <- as.data.frame(adjacent_vessels@coords)$y
ggplot(data = world) +
geom_sf()+
geom_point(data=adjacent_vessels_sf, aes(x=x, y=y, colour=factor(mmsi)), size = 1,
shape = 23, fill = "darkred") +
coord_sf(xlim = c( 37, 38), ylim = c( 44,45), expand = FALSE) + theme(legend.position = "none") +
ggtitle("AIS Signals within range of location")