library(sf, quietly = TRUE)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
library(randomForest, quietly = TRUE)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
Prerequisites
- Geocoded VV / VH, GRD Imagery
- Associated ship detection polygon vectors
- includes mmsi and classified column (mmsi_suprv)
- mmsi_suprv is assigned by eyeball QA (spot-checking) of each detected object, which is classified as one of: 0 JUNK; 1 PLATFORM; 2 BUOY; 7 KNOWN MMSI; 8 UNKNOWN OBJECT; 9 UNKNOWN VESSEL. Each object is checked visually against the following layers: OpenSeaMap data, OpenStreetMap land footprints, Sentinel-1 VV / VH geocoded imagery, Bing Maps tiles inside ArcMap, and interpolated AIS signal points with paths, which are used to set up the MMSI-derived QA that feeds into the model parameters. Any additional data that could aid the eyeball classification of ship detection results would be helpful, as would any automated ship detection script (e.g. SNAP or SARscape).
- includes predictor variable columns (i.e. area, distance from shore)
Load Data and set up test/training data
# Read the dark-object detection polygons that were classified by hand in ArcMap.
dark_objects_original <- st_read("../gis_projects/gsd_shoreline_processing/calculate_geometry_dark_objects.shp")
## Reading layer `calculate_geometry_dark_objects' from data source `C:\repositories\ISR-Maritime-Analytics\analytics\gis_projects\gsd_shoreline_processing\calculate_geometry_dark_objects.shp' using driver `ESRI Shapefile'
## Simple feature collection with 727 features and 12 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 53.95172 ymin: 24.41852 xmax: 56.57474 ymax: 25.77184
## epsg (SRID): 4326
## proj4string: +proj=longlat +datum=WGS84 +no_defs
# Work on a copy so the data as read stays untouched, then fold class 7 into
# class 9 for this model. The 7s are reserved for additional analysis: they
# should be extracted and joined to the MMSI ship data tables.
dark_objects_edit1 <- dark_objects_original
dark_objects_edit1$mmsi_suprv <- replace(
  dark_objects_edit1$mmsi_suprv,
  dark_objects_edit1$mmsi_suprv == 7,
  9
)
# Reproducible 75% / 25% train/test split of the classified objects.
# Fix the RNG seed so the same rows are drawn on every run.
set.seed(11)
n_obs <- nrow(dark_objects_edit1)
smp_siz <- floor(0.75 * n_obs)
train_ind <- sample(seq_len(n_obs), size = smp_siz)
train <- dark_objects_edit1[train_ind, ]
# Pick the hold-out rows by positive index. `x[-train_ind, ]` silently returns
# zero rows (instead of every row) when train_ind is empty, because
# -integer(0) is still integer(0). For non-empty train_ind the rows and their
# order are the same as with negative indexing.
test <- dark_objects_edit1[setdiff(seq_len(n_obs), train_ind), ]
# NOTE(review): the split is not stratified by mmsi_suprv. In the recorded
# results further down, class 2 shows up in the test confusion matrix but not
# in the training one, so the model can never predict it. Consider a
# stratified split if rare classes matter.
Generate basic randomForest model to predict ship detection output categories.
- These categories could be improved in a complementary analysis that uses the MMSI-linked data (objects classified as 7, KNOWN MMSI).
# Fit a basic random forest classifier (model1) on the training split.
# Drop the geometry column from the training data; randomForest could not cope with it.
train$geometry <- NULL
# The response/target variable must be a factor so that it is treated as categorical.
train$mmsi_suprv <- as.factor(train$mmsi_suprv)
# Columns left over from ArcGIS (and similar) that must not be used as predictors.
# Everything not listed here is kept: the response variable plus the useful predictors.
exclude_cols <- c('area_sqkm','size_meter','sat_id','series_id','Shape_Leng','Shape_Area','mmsi','NEAR_FID','geometry', 'OBJECTID')
# model1: "kitchen sink" model, mmsi_suprv against every remaining column.
# NOTE(review): `type` does not appear to be a documented randomForest() argument;
# classification is presumably inferred from the factor response -- confirm.
model1 <- randomForest(mmsi_suprv ~ ., data = train[ !names(train) %in% exclude_cols ], type="classification", importance=TRUE)
# model2: same idea with the "junk" removed
# tbd
# Save a model once you are happy with it and it is statistically sound.
# saveRDS(model1, "model1.rds")
Check results against test data
# Predict the class of every object in the held-out test split with model1.
pred1 <- predict(model1, newdata = test)
Analyze model and results
# Variable importance: how much weight each predictor carries in model1
varImpPlot(model1)

# Confusion matrix for the training data, as stored on the fitted model
model1[["confusion"]]
## 0 1 8 9 class.error
## 0 117 0 16 2 0.1333333
## 1 0 20 4 16 0.5000000
## 8 20 4 19 25 0.7205882
## 9 8 17 8 269 0.1092715
# Confusion matrix for the test data: rows are the predicted class (pred1),
# columns are the manually assigned class (mmsi_suprv)
table(pred1, test[["mmsi_suprv"]])
##
## pred1 0 1 2 8 9
## 0 44 0 0 9 5
## 1 0 7 0 3 2
## 8 1 1 0 5 10
## 9 0 6 1 5 83