library(sf, quietly = TRUE)

## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

library(randomForest, quietly = TRUE)

## randomForest 4.6-14

## Type rfNews() to see new features/changes/bug fixes.

Prerequisites

Geocoded VV / VH, GRD Imagery
Associated ship detection polygon vectors
- includes mmsi and classified column (mmsi_suprv)
  - mmsi_suprv is assigned by manual visual QA (spot checking): each detected object is inspected and classified as one of: 0 JUNK; 1 PLATFORM; 2 BUOY; 7 KNOWN MMSI; 8 UNKNOWN OBJECT; 9 UNKNOWN VESSEL. The inspection is done against the following layers: Open Sea Map data, Open Street Map land footprints, Sentinel 1 VV / VH geocoded imagery, Bing Map tiles inside ArcMap, and interpolated AIS signal points with paths, which set up MMSI-derived QA to feed into the model parameters. Any additional data that could aid the visual classification of ship detection results would be helpful, as would any automated ship detection script (e.g. SNAP or SarScape).
- includes predictor variable columns (i.e. area, distance from shore)

Load Data and set up test/training data

# Read the dark-object polygons (shapefile derived from the manual ArcMap
# classification); the path is relative to the working directory.
dark_objects_original <- st_read(
  dsn = "../gis_projects/gsd_shoreline_processing/calculate_geometry_dark_objects.shp"
)

## Reading layer `calculate_geometry_dark_objects' from data source `C:\repositories\ISR-Maritime-Analytics\analytics\gis_projects\gsd_shoreline_processing\calculate_geometry_dark_objects.shp' using driver `ESRI Shapefile'
## Simple feature collection with 727 features and 12 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 53.95172 ymin: 24.41852 xmax: 56.57474 ymax: 25.77184
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs

# Fold class 7 (KNOWN MMSI) into class 9. Class-7 objects are meant for an
# additional analysis in which they are extracted and joined to the MMSI ship
# data tables, so they are not modelled as their own class here.
dark_objects_edit1 <- dark_objects_original
dark_objects_edit1$mmsi_suprv[dark_objects_edit1$mmsi_suprv == 7] <- 9

# Fixed seed so the 75% / 25% train/test split is reproducible.
set.seed(11)
smp_siz <- floor(0.75 * nrow(dark_objects_edit1))
train_ind <- sample(seq_len(nrow(dark_objects_edit1)), size = smp_siz)
train <- dark_objects_edit1[train_ind, ]
# Select the test rows positively instead of with `-train_ind`: negative
# indexing with an empty index vector returns zero rows rather than all rows,
# which would silently produce an empty test set for very small inputs.
test_ind <- setdiff(seq_len(nrow(dark_objects_edit1)), train_ind)
test <- dark_objects_edit1[test_ind, ]

Generate basic randomForest model to predict ship detection output categories.

These categories could be improved in a complementary analysis that uses the MMSI-connected (class 7) data.

# Drop the geometry column: randomForest() cannot use it as a predictor.
train$geometry <- NULL
# The response must be a factor: randomForest() chooses classification vs.
# regression from the type of the response, not from an argument.
train$mmsi_suprv <- as.factor(train$mmsi_suprv)
# Columns left over from ArcGIS processing and other non-predictors. Only the
# response variable and the useful predictor variables are kept.
exclude_cols <- c(
  "area_sqkm", "size_meter", "sat_id", "series_id", "Shape_Leng",
  "Shape_Area", "mmsi", "NEAR_FID", "geometry", "OBJECTID"
)

# model1 with the kitchen sink: every remaining column is a predictor.
# The former `type = "classification"` argument was removed: randomForest()
# has no such parameter, so it was silently swallowed by `...` and had no
# effect on the fit.
model1 <- randomForest(
  mmsi_suprv ~ .,
  data = train[!names(train) %in% exclude_cols],
  importance = TRUE
)

# model2 remove "junk"
# tbd

# save models once you are happy with them and they are statistically sound
#saveRDS(model1, "model1_dark_objects_rf.rds")

Check results against test data

# Predict the class of every held-out test object with model1.
pred1 <- predict(object = model1, newdata = test)

Analyze model and results

# Variable importance plot: how much weight each predictor carries in model1
# (useful for predictor selection).
varImpPlot(x = model1)

# Confusion matrix stored on the fitted model for the training data, with the
# per-class error rate in the last column.
model1[["confusion"]]

##     0  1  8   9 class.error
## 0 117  0 16   2   0.1333333
## 1   0 20  4  16   0.5000000
## 8  20  4 19  25   0.7205882
## 9   8 17  8 269   0.1092715

# Confusion matrix for the held-out test data: rows are the classes predicted
# by model1, columns are the manually assigned classes.
table(pred1, test[["mmsi_suprv"]])

##      
## pred1  0  1  2  8  9
##     0 44  0  0  9  5
##     1  0  7  0  3  2
##     8  1  1  0  5 10
##     9  0  6  1  5 83

Random Forest Dark Object Classification

Prerequisites

Load Data and set up test/training data

Generate basic randomForest model to predict ship detection output categories.

Check results against test data

Analyze model and results