Background

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

For the work we have to load the following libraries:

packages <- c("terra","tools","dplyr","tidyverse","caTools","e1071","pROC","ROCR","raster","data.table","sf","sp","caret")

Read the shapefile with the district boundaries. The codes of the pilot districts are :

Morigoan - 3 Kamrup (R) - 30 Jorhat - 22 Darrang - 26

state_4326 <- vect("assam_State.shp")
districts <- vect("assam_district_35.shp")
plot(districts)

#Codes of pilot districts
#Morigoan - 3
#Kamrup (R) - 30
#Jorhat - 22
#Darrang - 26
###****change to district code###
i <- 26
districts[i,2]$district
## [1] "DARRANG"
crop_district <- (districts[i,2])
plot(crop_district)

#class Spat vector converted to a Simple feature collection 
crop_district <- st_as_sf(crop_district)
set.seed(452)
#randomly generating 10000 points in the study area
crop_distr_pts <- st_sample(crop_district, size = c(10000,10000), type = "random")
plot(crop_distr_pts)

# converting a simple feature "sfc_POINT" to class Spat vector
darPoints <- vect(crop_distr_pts)
R_fin <- rast("conditioningfactors/annual_rainfall_2017_2021.tif")
elv <- rast("conditioningfactors/Method2elev_RESAMPL_Aug.tif")
elv
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2elev_RESAMPL_Aug.tif 
## name        : srtm_filled_dem 
## min value   :               0 
## max value   :        0.992889
plot(elv)

slope <- rast("conditioningfactors/Method2slope_RESAMPL_Aug.tif")
slope
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2slope_RESAMPL_Aug.tif 
## name        : strm_filled_slope_degrees 
## min value   :                         0 
## max value   :                         1
plot(slope)

gcn <- rast("conditioningfactors/Method2gcn_RESAMPL_Aug.tif")
gcn
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2gcn_RESAMPL_Aug.tif 
## name        : GCN250_ARCIII_average 
## min value   :             0.1656022 
## max value   :                     1
plot(gcn)

lith <- rast("conditioningfactors/Method2lith_RESAMPL_Aug.tif")
lith
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2lith_RESAMPL_Aug.tif 
## name        : assam_lith 
## min value   :          0 
## max value   :   0.999826
plot(lith)

drnDens <- rast("conditioningfactors/Method2drnDens_RESAMPL_Aug.tif")
drnDens
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2drnDens_RESAMPL_Aug.tif 
## name        : gmted_drainage_density_without_1 
## min value   :                     0.0001062773 
## max value   :                        0.9996718
plot(drnDens)

rivdist <- rast("conditioningfactors/Method2rivdist_RESAMPL_Aug_updated.tif")
rivdist
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2rivdist_RESAMPL_Aug_updated.tif 
## name        : assam_dist_from_major_rivers_updated_3857 
## min value   :                              1.549304e-06 
## max value   :                                 0.9419088
plot(rivdist)

soil <- rast("conditioningfactors/Method2soil_RESAMPL_Aug.tif")
soil
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2soil_RESAMPL_Aug.tif 
## name        : assam_soil 
## min value   :          0 
## max value   :          1
plot(soil)

lu_2017 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug_2018.tif")
names(lu_2017) <- 'land use'
lu_2017
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2lu_RESAMPL_Aug_2018.tif 
## name        :   land use 
## min value   : 0.06136545 
## max value   :          1
plot(lu_2017)

built_2017 <- rast("conditioningfactors/Method2built_RESAMPL_Aug_2018.tif")
built_2017
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2built_RESAMPL_Aug_2018.tif 
## name        :       ndbi 
## min value   : 0.01254883 
## max value   :  0.8411762
plot(built_2017)

veg_2017 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug_2018.tif")
veg_2017
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : Method2veg_RESAMPL_Aug_2018.tif 
## name        :       ndvi 
## min value   : 0.01301401 
## max value   :  0.7431054
plot(veg_2017)

rain_2017 <- R_fin[[2]]
rain_2017
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : annual_rainfall_2017_2021.tif 
## name        :       sum 
## min value   : 0.1673821 
## max value   : 0.6586117
plot(rain_2017)

#2018
lu_2018 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug_2019.tif")
names(lu_2018) <- 'land use'
built_2018 <- rast("conditioningfactors/Method2built_RESAMPL_Aug_2019.tif")
veg_2018 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug_2019.tif")
rain_2018 <- R_fin[[3]] 
#2019
lu_2019 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug_2020.tif")
names(lu_2019) <- 'land use'
built_2019 <- rast("conditioningfactors/Method2built_RESAMPL_Aug_2020.tif")
veg_2019 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug_2020.tif")
rain_2019 <- R_fin[[4]] 
#2020
lu_2020 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug_2021.tif")
names(lu_2020) <- 'land use'
built_2020 <- rast("conditioningfactors/Method2built_RESAMPL_Aug_2021.tif")
veg_2020 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug_2021.tif")
rain_2020 <- R_fin[[5]] 
highfld_2017_01 <- rast("conditioningfactors/floodedareas_grtr1in5_2017AugRS_01.tif")
highfld_2017_01
## class       : SpatRaster 
## dimensions  : 13807, 22750, 1  (nrow, ncol, nlyr)
## resolution  : 0.0002777778, 0.0002777778  (x, y)
## extent      : 89.69847, 96.01792, 24.13625, 27.97153  (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326) 
## source      : floodedareas_grtr1in5_2017AugRS_01.tif 
## name        : Inundation 
## min value   :          0 
## max value   :          1
plot(highfld_2017_01)

highfld_2018_01 <- rast("conditioningfactors/floodedareas_grtr1in5_2018AugRS_01.tif")
highfld_2019_01 <- rast("conditioningfactors/floodedareas_grtr1in5_2019AugRS_01.tif")
highfld_2020_01 <- rast("conditioningfactors/floodedareas_grtr1in5_2020AugRS_01.tif")
varbls_2017 <- c(highfld_2017_01, lu_2017, built_2017, veg_2017, soil, lith, elv,slope, gcn, drnDens, rivdist, rain_2017)#,  rd_dist, natreserv )
varbls_2018 <- c(highfld_2018_01, lu_2018, built_2018, veg_2018, soil, lith, elv,slope, gcn, drnDens, rivdist, rain_2018)#, rd_dist, natreserv)
varbls_2019 <- c(highfld_2019_01, lu_2019, built_2019, veg_2019, soil, lith, elv,slope, gcn, drnDens, rivdist, rain_2019)#, rd_dist, natreserv)
varbls_2020 <- c(highfld_2020_01, lu_2020, built_2020, veg_2020, soil, lith, elv,slope, gcn, drnDens, rivdist, rain_2020)#, rd_dist, natreserv)
varbls <- c(varbls_2017, varbls_2018, varbls_2019, varbls_2020)
plot(varbls)

totyr_pt_2017 <- terra::extract(varbls_2017,darPoints)
nrow(totyr_pt_2017)
## [1] 10000
totyr_pt_2017[1:10,]
##    ID Inundation  land use      ndbi      ndvi assam_soil assam_lith
## 1   1          0 0.1726873 0.5720696 0.4606273  0.3869510 0.03376448
## 2   2          0 0.1726873 0.5510150 0.5820765  0.3869510 0.03376448
## 3   3          0 0.1726873 0.3622118 0.5960086  0.3869510 0.03376448
## 4   4          0       NaN 0.2294836 0.4068016  0.6178755 0.03376448
## 5   5          0 0.1726873 0.2045756 0.3414046  0.6178755 0.03376448
## 6   6          0 0.1726873 0.1808382 0.4126885  0.6178755 0.03376448
## 7   7          0 0.1103080 0.5817766 0.4479067  0.0000000 0.03376448
## 8   8          0 0.1103080 0.5628227 0.4242240  0.0000000 0.03376448
## 9   9          0 0.1726873 0.5596329 0.6116224  0.3869510 0.03376448
## 10 10          0 0.1103080 0.5637544 0.4434202  0.3869510 0.03376448
##    srtm_filled_dem strm_filled_slope_degrees GCN250_ARCIII_average
## 1        0.8386708              4.956383e-05             0.9285714
## 2        0.8382441              1.302078e-04             0.9285714
## 3        0.8398046              1.942053e-04             0.6710718
## 4        0.9727713              3.325343e-04             0.9586411
## 5        0.8364154              3.155470e-04             0.3912604
## 6        0.8392438              1.146059e-04             0.6710718
## 7        0.9735331              1.058102e-04             0.9651336
## 8        0.9766369              1.000000e+00                   NaN
## 9        0.8378174              4.464984e-04             0.7180905
## 10       0.9730958              1.302078e-04                   NaN
##    gmted_drainage_density_without_1 assam_dist_from_major_rivers_updated_3857
## 1                         0.8147682                                 0.9198999
## 2                         0.8541090                                 0.1018517
## 3                         0.8558805                                 0.8541810
## 4                         0.6088081                                 0.8914124
## 5                         0.8218293                                 0.9154325
## 6                         0.8255780                                 0.8925930
## 7                         0.7433632                                 0.9288179
## 8                         0.2470118                                 0.9196220
## 9                         0.8198205                                 0.1048963
## 10                        0.5985722                                 0.9378961
##          sum
## 1  0.2430130
## 2  0.2578792
## 3  0.2242319
## 4  0.2389359
## 5  0.2564047
## 6  0.2397397
## 7  0.2179935
## 8  0.1989775
## 9  0.2544802
## 10 0.2029372
totyr_pt_2018 <- terra::extract(varbls_2018,darPoints)
nrow(totyr_pt_2018)
## [1] 10000
totyr_pt_2019 <- terra::extract(varbls_2019,darPoints)
nrow(totyr_pt_2019)
## [1] 10000
totyr_pt_2020 <- terra::extract(varbls_2020,darPoints)
nrow(totyr_pt_2020)
## [1] 10000
totyr_pt <- rbind(totyr_pt_2017, totyr_pt_2018, totyr_pt_2019, totyr_pt_2020)
nrow(totyr_pt)
## [1] 40000
totyr_pt <- subset(totyr_pt, select = -ID)
totyr <- totyr_pt
#removing any rows with NA values
totyr <- na.omit(totyr)
#finding the points with flooded cells
totyr_fld <- totyr %>% filter(Inundation == 1)
nrow(totyr_fld)
## [1] 2329
#finding the points with non-flooded cells
totyr_nonfld <- totyr %>% filter(Inundation == 0)
nrow(totyr_nonfld)
## [1] 30836
split = sample.split(totyr_nonfld$sum, SplitRatio = 0.90)

nonfldremoved_set = subset(totyr_nonfld, split == TRUE)
nrow(nonfldremoved_set)
## [1] 27752
nonfldtest_set = subset(totyr_nonfld, split == FALSE)
nrow(nonfldtest_set)
## [1] 3084
#combining with flooded cells
totyr_pt <- rbind(nonfldtest_set,totyr_fld)
nrow(totyr_pt)
## [1] 5413
totyr_pt[1:10,]
##    Inundation   land use      ndbi      ndvi assam_soil assam_lith
## 46          0 0.78696096 0.5661634 0.4440765  0.6178755 0.03376448
## 47          0 0.54490745 0.4577798 0.6851165  0.0000000 0.03376448
## 53          0 0.17268729 0.5515885 0.6142564  0.3326333 0.03376448
## 64          0 0.80118942 0.5684456 0.4940663  0.6178755 0.03376448
## 68          0 0.17268729 0.5614538 0.5877375  0.0000000 0.03376448
## 70          0 0.06136545 0.3340998 0.4411479  0.3869510 0.03376448
## 71          0 0.17268729 0.5723739 0.5504879  0.3869510 0.03376448
## 73          0 0.43424875 0.2149087 0.4006418  0.3869510 0.03376448
## 75          0 0.06136545 0.2307765 0.2980303  0.3869510 0.03376448
## 84          0 0.06136545 0.4110089 0.7016809  0.3869510 0.03376448
##    srtm_filled_dem strm_filled_slope_degrees GCN250_ARCIII_average
## 46       0.9727713              3.527999e-04             0.9343126
## 47       0.8378662              1.117787e-04             0.1943066
## 53       0.8385001              2.750403e-04             0.6990030
## 64       0.8396217              1.838343e-04             0.7582844
## 68       0.8313316              1.838343e-04             0.5584058
## 70       0.8360497              5.807444e-05             0.5160706
## 71       0.8366105              4.993081e-04             0.7725866
## 73       0.8359765              9.748569e-05             0.4885632
## 75       0.8385977              5.894643e-05             0.9285714
## 84       0.8361229              4.440166e-05             0.4222988
##    gmted_drainage_density_without_1 assam_dist_from_major_rivers_updated_3857
## 46                        0.6336079                                 0.9043211
## 47                        0.8189856                                 0.4118861
## 53                        0.8589817                                 0.9222444
## 64                        0.6151277                                 0.9212288
## 68                        0.8760197                                 0.8540909
## 70                        0.8521604                                 0.1074567
## 71                        0.8628972                                 0.8917370
## 73                        0.8273830                                 0.1093554
## 75                        0.8703403                                 0.4153286
## 84                        0.8580140                                 0.9036356
##          sum
## 46 0.1983337
## 47 0.2582810
## 53 0.2486364
## 64 0.2238039
## 68 0.2665491
## 70 0.2598820
## 71 0.2642629
## 73 0.2717497
## 75 0.2368228
## 84 0.2619651
totyr_pt$Inundation = factor(totyr_pt$Inundation , levels = c(0, 1))
set.seed(123)
split = sample.split(totyr_pt$Inundation, SplitRatio = 0.70)
training_set = subset(totyr_pt, split == TRUE)
#dataset rows to train the model
nrow(training_set)
## [1] 3789
test_set = subset(totyr_pt, split == FALSE)
#dataset rows to test the model
nrow(test_set)
## [1] 1624

Training the data using SVM model

classifier = svm(formula = Inundation ~ .,
                 data = training_set,
                 probability = T,
                 type = 'C-classification')
## Warning in svm.default(x, y, scale = scale, ..., na.action = na.action):
## Variable(s) 'assam_lith' constant. Cannot scale data.
classifier
## 
## Call:
## svm(formula = Inundation ~ ., data = training_set, probability = T, 
##     type = "C-classification")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  3270

AUC values ranges between 0 & 1. A model whose predictions are 100% wrong has an AUC value of 0.0 and a model whose predictions are 100% correct gives an AUC value of 1.0. An AUC 1.0 conclude that the model predicted the values 100% accurately. Hence, the AUC value must be as high as possible.

y_pred = predict(classifier, newdata = test_set[-1])
length(y_pred)
## [1] 1624
nrow(test_set)
## [1] 1624
test_set <- na.omit(test_set)
# Define two vectors
actual_values <- (test_set[1]$Inundation)
predict_value <- as.numeric(y_pred)-1

auc_value <- roc(actual_values, predict_value)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(auc_value)
## Area under the curve: 0.5
#predicting using the test set
y_pred_withprob = predict(classifier, newdata = test_set[-1], probability = T)
#confusion matrix table
cm <-  table(test_set[, 1], y_pred_withprob)
cm
##    y_pred_withprob
##       0   1
##   0 686 239
##   1 413 286
# getting the probabilitie sof non-flooding and flooding
s <- (attr(y_pred_withprob, "probabilities"))
s[1:10,]
##             0         1
## 47  0.6133640 0.3866360
## 64  0.7659772 0.2340228
## 68  0.4257621 0.5742379
## 73  0.6824409 0.3175591
## 86  0.6961334 0.3038666
## 132 0.5831990 0.4168010
## 150 0.4704300 0.5295700
## 151 0.6524968 0.3475032
## 155 0.6005888 0.3994112
## 189 0.4261378 0.5738622
nrow(s)
## [1] 1624
y_pred_withprob <- relevel(y_pred_withprob, ref = "1")
#results of confusion matrix 
confusionMatrix(test_set[, 1], y_pred_withprob)
## Warning in confusionMatrix.default(test_set[, 1], y_pred_withprob): Levels are
## not in the same order for reference and data. Refactoring data to match.
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   0
##          1 286 413
##          0 239 686
##                                           
##                Accuracy : 0.5985          
##                  95% CI : (0.5742, 0.6225)
##     No Information Rate : 0.6767          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1555          
##                                           
##  Mcnemar's Test P-Value : 1.242e-11       
##                                           
##             Sensitivity : 0.5448          
##             Specificity : 0.6242          
##          Pos Pred Value : 0.4092          
##          Neg Pred Value : 0.7416          
##              Prevalence : 0.3233          
##          Detection Rate : 0.1761          
##    Detection Prevalence : 0.4304          
##       Balanced Accuracy : 0.5845          
##                                           
##        'Positive' Class : 1               
## 

Change the rainfall to updated data. The MOSDAC data or global weather forecasts can be used. Here we use the rainfall data of 2022 from indiawris.

#change the normRain to the forecasted rainfall to predict Flood Hazard probability
#for 2022
#Rain
#here we check the prediction by using rains recorded in months of May to July 2022
Rain <- rast("RAINFALL_2022/rainfall_may_july.tif")
rainmax <- max(Rain[], na.rm=T)
rainmin <- min(Rain[], na.rm=T)
normRain <- (Rain - rainmin)/(rainmax - rainmin)

#change name as required
names(normRain) <- 'sum'
plot(normRain)

#2022
lu_2021 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug.tif")
names(lu_2021) <- 'land use'
built_2021 <- rast("conditioningfactors/Method2built_RESAMPL_Aug.tif")
veg_2021 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug.tif")
#reading all static layers
elv <- rast("conditioningfactors/Method2elev_RESAMPL_Aug.tif")
slope <- rast("conditioningfactors/Method2slope_RESAMPL_Aug.tif")
gcn <- rast("conditioningfactors/Method2gcn_RESAMPL_Aug.tif")
lith <- rast("conditioningfactors/Method2lith_RESAMPL_Aug.tif")
drnDens <- rast("conditioningfactors/Method2drnDens_RESAMPL_Aug.tif")
rivdist <- rast("conditioningfactors/Method2rivdist_RESAMPL_Aug_updated.tif")
soil <- rast("conditioningfactors/Method2soil_RESAMPL_Aug.tif")

#combine the rainfall with the conditioning factors
predicvarbls_state <- c(lu_2021, built_2021, veg_2021, soil, lith, elv,slope, gcn, drnDens, rivdist, normRain)
  crop_district <- (districts[i,2])
  plot(crop_district)

  predicvarbls <- terra::crop(predicvarbls_state , crop_district)
  predicvarbls <- terra::mask(predicvarbls , crop_district , overwrite = T)
  
  plot(predicvarbls[[1]])

  length(predicvarbls$assam_soil[])
## [1] 3890118
  #converting all na values to 0
  predicvarbls[is.na(predicvarbls)] <- 0
  #use xy to convert the dataframe back to raster later
  predvar <- (as.data.frame(predicvarbls, xy=T))
  nrow(predvar)
## [1] 3890118
  #the data
  predvar[1:10,]
##           x     y land use ndbi ndvi assam_soil assam_lith srtm_filled_dem
## 1  91.74444 26.67        0    0    0          0          0               0
## 2  91.74472 26.67        0    0    0          0          0               0
## 3  91.74500 26.67        0    0    0          0          0               0
## 4  91.74528 26.67        0    0    0          0          0               0
## 5  91.74556 26.67        0    0    0          0          0               0
## 6  91.74583 26.67        0    0    0          0          0               0
## 7  91.74611 26.67        0    0    0          0          0               0
## 8  91.74639 26.67        0    0    0          0          0               0
## 9  91.74667 26.67        0    0    0          0          0               0
## 10 91.74694 26.67        0    0    0          0          0               0
##    strm_filled_slope_degrees GCN250_ARCIII_average
## 1                          0                     0
## 2                          0                     0
## 3                          0                     0
## 4                          0                     0
## 5                          0                     0
## 6                          0                     0
## 7                          0                     0
## 8                          0                     0
## 9                          0                     0
## 10                         0                     0
##    gmted_drainage_density_without_1 assam_dist_from_major_rivers_updated_3857
## 1                                 0                                         0
## 2                                 0                                         0
## 3                                 0                                         0
## 4                                 0                                         0
## 5                                 0                                         0
## 6                                 0                                         0
## 7                                 0                                         0
## 8                                 0                                         0
## 9                                 0                                         0
## 10                                0                                         0
##    sum
## 1    0
## 2    0
## 3    0
## 4    0
## 5    0
## 6    0
## 7    0
## 8    0
## 9    0
## 10   0
  #prediciton
  y_pred_withprob = predict(classifier, newdata = predvar, probability = T, na.rm=F)
  # the prediction probabilities
  s <- (attr(y_pred_withprob, "probabilities"))
  s[1:10,]
##            0          1
## 1  0.9785273 0.02147268
## 2  0.9785273 0.02147268
## 3  0.9785273 0.02147268
## 4  0.9785273 0.02147268
## 5  0.9785273 0.02147268
## 6  0.9785273 0.02147268
## 7  0.9785273 0.02147268
## 8  0.9785273 0.02147268
## 9  0.9785273 0.02147268
## 10 0.9785273 0.02147268
dummy <- rast(ext=ext(predicvarbls[[1]]),crs=crs(predicvarbls[[1]], proj=T, describe=FALSE, parse=FALSE),
                nrow=dim(predicvarbls[[1]])[1],ncol=dim(predicvarbls[[1]])[2])
  values(dummy) <- s[,2]
  #flood probability for the rain forecast 
  plot(dummy)

  dummy <- crop(dummy, crop_district)
  dummy <- mask(dummy, crop_district, overwrite = T)
  #flood probability for the rain forecast cropped to the district boundaries
  plot(dummy)

#checking the deviation of the rainfall used with the mean daily rainfall in the monsoon months calculated over the last few years
  R_18 <- rast('rainfall_2018_mean.tif')
  R_19 <- rast('rainfall_2019_mean.tif')
  R_20 <- rast('rainfall_2020_mean.tif')
  R_21 <- rast('rainfall_2021_mean.tif')
  
  R_mean <- mean(R_18, R_19, R_20,R_21)
  R_proj <- terra::project(R_mean , crs(state_4326), overwrite = T)
  R_crop <- crop(R_proj , crop_district, overwrite = T, snap = "out")
  R_crop_mask <- mask(R_crop , crop_district, overwrite = T)
  R_fin <- R_crop_mask
  #mean daily rainfall for the state
  plot(R_fin)
  plot(state_4326, add=T)

  #resampling to common spatial extent used in the project
  d <- rast(ext=ext(dummy),crs=crs(dummy, proj=T, describe=FALSE, parse=FALSE),
  nrow=dim(dummy)[1],ncol=dim(dummy)[2])
  R_fin <-resample(R_fin, d)
  plot(R_fin)

  #cropping the rain map to the district boundary
  Rain <- crop(Rain , crop_district, overwrite = T)
  Rain <- mask(Rain , crop_district, overwrite = T)
  plot(Rain)

  ##***change 90 the the number of days rainfall is summed upp in Rain
  flood_prob_intensity <- dummy * (Rain / (R_fin*90))
  plot(flood_prob_intensity)

## select flood probability only within a range
  ###***note: add the threshold same as Method_2 validation***###
  ###*0.3 to 0.95
  c <- (quantile(dummy[],prob = 0.5, na.rm=T))
  b <- (quantile(dummy[],prob = 0.95, na.rm=T))
  
  highfld_mdl <- (dummy > c & dummy < b) 
  #flood probability area after applying threshold
  a <- highfld_mdl
  plot(a)

  #flood probability map
  pred_fld_haz <- a*dummy
  plot(pred_fld_haz)

  #actual flooded map of 2022
  flooded <- rast("fldhazmthd1_floodprob_2022_Aug.tif")
  plot(flooded)
  plot(crop_district, add = T)

  #cropping to the selected district
  highfld_1 <- crop(flooded, crop_district)
  highfld_1 <- mask(highfld_1, crop_district, overwrite = T)
  dum <- rast(ext=ext(highfld_1),crs=crs(highfld_1, proj=T, describe=FALSE, parse=FALSE),
                nrow=dim(highfld_1)[1],ncol=dim(highfld_1)[2])
  a <-resample(a, dum, 'near')
  #actual flooding 2022 in the selected district
  plot(a)

  #selecting only the cells that are flooded more than average
  highfld <- highfld_1 >= mean(highfld_1[],na.rm=T)
  plot(highfld)

perfmodel <- highfld+a
  #combined map showing the actual flooded and the predicted flooded parts
  #0 - not flooded area, 1 - flooded cells either in actual or predicted map,
  #2- cells predicted as floods that are actually flooded
  plot(perfmodel)

  #calculating the model performance
  modelperf <- freq(perfmodel)
  parent <- freq(highfld)
  #count of cells predicted as flooded which are actually flooded
  modelperf[3,3]
##  count 
## 600205
  #count of cell in the actual flood map that are flooded
  parent[2,3]
##  count 
## 941518
  #percentage of correctly predicted flooded cells
  perc_flood_pts_correctly_predicted <- modelperf[3,3]/parent[2,3]
  perc_flood_pts_correctly_predicted
##     count 
## 0.6374865
  #precision 
  precision <- modelperf[3,3]/modelperf[2,3]
  precision
##     count 
## 0.3137382
  subdistricts <- vect("ASSAM_SUBDISTRICT_4326.shp")
  subdistricts$dtname <- toupper(subdistricts$dtname)
  #enter the district
  k <- which(subdistricts$dtname == districts[i,2]$district)
  sel_subdist <- subdistricts[k,]
  plot(sel_subdist, main = "Sub-districts")

  subd_m <- terra::extract(flood_prob_intensity, sel_subdist, 'mean', na.rm=T)
  sel_subdist$fld_mdl <- subd_m$lyr.1
  sel_subdist$sdtname
## [1] "Khoirabari (Pt)"  "Pathorighat (Pt)" "Sipajhar"         "Mangaldoi (Pt)"  
## [5] "Dalgaon (Pt)"     "Kalaigaon (Pt)"
  sel_subdist$fld_mdl
## [1] 0.6524710 0.7266518 0.6682899 0.6224199 0.5779157 0.7480465
  highfld_1 <- crop(flooded, crop_district)
  highfld_1 <- mask(highfld_1, crop_district, overwrite = T)
  subd_a <- terra::extract(highfld_1, sel_subdist, 'mean', na.rm=T)
  sel_subdist$fld_actual <- subd_a$sum
  sel_subdist$sdtname
## [1] "Khoirabari (Pt)"  "Pathorighat (Pt)" "Sipajhar"         "Mangaldoi (Pt)"  
## [5] "Dalgaon (Pt)"     "Kalaigaon (Pt)"
  sel_subdist$fld_actual
## [1] 0.04737001 0.10415190 0.10973606 0.03760505 0.01162693 0.04216893