This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
For the work we have to load the following libraries:
packages <- c("terra","tools","dplyr","tidyverse","caTools","e1071","pROC","ROCR","raster","data.table","sf","sp","caret")
Read the shapefile with the district boundaries. The codes of the pilot districts are :
Morigoan - 3 Kamrup (R) - 30 Jorhat - 22 Darrang - 26
state_4326 <- vect("assam_State.shp")
districts <- vect("assam_district_35.shp")
plot(districts)
#Codes of pilot districts
#Morigoan - 3
#Kamrup (R) - 30
#Jorhat - 22
#Darrang - 26
###****change to district code###
i <- 26
districts[i,2]$district
## [1] "DARRANG"
crop_district <- (districts[i,2])
plot(crop_district)
#class Spat vector converted to a Simple feature collection
crop_district <- st_as_sf(crop_district)
set.seed(452)
#randomly generating 10000 points in the study area
crop_distr_pts <- st_sample(crop_district, size = c(10000,10000), type = "random")
plot(crop_distr_pts)
# converting a simple feature "sfc_POINT" to class Spat vector
darPoints <- vect(crop_distr_pts)
R_fin <- rast("conditioningfactors/annual_rainfall_2017_2021.tif")
elv <- rast("conditioningfactors/Method2elev_RESAMPL_Aug.tif")
elv
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2elev_RESAMPL_Aug.tif
## name : srtm_filled_dem
## min value : 0
## max value : 0.992889
plot(elv)
slope <- rast("conditioningfactors/Method2slope_RESAMPL_Aug.tif")
slope
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2slope_RESAMPL_Aug.tif
## name : strm_filled_slope_degrees
## min value : 0
## max value : 1
plot(slope)
gcn <- rast("conditioningfactors/Method2gcn_RESAMPL_Aug.tif")
gcn
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2gcn_RESAMPL_Aug.tif
## name : GCN250_ARCIII_average
## min value : 0.1656022
## max value : 1
plot(gcn)
lith <- rast("conditioningfactors/Method2lith_RESAMPL_Aug.tif")
lith
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2lith_RESAMPL_Aug.tif
## name : assam_lith
## min value : 0
## max value : 0.999826
plot(lith)
drnDens <- rast("conditioningfactors/Method2drnDens_RESAMPL_Aug.tif")
drnDens
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2drnDens_RESAMPL_Aug.tif
## name : gmted_drainage_density_without_1
## min value : 0.0001062773
## max value : 0.9996718
plot(drnDens)
rivdist <- rast("conditioningfactors/Method2rivdist_RESAMPL_Aug_updated.tif")
rivdist
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2rivdist_RESAMPL_Aug_updated.tif
## name : assam_dist_from_major_rivers_updated_3857
## min value : 1.549304e-06
## max value : 0.9419088
plot(rivdist)
soil <- rast("conditioningfactors/Method2soil_RESAMPL_Aug.tif")
soil
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2soil_RESAMPL_Aug.tif
## name : assam_soil
## min value : 0
## max value : 1
plot(soil)
lu_2017 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug_2018.tif")
names(lu_2017) <- 'land use'
lu_2017
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2lu_RESAMPL_Aug_2018.tif
## name : land use
## min value : 0.06136545
## max value : 1
plot(lu_2017)
built_2017 <- rast("conditioningfactors/Method2built_RESAMPL_Aug_2018.tif")
built_2017
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2built_RESAMPL_Aug_2018.tif
## name : ndbi
## min value : 0.01254883
## max value : 0.8411762
plot(built_2017)
veg_2017 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug_2018.tif")
veg_2017
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : Method2veg_RESAMPL_Aug_2018.tif
## name : ndvi
## min value : 0.01301401
## max value : 0.7431054
plot(veg_2017)
rain_2017 <- R_fin[[2]]
rain_2017
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : annual_rainfall_2017_2021.tif
## name : sum
## min value : 0.1673821
## max value : 0.6586117
plot(rain_2017)
#2018
lu_2018 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug_2019.tif")
names(lu_2018) <- 'land use'
built_2018 <- rast("conditioningfactors/Method2built_RESAMPL_Aug_2019.tif")
veg_2018 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug_2019.tif")
rain_2018 <- R_fin[[3]]
#2019
lu_2019 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug_2020.tif")
names(lu_2019) <- 'land use'
built_2019 <- rast("conditioningfactors/Method2built_RESAMPL_Aug_2020.tif")
veg_2019 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug_2020.tif")
rain_2019 <- R_fin[[4]]
#2020
lu_2020 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug_2021.tif")
names(lu_2020) <- 'land use'
built_2020 <- rast("conditioningfactors/Method2built_RESAMPL_Aug_2021.tif")
veg_2020 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug_2021.tif")
rain_2020 <- R_fin[[5]]
highfld_2017_01 <- rast("conditioningfactors/floodedareas_grtr1in5_2017AugRS_01.tif")
highfld_2017_01
## class : SpatRaster
## dimensions : 13807, 22750, 1 (nrow, ncol, nlyr)
## resolution : 0.0002777778, 0.0002777778 (x, y)
## extent : 89.69847, 96.01792, 24.13625, 27.97153 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : floodedareas_grtr1in5_2017AugRS_01.tif
## name : Inundation
## min value : 0
## max value : 1
plot(highfld_2017_01)
highfld_2018_01 <- rast("conditioningfactors/floodedareas_grtr1in5_2018AugRS_01.tif")
highfld_2019_01 <- rast("conditioningfactors/floodedareas_grtr1in5_2019AugRS_01.tif")
highfld_2020_01 <- rast("conditioningfactors/floodedareas_grtr1in5_2020AugRS_01.tif")
varbls_2017 <- c(highfld_2017_01, lu_2017, built_2017, veg_2017, soil, lith, elv,slope, gcn, drnDens, rivdist, rain_2017)#, rd_dist, natreserv )
varbls_2018 <- c(highfld_2018_01, lu_2018, built_2018, veg_2018, soil, lith, elv,slope, gcn, drnDens, rivdist, rain_2018)#, rd_dist, natreserv)
varbls_2019 <- c(highfld_2019_01, lu_2019, built_2019, veg_2019, soil, lith, elv,slope, gcn, drnDens, rivdist, rain_2019)#, rd_dist, natreserv)
varbls_2020 <- c(highfld_2020_01, lu_2020, built_2020, veg_2020, soil, lith, elv,slope, gcn, drnDens, rivdist, rain_2020)#, rd_dist, natreserv)
varbls <- c(varbls_2017, varbls_2018, varbls_2019, varbls_2020)
plot(varbls)
totyr_pt_2017 <- terra::extract(varbls_2017,darPoints)
nrow(totyr_pt_2017)
## [1] 10000
totyr_pt_2017[1:10,]
## ID Inundation land use ndbi ndvi assam_soil assam_lith
## 1 1 0 0.1726873 0.5720696 0.4606273 0.3869510 0.03376448
## 2 2 0 0.1726873 0.5510150 0.5820765 0.3869510 0.03376448
## 3 3 0 0.1726873 0.3622118 0.5960086 0.3869510 0.03376448
## 4 4 0 NaN 0.2294836 0.4068016 0.6178755 0.03376448
## 5 5 0 0.1726873 0.2045756 0.3414046 0.6178755 0.03376448
## 6 6 0 0.1726873 0.1808382 0.4126885 0.6178755 0.03376448
## 7 7 0 0.1103080 0.5817766 0.4479067 0.0000000 0.03376448
## 8 8 0 0.1103080 0.5628227 0.4242240 0.0000000 0.03376448
## 9 9 0 0.1726873 0.5596329 0.6116224 0.3869510 0.03376448
## 10 10 0 0.1103080 0.5637544 0.4434202 0.3869510 0.03376448
## srtm_filled_dem strm_filled_slope_degrees GCN250_ARCIII_average
## 1 0.8386708 4.956383e-05 0.9285714
## 2 0.8382441 1.302078e-04 0.9285714
## 3 0.8398046 1.942053e-04 0.6710718
## 4 0.9727713 3.325343e-04 0.9586411
## 5 0.8364154 3.155470e-04 0.3912604
## 6 0.8392438 1.146059e-04 0.6710718
## 7 0.9735331 1.058102e-04 0.9651336
## 8 0.9766369 1.000000e+00 NaN
## 9 0.8378174 4.464984e-04 0.7180905
## 10 0.9730958 1.302078e-04 NaN
## gmted_drainage_density_without_1 assam_dist_from_major_rivers_updated_3857
## 1 0.8147682 0.9198999
## 2 0.8541090 0.1018517
## 3 0.8558805 0.8541810
## 4 0.6088081 0.8914124
## 5 0.8218293 0.9154325
## 6 0.8255780 0.8925930
## 7 0.7433632 0.9288179
## 8 0.2470118 0.9196220
## 9 0.8198205 0.1048963
## 10 0.5985722 0.9378961
## sum
## 1 0.2430130
## 2 0.2578792
## 3 0.2242319
## 4 0.2389359
## 5 0.2564047
## 6 0.2397397
## 7 0.2179935
## 8 0.1989775
## 9 0.2544802
## 10 0.2029372
totyr_pt_2018 <- terra::extract(varbls_2018,darPoints)
nrow(totyr_pt_2018)
## [1] 10000
totyr_pt_2019 <- terra::extract(varbls_2019,darPoints)
nrow(totyr_pt_2019)
## [1] 10000
totyr_pt_2020 <- terra::extract(varbls_2020,darPoints)
nrow(totyr_pt_2020)
## [1] 10000
totyr_pt <- rbind(totyr_pt_2017, totyr_pt_2018, totyr_pt_2019, totyr_pt_2020)
nrow(totyr_pt)
## [1] 40000
totyr_pt <- subset(totyr_pt, select = -ID)
totyr <- totyr_pt
#removing any rows with NA values
totyr <- na.omit(totyr)
#finding the points with flooded cells
totyr_fld <- totyr %>% filter(Inundation == 1)
nrow(totyr_fld)
## [1] 2329
#finding the points with non-flooded cells
totyr_nonfld <- totyr %>% filter(Inundation == 0)
nrow(totyr_nonfld)
## [1] 30836
split = sample.split(totyr_nonfld$sum, SplitRatio = 0.90)
nonfldremoved_set = subset(totyr_nonfld, split == TRUE)
nrow(nonfldremoved_set)
## [1] 27752
nonfldtest_set = subset(totyr_nonfld, split == FALSE)
nrow(nonfldtest_set)
## [1] 3084
#combining with flooded cells
totyr_pt <- rbind(nonfldtest_set,totyr_fld)
nrow(totyr_pt)
## [1] 5413
totyr_pt[1:10,]
## Inundation land use ndbi ndvi assam_soil assam_lith
## 46 0 0.78696096 0.5661634 0.4440765 0.6178755 0.03376448
## 47 0 0.54490745 0.4577798 0.6851165 0.0000000 0.03376448
## 53 0 0.17268729 0.5515885 0.6142564 0.3326333 0.03376448
## 64 0 0.80118942 0.5684456 0.4940663 0.6178755 0.03376448
## 68 0 0.17268729 0.5614538 0.5877375 0.0000000 0.03376448
## 70 0 0.06136545 0.3340998 0.4411479 0.3869510 0.03376448
## 71 0 0.17268729 0.5723739 0.5504879 0.3869510 0.03376448
## 73 0 0.43424875 0.2149087 0.4006418 0.3869510 0.03376448
## 75 0 0.06136545 0.2307765 0.2980303 0.3869510 0.03376448
## 84 0 0.06136545 0.4110089 0.7016809 0.3869510 0.03376448
## srtm_filled_dem strm_filled_slope_degrees GCN250_ARCIII_average
## 46 0.9727713 3.527999e-04 0.9343126
## 47 0.8378662 1.117787e-04 0.1943066
## 53 0.8385001 2.750403e-04 0.6990030
## 64 0.8396217 1.838343e-04 0.7582844
## 68 0.8313316 1.838343e-04 0.5584058
## 70 0.8360497 5.807444e-05 0.5160706
## 71 0.8366105 4.993081e-04 0.7725866
## 73 0.8359765 9.748569e-05 0.4885632
## 75 0.8385977 5.894643e-05 0.9285714
## 84 0.8361229 4.440166e-05 0.4222988
## gmted_drainage_density_without_1 assam_dist_from_major_rivers_updated_3857
## 46 0.6336079 0.9043211
## 47 0.8189856 0.4118861
## 53 0.8589817 0.9222444
## 64 0.6151277 0.9212288
## 68 0.8760197 0.8540909
## 70 0.8521604 0.1074567
## 71 0.8628972 0.8917370
## 73 0.8273830 0.1093554
## 75 0.8703403 0.4153286
## 84 0.8580140 0.9036356
## sum
## 46 0.1983337
## 47 0.2582810
## 53 0.2486364
## 64 0.2238039
## 68 0.2665491
## 70 0.2598820
## 71 0.2642629
## 73 0.2717497
## 75 0.2368228
## 84 0.2619651
totyr_pt$Inundation = factor(totyr_pt$Inundation , levels = c(0, 1))
set.seed(123)
split = sample.split(totyr_pt$Inundation, SplitRatio = 0.70)
training_set = subset(totyr_pt, split == TRUE)
#dataset rows to train the model
nrow(training_set)
## [1] 3789
test_set = subset(totyr_pt, split == FALSE)
#dataset rows to test the model
nrow(test_set)
## [1] 1624
Training the data using SVM model
classifier = svm(formula = Inundation ~ .,
data = training_set,
probability = T,
type = 'C-classification')
## Warning in svm.default(x, y, scale = scale, ..., na.action = na.action):
## Variable(s) 'assam_lith' constant. Cannot scale data.
classifier
##
## Call:
## svm(formula = Inundation ~ ., data = training_set, probability = T,
## type = "C-classification")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 3270
AUC values ranges between 0 & 1. A model whose predictions are 100% wrong has an AUC value of 0.0 and a model whose predictions are 100% correct gives an AUC value of 1.0. An AUC 1.0 conclude that the model predicted the values 100% accurately. Hence, the AUC value must be as high as possible.
y_pred = predict(classifier, newdata = test_set[-1])
length(y_pred)
## [1] 1624
nrow(test_set)
## [1] 1624
test_set <- na.omit(test_set)
# Define two vectors
actual_values <- (test_set[1]$Inundation)
predict_value <- as.numeric(y_pred)-1
auc_value <- roc(actual_values, predict_value)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(auc_value)
## Area under the curve: 0.5
#predicting using the test set
y_pred_withprob = predict(classifier, newdata = test_set[-1], probability = T)
#confusion matrix table
cm <- table(test_set[, 1], y_pred_withprob)
cm
## y_pred_withprob
## 0 1
## 0 686 239
## 1 413 286
# getting the probabilitie sof non-flooding and flooding
s <- (attr(y_pred_withprob, "probabilities"))
s[1:10,]
## 0 1
## 47 0.6133640 0.3866360
## 64 0.7659772 0.2340228
## 68 0.4257621 0.5742379
## 73 0.6824409 0.3175591
## 86 0.6961334 0.3038666
## 132 0.5831990 0.4168010
## 150 0.4704300 0.5295700
## 151 0.6524968 0.3475032
## 155 0.6005888 0.3994112
## 189 0.4261378 0.5738622
nrow(s)
## [1] 1624
y_pred_withprob <- relevel(y_pred_withprob, ref = "1")
#results of confusion matrix
confusionMatrix(test_set[, 1], y_pred_withprob)
## Warning in confusionMatrix.default(test_set[, 1], y_pred_withprob): Levels are
## not in the same order for reference and data. Refactoring data to match.
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 0
## 1 286 413
## 0 239 686
##
## Accuracy : 0.5985
## 95% CI : (0.5742, 0.6225)
## No Information Rate : 0.6767
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1555
##
## Mcnemar's Test P-Value : 1.242e-11
##
## Sensitivity : 0.5448
## Specificity : 0.6242
## Pos Pred Value : 0.4092
## Neg Pred Value : 0.7416
## Prevalence : 0.3233
## Detection Rate : 0.1761
## Detection Prevalence : 0.4304
## Balanced Accuracy : 0.5845
##
## 'Positive' Class : 1
##
Change the rainfall to updated data. The MOSDAC data or global weather forecasts can be used. Here we use the rainfall data of 2022 from indiawris.
#change the normRain to the forecasted rainfall to predict Flood Hazard probability
#for 2022
#Rain
#here we check the prediction by using rains recorded in months of May to July 2022
Rain <- rast("RAINFALL_2022/rainfall_may_july.tif")
rainmax <- max(Rain[], na.rm=T)
rainmin <- min(Rain[], na.rm=T)
normRain <- (Rain - rainmin)/(rainmax - rainmin)
#change name as required
names(normRain) <- 'sum'
plot(normRain)
#2022
lu_2021 <- rast("conditioningfactors/Method2lu_RESAMPL_Aug.tif")
names(lu_2021) <- 'land use'
built_2021 <- rast("conditioningfactors/Method2built_RESAMPL_Aug.tif")
veg_2021 <- rast("conditioningfactors/Method2veg_RESAMPL_Aug.tif")
#reading all static layers
elv <- rast("conditioningfactors/Method2elev_RESAMPL_Aug.tif")
slope <- rast("conditioningfactors/Method2slope_RESAMPL_Aug.tif")
gcn <- rast("conditioningfactors/Method2gcn_RESAMPL_Aug.tif")
lith <- rast("conditioningfactors/Method2lith_RESAMPL_Aug.tif")
drnDens <- rast("conditioningfactors/Method2drnDens_RESAMPL_Aug.tif")
rivdist <- rast("conditioningfactors/Method2rivdist_RESAMPL_Aug_updated.tif")
soil <- rast("conditioningfactors/Method2soil_RESAMPL_Aug.tif")
#combine the rainfall with the conditioning factors
predicvarbls_state <- c(lu_2021, built_2021, veg_2021, soil, lith, elv,slope, gcn, drnDens, rivdist, normRain)
crop_district <- (districts[i,2])
plot(crop_district)
predicvarbls <- terra::crop(predicvarbls_state , crop_district)
predicvarbls <- terra::mask(predicvarbls , crop_district , overwrite = T)
plot(predicvarbls[[1]])
length(predicvarbls$assam_soil[])
## [1] 3890118
#converting all na values to 0
predicvarbls[is.na(predicvarbls)] <- 0
#use xy to convert the dataframe back to raster later
predvar <- (as.data.frame(predicvarbls, xy=T))
nrow(predvar)
## [1] 3890118
#the data
predvar[1:10,]
## x y land use ndbi ndvi assam_soil assam_lith srtm_filled_dem
## 1 91.74444 26.67 0 0 0 0 0 0
## 2 91.74472 26.67 0 0 0 0 0 0
## 3 91.74500 26.67 0 0 0 0 0 0
## 4 91.74528 26.67 0 0 0 0 0 0
## 5 91.74556 26.67 0 0 0 0 0 0
## 6 91.74583 26.67 0 0 0 0 0 0
## 7 91.74611 26.67 0 0 0 0 0 0
## 8 91.74639 26.67 0 0 0 0 0 0
## 9 91.74667 26.67 0 0 0 0 0 0
## 10 91.74694 26.67 0 0 0 0 0 0
## strm_filled_slope_degrees GCN250_ARCIII_average
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
## gmted_drainage_density_without_1 assam_dist_from_major_rivers_updated_3857
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
## sum
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## 10 0
#prediciton
y_pred_withprob = predict(classifier, newdata = predvar, probability = T, na.rm=F)
# the prediction probabilities
s <- (attr(y_pred_withprob, "probabilities"))
s[1:10,]
## 0 1
## 1 0.9785273 0.02147268
## 2 0.9785273 0.02147268
## 3 0.9785273 0.02147268
## 4 0.9785273 0.02147268
## 5 0.9785273 0.02147268
## 6 0.9785273 0.02147268
## 7 0.9785273 0.02147268
## 8 0.9785273 0.02147268
## 9 0.9785273 0.02147268
## 10 0.9785273 0.02147268
dummy <- rast(ext=ext(predicvarbls[[1]]),crs=crs(predicvarbls[[1]], proj=T, describe=FALSE, parse=FALSE),
nrow=dim(predicvarbls[[1]])[1],ncol=dim(predicvarbls[[1]])[2])
values(dummy) <- s[,2]
#flood probability for the rain forecast
plot(dummy)
dummy <- crop(dummy, crop_district)
dummy <- mask(dummy, crop_district, overwrite = T)
#flood probability for the rain forecast cropped to the district boundaries
plot(dummy)
#checking the deviation of the rainfall used with the mean daily rainfall in the monsoon months calculated over the last few years
R_18 <- rast('rainfall_2018_mean.tif')
R_19 <- rast('rainfall_2019_mean.tif')
R_20 <- rast('rainfall_2020_mean.tif')
R_21 <- rast('rainfall_2021_mean.tif')
R_mean <- mean(R_18, R_19, R_20,R_21)
R_proj <- terra::project(R_mean , crs(state_4326), overwrite = T)
R_crop <- crop(R_proj , crop_district, overwrite = T, snap = "out")
R_crop_mask <- mask(R_crop , crop_district, overwrite = T)
R_fin <- R_crop_mask
#mean daily rainfall for the state
plot(R_fin)
plot(state_4326, add=T)
#resampling to common spatial extent used in the project
d <- rast(ext=ext(dummy),crs=crs(dummy, proj=T, describe=FALSE, parse=FALSE),
nrow=dim(dummy)[1],ncol=dim(dummy)[2])
R_fin <-resample(R_fin, d)
plot(R_fin)
#cropping the rain map to the district boundary
Rain <- crop(Rain , crop_district, overwrite = T)
Rain <- mask(Rain , crop_district, overwrite = T)
plot(Rain)
##***change 90 the the number of days rainfall is summed upp in Rain
flood_prob_intensity <- dummy * (Rain / (R_fin*90))
plot(flood_prob_intensity)
## select flood probability only within a range
###***note: add the threshold same as Method_2 validation***###
###*0.3 to 0.95
c <- (quantile(dummy[],prob = 0.5, na.rm=T))
b <- (quantile(dummy[],prob = 0.95, na.rm=T))
highfld_mdl <- (dummy > c & dummy < b)
#flood probability area after applying threshold
a <- highfld_mdl
plot(a)
#flood probability map
pred_fld_haz <- a*dummy
plot(pred_fld_haz)
#actual flooded map of 2022
flooded <- rast("fldhazmthd1_floodprob_2022_Aug.tif")
plot(flooded)
plot(crop_district, add = T)
#cropping to the selected district
highfld_1 <- crop(flooded, crop_district)
highfld_1 <- mask(highfld_1, crop_district, overwrite = T)
dum <- rast(ext=ext(highfld_1),crs=crs(highfld_1, proj=T, describe=FALSE, parse=FALSE),
nrow=dim(highfld_1)[1],ncol=dim(highfld_1)[2])
a <-resample(a, dum, 'near')
#actual flooding 2022 in the selected district
plot(a)
#selecting only the cells that are flooded more than average
highfld <- highfld_1 >= mean(highfld_1[],na.rm=T)
plot(highfld)
perfmodel <- highfld+a
#combined map showing the actual flooded and the predicted flooded parts
#0 - not flooded area, 1 - flooded cells either in actual or predicted map,
#2- cells predicted as floods that are actually flooded
plot(perfmodel)
#calculating the model performance
modelperf <- freq(perfmodel)
parent <- freq(highfld)
#count of cells predicted as flooded which are actually flooded
modelperf[3,3]
## count
## 600205
#count of cell in the actual flood map that are flooded
parent[2,3]
## count
## 941518
#percentage of correctly predicted flooded cells
perc_flood_pts_correctly_predicted <- modelperf[3,3]/parent[2,3]
perc_flood_pts_correctly_predicted
## count
## 0.6374865
#precision
precision <- modelperf[3,3]/modelperf[2,3]
precision
## count
## 0.3137382
subdistricts <- vect("ASSAM_SUBDISTRICT_4326.shp")
subdistricts$dtname <- toupper(subdistricts$dtname)
#enter the district
k <- which(subdistricts$dtname == districts[i,2]$district)
sel_subdist <- subdistricts[k,]
plot(sel_subdist, main = "Sub-districts")
subd_m <- terra::extract(flood_prob_intensity, sel_subdist, 'mean', na.rm=T)
sel_subdist$fld_mdl <- subd_m$lyr.1
sel_subdist$sdtname
## [1] "Khoirabari (Pt)" "Pathorighat (Pt)" "Sipajhar" "Mangaldoi (Pt)"
## [5] "Dalgaon (Pt)" "Kalaigaon (Pt)"
sel_subdist$fld_mdl
## [1] 0.6524710 0.7266518 0.6682899 0.6224199 0.5779157 0.7480465
highfld_1 <- crop(flooded, crop_district)
highfld_1 <- mask(highfld_1, crop_district, overwrite = T)
subd_a <- terra::extract(highfld_1, sel_subdist, 'mean', na.rm=T)
sel_subdist$fld_actual <- subd_a$sum
sel_subdist$sdtname
## [1] "Khoirabari (Pt)" "Pathorighat (Pt)" "Sipajhar" "Mangaldoi (Pt)"
## [5] "Dalgaon (Pt)" "Kalaigaon (Pt)"
sel_subdist$fld_actual
## [1] 0.04737001 0.10415190 0.10973606 0.03760505 0.01162693 0.04216893