This R code aims to produce mosquito risk maps using remote sensing data.
library(raster); library(sf)
# Specify the path that contains the raster data layers
path<-"F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Mosquito_Test1"
# List all files with tif extension from above folder
predictors<-list.files(path=path,pattern = "tif$",full.names = T)
# Stack all layers together
predictors<-stack(predictors)
# Read the shapefile containing mosquito count sampling locations (you can use data processed above shape_final)
Mosquito_sampling<-shapefile("F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Mosquito_Sampling\\Mix_House_Mosquito.shp") # Upload mosquito points
# Convert it to sf object
Mos_points<-st_as_sf(Mosquito_sampling)
rownames(Mos_points)<-NULL # Reset index
# Extract values from raster layers using sampling locations.
Value_Extract<-raster::extract(predictors,Mos_points,df=T)
# Select only varibles from column 2. The first column is ID, and it is useless
my_data <- Value_Extract[, c(2:ncol(Value_Extract))]
# Look at first few rows
head(my_data)
# Combine corelation coeffieints and tests
library(correlation)
library(psych)
head(correlation::correlation(plot_data, include_factors = TRUE, method = "pearson"))
Parameter1 | Parameter2 | r | 95% CI | t | df | p | Method | n_Obs
---------------------------------------------------------------------------------------------
DEM | EVI_Mean_1km | -0.23 | [-0.32, -0.15] | -5.19 | 467 | < .001 | Pearson | 469
DEM | EVI_Mean_500m | -0.23 | [-0.32, -0.15] | -5.20 | 467 | < .001 | Pearson | 469
DEM | NDVI_Mean_1km | -0.07 | [-0.16, 0.02] | -1.47 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_200m | -0.07 | [-0.16, 0.02] | -1.60 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_2km | -0.08 | [-0.17, 0.01] | -1.82 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_500m | -0.07 | [-0.15, 0.03] | -1.41 | 467 | > .999 | Pearson | 469
# Plot correlation matrix
library("PerformanceAnalytics")
chart.Correlation(my_data, histogram=TRUE, pch=19)
# Making histogram
multi.hist(my_data,dcol="red")
We see that some variables tend to be strong correlated, particularly EVI, NDVI, temperature data. Hence, correlation test is conducted to see if there is a signigicant correlation or not for those variables.
EVI variablesEVI_variable<-my_data[,c(2,3,4)]
EVI_test=correlation::correlation(EVI_variable, include_factors = TRUE, method = "pearson")
# setwd("F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Test")
# write.csv(EVI_test,file="EVI_corelation.csv",row.names = F)
EVI_test
Parameter1 | Parameter2 | r | 95% CI | t | df | p | Method | n_Obs
--------------------------------------------------------------------------------------
EVI_max | EVI_mean | 0.97 | [0.96, 0.97] | 81.15 | 467 | < .001 | Pearson | 469
EVI_max | EVI_min | 0.87 | [0.85, 0.89] | 38.54 | 467 | < .001 | Pearson | 469
EVI_mean | EVI_min | 0.96 | [0.95, 0.97] | 73.37 | 467 | < .001 | Pearson | 469
NDVI variableslibrary(dplyr)
NDVI_varaible<-my_data %>% select(NDVI_max:NDVI_min)
NDVI_test=correlation::correlation(NDVI_varaible, include_factors = TRUE, method = "pearson")
NDVI_test
Parameter1 | Parameter2 | r | 95% CI | t | df | p | Method | n_Obs
--------------------------------------------------------------------------------------
NDVI_max | NDVI_mean | 0.96 | [0.95, 0.96] | 69.89 | 467 | < .001 | Pearson | 469
NDVI_max | NDVI_min | 0.79 | [0.75, 0.82] | 27.68 | 467 | < .001 | Pearson | 469
NDVI_mean | NDVI_min | 0.93 | [0.92, 0.95] | 56.90 | 467 | < .001 | Pearson | 469
# write.csv(NDVI_test,file="NDVI_corelation.csv",row.names = F)
Temp variablesTemp_variable<-my_data %>% select(Temp_lag_mean:Temp_min)
Temp_test=correlation::correlation(Temp_variable, include_factors = TRUE, method = "pearson")
Temp_test
Parameter1 | Parameter2 | r | 95% CI | t | df | p | Method | n_Obs
-----------------------------------------------------------------------------------------
Temp_lag_mean | Temp_max | 0.87 | [0.85, 0.89] | 38.66 | 467 | < .001 | Pearson | 469
Temp_lag_mean | Temp_mean | 0.80 | [0.76, 0.83] | 28.34 | 467 | < .001 | Pearson | 469
Temp_lag_mean | Temp_min | 0.28 | [0.20, 0.37] | 6.41 | 467 | < .001 | Pearson | 469
Temp_max | Temp_mean | 0.86 | [0.83, 0.88] | 35.90 | 467 | < .001 | Pearson | 469
Temp_max | Temp_min | 0.34 | [0.25, 0.41] | 7.73 | 467 | < .001 | Pearson | 469
Temp_mean | Temp_min | 0.72 | [0.67, 0.76] | 22.47 | 467 | < .001 | Pearson | 469
As the land cover has five classes, so we need to convert it to factors, not integers.
# Encode land cover as factor and number of mosquito (Count) as integer
my_data$Landcover<-as.factor(my_data$Landcover)
# Add Count column to the dataset
my_data$Count<-Mos_points$Sum_ttl
my_data$Count<-as.integer(my_data$Count) # Convert it to be count data (integer)
# Check overdisspersion for Count variable
print( mean(my_data$Count)); print(var(my_data$Count)) # Our count data is overdispersion as variance is greater than mean
[1] 34.78038
[1] 30698.91
# If someone wants to export data to your local computer
# setwd("F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito")
# write.csv(my_data,"Myfile.csv")
# Summary of our dataset all variables
# summary(my_data)
Splitting data into training and testing sets
library(caTools)
# Splitting data into training and testing
set.seed(123)
sample = sample.split(my_data$Count, SplitRatio = 0.8)
train = subset(my_data, sample == TRUE)
test = subset(my_data, sample == FALSE)
library(caret)
ne_model<-glm(Count~., data=train, family = negative.binomial(theta = 1) ) # Set the theta=2 provides better model
glm.fit: algorithm did not converge
predictions<- predict(ne_model,newdata=test)
data.frame(R2 = R2(predictions, test$Count), RMSE = RMSE(predictions, test$Count), MAE = MAE(predictions,test$Count))
variable_select<-stepAIC(ne_model, trace = F)
glm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not converge
variable_select$anova
Stepwise Model Path
Analysis of Deviance Table
Initial Model:
Count ~ DEM + EVI_max + EVI_mean + EVI_min + Landcover + NDVI_max +
NDVI_mean + NDVI_min + NDWI + Pop_VN + Rain_lag_mean + Rain_max +
Rain_mean + Rain_min + Temp_lag_mean + Temp_max + Temp_mean +
Temp_min
Final Model:
Count ~ EVI_max + EVI_mean + EVI_min + NDVI_max + NDVI_mean +
NDVI_min + NDWI + Pop_VN + Rain_lag_mean + Rain_max + Rain_mean +
Rain_min + Temp_lag_mean + Temp_max + Temp_mean
Step Df Deviance Resid. Df Resid. Dev AIC
1 361 30396.867 32250.929
2 - Landcover 3 7693.381 364 22703.487 24551.548
3 - DEM 1 21664.025 365 1039.461 2885.522
library(raster); library(sf)
# Specify the path that contains the raster data layers
path<-"F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Cover_Ratio"
# List all files with tif extension from above folder
predictors<-list.files(path=path,pattern = "tif$",full.names = T)
# Stack all layers together
predictors<-stack(predictors)
# Extract values from raster layers using sampling locations.
Value_Extract<-raster::extract(predictors,Mos_points,df=T)
# Select only varibles from column 2. The first column is ID, and it is useless
my_data <- Value_Extract[, c(2:ncol(Value_Extract))]
# Look at first few rows
head(my_data)
write.csv(my_data,file="Buffered_computed.csv",row.names =F)
# Combine corelation coeffieints and tests
library(correlation)
library(psych)
head(correlation::correlation(plot_data, include_factors = TRUE, method = "pearson"))
Parameter1 | Parameter2 | r | 95% CI | t | df | p | Method | n_Obs
---------------------------------------------------------------------------------------------
DEM | EVI_Mean_1km | -0.23 | [-0.32, -0.15] | -5.19 | 467 | < .001 | Pearson | 469
DEM | EVI_Mean_500m | -0.23 | [-0.32, -0.15] | -5.20 | 467 | < .001 | Pearson | 469
DEM | NDVI_Mean_1km | -0.07 | [-0.16, 0.02] | -1.47 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_200m | -0.07 | [-0.16, 0.02] | -1.60 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_2km | -0.08 | [-0.17, 0.01] | -1.82 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_500m | -0.07 | [-0.15, 0.03] | -1.41 | 467 | > .999 | Pearson | 469
# Plot correlation matrix
library("PerformanceAnalytics")
chart.Correlation(my_data, histogram=TRUE, pch=19)
# Making histogram
multi.hist(my_data,dcol="red")
# Add Count column to the dataset
my_data$Count<-Mos_points$Sum_ttl
my_data$Count<-as.integer(my_data$Count) # Convert it to be count data (integer)
head(my_data)
library(caTools)
# Splitting data into training and testing
set.seed(123)
sample = sample.split(my_data$Count, SplitRatio = 0.80)
train = subset(my_data, sample == TRUE)
test = subset(my_data, sample == FALSE)
ne_model<-glm(Count~., data=train, family = negative.binomial(theta = 1)) # Set the theta=2 provides better model
glm.fit: algorithm did not converge
predictions<- predict(ne_model,newdata=test)
data.frame(R2 = R2(predictions, test$Count), RMSE = RMSE(predictions, test$Count), MAE = MAE(predictions,test$Count))
There are six variables excluded from the final model, NDVI_200m, NDVI_500m, EVI_1km, EVI_2km, Mean_rain_1km, Urban_200m.
variable_select<-stepAIC(ne_model, trace = F,direction ="backward")
glm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not converge
variable_select$anova
Stepwise Model Path
Analysis of Deviance Table
Initial Model:
Count ~ DEM + EVI_Mean_1km + EVI_Mean_2km + EVI_Mean_500m + NDVI_Mean_1km +
NDVI_Mean_200m + NDVI_Mean_2km + NDVI_Mean_500m + NDWI_1km +
NDWI_200m + NDWI_2km + NDWI_500m + Pop_1km + Pop_2km + Rain_lag_1km +
Rain_lag_2km + Rain_max_1km + Rain_max_2km + Rain_Mean_1km +
Rain_Mean_2km + Rain_min_1km + Rain_min_2km + Rice_1km +
Rice_200m + Rice_2km + Rice_500m + Temp_lag_1km + Temp_Mean_1km +
Temp_Mean_2km + Urban_1km + Urban_200m + Urban_2km + Urban_500m
Final Model:
Count ~ EVI_Mean_500m + NDVI_Mean_1km + NDVI_Mean_2km + NDWI_1km +
NDWI_200m + NDWI_2km + NDWI_500m + Pop_1km + Pop_2km + Rain_lag_1km +
Rain_lag_2km + Rain_max_1km + Rain_max_2km + Rain_Mean_2km +
Rain_min_1km + Rain_min_2km + Rice_1km + Rice_200m + Rice_2km +
Rice_500m + Temp_lag_1km + Temp_Mean_1km + Temp_Mean_2km +
Urban_1km + Urban_2km + Urban_500m
Step Df Deviance Resid. Df Resid. Dev AIC
1 352 840.8793 2735.734
2 - NDVI_Mean_200m 1 18.4776746 353 822.4016 2715.256
3 - Urban_200m 1 0.8779154 354 821.5237 2712.378
4 - Rain_Mean_1km 1 0.4261576 355 821.9498 2710.804
5 - NDVI_Mean_500m 1 0.5709151 356 822.5208 2709.375
6 - EVI_Mean_1km 1 0.8417544 357 823.3625 2708.217
7 - EVI_Mean_2km 1 1.8423857 358 825.2049 2708.059
library(pscl)
zero_model<-zeroinfl(Count~., data=train, dist = "negbin",link = "logit")
system is computationally singular: reciprocal condition number = 2.51058e-22FALSE
predictions<- predict(zero_model,newdata=test)
data.frame(R2 = R2(predictions, test$Count), RMSE = RMSE(predictions, test$Count), MAE = MAE(predictions,test$Count))
library(raster); library(sf)
# Specify the path that contains the raster data layers
path<-"F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Cover_Ratio\\Variable_Selection"
# List all files with tif extension from above folder
predictors<-list.files(path=path,pattern = "tif$",full.names = T)
# Stack all layers together
predictors<-stack(predictors)
# Extract values from raster layers using sampling locations.
Value_Extract<-raster::extract(predictors,Mos_points,df=T)
# Select only varibles from column 2. The first column is ID, and it is useless
my_data <- Value_Extract[, c(2:ncol(Value_Extract))]
# Look at first few rows
head(my_data)
# Combine corelation coeffieints and tests
library(correlation)
library(psych)
head(correlation::correlation(plot_data, include_factors = TRUE, method = "pearson"))
Parameter1 | Parameter2 | r | 95% CI | t | df | p | Method | n_Obs
---------------------------------------------------------------------------------------------
DEM | EVI_Mean_1km | -0.23 | [-0.32, -0.15] | -5.19 | 467 | < .001 | Pearson | 469
DEM | EVI_Mean_500m | -0.23 | [-0.32, -0.15] | -5.20 | 467 | < .001 | Pearson | 469
DEM | NDVI_Mean_1km | -0.07 | [-0.16, 0.02] | -1.47 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_200m | -0.07 | [-0.16, 0.02] | -1.60 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_2km | -0.08 | [-0.17, 0.01] | -1.82 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_500m | -0.07 | [-0.15, 0.03] | -1.41 | 467 | > .999 | Pearson | 469
# Plot correlation matrix
library("PerformanceAnalytics")
chart.Correlation(my_data, histogram=TRUE, pch=19)
# Making histogram
multi.hist(my_data,dcol="red")
# Add Count column to the dataset
my_data$Count<-Mos_points$Sum_ttl
my_data$Count<-as.integer(my_data$Count) # Convert it to be count data (integer)
head(my_data)
library(caTools)
# Splitting data into training and testing
set.seed(123)
sample = sample.split(my_data$Count, SplitRatio = 0.8)
train = subset(my_data, sample == TRUE)
test = subset(my_data, sample == FALSE)
library(caret)
ne_model<-glm(Count~., data=train, family = negative.binomial(theta = 1) ) # Set the theta=2 provides better model
predictions<- predict(ne_model,newdata=test)
data.frame(R2 = R2(predictions, test$Count), RMSE = RMSE(predictions, test$Count), MAE = MAE(predictions,test$Count))
variable_select<-stepAIC(ne_model, trace = F)
glm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not converge
variable_select$anova
Stepwise Model Path
Analysis of Deviance Table
Initial Model:
Count ~ DEM + EVI_Mean_500m + NDVI_Mean_1km + NDVI_Mean_2km +
NDWI_1km + NDWI_200m + NDWI_2km + NDWI_500m + Pop_1km + Pop_2km +
Rain_lag_1km + Rain_lag_2km + Rain_max_1km + Rain_max_2km +
Rain_Mean_2km + Rain_min_1km + Rain_min_2km + Rice_1km +
Rice_200m + Rice_2km + Rice_500m + Temp_lag_1km + Temp_Mean_1km +
Temp_Mean_2km + Urban_1km + Urban_2km + Urban_500m
Final Model:
Count ~ EVI_Mean_500m + NDVI_Mean_1km + NDVI_Mean_2km + NDWI_1km +
NDWI_200m + NDWI_2km + NDWI_500m + Pop_1km + Pop_2km + Rain_lag_1km +
Rain_lag_2km + Rain_max_1km + Rain_max_2km + Rain_Mean_2km +
Rain_min_1km + Rain_min_2km + Rice_1km + Rice_200m + Rice_2km +
Rice_500m + Temp_lag_1km + Temp_Mean_1km + Temp_Mean_2km +
Urban_1km + Urban_2km + Urban_500m
Step Df Deviance Resid. Df Resid. Dev AIC
1 358 825.2049 2708.059
library(raster); library(sf)
# Specify the path that contains the raster data layers
path<-"F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Cover_Ratio\\Stepwise_1km"
# List all files with tif extension from above folder
predictors<-list.files(path=path,pattern = "tif$",full.names = T)
# Stack all layers together
predictors<-stack(predictors)
# Extract values from raster layers using sampling locations.
Value_Extract<-raster::extract(predictors,Mos_points,df=T)
# Select only varibles from column 2. The first column is ID, and it is useless
my_data <- Value_Extract[, c(2:ncol(Value_Extract))]
# Look at first few rows
head(my_data)
Note: Rice and NDVI had strong correlation >0.7
# Combine corelation coeffieints and tests
library(correlation)
library(psych)
head(correlation::correlation(plot_data, include_factors = TRUE, method = "pearson"))
Parameter1 | Parameter2 | r | 95% CI | t | df | p | Method | n_Obs
---------------------------------------------------------------------------------------------
DEM | EVI_Mean_1km | -0.23 | [-0.32, -0.15] | -5.19 | 467 | < .001 | Pearson | 469
DEM | EVI_Mean_500m | -0.23 | [-0.32, -0.15] | -5.20 | 467 | < .001 | Pearson | 469
DEM | NDVI_Mean_1km | -0.07 | [-0.16, 0.02] | -1.47 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_200m | -0.07 | [-0.16, 0.02] | -1.60 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_2km | -0.08 | [-0.17, 0.01] | -1.82 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_500m | -0.07 | [-0.15, 0.03] | -1.41 | 467 | > .999 | Pearson | 469
# Plot correlation matrix
library("PerformanceAnalytics")
chart.Correlation(my_data, histogram=TRUE, pch=19)
# Making histogram
multi.hist(my_data,dcol="red")
# Add Count column to the dataset
my_data$Count<-Mos_points$Sum_ttl
my_data$Count<-as.integer(my_data$Count) # Convert it to be count data (integer)
head(my_data)
library(caTools)
# Splitting data into training and testing
set.seed(123)
sample = sample.split(my_data$Count, SplitRatio = 0.8)
train = subset(my_data, sample == TRUE)
test = subset(my_data, sample == FALSE)
library(caret)
ne_model<-glm(Count~., data=train, family = negative.binomial(theta = 1) ) # Set the theta=1
predictions<- predict(ne_model,newdata=test)
data.frame(R2 = R2(predictions, test$Count), RMSE = RMSE(predictions, test$Count), MAE = MAE(predictions,test$Count))
variable_select<-stepAIC(ne_model, trace = F)
glm.fit: algorithm did not convergeglm.fit: algorithm did not convergeglm.fit: algorithm did not converge
variable_select$anova
Stepwise Model Path
Analysis of Deviance Table
Initial Model:
Count ~ DEM + NDVI_Mean_1km + NDWI_1km + Pop_1km + Rain_lag_1km +
Rain_max_1km + Rain_min_1km + Rice_1km + Temp_lag_1km
Final Model:
Count ~ DEM + NDVI_Mean_1km + Pop_1km + Rain_lag_1km + Rain_max_1km +
Rain_min_1km
Step Df Deviance Resid. Df Resid. Dev AIC
1 376 1098.774 2945.628
2 - NDWI_1km 1 0.5204301 377 1099.294 2944.149
3 - Rice_1km 1 1.9125103 378 1101.207 2944.061
Sen_output<-predict(predictors, ne_model, type="response",progress="window")
setwd("F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Predicted_Maps")
The working directory was changed to F:/Research/Research_Cooperation/ILRI_Mosquito_Mapping/Mosquito/Predicted_Maps inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
writeRaster(Sen_output, filename = "Risk_Map_1km.tif",overwrite=T)
library(raster); library(sf)
# Specify the path that contains the raster data layers
path<-"F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Cover_Ratio\\2km_buffer"
# List all files with tif extension from above folder
predictors<-list.files(path=path,pattern = "tif$",full.names = T)
# Stack all layers together
predictors<-stack(predictors)
# Extract values from raster layers using sampling locations.
Value_Extract<-raster::extract(predictors,Mos_points,df=T)
# Select only varibles from column 2. The first column is ID, and it is useless
my_data <- Value_Extract[, c(2:ncol(Value_Extract))]
# Look at first few rows
head(my_data)
# Combine corelation coeffieints and tests
library(correlation)
library(psych)
head(correlation::correlation(plot_data, include_factors = TRUE, method = "pearson"))
Parameter1 | Parameter2 | r | 95% CI | t | df | p | Method | n_Obs
---------------------------------------------------------------------------------------------
DEM | EVI_Mean_1km | -0.23 | [-0.32, -0.15] | -5.19 | 467 | < .001 | Pearson | 469
DEM | EVI_Mean_500m | -0.23 | [-0.32, -0.15] | -5.20 | 467 | < .001 | Pearson | 469
DEM | NDVI_Mean_1km | -0.07 | [-0.16, 0.02] | -1.47 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_200m | -0.07 | [-0.16, 0.02] | -1.60 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_2km | -0.08 | [-0.17, 0.01] | -1.82 | 467 | > .999 | Pearson | 469
DEM | NDVI_Mean_500m | -0.07 | [-0.15, 0.03] | -1.41 | 467 | > .999 | Pearson | 469
# Plot correlation matrix
library("PerformanceAnalytics")
chart.Correlation(my_data, histogram=TRUE, pch=19)
# Making histogram
multi.hist(my_data,dcol="red")
# Add Count column to the dataset
my_data$Count<-Mos_points$Sum_ttl
my_data$Count<-as.integer(my_data$Count) # Convert it to be count data (integer)
head(my_data)
library(caTools)
# Splitting data into training and testing
set.seed(123)
sample = sample.split(my_data$Count, SplitRatio = 0.8)
train = subset(my_data, sample == TRUE)
test = subset(my_data, sample == FALSE)
library(caret)
ne_model<-glm(Count~., data=train, family = negative.binomial(theta = 1) ) # Set the theta=1
predictions<- predict(ne_model,newdata=test)
data.frame(R2 = R2(predictions, test$Count), RMSE = RMSE(predictions, test$Count), MAE = MAE(predictions,test$Count))
variable_select<-stepAIC(ne_model, trace = F)
variable_select$anova
Stepwise Model Path
Analysis of Deviance Table
Initial Model:
Count ~ NDVI_Mean_2km + NDWI_2km + Pop_2km + Rain_Mean_2km +
Rice_2km + Temp_Mean_2km
Final Model:
Count ~ NDVI_Mean_2km + Pop_2km + Rain_Mean_2km
Step Df Deviance Resid. Df Resid. Dev AIC
1 379 1193.511 3034.366
2 - Temp_Mean_2km 1 0.12233845 380 1193.634 3032.488
3 - NDWI_2km 1 0.09926757 381 1193.733 3030.588
Sen_output<-predict(predictors, ne_model, type="response",progress="window")
setwd("F:\\Research\\Research_Cooperation\\ILRI_Mosquito_Mapping\\Mosquito\\Predicted_Maps")
The working directory was changed to F:/Research/Research_Cooperation/ILRI_Mosquito_Mapping/Mosquito/Predicted_Maps inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
writeRaster(Sen_output, filename = "Risk_Map_7.tif",overwrite=T)