aedes <- read.csv("0253828-230224095556074.csv", sep="\t")
aedes <- aedes[complete.cases(aedes$decimalLatitude, aedes$decimalLongitude), ]
sample_ae <- aedes %>% sample_n(50000, replace = FALSE)
sp_points_proj <- st_as_sf(sample_ae, coords = c("decimalLongitude", "decimalLatitude"), crs = 4326)
sp_points_proj <- st_transform(sp_points_proj, crs = 4326)
plot(sp_points_proj[,1], main = "")
Remove the attributs to keep only the coordinates
presence_points <- SpatialPointsDataFrame(
coords = aedes[, c("decimalLongitude", "decimalLatitude")],
data = aedes,
proj4string = CRS("+proj=longlat +datum=WGS84")
)
presence <- aedes[,c("decimalLongitude", "decimalLatitude")]
sample_ae <- presence %>% sample_n(50000, replace = FALSE)
colnames(sample_ae)<-c("longitude","latitude")
Generate 50 000 pseudo absences
num_points <- 50000
min_longitude <- -180
max_longitude <- 180
min_latitude <- -90
max_latitude <- 90
random_points <- data.frame(
longitude = runif(num_points, min_longitude, max_longitude),
latitude = runif(num_points, min_latitude, max_latitude)
)
random_points$presence <- 0
sample_ae$presence <- 1
list_covar <- list.files("./mood_tutorial/NEO_monthly/", pattern = "TIFF", recursive = T, full.names = T)
length(list_covar)
## [1] 7392
rast(list_covar[20])
## class : SpatRaster
## dimensions : 1800, 3600, 1 (nrow, ncol, nlyr)
## resolution : 0.1, 0.1 (x, y)
## extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : MCD43C3_M_BSA_2001-09.FLOAT.TIFF
## name : MCD43C3_M_BSA_2001-09.FLOAT
Based on the biology Average Land Surface Temperature (Day), Rainfall, Topography
Selection of one raster for each covariates
# Temperature
temp = rast(grep("MOD_LSTD_CLIM_M", list_covar, value = T)[1])
setMinMax(temp)
NAflag(temp) = 99999
plot(temp)
title(main = "Temperature")
# Rainfall
rainfall = rast(grep("TRMM_3B43M", list_covar, value = T)[1])
setMinMax(rainfall)
NAflag(rainfall) = 99999
plot(rainfall)
title(main = "Rainfall")
# Topography
topo = rast(grep("SRTM_RAMP2_TOPO", list_covar, value = T)[1])
setMinMax(topo)
NAflag(topo) = 99999
plot(topo)
title(main = "Topology")
mosquito <- rbind(sample_ae, random_points)
a <- mosquito[,c("longitude", "latitude")]
a_temp <- raster::extract(temp, a)
colnames(a_temp) <- c("ID", "temperature")
mosquito <- cbind(mosquito, a_temp)
a_rain <- raster::extract(rainfall, a)
colnames(a_rain) <- c("ID", "rainfall")
mosquito <- cbind(mosquito, a_rain)
a_topo <- raster::extract(topo, a)
colnames(a_topo) <- c("ID", "topography")
mosquito <- cbind(mosquito, a_topo)
mosquito <- mosquito[,c("longitude", "latitude", "presence", "temperature", "rainfall", "topography")]
NA_mosquito <- na.omit(mosquito)
model <- glm(presence ~ temperature + rainfall + topography, data = NA_mosquito, family = binomial)
summary(model)
##
## Call:
## glm(formula = presence ~ temperature + rainfall + topography,
## family = binomial, data = NA_mosquito)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.4227 0.2416 0.4138 0.4875 2.9522
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.9385136 0.0373852 78.60 <2e-16 ***
## temperature -0.0449537 0.0013297 -33.81 <2e-16 ***
## rainfall 0.0042904 0.0001768 24.27 <2e-16 ***
## topography -0.0025280 0.0000367 -68.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 40501 on 47705 degrees of freedom
## Residual deviance: 31257 on 47702 degrees of freedom
## AIC: 31265
##
## Number of Fisher Scoring iterations: 5
modEvA::RsqGLM(model)
## $CoxSnell
## [1] 0.1761672
##
## $Nagelkerke
## [1] 0.3079043
##
## $McFadden
## [1] 0.2282592
##
## $Tjur
## [1] 0.2559452
##
## $sqPearson
## [1] 0.2613938
predicted_probabilities <- predict(model, type = "response")
log_loss <- function(y_true, y_pred) {
-mean(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred))
}
true_labels <- mosquito$presence # Vraies valeurs de classe
loss <- log_loss(true_labels, predicted_probabilities)
## Warning in y_true * log(y_pred): la taille d'un objet plus long n'est pas
## multiple de la taille d'un objet plus court
## Warning in (1 - y_true) * log(1 - y_pred): la taille d'un objet plus long n'est
## pas multiple de la taille d'un objet plus court
print(loss)
## [1] 1.283899