# Package loading (Chargement des packages)
# Load packages
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
library(ggplot2)
library(leaflet)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(caret)
## Loading required package: lattice
library(dbscan)
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
##
## as.dendrogram
# 1. Load CSV
# Read the raw dataset. Character columns are kept as plain strings here and
# are converted to factors selectively in a later step.
data <- read.csv(
  "C:/Users/test2/Downloads/Location Intelligence Cybersecurity 2025.csv",
  stringsAsFactors = FALSE
)

# 2. Clean column names
# Remove literal spaces, then replace every "." with "_". Both patterns are
# literal strings, so they are matched with fixed = TRUE rather than as
# regular expressions.
colnames(data) <- gsub(" ", "", colnames(data), fixed = TRUE)
colnames(data) <- gsub(".", "_", colnames(data), fixed = TRUE)

# 3. Identify key columns
# Names are given in their cleaned form (after the renaming above).
lat_col <- "Latitude"
lon_col <- "Longitude"
target_col <- "Cyber_Attack_Type"

# Check if columns exist
# all() reduces the membership test to a single TRUE/FALSE, which is what a
# scalar `if` condition requires; the script stops early if either coordinate
# column or the target column is absent.
if (!all(c(lat_col, lon_col) %in% colnames(data))) {
  stop("Latitude or Longitude columns not found.")
}
if (!(target_col %in% colnames(data))) {
  stop("Target variable not found.")
}
# 4. Filter rows with valid coordinates
# Keep only rows where both latitude and longitude are present.
data_geo <- data[!is.na(data[[lat_col]]) & !is.na(data[[lon_col]]), ]

# 5. Convert target variable to factor
data_geo[[target_col]] <- as.factor(data_geo[[target_col]])

# 6. Identify categorical and numeric columns
# vapply() fixes the result type to one logical per column, so the selection
# behaves the same even when the data frame has no columns of a given kind.
cat_cols <- names(data_geo)[vapply(data_geo, is.character, logical(1))]
cat_cols <- setdiff(cat_cols, target_col)
num_cols <- names(data_geo)[vapply(data_geo, is.numeric, logical(1))]

# 7. Handle categorical variables with too many levels (>53)
# Count distinct values per character column and keep only those with at most
# 53 (presumably chosen to match randomForest's limit on categories per
# predictor -- confirm). Kept columns become factors; the others stay character
# and are left out of `cat_cols_keep`.
cat_cols_levels <- vapply(
  data_geo[cat_cols],
  function(x) length(unique(x)),
  integer(1)
)
cat_cols_keep <- names(cat_cols_levels[cat_cols_levels <= 53])
cat_cols_dropped <- setdiff(cat_cols, cat_cols_keep)
if (length(cat_cols_dropped) > 0) {
  # Make the exclusion visible instead of dropping these columns silently.
  message(
    "Categorical columns with more than 53 distinct values are not used: ",
    paste(cat_cols_dropped, collapse = ", ")
  )
}
if (length(cat_cols_keep) > 0) {
  data_geo[cat_cols_keep] <- lapply(data_geo[cat_cols_keep], as.factor)
}

# 8. Impute missing numeric values
# Replace each numeric column's NAs with that column's median. The loop simply
# does nothing when there are no numeric columns, so no outer guard is needed.
for (col in num_cols) {
  data_geo[[col]][is.na(data_geo[[col]])] <- median(data_geo[[col]], na.rm = TRUE)
}
# 9. Create spatial object
# Build point geometries from the longitude (x) and latitude (y) columns and
# tag them with CRS 4326.
coord_cols <- c(lon_col, lat_col)
data_sf <- st_as_sf(data_geo, coords = coord_cols, crs = 4326)

# 10. Basic spatial visualization
# One popup string per point: the target column's name followed by that row's
# attack type.
popup_text <- paste(target_col, ":", data_sf[[target_col]])

# Draw every record as a semi-transparent red dot on the default tile layer.
leaflet(data_sf) %>%
  addTiles() %>%
  addCircleMarkers(
    radius = 5,
    color = "red",
    stroke = FALSE,
    fillOpacity = 0.6,
    popup = popup_text
  )
# 11. Spatial clustering with DBSCAN
# Cluster the raw point coordinates extracted from the sf geometry.
# NOTE(review): the coordinates are lon/lat in CRS 4326, so eps = 0.5 is a
# radius expressed in degrees rather than a ground distance -- confirm this is
# the intended neighbourhood size.
coords <- st_coordinates(data_sf)
cluster <- dbscan::dbscan(coords, eps = 0.5, minPts = 5)

# Store the cluster label of each point as a factor so it maps to a discrete
# colour scale in the plot below.
data_sf$cluster <- factor(cluster$cluster)

# Plot clusters
ggplot(data = data_sf) +
  geom_sf(mapping = aes(color = cluster), size = 2) +
  labs(title = "Geographic clusters of attacks") +
  theme_minimal()

# 12. Prepare data for Random Forest
# Fixed seed so the 80/20 partition on the target column (and the forest
# below) is reproducible.
set.seed(123)
train_index <- createDataPartition(data_sf[[target_col]], p = 0.8, list = FALSE)
train <- data_sf[train_index, ]
test <- data_sf[-train_index, ]

# Drop geometry for modeling
# The model works on plain data frames; `train` / `test` keep their geometry
# for mapping.
train_df <- st_drop_geometry(train)
test_df <- st_drop_geometry(test)

# Model columns
# Predictors are the numeric and retained categorical columns that still exist
# after the geometry has been dropped.
# NOTE(review): the columns passed as `coords` to st_as_sf() are, by default,
# removed from the attribute table, so Latitude/Longitude appear not to be
# among the predictors -- confirm that this is intended.
predictor_cols <- intersect(c(num_cols, cat_cols_keep), colnames(train_df))
if (length(predictor_cols) == 0) {
  # Without this guard mtry would be 0 and the one-column subset below would
  # collapse to a vector, producing an obscure failure inside randomForest().
  stop("No usable predictor columns found for the Random Forest model.")
}
model_cols <- c(target_col, predictor_cols)

# 13. Faster Random Forest
# A small forest (50 trees) with floor(sqrt(p)) candidate variables per split,
# where p is the number of predictors; importance = TRUE stores the variable
# importance measures for later plotting.
rf_model <- randomForest(
  as.formula(paste(target_col, "~ .")),
  data = train_df[ , model_cols],
  importance = TRUE,
  ntree = 50,
  mtry = floor(sqrt(length(model_cols)-1))
)
print(rf_model)
##
## Call:
## randomForest(formula = as.formula(paste(target_col, "~ .")), data = train_df[, model_cols], importance = TRUE, ntree = 50, mtry = floor(sqrt(length(model_cols) - 1)))
## Type of random forest: classification
## Number of trees: 50
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 93.27%
## Confusion matrix:
## Brute Force Attack Credential Stuffing
## Brute Force Attack 299 285
## Credential Stuffing 265 260
## Cross-Site Scripting (Xss) 289 257
## Ddos 263 262
## Dns Spoofing 253 276
## Malware 288 253
## Man-In-The-Middle 289 287
## Phishing 284 247
## Ransomware 287 267
## Session Hijacking 286 249
## Social Engineering 281 276
## Spyware 295 246
## Sql Injection 280 257
## Trojan Horse 271 265
## Zero-Day Exploit 292 281
## Cross-Site Scripting (Xss) Ddos Dns Spoofing Malware
## Brute Force Attack 255 236 244 252
## Credential Stuffing 287 211 245 223
## Cross-Site Scripting (Xss) 259 251 246 224
## Ddos 250 260 231 247
## Dns Spoofing 225 248 241 227
## Malware 235 257 244 236
## Man-In-The-Middle 250 229 257 245
## Phishing 250 240 265 229
## Ransomware 265 241 265 262
## Session Hijacking 268 279 219 228
## Social Engineering 247 220 269 227
## Spyware 248 254 257 239
## Sql Injection 259 236 245 256
## Trojan Horse 249 227 237 240
## Zero-Day Exploit 261 226 272 251
## Man-In-The-Middle Phishing Ransomware
## Brute Force Attack 224 221 238
## Credential Stuffing 219 231 201
## Cross-Site Scripting (Xss) 233 228 211
## Ddos 234 214 206
## Dns Spoofing 255 244 204
## Malware 220 232 217
## Man-In-The-Middle 238 210 241
## Phishing 208 230 203
## Ransomware 265 212 221
## Session Hijacking 255 205 198
## Social Engineering 226 235 245
## Spyware 218 216 224
## Sql Injection 213 219 215
## Trojan Horse 238 234 218
## Zero-Day Exploit 235 215 205
## Session Hijacking Social Engineering Spyware
## Brute Force Attack 196 197 199
## Credential Stuffing 216 207 212
## Cross-Site Scripting (Xss) 199 230 216
## Ddos 213 213 201
## Dns Spoofing 223 205 214
## Malware 218 214 208
## Man-In-The-Middle 204 207 234
## Phishing 196 247 199
## Ransomware 192 197 211
## Session Hijacking 231 208 209
## Social Engineering 243 206 207
## Spyware 191 216 210
## Sql Injection 212 219 202
## Trojan Horse 222 212 196
## Zero-Day Exploit 199 242 199
## Sql Injection Trojan Horse Zero-Day Exploit
## Brute Force Attack 216 247 223
## Credential Stuffing 219 250 222
## Cross-Site Scripting (Xss) 185 245 253
## Ddos 224 204 211
## Dns Spoofing 210 222 227
## Malware 208 226 260
## Man-In-The-Middle 197 223 220
## Phishing 200 220 238
## Ransomware 186 202 222
## Session Hijacking 231 213 239
## Social Engineering 207 193 203
## Spyware 227 206 237
## Sql Injection 206 214 223
## Trojan Horse 211 223 214
## Zero-Day Exploit 221 233 203
## class.error
## Brute Force Attack 0.9153454
## Credential Stuffing 0.9250288
## Cross-Site Scripting (Xss) 0.9265457
## Ddos 0.9242645
## Dns Spoofing 0.9306275
## Malware 0.9328783
## Man-In-The-Middle 0.9325970
## Phishing 0.9334491
## Ransomware 0.9367668
## Session Hijacking 0.9343377
## Social Engineering 0.9408895
## Spyware 0.9397245
## Sql Injection 0.9403935
## Trojan Horse 0.9354932
## Zero-Day Exploit 0.9425743
# Plot the variable importance measures stored in the fitted forest.
varImpPlot(rf_model)

# 14. Predictions and evaluation
# Predict the attack type for the held-out rows, then compare the predictions
# with the actual labels in a confusion matrix.
actual <- test_df[[target_col]]
pred <- predict(rf_model, newdata = test_df[ , model_cols])
conf_mat <- confusionMatrix(data = pred, reference = actual)
print(conf_mat)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Brute Force Attack Credential Stuffing
## Brute Force Attack 66 66
## Credential Stuffing 64 70
## Cross-Site Scripting (Xss) 74 67
## Ddos 60 66
## Dns Spoofing 54 76
## Malware 50 72
## Man-In-The-Middle 64 49
## Phishing 64 45
## Ransomware 52 31
## Session Hijacking 46 53
## Social Engineering 70 57
## Spyware 50 41
## Sql Injection 49 54
## Trojan Horse 54 51
## Zero-Day Exploit 66 68
## Reference
## Prediction Cross-Site Scripting (Xss) Ddos Dns Spoofing
## Brute Force Attack 73 61 63
## Credential Stuffing 48 69 56
## Cross-Site Scripting (Xss) 74 57 64
## Ddos 59 52 58
## Dns Spoofing 55 83 53
## Malware 61 60 78
## Man-In-The-Middle 67 52 76
## Phishing 62 65 62
## Ransomware 49 43 41
## Session Hijacking 61 48 54
## Social Engineering 53 55 48
## Spyware 50 62 51
## Sql Injection 54 48 50
## Trojan Horse 53 59 48
## Zero-Day Exploit 62 44 66
## Reference
## Prediction Malware Man-In-The-Middle Phishing Ransomware
## Brute Force Attack 67 67 78 66
## Credential Stuffing 58 66 84 63
## Cross-Site Scripting (Xss) 67 77 68 61
## Ddos 57 59 61 65
## Dns Spoofing 57 76 55 55
## Malware 51 51 60 54
## Man-In-The-Middle 78 55 61 57
## Phishing 62 55 53 50
## Ransomware 57 59 51 63
## Session Hijacking 47 57 45 62
## Social Engineering 61 56 40 60
## Spyware 63 47 54 57
## Sql Injection 43 59 58 56
## Trojan Horse 46 50 45 46
## Zero-Day Exploit 65 48 50 58
## Reference
## Prediction Session Hijacking Social Engineering Spyware
## Brute Force Attack 69 77 64
## Credential Stuffing 54 61 61
## Cross-Site Scripting (Xss) 72 65 66
## Ddos 56 59 54
## Dns Spoofing 62 53 63
## Malware 67 61 71
## Man-In-The-Middle 58 67 64
## Phishing 45 56 51
## Ransomware 47 60 36
## Session Hijacking 61 54 61
## Social Engineering 54 55 57
## Spyware 55 54 56
## Sql Injection 66 43 50
## Trojan Horse 54 51 55
## Zero-Day Exploit 59 55 62
## Reference
## Prediction Sql Injection Trojan Horse Zero-Day Exploit
## Brute Force Attack 67 67 66
## Credential Stuffing 76 54 67
## Cross-Site Scripting (Xss) 67 64 68
## Ddos 57 69 60
## Dns Spoofing 65 55 57
## Malware 66 64 55
## Man-In-The-Middle 50 56 55
## Phishing 40 50 62
## Ransomware 53 52 53
## Session Hijacking 50 57 54
## Social Engineering 46 51 66
## Spyware 56 51 64
## Sql Injection 55 62 53
## Trojan Horse 50 48 39
## Zero-Day Exploit 65 64 64
##
## Overall Statistics
##
## Accuracy : 0.067
## 95% CI : (0.0627, 0.0714)
## No Information Rate : 0.0675
## P-Value [Acc > NIR] : 0.601287
##
## Kappa : 3e-04
##
## Mcnemar's Test P-Value : 0.004938
##
## Statistics by Class:
##
## Class: Brute Force Attack Class: Credential Stuffing
## Sensitivity 0.074745 0.08083
## Specificity 0.922056 0.92789
## Pos Pred Value 0.064897 0.07361
## Neg Pred Value 0.932295 0.93439
## Prevalence 0.067487 0.06619
## Detection Rate 0.005044 0.00535
## Detection Prevalence 0.077729 0.07268
## Balanced Accuracy 0.498400 0.50436
## Class: Cross-Site Scripting (Xss) Class: Ddos
## Sensitivity 0.083995 0.060606
## Specificity 0.923216 0.931294
## Pos Pred Value 0.073195 0.058296
## Neg Pred Value 0.933157 0.933891
## Prevalence 0.067334 0.065576
## Detection Rate 0.005656 0.003974
## Detection Prevalence 0.077270 0.068175
## Balanced Accuracy 0.503606 0.495950
## Class: Dns Spoofing Class: Malware
## Sensitivity 0.061060 0.058020
## Specificity 0.929109 0.928718
## Pos Pred Value 0.057671 0.055375
## Neg Pred Value 0.933005 0.931925
## Prevalence 0.066341 0.067181
## Detection Rate 0.004051 0.003898
## Detection Prevalence 0.070238 0.070391
## Balanced Accuracy 0.495085 0.493369
## Class: Man-In-The-Middle Class: Phishing Class: Ransomware
## Sensitivity 0.062358 0.061414 0.072165
## Specificity 0.930011 0.937076 0.943985
## Pos Pred Value 0.060506 0.064477 0.084337
## Neg Pred Value 0.932074 0.933942 0.934344
## Prevalence 0.067411 0.065958 0.066723
## Detection Rate 0.004204 0.004051 0.004815
## Detection Prevalence 0.069474 0.062825 0.057093
## Balanced Accuracy 0.496185 0.499245 0.508075
## Class: Session Hijacking Class: Social Engineering
## Sensitivity 0.069397 0.063146
## Specificity 0.938632 0.936625
## Pos Pred Value 0.075309 0.066345
## Neg Pred Value 0.933355 0.933415
## Prevalence 0.067181 0.066570
## Detection Rate 0.004662 0.004204
## Detection Prevalence 0.061908 0.063360
## Balanced Accuracy 0.504014 0.499885
## Class: Spyware Class: Sql Injection Class: Trojan Horse
## Sensitivity 0.06429 0.063731 0.055556
## Specificity 0.93818 0.939039 0.942635
## Pos Pred Value 0.06905 0.068750 0.064085
## Neg Pred Value 0.93359 0.934223 0.933847
## Prevalence 0.06657 0.065958 0.066035
## Detection Rate 0.00428 0.004204 0.003669
## Detection Prevalence 0.06198 0.061143 0.057245
## Balanced Accuracy 0.50124 0.501385 0.499095
## Class: Zero-Day Exploit
## Sensitivity 0.072480
## Specificity 0.931809
## Pos Pred Value 0.071429
## Neg Pred Value 0.932803
## Prevalence 0.067487
## Detection Rate 0.004891
## Detection Prevalence 0.068481
## Balanced Accuracy 0.502145
# 15. Interactive prediction map
# Attach the predictions to the spatial test set so they can be mapped.
test$Predicted <- pred

# One colour per predicted attack type, taken from the topo.colors palette.
attack_types <- levels(test$Predicted)
colors <- colorFactor(
  palette = topo.colors(length(attack_types)),
  domain = attack_types
)

# Popup text per point: actual label, an HTML line break, predicted label.
pred_popup <- paste(
  "Actual:", test[[target_col]], "<br>",
  "Predicted:", test$Predicted
)

# Test points coloured by predicted class, with a legend in the bottom-right
# corner.
leaflet(test) %>%
  addTiles() %>%
  addCircleMarkers(
    radius = 5,
    color = ~colors(Predicted),
    stroke = FALSE,
    fillOpacity = 0.7,
    popup = pred_popup
  ) %>%
  addLegend(
    position = "bottomright",
    pal = colors,
    values = ~Predicted,
    title = "Predicted attack type"
  )