R Markdown

Loading packages

# Load packages
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
library(ggplot2)
library(leaflet)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(caret)
## Loading required package: lattice
library(dbscan)
## 
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
# 1. Load CSV
# NOTE(review): this is an absolute, machine-specific path; it must be adjusted
# before the script can run anywhere else.
data <- read.csv(
  "C:/Users/test2/Downloads/Location Intelligence Cybersecurity 2025.csv",
  stringsAsFactors = FALSE
)

# 2. Clean column names
# Strip any remaining spaces, then turn every dot into an underscore.
# read.csv() with its default check.names = TRUE rewrites characters that are
# not valid in R names (such as spaces) as dots, hence the second substitution.
# fixed = TRUE treats both patterns as literal text, so no regex escaping.
colnames(data) <- gsub(" ", "", colnames(data), fixed = TRUE)
colnames(data) <- gsub(".", "_", colnames(data), fixed = TRUE)

# 3. Identify key columns
lat_col <- "Latitude"
lon_col <- "Longitude"
target_col <- "Cyber_Attack_Type"

# Check if columns exist
# `if` needs a single TRUE/FALSE, so the two membership tests are collapsed
# with all() rather than combined with the elementwise `&` operator.
if (!all(c(lat_col, lon_col) %in% colnames(data))) {
  stop("Latitude or Longitude columns not found.")
}
if (!(target_col %in% colnames(data))) {
  stop("Target variable not found.")
}

# 4. Filter rows with valid coordinates
# Keep only rows where both latitude and longitude are present (non-NA).
data_geo <- data[!is.na(data[[lat_col]]) & !is.na(data[[lon_col]]), ]

# 5. Convert target variable to factor
data_geo[[target_col]] <- as.factor(data_geo[[target_col]])

# 6. Identify categorical and numeric columns
# vapply() always returns a logical vector (even for a zero-column data frame),
# whereas sapply() would return an empty list there and break the indexing.
cat_cols <- names(data_geo)[vapply(data_geo, is.character, logical(1))]
cat_cols <- setdiff(cat_cols, target_col)
num_cols <- names(data_geo)[vapply(data_geo, is.numeric, logical(1))]

# 7. Handle categorical variables with too many levels (>53)
# randomForest refuses categorical predictors with more than 53 categories, so
# only character columns at or below that limit are converted to factors.
# Columns above the limit stay as character and are not listed in
# cat_cols_keep.
cat_cols_levels <- vapply(
  data_geo[cat_cols],
  function(x) length(unique(x)),
  integer(1)
)
cat_cols_keep <- cat_cols[cat_cols_levels <= 53]

if (length(cat_cols_keep) > 0) {
  data_geo[cat_cols_keep] <- lapply(data_geo[cat_cols_keep], as.factor)
}

# 8. Impute missing numeric values
# Replace each NA with the median of the observed values in the same column.
# The loop is simply a no-op when there are no numeric columns.
# NOTE(review): the medians are computed over all rows, i.e. before the
# train/test split performed later in the script, so held-out rows influence
# the imputed values.
for (col in num_cols) {
  data_geo[[col]][is.na(data_geo[[col]])] <- median(data_geo[[col]], na.rm = TRUE)
}

# 9. Create spatial object
# Turn the cleaned table into point features: x = longitude, y = latitude,
# declared as WGS 84 (EPSG:4326).
data_sf <- st_as_sf(
  data_geo,
  coords = c(lon_col, lat_col),
  crs = 4326
)

# 10. Basic spatial visualization
# One red, outline-free circle per record on the default tile layer; the popup
# shows the target column's name followed by that record's value.
data_sf %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    radius = 5,
    color = "red",
    stroke = FALSE,
    fillOpacity = 0.6,
    popup = paste(target_col, ":", data_sf[[target_col]])
  )
# 11. Spatial clustering with DBSCAN
# Clustering runs on the raw coordinate matrix of the EPSG:4326 points, so
# eps = 0.5 is expressed in degrees, not in metres.
# NOTE(review): confirm that a degree-based neighbourhood is what is intended.
coords <- sf::st_coordinates(data_sf)
cluster <- dbscan::dbscan(x = coords, eps = 0.5, minPts = 5)
# dbscan labels noise points as cluster 0; that label is kept as a factor level.
data_sf$cluster <- factor(cluster[["cluster"]])

# Plot clusters
# Points are coloured by their cluster label.
ggplot(data = data_sf) +
  geom_sf(mapping = aes(color = cluster), size = 2) +
  labs(title = "Geographic clusters of attacks") +
  theme_minimal()

# 12. Prepare data for Random Forest
# Fix the RNG seed so that both the train/test partition and the forest grown
# below are reproducible; the order of these statements therefore matters.
set.seed(123)
# Put 80% of the rows in the training set and keep the rest for testing.
# list = FALSE makes caret return the row indices as a matrix instead of a
# list; caret draws the sample within each level of the outcome.
train_index <- createDataPartition(data_sf[[target_col]], p = 0.8, list = FALSE)
train <- data_sf[train_index, ]
test <- data_sf[-train_index, ]

# Drop geometry for modeling
# The model works on plain data frames; `test` itself keeps its geometry.
train_df <- st_drop_geometry(train)
test_df  <- st_drop_geometry(test)

# Model columns
# The target plus every numeric / retained categorical column that is still
# present after the geometry was dropped. intersect() silently discards names
# that no longer exist as columns (presumably the coordinate columns, which
# became the geometry -- confirm).
model_cols <- c(target_col, intersect(c(num_cols, cat_cols_keep), colnames(train_df)))

# 13. Faster Random Forest
# Classification forest of the target on all other model columns.
# - ntree = 50 keeps the fit quick (fewer trees than randomForest's default).
# - mtry is the floored square root of the number of predictors
#   (length(model_cols) minus the target).
# - importance = TRUE stores the variable-importance measures.
rf_model <- randomForest(
  as.formula(paste(target_col, "~ .")), 
  data = train_df[ , model_cols], 
  importance = TRUE,
  ntree = 50,
  mtry = floor(sqrt(length(model_cols)-1))
)

# Print the call, the OOB error estimate and the OOB confusion matrix.
# NOTE(review): the recorded OOB error (93.27%) is about what random guessing
# over 15 equally frequent classes would give (14/15 ~ 93.3%).
print(rf_model)
## 
## Call:
##  randomForest(formula = as.formula(paste(target_col, "~ .")),      data = train_df[, model_cols], importance = TRUE, ntree = 50,      mtry = floor(sqrt(length(model_cols) - 1))) 
##                Type of random forest: classification
##                      Number of trees: 50
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 93.27%
## Confusion matrix:
##                            Brute Force Attack Credential Stuffing
## Brute Force Attack                        299                 285
## Credential Stuffing                       265                 260
## Cross-Site Scripting (Xss)                289                 257
## Ddos                                      263                 262
## Dns Spoofing                              253                 276
## Malware                                   288                 253
## Man-In-The-Middle                         289                 287
## Phishing                                  284                 247
## Ransomware                                287                 267
## Session Hijacking                         286                 249
## Social Engineering                        281                 276
## Spyware                                   295                 246
## Sql Injection                             280                 257
## Trojan Horse                              271                 265
## Zero-Day Exploit                          292                 281
##                            Cross-Site Scripting (Xss) Ddos Dns Spoofing Malware
## Brute Force Attack                                255  236          244     252
## Credential Stuffing                               287  211          245     223
## Cross-Site Scripting (Xss)                        259  251          246     224
## Ddos                                              250  260          231     247
## Dns Spoofing                                      225  248          241     227
## Malware                                           235  257          244     236
## Man-In-The-Middle                                 250  229          257     245
## Phishing                                          250  240          265     229
## Ransomware                                        265  241          265     262
## Session Hijacking                                 268  279          219     228
## Social Engineering                                247  220          269     227
## Spyware                                           248  254          257     239
## Sql Injection                                     259  236          245     256
## Trojan Horse                                      249  227          237     240
## Zero-Day Exploit                                  261  226          272     251
##                            Man-In-The-Middle Phishing Ransomware
## Brute Force Attack                       224      221        238
## Credential Stuffing                      219      231        201
## Cross-Site Scripting (Xss)               233      228        211
## Ddos                                     234      214        206
## Dns Spoofing                             255      244        204
## Malware                                  220      232        217
## Man-In-The-Middle                        238      210        241
## Phishing                                 208      230        203
## Ransomware                               265      212        221
## Session Hijacking                        255      205        198
## Social Engineering                       226      235        245
## Spyware                                  218      216        224
## Sql Injection                            213      219        215
## Trojan Horse                             238      234        218
## Zero-Day Exploit                         235      215        205
##                            Session Hijacking Social Engineering Spyware
## Brute Force Attack                       196                197     199
## Credential Stuffing                      216                207     212
## Cross-Site Scripting (Xss)               199                230     216
## Ddos                                     213                213     201
## Dns Spoofing                             223                205     214
## Malware                                  218                214     208
## Man-In-The-Middle                        204                207     234
## Phishing                                 196                247     199
## Ransomware                               192                197     211
## Session Hijacking                        231                208     209
## Social Engineering                       243                206     207
## Spyware                                  191                216     210
## Sql Injection                            212                219     202
## Trojan Horse                             222                212     196
## Zero-Day Exploit                         199                242     199
##                            Sql Injection Trojan Horse Zero-Day Exploit
## Brute Force Attack                   216          247              223
## Credential Stuffing                  219          250              222
## Cross-Site Scripting (Xss)           185          245              253
## Ddos                                 224          204              211
## Dns Spoofing                         210          222              227
## Malware                              208          226              260
## Man-In-The-Middle                    197          223              220
## Phishing                             200          220              238
## Ransomware                           186          202              222
## Session Hijacking                    231          213              239
## Social Engineering                   207          193              203
## Spyware                              227          206              237
## Sql Injection                        206          214              223
## Trojan Horse                         211          223              214
## Zero-Day Exploit                     221          233              203
##                            class.error
## Brute Force Attack           0.9153454
## Credential Stuffing          0.9250288
## Cross-Site Scripting (Xss)   0.9265457
## Ddos                         0.9242645
## Dns Spoofing                 0.9306275
## Malware                      0.9328783
## Man-In-The-Middle            0.9325970
## Phishing                     0.9334491
## Ransomware                   0.9367668
## Session Hijacking            0.9343377
## Social Engineering           0.9408895
## Spyware                      0.9397245
## Sql Injection                0.9403935
## Trojan Horse                 0.9354932
## Zero-Day Exploit             0.9425743
# Plot the variable-importance measures stored in the fitted forest.
randomForest::varImpPlot(rf_model)

# 14. Predictions and evaluation
# Predict the attack type for the held-out rows, then cross-tabulate the
# predictions against the true labels.
pred <- stats::predict(rf_model, newdata = test_df[, model_cols])
conf_mat <- caret::confusionMatrix(
  data = pred,
  reference = test_df[[target_col]]
)
print(conf_mat)
## Confusion Matrix and Statistics
## 
##                             Reference
## Prediction                   Brute Force Attack Credential Stuffing
##   Brute Force Attack                         66                  66
##   Credential Stuffing                        64                  70
##   Cross-Site Scripting (Xss)                 74                  67
##   Ddos                                       60                  66
##   Dns Spoofing                               54                  76
##   Malware                                    50                  72
##   Man-In-The-Middle                          64                  49
##   Phishing                                   64                  45
##   Ransomware                                 52                  31
##   Session Hijacking                          46                  53
##   Social Engineering                         70                  57
##   Spyware                                    50                  41
##   Sql Injection                              49                  54
##   Trojan Horse                               54                  51
##   Zero-Day Exploit                           66                  68
##                             Reference
## Prediction                   Cross-Site Scripting (Xss) Ddos Dns Spoofing
##   Brute Force Attack                                 73   61           63
##   Credential Stuffing                                48   69           56
##   Cross-Site Scripting (Xss)                         74   57           64
##   Ddos                                               59   52           58
##   Dns Spoofing                                       55   83           53
##   Malware                                            61   60           78
##   Man-In-The-Middle                                  67   52           76
##   Phishing                                           62   65           62
##   Ransomware                                         49   43           41
##   Session Hijacking                                  61   48           54
##   Social Engineering                                 53   55           48
##   Spyware                                            50   62           51
##   Sql Injection                                      54   48           50
##   Trojan Horse                                       53   59           48
##   Zero-Day Exploit                                   62   44           66
##                             Reference
## Prediction                   Malware Man-In-The-Middle Phishing Ransomware
##   Brute Force Attack              67                67       78         66
##   Credential Stuffing             58                66       84         63
##   Cross-Site Scripting (Xss)      67                77       68         61
##   Ddos                            57                59       61         65
##   Dns Spoofing                    57                76       55         55
##   Malware                         51                51       60         54
##   Man-In-The-Middle               78                55       61         57
##   Phishing                        62                55       53         50
##   Ransomware                      57                59       51         63
##   Session Hijacking               47                57       45         62
##   Social Engineering              61                56       40         60
##   Spyware                         63                47       54         57
##   Sql Injection                   43                59       58         56
##   Trojan Horse                    46                50       45         46
##   Zero-Day Exploit                65                48       50         58
##                             Reference
## Prediction                   Session Hijacking Social Engineering Spyware
##   Brute Force Attack                        69                 77      64
##   Credential Stuffing                       54                 61      61
##   Cross-Site Scripting (Xss)                72                 65      66
##   Ddos                                      56                 59      54
##   Dns Spoofing                              62                 53      63
##   Malware                                   67                 61      71
##   Man-In-The-Middle                         58                 67      64
##   Phishing                                  45                 56      51
##   Ransomware                                47                 60      36
##   Session Hijacking                         61                 54      61
##   Social Engineering                        54                 55      57
##   Spyware                                   55                 54      56
##   Sql Injection                             66                 43      50
##   Trojan Horse                              54                 51      55
##   Zero-Day Exploit                          59                 55      62
##                             Reference
## Prediction                   Sql Injection Trojan Horse Zero-Day Exploit
##   Brute Force Attack                    67           67               66
##   Credential Stuffing                   76           54               67
##   Cross-Site Scripting (Xss)            67           64               68
##   Ddos                                  57           69               60
##   Dns Spoofing                          65           55               57
##   Malware                               66           64               55
##   Man-In-The-Middle                     50           56               55
##   Phishing                              40           50               62
##   Ransomware                            53           52               53
##   Session Hijacking                     50           57               54
##   Social Engineering                    46           51               66
##   Spyware                               56           51               64
##   Sql Injection                         55           62               53
##   Trojan Horse                          50           48               39
##   Zero-Day Exploit                      65           64               64
## 
## Overall Statistics
##                                           
##                Accuracy : 0.067           
##                  95% CI : (0.0627, 0.0714)
##     No Information Rate : 0.0675          
##     P-Value [Acc > NIR] : 0.601287        
##                                           
##                   Kappa : 3e-04           
##                                           
##  Mcnemar's Test P-Value : 0.004938        
## 
## Statistics by Class:
## 
##                      Class: Brute Force Attack Class: Credential Stuffing
## Sensitivity                           0.074745                    0.08083
## Specificity                           0.922056                    0.92789
## Pos Pred Value                        0.064897                    0.07361
## Neg Pred Value                        0.932295                    0.93439
## Prevalence                            0.067487                    0.06619
## Detection Rate                        0.005044                    0.00535
## Detection Prevalence                  0.077729                    0.07268
## Balanced Accuracy                     0.498400                    0.50436
##                      Class: Cross-Site Scripting (Xss) Class: Ddos
## Sensitivity                                   0.083995    0.060606
## Specificity                                   0.923216    0.931294
## Pos Pred Value                                0.073195    0.058296
## Neg Pred Value                                0.933157    0.933891
## Prevalence                                    0.067334    0.065576
## Detection Rate                                0.005656    0.003974
## Detection Prevalence                          0.077270    0.068175
## Balanced Accuracy                             0.503606    0.495950
##                      Class: Dns Spoofing Class: Malware
## Sensitivity                     0.061060       0.058020
## Specificity                     0.929109       0.928718
## Pos Pred Value                  0.057671       0.055375
## Neg Pred Value                  0.933005       0.931925
## Prevalence                      0.066341       0.067181
## Detection Rate                  0.004051       0.003898
## Detection Prevalence            0.070238       0.070391
## Balanced Accuracy               0.495085       0.493369
##                      Class: Man-In-The-Middle Class: Phishing Class: Ransomware
## Sensitivity                          0.062358        0.061414          0.072165
## Specificity                          0.930011        0.937076          0.943985
## Pos Pred Value                       0.060506        0.064477          0.084337
## Neg Pred Value                       0.932074        0.933942          0.934344
## Prevalence                           0.067411        0.065958          0.066723
## Detection Rate                       0.004204        0.004051          0.004815
## Detection Prevalence                 0.069474        0.062825          0.057093
## Balanced Accuracy                    0.496185        0.499245          0.508075
##                      Class: Session Hijacking Class: Social Engineering
## Sensitivity                          0.069397                  0.063146
## Specificity                          0.938632                  0.936625
## Pos Pred Value                       0.075309                  0.066345
## Neg Pred Value                       0.933355                  0.933415
## Prevalence                           0.067181                  0.066570
## Detection Rate                       0.004662                  0.004204
## Detection Prevalence                 0.061908                  0.063360
## Balanced Accuracy                    0.504014                  0.499885
##                      Class: Spyware Class: Sql Injection Class: Trojan Horse
## Sensitivity                 0.06429             0.063731            0.055556
## Specificity                 0.93818             0.939039            0.942635
## Pos Pred Value              0.06905             0.068750            0.064085
## Neg Pred Value              0.93359             0.934223            0.933847
## Prevalence                  0.06657             0.065958            0.066035
## Detection Rate              0.00428             0.004204            0.003669
## Detection Prevalence        0.06198             0.061143            0.057245
## Balanced Accuracy           0.50124             0.501385            0.499095
##                      Class: Zero-Day Exploit
## Sensitivity                         0.072480
## Specificity                         0.931809
## Pos Pred Value                      0.071429
## Neg Pred Value                      0.932803
## Prevalence                          0.067487
## Detection Rate                      0.004891
## Detection Prevalence                0.068481
## Balanced Accuracy                   0.502145
# 15. Interactive prediction map
# Attach the predictions to the held-out spatial features and build a
# categorical palette with one topo colour per attack-type level.
test$Predicted <- pred
attack_types <- levels(pred)
colors <- colorFactor(
  palette = topo.colors(n = length(attack_types)),
  domain = attack_types
)

# One outline-free circle per test record, coloured by predicted class; the
# popup contrasts the actual and the predicted attack type.
test %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    radius = 5,
    color = ~colors(Predicted),
    stroke = FALSE,
    fillOpacity = 0.7,
    popup = paste(
      "Actual:", test[[target_col]], "<br>",
      "Predicted:", test$Predicted
    )
  ) %>%
  addLegend(
    position = "bottomright",
    pal = colors,
    values = ~Predicted,
    title = "Predicted attack type"
  )