library(survival)
## Warning: package 'survival' was built under R version 4.4.3
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-10
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
test <- read.csv("test.csv")
train <- read.csv("train.csv")
features <- c(
  "area_first_ha",
  "area_growth_abs_0_5h",
  "area_growth_rate_ha_per_h",
  "radial_growth_rate_m_per_h",
  "log_area_ratio_0_5h",
  "dt_first_last_0_5h"
)
train_cox <- train %>%
  select(all_of(features), time_to_hit_hours, event) %>%
  na.omit()

y <- Surv(train_cox$time_to_hit_hours, train_cox$event)
X <- as.matrix(train_cox[, features])
set.seed(123)

cox_fit <- glmnet(
  x = X,
  y = y,
  family = "cox",
  alpha = 0   # ridge
)
test_X <- test %>%
  select(all_of(features)) %>%
  mutate(across(everything(), ~ifelse(is.na(.), median(train[[cur_column()]], na.rm=TRUE), .)))

test_mat <- as.matrix(test_X)
cv_cox <- cv.glmnet(
  x = X,
  y = y,
  family = "cox",
  alpha = 0
)

cox_fit <- cv_cox$glmnet.fit
best_lambda <- cv_cox$lambda.min
lp <- predict(cox_fit, newx = test_mat, s = best_lambda, type = "link")
basehaz <- basehaz(coxph(y ~ X), centered = FALSE)
get_prob <- function(t) {
  H0_t <- max(basehaz$hazard[basehaz$time <= t])
  1 - exp(-H0_t * exp(lp))
}
submission <- data.frame(
  event_id = test$event_id,
  prob_12h = get_prob(12),
  prob_24h = get_prob(24),
  prob_48h = get_prob(48),
  prob_72h = get_prob(72)
)
submission[,2:5] <- t(apply(submission[,2:5], 1, cummax))
submission[,2:5] <- pmin(pmax(submission[,2:5], 0), 1)

Data Results

head(train)
##   event_id num_perimeters_0_5h dt_first_last_0_5h low_temporal_resolution_0_5h
## 1 10892457                   3           4.265188                            0
## 2 11757157                   2           1.169918                            0
## 3 11945086                   4           4.777526                            0
## 4 12044083                   1           0.000000                            1
## 5 12052347                   2           4.975273                            0
## 6 12773599                   1           0.000000                            1
##   area_first_ha area_growth_abs_0_5h area_growth_rel_0_5h
## 1     79.696304             2.875935           0.03608617
## 2      8.946749             0.000000           0.00000000
## 3    106.482638             0.000000           0.00000000
## 4     67.631125             0.000000           0.00000000
## 5     35.632874             0.000000           0.00000000
## 6    184.767610             0.000000           0.00000000
##   area_growth_rate_ha_per_h log1p_area_first log1p_growth log_area_ratio_0_5h
## 1                 0.6742808         4.390693     1.354787          0.03545032
## 2                 0.0000000         2.297246     0.000000          0.00000000
## 3                 0.0000000         4.677329     0.000000          0.00000000
## 4                 0.0000000         4.228746     0.000000          0.00000000
## 5                 0.0000000         3.600946     0.000000          0.00000000
## 6                 0.0000000         5.224496     0.000000          0.00000000
##   relative_growth_0_5h radial_growth_m radial_growth_rate_m_per_h
## 1           0.03608617        9.007182                    2.11179
## 2           0.00000000        0.000000                    0.00000
## 3           0.00000000        0.000000                    0.00000
## 4           0.00000000        0.000000                    0.00000
## 5           0.00000000        0.000000                    0.00000
## 6           0.00000000        0.000000                    0.00000
##   centroid_displacement_m centroid_speed_m_per_h spread_bearing_deg
## 1                8.274971               1.940119           70.13051
## 2                0.000000               0.000000            0.00000
## 3                0.000000               0.000000            0.00000
## 4                0.000000               0.000000            0.00000
## 5                0.000000               0.000000            0.00000
## 6                0.000000               0.000000            0.00000
##   spread_bearing_sin spread_bearing_cos dist_min_ci_0_5h dist_std_ci_0_5h
## 1          0.9404692          0.3398788         6166.122        0.2050853
## 2          0.0000000          1.0000000         2930.926        0.0000000
## 3          0.0000000          1.0000000         3272.375        0.0000000
## 4          0.0000000          1.0000000        64119.871        0.0000000
## 5          0.0000000          1.0000000        18005.432        0.0000000
## 6          0.0000000          1.0000000        35232.563        0.0000000
##   dist_change_ci_0_5h dist_slope_ci_0_5h closing_speed_m_per_h
## 1           0.4350516       1.090997e-01            -0.1020005
## 2           0.0000000      -3.887003e-13             0.0000000
## 3           0.0000000      -1.390327e-13             0.0000000
## 4           0.0000000       0.000000e+00             0.0000000
## 5           0.0000000       3.656059e-13             0.0000000
## 6           0.0000000       0.000000e+00             0.0000000
##   closing_speed_abs_m_per_h projected_advance_m dist_accel_m_per_h2
## 1                 0.1020005          -0.4350516        7.275611e-02
## 2                 0.0000000           0.0000000        0.000000e+00
## 3                 0.0000000           0.0000000        7.965118e-14
## 4                 0.0000000           0.0000000        0.000000e+00
## 5                 0.0000000           0.0000000        0.000000e+00
## 6                 0.0000000           0.0000000        0.000000e+00
##   dist_fit_r2_0_5h alignment_cos alignment_abs cross_track_component
## 1        0.8863733   -0.05464908    0.05464908             -1.937219
## 2        0.0000000   -0.56889821    0.56889821              0.000000
## 3        0.0000000    0.88238544    0.88238544              0.000000
## 4        0.0000000    0.00000000    0.00000000              0.000000
## 5        0.0000000    0.93463412    0.93463412              0.000000
## 6        0.0000000    0.00000000    0.00000000              0.000000
##   along_track_speed event_start_hour event_start_dayofweek event_start_month
## 1        -0.1060257               19                     4                 5
## 2         0.0000000                4                     4                 6
## 3         0.0000000               22                     4                 8
## 4         0.0000000               20                     5                 8
## 5         0.0000000               21                     5                 7
## 6         0.0000000                0                     5                 7
##   time_to_hit_hours event
## 1        18.8925118     0
## 2        22.0481080     1
## 3         0.8888955     1
## 4        60.9530215     0
## 5        44.9902745     0
## 6        44.0263836     0
coef(cox_fit, s = best_lambda)
## 6 x 1 sparse Matrix of class "dgCMatrix"
##                                        1
## area_first_ha              -0.0002397069
## area_growth_abs_0_5h        0.0001447967
## area_growth_rate_ha_per_h   0.0015840410
## radial_growth_rate_m_per_h  0.0009465384
## log_area_ratio_0_5h         0.2897564321
## dt_first_last_0_5h          0.2821868252
cox_model <- coxph(
  Surv(time_to_hit_hours, event) ~ area_first_ha +
    area_growth_abs_0_5h +
    area_growth_rate_ha_per_h +
    radial_growth_rate_m_per_h +
    log_area_ratio_0_5h +
    dt_first_last_0_5h,
  data = train
)
summary(cox_model)
## Call:
## coxph(formula = Surv(time_to_hit_hours, event) ~ area_first_ha + 
##     area_growth_abs_0_5h + area_growth_rate_ha_per_h + radial_growth_rate_m_per_h + 
##     log_area_ratio_0_5h + dt_first_last_0_5h, data = train)
## 
##   n= 221, number of events= 69 
## 
##                                  coef  exp(coef)   se(coef)      z Pr(>|z|)    
## area_first_ha              -0.0005421  0.9994581  0.0002781 -1.949   0.0513 .  
## area_growth_abs_0_5h       -0.0101896  0.9898622  0.0088605 -1.150   0.2501    
## area_growth_rate_ha_per_h   0.0650903  1.0672554  0.0525069  1.240   0.2151    
## radial_growth_rate_m_per_h -0.0271272  0.9732374  0.0245883 -1.103   0.2699    
## log_area_ratio_0_5h         1.7169051  5.5672714  1.5377997  1.116   0.2642    
## dt_first_last_0_5h          0.3153275  1.3707081  0.0646456  4.878 1.07e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                            exp(coef) exp(-coef) lower .95 upper .95
## area_first_ha                 0.9995     1.0005    0.9989     1.000
## area_growth_abs_0_5h          0.9899     1.0102    0.9728     1.007
## area_growth_rate_ha_per_h     1.0673     0.9370    0.9629     1.183
## radial_growth_rate_m_per_h    0.9732     1.0275    0.9274     1.021
## log_area_ratio_0_5h           5.5673     0.1796    0.2733   113.402
## dt_first_last_0_5h            1.3707     0.7295    1.2076     1.556
## 
## Concordance= 0.734  (se = 0.03 )
## Likelihood ratio test= 50.2  on 6 df,   p=4e-09
## Wald test            = 52.37  on 6 df,   p=2e-09
## Score (logrank) test = 75.64  on 6 df,   p=3e-14
head(submission)
##   event_id        X1      X1.1      X1.2      X1.3
## 1 10662602 0.1755451 0.2457842 0.2651369 0.5710906
## 2 13353600 0.1706769 0.2392676 0.2581995 0.5598728
## 3 13942327 0.1753823 0.2455665 0.2649052 0.5707190
## 4 16112781 0.1650420 0.2317026 0.2501394 0.5466071
## 5 17132808 0.4193757 0.5481620 0.5800589 0.9078264
## 6 17445696 0.1662342 0.2333050 0.2518473 0.5494390
summary(submission)
##     event_id              X1               X1.1              X1.2        
##  Min.   :10662602   Min.   :0.01091   Min.   :0.01591   Min.   :0.01736  
##  1st Qu.:40275359   1st Qu.:0.15751   1st Qu.:0.22155   1st Qu.:0.23931  
##  Median :54801108   Median :0.17323   Median :0.24269   Median :0.26185  
##  Mean   :56953929   Mean   :0.21551   Mean   :0.29068   Mean   :0.31050  
##  3rd Qu.:75369419   3rd Qu.:0.17553   3rd Qu.:0.24577   3rd Qu.:0.26512  
##  Max.   :99649465   Max.   :0.62442   Max.   :0.76093   Max.   :0.79047  
##       X1.3        
##  Min.   :0.04699  
##  1st Qu.:0.52839  
##  Median :0.56579  
##  Mean   :0.58875  
##  3rd Qu.:0.57107  
##  Max.   :0.98636
all(submission$prob_12h <= submission$prob_24h &
    submission$prob_24h <= submission$prob_48h &
    submission$prob_48h <= submission$prob_72h)
## [1] TRUE
knitr::kable(head(submission))
event_id X1 X1.1 X1.2 X1.3
10662602 0.1755451 0.2457842 0.2651369 0.5710906
13353600 0.1706769 0.2392676 0.2581995 0.5598728
13942327 0.1753823 0.2455665 0.2649052 0.5707190
16112781 0.1650420 0.2317026 0.2501394 0.5466071
17132808 0.4193757 0.5481620 0.5800589 0.9078264
17445696 0.1662342 0.2333050 0.2518473 0.5494390