library(survival)
## Warning: package 'survival' was built under R version 4.4.3
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-10
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
test <- read.csv("test.csv")
train <- read.csv("train.csv")
features <- c(
"area_first_ha",
"area_growth_abs_0_5h",
"area_growth_rate_ha_per_h",
"radial_growth_rate_m_per_h",
"log_area_ratio_0_5h",
"dt_first_last_0_5h"
)
train_cox <- train %>%
select(all_of(features), time_to_hit_hours, event) %>%
na.omit()
y <- Surv(train_cox$time_to_hit_hours, train_cox$event)
X <- as.matrix(train_cox[, features])
set.seed(123)
cox_fit <- glmnet(
x = X,
y = y,
family = "cox",
alpha = 0 # ridge
)
test_X <- test %>%
select(all_of(features)) %>%
mutate(across(everything(), ~ifelse(is.na(.), median(train[[cur_column()]], na.rm=TRUE), .)))
test_mat <- as.matrix(test_X)
cv_cox <- cv.glmnet(
x = X,
y = y,
family = "cox",
alpha = 0
)
cox_fit <- cv_cox$glmnet.fit
best_lambda <- cv_cox$lambda.min
lp <- predict(cox_fit, newx = test_mat, s = best_lambda, type = "link")
basehaz <- basehaz(coxph(y ~ X), centered = FALSE)
get_prob <- function(t) {
H0_t <- max(basehaz$hazard[basehaz$time <= t])
1 - exp(-H0_t * exp(lp))
}
submission <- data.frame(
event_id = test$event_id,
prob_12h = get_prob(12),
prob_24h = get_prob(24),
prob_48h = get_prob(48),
prob_72h = get_prob(72)
)
submission[,2:5] <- t(apply(submission[,2:5], 1, cummax))
submission[,2:5] <- pmin(pmax(submission[,2:5], 0), 1)
Data Results
head(train)
## event_id num_perimeters_0_5h dt_first_last_0_5h low_temporal_resolution_0_5h
## 1 10892457 3 4.265188 0
## 2 11757157 2 1.169918 0
## 3 11945086 4 4.777526 0
## 4 12044083 1 0.000000 1
## 5 12052347 2 4.975273 0
## 6 12773599 1 0.000000 1
## area_first_ha area_growth_abs_0_5h area_growth_rel_0_5h
## 1 79.696304 2.875935 0.03608617
## 2 8.946749 0.000000 0.00000000
## 3 106.482638 0.000000 0.00000000
## 4 67.631125 0.000000 0.00000000
## 5 35.632874 0.000000 0.00000000
## 6 184.767610 0.000000 0.00000000
## area_growth_rate_ha_per_h log1p_area_first log1p_growth log_area_ratio_0_5h
## 1 0.6742808 4.390693 1.354787 0.03545032
## 2 0.0000000 2.297246 0.000000 0.00000000
## 3 0.0000000 4.677329 0.000000 0.00000000
## 4 0.0000000 4.228746 0.000000 0.00000000
## 5 0.0000000 3.600946 0.000000 0.00000000
## 6 0.0000000 5.224496 0.000000 0.00000000
## relative_growth_0_5h radial_growth_m radial_growth_rate_m_per_h
## 1 0.03608617 9.007182 2.11179
## 2 0.00000000 0.000000 0.00000
## 3 0.00000000 0.000000 0.00000
## 4 0.00000000 0.000000 0.00000
## 5 0.00000000 0.000000 0.00000
## 6 0.00000000 0.000000 0.00000
## centroid_displacement_m centroid_speed_m_per_h spread_bearing_deg
## 1 8.274971 1.940119 70.13051
## 2 0.000000 0.000000 0.00000
## 3 0.000000 0.000000 0.00000
## 4 0.000000 0.000000 0.00000
## 5 0.000000 0.000000 0.00000
## 6 0.000000 0.000000 0.00000
## spread_bearing_sin spread_bearing_cos dist_min_ci_0_5h dist_std_ci_0_5h
## 1 0.9404692 0.3398788 6166.122 0.2050853
## 2 0.0000000 1.0000000 2930.926 0.0000000
## 3 0.0000000 1.0000000 3272.375 0.0000000
## 4 0.0000000 1.0000000 64119.871 0.0000000
## 5 0.0000000 1.0000000 18005.432 0.0000000
## 6 0.0000000 1.0000000 35232.563 0.0000000
## dist_change_ci_0_5h dist_slope_ci_0_5h closing_speed_m_per_h
## 1 0.4350516 1.090997e-01 -0.1020005
## 2 0.0000000 -3.887003e-13 0.0000000
## 3 0.0000000 -1.390327e-13 0.0000000
## 4 0.0000000 0.000000e+00 0.0000000
## 5 0.0000000 3.656059e-13 0.0000000
## 6 0.0000000 0.000000e+00 0.0000000
## closing_speed_abs_m_per_h projected_advance_m dist_accel_m_per_h2
## 1 0.1020005 -0.4350516 7.275611e-02
## 2 0.0000000 0.0000000 0.000000e+00
## 3 0.0000000 0.0000000 7.965118e-14
## 4 0.0000000 0.0000000 0.000000e+00
## 5 0.0000000 0.0000000 0.000000e+00
## 6 0.0000000 0.0000000 0.000000e+00
## dist_fit_r2_0_5h alignment_cos alignment_abs cross_track_component
## 1 0.8863733 -0.05464908 0.05464908 -1.937219
## 2 0.0000000 -0.56889821 0.56889821 0.000000
## 3 0.0000000 0.88238544 0.88238544 0.000000
## 4 0.0000000 0.00000000 0.00000000 0.000000
## 5 0.0000000 0.93463412 0.93463412 0.000000
## 6 0.0000000 0.00000000 0.00000000 0.000000
## along_track_speed event_start_hour event_start_dayofweek event_start_month
## 1 -0.1060257 19 4 5
## 2 0.0000000 4 4 6
## 3 0.0000000 22 4 8
## 4 0.0000000 20 5 8
## 5 0.0000000 21 5 7
## 6 0.0000000 0 5 7
## time_to_hit_hours event
## 1 18.8925118 0
## 2 22.0481080 1
## 3 0.8888955 1
## 4 60.9530215 0
## 5 44.9902745 0
## 6 44.0263836 0
coef(cox_fit, s = best_lambda)
## 6 x 1 sparse Matrix of class "dgCMatrix"
## 1
## area_first_ha -0.0002397069
## area_growth_abs_0_5h 0.0001447967
## area_growth_rate_ha_per_h 0.0015840410
## radial_growth_rate_m_per_h 0.0009465384
## log_area_ratio_0_5h 0.2897564321
## dt_first_last_0_5h 0.2821868252
cox_model <- coxph(
Surv(time_to_hit_hours, event) ~ area_first_ha +
area_growth_abs_0_5h +
area_growth_rate_ha_per_h +
radial_growth_rate_m_per_h +
log_area_ratio_0_5h +
dt_first_last_0_5h,
data = train
)
summary(cox_model)
## Call:
## coxph(formula = Surv(time_to_hit_hours, event) ~ area_first_ha +
## area_growth_abs_0_5h + area_growth_rate_ha_per_h + radial_growth_rate_m_per_h +
## log_area_ratio_0_5h + dt_first_last_0_5h, data = train)
##
## n= 221, number of events= 69
##
## coef exp(coef) se(coef) z Pr(>|z|)
## area_first_ha -0.0005421 0.9994581 0.0002781 -1.949 0.0513 .
## area_growth_abs_0_5h -0.0101896 0.9898622 0.0088605 -1.150 0.2501
## area_growth_rate_ha_per_h 0.0650903 1.0672554 0.0525069 1.240 0.2151
## radial_growth_rate_m_per_h -0.0271272 0.9732374 0.0245883 -1.103 0.2699
## log_area_ratio_0_5h 1.7169051 5.5672714 1.5377997 1.116 0.2642
## dt_first_last_0_5h 0.3153275 1.3707081 0.0646456 4.878 1.07e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## area_first_ha 0.9995 1.0005 0.9989 1.000
## area_growth_abs_0_5h 0.9899 1.0102 0.9728 1.007
## area_growth_rate_ha_per_h 1.0673 0.9370 0.9629 1.183
## radial_growth_rate_m_per_h 0.9732 1.0275 0.9274 1.021
## log_area_ratio_0_5h 5.5673 0.1796 0.2733 113.402
## dt_first_last_0_5h 1.3707 0.7295 1.2076 1.556
##
## Concordance= 0.734 (se = 0.03 )
## Likelihood ratio test= 50.2 on 6 df, p=4e-09
## Wald test = 52.37 on 6 df, p=2e-09
## Score (logrank) test = 75.64 on 6 df, p=3e-14
head(submission)
## event_id X1 X1.1 X1.2 X1.3
## 1 10662602 0.1755451 0.2457842 0.2651369 0.5710906
## 2 13353600 0.1706769 0.2392676 0.2581995 0.5598728
## 3 13942327 0.1753823 0.2455665 0.2649052 0.5707190
## 4 16112781 0.1650420 0.2317026 0.2501394 0.5466071
## 5 17132808 0.4193757 0.5481620 0.5800589 0.9078264
## 6 17445696 0.1662342 0.2333050 0.2518473 0.5494390
summary(submission)
## event_id X1 X1.1 X1.2
## Min. :10662602 Min. :0.01091 Min. :0.01591 Min. :0.01736
## 1st Qu.:40275359 1st Qu.:0.15751 1st Qu.:0.22155 1st Qu.:0.23931
## Median :54801108 Median :0.17323 Median :0.24269 Median :0.26185
## Mean :56953929 Mean :0.21551 Mean :0.29068 Mean :0.31050
## 3rd Qu.:75369419 3rd Qu.:0.17553 3rd Qu.:0.24577 3rd Qu.:0.26512
## Max. :99649465 Max. :0.62442 Max. :0.76093 Max. :0.79047
## X1.3
## Min. :0.04699
## 1st Qu.:0.52839
## Median :0.56579
## Mean :0.58875
## 3rd Qu.:0.57107
## Max. :0.98636
all(submission$prob_12h <= submission$prob_24h &
submission$prob_24h <= submission$prob_48h &
submission$prob_48h <= submission$prob_72h)
## [1] TRUE
knitr::kable(head(submission))
| 10662602 |
0.1755451 |
0.2457842 |
0.2651369 |
0.5710906 |
| 13353600 |
0.1706769 |
0.2392676 |
0.2581995 |
0.5598728 |
| 13942327 |
0.1753823 |
0.2455665 |
0.2649052 |
0.5707190 |
| 16112781 |
0.1650420 |
0.2317026 |
0.2501394 |
0.5466071 |
| 17132808 |
0.4193757 |
0.5481620 |
0.5800589 |
0.9078264 |
| 17445696 |
0.1662342 |
0.2333050 |
0.2518473 |
0.5494390 |