## y group time id offset
## 1 12 0 0 1 0.1382749
## 2 15 0 1 1 -0.3795184
## 3 6 0 2 1 -0.2060559
## 4 14 0 0 2 -0.1336986
## 5 33 0 1 2 0.3672245
## 6 16 0 2 2 0.1079441
# Simulate a small count data set ----

# Fix the RNG state so the simulated draws below are reproducible.
set.seed(1234)

# n observations: the first half gets group = 0, the second half group = 1.
n <- 50
group <- rep(c(0, 1), each = n / 2)
age <- rpois(n, lambda = 5)

# Regression coefficients, one per column of the design matrix X
# (intercept, group, age and the group-by-age interaction).
beta <- c(3, 0.3, 0.2, 0.1)
X <- model.matrix(~ group + age + age * group)

# Per-observation mean: the exponentiated linear predictor.
mu <- exp(X %*% beta)

# Placeholder for the response, filled one element at a time below.
y <- rep(NA, n)

# rPT() is provided by tweeDEseq.
library(tweeDEseq)

# Draw a single value per observation, using that observation's mean;
# D, a and max are the same constants for every draw.
for (i in seq_len(n)) {
  y[i] <- rPT(1, mu = mu[i], D = 2, a = 0, max = 1000)
}

# Collect response and covariates, then show the first rows.
dataset <- data.frame(y, group, age)
head(dataset)
## y group age
## 1 19 0 2
## 2 61 0 6
## 3 60 0 5
## 4 66 0 6
## 5 68 0 7
## 6 66 0 6
## Starting values:
## 2.933 0.329 0.198 0.103 3
## initial loglik value: -193.8
## initial value 193.795191
## iter 2 value 193.795055
## iter 3 value 193.792028
## iter 4 value 193.783070
## iter 5 value 193.570209
## iter 6 value 193.568788
## iter 7 value 193.561353
## iter 8 value 193.560493
## iter 8 value 193.560493
## iter 8 value 193.560493
## final value 193.560493
## converged
## Convergence reached. Computing hessian...
## Loading required namespace: numDeriv
## ... done
## Loading required namespace: matrixcalc
## Loglikelihood: -193.56
## Parameter estimates:
## Estimate Std. error z p.value
## (Intercept) 2.9326 0.1178 24.8874 0.0000
## group 0.3312 0.1334 2.4822 0.0131
## age 0.1976 0.0221 8.9236 0.0000
## group:age 0.1033 0.0238 4.3385 0.0000
##
## Dispersion = 1.83
## Power = 0
## Registered S3 method overwritten by 'DALEX':
## method from
## print.description questionr
## satisfaction_level last_evaluation number_project average_montly_hours
## 1: 0.38 0.53 2 157
## 2: 0.80 0.86 5 262
## 3: 0.11 0.88 7 272
## 4: 0.72 0.87 5 223
## 5: 0.37 0.52 2 159
## 6: 0.41 0.50 2 153
## time_spend_company Work_accident left promotion_last_5years sales
## 1: 3 0 1 0 sales
## 2: 6 0 1 0 sales
## 3: 4 0 1 0 sales
## 4: 5 0 1 0 sales
## 5: 3 0 1 0 sales
## 6: 3 0 1 0 sales
## salary
## 1: low
## 2: medium
## 3: medium
## 4: low
## 5: low
## 6: low
## 6 x 19 sparse Matrix of class "dgCMatrix"
## [[ suppressing 19 column names 'satisfaction_level', 'last_evaluation', 'number_project' ... ]]
##
## 1 0.38 0.53 2 157 3 . . . . . . . . . 1 . . 1 .
## 2 0.80 0.86 5 262 6 . . . . . . . . . 1 . . . 1
## 3 0.11 0.88 7 272 4 . . . . . . . . . 1 . . . 1
## 4 0.72 0.87 5 223 5 . . . . . . . . . 1 . . 1 .
## 5 0.37 0.52 2 159 3 . . . . . . . . . 1 . . 1 .
## 6 0.41 0.50 2 153 3 . . . . . . . . . 1 . . 1 .
## Tree Node ID Feature Split Yes No Missing Quality
## 1: 0 0 0-0 satisfaction_level 0.465 0-1 0-2 0-1 3123.2509800
## 2: 0 1 0-1 number_project 2.500 0-3 0-4 0-3 892.9471440
## 3: 0 2 0-2 time_spend_company 4.500 0-5 0-6 0-5 1284.8271500
## 4: 0 3 0-3 Leaf NA <NA> <NA> <NA> 0.4536083
## 5: 0 4 0-4 Leaf NA <NA> <NA> <NA> -0.1082209
## 6: 0 5 0-5 Leaf NA <NA> <NA> <NA> -0.5823490
## Cover
## 1: 3749.75
## 2: 1045.75
## 3: 2704.00
## 4: 435.50
## 5: 610.25
## 6: 2208.50
## Feature Gain Cover Frequency
## 1: satisfaction_level 0.439789879 0.34785700 0.32330827
## 2: time_spend_company 0.222734548 0.17881869 0.16541353
## 3: number_project 0.177174300 0.12337939 0.13533835
## 4: average_montly_hours 0.072518366 0.14989533 0.16541353
## 5: last_evaluation 0.070729248 0.14119107 0.14285714
## 6: Work_accident 0.009315458 0.02909931 0.03007519

## Parent Child sumGain frequency
## 1: satisfaction_level number_project 3573.8695 6
## 2: satisfaction_level time_spend_company 3421.1675 5
## 3: satisfaction_level satisfaction_level 1078.1480 10
## 4: last_evaluation average_montly_hours 843.8720 4
## 5: last_evaluation satisfaction_level 826.7479 6
## 6: last_evaluation time_spend_company 651.9038 4

## Parent Child sumGain frequency
## 1: last_evaluation average_montly_hours 745.5943 2
## 2: last_evaluation satisfaction_level 708.8723 4
## 3: last_evaluation time_spend_company 634.9984 3
## 4: satisfaction_level time_spend_company 559.9985 2
## 5: last_evaluation number_project 390.1898 1
## 6: average_montly_hours time_spend_company 318.0142 2

## Feature sumGain sumCover meanGain
## 1: satisfaction_level 10040.0 43920 264.10
## 2: time_spend_company 4016.0 19820 267.70
## 3: number_project 3706.0 13940 264.70
## 4: last_evaluation 1181.0 15340 90.81
## 5: average_montly_hours 886.0 18190 46.63
## 6: last_evaluation:average_montly_hours 745.6 1767 372.80
## meanCover frequency mean5Gain
## 1: 1156.0 38 1513.0
## 2: 1321.0 15 670.4
## 3: 995.6 14 697.4
## 4: 1180.0 13 183.0
## 5: 957.6 19 97.5
## 6: 883.7 2 372.8


## contribution
## xgboost: intercept -1.530
## xgboost: time_spend_company = 5 1.519
## xgboost: last_evaluation = 1 1.485
## xgboost: Work_accident = 0 -0.736
## xgboost: satisfaction_level:time_spend_company = 0.89:5 0.406
## xgboost: last_evaluation:time_spend_company = 1:5 0.316
## xgboost: number_project:last_evaluation = 5:1 0.258
## xgboost: satisfaction_level = 0.89 -0.238
## xgboost: last_evaluation:average_montly_hours = 1:224 0.227
## xgboost: number_project = 5 -0.224
## xgboost: salary = 2 0.166
## xgboost: average_montly_hours:last_evaluation = 224:1 -0.156
## xgboost: last_evaluation:satisfaction_level = 1:0.89 0.111
## xgboost: average_montly_hours:time_spend_company = 224:5 0.098
## xgboost: time_spend_company:last_evaluation = 5:1 0.095
## xgboost: average_montly_hours:number_project = 224:5 0.094
## xgboost: average_montly_hours = 224 -0.048
## xgboost: satisfaction_level:number_project = 0.89:5 -0.003
## xgboost: prediction 1.839
