## KIDSDRIV TARGET_AMT
## 1 0 5658.933
## 2 1 6219.881
## 3 2 5541.682
## 4 3 4915.414
## 5 4 4054.000
## AGEBIN TARGET_AMT
## 1 (0,57] 5598.432
## 2 (57,82] 6825.734
## HOMEKIDS TARGET_AMT
## 1 0 5685.224
## 2 1 5522.436
## 3 2 6085.350
## 4 3 5431.659
## 5 4 5609.528
## 6 5 5009.000
## HK0 TARGET_AMT
## 1 0 5722.475
## 2 1 5685.224
## No_Income TARGET_AMT
## 1 0 5840.581
## 2 1 5052.280
## PARENT1 TARGET_AMT
## 1 No 5603.351
## 2 Yes 6050.365
## LHV TARGET_AMT
## 1 0 5833.665
## 2 1 5674.895
## MSTATUS TARGET_AMT
## 1 Yes 5425.837
## 2 z_No 5966.716
## SEX TARGET_AMT
## 1 M 6146.700
## 2 z_F 5343.804
## EDUCATION TARGET_AMT
## 1 <High School 5678.822
## 2 Bachelors 5882.672
## 3 Masters 5966.203
## 4 PhD 6623.198
## 5 z_High School 5340.430
## HS TARGET_AMT
## 1 0 5913.112
## 2 1 5340.430
## [1] 0.01145817
## TRAVBIN TARGET_AMT
## 1 20 8993.472
## 2 30 5160.282
## 3 40 5663.291
## 4 50 5527.179
## 5 60 6109.261
## 6 70 6642.549
## 7 80 4284.250
## [1] -0.04024829
## [1] 0.0263967
## [1] 0.005176834
## CAR_USE TARGET_AMT
## 1 Commercial 6098.789
## 2 Private 5326.728
## [1] 0.02359552
## [1] 0.033044
## BLUEBOOKBIN TARGET_AMT
## 1 0 5261.043
## 2 0.5 6089.899
## 3 1 6889.024
## 4 1.5 5808.918
## 5 2 5085.294
## 6 2.5 5675.657
## [1] 0.02429707
## [1] 0.02816773
## CAR_TYPE TARGET_AMT
## 1 Minivan 5601.665
## 2 Panel Truck 7464.703
## 3 Pickup 5430.106
## 4 Sports Car 5412.733
## 5 Van 6908.553
## 6 z_SUV 5241.104
## LOWRISKCAR TARGET_AMT
## 1 0 7169.753
## 2 1 5388.646
## RED_CAR TARGET_AMT
## 1 no 5568.224
## 2 yes 6036.419
## [1] 0.005805864
## REVOKED TARGET_AMT
## 1 No 5847.834
## 2 Yes 5139.949
## [1] 0.03981117
## [1] -0.01573614
## URBANICITY TARGET_AMT
## 1 Highly Urban/ Urban 5711.058
## 2 z_Highly Rural/ Rural 5544.839
## Warning in par(fig = c(0, 0.8, 0, 0.8), new = TRUE): calling par(new=TRUE)
## with no plot
## [1] 0.001960785
## Variable Pearson PVal
## 1 TARGET_AMT 1.00 0.00
## 2 KIDSDRIV 0.02 0.65
## 3 AGE 0.07 0.10
## 4 HOMEKIDS -0.01 0.90
## 5 YOJ 0.07 0.12
## 6 INCOME 0.02 0.67
## 7 PARENT1 0.05 0.26
## 8 HOME_VAL -0.02 0.59
## 9 MSTATUS 0.13 0.00
## 10 SEX 0.05 0.25
## 11 TRAVTIME 0.03 0.46
## 12 CAR_USE -0.03 0.49
## 13 BLUEBOOK -0.03 0.51
## 14 RED_CAR -0.07 0.12
## 15 OLDCLAIM -0.03 0.41
## 16 CLM_FREQ -0.01 0.87
## 17 REVOKED -0.06 0.15
## 18 URBANICITY 0.04 0.34
## 19 HK0 0.00 0.95
## 20 No_Income -0.11 0.01
## 21 TRAVBIN 0.06 0.16
## 22 PICKUP -0.02 0.62
## 23 SR_CARAGE -0.02 0.64
## Warning: Removed 12 rows containing missing values (position_stack).
## [1] 0.3840836 NA
## [1] 0.003522819 NA
## [1] 0.04073636 NA
## [1] 0.2208314 NA
##
## Call:
## lm(formula = TARGET_AMT ~ BLUEBOOKBIN, data = traini[traini$TARGET_FLAG ==
## 1, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6730 -3130 -1584 207 101496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5261.0 328.4 16.020 < 0.0000000000000002 ***
## BLUEBOOKBIN0.5 828.9 513.7 1.614 0.10676
## BLUEBOOKBIN1 1628.0 556.5 2.925 0.00348 **
## BLUEBOOKBIN1.5 547.9 682.4 0.803 0.42212
## BLUEBOOKBIN2 -175.7 497.1 -0.354 0.72372
## BLUEBOOKBIN2.5 414.6 540.6 0.767 0.44320
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7730 on 2147 degrees of freedom
## Multiple R-squared: 0.005795, Adjusted R-squared: 0.00348
## F-statistic: 2.503 on 5 and 2147 DF, p-value: 0.02869
##
## Call:
## glm(formula = TARGET_FLAG ~ EDUCATION, data = traini)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3403 -0.3200 -0.1972 0.6597 0.8283
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.32003 0.01258 25.447 < 0.0000000000000002
## EDUCATIONBachelors -0.08676 0.01559 -5.565 0.000000027028342
## EDUCATIONMasters -0.12281 0.01652 -7.433 0.000000000000116
## EDUCATIONPhD -0.14833 0.02048 -7.242 0.000000000000484
## EDUCATIONz_High School 0.02031 0.01549 1.311 0.19
##
## (Intercept) ***
## EDUCATIONBachelors ***
## EDUCATIONMasters ***
## EDUCATIONPhD ***
## EDUCATIONz_High School
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1902816)
##
## Null deviance: 1585.0 on 8160 degrees of freedom
## Residual deviance: 1551.9 on 8156 degrees of freedom
## AIC: 9625.8
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = TARGET_FLAG ~ CAR_TYPE, data = traini)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3352 -0.2955 -0.2633 0.6648 0.8373
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.162704 0.009419 17.274 < 0.0000000000000002 ***
## CAR_TYPEPanel Truck 0.100610 0.019241 5.229 0.0000001748 ***
## CAR_TYPEPickup 0.156231 0.015024 10.399 < 0.0000000000000002 ***
## CAR_TYPESports Car 0.172467 0.017278 9.982 < 0.0000000000000002 ***
## CAR_TYPEVan 0.105296 0.018506 5.690 0.0000000131 ***
## CAR_TYPEz_SUV 0.132850 0.013102 10.139 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.190302)
##
## Null deviance: 1585.0 on 8160 degrees of freedom
## Residual deviance: 1551.9 on 8155 degrees of freedom
## AIC: 9627.6
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = TARGET_FLAG ~ AGEBIN, data = traini)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3408 -0.2584 -0.2584 0.6592 0.7416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.258424 0.005041 51.260 < 0.0000000000000002 ***
## AGEBIN(57,82] 0.082400 0.019709 4.181 0.0000293 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1938493)
##
## Null deviance: 1585.0 on 8160 degrees of freedom
## Residual deviance: 1581.6 on 8159 degrees of freedom
## AIC: 9774.4
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## glm(formula = TARGET_FLAG ~ as.factor(BLUEBOOKBIN), data = traini)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3282 -0.2605 -0.2327 0.6718 0.8035
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.260461 0.009518 27.364
## as.factor(BLUEBOOKBIN)0.5 -0.027775 0.014411 -1.927
## as.factor(BLUEBOOKBIN)1 -0.011512 0.015896 -0.724
## as.factor(BLUEBOOKBIN)1.5 -0.063990 0.017813 -3.592
## as.factor(BLUEBOOKBIN)2 0.067772 0.015429 4.393
## as.factor(BLUEBOOKBIN)2.5 0.050480 0.016599 3.041
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## as.factor(BLUEBOOKBIN)0.5 0.05397 .
## as.factor(BLUEBOOKBIN)1 0.46895
## as.factor(BLUEBOOKBIN)1.5 0.00033 ***
## as.factor(BLUEBOOKBIN)2 0.0000113 ***
## as.factor(BLUEBOOKBIN)2.5 0.00236 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1927076)
##
## Null deviance: 1585.0 on 8160 degrees of freedom
## Residual deviance: 1571.5 on 8155 degrees of freedom
## AIC: 9730.2
##
## Number of Fisher Scoring iterations: 2
## [1] "INDEX" "TARGET_FLAG" "TARGET_AMT" "KIDSDRIV" "AGE"
## [6] "HOMEKIDS" "YOJ" "INCOME" "PARENT1" "HOME_VAL"
## [11] "MSTATUS" "SEX" "EDUCATION" "JOB" "TRAVTIME"
## [16] "CAR_USE" "BLUEBOOK" "TIF" "CAR_TYPE" "RED_CAR"
## [21] "OLDCLAIM" "CLM_FREQ" "REVOKED" "MVR_PTS" "CAR_AGE"
## [26] "URBANICITY" "AGEBIN" "HK0" "No_Income" "LHV"
## [31] "HS" "TRAVBIN" "BLUEBOOKBIN" "PICKUP" "SC"
## [36] "minivan" "SUV" "LOWRISKCAR" "LOGOLDCLAIM" "SR_CARAGE"
## [41] "CLAIM" "FAMILY"
# Recode the two binned predictors as numbers so cor() will accept them.
# NOTE(review): if these columns are factors, as.numeric() returns the level
# codes (1, 2, ...) rather than the bin labels -- confirm that is intended.
traini$TRAVBIN <- as.numeric(traini$TRAVBIN)
traini$AGEBIN <- as.numeric(traini$AGEBIN)

# Column positions of the predictors used in both correlation matrices.
# NOTE(review): positions presumably follow the column-name listing printed
# just above this code (4 = KIDSDRIV ... 40 = SR_CARAGE) -- verify against
# names(traini) before reordering or adding columns.
cor_predictor_idx <- c(
  4, 7, 9, 11, 12, 16, 17, 18, 20, 22, 23,
  24, 26, 27, 28, 29, 30, 32, 38, 39, 40
)

# Rows with a claim only (TARGET_FLAG == 1).
trainsub <- traini[traini$TARGET_FLAG == 1, ]

# Correlations among claim rows: column 3 (the amount) plus the predictors.
cormat_amt <- cor(trainsub[c(3, cor_predictor_idx)])
corrplot(cormat_amt, method = "color", tl.cex = 0.7)

# Correlations over all rows: column 2 (the claim flag) plus the predictors.
cormat_flag <- cor(traini[c(2, cor_predictor_idx)])
corrplot(cormat_flag, method = "color", tl.cex = 0.7)
##
## Call:
## lm(formula = TARGET_AMT ~ MSTATUS + AGEBIN + No_Income + REVOKED +
## CAR_TYPE + BLUEBOOKBIN + as.numeric(TRAVBIN) + MVR_PTS +
## LOGOLDCLAIM + CAR_USE + HK0 + HS, data = traini[traini$TARGET_FLAG ==
## 1, ], weights = wts)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -3.7382 -0.8879 -0.4518 0.1214 29.2820
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3587.22 1349.34 2.658 0.00791 **
## MSTATUS 279.22 281.08 0.993 0.32064
## AGEBIN 941.22 811.81 1.159 0.24642
## No_Income -1510.65 283.34 -5.332 0.000000108 ***
## REVOKED -464.24 270.00 -1.719 0.08569 .
## CAR_TYPEPanel Truck 1948.11 1015.66 1.918 0.05524 .
## CAR_TYPEPickup 1196.94 492.66 2.430 0.01520 *
## CAR_TYPESports Car -1064.92 477.33 -2.231 0.02578 *
## CAR_TYPEVan 1052.48 863.62 1.219 0.22310
## CAR_TYPEz_SUV -388.70 425.78 -0.913 0.36139
## BLUEBOOKBIN0.5 -120.41 506.16 -0.238 0.81199
## BLUEBOOKBIN1 527.38 665.06 0.793 0.42787
## BLUEBOOKBIN1.5 -1218.07 469.63 -2.594 0.00956 **
## BLUEBOOKBIN2 -631.57 346.46 -1.823 0.06845 .
## BLUEBOOKBIN2.5 176.29 467.73 0.377 0.70628
## as.numeric(TRAVBIN) 20.18 16.55 1.219 0.22299
## MVR_PTS 264.26 134.53 1.964 0.04963 *
## LOGOLDCLAIM 262.60 135.79 1.934 0.05326 .
## CAR_USE 232.68 326.98 0.712 0.47678
## HK0 -193.09 287.97 -0.671 0.50259
## HS 132.14 276.61 0.478 0.63292
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.165 on 2132 degrees of freedom
## Multiple R-squared: 0.2122, Adjusted R-squared: 0.2048
## F-statistic: 28.71 on 20 and 2132 DF, p-value: < 0.00000000000000022
## GVIF Df GVIF^(1/(2*Df))
## MSTATUS 1.566579 1 1.251631
## AGEBIN 1.213132 1 1.101423
## No_Income 2.875327 1 1.695679
## REVOKED 2.410994 1 1.552738
## CAR_TYPE 7.650203 5 1.225652
## BLUEBOOKBIN 4.889756 5 1.172003
## as.numeric(TRAVBIN) 1.944586 1 1.394484
## MVR_PTS 1.772381 1 1.331308
## LOGOLDCLAIM 1.581929 1 1.257748
## CAR_USE 2.011945 1 1.418431
## HK0 1.804389 1 1.343276
## HS 2.346703 1 1.531895
## [1] -0.02484418
##
## Call:
## lm(formula = TARGET_AMT ~ SEX + No_Income + REVOKED + LOWRISKCAR +
## BLUEBOOKBIN + as.numeric(TRAVBIN) + MVR_PTS, data = traini[traini$TARGET_FLAG ==
## 1, ], weights = wts2)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -3.3508 -0.8843 -0.4282 0.1325 30.3721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8087.41 1097.65 7.368 0.000000000000246 ***
## SEX -1417.76 294.90 -4.808 0.000001633217490 ***
## No_Income -1753.32 264.41 -6.631 0.000000000042029 ***
## REVOKED -537.23 256.37 -2.096 0.03624 *
## LOWRISKCAR -503.81 649.33 -0.776 0.43790
## BLUEBOOKBIN0.5 -140.94 497.14 -0.283 0.77682
## BLUEBOOKBIN1 674.23 660.94 1.020 0.30779
## BLUEBOOKBIN1.5 -1049.77 436.64 -2.404 0.01629 *
## BLUEBOOKBIN2 -675.38 323.57 -2.087 0.03698 *
## BLUEBOOKBIN2.5 220.48 462.73 0.476 0.63378
## as.numeric(TRAVBIN) 24.92 14.26 1.748 0.08065 .
## MVR_PTS 397.06 124.83 3.181 0.00149 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.168 on 2141 degrees of freedom
## Multiple R-squared: 0.2068, Adjusted R-squared: 0.2027
## F-statistic: 50.74 on 11 and 2141 DF, p-value: < 0.00000000000000022
## GVIF Df GVIF^(1/(2*Df))
## SEX 1.580500 1 1.257179
## No_Income 2.497251 1 1.580269
## REVOKED 2.167957 1 1.472398
## LOWRISKCAR 1.309848 1 1.144486
## BLUEBOOKBIN 2.782307 5 1.107747
## as.numeric(TRAVBIN) 1.438548 1 1.199395
## MVR_PTS 1.521918 1 1.233660
## [1] -0.01506344
## Loading required package: lattice
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
##
## Call:
## glm(formula = TARGET_FLAG ~ KIDSDRIV + PARENT1 + MSTATUS + JOB +
## as.numeric(TRAVBIN) + CAR_USE + CAR_TYPE + REVOKED + AGEBIN +
## BLUEBOOKBIN + HK0 + LHV + SR_CARAGE + HS + CLAIM + TIF +
## AGEBIN, data = traini)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8589 -0.2832 -0.1311 0.3264 1.1382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1491357 0.0512765 -2.908 0.003642 **
## KIDSDRIV 0.0553152 0.0097676 5.663 0.000000015369093 ***
## PARENT1 0.0603262 0.0191043 3.158 0.001596 **
## MSTATUS 0.0782673 0.0115750 6.762 0.000000000014574 ***
## JOBClerical 0.0409642 0.0247408 1.656 0.097814 .
## JOBDoctor -0.0365510 0.0332723 -1.099 0.272001
## JOBHome Maker 0.0540337 0.0269235 2.007 0.044790 *
## JOBLawyer 0.0258082 0.0252423 1.022 0.306614
## JOBManager -0.0550561 0.0234567 -2.347 0.018942 *
## JOBProfessional 0.0098014 0.0231974 0.423 0.672655
## JOBStudent 0.0615722 0.0262337 2.347 0.018946 *
## JOBz_Blue Collar 0.0514954 0.0229218 2.247 0.024694 *
## as.numeric(TRAVBIN) -0.0025748 0.0006354 -4.052 0.000051195952883 ***
## CAR_USE -0.0950533 0.0140000 -6.790 0.000000000012042 ***
## CAR_TYPEPanel Truck 0.0437222 0.0227315 1.923 0.054462 .
## CAR_TYPEPickup 0.0758939 0.0153132 4.956 0.000000733729598 ***
## CAR_TYPESports Car 0.1142615 0.0164744 6.936 0.000000000004352 ***
## CAR_TYPEVan 0.0646369 0.0187486 3.448 0.000569 ***
## CAR_TYPEz_SUV 0.0853386 0.0124826 6.837 0.000000000008697 ***
## REVOKED 0.1513386 0.0136205 11.111 < 0.0000000000000002 ***
## AGEBIN 0.1481709 0.0206609 7.172 0.000000000000807 ***
## BLUEBOOKBIN0.5 -0.0082901 0.0139869 -0.593 0.553397
## BLUEBOOKBIN1 -0.0015668 0.0153939 -0.102 0.918935
## BLUEBOOKBIN1.5 -0.0480574 0.0178381 -2.694 0.007073 **
## BLUEBOOKBIN2 0.0386424 0.0146548 2.637 0.008384 **
## BLUEBOOKBIN2.5 0.0018004 0.0155184 0.116 0.907644
## HK0 -0.0154104 0.0140616 -1.096 0.273145
## LHV 0.0438774 0.0110912 3.956 0.000076844042579 ***
## SR_CARAGE -0.0092806 0.0053953 -1.720 0.085449 .
## HS 0.0324414 0.0113404 2.861 0.004238 **
## CLAIM 0.0662625 0.0030683 21.595 < 0.0000000000000002 ***
## TIF -0.0308475 0.0044481 -6.935 0.000000000004374 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1604579)
##
## Null deviance: 1585.0 on 8160 degrees of freedom
## Residual deviance: 1304.4 on 8129 degrees of freedom
## AIC: 8261.5
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## roc.default(response = traini$TARGET_FLAG, predictor = gmod1a$fitted.values, plot = TRUE)
##
## Data: gmod1a$fitted.values in 6008 controls (traini$TARGET_FLAG 0) < 2153 cases (traini$TARGET_FLAG 1).
## Area under the curve: 0.7694
## GVIF Df GVIF^(1/(2*Df))
## KIDSDRIV 1.269559 1 1.126747
## PARENT1 2.126426 1 1.458227
## MSTATUS 1.635869 1 1.279011
## JOB 3.529538 8 1.082013
## as.numeric(TRAVBIN) 1.702065 1 1.304632
## CAR_USE 2.326691 1 1.525350
## CAR_TYPE 2.758824 5 1.106808
## REVOKED 1.014503 1 1.007225
## AGEBIN 1.327672 1 1.152246
## BLUEBOOKBIN 1.712618 5 1.055276
## HK0 2.293618 1 1.514470
## LHV 1.235122 1 1.111360
## SR_CARAGE 1.480344 1 1.216694
## HS 1.334302 1 1.155120
## CLAIM 1.042723 1 1.021138
## TIF 1.006188 1 1.003089
## [1] 0.9479028
## [1] 0.2837901
## [1] 0.7869283
## [1] 0.6612554
## [1] 0.8599471
##
## Call:
## glm(formula = TARGET_FLAG ~ KIDSDRIV + PARENT1 + MSTATUS + JOB +
## logTRAVTIME + CAR_USE + TIF + LOWRISKCAR + REVOKED + AGEBIN +
## BLUEBOOKBIN + HK0 + LHV + HS + CLAIM + YOJ + JOB + TIF, data = traini)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8788 -0.2796 -0.1322 0.3343 1.1072
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.149463 0.045280 -3.301 0.000968 ***
## KIDSDRIV 0.047439 0.009673 4.904 0.000000956085761 ***
## PARENT1 0.060813 0.019172 3.172 0.001519 **
## MSTATUS 0.076676 0.011652 6.580 0.000000000049832 ***
## JOBClerical 0.056479 0.024020 2.351 0.018730 *
## JOBDoctor -0.039403 0.033187 -1.187 0.235143
## JOBHome Maker 0.057585 0.027430 2.099 0.035816 *
## JOBLawyer 0.020322 0.025136 0.808 0.418831
## JOBManager -0.049071 0.023383 -2.099 0.035888 *
## JOBProfessional 0.014559 0.022984 0.633 0.526448
## JOBStudent 0.057749 0.026276 2.198 0.027994 *
## JOBz_Blue Collar 0.060775 0.021815 2.786 0.005350 **
## logTRAVTIME 0.021523 0.004474 4.811 0.000001528628645 ***
## CAR_USE -0.095507 0.012914 -7.395 0.000000000000155 ***
## TIF -0.030040 0.004462 -6.732 0.000000000017839 ***
## LOWRISKCAR -0.001358 0.014685 -0.092 0.926343
## REVOKED 0.156366 0.013653 11.453 < 0.0000000000000002 ***
## AGEBIN 0.129974 0.018424 7.055 0.000000000001872 ***
## BLUEBOOKBIN0.5 -0.009783 0.013736 -0.712 0.476346
## BLUEBOOKBIN1 -0.005492 0.015400 -0.357 0.721402
## BLUEBOOKBIN1.5 -0.054918 0.017355 -3.164 0.001560 **
## BLUEBOOKBIN2 0.053751 0.014490 3.709 0.000209 ***
## BLUEBOOKBIN2.5 0.024493 0.015316 1.599 0.109823
## HK0 -0.047653 0.013176 -3.617 0.000300 ***
## LHV 0.046542 0.011124 4.184 0.000028964549069 ***
## HS 0.037845 0.011030 3.431 0.000604 ***
## CLAIM 0.068156 0.003070 22.204 < 0.0000000000000002 ***
## YOJ -0.003964 0.001294 -3.063 0.002199 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.161485)
##
## Null deviance: 1585.0 on 8160 degrees of freedom
## Residual deviance: 1313.4 on 8133 degrees of freedom
## AIC: 8309.6
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## roc.default(response = traini$TARGET_FLAG, predictor = gmod2a$fitted.values, plot = TRUE)
##
## Data: gmod2a$fitted.values in 6008 controls (traini$TARGET_FLAG 0) < 2153 cases (traini$TARGET_FLAG 1).
## Area under the curve: 0.7646
## GVIF Df GVIF^(1/(2*Df))
## KIDSDRIV 1.237181 1 1.112286
## PARENT1 2.127820 1 1.458705
## MSTATUS 1.647250 1 1.283453
## JOB 3.434417 8 1.080167
## logTRAVTIME 1.011340 1 1.005654
## CAR_USE 1.967227 1 1.402579
## TIF 1.006105 1 1.003048
## LOWRISKCAR 1.571636 1 1.253649
## REVOKED 1.012798 1 1.006379
## AGEBIN 1.048995 1 1.024204
## BLUEBOOKBIN 1.345821 5 1.030146
## HK0 2.000937 1 1.414545
## LHV 1.234612 1 1.111131
## HS 1.254312 1 1.119961
## CLAIM 1.036903 1 1.018284
## YOJ 1.362909 1 1.167437
## [1] 0.9495672
## [1] 0.270785
## [1] 0.7841924
## [1] 0.6580135
## [1] 0.8589927
##
## Call:
## glm(formula = TARGET_FLAG ~ FAMILY + MSTATUS + JOB + logTRAVTIME +
## LOWRISKCAR + TIF + REVOKED + AGEBIN + BLUEBOOKBIN + HS +
## CLAIM + YOJ + JOB + TIF + LHV, data = traini)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8177 -0.2798 -0.1339 0.3414 1.0980
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.242454 0.038338 -6.324 0.00000000026825 ***
## FAMILY -0.049373 0.004507 -10.955 < 0.0000000000000002 ***
## MSTATUS 0.082565 0.009697 8.514 < 0.0000000000000002 ***
## JOBClerical 0.008993 0.023189 0.388 0.69818
## JOBDoctor -0.101678 0.032205 -3.157 0.00160 **
## JOBHome Maker 0.002529 0.026478 0.096 0.92390
## JOBLawyer -0.038761 0.023901 -1.622 0.10490
## JOBManager -0.091437 0.022732 -4.022 0.00005812644403 ***
## JOBProfessional -0.027300 0.022336 -1.222 0.22166
## JOBStudent 0.040474 0.026191 1.545 0.12230
## JOBz_Blue Collar 0.067208 0.021864 3.074 0.00212 **
## logTRAVTIME 0.021676 0.004486 4.832 0.00000137904911 ***
## LOWRISKCAR -0.039355 0.013792 -2.853 0.00434 **
## TIF -0.030065 0.004476 -6.717 0.00000000001983 ***
## REVOKED 0.157706 0.013694 11.516 < 0.0000000000000002 ***
## AGEBIN 0.130824 0.018440 7.095 0.00000000000141 ***
## BLUEBOOKBIN0.5 -0.014879 0.013763 -1.081 0.27970
## BLUEBOOKBIN1 -0.002223 0.015442 -0.144 0.88556
## BLUEBOOKBIN1.5 -0.043498 0.017341 -2.508 0.01215 *
## BLUEBOOKBIN2 0.058643 0.014513 4.041 0.00005376680693 ***
## BLUEBOOKBIN2.5 0.026799 0.015362 1.745 0.08110 .
## HS 0.048659 0.010969 4.436 0.00000927695885 ***
## CLAIM 0.069569 0.003072 22.644 < 0.0000000000000002 ***
## YOJ -0.004118 0.001294 -3.182 0.00147 **
## LHV 0.043610 0.011151 3.911 0.00009269791513 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.1625214)
##
## Null deviance: 1585.0 on 8160 degrees of freedom
## Residual deviance: 1322.3 on 8136 degrees of freedom
## AIC: 8358.8
##
## Number of Fisher Scoring iterations: 2
##
## Call:
## roc.default(response = traini$TARGET_FLAG, predictor = gmod3a$fitted.values, plot = TRUE)
##
## Data: gmod3a$fitted.values in 6008 controls (traini$TARGET_FLAG 0) < 2153 cases (traini$TARGET_FLAG 1).
## Area under the curve: 0.7609
## GVIF Df GVIF^(1/(2*Df))
## FAMILY 1.075084 1 1.036863
## MSTATUS 1.133553 1 1.064684
## JOB 2.201279 8 1.050551
## logTRAVTIME 1.010519 1 1.005246
## LOWRISKCAR 1.377429 1 1.173639
## TIF 1.005986 1 1.002988
## REVOKED 1.012507 1 1.006234
## AGEBIN 1.044109 1 1.021817
## BLUEBOOKBIN 1.317160 5 1.027931
## HS 1.232395 1 1.110133
## CLAIM 1.032176 1 1.015961
## YOJ 1.354352 1 1.163766
## LHV 1.232619 1 1.110234
## [1] 0.9464048
## [1] 0.262889
## [1] 0.7817957
## [1] 0.6373874
## [1] 0.8562608
## Warning: NAs introduced by coercion
### First we need to predict who is likely to have a claim.
# The summary printed earlier for gmod2a reports a gaussian-family glm, so its
# predictions are unbounded scores rather than probabilities. round() on such
# scores can return values like 2 or -1, which would distort the count below
# and drop the highest-scoring rows from the `== 1` filter. Thresholding at
# 0.5 gives the same flag as round() for any score in [-0.5, 1.5) and always
# yields 0 or 1.
exp2$TARGET_FLAG <- as.numeric(
  predict(gmod2a, newdata = exp2, type = "response") > 0.5
)
sum(exp2$TARGET_FLAG)
## [1] 50
# Predict amounts only for the rows flagged as claims. which() keeps rows with
# a missing flag from entering the subset as all-NA rows.
# NOTE(review): 17135, fac and 8 presumably undo a rescaling applied when
# mod2b was fitted; fac is defined outside this section -- confirm.
# predict.lm() is called directly, as in the original, because the class of
# mod2b is not visible here.
LogPredAmounts <- (
  predict.lm(mod2b, newdata = exp2[which(exp2$TARGET_FLAG == 1), ]) - 17135
) / fac + 8
summary(LogPredAmounts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.778 4.370 4.645 4.610 4.874 5.382
sd(LogPredAmounts)
## [1] 0.3693754
# Stack the two histograms and share the x-axis range so they can be compared.
par(mfrow = c(2, 1))
hist(traini$LOG_AMT, main = "Training Log(Amounts)", xlim = c(4, 12))
hist(
  LogPredAmounts,
  main = "Experiment Log(Predicted Amounts)",
  xlim = c(4, 12)
)
# My distribution is less tight around the mean but also has fewer outliers