Poisson.Deviance <- function(pred, obs){200*(mean(pred)-mean(obs)+mean(log((obs/pred)^obs)))}
load("C:\\Users\\user\\Desktop\\freMTPL2freq.rda")
set.seed(100)
ll <- sample(c(1:nrow(freMTPL2freq)), round(0.9*nrow(freMTPL2freq)), replace = FALSE)
learn <- freMTPL2freq[ll,]
test <- freMTPL2freq[-ll,]
n_l = nrow(learn)
n_t = nrow(test)
library(rpart)
## Warning: 程序包'rpart'是用R版本4.4.2 来建造的
library(rpart.plot)
## Warning: 程序包'rpart.plot'是用R版本4.4.2 来建造的
tree1 <- rpart(cbind(Exposure,ClaimNb) ~ Area + VehPower + VehAge
+ DrivAge + BonusMalus + VehBrand + VehGas + Density + Region,
data = learn, method="poisson",
control=rpart.control(xval=1, minbucket=1500, cp=0.00005))
tree1
## n= 610212
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 610212 202513.2000 0.10107650
## 2) VehAge>=0.5 558264 170893.2000 0.09060582
## 4) BonusMalus< 57.5 370099 102548.1000 0.07104234
## 8) VehAge>=12.5 72041 17242.9300 0.05194728
## 16) DrivAge< 41.5 19955 3354.0600 0.03469194
## 32) Density< 472.5 13088 1977.0870 0.02911974
## 64) Region=R11,R22,R23,R24,R25,R41,R43,R52,R54,R72,R73,R74,R82,R83,R91,R94 11088 1566.5430 0.02641443 *
## 65) Region=R21,R26,R31,R42,R53,R93 2000 399.7157 0.04565388 *
## 33) Density>=472.5 6867 1355.2250 0.04651561
## 66) VehAge>=16.5 1790 235.0763 0.02828077 *
## 67) VehAge< 16.5 5077 1108.4710 0.05320576 *
## 17) DrivAge>=41.5 52086 13787.4700 0.05788736
## 34) VehAge>=17.5 11564 2489.6710 0.04255365
## 68) Region=R21,R23,R26,R31,R41,R43,R54,R72,R73,R83,R91,R94 2184 301.5672 0.02684206 *
## 69) Region=R11,R22,R24,R25,R42,R52,R53,R74,R82,R93 9380 2176.6410 0.04593593 *
## 35) VehAge< 17.5 40522 11252.6900 0.06240305
## 70) Region=R22,R23,R41,R42,R43,R52,R54,R72,R73,R74,R83,R94 8495 1912.1520 0.04682113
## 140) VehBrand=B1,B12,B13,B14,B2,B4 6444 1333.4640 0.04133264 *
## 141) VehBrand=B10,B11,B3,B5,B6 2051 567.8954 0.06471031 *
## 71) Region=R11,R21,R24,R25,R26,R31,R53,R82,R91,R93 32027 9311.1420 0.06644108
## 142) Density< 401 19415 5279.9410 0.05907223
## 284) DrivAge>=49.5 12606 3352.5100 0.05492159
## 568) Region=R25,R31,R91,R93 1642 262.5201 0.03323221 *
## 569) Region=R11,R21,R24,R26,R53,R82 10964 3078.1230 0.05767125 *
## 285) DrivAge< 49.5 6809 1919.2080 0.06784897
## 570) Density< 42.5 2133 520.5560 0.05040382 *
## 571) Density>=42.5 4676 1388.0250 0.07660286 *
## 143) Density>=401 12612 4001.2470 0.07927352
## 286) VehBrand=B11,B12,B13,B14,B6 1770 343.5664 0.04489425 *
## 287) VehBrand=B1,B10,B2,B3,B4,B5 10842 3635.8430 0.08470103
## 574) VehAge>=15.5 2873 762.5905 0.06505433 *
## 575) VehAge< 15.5 7969 2861.0690 0.09193148
## 1150) Density>=3319 2342 640.3034 0.07026589 *
## 1151) Density< 3319 5627 2209.7840 0.10069150 *
## 9) VehAge< 12.5 298058 84975.6700 0.07613833
## 18) DrivAge< 39.5 66195 15115.7700 0.05802313
## 36) Density< 1313.5 49216 10648.7500 0.05214276
## 72) Region=R24,R25,R26,R52,R54,R74,R83,R94 22929 4833.0650 0.04577503
## 144) VehBrand=B1,B10,B11,B13,B2,B3 17572 3551.1080 0.04267205 *
## 145) VehBrand=B12,B14,B4,B5,B6 5357 1271.3130 0.05696684 *
## 73) Region=R11,R21,R22,R23,R31,R41,R42,R43,R53,R72,R73,R82,R91,R93 26287 5791.5120 0.05906762
## 146) VehPower< 9.5 23399 4977.3270 0.05650441 *
## 147) VehPower>=9.5 2888 801.6129 0.08252970 *
## 37) Density>=1313.5 16979 4398.1560 0.07759025
## 74) VehPower< 5.5 5907 1256.4380 0.05839881
## 148) Region=R23,R25,R26,R41,R52,R54,R73,R82,R83,R91 2104 274.3783 0.03185520 *
## 149) Region=R11,R21,R22,R24,R31,R42,R43,R53,R72,R74,R93,R94 3803 957.0218 0.07403195 *
## 75) VehPower>=5.5 11072 3116.9120 0.08869021
## 150) VehBrand=B1,B12,B13,B14,B2,B3,B6 9307 2471.3790 0.08128688
## 300) VehPower< 8.5 7363 1811.7480 0.07399171 *
## 301) VehPower>=8.5 1944 648.2979 0.11096580 *
## 151) VehBrand=B10,B11,B4,B5 1765 629.8030 0.12638830 *
## 19) DrivAge>=39.5 231863 69643.0500 0.08098350
## 38) BonusMalus< 51.5 216252 64134.4000 0.07877503
## 76) Region=R11,R21,R22,R23,R26,R31,R41,R42,R52,R54,R72,R73,R74,R83,R93 105948 26465.1300 0.06960676
## 152) Density< 1989.5 74868 18225.8100 0.06524732
## 304) DrivAge>=56.5 26785 6028.5020 0.05488491
## 608) Region=R11,R21,R22,R26,R31,R41,R42,R72,R73,R74,R83 13470 2731.6240 0.04790029 *
## 609) Region=R23,R52,R54,R93 13315 3282.6240 0.06201679 *
## 305) DrivAge< 56.5 48083 12154.5700 0.07165684
## 610) Region=R31,R41,R73,R74 10681 2385.2310 0.05903584 *
## 611) Region=R11,R21,R22,R23,R26,R42,R52,R54,R72,R83,R93 37402 9751.8060 0.07539822
## 1222) DrivAge< 44.5 10722 2522.4130 0.06638516
## 2444) VehBrand=B1,B11,B12,B14,B2,B6 8447 1787.7710 0.05860208 *
## 2445) VehBrand=B10,B13,B3,B4,B5 2275 718.5544 0.09287882 *
## 1223) DrivAge>=44.5 26680 7220.6700 0.07901836
## 2446) DrivAge>=45.5 24359 6459.0060 0.07619470
## 4892) VehBrand=B12,B3,B4,B6 12793 2867.7440 0.06664958
## 9784) VehPower< 9.5 10636 2237.4820 0.06140731 *
## 9785) VehPower>=9.5 2157 618.4707 0.09422875 *
## 4893) VehBrand=B1,B10,B11,B13,B14,B2,B5 11566 3577.7260 0.08452261
## 9786) Region=R11,R23,R52,R72 5962 1700.1500 0.07390865 *
## 9787) Region=R21,R22,R26,R42,R54,R83,R93 5604 1867.1530 0.09657541 *
## 2447) DrivAge< 45.5 2321 747.4779 0.11049280 *
## 153) Density>=1989.5 31080 8200.1240 0.08100467
## 306) VehPower< 5.5 11045 2568.3810 0.06808386 *
## 307) VehPower>=5.5 20035 5612.8700 0.08832735
## 614) Region=R11,R21,R22,R23,R26,R42,R52,R54,R73,R83 12133 3064.6610 0.07835707 *
## 615) Region=R31,R41,R72,R93 7902 2531.8720 0.10291930 *
## 77) Region=R24,R25,R43,R53,R82,R91,R94 110304 37561.3200 0.08579789
## 154) Density< 228.5 58065 19009.8400 0.07904320
## 308) VehBrand=B10,B11,B12,B13,B14,B3,B4,B6 19142 5067.4320 0.06765141 *
## 309) VehBrand=B1,B2,B5 38923 13915.0700 0.08359497
## 618) VehAge>=7.5 16114 5510.7540 0.07713346 *
## 619) VehAge< 7.5 22809 8393.9210 0.08821321 *
## 155) Density>=228.5 52239 18504.5000 0.09389478
## 310) VehBrand=B10,B11,B12,B14,B3,B6 19721 5727.1760 0.08337724
## 620) VehPower< 5.5 6137 1566.7870 0.06862881 *
## 621) VehPower>=5.5 13584 4147.0800 0.08994695 *
## 311) VehBrand=B1,B13,B2,B4,B5 32518 12757.6800 0.09899298
## 622) Density>=1304.5 10947 3924.6080 0.08990584 *
## 623) Density< 1304.5 21571 8823.5190 0.10349860
## 1246) Region=R24,R43,R53,R82 18439 7291.7580 0.10005980
## 2492) DrivAge< 44.5 2683 814.2055 0.07659123 *
## 2493) DrivAge>=44.5 15756 6465.0580 0.10373640
## 4986) VehAge>=7.5 6306 2450.6350 0.09200427 *
## 4987) VehAge< 7.5 9450 4004.1140 0.11146930 *
## 1247) Region=R25,R91,R94 3132 1519.5420 0.12925320 *
## 39) BonusMalus>=51.5 15611 5393.3940 0.11508240
## 78) VehBrand=B10,B12,B14,B6 5515 1513.8280 0.09284452 *
## 79) VehBrand=B1,B11,B13,B2,B3,B4,B5 10096 3863.1170 0.12470450
## 158) VehAge>=9.5 2525 840.4064 0.09708720 *
## 159) VehAge< 9.5 7571 3009.7310 0.13405600 *
## 5) BonusMalus>=57.5 188165 65202.0000 0.14243220
## 10) BonusMalus< 95.5 165358 52997.0800 0.12484240
## 20) DrivAge< 38.5 110769 31603.5600 0.10380880
## 40) BonusMalus< 76.5 64966 17061.6200 0.08968541
## 80) VehAge>=11.5 15481 3474.8790 0.06784939
## 160) DrivAge< 31.5 8044 1548.4800 0.05291392
## 320) Region=R11,R21,R22,R23,R24,R25,R41,R42,R43,R52,R53,R54,R72,R73,R83,R91,R94 5742 1022.2310 0.04573780 *
## 321) Region=R26,R31,R74,R82,R93 2302 515.8830 0.07432589 *
## 161) DrivAge>=31.5 7437 1899.7220 0.08425223
## 322) VehGas=Regular 4350 1019.5110 0.07097273 *
## 323) VehGas=Diesel 3087 869.2496 0.10428000 *
## 81) VehAge< 11.5 49485 13531.5000 0.09685020
## 162) DrivAge< 32.5 32503 8460.2690 0.08724457
## 324) VehPower< 5.5 12743 2910.9910 0.07071987
## 648) Region=R22,R24,R25,R26,R42,R43,R53,R54,R72,R73,R82,R83,R94 7191 1547.5760 0.05899664
## 1296) VehBrand=B1,B10,B11,B12,B13,B14,B2,B6 5048 898.9420 0.04590341
## 2592) BonusMalus< 65.5 1726 227.6364 0.02934943 *
## 2593) BonusMalus>=65.5 3322 661.0961 0.05581604 *
## 1297) VehBrand=B3,B4,B5 2143 625.5721 0.08852834 *
## 649) Region=R11,R21,R23,R31,R41,R52,R74,R91,R93 5552 1344.6160 0.08992865 *
## 325) VehPower>=5.5 19760 5516.4720 0.09863767
## 650) Region=R24,R25,R42,R43,R54,R83 5443 1370.3410 0.07664061 *
## 651) Region=R11,R21,R22,R23,R26,R31,R41,R52,R53,R72,R73,R74,R82,R91,R93,R94 14317 4125.1660 0.10883050
## 1302) VehPower< 8.5 11760 3218.2300 0.10187000 *
## 1303) VehPower>=8.5 2557 894.9984 0.14190710 *
## 163) DrivAge>=32.5 16982 5030.7460 0.11556440
## 326) VehBrand=B12,B2,B6 8465 2307.6030 0.09979518
## 652) Region=R21,R23,R25,R26,R31,R43,R53,R72,R83 1861 345.7515 0.06407078 *
## 653) Region=R11,R22,R24,R41,R42,R52,R54,R73,R74,R82,R91,R93,R94 6604 1947.9480 0.10921220 *
## 327) VehBrand=B1,B10,B11,B13,B14,B3,B4,B5 8517 2707.8920 0.13012990
## 654) BonusMalus>=63.5 5702 1677.5910 0.11532150
## 1308) BonusMalus< 66.5 1551 347.5432 0.08082072 *
## 1309) BonusMalus>=66.5 4151 1319.1610 0.12855510 *
## 655) BonusMalus< 63.5 2815 1018.8670 0.15594420 *
## 41) BonusMalus>=76.5 45803 14399.1900 0.12568500
## 82) VehAge>=12.5 10812 2904.6420 0.09594250 *
## 83) VehAge< 12.5 34991 11448.0700 0.13531570
## 166) BonusMalus< 90.5 26750 8401.0270 0.12438590
## 332) VehPower< 4.5 5104 1357.9540 0.09142491 *
## 333) VehPower>=4.5 21646 7017.1810 0.13247430
## 666) Region=R22,R23,R24,R25,R31,R41,R43,R52,R54,R73,R93 12129 3618.0500 0.11536780 *
## 667) Region=R11,R21,R26,R42,R53,R72,R74,R82,R83,R91,R94 9517 3373.0050 0.15517130 *
## 167) BonusMalus>=90.5 8241 3006.3750 0.17192520
## 334) VehBrand=B10,B12,B14 1749 434.3224 0.10286260 *
## 335) VehBrand=B1,B11,B13,B2,B3,B4,B5,B6 6492 2546.0760 0.18903850
## 670) VehAge>=9.5 2320 716.1324 0.13698320 *
## 671) VehAge< 9.5 4172 1808.6120 0.21646810 *
## 21) DrivAge>=38.5 54589 20922.9000 0.16302950
## 42) BonusMalus>=63.5 38382 13646.4300 0.14435880
## 84) BonusMalus< 64.5 4399 1200.1970 0.08801653 *
## 85) BonusMalus>=64.5 33983 12385.5900 0.15178820
## 170) Region=R11,R26,R42,R72,R73,R91,R93 14718 4373.2270 0.12599140
## 340) Region=R91 2179 459.3754 0.07582583 *
## 341) Region=R11,R26,R42,R72,R73,R93 12539 3889.5710 0.13448370
## 682) VehBrand=B12,B13 4452 1207.5850 0.10578680 *
## 683) VehBrand=B1,B10,B11,B14,B2,B3,B4,B5,B6 8087 2664.4250 0.14921400 *
## 171) Region=R21,R22,R23,R24,R25,R31,R41,R43,R52,R53,R54,R74,R82,R83,R94 19265 7966.7170 0.16800700
## 342) VehAge>=10.5 8036 2999.3710 0.13901220 *
## 343) VehAge< 10.5 11229 4931.5830 0.18846460
## 686) VehBrand=B12 2316 771.8453 0.13166220 *
## 687) VehBrand=B1,B10,B11,B13,B14,B2,B3,B4,B5,B6 8913 4137.4240 0.20013210
## 1374) DrivAge< 42.5 2159 795.6395 0.14723640 *
## 1375) DrivAge>=42.5 6754 3321.4790 0.21503280
## 2750) VehGas=Regular 2895 1380.8090 0.18499890 *
## 2751) VehGas=Diesel 3859 1927.8010 0.23927630 *
## 43) BonusMalus< 63.5 16207 7159.4520 0.20232160
## 86) BonusMalus< 61.5 10176 3815.1150 0.14617820
## 172) BonusMalus>=58.5 6346 2013.9200 0.10859960 *
## 173) BonusMalus< 58.5 3830 1726.0600 0.20124040 *
## 87) BonusMalus>=61.5 6031 3129.4600 0.29480540
## 174) Region=R11,R22,R23,R42,R43,R72,R73,R83,R91,R93,R94 2426 922.3735 0.18894920 *
## 175) Region=R21,R24,R25,R26,R31,R41,R52,R53,R54,R74,R82 3605 2137.4200 0.34974680 *
## 11) BonusMalus>=95.5 22807 10917.3600 0.29983630
## 22) VehBrand=B12,B4 4937 1884.8830 0.18777630
## 44) VehAge< 1.5 1608 443.6318 0.10847520 *
## 45) VehAge>=1.5 3329 1406.3820 0.22732720 *
## 23) VehBrand=B1,B10,B11,B13,B14,B2,B3,B5,B6 17870 8921.1320 0.33146650
## 46) BonusMalus< 100.5 12859 5822.5180 0.28359990
## 92) DrivAge>=24.5 7487 2951.8330 0.23091300
## 184) Region=R22,R23,R26,R31,R41,R43,R72,R83,R91,R94 1558 418.5323 0.13751210 *
## 185) Region=R11,R21,R24,R25,R42,R52,R53,R54,R73,R74,R82,R93 5929 2506.0490 0.25409250
## 370) Density< 853.5 2704 1019.0840 0.21141790 *
## 371) Density>=853.5 3225 1473.9180 0.29120140 *
## 93) DrivAge< 24.5 5372 2807.1730 0.36763920
## 186) Area=A,C 2145 944.1089 0.26679580 *
## 187) Area=B,D,E,F 3227 1833.2270 0.43304140 *
## 47) BonusMalus>=100.5 5011 3024.1450 0.41118840
## 94) BonusMalus< 120.5 3404 1992.1600 0.37295420 *
## 95) BonusMalus>=120.5 1607 1013.0520 0.49445540 *
## 3) VehAge< 0.5 51948 26933.7400 0.31504200
## 6) VehBrand=B1,B10,B11,B13,B14,B2,B3,B4,B5,B6 18572 5244.5510 0.12628530
## 12) BonusMalus< 61.5 14651 4003.1770 0.11307060
## 24) Region=R26,R31,R41,R42,R54,R72,R73,R74,R82,R91,R93 7184 1561.2800 0.08583189 *
## 25) Region=R11,R21,R22,R23,R24,R25,R43,R52,R53,R83,R94 7467 2408.4170 0.13922660
## 50) VehPower>=6.5 3364 839.6734 0.10574280 *
## 51) VehPower< 6.5 4103 1551.0830 0.16587490 *
## 13) BonusMalus>=61.5 3921 1206.8590 0.18215570 *
## 7) VehBrand=B12 33376 20269.5400 0.45580580
## 14) VehGas=Diesel 16691 6309.3660 0.23662770
## 28) VehPower< 4.5 2888 419.4473 0.07331867 *
## 29) VehPower>=4.5 13803 5762.8930 0.26973000
## 58) VehPower>=7.5 7070 2229.2200 0.19525490
## 116) VehPower< 9.5 3962 608.3534 0.08496435 *
## 117) VehPower>=9.5 3108 1480.8970 0.32727070 *
## 59) VehPower< 7.5 6733 3458.3520 0.34580160
## 118) Region=R11,R21,R23,R24,R31,R53,R72,R73,R83,R94 3616 1598.8090 0.27410270 *
## 119) Region=R22,R25,R26,R41,R42,R43,R52,R54,R74,R82,R91,R93 3117 1829.8880 0.42715720 *
## 15) VehGas=Regular 16685 13042.6300 0.66668610
## 30) VehPower>=9.5 2427 1196.5010 0.30341220 *
## 31) VehPower< 9.5 14258 11670.6500 0.72735490
## 62) VehPower< 4.5 6073 3997.1800 0.54995700
## 124) DrivAge>=56.5 1660 785.1871 0.31200350 *
## 125) DrivAge< 56.5 4413 3146.9050 0.63719760 *
## 63) VehPower>=4.5 8185 7559.7260 0.84731730
## 126) VehPower>=6.5 3513 2506.3070 0.56742610 *
## 127) VehPower< 6.5 4672 4896.6810 1.05032900
## 254) DrivAge>=41.5 3030 3124.8750 0.94912390 *
## 255) DrivAge< 41.5 1642 1751.2660 1.21310500 *
rpart.plot(tree1)
## Warning: labs do not fit even at cex 0.15, there may be some overplotting
average_loss <- cbind(tree1$cptable[,2], tree1$cptable[,3], tree1$cptable[,3]* tree1$frame$dev[1] / n_l)
plot(x=average_loss[,1], y=average_loss[,3]*100, type='l', col="blue", ylim=c(30.5,33.5),
xlab="number of splits", ylab="average in-sample loss (in 10^(-2))",
main="decrease of in-sample loss")
points(x=average_loss[,1], y=average_loss[,3]*100, pch=19, col="blue")
learn$fit <- predict(tree1)*learn$Exposure
test$fit <- predict(tree1, newdata=test)*test$Exposure
c(Poisson.Deviance(learn$fit, learn$ClaimNb),Poisson.Deviance(test$fit, test$ClaimNb))
## [1] 30.50441 30.25269
K <- 10
tree2 <- rpart(cbind(Exposure,ClaimNb) ~ Area + VehPower + VehAge + DrivAge +
BonusMalus + VehBrand + VehGas + Density + Region,
data = learn, method="poisson",
control=rpart.control(xval=K, minbucket=1500, cp=0.00005))
printcp(tree2)
##
## Rates regression tree:
## rpart(formula = cbind(Exposure, ClaimNb) ~ Area + VehPower +
## VehAge + DrivAge + BonusMalus + VehBrand + VehGas + Density +
## Region, data = learn, method = "poisson", control = rpart.control(xval = K,
## minbucket = 1500, cp = 5e-05))
##
## Variables actually used in tree construction:
## [1] Area BonusMalus Density DrivAge Region VehAge VehBrand
## [8] VehGas VehPower
##
## Root node error: 202513/610212 = 0.33187
##
## n= 610212
##
## CP nsplit rel error xerror xstd
## 1 2.3141e-02 0 1.00000 1.00000 0.0046651
## 2 1.5520e-02 1 0.97686 0.97687 0.0045185
## 3 7.0101e-03 2 0.96134 0.96136 0.0044763
## 4 6.3579e-03 3 0.95433 0.95438 0.0044470
## 5 4.5308e-03 4 0.94797 0.94803 0.0044267
## 6 2.3239e-03 5 0.94344 0.94351 0.0044187
## 7 1.6271e-03 6 0.94112 0.94130 0.0044237
## 8 1.0708e-03 7 0.93949 0.93970 0.0044240
## 9 8.6648e-04 8 0.93842 0.93863 0.0044176
## 10 8.1944e-04 9 0.93755 0.93788 0.0044196
## 11 7.0488e-04 11 0.93591 0.93679 0.0044192
## 12 6.6782e-04 12 0.93521 0.93616 0.0044178
## 13 6.2725e-04 14 0.93387 0.93494 0.0044165
## 14 5.6915e-04 15 0.93325 0.93378 0.0044167
## 15 5.4983e-04 16 0.93268 0.93317 0.0044163
## 16 5.3303e-04 17 0.93213 0.93283 0.0044167
## 17 5.3155e-04 18 0.93159 0.93242 0.0044165
## 18 5.0070e-04 20 0.93053 0.93164 0.0044086
## 19 3.7101e-04 21 0.93003 0.93086 0.0044032
## 20 3.6772e-04 22 0.92966 0.93039 0.0044051
## 21 3.4401e-04 23 0.92929 0.93029 0.0044059
## 22 3.4003e-04 24 0.92895 0.93007 0.0044061
## 23 3.2140e-04 25 0.92861 0.92977 0.0044060
## 24 3.1363e-04 26 0.92829 0.92973 0.0044062
## 25 2.9945e-04 27 0.92797 0.92955 0.0044058
## 26 2.7277e-04 28 0.92767 0.92915 0.0044053
## 27 2.3199e-04 29 0.92740 0.92878 0.0044059
## 28 2.2953e-04 30 0.92717 0.92850 0.0044057
## 29 2.2541e-04 31 0.92694 0.92839 0.0044058
## 30 2.2274e-04 32 0.92671 0.92837 0.0044041
## 31 2.0229e-04 33 0.92649 0.92815 0.0044038
## 32 2.0082e-04 35 0.92609 0.92794 0.0044015
## 33 1.9989e-04 36 0.92588 0.92793 0.0044015
## 34 1.7659e-04 37 0.92568 0.92797 0.0044028
## 35 1.7218e-04 38 0.92551 0.92787 0.0043997
## 36 1.7043e-04 39 0.92534 0.92775 0.0043995
## 37 1.6532e-04 40 0.92517 0.92778 0.0044001
## 38 1.6199e-04 41 0.92500 0.92778 0.0044007
## 39 1.4733e-04 42 0.92484 0.92765 0.0044007
## 40 1.4653e-04 43 0.92469 0.92745 0.0044012
## 41 1.4643e-04 45 0.92440 0.92744 0.0044011
## 42 1.3500e-04 46 0.92425 0.92746 0.0044112
## 43 1.3457e-04 47 0.92412 0.92736 0.0044112
## 44 1.3173e-04 48 0.92398 0.92736 0.0044113
## 45 1.2843e-04 49 0.92385 0.92731 0.0044116
## 46 1.2827e-04 51 0.92359 0.92726 0.0044109
## 47 1.2307e-04 52 0.92346 0.92719 0.0044103
## 48 1.1990e-04 54 0.92322 0.92711 0.0044108
## 49 1.1939e-04 55 0.92310 0.92705 0.0044110
## 50 1.1018e-04 56 0.92298 0.92692 0.0044113
## 51 1.0783e-04 57 0.92287 0.92672 0.0044108
## 52 1.0739e-04 58 0.92276 0.92671 0.0044107
## 53 1.0534e-04 59 0.92265 0.92668 0.0044099
## 54 1.0352e-04 60 0.92255 0.92668 0.0044101
## 55 1.0335e-04 61 0.92245 0.92665 0.0044101
## 56 1.0142e-04 63 0.92224 0.92665 0.0044104
## 57 1.0027e-04 64 0.92214 0.92665 0.0044107
## 58 9.7016e-05 65 0.92204 0.92661 0.0044106
## 59 9.3493e-05 66 0.92194 0.92665 0.0044113
## 60 9.3193e-05 67 0.92185 0.92659 0.0044111
## 61 8.7209e-05 68 0.92175 0.92662 0.0044117
## 62 8.6715e-05 69 0.92167 0.92648 0.0044108
## 63 8.6587e-05 70 0.92158 0.92648 0.0044108
## 64 8.1225e-05 71 0.92149 0.92650 0.0044113
## 65 8.0671e-05 72 0.92141 0.92659 0.0044122
## 66 7.7671e-05 73 0.92133 0.92668 0.0044135
## 67 7.5310e-05 74 0.92125 0.92673 0.0044146
## 68 7.0382e-05 75 0.92118 0.92682 0.0044145
## 69 6.8652e-05 76 0.92111 0.92686 0.0044127
## 70 6.5720e-05 77 0.92104 0.92685 0.0044115
## 71 6.4420e-05 78 0.92097 0.92686 0.0044119
## 72 6.4188e-05 79 0.92091 0.92687 0.0044120
## 73 6.4096e-05 83 0.92065 0.92687 0.0044120
## 74 6.3549e-05 84 0.92059 0.92679 0.0044080
## 75 6.2081e-05 85 0.92052 0.92694 0.0044097
## 76 6.0162e-05 86 0.92046 0.92716 0.0044120
## 77 5.8946e-05 87 0.92040 0.92724 0.0044127
## 78 5.8227e-05 88 0.92034 0.92735 0.0044134
## 79 5.7661e-05 89 0.92028 0.92737 0.0044138
## 80 5.6603e-05 90 0.92022 0.92741 0.0044138
## 81 5.6457e-05 91 0.92017 0.92742 0.0044131
## 82 5.6399e-05 92 0.92011 0.92740 0.0044130
## 83 5.5965e-05 95 0.91994 0.92745 0.0044133
## 84 5.4224e-05 96 0.91989 0.92756 0.0044143
## 85 5.4126e-05 97 0.91983 0.92760 0.0044144
## 86 5.3759e-05 98 0.91978 0.92761 0.0044147
## 87 5.3468e-05 99 0.91972 0.92763 0.0044149
## 88 5.3291e-05 100 0.91967 0.92760 0.0044147
## 89 5.2560e-05 101 0.91962 0.92761 0.0044148
## 90 5.1468e-05 102 0.91956 0.92773 0.0044153
## 91 5.1313e-05 103 0.91951 0.92777 0.0044156
## 92 5.1187e-05 104 0.91946 0.92778 0.0044156
## 93 5.0902e-05 105 0.91941 0.92777 0.0044155
## 94 5.0559e-05 106 0.91936 0.92776 0.0044155
## 95 5.0414e-05 109 0.91921 0.92776 0.0044155
## 96 5.0000e-05 110 0.91916 0.92783 0.0044160
library(plotrix)
library(Hmisc)
## Warning: 程序包'Hmisc'是用R版本4.4.2 来建造的
##
## 载入程序包:'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
set.seed(100)
xgroup <- rep(1:K, length = nrow(learn))
xfit <- xpred.rpart(tree2, xgroup)
(n_subtrees <- dim(tree2$cptable)[1])
## [1] 96
std3 <- numeric(n_subtrees)
err3 <- numeric(n_subtrees)
err_group <- numeric(K)
for (i in 1:n_subtrees){
for (k in 1:K){
ind_group <- which(xgroup ==k)
err_group[k] <- 2*sum(learn[ind_group,"Exposure"]*xfit[ind_group,i]-
learn[ind_group,"ClaimNb"]+log((learn[ind_group,"ClaimNb"]/
(learn[ind_group,"Exposure"]*xfit[ind_group,i]))
^learn[ind_group,"ClaimNb"]))
}
err3[i] <- mean(err_group)
std3[i] <- sd(err_group)
}
average_loss <- cbind(tree2$cptable[,2], tree2$cptable[,3], tree2$cptable[,3]* tree2$frame$dev[1] / n_l)
y1 <- err3*K/n_l
s1 <- K*std3/n_l
x1 <- log10(tree2$cptable[,1])
xmain <- "cross-validation error plot"
xlabel <- "cost-complexity parameter (log-scale)"
ylabel <- "CV error (in 10^(-2))"
errbar(x=x1, y=y1*100, yplus=(y1+s1)*100, yminus=(y1-s1)*100, xlim=rev(range(x1)),
col="blue", main=xmain, ylab=ylabel, xlab=xlabel)
lines(x=x1, y=y1*100, col="blue")
abline(h=c(min(y1+s1)*100), lty=1, col="orange")
abline(h=c(min(y1)*100), lty=1, col="magenta")
legend(x="topright", col=c("blue", "orange", "magenta"), lty=c(1,1,1),
lwd=c(1,1,1), pch=c(19,-1,-1), legend=c("tree2", "1-SD rule", "min.CV rule"))
learn$fit <- predict(tree2)*learn$Exposure
test$fit <- predict(tree2, newdata=test)*test$Exposure
c(Poisson.Deviance(learn$fit, learn$ClaimNb),Poisson.Deviance(test$fit, test$ClaimNb))
## [1] 30.50441 30.25269
tree3 <- prune(tree2, cp=0.0001)
rpart.plot(tree3)
## Warning: labs do not fit even at cex 0.15, there may be some overplotting
learn$fit <- predict(tree3)*learn$Exposure
test$fit <- predict(tree3, newdata=test)*test$Exposure
c(Poisson.Deviance(learn$fit, learn$ClaimNb),Poisson.Deviance(test$fit, test$ClaimNb))
## [1] 30.59995 30.24319
第二题set.seed(100)
ll <- sample(c(1:nrow(freMTPL2freq)), round(0.9*nrow(freMTPL2freq)), replace = FALSE)
learn <- freMTPL2freq[ll,]
test <- freMTPL2freq[-ll,]
n_l = nrow(learn)
n_t = nrow(test)
library(rpart)
J0 <- 3 #depth of tree
M0 <- 50 #iterations
learn$fit0 <- learn$Exposure
test$fit0 <- test$Exposure
for (m in 1:M0){
PBM.1 <- rpart(cbind(fit0,ClaimNb) ~ Area + VehPower + VehAge + DrivAge
+ BonusMalus + VehBrand + VehGas + Density + Region,
data=learn, method="poisson",
control=rpart.control(maxdepth=J0, maxsurrogate=0, xval=1,
minbucket=10000, cp=0.00005))
learn$fit0 <- learn$fit0 * predict(PBM.1)
learn[,paste("PBM_",m, sep="")] <- learn$fit0
test$fit0 <- test$fit0 * predict(PBM.1, newdata=test)
test[,paste("PBM_",m, sep="")] <- test$fit0
}
losses <- array(NA, c(2,M0))
for (m in 1:M0){
losses[1,m] <- 200*(sum(learn[,paste("PBM_",m, sep="")])-sum(learn$ClaimNb)+
sum(log((learn$ClaimNb/learn[,paste("PBM_",m, sep="")])^
(learn$ClaimNb))))/n_l
losses[2,m] <- 200*(sum(test[,paste("PBM_",m, sep="")])-sum(test$ClaimNb)+
sum(log((test$ClaimNb/test[,paste("PBM_",m, sep="")])^
(test$ClaimNb))))/n_t
}
losses[,M0]
## [1] 30.56398 30.18851
plot(x=c(0:M0), y=c(32.93518, losses[1,]), type='l', col="red", ylim=c(30,33.5),
xlab="number of iterations", ylab="average in-sample loss (in 10^(-2))",
main=paste("decrease of in-sample loss (depth=", J0,")", sep=""))
points(x=c(0:M0), y=c(32.93518, losses[1,]), pch=19, col="red")
plot(x=c(1:M0), y=losses[2,], type='l', lwd=2, col="red", ylim=c(30,31.5),
xlab="number of iterations", ylab="average out-of-sample loss (in 10^(-2))",
main="decrease of out-of-sample loss")