2 Final Variable Selection By Algorithm

Linde Berbers

2021-06-30

Install packages

#install.packages('sp')
#install.packages('raster')
#install.packages('rpart')
#install.packages("knitr")
#install.packages("leaflet")
#install.packages("DT")

Load packages

library(sp)
library(raster)
library(rpart)
library(knitr)
library(leaflet)
library(DT)

Load data

# Restore the saved workspace objects needed by the models below. The location
# is an absolute path on the original author's machine, so it is kept in one
# variable that can be edited when the script is run elsewhere.
eureka_data_path <- "C:/Users/Linde/Desktop/Scriptie/Schrijven begin/Textuele conceptversie/Final Conceptversion MSc Linde Berbers/Supplementary Information/Supplementary Data 2. RStudio Data/Script Data/EurekaFirst_Brick_df.RData"

# Fail early with an actionable message instead of load()'s generic
# "cannot open compressed file" connection error.
if (!file.exists(eureka_data_path)) {
  stop(
    "Data file not found: ", eureka_data_path,
    "\nEdit 'eureka_data_path' so that it points to EurekaFirst_Brick_df.RData.",
    call. = FALSE
  )
}

load(eureka_data_path)

# Every later step reads `EurekaFirst_Brick_df`, so confirm it is available
# before any model is fitted.
if (!exists("EurekaFirst_Brick_df")) {
  stop(
    "Object 'EurekaFirst_Brick_df' is not available after loading ",
    eureka_data_path,
    call. = FALSE
  )
}

Make rpart model & choose the best cp (complexity parameter) by minimal xerror (cross-validated error)

# Grow a deliberately large classification tree (cp = 0.0001) so that the
# cross-validated error (xerror) can be compared over many subtree sizes.
#
# rpart's built-in cross-validation splits the observations into folds at
# random, so without a fixed seed the xerror/xstd columns (and therefore the
# cp chosen from them) change from run to run. The seed value is arbitrary;
# it only has to stay fixed.
set.seed(20210630)

Rpart.Choose <- rpart(
  HigherThan1km ~ Land + GeoMean + Lithology + Temp + DistTrans + DistFault +
    Faults + DistPlateBound + Precip + DistTrench + DistVolc + Volc + Trans +
    Trench + PlateBound + SeaSedThick + SeaFloorAge + Ophiolites + Ridge,
  data = EurekaFirst_Brick_df,
  method = "class",
  cp = 0.0001
)

# Print the complexity-parameter table: one row per candidate subtree with its
# CP, number of splits, relative error, cross-validated error and its std. error.
printcp(Rpart.Choose)
#> 
#> Classification tree:
#> rpart(formula = HigherThan1km ~ Land + GeoMean + Lithology + 
#>     Temp + DistTrans + DistFault + Faults + DistPlateBound + 
#>     Precip + DistTrench + DistVolc + Volc + Trans + Trench + 
#>     PlateBound + SeaSedThick + SeaFloorAge + Ophiolites + Ridge, 
#>     data = EurekaFirst_Brick_df, method = "class", cp = 1e-04)
#> 
#> Variables actually used in tree construction:
#> [1] DistFault      DistPlateBound DistTrans      DistTrench     DistVolc      
#> [6] GeoMean        Lithology      Precip         Temp          
#> 
#> Root node error: 4363/180000 = 0.024239
#> 
#> n= 180000 
#> 
#>            CP nsplit rel error  xerror      xstd
#> 1  0.73458629      0  1.000000 1.00000 0.0149547
#> 2  0.03082741      1  0.265414 0.26679 0.0077944
#> 3  0.01822141      3  0.203759 0.20766 0.0068815
#> 4  0.00389640      5  0.167316 0.17671 0.0063505
#> 5  0.00297960      6  0.163420 0.17396 0.0063011
#> 6  0.00229200      8  0.157460 0.17121 0.0062513
#> 7  0.00194820     10  0.152876 0.17121 0.0062513
#> 8  0.00183360     12  0.148980 0.16686 0.0061716
#> 9  0.00143250     14  0.145313 0.16319 0.0061037
#> 10 0.00137520     21  0.133624 0.16250 0.0060909
#> 11 0.00126060     24  0.129498 0.16159 0.0060738
#> 12 0.00114600     26  0.126977 0.15815 0.0060090
#> 13 0.00091680     30  0.121705 0.15884 0.0060220
#> 14 0.00080220     38  0.114371 0.15609 0.0059699
#> 15 0.00076400     40  0.112766 0.15242 0.0058996
#> 16 0.00075309     43  0.110474 0.14898 0.0058329
#> 17 0.00068760     52  0.103598 0.14898 0.0058329
#> 18 0.00061120     54  0.102223 0.14852 0.0058240
#> 19 0.00057300     57  0.100390 0.14852 0.0058240
#> 20 0.00045840     62  0.097410 0.14852 0.0058240
#> 21 0.00022920     66  0.095576 0.14852 0.0058240
#> 22 0.00015280     76  0.093055 0.15150 0.0058819
#> 23 0.00010000     79  0.092597 0.14990 0.0058508

Make new rpart model with the optimal cp & note which variables are actually used in tree construction

# Take the cp straight from the cross-validation table of the large tree
# instead of copying a rounded value by hand. A hand-copied cp goes stale
# whenever the table changes, and rounding can move it across a CP boundary
# so that a different subtree is selected than the row it was read from.
cp_table <- Rpart.Choose$cptable

# which.min() returns the first minimum. The table is ordered by decreasing
# CP, so ties in xerror resolve to the smallest of the tied trees.
best_cp <- cp_table[which.min(cp_table[, "xerror"]), "CP"]

# Trim the already-fitted tree back to the selected cp; the result is an rpart
# object, so printcp() and other rpart methods keep working on it.
Rpart.Choose.cp <- rpart::prune(Rpart.Choose, cp = best_cp)

# The "Variables actually used in tree construction" section of this output
# lists the predictors retained by the pruned tree.
printcp(Rpart.Choose.cp)
#> 
#> Classification tree:
#> rpart(formula = HigherThan1km ~ Land + GeoMean + Lithology + 
#>     Temp + DistTrans + DistFault + Faults + DistPlateBound + 
#>     Precip + DistTrench + DistVolc + Volc + Trans + Trench + 
#>     PlateBound + SeaSedThick + SeaFloorAge + Ophiolites + Ridge, 
#>     data = EurekaFirst_Brick_df, method = "class", cp = 0.0009168)
#> 
#> Variables actually used in tree construction:
#> [1] DistFault      DistPlateBound DistTrans      DistTrench     DistVolc      
#> [6] GeoMean        Lithology      Precip         Temp          
#> 
#> Root node error: 4363/180000 = 0.024239
#> 
#> n= 180000 
#> 
#>           CP nsplit rel error  xerror      xstd
#> 1  0.7345863      0   1.00000 1.00000 0.0149547
#> 2  0.0308274      1   0.26541 0.26679 0.0077944
#> 3  0.0182214      3   0.20376 0.20766 0.0068815
#> 4  0.0038964      5   0.16732 0.17511 0.0063218
#> 5  0.0029796      6   0.16342 0.17121 0.0062513
#> 6  0.0022920      8   0.15746 0.16961 0.0062221
#> 7  0.0019482     10   0.15288 0.16594 0.0061547
#> 8  0.0018336     12   0.14898 0.16457 0.0061293
#> 9  0.0014325     14   0.14531 0.16136 0.0060695
#> 10 0.0013752     21   0.13362 0.15952 0.0060350
#> 11 0.0012606     24   0.12950 0.15769 0.0060004
#> 12 0.0011460     26   0.12698 0.15654 0.0059786
#> 13 0.0009168     30   0.12171 0.15333 0.0059172
#> 14 0.0009168     38   0.11437 0.15219 0.0058952