Random forest para extraer las variables mas importantes dentro de un conjunto de datos.
library("readxl")
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.0.3
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
library(reshape)
## Warning: package 'reshape' was built under R version 4.0.3
my_data <- read_excel("data_e.xlsx", sheet = 2, col_names = TRUE, col_types = NULL, na = "", skip = 0)
head(my_data, 10)
## # A tibble: 10 x 32
## SOC_TCH SOC_Modelado Aspect BSI EVI Ecosystm Elevation IBI Litologia
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 395. 310. 328. NA NA NA 2.25 NA NA
## 2 120. 295. 354. NA NA NA 1171. NA NA
## 3 548. 438. 302. -0.272 2.41 0 7.83 -0.251 0
## 4 57.2 83.3 353. -0.367 2.62 0 1.04 -0.326 0
## 5 226. 271. 69.9 -0.534 2.56 0 11.7 -0.498 0
## 6 448. 376. 160. -0.464 2.64 0 17.1 -0.441 30
## 7 437. 310. NA -0.492 2.32 398 3.46 -0.473 50
## 8 196. 297. 205. -0.444 2.43 0 5.34 -0.471 0
## 9 245. 299. 214. -0.410 2.22 0 4.82 -0.414 52
## 10 500. 411. 230. -0.488 2.38 0 4.52 -0.491 5
## # ... with 23 more variables: MNDVI <dbl>, MeanCurvature <dbl>, NDFI <dbl>,
## # NDSI <dbl>, NDVI <dbl>, NDWBI <dbl>, NDWI <dbl>, NIRv <dbl>,
## # Precipitacionmean <dbl>, SAVI <dbl>, Slope <dbl>, Suelo_Map <dbl>,
## # Temp_Cmean <dbl>, VH <dbl>, VV <dbl>, `clay_15-30cm_mean` <dbl>,
## # `nitrogen_15-30cm_mean` <dbl>, `ocd_15-30cm_mean` <dbl>,
## # `ocs_0-30cm_mean` <dbl>, `phh2o_15-30cm_mean` <dbl>,
## # `sand_15-30cm_mean` <dbl>, `silt_15-30cm_mean` <dbl>,
## # `soc_15-30cm_mean` <dbl>
my_data <- my_data[,c(1:24)]
head(my_data, 10)
## # A tibble: 10 x 24
## SOC_TCH SOC_Modelado Aspect BSI EVI Ecosystm Elevation IBI Litologia
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 395. 310. 328. NA NA NA 2.25 NA NA
## 2 120. 295. 354. NA NA NA 1171. NA NA
## 3 548. 438. 302. -0.272 2.41 0 7.83 -0.251 0
## 4 57.2 83.3 353. -0.367 2.62 0 1.04 -0.326 0
## 5 226. 271. 69.9 -0.534 2.56 0 11.7 -0.498 0
## 6 448. 376. 160. -0.464 2.64 0 17.1 -0.441 30
## 7 437. 310. NA -0.492 2.32 398 3.46 -0.473 50
## 8 196. 297. 205. -0.444 2.43 0 5.34 -0.471 0
## 9 245. 299. 214. -0.410 2.22 0 4.82 -0.414 52
## 10 500. 411. 230. -0.488 2.38 0 4.52 -0.491 5
## # ... with 15 more variables: MNDVI <dbl>, MeanCurvature <dbl>, NDFI <dbl>,
## # NDSI <dbl>, NDVI <dbl>, NDWBI <dbl>, NDWI <dbl>, NIRv <dbl>,
## # Precipitacionmean <dbl>, SAVI <dbl>, Slope <dbl>, Suelo_Map <dbl>,
## # Temp_Cmean <dbl>, VH <dbl>, VV <dbl>
modelo1 <- randomForest(SOC_Modelado~.,data=my_data,
na.action=na.exclude, ntree=500,importance=TRUE,maxnodes=10,mtry=15)
#Creamos un objeto con las "importancias" de las variables
importancia=data.frame(importance(modelo1))
importancia
## X.IncMSE IncNodePurity
## SOC_TCH 82.7975789 1236497.178
## Aspect 0.6757742 2844.196
## BSI 4.6984524 6898.734
## EVI 1.8391752 3585.374
## Ecosystm -0.6317713 2485.744
## Elevation 4.4431005 10920.708
## IBI 7.1033365 29820.822
## Litologia 0.7528086 2036.331
## MNDVI 5.1785647 12031.317
## MeanCurvature 11.0511140 31182.950
## NDFI 1.7860819 2581.574
## NDSI 3.5711645 10694.038
## NDVI 3.9045482 4328.771
## NDWBI 5.1324803 8794.663
## NDWI 6.5644762 26598.865
## NIRv 4.2853375 10072.954
## Precipitacionmean -1.7314727 3778.785
## SAVI 2.6109916 3178.828
## Slope 8.6014344 85566.273
## Suelo_Map 4.4306046 14687.146
## Temp_Cmean 7.6030904 21324.051
## VH 0.6145729 2790.763
## VV 0.3827696 2185.542
#importancia<-sort_df(importancia,vars='MeanDecreaseGini')
importancia <- importancia[order(-importancia$X.IncMSE),]
importancia
## X.IncMSE IncNodePurity
## SOC_TCH 82.7975789 1236497.178
## MeanCurvature 11.0511140 31182.950
## Slope 8.6014344 85566.273
## Temp_Cmean 7.6030904 21324.051
## IBI 7.1033365 29820.822
## NDWI 6.5644762 26598.865
## MNDVI 5.1785647 12031.317
## NDWBI 5.1324803 8794.663
## BSI 4.6984524 6898.734
## Elevation 4.4431005 10920.708
## Suelo_Map 4.4306046 14687.146
## NIRv 4.2853375 10072.954
## NDVI 3.9045482 4328.771
## NDSI 3.5711645 10694.038
## SAVI 2.6109916 3178.828
## EVI 1.8391752 3585.374
## NDFI 1.7860819 2581.574
## Litologia 0.7528086 2036.331
## Aspect 0.6757742 2844.196
## VH 0.6145729 2790.763
## VV 0.3827696 2185.542
## Ecosystm -0.6317713 2485.744
## Precipitacionmean -1.7314727 3778.785
varImpPlot(modelo1)

dotchart(sort(importancia[,1]), xlim=c(0,90), xlab="%IncMSE")
