En la última parte de este tutorial realizamos una clasificación SVM con un núcleo radial para las coberturas terrestres de la zona seca de los Andes colombianos en Santander. La tarea de este tutorial es evaluar (ejecutar y ajustar) los otros dos núcleos. Luego, debes comparar los tres modelos de kernel (lineal, radial y sigmoide) y proponer el mejor para realizar una clasificación final de las coberturas del suelo de la zona seca de los Andes colombianos en Santander. Debes enviar el código y tres líneas de texto que expliquen la selección de tu modelo.
{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE)
# Start from an empty workspace: drop every object (hidden ones included)
# from the current environment, then trigger a garbage collection and print
# its memory report.
# NOTE(review): when the document is knitted in a fresh R session this wipe
# is redundant; run interactively it deletes the user's whole workspace.
rm(
  list = ls(envir = environment(), all.names = TRUE),
  envir = environment()
)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 518072 27.7 1158105 61.9 644200 34.5
## Vcells 923405 7.1 8388608 64.0 1634597 12.5
library(e1071)
#setwd("")
# Read the Santander 60 m sample table.
# NOTE(review): the path is absolute and specific to one machine; adjust it
# (or the working directory) before running elsewhere.
TABLA <- read.csv(file = "C:/Users/luisc/Downloads/TABLA_SANTANDER_60m.csv")
# Keep column 1 (the class label, "Cover") plus columns 6 to 20, which are
# the 15 spectral bands / indices used as predictors further down.
TABLA_1 <- TABLA[, c(1, 6:20)]
# Structural checks; the `##` lines are the console output recorded when the
# report was knitted.
dim(TABLA_1)
## [1] 1102 16
names(TABLA_1)
## [1] "Cover" "BLUE_2019" "GREEN_2019" "RED_2019"
## [5] "RED_EDGE_1_2019" "RED_EDGE_2_2019" "RED_EDGE_3_2019" "NIR_2019"
## [9] "RED_EDGE_4_2019" "SWIR_1_2019" "SWIR_2_2019" "NDVI_2019"
## [13] "EVI_2019" "SAVI_2019" "SVVI_2019" "RNDVI_2019"
str(TABLA_1)
## 'data.frame': 1102 obs. of 16 variables:
## $ Cover : chr "Water" "Water" "Wild_grass" "Wild_grass" ...
## $ BLUE_2019 : int 954 1038 473 441 463 421 424 1197 386 1088 ...
## $ GREEN_2019 : int 1191 1277 669 618 658 599 605 1495 627 1346 ...
## $ RED_2019 : int 1352 1418 675 631 715 570 577 1651 496 1528 ...
## $ RED_EDGE_1_2019: int 1439 1496 1029 965 1040 935 947 1747 1032 1624 ...
## $ RED_EDGE_2_2019: int 1309 1390 1505 1457 1440 1529 1528 1660 2172 1494 ...
## $ RED_EDGE_3_2019: int 1401 1471 1673 1636 1603 1708 1694 1753 2484 1557 ...
## $ NIR_2019 : int 1312 1352 1701 1661 1624 1700 1714 1631 2484 1435 ...
## $ RED_EDGE_4_2019: int 1307 1412 1895 1853 1827 1917 1901 1661 2769 1454 ...
## $ SWIR_1_2019 : int 747 1088 2506 2379 2473 2208 2320 916 2035 837 ...
## $ SWIR_2_2019 : int 594 880 1863 1694 1828 1522 1612 710 1149 719 ...
## $ NDVI_2019 : int -209 -497 4161 4345 3802 4811 4739 -387 6566 -367 ...
## $ EVI_2019 : int -95 -157 2105 2119 1838 2360 2366 -78 3960 -214 ...
## $ SAVI_2019 : int -38 -62 912 927 817 1019 1017 -30 1644 -81 ...
## $ SVVI_2019 : int -20 -14 349 332 332 318 322 -34 234 -3 ...
## $ RNDVI_2019 : int 21 -292 648 688 630 567 618 8 685 -251 ...
head(TABLA_1, n = 6L)
## Cover BLUE_2019 GREEN_2019 RED_2019 RED_EDGE_1_2019 RED_EDGE_2_2019
## 1 Water 954 1191 1352 1439 1309
## 2 Water 1038 1277 1418 1496 1390
## 3 Wild_grass 473 669 675 1029 1505
## 4 Wild_grass 441 618 631 965 1457
## 5 Wild_grass 463 658 715 1040 1440
## 6 Wild_grass 421 599 570 935 1529
## RED_EDGE_3_2019 NIR_2019 RED_EDGE_4_2019 SWIR_1_2019 SWIR_2_2019 NDVI_2019
## 1 1401 1312 1307 747 594 -209
## 2 1471 1352 1412 1088 880 -497
## 3 1673 1701 1895 2506 1863 4161
## 4 1636 1661 1853 2379 1694 4345
## 5 1603 1624 1827 2473 1828 3802
## 6 1708 1700 1917 2208 1522 4811
## EVI_2019 SAVI_2019 SVVI_2019 RNDVI_2019
## 1 -95 -38 -20 21
## 2 -157 -62 -14 -292
## 3 2105 912 349 648
## 4 2119 927 332 688
## 5 1838 817 332 630
## 6 2360 1019 318 567
# Random 80/20 train/test split: `train` holds the row indices used for
# fitting; the remaining rows (`-train`) form the hold-out set.
# Fix: the RNG is now seeded BEFORE sampling. Previously sample() ran
# unseeded, so every run produced a different split and none of the tuning
# results or accuracies could be reproduced.
# NOTE(review): the `##` outputs recorded in this report came from an
# unseeded split and will not match a re-run exactly.
set.seed(1)
N <- nrow(TABLA_1)
train <- sample(N, round(N * 0.8))
## RADIAL MODEL
# Grid search over cost x gamma for the radial-kernel SVM on the training
# rows (the recorded summary reports 10-fold cross validation).
# Seeding makes the fold assignment repeatable.
# NOTE(review): the recorded best gamma (0.5) is the smallest value in the
# grid, so the true optimum may lie below the searched range.
set.seed(1)
tune_model_1 <- tune(
  svm,
  as.factor(Cover) ~ .,
  data = TABLA_1[train, ],
  kernel = "radial",
  ranges = list(
    cost = c(0.1, 1, 10, 100, 1000),
    gamma = c(0.5, 1, 2, 3, 4)
  )
)
summary(tune_model_1)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.5
##
## - best performance: 0.3061032
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-01 0.5 0.4421476 0.04098154
## 2 1e+00 0.5 0.3366445 0.04189435
## 3 1e+01 0.5 0.3061032 0.03097161
## 4 1e+02 0.5 0.3220633 0.04653680
## 5 1e+03 0.5 0.3186415 0.04488277
## 6 1e-01 1.0 0.5146834 0.03770369
## 7 1e+00 1.0 0.3378447 0.04940565
## 8 1e+01 1.0 0.3253575 0.03589689
## 9 1e+02 1.0 0.3219356 0.04835619
## 10 1e+03 1.0 0.3230720 0.04898109
## 11 1e-01 2.0 0.6621042 0.05784473
## 12 1e+00 2.0 0.3696246 0.04259976
## 13 1e+01 2.0 0.3616190 0.05207588
## 14 1e+02 2.0 0.3582227 0.05064825
## 15 1e+03 2.0 0.3582227 0.05064825
## 16 1e-01 3.0 0.7980720 0.05966904
## 17 1e+00 3.0 0.4002298 0.02209483
## 18 1e+01 3.0 0.3831588 0.03541793
## 19 1e+02 3.0 0.3854188 0.03415141
## 20 1e+03 3.0 0.3854188 0.03415141
## 21 1e-01 4.0 0.8468590 0.04108734
## 22 1e+00 4.0 0.4217058 0.03939657
## 23 1e+01 4.0 0.4091803 0.04866990
## 24 1e+02 4.0 0.4091803 0.04866990
## 25 1e+03 4.0 0.4091803 0.04866990
# Final radial SVM, refit on the training rows with the hyper-parameters
# selected by tune_model_1 (cost = 10, gamma = 0.5 in the recorded run).
# Fix: the previous call passed scale = FALSE, but tune() evaluated the grid
# with svm()'s default scale = TRUE. A gamma chosen for standardised
# predictors is meaningless on raw reflectances in the thousands: the
# recorded fit kept all 882 training rows as support vectors and reached
# only 0.16 hold-out accuracy against a cross-validated error of 0.31.
# The model is now fitted with the same (default) scaling used for tuning,
# and the parameters are read from the tuning object so they cannot drift.
# NOTE(review): the `##` output recorded below predates this fix; re-knit to
# refresh it.
model_1 <- svm(
  as.factor(Cover) ~ .,
  data = TABLA_1[train, ],
  kernel = "radial",
  gamma = tune_model_1$best.parameters$gamma,
  cost = tune_model_1$best.parameters$cost
)
summary(model_1)
##
## Call:
## svm(formula = as.factor(Cover) ~ ., data = TABLA_1[train, ], kernel = "radial",
## gamma = 0.5, cost = 10, scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 10
##
## Number of Support Vectors: 882
##
## ( 136 149 134 154 103 146 49 11 )
##
##
## Number of Classes: 8
##
## Levels:
## Crop Forest Grassland Ground_NoVeg Infra Shrub Water Wild_grass
## LINEAR MODEL
# Grid search over cost only for the linear-kernel SVM on the training rows.
# Fix: gamma was removed from the grid. The linear kernel does not use
# gamma; the recorded results were identical for every gamma value, so the
# old 5 x 5 grid did five times the work for the same answer.
# NOTE(review): the recorded `##` summary below still shows the old 25-row
# grid; re-knit to refresh it.
set.seed(1)  # make the cross-validation folds reproducible
tune_model_2 <- tune(
  svm,
  as.factor(Cover) ~ .,
  data = TABLA_1[train, ],
  kernel = "linear",
  ranges = list(cost = c(0.1, 1, 10, 100, 1000))
)
summary(tune_model_2)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 100 0.5
##
## - best performance: 0.2392875
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-01 0.5 0.3752937 0.06306831
## 2 1e+00 0.5 0.2914326 0.04725716
## 3 1e+01 0.5 0.2495020 0.05766047
## 4 1e+02 0.5 0.2392875 0.05414997
## 5 1e+03 0.5 0.2495148 0.05028507
## 6 1e-01 1.0 0.3752937 0.06306831
## 7 1e+00 1.0 0.2914326 0.04725716
## 8 1e+01 1.0 0.2495020 0.05766047
## 9 1e+02 1.0 0.2392875 0.05414997
## 10 1e+03 1.0 0.2495148 0.05028507
## 11 1e-01 2.0 0.3752937 0.06306831
## 12 1e+00 2.0 0.2914326 0.04725716
## 13 1e+01 2.0 0.2495020 0.05766047
## 14 1e+02 2.0 0.2392875 0.05414997
## 15 1e+03 2.0 0.2495148 0.05028507
## 16 1e-01 3.0 0.3752937 0.06306831
## 17 1e+00 3.0 0.2914326 0.04725716
## 18 1e+01 3.0 0.2495020 0.05766047
## 19 1e+02 3.0 0.2392875 0.05414997
## 20 1e+03 3.0 0.2495148 0.05028507
## 21 1e-01 4.0 0.3752937 0.06306831
## 22 1e+00 4.0 0.2914326 0.04725716
## 23 1e+01 4.0 0.2495020 0.05766047
## 24 1e+02 4.0 0.2392875 0.05414997
## 25 1e+03 4.0 0.2495148 0.05028507
## LINEAR MODEL WITH THE BEST SETTINGS
# Final linear SVM, refit on the training rows with the cost selected by
# tune_model_2 (cost = 100 in the recorded run).
# Fixes:
#  * scale = FALSE removed: tune() chose the cost with svm()'s default
#    scale = TRUE, so the final fit must use the same preprocessing.
#  * gamma removed: it has no effect on the linear kernel.
# NOTE(review): the `##` output recorded below predates this fix; re-knit to
# refresh it.
model_2 <- svm(
  as.factor(Cover) ~ .,
  data = TABLA_1[train, ],
  kernel = "linear",
  cost = tune_model_2$best.parameters$cost
)
summary(model_2)
##
## Call:
## svm(formula = as.factor(Cover) ~ ., data = TABLA_1[train, ], kernel = "linear",
## gamma = 0.5, cost = 100, scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 100
##
## Number of Support Vectors: 372
##
## ( 59 70 60 42 60 50 27 4 )
##
##
## Number of Classes: 8
##
## Levels:
## Crop Forest Grassland Ground_NoVeg Infra Shrub Water Wild_grass
### sigmoid model
# Grid search over cost x gamma for the sigmoid-kernel SVM on the training
# rows, using the same grid as the radial search.
# NOTE(review): the recorded optimum (cost = 0.1, gamma = 0.5) is the
# smallest value of both grids, so the search range may be too high for
# this kernel.
set.seed(1)  # repeatable cross-validation folds
tune_model_3 <- tune(
  svm,
  as.factor(Cover) ~ .,
  data = TABLA_1[train, ],
  kernel = "sigmoid",
  ranges = list(
    cost = c(0.1, 1, 10, 100, 1000),
    gamma = c(0.5, 1, 2, 3, 4)
  )
)
summary(tune_model_3)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 0.1 0.5
##
## - best performance: 0.6745914
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-01 0.5 0.6745914 0.04737419
## 2 1e+00 0.5 0.7958759 0.05552674
## 3 1e+01 0.5 0.7869127 0.04497599
## 4 1e+02 0.5 0.7925817 0.04072561
## 5 1e+03 0.5 0.8004724 0.04984672
## 6 1e-01 1.0 0.7041369 0.04976961
## 7 1e+00 1.0 0.7642748 0.06675753
## 8 1e+01 1.0 0.7574566 0.06563984
## 9 1e+02 1.0 0.7574821 0.06495345
## 10 1e+03 1.0 0.7631512 0.06432790
## 11 1e-01 2.0 0.7130873 0.05461240
## 12 1e+00 2.0 0.7892109 0.05197532
## 13 1e+01 2.0 0.7880618 0.05347835
## 14 1e+02 2.0 0.7858018 0.05309207
## 15 1e+03 2.0 0.7880746 0.05295646
## 16 1e-01 3.0 0.7233018 0.04543499
## 17 1e+00 3.0 0.7825077 0.06245750
## 18 1e+01 3.0 0.7904239 0.06160497
## 19 1e+02 3.0 0.7960546 0.05224005
## 20 1e+03 3.0 0.7915347 0.05713611
## 21 1e-01 4.0 0.7143003 0.04358522
## 22 1e+00 4.0 0.7881129 0.05387860
## 23 1e+01 4.0 0.7846782 0.04384329
## 24 1e+02 4.0 0.7869510 0.04340727
## 25 1e+03 4.0 0.7914964 0.04144346
## sigmoid model with the best settings
# Final sigmoid SVM, refit on the training rows with the hyper-parameters
# selected by tune_model_3 (cost = 0.1, gamma = 0.5 in the recorded run).
# Fix: scale = FALSE removed. tune() evaluated the grid with svm()'s default
# scale = TRUE; fitting on unscaled predictors with those parameters made
# every one of the 882 training rows a support vector in the recorded fit.
# NOTE(review): the `##` output recorded below predates this fix; re-knit to
# refresh it.
model_3 <- svm(
  as.factor(Cover) ~ .,
  data = TABLA_1[train, ],
  kernel = "sigmoid",
  gamma = tune_model_3$best.parameters$gamma,
  cost = tune_model_3$best.parameters$cost
)
summary(model_3)
##
## Call:
## svm(formula = as.factor(Cover) ~ ., data = TABLA_1[train, ], kernel = "sigmoid",
## gamma = 0.5, cost = 0.1, scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: sigmoid
## cost: 0.1
## coef.0: 0
##
## Number of Support Vectors: 882
##
## ( 136 149 134 154 103 146 49 11 )
##
##
## Number of Classes: 8
##
## Levels:
## Crop Forest Grassland Ground_NoVeg Infra Shrub Water Wild_grass
#install.packages("Metrics")
library(Metrics)
# Hold-out accuracy of each final model: share of the rows NOT in `train`
# whose predicted cover matches the recorded cover.
# The `##` lines are the values recorded when the report was knitted.
accuracy_model_1 <- accuracy(
  actual = TABLA_1[-train, "Cover"],
  predicted = predict(model_1, newdata = TABLA_1[-train, ])
)
accuracy_model_1
## [1] 0.1636364
accuracy_model_2 <- accuracy(
  actual = TABLA_1[-train, "Cover"],
  predicted = predict(model_2, newdata = TABLA_1[-train, ])
)
accuracy_model_2
## [1] 0.6227273
accuracy_model_3 <- accuracy(
  actual = TABLA_1[-train, "Cover"],
  predicted = predict(model_3, newdata = TABLA_1[-train, ])
)
accuracy_model_3
## [1] 0.2545455
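COMPARANDO LOS DIFERENTES MODELOS, EL QUE MEJOR AJUSTE TUVO FUE EL LINEAL, CON UNA PRECISIÓN SUPERIOR AL 60% (62%), FRENTE A LA MUY BAJA PRECISIÓN DE LOS OTROS DOS MODELOS (RADIAL Y SIGMOIDE), QUE LLEGARON APENAS AL 16% Y AL 25%, RESPECTIVAMENTE. ESTO SE PUEDE EXPLICAR POR EL COMPORTAMIENTO LINEAL DEL MÉTODO KERNEL ESTÁNDAR EN CLASIFICACIÓN, PRESENTADO COMO UN HIPERPLANO ÓPTIMO DE SEPARACIÓN QUE ACTÚA COMO FRONTERA ENTRE CLASES. NOTA: ESTAS CIFRAS PROVIENEN DE MODELOS FINALES AJUSTADOS CON scale=FALSE, MIENTRAS QUE LA BÚSQUEDA DE PARÁMETROS (tune) USÓ LOS DATOS ESCALADOS; POR ESO LA PRECISIÓN DEL MODELO RADIAL (16%) NO COINCIDE CON SU ERROR DE VALIDACIÓN CRUZADA (0.31), Y LA COMPARACIÓN DEBE REPETIRSE CON EL MISMO ESCALADO ANTES DE ELEGIR EL MODELO DEFINITIVO.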