3. baskı, Seçkin Yayınevi, Ankara.

Bölüm 5. Özellik Seçimi

Özellik Seçme Yöntemleri

Filtre, sarmal ve gömülü özellik seçme yöntemlerinin çalışma prensibi

Bölüm İçin Gerekli Paketler

# Packages required by this chapter.
# Only packages that are not installed yet are downloaded, so re-running the
# script does not reinstall everything each time. All of them are then
# attached so the functions and data sets used in the rest of the chapter
# can be called directly.
required_pkgs <- c("caret", "FSelector", "partykit", "readxl", "mlbench")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# library() needs character.only = TRUE when the name comes from a variable
invisible(lapply(required_pkgs, library, character.only = TRUE))

Relief - 1

# Raw values: ten records of five attributes each, listed record by record
x <- c(
  80, 50, 40, 50, 62,
  80, 60, 90, 50, 85,
  70, 70, 80, 30, 90,
  50, 60, 50, 45, 60,
  60, 72, 40, 80, 95,
  62, 62, 60, 60, 52,
  64, 64, 70, 45, 48,
  66, 70, 54, 42, 50,
  62, 68, 52, 86, 98,
  70, 60, 62, 52, 55
)
# Lay the values out as a 10 x 5 matrix filled row by row, with the rows
# labelled b1..b10 and the columns labelled by attribute name
M <- matrix(
  x,
  nrow = 10, ncol = 5, byrow = TRUE,
  dimnames = list(paste0("b", 1:10), c("C", "VT", "SQL", "Ist", "VM"))
)

Relief - 2

# Pairwise Euclidean distances between the rows of M, rounded to two decimals
round(stats::dist(M, method = "euclidean"), digits = 2)
##        b1    b2    b3    b4    b5    b6    b7    b8    b9
## b2  55.94                                                
## b3  57.31 26.93                                          
## b4  33.60 56.12 50.25                                    
## b5  53.60 63.59 65.03 52.86                              
## b6  32.68 49.16 53.59 23.17 52.47                        
## b7  39.66 45.45 46.49 27.50 66.44 18.68                  
## b8  31.62 53.67 49.36 21.93 60.87 21.07 17.58            
## b9  58.17 57.42 63.66 57.77 14.46 53.78 67.27 65.30      
## b10 27.15 42.28 46.18 24.86 55.79 12.04 14.63 17.46 56.86

Relief - 3

# Class label for each of the ten rows of M, in row order (b1..b10)
class <- c(0, 1, 1, 0, 1, 0, 0, 0, 1, 0)
# Combine the attribute matrix and the labels into one data frame; the label
# column is named "class"
df <- data.frame(M, class = class)
df
##      C VT SQL Ist VM class
## b1  80 50  40  50 62     0
## b2  80 60  90  50 85     1
## b3  70 70  80  30 90     1
## b4  50 60  50  45 60     0
## b5  60 72  40  80 95     1
## b6  62 62  60  60 52     0
## b7  64 64  70  45 48     0
## b8  66 70  54  42 50     0
## b9  62 68  52  86 98     1
## b10 70 60  62  52 55     0

Relief - 4

# Relief importance of every attribute in df, with `class` as the target
res <- relief(class ~ ., data = df)
res
##     attr_importance
## C       -0.27397564
## VT      -0.06946542
## SQL      0.17461794
## Ist      0.17602041
## VM       0.66584718

Relief - Iris Örneği

# Relief importance of the four iris measurements, with Species as the target
res <- relief(Species ~ ., data = iris)
res
##              attr_importance
## Sepal.Length       0.1422222
## Sepal.Width        0.1212500
## Petal.Length       0.3194915
## Petal.Width        0.2975000

CFS

# Show the first six rows of the data the CFS filter is applied to
head(df, n = 6L)
##     C VT SQL Ist VM class
## b1 80 50  40  50 62     0
## b2 80 60  90  50 85     1
## b3 70 70  80  30 90     1
## b4 50 60  50  45 60     0
## b5 60 72  40  80 95     1
## b6 62 62  60  60 52     0
# Run the CFS filter with `class` as the target and all other columns as
# candidate attributes
res <- cfs(class ~ ., data = df)
res
## [1] "VM"

Ki-kare

# Read the example data from the Excel workbook
veri <- read_excel(path = "data/veri.xlsx")
# Convert every column to a factor; assigning into veri[] keeps the original
# object and its column names
veri[] <- lapply(X = veri, FUN = factor)
# Chi-squared importance of each remaining column, with Y as the target
res <- chi.squared(Y ~ ., data = veri)
res
##    attr_importance
## X1       0.3245096
## X2       0.4935973
## X3       0.3016192

Ki-kare → BreastCancer Örneği - 1

# Load the BreastCancer data set from mlbench explicitly, so this block does
# not depend on the object already being present in the workspace
data("BreastCancer", package = "mlbench")
# Keep columns 2 to 11 only (the first column is dropped)
BreastData <- BreastCancer[, 2:11]
head(BreastData)
##   Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size Bare.nuclei
## 1            5         1          1             1            2           1
## 2            5         4          4             5            7          10
## 3            3         1          1             1            2           2
## 4            6         8          8             1            3           4
## 5            4         1          1             3            2           1
## 6            8        10         10             8            7          10
##   Bl.cromatin Normal.nucleoli Mitoses     Class
## 1           3               1       1    benign
## 2           3               2       1    benign
## 3           3               1       1    benign
## 4           3               7       1    benign
## 5           3               1       1    benign
## 6           9               7       1 malignant

Ki-kare → BreastCancer Örneği - 2

# Chi-squared importance of each attribute in BreastData, with Class as the
# target
res <- chi.squared(Class ~ ., data = BreastData)
res
##                 attr_importance
## Cl.thickness          0.7461821
## Cell.size             0.8819200
## Cell.shape            0.8672678
## Marg.adhesion         0.7464962
## Epith.c.size          0.7986194
## Bare.nuclei           0.8461519
## Bl.cromatin           0.8112600
## Normal.nucleoli       0.7754269
## Mitoses               0.5295016

ERGS - 1

Dikkat: Aşağıdaki uygulamanın yapılabilmesi için kitapta verilen ergs fonksiyonunun yüklenmiş olması gerekir.

# Load the Sonar data set from mlbench explicitly, so this block does not
# depend on the object already being present in the workspace
data("Sonar", package = "mlbench")
# ergs() is not defined in this file; it is the function supplied with the
# book and must already be loaded. The call asks for 10 features (nsf) with a
# threshold of 1 and prints a summary because verbose = TRUE.
res <- ergs(
  Class ~ ., Sonar,
  threshold = 1, nsf = 10, na.rm = TRUE, verbose = TRUE
)
## ---------------------------------------------------------
##  
##  Effective Range based Gene Selection (ERGS) Algorithm 
## 
##  Threshold = 1 
##  Number of selected features = 10 
##  Names of selected features: 
##   V11 V12 V45 V49 V10 V48 V9 V46 V13 V47 
## ---------------------------------------------------------
## 

ERGS - 2

res$selectedFNames # names of the selected features
##  [1] "V11" "V12" "V45" "V49" "V10" "V48" "V9"  "V46" "V13" "V47"
head(res$selectedData, n = 6L) # first rows of the data restricted to the selected features
##      V11    V12    V45    V49    V10    V48     V9    V46    V13    V47 Class
## 1 0.1609 0.1582 0.2641 0.0383 0.2111 0.1343 0.3109 0.1386 0.2238 0.1051     R
## 2 0.4918 0.6552 0.0621 0.0409 0.2872 0.0742 0.3337 0.0203 0.6919 0.0530     R
## 3 0.6333 0.7060 0.2111 0.0130 0.6194 0.0744 0.5598 0.0176 0.5544 0.1348     R
## 4 0.0881 0.1992 0.4295 0.0681 0.1264 0.1576 0.0598 0.3654 0.0184 0.2655     R
## 5 0.4152 0.3952 0.0692 0.0230 0.4459 0.0085 0.3564 0.0528 0.4256 0.0357     R
## 6 0.2988 0.4250 0.1192 0.0264 0.3039 0.0494 0.2105 0.1089 0.6343 0.0623     R