# 3. baskı, Seçkin Yayınevi, Ankara.
# Filtre, sarmal ve gömülü özellik seçme yöntemlerinin çalışma prensibi
install.packages("caret")
install.packages("FSelector")
install.packages("partykit")
install.packages("readxl")
install.packages("mlbench")
# Fifty raw values, written five per line so that each source line
# corresponds to one row of the matrix built below.
x <- c(
  80, 50, 40, 50, 62,
  80, 60, 90, 50, 85,
  70, 70, 80, 30, 90,
  50, 60, 50, 45, 60,
  60, 72, 40, 80, 95,
  62, 62, 60, 60, 52,
  64, 64, 70, 45, 48,
  66, 70, 54, 42, 50,
  62, 68, 52, 86, 98,
  70, 60, 62, 52, 55
)

# Fill a 10 x 5 matrix row by row and label it in the same call:
# rows are b1..b10, columns are the five variable names.
M <- matrix(
  x,
  nrow = 10, ncol = 5, byrow = TRUE,
  dimnames = list(
    paste0("b", 1:10),
    c("C", "VT", "SQL", "Ist", "VM")
  )
)
# Create the distance matrix: pairwise Euclidean distances between the
# rows of M, rounded to two decimals for display.
round(dist(M, method = "euclidean"), digits = 2)
##        b1    b2    b3    b4    b5    b6    b7    b8    b9
## b2  55.94
## b3  57.31 26.93
## b4  33.60 56.12 50.25
## b5  53.60 63.59 65.03 52.86
## b6  32.68 49.16 53.59 23.17 52.47
## b7  39.66 45.45 46.49 27.50 66.44 18.68
## b8  31.62 53.67 49.36 21.93 60.87 21.07 17.58
## b9  58.17 57.42 63.66 57.77 14.46 53.78 67.27 65.30
## b10 27.15 42.28 46.18 24.86 55.79 12.04 14.63 17.46 56.86
# One 0/1 label per row of M, in row order (b1..b10).
# The variable names `class` and `df` are kept as in the original script;
# they shadow base::class / stats::df only as data objects.
class <- c(0, 1, 1, 0, 1, 0, 0, 0, 1, 0)

# Combine the feature matrix with the label; the label column is
# named "class".
df <- data.frame(M, class)
df
##      C VT SQL Ist VM class
## b1  80 50  40  50 62     0
## b2  80 60  90  50 85     1
## b3  70 70  80  30 90     1
## b4  50 60  50  45 60     0
## b5  60 72  40  80 95     1
## b6  62 62  60  60 52     0
## b7  64 64  70  45 48     0
## b8  66 70  54  42 50     0
## b9  62 68  52  86 98     1
## b10 70 60  62  52 55     0
# Score every column of df except `class` against `class` with the
# relief filter from FSelector. The call is namespace-qualified so it
# works whether or not FSelector has been attached.
# NOTE(review): relief() presumably samples rows at random, so the scores
# may change between runs unless a seed is set - confirm.
# NOTE(review): `class` is numeric 0/1 here, not a factor - confirm that
# this is intended.
res <- FSelector::relief(class ~ ., df)
res
##     attr_importance
## C       -0.27397564
## VT      -0.06946542
## SQL      0.17461794
## Ist      0.17602041
## VM       0.66584718
# Same relief filter, applied to the built-in iris data with Species as
# the response. Namespace-qualified so FSelector need not be attached.
# NOTE(review): relief() presumably samples rows at random, so the scores
# may change between runs unless a seed is set - confirm.
res <- FSelector::relief(Species ~ ., iris)
res
##              attr_importance
## Sepal.Length       0.1422222
## Sepal.Width        0.1212500
## Petal.Length       0.3194915
## Petal.Width        0.2975000
# Show the first six rows of df.
head(df, n = 6L)
##     C VT SQL Ist VM class
## b1 80 50  40  50 62     0
## b2 80 60  90  50 85     1
## b3 70 70  80  30 90     1
## b4 50 60  50  45 60     0
## b5 60 72  40  80 95     1
## b6 62 62  60  60 52     0
# Pick a feature subset of df for predicting `class` with FSelector's
# cfs filter; the result is the names of the chosen columns.
# Namespace-qualified so FSelector need not be attached.
res <- FSelector::cfs(class ~ ., df)
res
## [1] "VM"
# Read the example workbook; the path is relative to the working
# directory. readxl is referenced by namespace so it need not be attached.
veri <- readxl::read_excel("data/veri.xlsx")

# Convert every column (predictors and the response Y) to a factor.
# Assigning through `veri[]` replaces the columns in place and keeps the
# object's class and column names.
veri[] <- lapply(veri, factor)

# Chi-squared based importance of each remaining column with respect
# to Y.
res <- FSelector::chi.squared(Y ~ ., veri)
res
##    attr_importance
## X1       0.3245096
## X2       0.4935973
## X3       0.3016192
# BreastCancer comes from the mlbench package; load it only when it is
# not already available, so an existing object is never overwritten.
if (!exists("BreastCancer")) {
  data("BreastCancer", package = "mlbench")
}

# Keep columns 2 to 11, i.e. drop the first column and retain the nine
# predictors plus Class.
BreastData <- BreastCancer[, 2:11]
head(BreastData)
##   Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size Bare.nuclei
## 1            5         1          1             1            2           1
## 2            5         4          4             5            7          10
## 3            3         1          1             1            2           2
## 4            6         8          8             1            3           4
## 5            4         1          1             3            2           1
## 6            8        10         10             8            7          10
##   Bl.cromatin Normal.nucleoli Mitoses     Class
## 1           3               1       1    benign
## 2           3               2       1    benign
## 3           3               1       1    benign
## 4           3               7       1    benign
## 5           3               1       1    benign
## 6           9               7       1 malignant
# Chi-squared based importance of every BreastData column with respect
# to Class. Namespace-qualified so FSelector need not be attached.
res <- FSelector::chi.squared(Class ~ ., BreastData)
res
##                 attr_importance
## Cl.thickness          0.7461821
## Cell.size             0.8819200
## Cell.shape            0.8672678
## Marg.adhesion         0.7464962
## Epith.c.size          0.7986194
## Bare.nuclei           0.8461519
## Bl.cromatin           0.8112600
## Normal.nucleoli       0.7754269
## Mitoses               0.5295016
# Note: the ergs() function supplied with the book must be loaded before
# this example can be run. Fail early with a clear message if it is not.
if (!exists("ergs", mode = "function")) {
  stop(
    "ergs() is not defined; load the function supplied with the book first.",
    call. = FALSE
  )
}

# Sonar comes from the mlbench package; load it only when it is not
# already available, so an existing object is never overwritten.
if (!exists("Sonar")) {
  data("Sonar", package = "mlbench")
}

# Run ERGS feature selection on Sonar with Class as the response:
# threshold 1, 10 selected features (nsf), with a printed summary
# (verbose).
res <- ergs(
  Class ~ ., Sonar,
  threshold = 1, nsf = 10, na.rm = TRUE, verbose = TRUE
)
## ---------------------------------------------------------
##
## Effective Range based Gene Selection (ERGS) Algorithm
##
## Threshold = 1
## Number of selected features = 10
## Names of selected features:
## V11 V12 V45 V49 V10 V48 V9 V46 V13 V47
## ---------------------------------------------------------
##
# Print the `selectedFNames` element of the result stored in `res`.
res$selectedFNames # selected features
## [1] "V11" "V12" "V45" "V49" "V10" "V48" "V9" "V46" "V13" "V47"
# Show the first six rows of the `selectedData` element of `res`.
head(res$selectedData, n = 6L) # a slice of the selected data set
##      V11    V12    V45    V49    V10    V48     V9    V46    V13    V47 Class
## 1 0.1609 0.1582 0.2641 0.0383 0.2111 0.1343 0.3109 0.1386 0.2238 0.1051     R
## 2 0.4918 0.6552 0.0621 0.0409 0.2872 0.0742 0.3337 0.0203 0.6919 0.0530     R
## 3 0.6333 0.7060 0.2111 0.0130 0.6194 0.0744 0.5598 0.0176 0.5544 0.1348     R
## 4 0.0881 0.1992 0.4295 0.0681 0.1264 0.1576 0.0598 0.3654 0.0184 0.2655     R
## 5 0.4152 0.3952 0.0692 0.0230 0.4459 0.0085 0.3564 0.0528 0.4256 0.0357     R
## 6 0.2988 0.4250 0.1192 0.0264 0.3039 0.0494 0.2105 0.1089 0.6343 0.0623     R