# RadialSVM.R

# Procedure followed for SVM:
# 1.  Load the dataset.
# 2.  Initialise the accumulators used to average the results of all 50 iterations.
# 3.  Start iterating 50 times.
# 4.  Split the dataset 80:20 into train and test sets.
# 5.  Run an SVM with a radial kernel and a fixed gamma and cost.
# 6.  Use the tune function to find the best model over a range of gamma and cost values.
# 7.  Manually compare the tuned best model with the fixed-parameter model.
# 8.  We consistently observed that the tuned best model performs better.
# 9.  Use the best model to predict on the test data.
# 10. Print the predictions to check model performance.
# 11. Repeat this procedure 50 times with randomly assigned test and train datasets.
# 12. Compute the average confusion matrix and print it.

# This procedure was repeated with kernel = "linear"; of the two kernels, the linear one performed better.
# For the radial SVM, cost = 1 and gamma = 0.5 mostly performed better than any other combination.
# A low cost gives a softer (wider) margin that tolerates more violations and typically uses more support vectors;
# a low gamma gives each support vector a wider range of influence, producing a smoother, lower-variance decision boundary.


library(e1071)

# Load the heart-disease data.
# stringsAsFactors = TRUE keeps character columns (including the AHD response)
# as factors on R >= 4.0 too; svm() needs a factor response to classify.
# NOTE(review): the absolute path is machine-specific -- adjust before running
# on another machine.
Heart <- read.csv("/Users/jyothi/Downloads/Heart.csv", stringsAsFactors = TRUE)

# Remove rows containing NA's once, up front; every split below is drawn from
# this cleaned data frame.
hdata <- na.omit(Heart)
n_rows <- nrow(hdata)

# Size of the training sample (80% of the complete cases).
hrt_smp_size <- floor(0.80 * n_rows)

# Number of random train/test splits to average over.
n_iterations <- 50L

# Grid of cost/gamma values searched by tune(); hoisted out of the loop because
# it never changes.
tune_grid <- list(
  cost = c(0.1, 1, 10, 100, 1000),
  gamma = c(0.5, 1, 2, 3, 4)
)

# Running sum of the four confusion-matrix cells, stored in column-major order:
# (pred 1, truth 1), (pred 2, truth 1), (pred 1, truth 2), (pred 2, truth 2).
cell_totals <- numeric(4)

# Fix the RNG seed so the random splits (and tune()'s cross-validation folds)
# are reproducible from run to run.
set.seed(1)

for (i in seq_len(n_iterations)) {
  # Draw the split from the NA-free data. Indexing the unfiltered `Heart` with
  # indices sampled from `hdata` would leave the trailing rows of `Heart`
  # permanently in the test set and mis-size the split.
  train_ind <- sample(seq_len(n_rows), size = hrt_smp_size)
  heart_train <- hdata[train_ind, ]
  heart_test <- hdata[-train_ind, ]

  # Fixed-parameter radial SVM, kept for manual comparison with the tuned
  # model. `type` (not `method`) is the svm() argument that selects
  # C-classification; an unknown `method` argument is silently ignored.
  heartsvm <- svm(
    AHD ~ .,
    data = heart_train,
    type = "C-classification",
    kernel = "radial",
    cost = 1,
    gamma = 0.5
  )

  # Search the cost/gamma grid and keep the best model.
  tune.out <- tune(
    svm,
    AHD ~ .,
    data = heart_train,
    kernel = "radial",
    ranges = tune_grid
  )

  # Evaluate the tuned model on the held-out rows.
  test_pred <- predict(tune.out$best.model, heart_test)
  conf <- table(predict = test_pred, truth = heart_test$AHD)

  # The accumulation below relies on a 2 x 2 table (two response levels).
  stopifnot(length(conf) == 4L)
  cell_totals <- cell_totals + as.vector(conf)
}

# Average confusion matrix over all iterations: rows are predictions, columns
# are the true labels.
svm.out <- matrix(
  cell_totals / n_iterations,
  nrow = 2,
  ncol = 2,
  dimnames = list(predict = c("no", "yes"), truth = c("no", "yes"))
)
svm.out

# Output recorded from the original run. That run drew its train/test rows from
# the unfiltered `Heart` data frame and used no fixed seed, so the numbers
# produced by the code above will differ.
##        no   yes
## no  28.10 15.66
## yes  4.36 15.78

# Compared with the radial kernel, the linear kernel showed better accuracy.

# RadialSVM.R

# jyothi

# Mon Nov 21 13:11:28 2016