# Load the built-in Puromycin data set and take a first look at its
# column names and leading rows.
data_pr <- datasets::Puromycin
colnames(data_pr)
## [1] "conc" "rate" "state"
head(data_pr, n = 10)
## BUILD THE TRAINING AND TEST SETS
library(caTools)
## Warning: package 'caTools' was built under R version 4.3.3
# Fix the RNG seed so the random 75/25 split below is reproducible.
set.seed(123)
# `split` is a logical flag per row of Puromycin: TRUE = training row.
split <- sample.split(Y = Puromycin$state, SplitRatio = 0.75)
# Rows flagged TRUE form the training set, all remaining rows the test set.
data_train <- subset(Puromycin, subset = split)
data_test <- subset(Puromycin, subset = !split)
head(split, n = 10)
## [1] TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE
head(data_train, n = 10)
head(data_test, n = 10)
# Descriptive statistics of the three columns for the full data set ...
summary(Puromycin[, 1:3])
## conc rate state
## Min. :0.0200 Min. : 47.0 treated :12
## 1st Qu.:0.0600 1st Qu.: 91.5 untreated:11
## Median :0.1100 Median :124.0
## Mean :0.3122 Mean :126.8
## 3rd Qu.:0.5600 3rd Qu.:158.5
## Max. :1.1000 Max. :207.0
# ... for the training partition ...
summary(data_train[, 1:3])
## conc rate state
## Min. :0.0200 Min. : 47.0 treated :9
## 1st Qu.:0.0600 1st Qu.: 84.0 untreated:8
## Median :0.1100 Median :115.0
## Mean :0.2888 Mean :122.4
## 3rd Qu.:0.5600 3rd Qu.:159.0
## Max. :1.1000 Max. :201.0
# ... and for the test partition.
summary(data_test[, 1:3])
## conc rate state
## Min. :0.0600 Min. : 86.0 treated :3
## 1st Qu.:0.1375 1st Qu.:123.2 untreated:3
## Median :0.2200 Median :134.0
## Mean :0.3783 Mean :139.3
## 3rd Qu.:0.4750 3rd Qu.:150.0
## Max. :1.1000 Max. :207.0
## STANDARDISE THE NUMERIC ATTRIBUTES
# Centre and scale conc and rate (columns 1-2) using the mean and standard
# deviation of the TRAINING set, then apply exactly the same statistics to the
# test set. Scaling each partition with its own statistics would put the two
# sets on different scales, so the same raw measurement would map to different
# standardised values in training and test data.
train_scaled <- scale(data_train[1:2])
data_test[1:2] <- scale(
  data_test[1:2],
  center = attr(train_scaled, "scaled:center"),
  scale = attr(train_scaled, "scaled:scale")
)
data_train[1:2] <- train_scaled
head(data_train, 10)
head(data_test, 10)
summary(data_train, 10)
## conc rate state
## Min. :-0.7441 Min. :-1.5028 treated :9
## 1st Qu.:-0.6334 1st Qu.:-0.7655 untreated:8
## Median :-0.4950 Median :-0.1477
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.7506 3rd Qu.: 0.7291
## Max. : 2.2454 Max. : 1.5661
summary(data_test, 10)
## NOTE: output not reproduced here. Because the test set is now standardised
## with the training-set mean and standard deviation, its columns no longer
## have mean exactly 0; re-run the script to regenerate this summary.
## FIT THE SUPPORT VECTOR MACHINE
library(e1071)
## Warning: package 'e1071' was built under R version 4.3.3
# Linear-kernel C-classification SVM predicting state from conc and rate.
# The formula is resolved against `data = data_train`, so the data frame is
# not attach()ed: attaching would only put copies of conc/rate/state on the
# search path, where they can silently mask or be masked by other objects.
svm.Lin <- svm(state ~ conc + rate,
  data = data_train, type = "C-classification", kernel = "linear"
)
print(svm.Lin)
##
## Call:
## svm(formula = state ~ conc + rate, data = data_train, type = "C-classification",
## kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 16
## CLASSIFY THE TRAINING DATA
# Predict the class of every training row with the linear SVM and put the
# actual ("Aktual") and predicted ("Prediksi") labels side by side.
data_train.prediksi <- predict(svm.Lin, newdata = data_train)
hasil <- data.frame(Aktual = data_train$state, Prediksi = data_train.prediksi)
head(hasil, n = 10)
## VALIDATE THE CLASSIFICATION RESULT
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.2
# confusionMatrix() reads a table as "predictions in rows, reference (actual)
# classes in columns". Building the table as table(actual, predicted) would
# transpose it and swap Sensitivity with Pos Pred Value, Specificity with
# Neg Pred Value, and Prevalence with Detection Prevalence. The dimensions
# are named so the orientation is visible in the printed result.
cm.train <- table(
  Prediction = data_train.prediksi,
  Reference = data_train$state
)
confusionMatrix(cm.train)
## Confusion Matrix and Statistics
##
## Reference
## Prediction treated untreated
## treated 6 3
## untreated 3 5
##
## Accuracy : 0.6471
## 95% CI : (0.3833, 0.8579)
## No Information Rate : 0.5294
## P-Value [Acc > NIR] : 0.2344
##
## Kappa : 0.2917
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.6667
## Specificity : 0.6250
## Pos Pred Value : 0.6667
## Neg Pred Value : 0.6250
## Prevalence : 0.5294
## Detection Rate : 0.3529
## Detection Prevalence : 0.5294
## Balanced Accuracy : 0.6458
##
## 'Positive' Class : treated
##
## CLASSIFY THE TEST DATA
library(caret)
# Evaluate the linear SVM on the held-out test rows. Predicting on data_train
# here would merely repeat the training-set evaluation and say nothing about
# how the model performs on unseen data.
data_test.prediksi <- predict(svm.Lin, newdata = data_test)
hasil.test <- data.frame(data_test$state, data_test.prediksi)
colnames(hasil.test) <- c("Aktual", "Prediksi")
head(hasil.test, n = 10)
# Predictions in rows, actual classes in columns, as confusionMatrix()
# expects for a table input.
cm.test <- table(
  Prediction = data_test.prediksi,
  Reference = data_test$state
)
confusionMatrix(cm.test)
## NOTE: output not reproduced here. The transcript previously recorded at
## this point was a second copy of the training-set confusion matrix; re-run
## the script to obtain the test-set result.
## EVALUATE THE MODEL
## MODEL FINE TUNING
library(e1071)
# Two radial-kernel (RBF) SVMs that differ only in the cost parameter.
# The formula is resolved against `data = data_train`, so no attach() is
# needed (a repeated attach() only stacks another copy of the columns on the
# search path and triggers "objects are masked" messages).
svm.RBF1 <- svm(state ~ conc + rate,
  data = data_train, type = "C-classification", kernel = "radial", cost = 0.5
)
svm.RBF2 <- svm(state ~ conc + rate,
  data = data_train, type = "C-classification", kernel = "radial", cost = 1.0
)
print(svm.RBF1)
##
## Call:
## svm(formula = state ~ conc + rate, data = data_train, type = "C-classification",
## kernel = "radial", cost = 0.5)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.5
##
## Number of Support Vectors: 16
print(svm.RBF2)
##
## Call:
## svm(formula = state ~ conc + rate, data = data_train, type = "C-classification",
## kernel = "radial", cost = 1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 16
# Training-set predictions of both RBF models.
data_train.prediksi1 <- predict(svm.RBF1, newdata = data_train)
data_train.prediksi2 <- predict(svm.RBF2, newdata = data_train)
library(caret)
# confusionMatrix() reads a table as "predictions in rows, reference (actual)
# classes in columns". With the transposed table(actual, predicted) layout,
# a model that predicts "treated" for every row is reported with
# Prevalence = 1 and No Information Rate = 1, which describes the
# predictions rather than the data.
cm.train1 <- table(
  Prediction = data_train.prediksi1,
  Reference = data_train$state
)
cm.train2 <- table(
  Prediction = data_train.prediksi2,
  Reference = data_train$state
)
confusionMatrix(cm.train1)
## NOTE: output not reproduced here; re-run the script to regenerate it.
## Accuracy (0.5294) and Kappa (0) do not depend on the table orientation:
## with cost = 0.5 the model predicts "treated" for all 17 training rows.
## The per-class statistics (Sensitivity, Specificity, predictive values,
## Prevalence, No Information Rate) change with the corrected orientation.
confusionMatrix(cm.train2)
## NOTE: output not reproduced here; re-run the script to regenerate it.
## Accuracy (0.5882) and Kappa (0.1793) do not depend on the table
## orientation; Sensitivity/Pos Pred Value, Specificity/Neg Pred Value and
## Prevalence/Detection Prevalence swap relative to the transposed table.
# Grid search over gamma and cost for the radial-kernel SVM using tune()
# (10-fold cross validation by default).
# The cost grid is written as 10^(-1:1) = 0.1, 1, 10. The expression
# 10^(0.1:1) looks like a range but `0.1:1` evaluates to the single value
# 0.1, so it would search only cost = 10^0.1 (about 1.2589).
# NOTE(review): -1:1 is the assumed intended exponent range; adjust if a
# different grid was meant.
Tuning <- tune(svm, as.factor(state) ~ .,
  data = data_train, type = "C-classification", kernel = "radial",
  ranges = list(gamma = c(0.5, 1.0, 1.5), cost = 10^(-1:1))
)
print(Tuning)
## NOTE: output not reproduced here. The selected parameters and the reported
## performance depend on the corrected cost grid; re-run the script to
## regenerate them.
# Refit with the parameters selected by the grid search instead of copying
# them from the printed output by hand, so the model always matches the
# tuning result.
best.par <- Tuning$best.parameters
svm.Tuning <- svm(state ~ conc + rate,
  data = data_train, type = "C-classification", kernel = "radial",
  gamma = best.par$gamma, cost = best.par$cost
)
data_train.Tuning <- predict(svm.Tuning, newdata = data_train)
library(caret)
# Predictions in rows, actual classes in columns, as confusionMatrix()
# expects for a table input.
cm.Tuning <- table(
  Prediction = data_train.Tuning,
  Reference = data_train$state
)
confusionMatrix(cm.Tuning)
## NOTE: output not reproduced here; it depends on the parameters selected
## above. Re-run the script to regenerate it.
## CLASSIFY NEW DATA
# svm.Tuning was trained on conc/rate standardised with the training-set mean
# and standard deviation, so new observations must be transformed with those
# same statistics (not with the statistics of the full data set).
# data_train already holds standardised values at this point, so the raw
# training rows are recovered from the logical split indicator.
stopifnot(is.logical(split), length(split) == nrow(Puromycin))
train_raw <- subset(Puromycin, split == TRUE)
# Variable names are kept as in the original script:
# meanState / SDevState hold the mean and standard deviation of conc,
# meanSa1 / SDevSa1 hold the mean and standard deviation of rate.
meanState <- mean(train_raw$conc)
SDevState <- sd(train_raw$conc)
meanSa1 <- mean(train_raw$rate)
SDevSa1 <- sd(train_raw$rate)
## PREPARE THE NEW DATA FRAME
NewObs <- datasets::Puromycin
str(NewObs)
## 'data.frame': 23 obs. of 3 variables:
## $ conc : num 0.02 0.02 0.06 0.06 0.11 0.11 0.22 0.22 0.56 0.56 ...
## $ rate : num 76 47 97 107 123 139 159 152 191 201 ...
## $ state: Factor w/ 2 levels "treated","untreated": 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "reference")= chr "A1.3, p. 269"
# The state column is left untouched here: re-encoding it with
# factor(NewObs[3], levels = c(0, 1)) coerces a one-column data frame and
# matches none of the levels, which turns every label into NA. The column is
# replaced by the model's predictions below.
head(NewObs, n = 10)
NewObs$conc <- (NewObs$conc - meanState) / SDevState
NewObs$rate <- (NewObs$rate - meanSa1) / SDevSa1
head(NewObs, n = 10)
# Predict from the two standardised predictors only and store the predicted
# class in the state column.
Prediksi <- predict(svm.Tuning, newdata = NewObs[c("conc", "rate")])
NewObs$state <- Prediksi
NewObs