set.seed(123456)
#
rdata = read.delim(file = 'https://www.ebi.ac.uk/~hamedhm/Viability_with_E-ERAD-401_data_PRELIMINARY_for_Hamed.tsv',
skip = 0,
check.names = TRUE)
rdata[, -c(1:3)] =rdata[, -c(1:3)]
rdata[is.na(rdata)] = min(rdata[, -c(1:3)], na.rm = TRUE)/5
r2 = rdata
smpT = sample(seq(length(r2$Gene.Name)),
size = round(length(r2$Gene.Name) * 1),
replace = FALSE)
r3 = r2[smpT, ]
# Log transformations
r3[, -c(1:3)] = log(r3[, -c(1:3)])
r3$viaI = as.integer(r3$corrected_Viability)
r4 = (r3[, -c(1:3)])
######################### 1
# PLOTS
plot(
1,
type = 'n',
ylim = c(min(r3[, -c(1:3, 32)]), max(r3[, -c(1:3, 32)])),
xlim = c(0, ncol(r3) - 2)
)
apply(r3, 1, function(y) {
lines(spline(y[-c(1:3, 32)]), lty = 2 , col = y[32])
})
## NULL
legend(
'topright',
legend = unique(r3$corrected_Viability),
pch = as.integer(unique(r3$viaI)),
col = as.integer(unique(r3$viaI))
)

######################### 2
# Dim reduction + Transform
r5 = r55 = prcomp(r4[, -c(29)])
dm1 = 12
dm2 = 3
r5 = as.data.frame(r5$x[, 1:dm1])
library(tsne)
r5 = as.data.frame(tsne::tsne(
X = r5,
k = dm2,
max_iter = 400,
whiten = FALSE,
perplexity = 15
))
## sigma summary: Min. : 0.0222906093178496 |1st Qu. : 0.0341605949529348 |Median : 0.0410054263700528 |Mean : 0.0475678820475926 |3rd Qu. : 0.0567661488016042 |Max. : 0.153947042963101 |
## Epoch: Iteration #100 error is: 15.9495466128711
## Epoch: Iteration #200 error is: 1.34340179686372
## Epoch: Iteration #300 error is: 1.06669511987609
## Epoch: Iteration #400 error is: 0.943785858410371
r5$via = r3$corrected_Viability
######################### 3
par(mfrow = c(2, 2))
for (i in 1:(dm1 - 1)) {
for (j in (i + 1):dm1) {
plot(
r55$x[, i],
r55$x[, j],
col = as.integer(r5$via),
pch = as.integer(r5$via),
xlab = colnames(r55$x)[i],
ylab = colnames(r55$x)[j]
)
legend(
'topleft',
legend = paste(unique(r5$via)),
pch = as.integer(unique(r5$via)),
col = as.integer(unique(r5$via))
)
}
}
















par(mfrow = c(2, 2))

for (i in 1:(dm2 - 1)) {
for (j in (i + 1):dm2) {
plot(
r5[, i],
r5[, j],
col = as.integer(r5$via),
pch = as.integer(r5$via),
xlab = colnames(r5)[i],
ylab = colnames(r5)[j]
)
legend(
'topleft',
legend = paste(unique(r5$via)),
pch = as.integer(unique(r5$via)),
col = as.integer(unique(r5$via))
)
}
}
library(car)
scatter3d(
x = (r5$V1),
y = (r5$V2),
r5$V3,
surface = FALSE,
group = r5$via,
fit = "smooth",
grid = FALSE,
ellipsoid = FALSE
)
## Loading required namespace: rgl
######################### 4
#Conflicting Lethal Subviable Viable
r6 = r5
r6$viaI = 0
r6[r6$via == 'Conflicting', 'viaI'] = mean(as.matrix(subset(r5, r5$via ==
'Conflicting')[, -(dm2 + 1)]), na.rm = TRUE)
r6[r6$via == 'Viable', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Viable') [, -(dm2 +
1)]), na.rm = TRUE)
r6[r6$via == 'Lethal', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Lethal') [, -(dm2 +
1)]), na.rm = TRUE)
r6[r6$via == 'Subviable', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Subviable') [, -(dm2 +
1)]), na.rm = TRUE)
r6$viaI = round((r6$viaI - min(r6$viaI)) / (max(r6$viaI) - min(r6$viaI)), 2)
######################### 5
# SVM
library(e1071)
smp = sample(1:length(r6$via),
size = length(r6$via) / 3 * 2,
replace = FALSE)
svob = tune(svm,
via ~ V1 + V2 + V3,
data = r6,
kernel = "radial",
scale = FALSE,
epsilon = 10 ^ -18,
tolerance = .0001,
ranges = list(
cost = c(0.01, .1,1, 2, 5,10, 100),
gamma = c(.1, 0.5, 1, 2, 3, 4)
)
)
plot(svob)


summary(svob)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 0.1
##
## - best performance: 0.334669
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-02 0.1 0.3606856 0.01637640
## 2 1e-01 0.1 0.3609508 0.01606173
## 3 1e+00 0.1 0.3346690 0.01825104
## 4 2e+00 0.1 0.3503323 0.01933390
## 5 5e+00 0.1 0.3705246 0.01892428
## 6 1e+01 0.1 0.3899098 0.02308911
## 7 1e+02 0.1 0.4265548 0.02306263
## 8 1e-02 0.5 0.3606856 0.01637640
## 9 1e-01 0.5 0.3606856 0.01637640
## 10 1e+00 0.5 0.3590948 0.01672993
## 11 2e+00 0.5 0.3901751 0.02069548
## 12 5e+00 0.5 0.4122164 0.02074873
## 13 1e+01 0.5 0.4233655 0.02444938
## 14 1e+02 0.5 0.4321315 0.02225862
## 15 1e-02 1.0 0.3606856 0.01637640
## 16 1e-01 1.0 0.3606856 0.01637640
## 17 1e+00 1.0 0.3654735 0.01477293
## 18 2e+00 1.0 0.3914929 0.02086399
## 19 5e+00 1.0 0.4079618 0.02136888
## 20 1e+01 1.0 0.4124774 0.02173452
## 21 1e+02 1.0 0.4148661 0.02007513
## 22 1e-02 2.0 0.3606856 0.01637640
## 23 1e-01 2.0 0.3606856 0.01637640
## 24 1e+00 2.0 0.3691849 0.01938450
## 25 2e+00 2.0 0.3928234 0.01471819
## 26 5e+00 2.0 0.3999937 0.01540367
## 27 1e+01 2.0 0.4002596 0.01391348
## 28 1e+02 2.0 0.4031823 0.01180218
## 29 1e-02 3.0 0.3606856 0.01637640
## 30 1e-01 3.0 0.3606856 0.01637640
## 31 1e+00 3.0 0.3721055 0.01745208
## 32 2e+00 3.0 0.3845900 0.01666715
## 33 5e+00 3.0 0.3859198 0.01478042
## 34 1e+01 3.0 0.3867169 0.01268423
## 35 1e+02 3.0 0.3872482 0.01266100
## 36 1e-02 4.0 0.3606856 0.01637640
## 37 1e-01 4.0 0.3606856 0.01637640
## 38 1e+00 4.0 0.3715722 0.01563835
## 39 2e+00 4.0 0.3806034 0.01633135
## 40 5e+00 4.0 0.3829964 0.01554598
## 41 1e+01 4.0 0.3837928 0.01462389
## 42 1e+02 4.0 0.3840581 0.01492864
b = svm(
x = r6[smp, c(1:dm2)],
y = r6$via[smp],
scale = FALSE,
kernel = 'radial',
gamma = svob$best.parameters$gamma,
#degree = 1,
epsilon = 10 ^ -18,
#probability = FALSE,
cost = svob$best.parameters$cost,
#shrinking = TRUE,
tolerance = .0001,
cachesize=500
)
table(r6$via[smp])
##
## Conflicting Lethal Subviable Viable
## 19 648 247 1596
table(fitted(b), r6$via[smp])
##
## Conflicting Lethal Subviable Viable
## Conflicting 0 0 0 0
## Lethal 3 369 57 94
## Subviable 0 2 15 1
## Viable 16 277 175 1501
table(predict(b, newdata = r6[-smp, c(1:dm2)]), r6$via[-smp])
##
## Conflicting Lethal Subviable Viable
## Conflicting 0 0 0 0
## Lethal 2 116 24 88
## Subviable 0 4 2 6
## Viable 3 209 84 717
######################### 6
# f1 = as.formula(paste('viaI~', paste(
# 'V', 1:dm2, sep = '', collapse = '+'
# )))
#
# library(neuralnet)
# nn = neuralnet(
# formula = f1,
# data = r6[smp,],
# hidden = c(15, 5,1),
# linear.output = TRUE
# #stepmax = 10 ^ 6,
# )
# #plot(nn)
# r7 = compute(nn, r6[-smp, 1:dm2])$net.result
# results <- data.frame(actual = r6$viaI[-smp], prediction = r7)
# roundedresults <- sapply(results, round, digits = 1)
# roundedresultsdf = data.frame(roundedresults)
# table(roundedresultsdf$actual, roundedresultsdf$prediction)
##########
library(mlr)
## Loading required package: ParamHelpers
## Warning: replacing previous import 'BBmisc::isFALSE' by
## 'backports::isFALSE' when loading 'mlr'
##
## Attaching package: 'mlr'
## The following object is masked from 'package:e1071':
##
## impute
set.seed(123456)
#r6$via[r6$via != 'Lethal'] = 'Viable'
r6 = droplevels(r6)
task = makeClassifTask(data = r6[, -c(dm2 + 2)], target = "via")
lrn = makeLearner('classif.randomForestSRC')#classif.nnet, classif.LiblineaRL1L2SVC, classif.lssvm||,classif.lvq1|,classif.mda,classif.mlp,classif.multinom,classif.nnet|classif.nnTrain,classif.OneR,classif.PART,classif.qda,classif.quaDA,classif.randomForest|||classif.randomForestSRC|||
n = nrow(r6)
train.set = sample(n, size = 2 / 3 * n)
test.set = setdiff(1:n, train.set)
model = train(lrn, task, subset = train.set)
pred = predict(model, task = task, subset = test.set)
performance(pred, measures = list(mmce, acc))
## mmce acc
## 0.3577689 0.6422311
plotLearnerPrediction(lrn, task = task)

calculateConfusionMatrix(pred, relative = TRUE, sums = FALSE)#$relative.row
## Relative confusion matrix (normalized by row/column):
## predicted
## true Conflicting Lethal Subviable Viable -err.-
## Conflicting 0.000/0.000 0.375/0.010 0.000/0.000 0.625/0.006 1.00
## Lethal 0.006/1.000 0.422/0.430 0.048/0.366 0.524/0.181 0.58
## Subviable 0.000/0.000 0.378/0.147 0.050/0.146 0.571/0.075 0.95
## Viable 0.000/0.000 0.156/0.414 0.025/0.488 0.820/0.738 0.18
## -err.- 1.00 0.57 0.85 0.26 0.36
##
##
## Absolute confusion matrix:
## predicted
## true Conflicting Lethal Subviable Viable -err.-
## Conflicting 0 3 0 5 8
## Lethal 2 132 15 164 181
## Subviable 0 45 6 68 113
## Viable 0 127 20 668 147
## -err.- 2 175 35 237 449