set.seed(123456)
#
rdata = read.delim(file = 'https://www.ebi.ac.uk/~hamedhm/Viability_with_E-ERAD-401_data_PRELIMINARY_for_Hamed.tsv',
skip = 0,
check.names = TRUE)
rdata[, -c(1:3)] =rdata[, -c(1:3)]
rdata[is.na(rdata)] = min(rdata[, -c(1:3)], na.rm = TRUE)/5
r2 = rdata
smpT = sample(seq(length(r2$Gene.Name)),
size = round(length(r2$Gene.Name) * .20),
replace = FALSE)
r3 = r2[smpT, ]
# Log transformations
r3[, -c(1:3)] = log(r3[, -c(1:3)])
r3$viaI = as.integer(r3$corrected_Viability)
r4 = (r3[, -c(1:3)])
######################### 1
# PLOTS
plot(
1,
type = 'n',
ylim = c(min(r3[, -c(1:3, 32)]), max(r3[, -c(1:3, 32)])),
xlim = c(0, ncol(r3) - 2)
)
apply(r3, 1, function(y) {
lines(spline(y[-c(1:3, 32)]), lty = 2 , col = y[32])
})
## NULL
legend(
'topright',
legend = unique(r3$corrected_Viability),
pch = as.integer(unique(r3$viaI)),
col = as.integer(unique(r3$viaI))
)

######################### 2
# Dim reduction + Transform
r5 = r55 = prcomp(r4[, -c(29)])
dm1 = 12
dm2 = 3
r5 = as.data.frame(r5$x[, 1:dm1])
library(tsne)
r5 = as.data.frame(tsne::tsne(
X = r5,
k = dm2,
max_iter = 400,
whiten = FALSE,
perplexity = 15
))
## sigma summary: Min. : 0.0334774923179871 |1st Qu. : 0.048885794018182 |Median : 0.0598374442537259 |Mean : 0.0672004162052023 |3rd Qu. : 0.0813599056368022 |Max. : 0.159924054107167 |
## Epoch: Iteration #100 error is: 11.635677485841
## Epoch: Iteration #200 error is: 0.519888809390406
## Epoch: Iteration #300 error is: 0.429057761971722
## Epoch: Iteration #400 error is: 0.396374834989037
r5$via = r3$corrected_Viability
######################### 3
par(mfrow = c(2, 2))
for (i in 1:(dm1 - 1)) {
for (j in (i + 1):dm1) {
plot(
r55$x[, i],
r55$x[, j],
col = as.integer(r5$via),
pch = as.integer(r5$via),
xlab = colnames(r55$x)[i],
ylab = colnames(r55$x)[j]
)
legend(
'topleft',
legend = paste(unique(r5$via)),
pch = as.integer(unique(r5$via)),
col = as.integer(unique(r5$via))
)
}
}
















par(mfrow = c(2, 2))

for (i in 1:(dm2 - 1)) {
for (j in (i + 1):dm2) {
plot(
r5[, i],
r5[, j],
col = as.integer(r5$via),
pch = as.integer(r5$via),
xlab = colnames(r5)[i],
ylab = colnames(r5)[j]
)
legend(
'topleft',
legend = paste(unique(r5$via)),
pch = as.integer(unique(r5$via)),
col = as.integer(unique(r5$via))
)
}
}
library(car)
scatter3d(
x = (r5$V1),
y = (r5$V2),
r5$V3,
surface = FALSE,
group = r5$via,
fit = "smooth",
grid = FALSE,
ellipsoid = FALSE
)
## Loading required namespace: rgl
######################### 4
#Conflicting Lethal Subviable Viable
r6 = r5
r6$viaI = 0
r6[r6$via == 'Conflicting', 'viaI'] = mean(as.matrix(subset(r5, r5$via ==
'Conflicting')[, -(dm2 + 1)]), na.rm = TRUE)
r6[r6$via == 'Viable', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Viable') [, -(dm2 +
1)]), na.rm = TRUE)
r6[r6$via == 'Lethal', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Lethal') [, -(dm2 +
1)]), na.rm = TRUE)
r6[r6$via == 'Subviable', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Subviable') [, -(dm2 +
1)]), na.rm = TRUE)
r6$viaI = round((r6$viaI - min(r6$viaI)) / (max(r6$viaI) - min(r6$viaI)), 2)
######################### 5
# SVM
library(e1071)
smp = sample(1:length(r6$via),
size = length(r6$via) / 3 * 2,
replace = FALSE)
svob = tune(svm,
via ~ V1 + V2 + V3,
data = r6,
kernel = "radial",
scale = FALSE,
epsilon = 10 ^ -18,
tolerance = .0001,
ranges = list(
cost = c(0.01, .1,1, 2, 5,10, 100),
gamma = c(.1, 0.5, 1, 2, 3, 4)
)
)
plot(svob)


summary(svob)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 0.1
##
## - best performance: 0.3703509
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-02 0.1 0.3729825 0.05168003
## 2 1e-01 0.1 0.3729825 0.05168003
## 3 1e+00 0.1 0.3703509 0.05177364
## 4 2e+00 0.1 0.3850351 0.06128622
## 5 5e+00 0.1 0.4222456 0.05688329
## 6 1e+01 0.1 0.4540526 0.05595538
## 7 1e+02 0.1 0.4673509 0.04880140
## 8 1e-02 0.5 0.3729825 0.05168003
## 9 1e-01 0.5 0.3729825 0.05168003
## 10 1e+00 0.5 0.3930000 0.05541080
## 11 2e+00 0.5 0.4223333 0.06448519
## 12 5e+00 0.5 0.4369298 0.06839252
## 13 1e+01 0.5 0.4395789 0.06485052
## 14 1e+02 0.5 0.4409123 0.06620151
## 15 1e-02 1.0 0.3729825 0.05168003
## 16 1e-01 1.0 0.3729825 0.05168003
## 17 1e+00 1.0 0.3797018 0.05209900
## 18 2e+00 1.0 0.4170000 0.04879019
## 19 5e+00 1.0 0.4223158 0.04860214
## 20 1e+01 1.0 0.4223158 0.05350067
## 21 1e+02 1.0 0.4236491 0.05451609
## 22 1e-02 2.0 0.3729825 0.05168003
## 23 1e-01 2.0 0.3729825 0.05168003
## 24 1e+00 2.0 0.3810175 0.04606011
## 25 2e+00 2.0 0.3930526 0.03711144
## 26 5e+00 2.0 0.3970351 0.03504954
## 27 1e+01 2.0 0.3970351 0.03504954
## 28 1e+02 2.0 0.3970351 0.03504954
## 29 1e-02 3.0 0.3729825 0.05168003
## 30 1e-01 3.0 0.3729825 0.05168003
## 31 1e+00 3.0 0.3770175 0.04728055
## 32 2e+00 3.0 0.3824035 0.04162675
## 33 5e+00 3.0 0.3824035 0.04162675
## 34 1e+01 3.0 0.3824035 0.04162675
## 35 1e+02 3.0 0.3824035 0.04162675
## 36 1e-02 4.0 0.3729825 0.05168003
## 37 1e-01 4.0 0.3729825 0.05168003
## 38 1e+00 4.0 0.3743509 0.05025858
## 39 2e+00 4.0 0.3823333 0.04744201
## 40 5e+00 4.0 0.3823333 0.04744201
## 41 1e+01 4.0 0.3823333 0.04744201
## 42 1e+02 4.0 0.3823333 0.04744201
b = svm(
x = r6[smp, c(1:dm2)],
y = r6$via[smp],
scale = FALSE,
kernel = 'radial',
gamma = svob$best.parameters$gamma,
#degree = 1,
epsilon = 10 ^ -18,
#probability = FALSE,
cost = svob$best.parameters$cost,
#shrinking = TRUE,
tolerance = .0001,
cachesize=500
)
table(r6$via[smp])
##
## Conflicting Lethal Subviable Viable
## 5 120 65 312
table(fitted(b), r6$via[smp])
##
## Conflicting Lethal Subviable Viable
## Conflicting 0 0 0 0
## Lethal 1 57 8 8
## Subviable 0 2 12 2
## Viable 4 61 45 302
table(predict(b, newdata = r6[-smp, c(1:dm2)]), r6$via[-smp])
##
## Conflicting Lethal Subviable Viable
## Conflicting 0 0 0 0
## Lethal 0 12 6 10
## Subviable 0 1 0 1
## Viable 0 51 21 149
######################### 6
# f1 = as.formula(paste('viaI~', paste(
# 'V', 1:dm2, sep = '', collapse = '+'
# )))
#
# library(neuralnet)
# nn = neuralnet(
# formula = f1,
# data = r6[smp,],
# hidden = c(15, 5,1),
# linear.output = TRUE
# #stepmax = 10 ^ 6,
# )
# #plot(nn)
# r7 = compute(nn, r6[-smp, 1:dm2])$net.result
# results <- data.frame(actual = r6$viaI[-smp], prediction = r7)
# roundedresults <- sapply(results, round, digits = 1)
# roundedresultsdf = data.frame(roundedresults)
# table(roundedresultsdf$actual, roundedresultsdf$prediction)
##########
library(mlr)
## Loading required package: ParamHelpers
## Warning: replacing previous import 'BBmisc::isFALSE' by
## 'backports::isFALSE' when loading 'mlr'
##
## Attaching package: 'mlr'
## The following object is masked from 'package:e1071':
##
## impute
set.seed(123456)
#r6$via[r6$via != 'Lethal'] = 'Viable'
r6 = droplevels(r6)
task = makeClassifTask(data = r6[, -c(dm2 + 2)], target = "via")
lrn = makeLearner('classif.randomForestSRC')#classif.nnet, classif.LiblineaRL1L2SVC, classif.lssvm||,classif.lvq1|,classif.mda,classif.mlp,classif.multinom,classif.nnet|classif.nnTrain,classif.OneR,classif.PART,classif.qda,classif.quaDA,classif.randomForest|||classif.randomForestSRC|||
n = nrow(r6)
train.set = sample(n, size = 2 / 3 * n)
test.set = setdiff(1:n, train.set)
model = train(lrn, task, subset = train.set)
pred = predict(model, task = task, subset = test.set)
performance(pred, measures = list(mmce, acc))
## mmce acc
## 0.4422311 0.5577689
plotLearnerPrediction(lrn, task = task)

calculateConfusionMatrix(pred, relative = TRUE, sums = FALSE)#$relative.row
## Relative confusion matrix (normalized by row/column):
## predicted
## true Conflicting Lethal Subviable Viable -err.-
## Conflicting 0.00/0.000 0.00/0.000 0.00/0.000 1.00/0.006 1.00
## Lethal 0.00/0.000 0.40/0.391 0.15/0.360 0.45/0.173 0.60
## Subviable 0.00/0.000 0.24/0.125 0.12/0.160 0.65/0.136 0.88
## Viable 0.00/0.000 0.20/0.484 0.08/0.480 0.72/0.685 0.28
## -err.- 0.00 0.61 0.84 0.31 0.44
##
##
## Absolute confusion matrix:
## predicted
## true Conflicting Lethal Subviable Viable -err.-
## Conflicting 0 0 0 1 1
## Lethal 0 25 9 28 37
## Subviable 0 8 4 22 30
## Viable 0 31 12 111 43
## -err.- 0 39 21 51 111