Top_R_code.R

set.seed(123456)
#
rdata = read.delim(file = 'https://www.ebi.ac.uk/~hamedhm/Viability_with_E-ERAD-401_data_PRELIMINARY_for_Hamed.tsv',
                                     skip = 0,
                                     check.names = TRUE)
rdata[, -c(1:3)] =rdata[, -c(1:3)]
rdata[is.na(rdata)] = min(rdata[, -c(1:3)], na.rm = TRUE)/5

r2 = rdata
smpT  = sample(seq(length(r2$Gene.Name)),
                             size = round(length(r2$Gene.Name) * 1),
                             replace = FALSE)
r3    = r2[smpT, ]
# Log transformations
r3[, -c(1:3)] = log(r3[, -c(1:3)])
r3$viaI = as.integer(r3$corrected_Viability)
r4 = (r3[, -c(1:3)])

######################### 1
# PLOTS
plot(
    1,
    type = 'n',
    ylim = c(min(r3[, -c(1:3, 32)]), max(r3[, -c(1:3, 32)])),
    xlim = c(0, ncol(r3) - 2)
)

apply(r3, 1, function(y) {
    lines(spline(y[-c(1:3, 32)]), lty = 2 , col = y[32])
})

## NULL

legend(
    'topright',
    legend = unique(r3$corrected_Viability),
    pch = as.integer(unique(r3$viaI)),
    col = as.integer(unique(r3$viaI))
)

######################### 2
# Dim reduction + Transform
r5 = r55 = prcomp(r4[, -c(29)])
dm1 = 12
dm2 = 3
r5 = as.data.frame(r5$x[, 1:dm1])
library(tsne)
r5 = as.data.frame(tsne::tsne(
    X = r5,
    k = dm2,
    max_iter = 400,
    whiten = FALSE,
    perplexity = 15
))

## sigma summary: Min. : 0.0222906093178496 |1st Qu. : 0.0341605949529348 |Median : 0.0410054263700528 |Mean : 0.0475678820475926 |3rd Qu. : 0.0567661488016042 |Max. : 0.153947042963101 |

## Epoch: Iteration #100 error is: 15.9495466128711

## Epoch: Iteration #200 error is: 1.34340179686372

## Epoch: Iteration #300 error is: 1.06669511987609

## Epoch: Iteration #400 error is: 0.943785858410371

r5$via = r3$corrected_Viability


######################### 3
par(mfrow = c(2, 2))
for (i in 1:(dm1 - 1)) {
    for (j in (i + 1):dm1) {
        plot(
            r55$x[, i],
            r55$x[, j],
            col = as.integer(r5$via),
            pch = as.integer(r5$via),
            xlab = colnames(r55$x)[i],
            ylab = colnames(r55$x)[j]
        )
        legend(
            'topleft',
            legend = paste(unique(r5$via)),
            pch = as.integer(unique(r5$via)),
            col = as.integer(unique(r5$via))
        )
    }
}

par(mfrow = c(2, 2))

for (i in 1:(dm2 - 1)) {
    for (j in (i + 1):dm2) {
        plot(
            r5[, i],
            r5[, j],
            col = as.integer(r5$via),
            pch = as.integer(r5$via),
            xlab = colnames(r5)[i],
            ylab = colnames(r5)[j]
        )
        legend(
            'topleft',
            legend = paste(unique(r5$via)),
            pch = as.integer(unique(r5$via)),
            col = as.integer(unique(r5$via))
        )
    }
}

library(car)
scatter3d(
    x = (r5$V1),
    y = (r5$V2),
    r5$V3,
    surface = FALSE,
    group = r5$via,
    fit = "smooth",
    grid = FALSE,
    ellipsoid = FALSE
)

## Loading required namespace: rgl

######################### 4
#Conflicting      Lethal   Subviable      Viable
r6 = r5
r6$viaI = 0
r6[r6$via == 'Conflicting', 'viaI'] = mean(as.matrix(subset(r5, r5$via ==
                                                                                                                            'Conflicting')[, -(dm2 + 1)]), na.rm = TRUE)
r6[r6$via == 'Viable', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Viable')     [, -(dm2 +
                                                                                                                                                                                    1)]), na.rm = TRUE)
r6[r6$via == 'Lethal', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Lethal')     [, -(dm2 +
                                                                                                                                                                                    1)]), na.rm = TRUE)
r6[r6$via == 'Subviable', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Subviable')  [, -(dm2 +
                                                                                                                                                                                        1)]), na.rm = TRUE)
r6$viaI  = round((r6$viaI - min(r6$viaI)) / (max(r6$viaI) - min(r6$viaI)), 2)



######################### 5
# SVM
library(e1071)

smp = sample(1:length(r6$via),
                         size = length(r6$via) / 3 * 2,
                         replace = FALSE)



svob = tune(svm,
                        via ~ V1 + V2 + V3,
                        data = r6,
                        kernel = "radial",
                        scale = FALSE,
                        epsilon = 10 ^ -18,
                        tolerance = .0001,
                        ranges = list(
                            cost = c(0.01, .1,1, 2, 5,10, 100),
                            gamma = c(.1, 0.5, 1, 2, 3, 4)
                        )
)

plot(svob)

summary(svob)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##     1   0.1
## 
## - best performance: 0.334669 
## 
## - Detailed performance results:
##     cost gamma     error dispersion
## 1  1e-02   0.1 0.3606856 0.01637640
## 2  1e-01   0.1 0.3609508 0.01606173
## 3  1e+00   0.1 0.3346690 0.01825104
## 4  2e+00   0.1 0.3503323 0.01933390
## 5  5e+00   0.1 0.3705246 0.01892428
## 6  1e+01   0.1 0.3899098 0.02308911
## 7  1e+02   0.1 0.4265548 0.02306263
## 8  1e-02   0.5 0.3606856 0.01637640
## 9  1e-01   0.5 0.3606856 0.01637640
## 10 1e+00   0.5 0.3590948 0.01672993
## 11 2e+00   0.5 0.3901751 0.02069548
## 12 5e+00   0.5 0.4122164 0.02074873
## 13 1e+01   0.5 0.4233655 0.02444938
## 14 1e+02   0.5 0.4321315 0.02225862
## 15 1e-02   1.0 0.3606856 0.01637640
## 16 1e-01   1.0 0.3606856 0.01637640
## 17 1e+00   1.0 0.3654735 0.01477293
## 18 2e+00   1.0 0.3914929 0.02086399
## 19 5e+00   1.0 0.4079618 0.02136888
## 20 1e+01   1.0 0.4124774 0.02173452
## 21 1e+02   1.0 0.4148661 0.02007513
## 22 1e-02   2.0 0.3606856 0.01637640
## 23 1e-01   2.0 0.3606856 0.01637640
## 24 1e+00   2.0 0.3691849 0.01938450
## 25 2e+00   2.0 0.3928234 0.01471819
## 26 5e+00   2.0 0.3999937 0.01540367
## 27 1e+01   2.0 0.4002596 0.01391348
## 28 1e+02   2.0 0.4031823 0.01180218
## 29 1e-02   3.0 0.3606856 0.01637640
## 30 1e-01   3.0 0.3606856 0.01637640
## 31 1e+00   3.0 0.3721055 0.01745208
## 32 2e+00   3.0 0.3845900 0.01666715
## 33 5e+00   3.0 0.3859198 0.01478042
## 34 1e+01   3.0 0.3867169 0.01268423
## 35 1e+02   3.0 0.3872482 0.01266100
## 36 1e-02   4.0 0.3606856 0.01637640
## 37 1e-01   4.0 0.3606856 0.01637640
## 38 1e+00   4.0 0.3715722 0.01563835
## 39 2e+00   4.0 0.3806034 0.01633135
## 40 5e+00   4.0 0.3829964 0.01554598
## 41 1e+01   4.0 0.3837928 0.01462389
## 42 1e+02   4.0 0.3840581 0.01492864

b = svm(
    x = r6[smp, c(1:dm2)],
    y =  r6$via[smp],
    scale = FALSE,
    kernel = 'radial',
    gamma = svob$best.parameters$gamma,
    #degree = 1,
    epsilon = 10 ^ -18,
    #probability = FALSE,
    cost = svob$best.parameters$cost,
    #shrinking = TRUE,
    tolerance = .0001,
    cachesize=500

)




table(r6$via[smp])

## 
## Conflicting      Lethal   Subviable      Viable 
##          19         648         247        1596

table(fitted(b), r6$via[smp])

##              
##               Conflicting Lethal Subviable Viable
##   Conflicting           0      0         0      0
##   Lethal                3    369        57     94
##   Subviable             0      2        15      1
##   Viable               16    277       175   1501

table(predict(b, newdata = r6[-smp, c(1:dm2)]), r6$via[-smp])

##              
##               Conflicting Lethal Subviable Viable
##   Conflicting           0      0         0      0
##   Lethal                2    116        24     88
##   Subviable             0      4         2      6
##   Viable                3    209        84    717

######################### 6

# f1 = as.formula(paste('viaI~', paste(
#   'V', 1:dm2, sep = '', collapse =  '+'
# )))
#
# library(neuralnet)
# nn = neuralnet(
#   formula = f1,
#   data = r6[smp,],
#   hidden = c(15, 5,1),
#   linear.output = TRUE
#   #stepmax = 10 ^ 6,
# )
# #plot(nn)
# r7 = compute(nn, r6[-smp, 1:dm2])$net.result
# results <- data.frame(actual = r6$viaI[-smp], prediction = r7)
# roundedresults <- sapply(results, round, digits = 1)
# roundedresultsdf = data.frame(roundedresults)
# table(roundedresultsdf$actual, roundedresultsdf$prediction)


##########
library(mlr)

## Loading required package: ParamHelpers

## Warning: replacing previous import 'BBmisc::isFALSE' by
## 'backports::isFALSE' when loading 'mlr'

## 
## Attaching package: 'mlr'

## The following object is masked from 'package:e1071':
## 
##     impute

set.seed(123456)
#r6$via[r6$via != 'Lethal'] = 'Viable'
r6 = droplevels(r6)


task = makeClassifTask(data = r6[, -c(dm2 + 2)], target = "via")
lrn = makeLearner('classif.randomForestSRC')#classif.nnet, classif.LiblineaRL1L2SVC, classif.lssvm||,classif.lvq1|,classif.mda,classif.mlp,classif.multinom,classif.nnet|classif.nnTrain,classif.OneR,classif.PART,classif.qda,classif.quaDA,classif.randomForest|||classif.randomForestSRC|||

n = nrow(r6)
train.set = sample(n, size = 2 / 3 * n)
test.set = setdiff(1:n, train.set)
model = train(lrn, task, subset = train.set)
pred = predict(model, task = task, subset = test.set)
performance(pred, measures = list(mmce, acc))

##      mmce       acc 
## 0.3577689 0.6422311

plotLearnerPrediction(lrn, task = task)

calculateConfusionMatrix(pred, relative = TRUE, sums = FALSE)#$relative.row

## Relative confusion matrix (normalized by row/column):
##              predicted
## true          Conflicting Lethal      Subviable   Viable      -err.-     
##   Conflicting 0.000/0.000 0.375/0.010 0.000/0.000 0.625/0.006 1.00       
##   Lethal      0.006/1.000 0.422/0.430 0.048/0.366 0.524/0.181 0.58       
##   Subviable   0.000/0.000 0.378/0.147 0.050/0.146 0.571/0.075 0.95       
##   Viable      0.000/0.000 0.156/0.414 0.025/0.488 0.820/0.738 0.18       
##   -err.-             1.00        0.57        0.85        0.26 0.36       
## 
## 
## Absolute confusion matrix:
##              predicted
## true          Conflicting Lethal Subviable Viable -err.-
##   Conflicting           0      3         0      5      8
##   Lethal                2    132        15    164    181
##   Subviable             0     45         6     68    113
##   Viable                0    127        20    668    147
##   -err.-                2    175        35    237    449

Top_R_code.R

hamedhm

Wed Jan 31 12:04:15 2018