Top_R_code.R

set.seed(123456)
#
rdata = read.delim(file = 'https://www.ebi.ac.uk/~hamedhm/Viability_with_E-ERAD-401_data_PRELIMINARY_for_Hamed.tsv',
                                     skip = 0,
                                     check.names = TRUE)
rdata[, -c(1:3)] =rdata[, -c(1:3)]
rdata[is.na(rdata)] = min(rdata[, -c(1:3)], na.rm = TRUE)/5

r2 = rdata
smpT  = sample(seq(length(r2$Gene.Name)),
                             size = round(length(r2$Gene.Name) * .20),
                             replace = FALSE)
r3    = r2[smpT, ]
# Log transformations
r3[, -c(1:3)] = log(r3[, -c(1:3)])
r3$viaI = as.integer(r3$corrected_Viability)
r4 = (r3[, -c(1:3)])

######################### 1
# PLOTS
plot(
    1,
    type = 'n',
    ylim = c(min(r3[, -c(1:3, 32)]), max(r3[, -c(1:3, 32)])),
    xlim = c(0, ncol(r3) - 2)
)

apply(r3, 1, function(y) {
    lines(spline(y[-c(1:3, 32)]), lty = 2 , col = y[32])
})

## NULL

legend(
    'topright',
    legend = unique(r3$corrected_Viability),
    pch = as.integer(unique(r3$viaI)),
    col = as.integer(unique(r3$viaI))
)

######################### 2
# Dim reduction + Transform
r5 = r55 = prcomp(r4[, -c(29)])
dm1 = 12
dm2 = 3
r5 = as.data.frame(r5$x[, 1:dm1])
library(tsne)
r5 = as.data.frame(tsne::tsne(
    X = r5,
    k = dm2,
    max_iter = 400,
    whiten = FALSE,
    perplexity = 15
))

## sigma summary: Min. : 0.0334774923179871 |1st Qu. : 0.048885794018182 |Median : 0.0598374442537259 |Mean : 0.0672004162052023 |3rd Qu. : 0.0813599056368022 |Max. : 0.159924054107167 |

## Epoch: Iteration #100 error is: 11.635677485841

## Epoch: Iteration #200 error is: 0.519888809390406

## Epoch: Iteration #300 error is: 0.429057761971722

## Epoch: Iteration #400 error is: 0.396374834989037

r5$via = r3$corrected_Viability


######################### 3
par(mfrow = c(2, 2))
for (i in 1:(dm1 - 1)) {
    for (j in (i + 1):dm1) {
        plot(
            r55$x[, i],
            r55$x[, j],
            col = as.integer(r5$via),
            pch = as.integer(r5$via),
            xlab = colnames(r55$x)[i],
            ylab = colnames(r55$x)[j]
        )
        legend(
            'topleft',
            legend = paste(unique(r5$via)),
            pch = as.integer(unique(r5$via)),
            col = as.integer(unique(r5$via))
        )
    }
}

par(mfrow = c(2, 2))

for (i in 1:(dm2 - 1)) {
    for (j in (i + 1):dm2) {
        plot(
            r5[, i],
            r5[, j],
            col = as.integer(r5$via),
            pch = as.integer(r5$via),
            xlab = colnames(r5)[i],
            ylab = colnames(r5)[j]
        )
        legend(
            'topleft',
            legend = paste(unique(r5$via)),
            pch = as.integer(unique(r5$via)),
            col = as.integer(unique(r5$via))
        )
    }
}

library(car)
scatter3d(
    x = (r5$V1),
    y = (r5$V2),
    r5$V3,
    surface = FALSE,
    group = r5$via,
    fit = "smooth",
    grid = FALSE,
    ellipsoid = FALSE
)

## Loading required namespace: rgl

######################### 4
#Conflicting      Lethal   Subviable      Viable
r6 = r5
r6$viaI = 0
r6[r6$via == 'Conflicting', 'viaI'] = mean(as.matrix(subset(r5, r5$via ==
                                                                                                                            'Conflicting')[, -(dm2 + 1)]), na.rm = TRUE)
r6[r6$via == 'Viable', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Viable')     [, -(dm2 +
                                                                                                                                                                                    1)]), na.rm = TRUE)
r6[r6$via == 'Lethal', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Lethal')     [, -(dm2 +
                                                                                                                                                                                    1)]), na.rm = TRUE)
r6[r6$via == 'Subviable', 'viaI'] = mean(as.matrix(subset(r5, r5$via == 'Subviable')  [, -(dm2 +
                                                                                                                                                                                        1)]), na.rm = TRUE)
r6$viaI  = round((r6$viaI - min(r6$viaI)) / (max(r6$viaI) - min(r6$viaI)), 2)



######################### 5
# SVM
library(e1071)

smp = sample(1:length(r6$via),
                         size = length(r6$via) / 3 * 2,
                         replace = FALSE)



svob = tune(svm,
                        via ~ V1 + V2 + V3,
                        data = r6,
                        kernel = "radial",
                        scale = FALSE,
                        epsilon = 10 ^ -18,
                        tolerance = .0001,
                        ranges = list(
                            cost = c(0.01, .1,1, 2, 5,10, 100),
                            gamma = c(.1, 0.5, 1, 2, 3, 4)
                        )
)

plot(svob)

summary(svob)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##     1   0.1
## 
## - best performance: 0.3703509 
## 
## - Detailed performance results:
##     cost gamma     error dispersion
## 1  1e-02   0.1 0.3729825 0.05168003
## 2  1e-01   0.1 0.3729825 0.05168003
## 3  1e+00   0.1 0.3703509 0.05177364
## 4  2e+00   0.1 0.3850351 0.06128622
## 5  5e+00   0.1 0.4222456 0.05688329
## 6  1e+01   0.1 0.4540526 0.05595538
## 7  1e+02   0.1 0.4673509 0.04880140
## 8  1e-02   0.5 0.3729825 0.05168003
## 9  1e-01   0.5 0.3729825 0.05168003
## 10 1e+00   0.5 0.3930000 0.05541080
## 11 2e+00   0.5 0.4223333 0.06448519
## 12 5e+00   0.5 0.4369298 0.06839252
## 13 1e+01   0.5 0.4395789 0.06485052
## 14 1e+02   0.5 0.4409123 0.06620151
## 15 1e-02   1.0 0.3729825 0.05168003
## 16 1e-01   1.0 0.3729825 0.05168003
## 17 1e+00   1.0 0.3797018 0.05209900
## 18 2e+00   1.0 0.4170000 0.04879019
## 19 5e+00   1.0 0.4223158 0.04860214
## 20 1e+01   1.0 0.4223158 0.05350067
## 21 1e+02   1.0 0.4236491 0.05451609
## 22 1e-02   2.0 0.3729825 0.05168003
## 23 1e-01   2.0 0.3729825 0.05168003
## 24 1e+00   2.0 0.3810175 0.04606011
## 25 2e+00   2.0 0.3930526 0.03711144
## 26 5e+00   2.0 0.3970351 0.03504954
## 27 1e+01   2.0 0.3970351 0.03504954
## 28 1e+02   2.0 0.3970351 0.03504954
## 29 1e-02   3.0 0.3729825 0.05168003
## 30 1e-01   3.0 0.3729825 0.05168003
## 31 1e+00   3.0 0.3770175 0.04728055
## 32 2e+00   3.0 0.3824035 0.04162675
## 33 5e+00   3.0 0.3824035 0.04162675
## 34 1e+01   3.0 0.3824035 0.04162675
## 35 1e+02   3.0 0.3824035 0.04162675
## 36 1e-02   4.0 0.3729825 0.05168003
## 37 1e-01   4.0 0.3729825 0.05168003
## 38 1e+00   4.0 0.3743509 0.05025858
## 39 2e+00   4.0 0.3823333 0.04744201
## 40 5e+00   4.0 0.3823333 0.04744201
## 41 1e+01   4.0 0.3823333 0.04744201
## 42 1e+02   4.0 0.3823333 0.04744201

b = svm(
    x = r6[smp, c(1:dm2)],
    y =  r6$via[smp],
    scale = FALSE,
    kernel = 'radial',
    gamma = svob$best.parameters$gamma,
    #degree = 1,
    epsilon = 10 ^ -18,
    #probability = FALSE,
    cost = svob$best.parameters$cost,
    #shrinking = TRUE,
    tolerance = .0001,
    cachesize=500

)




table(r6$via[smp])

## 
## Conflicting      Lethal   Subviable      Viable 
##           5         120          65         312

table(fitted(b), r6$via[smp])

##              
##               Conflicting Lethal Subviable Viable
##   Conflicting           0      0         0      0
##   Lethal                1     57         8      8
##   Subviable             0      2        12      2
##   Viable                4     61        45    302

table(predict(b, newdata = r6[-smp, c(1:dm2)]), r6$via[-smp])

##              
##               Conflicting Lethal Subviable Viable
##   Conflicting           0      0         0      0
##   Lethal                0     12         6     10
##   Subviable             0      1         0      1
##   Viable                0     51        21    149

######################### 6

# f1 = as.formula(paste('viaI~', paste(
#   'V', 1:dm2, sep = '', collapse =  '+'
# )))
#
# library(neuralnet)
# nn = neuralnet(
#   formula = f1,
#   data = r6[smp,],
#   hidden = c(15, 5,1),
#   linear.output = TRUE
#   #stepmax = 10 ^ 6,
# )
# #plot(nn)
# r7 = compute(nn, r6[-smp, 1:dm2])$net.result
# results <- data.frame(actual = r6$viaI[-smp], prediction = r7)
# roundedresults <- sapply(results, round, digits = 1)
# roundedresultsdf = data.frame(roundedresults)
# table(roundedresultsdf$actual, roundedresultsdf$prediction)


##########
library(mlr)

## Loading required package: ParamHelpers

## Warning: replacing previous import 'BBmisc::isFALSE' by
## 'backports::isFALSE' when loading 'mlr'

## 
## Attaching package: 'mlr'

## The following object is masked from 'package:e1071':
## 
##     impute

set.seed(123456)
#r6$via[r6$via != 'Lethal'] = 'Viable'
r6 = droplevels(r6)


task = makeClassifTask(data = r6[, -c(dm2 + 2)], target = "via")
lrn = makeLearner('classif.randomForestSRC')#classif.nnet, classif.LiblineaRL1L2SVC, classif.lssvm||,classif.lvq1|,classif.mda,classif.mlp,classif.multinom,classif.nnet|classif.nnTrain,classif.OneR,classif.PART,classif.qda,classif.quaDA,classif.randomForest|||classif.randomForestSRC|||

n = nrow(r6)
train.set = sample(n, size = 2 / 3 * n)
test.set = setdiff(1:n, train.set)
model = train(lrn, task, subset = train.set)
pred = predict(model, task = task, subset = test.set)
performance(pred, measures = list(mmce, acc))

##      mmce       acc 
## 0.4422311 0.5577689

plotLearnerPrediction(lrn, task = task)

calculateConfusionMatrix(pred, relative = TRUE, sums = FALSE)#$relative.row

## Relative confusion matrix (normalized by row/column):
##              predicted
## true          Conflicting Lethal     Subviable  Viable     -err.-    
##   Conflicting 0.00/0.000  0.00/0.000 0.00/0.000 1.00/0.006 1.00      
##   Lethal      0.00/0.000  0.40/0.391 0.15/0.360 0.45/0.173 0.60      
##   Subviable   0.00/0.000  0.24/0.125 0.12/0.160 0.65/0.136 0.88      
##   Viable      0.00/0.000  0.20/0.484 0.08/0.480 0.72/0.685 0.28      
##   -err.-            0.00        0.61       0.84       0.31 0.44      
## 
## 
## Absolute confusion matrix:
##              predicted
## true          Conflicting Lethal Subviable Viable -err.-
##   Conflicting           0      0         0      1      1
##   Lethal                0     25         9     28     37
##   Subviable             0      8         4     22     30
##   Viable                0     31        12    111     43
##   -err.-                0     39        21     51    111

Top_R_code.R

hamedhm

Wed Jan 31 10:08:42 2018