library(dplyr)
library(readr)
library(ggplot2)
library(plotly)
library(keras)
library(abind)
library(jsonlite)
source("../../dga-wb-r/preprocess.R")
# Load the per-domain result files for the DGA and the normal samples and
# attach a numeric label column to each (1 for the DGA file, 0 for the normal
# file).
dga_dga_vaclav <- read_csv(
  "../datasets/results_1000_dga_vaclav_mc_no_thres_full_domain.csv"
)
dga_normal_vaclav <- read_csv(
  "../datasets/results_2000_normal_valclav_mc_no_thres_full_domain.csv"
)
dga_dga_vaclav$label <- 1
dga_normal_vaclav$label <- 0

# One-off conversion, kept for reference only: it reads the first lines of the
# JSON result files and writes them back out as the CSV files loaded below.
# dga_results <- stream_in(textConnection(readLines("../datasets/results_full_dga_binary.json", n = 1000)))
# normal_results <- stream_in(textConnection(readLines("../datasets/results_full_normal_binary.json", n = 2000)))
# write_csv(dga_results, "../datasets/results_full_dga_binary.csv")
# write_csv(normal_results, "../datasets/results_full_normal_binary.csv")

# Load the binary result sets from their CSV copies. An earlier stream_in() of
# the DGA JSON file was dropped here: its value was overwritten by the
# read_csv() below before being used, and its textConnection was never closed.
normal_results <- read_csv("../datasets/results_full_normal_binary.csv")
dga_results <- read_csv("../datasets/results_full_dga_binary.csv")
PCA VIZ for DGA and Normal full domains
EVALUATION of MC-CNN on 1000 full domains with tunneling top-level domains changed
The dataset contains 3 classes: Normal, DGA and Tunneling.
# Read the evaluation sample and the corresponding model results from CSV.
# NOTE(review): the two files are presumably row-aligned, because the code
# that follows pairs their `class` columns by position -- confirm.
sample_test_mc_cnn_fulldomains <- read_csv(
  file = "../datasets/sample_test_mc_cnn-fulldomain.csv"
)
results_test_mc_cnn_fulldomains <- read_csv(
  file = "../datasets/results_test_mc_cnn-fulldomains.csv"
)
Confusion Matrix
# Confusion matrix of the predicted classes (data) against the classes stored
# in the evaluation sample (reference).
# NOTE(review): the recorded output lists an extra "class" level in the
# reference column, which looks like a stray header row in the sample file --
# confirm and consider dropping that row before computing the statistics.
caret::confusionMatrix(
  data = as.factor(results_test_mc_cnn_fulldomains$class),
  reference = as.factor(sample_test_mc_cnn_fulldomains$class)
)
longitud de objeto mayor no es múltiplo de la longitud de uno menor
Levels are not in the same order for reference and data. Refactoring data to match.
Confusion Matrix and Statistics
Reference
Prediction 0 1 2 class
0 198 1 39 1
1 2 299 276 0
2 0 0 185 0
class 0 0 0 0
Overall Statistics
Accuracy : 0.6813
95% CI : (0.6515, 0.7101)
No Information Rate : 0.4995
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5363
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: 0 Class: 1 Class: 2 Class: class
Sensitivity 0.9900 0.9967 0.3700 0.000000
Specificity 0.9488 0.6034 1.0000 1.000000
Pos Pred Value 0.8285 0.5182 1.0000 NaN
Neg Pred Value 0.9974 0.9976 0.6140 0.999001
Prevalence 0.1998 0.2997 0.4995 0.000999
Detection Rate 0.1978 0.2987 0.1848 0.000000
Detection Prevalence 0.2388 0.5764 0.1848 0.000000
Balanced Accuracy 0.9694 0.8000 0.6850 0.500000
Analysis of the results of the MC-CNN considering the domain length

Tunneling domains incorrectly detected as DGA
# Attach the predicted class to the evaluation sample as `pred`, then keep
# only the rows whose true class is 2 and whose predicted class is 1.
cbind(
  sample_test_mc_cnn_fulldomains,
  pred = results_test_mc_cnn_fulldomains$class
) %>%
  filter(class == 2, pred == 1)
LS0tCnRpdGxlOiAiRXZhbHVhdGlvbiBvZiBDTi1DTk4gb24gZnVsbCBkb21haW5zIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocGxvdGx5KQpsaWJyYXJ5KGtlcmFzKQpsaWJyYXJ5KGFiaW5kKQpsaWJyYXJ5KGpzb25saXRlKQpzb3VyY2UoIi4uLy4uL2RnYS13Yi1yL3ByZXByb2Nlc3MuUiIpCgpgYGAKCmBgYHtyfQpkZ2FfZGdhX3ZhY2xhdiA8LSByZWFkX2NzdigiLi4vZGF0YXNldHMvcmVzdWx0c18xMDAwX2RnYV92YWNsYXZfbWNfbm9fdGhyZXNfZnVsbF9kb21haW4uY3N2IikKZGdhX25vcm1hbF92YWNsYXYgPC0gcmVhZF9jc3YoIi4uL2RhdGFzZXRzL3Jlc3VsdHNfMjAwMF9ub3JtYWxfdmFsY2xhdl9tY19ub190aHJlc19mdWxsX2RvbWFpbi5jc3YiKQpkZ2FfZGdhX3ZhY2xhdiRsYWJlbDwtIDEKZGdhX25vcm1hbF92YWNsYXYkbGFiZWw8LTAKI2RnYV9yZXN1bHRzPC1zdHJlYW1faW4odGV4dENvbm5lY3Rpb24ocmVhZExpbmVzKCIuLi9kYXRhc2V0cy9yZXN1bHRzX2Z1bGxfZGdhX2JpbmFyeS5qc29uIixuPTEwMDApKSkKI25vcm1hbF9yZXN1bHRzPC1zdHJlYW1faW4odGV4dENvbm5lY3Rpb24ocmVhZExpbmVzKCIuLi9kYXRhc2V0cy9yZXN1bHRzX2Z1bGxfbm9ybWFsX2JpbmFyeS5qc29uIixuPTIwMDApKSkKI3dyaXRlX2NzdihkZ2FfcmVzdWx0cywiLi4vZGF0YXNldHMvcmVzdWx0c19mdWxsX2RnYV9iaW5hcnkuY3N2IikKI3dyaXRlX2Nzdihub3JtYWxfcmVzdWx0cywiLi4vZGF0YXNldHMvcmVzdWx0c19mdWxsX25vcm1hbF9iaW5hcnkuY3N2IikKZGdhX3Jlc3VsdHM8LXN0cmVhbV9pbih0ZXh0Q29ubmVjdGlvbihyZWFkTGluZXMoIi4uL2RhdGFzZXRzL3Jlc3VsdHNfZnVsbF9kZ2FfYmluYXJ5Lmpzb24iLG49MTAwMCkpKQpub3JtYWxfcmVzdWx0czwtcmVhZF9jc3YoIi4uL2RhdGFzZXRzL3Jlc3VsdHNfZnVsbF9ub3JtYWxfYmluYXJ5LmNzdiIpCmRnYV9yZXN1bHRzPC1yZWFkX2NzdigiLi4vZGF0YXNldHMvcmVzdWx0c19mdWxsX2RnYV9iaW5hcnkuY3N2IikKCmBgYAojIyBQQ0EgVklaIGZvciBER0EgYW5kIE5vcm1hbCBmdWxsIGRvbWFpbnMKYGBge3J9CmRnYV92YWNsYXZfdG9rZW5pemVkPC10b2tlbml6ZShkZ2FfZGdhX3ZhY2xhdiRkb21haW4sZGdhX2RnYV92YWNsYXYkbGFiZWwpCm5vcm1hbF92YWNsYXZfdG9rZW5pemVkPC10b2tlbml6ZShkZ2Ffbm9ybWFsX3ZhY2xhdiRkb21haW4sZGdhX25vcm1hbF92YWNsYXYkbGFiZWwpCgp2YWNsYXZfZGF0YTwtbGlzdCgpCnZhY2xhdl9kYXRhJGVuY29kZTwtYWJpbmQoZGdhX3ZhY2xhdl90b2tlbml6ZWQkZW5jb2RlLG5vcm1hbF92YWNsYXZfdG9rZW5pemVkJGVuY29kZSxhbG9uZz0xKQp2YWNsYXZfZGF0YSRkb21haW48LWMoZGdhX3ZhY2xhdl90b2tlbml6ZWQkZG9tYWluLG5vcm1hbF92YWNsYXZfdG9rZW5pemVkJGRvbWFpbikKdmFjbGF2X2RhdGEk
bGFiZWw8LWMoZGdhX3ZhY2xhdl90b2tlbml6ZWQkbGFiZWwsbm9ybWFsX3ZhY2xhdl90b2tlbml6ZWQkbGFiZWwpCnZhY2xhdl9kYXRhJHJlczwtcmVwKG5jaGFyKHZhY2xhdl9kYXRhJGRvbWFpbikpCnZhY2xhdl9kYXRhJHByZWRpY3Rpb24gPC1jKGRnYV9yZXN1bHRzJGNsYXNzLG5vcm1hbF9yZXN1bHRzJGNsYXNzKQpwY2E9cHJjb21wKHZhY2xhdl9kYXRhJGVuY29kZVssMToyMF0sY2VudGVyPVRSVUUsc2NhbGUuPVRSVUUpCiAgcGNhX2RhdGE8LWRhdGEuZnJhbWUocGNhJHgsbGFiZWw9dmFjbGF2X2RhdGEkbGFiZWwsZG9tYWluPXZhY2xhdl9kYXRhJGRvbWFpbixyZXM9dmFjbGF2X2RhdGEkcmVzLHByZWRpY3Rpb249dmFjbGF2X2RhdGEkcHJlZGljdGlvbikKICAKICBwY2FfcGxvdDwtZ2dwbG90KHBjYV9kYXRhICU+JSBmaWx0ZXIobGFiZWw9PTEpICU+JSBzYW1wbGVfbigxMDAwKSxhZXMoeD1QQzEseT1QQzIpKSsKICAgIGdlb21fcG9pbnQoYWVzKHRleHQ9ZG9tYWluLGNvbG9yPWFzLmZhY3RvcihwcmVkaWN0aW9uKSxhbHBoYT1yZXMpKSsKICAgIHRoZW1lX2J3KCkKI3BjYV9wbG90CnBsb3RseTo6Z2dwbG90bHkoKQoKI25jaGFyKHZhY2xhdl9kYXRhJGRvbWFpbikKYGBgCiMgRVZBTFVBVElPTiBvZiBNQy1DTk4gb24gMTAwMCBmdWxsIGRvbWFpbnMgd2l0aCB0dW5uZWxpbmcgdG9wLWxldmVsIGRvbWFpbnMgY2hhbmdlZApEYXRhc2V0IGNvbnRhaW4gMyBjbGFzc2VzLiAKTm9ybWFsLCBER0EgYW5kIFRVTk5FTElORwpgYGB7cn0Kc2FtcGxlX3Rlc3RfbWNfY25uX2Z1bGxkb21haW5zIDwtIHJlYWRfY3N2KCIuLi9kYXRhc2V0cy9zYW1wbGVfdGVzdF9tY19jbm4tZnVsbGRvbWFpbi5jc3YiKQpyZXN1bHRzX3Rlc3RfbWNfY25uX2Z1bGxkb21haW5zIDwtIHJlYWRfY3N2KCIuLi9kYXRhc2V0cy9yZXN1bHRzX3Rlc3RfbWNfY25uLWZ1bGxkb21haW5zLmNzdiIpCgpgYGAKIyMgQ29uZnVzaW9uIE1hdHJpeApgYGB7cn0KI2E8LXJlYWRMaW5lcygiLi4vZGF0YXNldHMvcmVzdWx0c190ZXN0X21jX2Nubi1mdWxsZG9tYWlucy5qc29uIixuPTEwMDApCiNyZXN1bHRzX3Rlc3RfbWNfY25uX2Z1bGxkb21haW5zPC1qc29ubGl0ZTo6c3RyZWFtX2luKHRleHRDb25uZWN0aW9uKGEpKQojcmVzdWx0c190ZXN0X21jX2Nubl9mdWxsZG9tYWlucwojcmVhZHI6OndyaXRlX2NzdihyZXN1bHRzX3Rlc3RfbWNfY25uX2Z1bGxkb21haW5zICU+JSBzZWxlY3QoZG9tYWluLGNsYXNzKSxwYXRoID0gIi4uL2RhdGFzZXRzL3Jlc3VsdHNfdGVzdF9tY19jbm4tZnVsbGRvbWFpbnMuY3N2IikKI3Jlc3VsdHNfdGVzdF9tY19jbm5fZnVsbGRvbWFpbnM8LWNiaW5kKHNhbXBsZV90ZXN0X21jX2Nubl9mdWxsZG9tYWlucyxwcmVkaWN0aW9uPXJlc3Vsc190ZXN0X21jX2Nubl9mdWxsZG9tYWlucyRjbGFzcykKCmNhcmV0Ojpjb25mdXNpb25NYXRyaXgocmVmZXJlbmNlPWFzLmZhY3RvcihzYW1wbGVfdGVzdF9tY19jbm5fZnVsbGRvbWFpbnMkY2xhc3MpLGRhdGE9
YXMuZmFjdG9yKHJlc3VsdHNfdGVzdF9tY19jbm5fZnVsbGRvbWFpbnMkY2xhc3MpKQpgYGAKIyBBbmFseXNpcyBvZiB0aGUgcmVzdWxzIG9mIHRoZSBNQy1DTk4gY29uc2lkZXJpbmcgdGhlIGRvbWFpbiBsZW5ndGgKYGBge3J9CmNiaW5kKHNhbXBsZV90ZXN0X21jX2Nubl9mdWxsZG9tYWlucywgcHJlZD1yZXN1bHRzX3Rlc3RfbWNfY25uX2Z1bGxkb21haW5zJGNsYXNzKSAlPiUgZmlsdGVyKGNsYXNzPT0yKSAlPiUgbXV0YXRlKGxlbj1uY2hhcihkb21haW4pKSAlPiUgZ3JvdXBfYnkocHJlZCkgJT4lIHN1bW1hcmlzZSh0b3RhbD1uKCksbGVuX2F2Zz1tZWFuKGxlbiksbGVuX3NkPXNkKGxlbikpCgoKY2JpbmQoc2FtcGxlX3Rlc3RfbWNfY25uX2Z1bGxkb21haW5zLCBwcmVkPXJlc3VsdHNfdGVzdF9tY19jbm5fZnVsbGRvbWFpbnMkY2xhc3MpICU+JSBmaWx0ZXIoY2xhc3MgIT0iY2xhc3MiKSAlPiUgbXV0YXRlKGxlbj1uY2hhcihkb21haW4pKSAlPiUKICBnZ3Bsb3QoKSsKICBnZW9tX3BvaW50KGFlcyh5PWxlbix4PWFzLmZhY3RvcihwcmVkKSxjb2xvcj1hcy5mYWN0b3IocHJlZCkpLGFscGhhPTAuMikrCiAgZ2VvbV9ib3hwbG90KGFlcyh5PWxlbix4PWFzLmZhY3RvcihwcmVkKSxmaWxsPWFzLmZhY3RvcihwcmVkKSksYWxwaGE9MC4yKSsKICB4bGFiKCJQcmVkaWN0aW9uIikrCiAgeWxhYigiZG9tYWluIGxlbmd0aCIpKwogIHRoZW1lX2NsYXNzaWMoKSsKICBmYWNldF93cmFwKH5jbGFzcykKYGBgCiMgVHVubmVsaW5nIGRvbWFpbnMgdW5jb3JyZWN0bHkgZGV0ZWN0ZWQgYXMgREdBCmBgYHtyfQpjYmluZChzYW1wbGVfdGVzdF9tY19jbm5fZnVsbGRvbWFpbnMsIHByZWQ9cmVzdWx0c190ZXN0X21jX2Nubl9mdWxsZG9tYWlucyRjbGFzcykgJT4lIGZpbHRlcihjbGFzcz09MiAmIHByZWQ9PTEpCmBgYAoK