# Show the file names in `files` as a one-column matrix (one name per row).
cbind(files)
files
[1,] "dataset.42.neris.labels.gz"
[2,] "dataset.43.neris.labels.gz"
[3,] "dataset.44.rbot.labels.gz"
[4,] "dataset.45.rbotdos.labels.gz"
[5,] "dataset.46.fastflux.labels.gz"
[6,] "dataset.47.donbot.labels.gz"
[7,] "dataset.48.sogou.labels.gz"
[8,] "dataset.49.gvod.labels.gz"
[9,] "dataset.50.bot.labels.gz"
[10,] "dataset.51.rbot.labels.gz"
[11,] "dataset.52.rbot.labels.gz"
[12,] "dataset.53.botp2p.labels.gz"
[13,] "dataset.54.fastflux.labels.gz"
# Preview the model file name derived from each labels file name: split the
# name on "." and re-join fields 1, 2, 3 and 5, i.e. drop the 4th field
# ("labels" in the names listed above).
for (labels_file in files) {
  name_parts <- str_split(labels_file, "[.]")[[1]]
  model_file <- paste(name_parts[c(1, 2, 3, 5)], collapse = ".")
  print(model_file)
}
[1] "dataset.42.neris.gz"
[1] "dataset.43.neris.gz"
[1] "dataset.44.rbot.gz"
[1] "dataset.45.rbotdos.gz"
[1] "dataset.46.fastflux.gz"
[1] "dataset.47.donbot.gz"
[1] "dataset.48.sogou.gz"
[1] "dataset.49.gvod.gz"
[1] "dataset.50.bot.gz"
[1] "dataset.51.rbot.gz"
[1] "dataset.52.rbot.gz"
[1] "dataset.53.botp2p.gz"
[1] "dataset.54.fastflux.gz"
For each 4-tuple (src, dst, port, proto) in the labels file (labels_file), find the matching model in the corresponding model file (model_file).
# For every labels file: read the labels and the matching model file, join
# them on the (src, dst, proto, port) 4-tuple, keep only the models that
# received a label, and write the result to
# "<base_dir><field1>.<field2>.<field3>.labeled".
# Prints the labels file name before processing and the number of labeled
# models after writing.
for (labels_file in files) {
  print(labels_file)

  # Labels file: space-separated, no header row; columns are named here.
  labels <- read.csv(
    file = paste0(base_dir, labels_file),
    stringsAsFactors = FALSE,
    header = FALSE,
    sep = " "
  )
  names(labels) <- c("proto", "src", "dst", "port", "label")
  labels <- labels %>%
    filter(proto %in% c("tcp", "udp")) %>%
    mutate(label = str_replace(label, "flow=", ""))
  # The join below needs `port` to have the same type on both sides. The
  # original coerced models$port twice; the second call was presumably meant
  # for labels$port (NOTE(review): confirm). This is a no-op when read.csv
  # already parsed the column as numeric.
  labels$port <- as.numeric(labels$port)

  # File names are built from the dot-separated fields of the labels file
  # name: fields 1, 2, 3 and 5 give the model file (field 4 is dropped),
  # fields 1-3 give the stem of the output file.
  name_parts <- str_split(labels_file, "[.]")[[1]]
  model_file <- paste(name_parts[c(1, 2, 3, 5)], collapse = ".")
  models_labeled_file <- paste(name_parts[1:3], collapse = ".")

  # Model file: tab-separated with a header; ModelId is "src-dst-port-proto".
  models <- read.csv(
    file = paste0(base_dir, model_file),
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE
  )
  models <- models %>%
    separate(ModelId, into = c("src", "dst", "port", "proto"), sep = "-") %>%
    filter(proto %in% c("tcp", "udp"))
  # separate() yields character columns; make port numeric for the join.
  models$port <- as.numeric(models$port)

  # Models without a matching 4-tuple come out of the left join with
  # label = NA. Test that explicitly: the previous `label != "<NA>"` compared
  # against a literal string and only dropped those rows because filter()
  # discards rows whose condition evaluates to NA.
  models_labeled <- left_join(
    models,
    labels,
    by = c("src", "dst", "proto", "port")
  ) %>%
    select(-LabelName) %>%
    filter(!is.na(label))

  readr::write_csv(
    models_labeled,
    paste0(base_dir, models_labeled_file, ".labeled")
  )
  print(nrow(models_labeled))
}