Listing files containing labels

# Show the label file names as a one-column matrix so they print one per row.
cbind(files)
      files                          
 [1,] "dataset.42.neris.labels.gz"   
 [2,] "dataset.43.neris.labels.gz"   
 [3,] "dataset.44.rbot.labels.gz"    
 [4,] "dataset.45.rbotdos.labels.gz" 
 [5,] "dataset.46.fastflux.labels.gz"
 [6,] "dataset.47.donbot.labels.gz"  
 [7,] "dataset.48.sogou.labels.gz"   
 [8,] "dataset.49.gvod.labels.gz"    
 [9,] "dataset.50.bot.labels.gz"     
[10,] "dataset.51.rbot.labels.gz"    
[11,] "dataset.52.rbot.labels.gz"    
[12,] "dataset.53.botp2p.labels.gz"  
[13,] "dataset.54.fastflux.labels.gz"

Listing the files containing the behavioral models (letters)

# Derive the behavioral-model file name for every labels file by dropping the
# 4th dot-separated component of its name, then print it.
for (labels_file in files) {
  name_parts <- str_split(labels_file, "[.]")[[1]]
  model_file <- paste(name_parts[c(1, 2, 3, 5)], collapse = ".")
  print(model_file)
}
[1] "dataset.42.neris.gz"
[1] "dataset.43.neris.gz"
[1] "dataset.44.rbot.gz"
[1] "dataset.45.rbotdos.gz"
[1] "dataset.46.fastflux.gz"
[1] "dataset.47.donbot.gz"
[1] "dataset.48.sogou.gz"
[1] "dataset.49.gvod.gz"
[1] "dataset.50.bot.gz"
[1] "dataset.51.rbot.gz"
[1] "dataset.52.rbot.gz"
[1] "dataset.53.botp2p.gz"
[1] "dataset.54.fastflux.gz"

Script used for labeling

For each 4-tuple (src, dst, port, proto) in the labels_file, find the matching model in model_file

# Attach a label to every behavioral model by matching its connection 4-tuple
# (src, dst, port, proto) against the rows of the corresponding labels file.
# For each labels file the labeled models are written to
# "<base_dir><part1>.<part2>.<part3>.labeled" and the number of labeled rows
# is printed.
for (labels_file in files) {
  print(labels_file)

  # Labels file: space-separated, no header, five columns.
  labels <- read.csv(
    file = paste0(base_dir, labels_file),
    stringsAsFactors = FALSE,
    header = FALSE,
    sep = " "
  )
  names(labels) <- c("proto", "src", "dst", "port", "label")
  labels <- labels %>%
    filter(proto == "tcp" | proto == "udp") %>%
    mutate(label = str_replace(label, "flow=", ""))
  # Force the join key to the same type as models$port below. read.csv infers
  # the column type from the whole file, so a single non-numeric port in a row
  # that is filtered out above would otherwise leave this column as character
  # and make the join fail on incompatible key types.
  labels$port <- as.numeric(labels$port)

  # File names are built from the dot-separated parts of the labels file name:
  # the model file drops the 4th part, the output file keeps the first three.
  name_parts <- str_split(labels_file, "[.]")[[1]]
  model_file <- paste(name_parts[c(1, 2, 3, 5)], collapse = ".")
  models_labeled_file <- paste(name_parts[1:3], collapse = ".")

  # Models file: tab-separated with a header; ModelId encodes the 4-tuple as
  # "src-dst-port-proto".
  models <- read.csv(
    file = paste0(base_dir, model_file),
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE
  )
  models <- models %>%
    separate(ModelId, into = c("src", "dst", "port", "proto"), sep = "-") %>%
    filter(proto == "tcp" | proto == "udp")
  models$port <- as.numeric(models$port)

  # Keep only the models that found a label. Test for NA explicitly instead of
  # comparing against the string "<NA>", which only worked because filter()
  # drops rows whose condition evaluates to NA.
  models_labeled <- left_join(
    models, labels,
    by = c("src", "dst", "proto", "port")
  ) %>%
    select(-LabelName) %>%
    filter(!is.na(label))

  readr::write_csv(
    models_labeled,
    paste0(base_dir, models_labeled_file, ".labeled")
  )
  print(nrow(models_labeled))
}
LS0tCnRpdGxlOiAiQ1RVMTMgTGFiZWxpbmcgU2NyaXB0cyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyBMaXN0aW5nIGZpbGVzIGNvbnRhaW5pbmcgbGFiZWxzCgpgYGB7cn0KYmFzZV9kaXI9Ii9ob21lL2hhcnBvL0Ryb3Bib3gvb25nb2luZy13b3JrL2dpdC1yZXBvcy9zdHJhdG9zcGhlcmUtZGVlcC9kYXRhc2V0cy9jdHUtMTMvIgpmaWxlcz1saXN0LmZpbGVzKGJhc2VfZGlyLHBhdHRlcm4gPSAibGFiZWxzIiApCmNiaW5kKGZpbGVzKQpgYGAKCiMgTGlzdGluZyB0aGUgZmlsZXMgY29udGFpbmluZyB0aGUgYmVoYXZpb3JhbCBtb2RlbHMgKGxldHRlcnMpCgpgYGB7cn0KZm9yIChsYWJlbHNfZmlsZSBpbiBmaWxlcyl7CiAgYT1zdHJfc3BsaXQobGFiZWxzX2ZpbGUsIlsuXSIpW1sxXV0KICBtb2RlbF9maWxlPXBhc3RlKGFbMV0sYVsyXSxhWzNdLGFbNV0sc2VwPSIuIikKICBwcmludChwYXN0ZShtb2RlbF9maWxlKSkKfSAgCmBgYAoKIyBTY3JpcHQgdXNlZCBmb3IgbGFiZWxpbmcKCkZvciBlYWNoIDQtdHVwbGUgb24gdGhlICoqbGFiZWxzX2ZpbGUqKiBmaW5kIHRoZSBtb2RlbCBpbiAqKm1vZGVsX2ZpbGUqKgoKYGBge3J9CmZvciAobGFiZWxzX2ZpbGUgaW4gZmlsZXMpewogIHByaW50KGxhYmVsc19maWxlKQogIGxhYmVscz1yZWFkLmNzdihmaWxlPXBhc3RlMChiYXNlX2RpcixsYWJlbHNfZmlsZSksc3RyaW5nc0FzRmFjdG9ycyA9IEYsaGVhZGVyPUYsc2VwPSIgIikKICAKICBuYW1lcyhsYWJlbHMpPC1jKCJwcm90byIsInNyYyIsImRzdCIsInBvcnQiLCJsYWJlbCIpCiAgbGFiZWxzPWxhYmVscyAlPiUgZmlsdGVyKHByb3RvPT0ndGNwJ3xwcm90bz09J3VkcCcpCiAgbGFiZWxzPWxhYmVscyAlPiUgbXV0YXRlKGxhYmVsPXN0cl9yZXBsYWNlKGxhYmVsLCJmbG93PSIsIiIpKQogIAogIGE9c3RyX3NwbGl0KGxhYmVsc19maWxlLCJbLl0iKVtbMV1dCiAgbW9kZWxfZmlsZT1wYXN0ZShhWzFdLGFbMl0sYVszXSxhWzVdLHNlcD0iLiIpCiAgbW9kZWxzX2xhYmVsZWRfZmlsZT1wYXN0ZShhWzFdLGFbMl0sYVszXSxzZXA9Ii4iKQogIAogIG1vZGVscz1yZWFkLmNzdihmaWxlPXBhc3RlMChiYXNlX2Rpcixtb2RlbF9maWxlKSxoZWFkZXI9VCxzZXA9Ilx0IixzdHJpbmdzQXNGYWN0b3JzID0gRikKICBtb2RlbHM9bW9kZWxzICU+JSBzZXBhcmF0ZShNb2RlbElkLGludG89Yygic3JjIiwiZHN0IiwicG9ydCIsInByb3RvIiksc2VwPSItIikKICBtb2RlbHM9bW9kZWxzICAlPiUgZmlsdGVyKHByb3RvPT0ndGNwJ3xwcm90bz09J3VkcCcpCiAgbW9kZWxzJHBvcnQ9YXMubnVtZXJpYyhtb2RlbHMkcG9ydCkKICBtb2RlbHMkcG9ydD1hcy5udW1lcmljKG1vZGVscyRwb3J0KQogIAogIG1vZGVsc19sYWJlbGVkPWxlZnRfam9pbihtb2RlbHMsbGFiZWxzLGJ5PWMoInNyYyIsImRzdCIsInByb3RvIiwicG9ydCIpKQogIG1vZGVsc19sYWJlbGVkPW1vZGVsc19sYWJlbGVkICU+JSBzZWxlY3QoLUxhYmVsTmFtZSkKICBtb2RlbHNfbGFiZWxl
ZD1tb2RlbHNfbGFiZWxlZCAlPiUgZmlsdGVyKGxhYmVsIT0iPE5BPiIpIAogIHJlYWRyOjp3cml0ZV9jc3YobW9kZWxzX2xhYmVsZWQscGFzdGUwKGJhc2VfZGlyLG1vZGVsc19sYWJlbGVkX2ZpbGUsIi5sYWJlbGVkIikpCiAgcHJpbnQobnJvdyhtb2RlbHNfbGFiZWxlZCkpCn0KYGBgCgo=