This is the companion R Markdown document to the following presentations that were delivered in Summer 2015:
This R Markdown file generates the outputs and charts used in those presentations from the available test data. It is published on RPubs here.
It should provide enough examples of how to use the tools implemented in TIQ-test. Please review our GitHub repository page, report bugs, and suggest features!
library(parallel)
## A limitation of not being an R package: we need to set the working directory manually
tiqtest.dir = file.path("..", "tiq-test")
current.dir = setwd(tiqtest.dir)
source("tiq-test.R")
## Setting the root data path to where it should be in this repo
.tiq.data.setRootPath(file.path(current.dir, "data"))
## INFO [2015-06-17 11:38:13 CEST] pid=2616 tiq.data.setRootPath: Setting path to '/Users/alexcp/src/tiq-test-Summer2015/data'
We have roughly 1 year (!!) of data available in this public dataset:
print(tiq.data.getAvailableDates("raw", "public_outbound"))
## [1] "20140601" "20140602" "20140603" "20140604" "20140605" "20140606"
## [7] "20140607" "20140608" "20140609" "20140610" "20140611" "20140612"
## [13] "20140613" "20140614" "20140615" "20140616" "20140617" "20140618"
## [19] "20140619" "20140620" "20140621" "20140622" "20140623" "20140624"
## [25] "20140625" "20140626" "20140627" "20140628" "20140629" "20140630"
## [31] "20140701" "20140702" "20140703" "20140704" "20140705" "20140706"
## [37] "20140707" "20140708" "20140709" "20140710" "20140711" "20140712"
## [43] "20140713" "20140714" "20140715" "20140716" "20140717" "20140718"
## [49] "20140719" "20140720" "20140721" "20140722" "20140723" "20140724"
## [55] "20140725" "20140726" "20140727" "20140728" "20140729" "20140730"
## [61] "20140731" "20140801" "20140802" "20140803" "20140804" "20140805"
## [67] "20140806" "20140807" "20140808" "20140809" "20140810" "20140811"
## [73] "20140812" "20140813" "20140814" "20140815" "20140816" "20140817"
## [79] "20140818" "20140819" "20140820" "20140821" "20140822" "20140823"
## [85] "20140824" "20140825" "20140826" "20140827" "20140828" "20140829"
## [91] "20140830" "20140831" "20140901" "20140902" "20140903" "20140904"
## [97] "20140905" "20140906" "20140907" "20140908" "20140909" "20140910"
## [103] "20140911" "20140912" "20140913" "20140914" "20140915" "20140916"
## [109] "20140917" "20140918" "20140919" "20140920" "20140921" "20140922"
## [115] "20140923" "20140924" "20140925" "20140926" "20140927" "20140928"
## [121] "20140929" "20140930" "20141001" "20141002" "20141003" "20141004"
## [127] "20141005" "20141006" "20141007" "20141008" "20141009" "20141010"
## [133] "20141011" "20141012" "20141013" "20141014" "20141015" "20141016"
## [139] "20141017" "20141018" "20141019" "20141020" "20141021" "20141022"
## [145] "20141023" "20141024" "20141025" "20141026" "20141027" "20141028"
## [151] "20141029" "20141030" "20141031" "20141101" "20141102" "20141103"
## [157] "20141104" "20141105" "20141106" "20141107" "20141108" "20141109"
## [163] "20141110" "20141111" "20141112" "20141113" "20141114" "20141115"
## [169] "20141116" "20141117" "20141118" "20141119" "20141120" "20141121"
## [175] "20141122" "20141123" "20141124" "20141125" "20141126" "20141127"
## [181] "20141128" "20141129" "20141130" "20141201" "20141202" "20141203"
## [187] "20141204" "20141205" "20141206" "20141207" "20141208" "20141209"
## [193] "20141210" "20141211" "20141212" "20141213" "20141214" "20141215"
## [199] "20141216" "20141217" "20141218" "20141219" "20141220" "20141221"
## [205] "20141222" "20141223" "20141224" "20141225" "20141226" "20141227"
## [211] "20141228" "20141229" "20141230" "20141231" "20150101" "20150102"
## [217] "20150103" "20150104" "20150105" "20150106" "20150107" "20150108"
## [223] "20150109" "20150110" "20150111" "20150112" "20150113" "20150114"
## [229] "20150115" "20150116" "20150117" "20150118" "20150119" "20150120"
## [235] "20150121" "20150122" "20150123" "20150124" "20150125" "20150126"
## [241] "20150127" "20150128" "20150129" "20150130" "20150131" "20150201"
## [247] "20150202" "20150203" "20150204" "20150205" "20150206" "20150207"
## [253] "20150208" "20150209" "20150210" "20150211" "20150212" "20150213"
## [259] "20150214" "20150215" "20150216" "20150217" "20150218" "20150219"
## [265] "20150220" "20150221" "20150222" "20150223" "20150224" "20150225"
## [271] "20150226" "20150227" "20150228" "20150301" "20150302" "20150303"
## [277] "20150304" "20150305" "20150306" "20150307" "20150308" "20150309"
## [283] "20150310" "20150311" "20150312" "20150313" "20150314" "20150315"
## [289] "20150316" "20150317" "20150318" "20150319" "20150320" "20150321"
## [295] "20150322" "20150323" "20150324" "20150325" "20150326" "20150327"
## [301] "20150328" "20150329" "20150330" "20150331" "20150401" "20150402"
## [307] "20150403" "20150404" "20150405" "20150406" "20150407" "20150408"
## [313] "20150409" "20150410" "20150411" "20150412" "20150413" "20150414"
## [319] "20150415" "20150416" "20150417" "20150418" "20150419" "20150420"
## [325] "20150421" "20150422" "20150423" "20150424" "20150425" "20150426"
## [331] "20150427" "20150428" "20150429" "20150430" "20150501" "20150502"
## [337] "20150503" "20150504" "20150505" "20150506" "20150507" "20150508"
## [343] "20150509" "20150510" "20150511" "20150512" "20150513" "20150514"
## [349] "20150515" "20150516" "20150517" "20150518" "20150519" "20150520"
## [355] "20150521" "20150522" "20150523" "20150524" "20150525" "20150526"
## [361] "20150527" "20150528" "20150529" "20150530" "20150531"
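As a quick sanity check (a small sketch that is not part of the original walkthrough), we can confirm the outbound raw dataset really covers every day in that range, using only the getAvailableDates call above and base R:
# Sanity check: list any days missing from the outbound raw dataset
outbound.dates = tiq.data.getAvailableDates("raw", "public_outbound")
all.days = format(seq(as.Date("2014-06-01"), as.Date("2015-05-31"), by = "day"), "%Y%m%d")
setdiff(all.days, outbound.dates)  # character(0) would mean full coverage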
print(tiq.data.getAvailableDates("raw", "public_inbound"))
## [1] "20140601" "20140602" "20140603" "20140604" "20140605" "20140606"
## [7] "20140607" "20140608" "20140609" "20140610" "20140611" "20140612"
## [13] "20140613" "20140614" "20140615" "20140616" "20140617" "20140618"
## [19] "20140619" "20140620" "20140622" "20140623" "20140624" "20140625"
## [25] "20140626" "20140627" "20140628" "20140629" "20140630" "20140701"
## [31] "20140702" "20140703" "20140704" "20140705" "20140706" "20140707"
## [37] "20140708" "20140709" "20140710" "20140711" "20140712" "20140713"
## [43] "20140714" "20140715" "20140716" "20140717" "20140719" "20140720"
## [49] "20140721" "20140722" "20140723" "20140724" "20140725" "20140726"
## [55] "20140727" "20140728" "20140729" "20140730" "20140731" "20140801"
## [61] "20140802" "20140803" "20140804" "20140805" "20140806" "20140807"
## [67] "20140808" "20140809" "20140810" "20140811" "20140812" "20140813"
## [73] "20140814" "20140815" "20140816" "20140817" "20140818" "20140819"
## [79] "20140820" "20140822" "20140823" "20140824" "20140825" "20140826"
## [85] "20140827" "20140828" "20140829" "20140830" "20140901" "20140902"
## [91] "20140903" "20140904" "20140906" "20140907" "20140908" "20140909"
## [97] "20140910" "20140911" "20140912" "20140913" "20140914" "20140915"
## [103] "20140916" "20140917" "20140918" "20140919" "20140920" "20140921"
## [109] "20140922" "20140923" "20140924" "20140925" "20140926" "20140927"
## [115] "20140928" "20140929" "20140930" "20141001" "20141002" "20141003"
## [121] "20141004" "20141005" "20141006" "20141007" "20141008" "20141009"
## [127] "20141010" "20141011" "20141012" "20141013" "20141014" "20141015"
## [133] "20141016" "20141017" "20141018" "20141019" "20141020" "20141021"
## [139] "20141022" "20141023" "20141024" "20141025" "20141026" "20141027"
## [145] "20141028" "20141029" "20141030" "20141031" "20141101" "20141102"
## [151] "20141103" "20141104" "20141105" "20141106" "20141107" "20141108"
## [157] "20141109" "20141110" "20141111" "20141112" "20141113" "20141114"
## [163] "20141115" "20141116" "20141117" "20141118" "20141119" "20141120"
## [169] "20141121" "20141122" "20141123" "20141124" "20141125" "20141126"
## [175] "20141127" "20141128" "20141129" "20141130" "20141201" "20141202"
## [181] "20141203" "20141204" "20141205" "20141206" "20141207" "20141208"
## [187] "20141209" "20141210" "20141211" "20141212" "20141213" "20141214"
## [193] "20141215" "20141216" "20141217" "20141218" "20141219" "20141220"
## [199] "20141221" "20141222" "20141223" "20141224" "20141225" "20141226"
## [205] "20141227" "20141228" "20141229" "20141230" "20141231" "20150101"
## [211] "20150102" "20150103" "20150106" "20150107" "20150108" "20150109"
## [217] "20150110" "20150111" "20150112" "20150113" "20150114" "20150115"
## [223] "20150116" "20150117" "20150118" "20150119" "20150120" "20150121"
## [229] "20150122" "20150123" "20150124" "20150125" "20150126" "20150127"
## [235] "20150128" "20150129" "20150130" "20150131" "20150201" "20150202"
## [241] "20150203" "20150204" "20150205" "20150206" "20150207" "20150208"
## [247] "20150209" "20150210" "20150211" "20150212" "20150213" "20150214"
## [253] "20150215" "20150216" "20150217" "20150218" "20150219" "20150220"
## [259] "20150221" "20150222" "20150223" "20150224" "20150225" "20150226"
## [265] "20150227" "20150228" "20150301" "20150302" "20150303" "20150304"
## [271] "20150305" "20150306" "20150307" "20150308" "20150309" "20150310"
## [277] "20150311" "20150312" "20150313" "20150314" "20150315" "20150316"
## [283] "20150317" "20150318" "20150319" "20150320" "20150321" "20150322"
## [289] "20150323" "20150324" "20150325" "20150326" "20150327" "20150328"
## [295] "20150329" "20150330" "20150331" "20150401" "20150402" "20150403"
## [301] "20150404" "20150405" "20150406" "20150407" "20150408" "20150409"
## [307] "20150410" "20150411" "20150412" "20150413" "20150414" "20150415"
## [313] "20150416" "20150417" "20150418" "20150419" "20150420" "20150421"
## [319] "20150422" "20150424" "20150425" "20150426" "20150427" "20150428"
## [325] "20150429" "20150430" "20150501" "20150502" "20150503" "20150504"
## [331] "20150505" "20150506" "20150507" "20150508" "20150509" "20150510"
## [337] "20150511" "20150512" "20150513" "20150514" "20150515" "20150516"
## [343] "20150517" "20150518" "20150519" "20150520" "20150521" "20150522"
## [349] "20150523" "20150524" "20150525" "20150526" "20150527" "20150528"
## [355] "20150529" "20150530" "20150531"
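The inbound feed has a few gaps (note, for instance, the jump from "20140620" to "20140622" above). A similar sketch, again not part of the original document, lists the days present in the outbound dataset but missing from the inbound one:
# Days available in the outbound raw dataset but not in the inbound one
outbound.dates = tiq.data.getAvailableDates("raw", "public_outbound")
inbound.dates = tiq.data.getAvailableDates("raw", "public_inbound")
setdiff(outbound.dates, inbound.dates)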
This time, we also have private data feeds over the same time period, but the information in them cannot be shared publicly as part of this release. If you are reproducing this in your own environment, you will not be able to recreate some of the outputs below:
if (tiq.data.isDatasetAvailable("raw", "private1")) {
print(tiq.data.getAvailableDates("raw", "private1"))
} else {
print("Sorry, private1 dataset is not available.")
}
## [1] "20140903" "20140904" "20140905" "20140906" "20140907" "20140908"
## [7] "20140909" "20140910" "20140911" "20140912" "20140913" "20140914"
## [13] "20140915" "20140916" "20140917" "20140918" "20140919" "20140920"
## [19] "20140921" "20140922" "20140923" "20140924" "20140925" "20140926"
## [25] "20140927" "20140928" "20140929" "20140930" "20141001" "20141002"
## [31] "20141003" "20141004" "20141005" "20141006" "20141007" "20141008"
## [37] "20141009" "20141010" "20141011" "20141012" "20141013" "20141014"
## [43] "20141015" "20141016" "20141017" "20141018" "20141019" "20141020"
## [49] "20141021" "20141022" "20141023" "20141024" "20141025" "20141026"
## [55] "20141027" "20141028" "20141029" "20141030" "20141031" "20141101"
## [61] "20141102" "20141103" "20141104" "20141105" "20141106" "20141107"
## [67] "20141108" "20141109" "20141110" "20141111" "20141112" "20141113"
## [73] "20141114" "20141115" "20141116" "20141117" "20141118" "20141119"
## [79] "20141120" "20141121" "20141122" "20141123" "20141124" "20141125"
## [85] "20141126" "20141127" "20141128" "20141129" "20141130" "20141201"
## [91] "20141202" "20141203" "20141204" "20141205" "20141206" "20141207"
## [97] "20141208" "20141209" "20141210" "20141211" "20141212" "20141213"
## [103] "20141214" "20141215" "20141216" "20141217" "20141218" "20141219"
## [109] "20141220" "20141221" "20141222" "20141223" "20141224" "20141225"
## [115] "20141226" "20141227" "20141228" "20141229" "20141230" "20141231"
## [121] "20150101" "20150102" "20150103" "20150104" "20150105" "20150106"
## [127] "20150107" "20150108" "20150109" "20150110" "20150111" "20150112"
## [133] "20150113" "20150114" "20150115" "20150116" "20150117" "20150118"
## [139] "20150119" "20150120" "20150121" "20150122" "20150123" "20150124"
## [145] "20150125" "20150126" "20150127" "20150128" "20150129" "20150130"
## [151] "20150131" "20150201" "20150202" "20150203" "20150204" "20150205"
## [157] "20150206" "20150207" "20150208" "20150209" "20150210" "20150211"
## [163] "20150212" "20150213" "20150214" "20150215" "20150216" "20150217"
## [169] "20150218" "20150219" "20150220" "20150221" "20150222" "20150223"
## [175] "20150224" "20150225" "20150226" "20150227" "20150228" "20150301"
## [181] "20150302" "20150303" "20150304" "20150305" "20150306" "20150307"
## [187] "20150308" "20150309" "20150310" "20150311" "20150312" "20150313"
## [193] "20150314" "20150315" "20150316" "20150317" "20150318" "20150319"
## [199] "20150320" "20150321" "20150322" "20150323" "20150324" "20150325"
## [205] "20150326" "20150327" "20150328" "20150329" "20150330" "20150331"
## [211] "20150401" "20150402" "20150403" "20150404" "20150405" "20150406"
## [217] "20150407" "20150408" "20150409" "20150410" "20150411" "20150412"
## [223] "20150413" "20150414" "20150415" "20150416" "20150417" "20150418"
## [229] "20150419" "20150420" "20150421" "20150422" "20150423" "20150424"
## [235] "20150425" "20150426" "20150427" "20150428" "20150429" "20150430"
## [241] "20150501" "20150502" "20150503" "20150504" "20150505" "20150506"
## [247] "20150507" "20150508" "20150509" "20150510" "20150511" "20150512"
## [253] "20150513" "20150514" "20150515" "20150516" "20150517" "20150518"
## [259] "20150519" "20150520" "20150521" "20150522" "20150523" "20150524"
## [265] "20150525" "20150526" "20150527" "20150528" "20150529" "20150530"
## [271] "20150531"
This is an example of “raw” (not enriched) outbound data, imported from the combine output:
outbound.ti = tiq.data.loadTI("raw", "public_outbound", "20150501")
outbound.ti[, list(entity, type, direction, source, date)]
## entity type direction source date
## 1: 103.18.247.72 IPv4 outbound alienvault 2015-05-01
## 2: 103.253.41.10 IPv4 outbound alienvault 2015-05-01
## 3: 103.6.196.92 IPv4 outbound alienvault 2015-05-01
## 4: 103.6.198.12 IPv4 outbound alienvault 2015-05-01
## 5: 103.9.103.141 IPv4 outbound alienvault 2015-05-01
## ---
## 145195: winscoft.com FQDN outbound zeus 2015-05-01
## 145196: worldrecipeblogs.com FQDN outbound zeus 2015-05-01
## 145197: www.nikey.cn FQDN outbound zeus 2015-05-01
## 145198: www.riverwalktrader.co.za FQDN outbound zeus 2015-05-01
## 145199: zetes.vdsinside.com FQDN outbound zeus 2015-05-01
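Since loadTI returns a data.table, a quick aggregation (a sketch, not part of the original output) shows how the day's indicators break down by type:
# Count of raw outbound indicators per type (IPv4, FQDN, ...) for 20150501
outbound.ti[, .N, by = type]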
We can use the same loadTI function to load the enriched datasets as well:
enrich.ti = tiq.data.loadTI("enriched", "public_outbound", "20150501")
enrich.ti = enrich.ti[, notes := NULL]
tail(enrich.ti)
## entity type direction source date asnumber
## 1: 94.76.211.87 IPv4 outbound zeus 2015-05-01 29550
## 2: 95.211.243.120 IPv4 outbound zeus 2015-05-01 60781
## 3: 95.211.243.123 IPv4 outbound zeus 2015-05-01 60781
## 4: 95.211.243.125 IPv4 outbound zeus 2015-05-01 60781
## 5: 98.131.185.136 IPv4 outbound zeus 2015-05-01 32392
## 6: 98.131.185.136 IPv4 outbound zeus 2015-05-01 32392
## asname country host
## 1: Simply Transit Ltd GB NA
## 2: LeaseWeb B.V. NL NA
## 3: LeaseWeb B.V. NL NA
## 4: LeaseWeb B.V. NL NA
## 5: Ecommerce Corporation US NA
## 6: Ecommerce Corporation US projects.globaltronics.net
## rhost
## 1: 94-76-211-87.static.as29550.net
## 2: NA
## 3: NA
## 4: NA
## 5: NA
## 6: NA
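Because the enriched data carries the country field, a one-liner along the same lines (again a sketch, not from the original document) gives a first look at where the IPv4 indicators are located:
# Top 10 countries by number of enriched IPv4 outbound indicators for 20150501
head(enrich.ti[type == "IPv4", .N, by = country][order(-N)], 10)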
This specific outbound dataset has the following sources included:
outbound.ti = tiq.data.loadTI("raw", "public_outbound", "20150501")
unique(outbound.ti$source)
## [1] "alienvault" "bambenek" "et_shadowserver_cnc"
## [4] "feodo" "kafeine" "malcode"
## [7] "malwared" "malwaredomainlist" "malwaredomains"
## [10] "malwaregroup" "openphish" "palevotracker"
## [13] "phishtank" "sslbl" "zeus"
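A per-source indicator count (another small sketch) helps put this source list in perspective; the same expression works unchanged on inbound.ti below:
# Number of raw outbound indicators contributed by each source on 20150501
outbound.ti[, .N, by = source][order(-N)]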
We can do the same for the inbound data to see which sources are available:
inbound.ti = tiq.data.loadTI("raw", "public_inbound", "20150501")
unique(inbound.ti$source)
## [1] "alienvault" "autoshun" "blocklistde"
## [4] "botscout" "bruteforceblocker" "charleshaley"
## [7] "ciarmy" "dragonresearch" "dshield"
## [10] "honeypot" "openbl" "packetmail"
## [13] "virbl"
Here are some results of running the Novelty test on the inbound data:
inbound.novelty = tiq.test.noveltyTest("public_inbound", "20150101", "20150531",
select.sources=c("alienvault", "blocklistde",
"dshield", "charleshaley"),
.progress=FALSE)
tiq.test.plotNoveltyTest(inbound.novelty, title="Novelty Test - Inbound Indicators")
And here are the results of running it on the outbound data:
outbound.novelty = tiq.test.noveltyTest("public_outbound", "20150101", "20150531",
select.sources=c("alienvault", "malwaregroup",
"malcode", "zeus"),
.progress=FALSE)
tiq.test.plotNoveltyTest(outbound.novelty, title="Novelty Test - Outbound Indicators")
We can analyze the public_outbound dataset as a single unit as well, in order to compare it with other repositories:
outbound.novelty = tiq.test.noveltyTest("public_outbound", "20150101", "20150531",
split.tii=F, .progress=FALSE)
tiq.test.plotNoveltyTest(outbound.novelty)
## Warning: Stacking not well defined when ymin != 0
The same can be done with the inbound indicators:
inbound.novelty = tiq.test.noveltyTest("public_inbound", "20150101", "20150531",
split.tii=F, .progress=FALSE)
tiq.test.plotNoveltyTest(inbound.novelty)
## Warning: Stacking not well defined when ymin != 0
And with any private sources we may have available:
if (tiq.data.isDatasetAvailable("raw", "private1")) {
private.novelty = tiq.test.noveltyTest("private1", "20150101", "20150531",
split.tii=F, .progress=FALSE)
tiq.test.plotNoveltyTest(private.novelty)
} else {
print("Sorry, private1 dataset is not available.")
}
## Warning: Stacking not well defined when ymin != 0
This is an example of applying the Overlap Test to our inbound dataset:
overlap = tiq.test.overlapTest("public_inbound", "20150501", "enriched",
select.sources=NULL)
tiq.test.plotOverlapTest(overlap, title="Overlap Test - Inbound Data - 20150501")
Similarly, here is an example of applying the Overlap Test to the outbound dataset:
overlap = tiq.test.overlapTest("public_outbound", "20150501", "enriched",
select.sources=NULL)
tiq.test.plotOverlapTest(overlap, title="Overlap Test - Outbound Data - 20150501")
We can use this function to compare our private dataset against each source in our public outbound indicator libraries. This gives some interesting insight into the data it may be drawing from public sources:
overlap = tiq.test.overlapTest(c("public_outbound", "private1"), "20150501", "enriched",
split.ti=c(T,F), select.sources=NULL)
tiq.test.plotOverlapTest(overlap, title="Overlap Test - public_outbound VS private1 - 20141101")
With the population data, we can generate plots comparing the top counts of reported IP addresses by country on a specific date:
outbound.pop = tiq.test.extractPopulationFromTI("public_outbound", "country",
date = "20150501",
select.sources=NULL, split.ti=F)
inbound.pop = tiq.test.extractPopulationFromTI("public_inbound", "country",
date = "20150501",
select.sources=NULL, split.ti=F)
complete.pop = tiq.data.loadPopulation("mmgeo", "country")
tiq.test.plotPopulationBars(c(inbound.pop, outbound.pop, complete.pop), "country")
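The population helpers each return a named list keyed by dataset name, which is why they can be combined with a plain c() before plotting. A quick way to see the keys (a sketch, assuming the objects created above):
# Each population object is a named list; c() simply merges them for plotting
names(c(inbound.pop, outbound.pop, complete.pop))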
We can use the same approach to compare our aggregated outbound indicators against the private dataset we have:
if (tiq.data.isDatasetAvailable("enriched", "private1")) {
outbound.pop = tiq.test.extractPopulationFromTI("public_outbound", "country",
date = "20150501",
select.sources=NULL, split.ti=F)
private.pop = tiq.test.extractPopulationFromTI("private1", "country",
date = "20150501",
select.sources=NULL, split.ti=F)
tiq.test.plotPopulationBars(c(private.pop, outbound.pop), "country",
title="Comparing Private1 and Public Feeds on 20150501")
} else {
print("Sorry, private1 dataset is not available.")
}
We can use some inference tools to better understand whether the volume of maliciousness we are seeing makes sense in relation to what we consider our reference population.
outbound.pop = tiq.test.extractPopulationFromTI("public_outbound", "country",
date = "20150501",
select.sources=NULL,
split.ti=FALSE)
complete.pop = tiq.data.loadPopulation("mmgeo", "country")
tests = tiq.test.populationInference(complete.pop$mmgeo,
outbound.pop$public_outbound, "country",
exact = TRUE, top=10)
# Whose proportion is bigger than it should be?
tests[p.value < 0.05/10 & conf.int.end > 0][order(conf.int.end, decreasing=T)]
## country conf.int.start conf.int.end p.value
## 1: US 0.084870546 0.09783018 2.384509e-169
## 2: RU 0.026186375 0.03139187 6.353991e-208
## 3: NL 0.023978511 0.02910542 5.195447e-173
## 4: TH 0.022516321 0.02675944 0.000000e+00
## 5: UA 0.012309106 0.01571787 2.613731e-150
## 6: FR 0.007112142 0.01177276 7.385030e-19
# Whose is smaller?
tests[p.value < 0.05/10 & conf.int.start < 0][order(conf.int.start, decreasing=F)]
## country conf.int.start conf.int.end p.value
## 1: CN -0.035268623 -0.029053639 3.245893e-71
## 2: CA -0.010799505 -0.007832391 2.723407e-25
## 3: GB -0.005771743 -0.001222031 3.132783e-03
# And whose is the same? ¯\_(ツ)_/¯
tests[p.value > 0.05/10]
## country conf.int.start conf.int.end p.value
## 1: DE -0.001333158 0.003429626 0.3980818
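A note on the 0.05/10 threshold used in the filters above: since top = 10 countries are tested at once, each individual p-value is compared against alpha divided by the number of tests (a Bonferroni-style correction), so that the family-wise error rate stays at roughly alpha:
# Bonferroni-style threshold used in the filters above
alpha = 0.05
n.tests = 10     # matches the top = 10 argument to populationInference
alpha / n.tests  # 0.005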
This tool also enables us to do trend comparisons between the same TI groupings on different days, or between different groupings. A suggested usage is comparing the threat intelligence feeds you have against the population of confirmed attacks or firewall blocks in your environment.
outbound.pop2 = tiq.test.extractPopulationFromTI("public_outbound", "country",
date = "20150502",
select.sources=NULL,
split.ti=FALSE)
tests = tiq.test.populationInference(outbound.pop$public_outbound,
outbound.pop2$public_outbound, "country",
exact = F, top=10)
# Whose proportion is bigger than it should be?
tests[p.value < 0.05/10 & conf.int.end > 0][order(conf.int.end, decreasing=T)]
## country conf.int.start conf.int.end p.value
## 1: UA 0.005340435 0.01067454 2.866125e-09
# Whose is smaller?
tests[p.value < 0.05/10 & conf.int.start < 0][order(conf.int.start, decreasing=F)]
## Empty data.table (0 rows) of 4 cols: country,conf.int.start,conf.int.end,p.value
# And whose is the same? ¯\_(ツ)_/¯
tests[p.value > 0.05/10]
## country conf.int.start conf.int.end p.value
## 1: CA -0.002362294 0.001820778 0.8276841
## 2: CN -0.004735466 0.004035414 0.8898819
## 3: DE -0.004268857 0.002424020 0.6018893
## 4: FR -0.003965175 0.002593904 0.6972422
## 5: GB -0.004115601 0.002274686 0.5854007
## 6: NL -0.004187223 0.003037313 0.7702996
## 7: RU -0.005102845 0.002196010 0.4433778
## 8: TH -0.001955305 0.004104303 0.4986259
## 9: US -0.012033239 0.006258059 0.5402447
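As a further illustration (a sketch that is not in the original presentation), the same machinery can contrast two different groupings, for example the inbound versus outbound country populations for 20150501, reusing the inbound.pop and outbound.pop objects computed above:
# Compare the inbound and outbound country populations for the same day
tests.io = tiq.test.populationInference(inbound.pop$public_inbound,
                                        outbound.pop$public_outbound, "country",
                                        exact = FALSE, top = 10)
# Countries over-represented in the outbound feeds relative to the inbound ones,
# using the same 0.05/10 threshold as above
tests.io[p.value < 0.05/10 & conf.int.end > 0][order(conf.int.end, decreasing = TRUE)]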
The aging test tries to identify how long a specific indicator has lived in a threat feed. As with other tests, such as the population and novelty tests, you can measure this either in aggregate across all your subgroups or separately for each one.
Here it is run against the outbound indicators, separated out by subgroup:
outbound.aging = tiq.test.agingTest("public_outbound", "20150101", "20150531")
tiq.test.plotAgingTest(outbound.aging, title="Aging Test - Outbound Data")
Here it is run against the inbound indicators. It is interesting to observe how the distributions differ because of the different ways the data is collected:
inbound.aging = tiq.test.agingTest("public_inbound", "20150101", "20150531")
tiq.test.plotAgingTest(inbound.aging, title="Aging Test - Inbound Data")
You can also look at it as a whole, to evaluate the aging of your entire TI repository in its enriched format:
outbound.aging = tiq.test.agingTest("public_outbound", "20150101", "20150531", type="enriched",
split.ti=F)
tiq.test.plotAgingTest(outbound.aging, title="Aging Test - Outbound Data")
This allows us to compare it against the equivalently formatted data from the private dataset:
if (tiq.data.isDatasetAvailable("enriched", "private1")) {
private.aging = tiq.test.agingTest("private1", "20150101", "20150531", type="enriched",
split.ti=F)
tiq.test.plotAgingTest(private.aging, title="Aging Test - Private Outbound Data", density.limit=0.7)
} else {
print("Sorry, private1 dataset is not available.")
}
For the Uniqueness Test examples, we calculate the absolute uniqueness of the data over different time windows (1, 31, 90, and 151 days) to see how this uniqueness evolves over time. Running the tests, we see that there is not a lot of variation in the uniqueness ratio of the inbound data:
uniqueTest = rbindlist(mclapply(
c("20150101","20150131","20150331","20150531"),
function(dd) {
tiq.test.uniquenessTest("public_inbound", "20150101", dd,"raw", split.tii = T)
}, mc.allow.recursive = FALSE)
)
uniqueTest[count == 1]
## count ratio days
## 1: 1 0.9684775 1
## 2: 1 0.9678683 31
## 3: 1 0.9639037 90
## 4: 1 0.9631740 151
tiq.test.plotUniquenessTest(uniqueTest, title="Uniqueness Test - Inbound Data")
Nor is there a lot of variation in the outbound data:
uniqueTest = rbindlist(mclapply(
c("20150101","20150131","20150331","20150531"),
function(dd) {
tiq.test.uniquenessTest("public_outbound", "20150101", dd,"raw", split.tii = T)
}, mc.allow.recursive = FALSE)
)
uniqueTest[count == 1]
## count ratio days
## 1: 1 0.9912258 1
## 2: 1 0.9898420 31
## 3: 1 0.9893606 90
## 4: 1 0.9352627 151
tiq.test.plotUniquenessTest(uniqueTest, title="Uniqueness Test - Outbound Data")
Also, adding the private data does not change the uniqueness ratios much further. Some work had previously gone into selecting feeds with little overlap, and we can see here that it paid off.
if (tiq.data.isDatasetAvailable("enriched", "private1")) {
uniqueTest = rbindlist(mclapply(
c("20150101","20150131","20150331","20150531"),
function(dd) {
tiq.test.uniquenessTest(c("public_outbound", "private1"), "20150101", dd,
"raw", split.tii = c(T,F))
}, mc.allow.recursive = FALSE)
)
print(uniqueTest[count == 1])
tiq.test.plotUniquenessTest(uniqueTest, title="Uniqueness Test (enriched) - Private Data vs. Outbound Data")
} else {
print("Sorry, private1 dataset is not available.")
}
## count ratio days
## 1: 1 0.9818253 1
## 2: 1 0.9838021 31
## 3: 1 0.9850241 90
## 4: 1 0.9420800 151
This finishes the analysis of this dataset. Feel free to suggest new tests and sources.