---
title: "AS10-1：誰是來亂的？"
author: "李劭竑 M064020023"
date: "`r Sys.time()`"
output: html_notebook
---

The data for this problem is based on the revision history of the page Language. Wikipedia provides a history for each page that consists of the state of the page at each revision. Rather than manually considering each revision, a script was run that checked whether edits stayed or were reverted. If a change was eventually reverted then that revision is marked as vandalism. This may result in some misclassifications, but the script performs well enough for our needs.

As a result of this preprocessing, some common processing tasks have already been done, including lower-casing and punctuation removal. The columns in the dataset are:

+ Vandal = 1 if this edit was vandalism, 0 if not.
+ Minor = 1 if the user marked this edit as a "minor edit", 0 if not.
+ Loggedin = 1 if the user made this edit while using a Wikipedia account, 0 if they did not.
+ Added = The unique words added.
+ Removed = The unique words removed.

<br><hr>

```{r}
packages = c(
  "dplyr","ggplot2","caTools","tm","SnowballC","ROCR","rpart","rpart.plot","randomForest")
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)
```

```{r warning=F, message=F, cache=F, error=F}
rm(list=ls(all=TRUE))
Sys.setlocale("LC_ALL","C")
options(digits=5, scipen=10)

library(dplyr)
library(tm)
library(SnowballC)
library(ROCR)
library(caTools)
library(rpart)
library(rpart.plot)
library(randomForest)
```
<br>

### Problem 1 - Bags of Words

##### 1.1 The data set

```{r}
wiki = read.csv("data/wiki.csv", stringsAsFactors = F)
wiki$Vandal = factor(wiki$Vandal)
table(wiki$Vandal)
# Vandal:   1 if the edit was vandalism, 0 if not
# Minor:    1 if the user marked this as a "minor edit"
# Loggedin: 1 if the edit was made from a logged-in Wikipedia account
# Added:    the unique words added by the edit
# Removed:  the unique words removed by the edit
```

【P1.1】__How many cases of vandalism were detected in the history of this page?__

+ 1815
+

##### 1.2 DTM, The Added Words
```{r}
library(tm)
library(SnowballC)

# Create corpus for Added Words
txt = iconv(wiki$Added, to = "utf-8", sub="")
corpus = Corpus(VectorSource(txt))
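# Note: tm may warn "transformation drops documents" for the tm_map()
# calls below when the corpus is a SimpleCorpus; the warning is benign here.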
corpus = tm_map(corpus, removeWords, stopwords("english"))
corpus = tm_map(corpus, stemDocument)
dtm = DocumentTermMatrix(corpus)  # "dtmAdded" in the assignment's wording
dtm
```
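
As a quick sanity check (optional, not part of the assignment), `findFreqTerms()` from tm lists the stems that occur at least a given number of times across all revisions:

```{r}
# Optional: stems appearing at least 30 times in total
findFreqTerms(dtm, lowfreq = 30)
```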
【P1.2】__How many terms appear in `dtmAdded`?__

+ 6675
+

##### 1.3 Handle Sparsity
Filter out sparse terms by keeping only terms that appear in 0.3% or more of the revisions, and call the new matrix sparseAdded. 
```{r}
nwAdded = rowSums(as.matrix(dtm))     # number of words added in each edit
dtm = removeSparseTerms(dtm, 0.997)   # "sparseAdded" in the assignment's wording
dtm
```
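
To connect the 0.997 argument to the 0.3% rule: `removeSparseTerms()` drops any term whose sparsity exceeds 0.997, so a term survives only if it appears in at least 0.3% of the 3876 revisions, i.e. in roughly a dozen of them:

```{r}
# Minimum number of revisions a term must appear in to survive the filter
ceiling(3876 * (1 - 0.997))   # 12
```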
【P1.3】__How many terms appear in `sparseAdded`?__

+ 166
+

##### 1.4 Create Data Frames, `wordsAdded` & `wordsRemoved`
Convert sparseAdded to a data frame called `wordsAdded`, and then prepend all the words with the letter A, by using the command:
```{r}
wordsAdded = as.data.frame(as.matrix(dtm))
colnames(wordsAdded) = paste("A", colnames(wordsAdded))  # "A" prefix keeps Added terms distinct from Removed ones
```

Now repeat all of the steps we've done so far to create a Removed bag-of-words dataframe, called `wordsRemoved`, except this time, prepend all of the words with the letter R:

```{r}
# Create corpus
txt = iconv(wiki$Removed, to = "utf-8", sub="")
corpus = Corpus(VectorSource(txt))
corpus = tm_map(corpus, removeWords, stopwords("english"))
corpus = tm_map(corpus, stemDocument)
dtm = DocumentTermMatrix(corpus)
dtm
nwRemoved = rowSums(as.matrix(dtm))   # number of words removed in each edit
dtm = removeSparseTerms(dtm, 0.997)
dtm
wordsRemoved = as.data.frame(as.matrix(dtm))
colnames(wordsRemoved) = paste("R", colnames(wordsRemoved))  # "R" prefix, mirroring the Added columns
```
【P1.4】__How many words are in the `wordsRemoved` data frame?__

+ 162
+

##### 1.5 Prepare the Data Frame
Combine the Data Frames `wordsAdded` & `wordsRemoved` with the Target Variable `wiki$Vandal`
```{r}
wikiWords = cbind(wordsAdded, wordsRemoved)
wikiWords$Vandal = wiki$Vandal
```
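
A quick dimension check (optional): the combined frame should have one row per revision and 166 + 162 + 1 = 329 columns:

```{r}
dim(wikiWords)   # 3876 rows, 329 columns (166 A-terms + 162 R-terms + Vandal)
```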

Split the data frame for train and test data
```{r}
library(caTools)
set.seed(123)
spl = sample.split(wikiWords$Vandal, 0.7)
train = subset(wikiWords, spl == TRUE)
test = subset(wikiWords, spl == FALSE)
table(test$Vandal) %>% prop.table
```
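
The baseline classifier always predicts the majority class of the test set, so its accuracy is simply the larger of the two proportions above:

```{r}
# Baseline accuracy: always predict "not vandalism" (class 0)
max(prop.table(table(test$Vandal)))   # 0.53138
```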
【P1.5】__What is the accuracy on the test set of a baseline method that always predicts "not vandalism"?__

+ 0.53138
+

##### 1.6 CART Model
```{r}
library(rpart)
library(rpart.plot)
cart = rpart(Vandal~., train, method="class")
pred = predict(cart,test,type='class')
table(test$Vandal, pred) %>% {sum(diag(.)) / sum(.)} # 0.54428
```
【P1.6】__What is the accuracy of the model on the test set, using a threshold of 0.5?__

+ 0.54428
+
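
`ROCR` is loaded in the setup chunk but unused so far; as a minimal sketch (not required by the assignment), the same CART model can also be scored threshold-free with AUC:

```{r}
# Sketch: AUC of the CART probability predictions on the test set
predProb = predict(cart, test)[, 2]          # P(Vandal = 1)
rocrPred = prediction(predProb, test$Vandal)
performance(rocrPred, "auc")@y.values[[1]]
```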


##### 1.7 Plot the Decision Tree
```{r fig.height=3.6}
prp(cart)
```
【P1.7】__How many word stems does the CART model use?__

+ 3
+

##### 1.8 Predictability of the CART model
【P1.8】__Given the performance of the CART model relative to the baseline, what is the best explanation of these results?__

+ Although it beats the baseline, bag of words is not very predictive for this problem.
+

<br><hr>

### Problem 2 - Add Features with Problem-specific Knowledge

##### 2.1 Add `HTTP` column
Add a new column based on whether `"http"` is added 
```{r}
wiki2 = wikiWords
wiki2$HTTP = ifelse( grepl("http",wiki$Added,fixed=TRUE) , 1, 0)
table(wiki2$HTTP) # 217
```
【P2.1】__Based on this new column, how many revisions added a link?__

+ 217
+

##### 2.2 Check accuracy again
```{r}
train2 = subset(wiki2, spl==T)
test2 = subset(wiki2, spl==F)
cart2 = rpart(Vandal~., train2, method="class")
pred2 = predict(cart2,test2,type='class')
table(test2$Vandal, pred2) %>% {sum(diag(.)) / sum(.)} # 0.57524
```
【P2.2】__What is the new accuracy of the CART model on the test set, using a threshold of 0.5?__

+ 0.57524
+

##### 2.3 Total numbers of words added and removed
```{r}
wiki2$nwAdded = nwAdded
wiki2$nwRemoved = nwRemoved
mean(nwAdded) # 4.0501
```
【P2.3】__What is the average number of words added?__

+ 4.0501
+

##### 2.4 Check accuracy again
```{r}
train = subset(wiki2, spl)
test = subset(wiki2, !spl)
cart = rpart(Vandal~., train, method="class")
pred = predict(cart,test,type='class')
table(test$Vandal, pred) %>% {sum(diag(.)) / sum(.)} # 0.6552
```
【P2.4】__What is the new accuracy of the CART model on the test set?__

+ 0.6552
+

<br><hr>

### Problem 3 - Using Non-Textual Data

The original data still contains a few columns we have not used yet; let's add them to the model as well.
```{r}
wiki3 = wiki2
wiki3$Minor = wiki$Minor
wiki3$Loggedin = wiki$Loggedin
```

##### 3.1 Check accuracy again
```{r}
train = subset(wiki3, spl)    # note: spl=T (a single =) would be ignored by subset()
test = subset(wiki3, !spl)
cart = rpart(Vandal~., train, method="class")
pred = predict(cart,test,type='class')
table(test$Vandal, pred) %>% {sum(diag(.)) / sum(.)}
```
【P3.1】__What is the accuracy of the model on the test set?__

+ 0.72472
+

##### 3.2 The Decision Tree
```{r fig.height=3.6}
prp(cart)
```

【P3.2】__How many splits are there in the tree?__

+ 3
+
  
<br><hr>

<p class="qiz">
Discussion topics:<br>
&emsp; ■ List as many ways as you can to further improve the model's accuracy (one sketch follows below the list): <br>
&emsp; &emsp; ●  <br>
&emsp; &emsp; ●  <br>
&emsp; &emsp; ●  <br>
&emsp; &emsp; ●  <br>
&emsp; &emsp; ●  <br>
<br>
</p>
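
One concrete direction (a sketch, not part of the original answers): `randomForest` is loaded in the setup chunk but never used, and an ensemble on the Problem 3 features will often edge out a single CART tree. The column names produced by `paste()` contain spaces, so the sketch makes them syntactic first; `rfData`, `rfTrain`, `rfTest`, and `rf` are hypothetical names.

```{r}
# Sketch: random forest on the Problem 3 features (hypothetical object names)
rfData = wiki3
names(rfData) = make.names(names(rfData), unique = TRUE)  # "A word" -> "A.word"
rfTrain = subset(rfData, spl)
rfTest  = subset(rfData, !spl)
set.seed(123)
rf = randomForest(Vandal ~ ., rfTrain, ntree = 200)
table(rfTest$Vandal, predict(rf, rfTest)) %>% {sum(diag(.)) / sum(.)}
```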

<br><br><br><br>





