Introduction
In this capstone we will be applying data science in the area of natural language processing. We’ll describe the major features of the training data with our exploratory data analysis and present how to create the predictive model.
Data download
Our first step is to download the dataset from the given link.
## Init variables
capstonedatasetlink <- "https://d396qusza40orc.cloudfront.net/dsscapstone/dataset/Coursera-SwiftKey.zip"
projectfolderpath <- "./projectData"
zipname <- "SwiftKey.zip"
zippath <- file.path(projectfolderpath, zipname)
datasetfolder <- "final"
datasetpath <- file.path(projectfolderpath, datasetfolder)

## Create the project data directory unless it already exists
if (!dir.exists(projectfolderpath)) {
  dir.create(projectfolderpath)
}
## Download the zip only if it is not already in the projectData directory
if (!file.exists(zippath)) {
  download.file(capstonedatasetlink, destfile = zippath, mode = "wb")
}
## Unzip only if the extracted dataset folder is missing.
## The archive is extracted into projectfolderpath, so the guard has to look
## at datasetpath (./projectData/final). Checking the bare folder name
## "final" would look in the working directory and re-extract on every run.
if (!dir.exists(datasetpath)) {
  unzip(zipfile = zippath, exdir = projectfolderpath)
}
Data load
After downloading the dataset, we start reading the files that we need. The dataset consists of text files from 3 different sources: news, blogs and twitter. In this project, we will focus on the English (US) datasets.
enpath <- file.path(datasetpath, "en_US")
files <- list.files(enpath, recursive = TRUE)

# Read the lines of one text file into a character vector.
#
# path: path of the file to read.
# n:    maximum number of lines to read; the default -1L reads the whole
#       file (pass e.g. n = 100 for a quick trial run).
#
# The connection is opened in binary mode ("rb") so that embedded control
# characters cannot end the read early on Windows, and on.exit() closes it
# even when readLines() fails. Embedded NUL bytes are skipped, and the text
# is marked as UTF-8 (assumes the source files are UTF-8 encoded).
readTextLines <- function(path, n = -1L) {
  con <- file(path, "rb")
  on.exit(close(con), add = TRUE)
  readLines(con, n = n, encoding = "UTF-8", skipNul = TRUE)
}

# Twitter data set
lineTwitter <- readTextLines(file.path(enpath, "en_US.twitter.txt"))

# Blog data set
lineBlogs <- readTextLines(file.path(enpath, "en_US.blogs.txt"))

# News data set
lineNews <- readTextLines(file.path(enpath, "en_US.news.txt"))
Data summary
A summary of the US blogs, news and twitter files can be seen in the table below.
# Size of each source file on disk, converted from bytes to megabytes
lineBlogsSize <- file.size(file.path(enpath, "en_US.blogs.txt")) / (1024 * 1024)
lineNewsSize <- file.size(file.path(enpath, "en_US.news.txt")) / (1024 * 1024)
lineTwitterSize <- file.size(file.path(enpath, "en_US.twitter.txt")) / (1024 * 1024)

# Number of words on every line of each source
lineBlogsWords <- stri_count_words(lineBlogs)
lineNewsWords <- stri_count_words(lineNews)
lineTwitterWords <- stri_count_words(lineTwitter)

# One row per source: file size, total words, mean words per line, line count
data.frame(
  source = c("blogs", "news", "twitter"),
  fileSize_MB = c(lineBlogsSize, lineNewsSize, lineTwitterSize),
  wordCounts = c(
    sum(lineBlogsWords),
    sum(lineNewsWords),
    sum(lineTwitterWords)
  ),
  wordMeans = vapply(
    list(lineBlogsWords, lineNewsWords, lineTwitterWords),
    mean,
    numeric(1)
  ),
  lineCounts = lengths(list(lineBlogs, lineNews, lineTwitter))
)
Data sampling
We observe that the files have a considerable amount of data (e.g. 2 million lines for the twitter file). In order to create a model, we will need to take a sample of the data to avoid using too much memory.
# Fix the random seed so the sample, and every table derived from it, is the
# same each time the report is run.
set.seed(1234)

# Fraction of lines to keep from each source (0.01 keeps about 1 %)
percent <- 0.01

# Draw a random subset of `data`.
#
# data:    vector to sample from.
# percent: probability, between 0 and 1, that each element is kept.
#
# Each element is kept independently (one Bernoulli draw per element), so
# the size of the result varies around length(data) * percent and the
# original order of the kept elements is preserved.
# Stops with an error if `percent` is not a single number in [0, 1].
sampling <- function(data, percent) {
  stopifnot(
    is.numeric(percent),
    length(percent) == 1L,
    !is.na(percent),
    percent >= 0,
    percent <= 1
  )
  keep <- rbinom(length(data), size = 1, prob = percent) == 1
  data[keep]
}
# Take the same fraction of lines from each source. The calls draw from the
# random number stream in this order, so the order is left unchanged.
sampleBlogs <- sampling(data = lineBlogs, percent = percent)
sampleNews <- sampling(data = lineNews, percent = percent)
sampleTwitter <- sampling(data = lineTwitter, percent = percent)

# Only the samples are used from here on; drop the full data sets from the
# workspace.
rm(lineBlogs, lineNews, lineTwitter)
# Number of words on every line of each sample
# (reuses the variable names of the full-data word counts)
lineBlogsWords <- stri_count_words(sampleBlogs)
lineNewsWords <- stri_count_words(sampleNews)
lineTwitterWords <- stri_count_words(sampleTwitter)

# One row per source: total words, mean words per line, line count
data.frame(
  source = c("blogs", "news", "twitter"),
  wordCounts = c(
    sum(lineBlogsWords),
    sum(lineNewsWords),
    sum(lineTwitterWords)
  ),
  wordMeans = vapply(
    list(lineBlogsWords, lineNewsWords, lineTwitterWords),
    mean,
    numeric(1)
  ),
  lineCounts = lengths(list(sampleBlogs, sampleNews, sampleTwitter))
)
NA
Cleaning and Tokenization
In order to be able to manipulate our data, we will create a cleaned corpus, which will consist of the three sample text files. This involves removing punctuation, numbers, excess whitespace, stopwords, and changing the text to lower case.
allSamples <- c(sampleBlogs, sampleNews, sampleTwitter)
corpus <- VCorpus(VectorSource(allSamples))

#clean
# tolower() is a plain character function, so it is wrapped in
# content_transformer(); that way tm_map() keeps every element of the corpus
# a text document instead of replacing it with a bare character vector.
corpus <- tm_map(corpus, content_transformer(tolower))
# Stopwords are removed before punctuation so that contractions in the
# stopword list (e.g. "don't") still match the text.
corpus <- tm_map(corpus, removeWords, stopwords("en"))
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removePunctuation, preserve_intra_word_dashes = TRUE)
# Whitespace is collapsed last because the removals above leave gaps behind.
corpus <- tm_map(corpus, stripWhitespace)

#tokenize
# NGramTokenizer() works on a character vector, so the cleaned text of each
# document is extracted from the corpus first (one string per document).
# Tokenizing per document also keeps n-grams from spanning two documents.
cleanText <- vapply(
  content(corpus),
  function(doc) paste(as.character(doc), collapse = " "),
  character(1),
  USE.NAMES = FALSE
)
# Characters that separate tokens; shared by all three tokenizer calls
ngramDelimiters <- " \\r\\n\\t.,;:\"()?!"
unigram <- NGramTokenizer(
  cleanText,
  Weka_control(min = 1, max = 1, delimiters = ngramDelimiters)
)
biGram <- NGramTokenizer(
  cleanText,
  Weka_control(min = 2, max = 2, delimiters = ngramDelimiters)
)
triGram <- NGramTokenizer(
  cleanText,
  Weka_control(min = 3, max = 3, delimiters = ngramDelimiters)
)
## explore and plot
# Count how often each distinct n-gram occurs. data.frame(table(x)) gives one
# row per distinct token, with the count in column `Freq`.
uniGramTable <- data.frame(table(unigram))
biGramTable <- data.frame(table(biGram))
triGramTable <- data.frame(table(triGram))

# Sort every table from most to least frequent
uniGramTable <- uniGramTable[order(uniGramTable$Freq, decreasing = TRUE), ]
biGramTable <- biGramTable[order(biGramTable$Freq, decreasing = TRUE), ]
triGramTable <- triGramTable[order(triGramTable$Freq, decreasing = TRUE), ]

# Keep the 10 most frequent n-grams of each order. head() returns fewer rows
# when a table has fewer than 10 entries, whereas indexing with 1:10 would
# pad the result with NA rows.
top1gram <- head(uniGramTable, 10L)
colnames(top1gram) <- c("word", "count")
top2gram <- head(biGramTable, 10L)
colnames(top2gram) <- c("word", "count")
top3gram <- head(triGramTable, 10L)
colnames(top3gram) <- c("word", "count")



Next steps
We will create a prediction algorithm and a Shiny app in a way that would be understandable to a non-data scientist manager. We will be using the n-gram model to compute the probability of the next word occurring. The input will be tokenized and the last words will be isolated to get the highest probability next word. Our Shiny app will be interactive and any non-data scientist users will be able to easily navigate through it.
LS0tCnRpdGxlOiAiQ291cnNlcmEgRGF0YSBTY2llbmNlIENhcHN0b25lOiBNaWxlc3RvbmUgUmVwb3J0IgphdXRob3I6ICJDbGFpcmUgTXVzc28iCmRhdGU6ICJPY3RvYmVyIDIwMjAiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMgSW50cm9kdWN0aW9uCkluIHRoaXMgY2Fwc3RvbmUgd2Ugd2lsbCBiZSBhcHBseWluZyBkYXRhIHNjaWVuY2UgaW4gdGhlIGFyZWEgb2YgbmF0dXJhbCBsYW5ndWFnZSBwcm9jZXNzaW5nLgpXZSdsbCBkZXNjcmliZSB0aGUgbWFqb3IgZmVhdHVyZXMgb2YgdGhlIHRyYWluaW5nIGRhdGEgd2l0aCBvdXIgZXhwbG9yYXRvcnkgZGF0YSBhbmFseXNpcyBhbmQgcHJlc2VudCBob3cgdG8gY3JlYXRlIHRoZSBwcmVkaWN0aXZlIG1vZGVsLgpgYGB7ciBJbnRybywgZWNobz1GQUxTRX0KIyMgRG93bmxvYWQgbGlicmFyaWVzCmxpYnJhcnkoc3RyaW5naSkKbGlicmFyeSh0bSkKbGlicmFyeShSV2VrYSkKCmBgYAoKIyMgRGF0YSBkb3dubG9hZApPdXIgZmlyc3Qgc3RlcCBpcyB0byBkb3dubG9hZCB0aGUgZGF0YXNldCBmcm9tIHRoZSBnaXZlbiBsaW5rLiAKYGBge3IgRGF0YSBEb3dubG9hZCwgcmVzdWx0cz1GQUxTRX0KIyMgSW5pdCB2YXJpYWJsZXMKY2Fwc3RvbmVkYXRhc2V0bGluayA8LSAiaHR0cHM6Ly9kMzk2cXVzemE0MG9yYy5jbG91ZGZyb250Lm5ldC9kc3NjYXBzdG9uZS9kYXRhc2V0L0NvdXJzZXJhLVN3aWZ0S2V5LnppcCIKcHJvamVjdGZvbGRlcnBhdGggPC0gIi4vcHJvamVjdERhdGEiCnppcG5hbWUgPC0gIlN3aWZ0S2V5LnppcCIKemlwcGF0aCA8LSBwYXN0ZTAocHJvamVjdGZvbGRlcnBhdGgsICIvIiwgemlwbmFtZSkKZGF0YXNldGZvbGRlciA8LSAiZmluYWwiCmRhdGFzZXRwYXRoPC0gcGFzdGUwKHByb2plY3Rmb2xkZXJwYXRoLCAiLyIsIGRhdGFzZXRmb2xkZXIpIAoKIyMgQ2hlY2sgaWYgZGlyZWN0b3J5IGFscmVhZHkgZXhpc3RzPwppZighZmlsZS5leGlzdHMocHJvamVjdGZvbGRlcnBhdGgpKXsKICBkaXIuY3JlYXRlKHByb2plY3Rmb2xkZXJwYXRoKQp9CiMjIENoZWNrIGlmIHppcCBoYXMgYWxyZWFkeSBiZWVuIGRvd25sb2FkZWQgaW4gcHJvamVjdERhdGEgZGlyZWN0b3J5PwppZighZmlsZS5leGlzdHMoemlwcGF0aCkpewogIGRvd25sb2FkLmZpbGUoY2Fwc3RvbmVkYXRhc2V0bGluayxkZXN0ZmlsZT16aXBwYXRoLG1vZGUgPSAid2IiKQp9CiMjIENoZWNrIGlmIHppcCBoYXMgYWxyZWFkeSBiZWVuIHVuemlwcGVkPwppZighZmlsZS5leGlzdHMoZGF0YXNldGZvbGRlcikpewogIHVuemlwKHppcGZpbGU9emlwcGF0aCxleGRpcj1wcm9qZWN0Zm9sZGVycGF0aCkKfQoKCmBgYAoKIyMgRGF0YSBsb2FkCkFmdGVyIGRvd25sb2FkaW5nIHRoZSBkYXRzZXQsIHdlIGFyZSBzdGFydGluZyB0byByZWFkIHRoZSBkYXRhc2V0IHRoYXQgd2UgbmVlZC4gClRoZSBkYXRhc2V0IGNvbnNpc3RzIG9mIHRleHQgZmlsZXMgZnJvbSAzIGRpZmZlcmVudCBzb3VyY2VzOiBuZXdzLCBibG9ncyBhbmQgdHdp
dHRlci4gSW4gdGhpcyBwcm9qZWN0LCB3ZSB3aWxsIGZvY3VzIG9uIHRoZSBFbmdsaXMgVVMgZGF0YXNldHMuIApgYGB7cn0KZW5wYXRoIDwtIGZpbGUucGF0aChkYXRhc2V0cGF0aCAsICJlbl9VUyIpCmZpbGVzPC1saXN0LmZpbGVzKGVucGF0aCwgcmVjdXJzaXZlPVRSVUUpCgojIExldHMgbWFrZSBhIGZpbGUgY29ubmVjdGlvbiBvZiB0aGUgdHdpdHRlciBkYXRhIHNldApjb24gPC0gZmlsZShwYXN0ZTAoZW5wYXRoLCAiL2VuX1VTLnR3aXR0ZXIudHh0IiksICJyIikgCiNsaW5lVHdpdHRlcjwtcmVhZExpbmVzKGNvbiwgMTAwLCBza2lwTnVsID0gVFJVRSkKbGluZVR3aXR0ZXI8LXJlYWRMaW5lcyhjb24sIHNraXBOdWwgPSBUUlVFKQojIENsb3NlIHRoZSBjb25uZWN0aW9uIGhhbmRsZSB3aGVuIHlvdSBhcmUgZG9uZQpjbG9zZShjb24pCgojIExldHMgbWFrZSBhIGZpbGUgY29ubmVjdGlvbiBvZiB0aGUgYmxvZyBkYXRhIHNldApjb24gPC0gZmlsZShwYXN0ZTAoZW5wYXRoLCAiL2VuX1VTLmJsb2dzLnR4dCIpLCAiciIpIAojbGxpbmVCbG9nczwtcmVhZExpbmVzKGNvbiwgMTAwLCBza2lwTnVsID0gVFJVRSkKbGluZUJsb2dzPC1yZWFkTGluZXMoY29uLCBza2lwTnVsID0gVFJVRSkKIyBDbG9zZSB0aGUgY29ubmVjdGlvbiBoYW5kbGUgd2hlbiB5b3UgYXJlIGRvbmUKY2xvc2UoY29uKQoKIyBMZXRzIG1ha2UgYSBmaWxlIGNvbm5lY3Rpb24gb2YgdGhlIG5ld3MgZGF0YSBzZXQKY29uIDwtIGZpbGUocGFzdGUwKGVucGF0aCwgIi9lbl9VUy5uZXdzLnR4dCIpLCAiciIpIAojbGluZU5ld3M8LXJlYWRMaW5lcyhjb24sIDEwMCwgc2tpcE51bCA9IFRSVUUpCmxpbmVOZXdzPC1yZWFkTGluZXMoY29uLCBza2lwTnVsID0gVFJVRSkKIyBDbG9zZSB0aGUgY29ubmVjdGlvbiBoYW5kbGUgd2hlbiB5b3UgYXJlIGRvbmUKY2xvc2UoY29uKQoKCmBgYAojIyBEYXRhIHN1bW1hcnkKQSBzdW1tYXJ5IGZvciB0aGUgVVMgYmxvZ3MsIG5ld3MgYW5kIHR3aXR0ZXIgZmlsZXMgY2FuIGJlIHNlZW4gb24gdGhlIHRhYmxlIGJlbG93LgpgYGB7cn0KIyBHZXQgZmlsZSBzaXplcwpsaW5lQmxvZ3NTaXplIDwtIGZpbGUuaW5mbyhwYXN0ZTAoZW5wYXRoLCAiL2VuX1VTLmJsb2dzLnR4dCIpKSRzaXplIC8gMTAyNCBeIDIKbGluZU5ld3NTaXplIDwtIGZpbGUuaW5mbyhwYXN0ZTAoZW5wYXRoLCAiL2VuX1VTLm5ld3MudHh0IikpJHNpemUgLyAxMDI0IF4gMgpsaW5lVHdpdHRlclNpemUgPC0gZmlsZS5pbmZvKHBhc3RlMChlbnBhdGgsICIvZW5fVVMudHdpdHRlci50eHQiKSkkc2l6ZSAvIDEwMjQgXiAyCiMgR2V0IHdvcmRzIGluIGZpbGVzCmxpbmVCbG9nc1dvcmRzIDwtIHN0cmlfY291bnRfd29yZHMobGluZUJsb2dzKQpsaW5lTmV3c1dvcmRzIDwtIHN0cmlfY291bnRfd29yZHMobGluZU5ld3MpCmxpbmVUd2l0dGVyV29yZHMgPC0gc3RyaV9jb3VudF93b3JkcyhsaW5lVHdpdHRlcikKIyBTdW1tYXJ5IG9mIHRoZSBkYXRhIHNldHMKZGF0YS5mcmFtZShzb3Vy
Y2UgPSBjKCJibG9ncyIsICJuZXdzIiwgInR3aXR0ZXIiKSwKICAgICAgICAgICBmaWxlU2l6ZV9NQiA9IGMobGluZUJsb2dzU2l6ZSwgbGluZU5ld3NTaXplLCBsaW5lVHdpdHRlclNpemUpLAogICAgICAgICAgIHdvcmRDb3VudHMgPSBjKHN1bShsaW5lQmxvZ3NXb3JkcyksIHN1bShsaW5lTmV3c1dvcmRzKSwgc3VtKGxpbmVUd2l0dGVyV29yZHMpKSwKICAgICAgICAgICB3b3JkTWVhbnMgPSBjKG1lYW4obGluZUJsb2dzV29yZHMpLCBtZWFuKGxpbmVOZXdzV29yZHMpLCBtZWFuKGxpbmVUd2l0dGVyV29yZHMpKSwKICAgICAgICAgICBsaW5lQ291bnRzID0gYyhsZW5ndGgobGluZUJsb2dzKSwgbGVuZ3RoKGxpbmVOZXdzKSwgbGVuZ3RoKGxpbmVUd2l0dGVyKSkpCmBgYAoKCgojIyBEYXRhIHNhbXBsaW5nCldlIG9ic2VydmUgdGhhdCB0aGUgZmlsZXMgaGF2ZSBhIGNvbnNpZGVyYWJsZSBhbW91bnQgb2YgZGF0YSAoZS5nLiAyIG1pbGxpb24gbGluZXMgZm9yIHR3aXR0ZXIgZmlsZSkuIEluIG9yZGVyIHRvIGNyZWF0ZSBhIG1vZGVsLCB3ZSB3aWxsIG5lZWQgdG8gdGFrZSBhIHNhbXBsZSBvZiB0aGUgZGF0YSB0byBhdm9pZCB1c2luZyB0byBtdWNoIG1lbW9yeS4gCmBgYHtyfQoKcGVyY2VudCA8LSAwLjAxCnNhbXBsaW5nIDwtIGZ1bmN0aW9uKGRhdGEsIHBlcmNlbnQpewogIHJldHVybihkYXRhW2FzLmxvZ2ljYWwocmJpbm9tKGxlbmd0aChkYXRhKSwxLHBlcmNlbnQpKV0pCn0KICAKc2FtcGxlQmxvZ3MgICA8LSBzYW1wbGluZyhsaW5lQmxvZ3MsIHBlcmNlbnQpCnNhbXBsZU5ld3MgICA8LSBzYW1wbGluZyhsaW5lTmV3cywgcGVyY2VudCkKc2FtcGxlVHdpdHRlciAgIDwtIHNhbXBsaW5nKGxpbmVUd2l0dGVyLCBwZXJjZW50KQoKcmVtb3ZlKGxpbmVCbG9ncykKcmVtb3ZlKGxpbmVOZXdzKQpyZW1vdmUobGluZVR3aXR0ZXIpCgoKIyBHZXQgd29yZHMgaW4gZmlsZXMKbGluZUJsb2dzV29yZHMgPC0gc3RyaV9jb3VudF93b3JkcyhzYW1wbGVCbG9ncykKbGluZU5ld3NXb3JkcyA8LSBzdHJpX2NvdW50X3dvcmRzKHNhbXBsZU5ld3MpCmxpbmVUd2l0dGVyV29yZHMgPC0gc3RyaV9jb3VudF93b3JkcyhzYW1wbGVUd2l0dGVyKQojIFN1bW1hcnkgb2YgdGhlIGRhdGEgc2V0cwpkYXRhLmZyYW1lKHNvdXJjZSA9IGMoImJsb2dzIiwgIm5ld3MiLCAidHdpdHRlciIpLAogICAgICAgICAgIHdvcmRDb3VudHMgPSBjKHN1bShsaW5lQmxvZ3NXb3JkcyksIHN1bShsaW5lTmV3c1dvcmRzKSwgc3VtKGxpbmVUd2l0dGVyV29yZHMpKSwKICAgICAgICAgICB3b3JkTWVhbnMgPSBjKG1lYW4obGluZUJsb2dzV29yZHMpLCBtZWFuKGxpbmVOZXdzV29yZHMpLCBtZWFuKGxpbmVUd2l0dGVyV29yZHMpKSwKICAgICAgICAgICBsaW5lQ291bnRzID0gYyhsZW5ndGgoc2FtcGxlQmxvZ3MpLCBsZW5ndGgoc2FtcGxlTmV3cyksIGxlbmd0aChzYW1wbGVUd2l0dGVyKSkpCgpgYGAKCiMjIENsZWFuaW5nIGFuZCBUb2tlbml6YXRpb24KSW4gb3JkZXIgdG8gYmUg
YWJsZSBtYW5pcHVsYXRlIG91ciBkYXRhLCB3ZSB3aWxsIGNyZWF0ZSBhIGNsZWFuZWQgY29ycHVzLCB3aGljaCB3aWxsIGNvbnNpc3Qgb2YgdGhlIHRocmVlIHNhbXBsZSB0ZXh0IGZpbGVzLiBUaGlzIGludm9sdmVzIHJlbW92aW5nIHB1bmN0dWF0aW9ucywgbnVtYmVycywgZXhjZXNzIHdoaXRlc3BhY2UsIHN0b3B3b3JkcywgYW5kIGNoYW5naW5nIHRoZSB0ZXh0IHRvIGxvd2VyIGNhc2UuIApgYGB7cn0KYWxsU2FtcGxlcyA8LSBjKHNhbXBsZUJsb2dzLCBzYW1wbGVOZXdzLCBzYW1wbGVUd2l0dGVyKQpjb3JwdXMgPC0gVkNvcnB1cyhWZWN0b3JTb3VyY2UoYWxsU2FtcGxlcykpCiNjbGVhbgpjb3JwdXMgPC0gdG1fbWFwKGNvcnB1cywgdG9sb3dlcikKY29ycHVzIDwtIHRtX21hcChjb3JwdXMsIHN0cmlwV2hpdGVzcGFjZSkgCmNvcnB1cyA8LSB0bV9tYXAoY29ycHVzLCByZW1vdmVOdW1iZXJzKSAKY29ycHVzIDwtIHRtX21hcChjb3JwdXMsIHJlbW92ZVB1bmN0dWF0aW9uLCBwcmVzZXJ2ZV9pbnRyYV93b3JkX2Rhc2hlcz1UUlVFKQpjb3JwdXMgPC0gdG1fbWFwKGNvcnB1cywgcmVtb3ZlV29yZHMsIHN0b3B3b3JkcygiZW4iKSkKCiN0b2tlbml6ZQp1bmlncmFtIDwtIE5HcmFtVG9rZW5pemVyKGNvcnB1cywgV2VrYV9jb250cm9sKG1pbiA9IDEsIG1heCA9IDEsZGVsaW1pdGVycyA9ICIgXFxyXFxuXFx0Liw7OlwiKCk/ISIpKQpiaUdyYW0gPC0gTkdyYW1Ub2tlbml6ZXIoY29ycHVzLCBXZWthX2NvbnRyb2wobWluID0gMiwgbWF4ID0gMixkZWxpbWl0ZXJzID0gIiBcXHJcXG5cXHQuLDs6XCIoKT8hIikpCnRyaUdyYW0gPC0gTkdyYW1Ub2tlbml6ZXIoY29ycHVzLCBXZWthX2NvbnRyb2wobWluID0gMywgbWF4ID0gMyxkZWxpbWl0ZXJzID0gIiBcXHJcXG5cXHQuLDs6XCIoKT8hIikpCgpgYGAKCgoKYGBge3IgcmVzdWx0cz1GQUxTRX0KIyMgZXhwbG9yZSBhbmQgcGxvdAojIGNvbnZlcnRpbmcgdG9rZW5zIG9mIG4tZ3JhbXMgaW50byB0YWJsZXMKdW5pR3JhbVRhYmxlIDwtIGRhdGEuZnJhbWUodGFibGUodW5pZ3JhbSkpCmJpR3JhbVRhYmxlIDwtIGRhdGEuZnJhbWUodGFibGUoYmlHcmFtKSkKdHJpR3JhbVRhYmxlIDwtIGRhdGEuZnJhbWUodGFibGUodHJpR3JhbSkpCgogIAp1bmlHcmFtVGFibGUgPC0gdW5pR3JhbVRhYmxlW29yZGVyKHVuaUdyYW1UYWJsZSRGcmVxLGRlY3JlYXNpbmcgPSBUUlVFKSxdCmJpR3JhbVRhYmxlIDwtIGJpR3JhbVRhYmxlW29yZGVyKGJpR3JhbVRhYmxlJEZyZXEsZGVjcmVhc2luZyA9IFRSVUUpLF0KdHJpR3JhbVRhYmxlIDwtIHRyaUdyYW1UYWJsZVtvcmRlcih0cmlHcmFtVGFibGUkRnJlcSxkZWNyZWFzaW5nID0gVFJVRSksXQoKdG9wMWdyYW0gPC0gdW5pR3JhbVRhYmxlWzE6MTAsXQpjb2xuYW1lcyh0b3AxZ3JhbSkgPC0gYygid29yZCIsImNvdW50IikKdG9wMmdyYW0gPC0gYmlHcmFtVGFibGVbMToxMCxdCmNvbG5hbWVzKHRvcDJncmFtKSA8LSBjKCJ3b3JkIiwiY291bnQiKQp0b3AzZ3JhbSA8
LSB0cmlHcmFtVGFibGVbMToxMCxdCmNvbG5hbWVzKHRvcDNncmFtKSA8LSBjKCJ3b3JkIiwiY291bnQiKQoKCmBgYAoKCgpgYGB7ciBlY2hvPUZBTFNFfQpwYXIobWFyPWMoMSwxMiwxLDEpKQpiMSA9IGJhcnBsb3QodG9wMWdyYW0kY291bnQsbWFpbj0iVG9wIFVuaWdyYW0iLG5hbWVzLmFyZz10b3AxZ3JhbSR3b3JkLCBob3Jpej1UUlVFLCB4bGFiID0gIiIsICB4YXh0PSduJywgbGFzPTIpCnRleHQoeT1iMSwgeD10b3AxZ3JhbSRjb3VudCwgY29sPSJ3aGl0ZSIsIHBvcz0yLCBsYWJlbHM9dG9wMWdyYW0kY291bnQpCnRpdGxlKHhsYWI9IkZyZXF1ZW5jeSIsIGxpbmU9MCwgY2V4LmxhYj0xLjIpCmBgYAoKYGBge3IgZWNobz1GQUxTRX0KcGFyKG1hcj1jKDEsMTIsMSwxKSkKYjIgPSBiYXJwbG90KHRvcDJncmFtJGNvdW50LCBtYWluPSJUb3AgQmlncmFtIixuYW1lcy5hcmc9dG9wMmdyYW0kd29yZCwgaG9yaXo9VFJVRSwgeGxhYiA9ICIiLCAgeGF4dD0nbicsIGxhcz0yKQp0ZXh0KHk9YjIsIHg9dG9wMmdyYW0kY291bnQsIGNvbD0id2hpdGUiLCBwb3M9MiwgbGFiZWxzPXRvcDJncmFtJGNvdW50KQp0aXRsZSh4bGFiPSJGcmVxdWVuY3kiLCBsaW5lPTAsIGNleC5sYWI9MS4yKQpgYGAKCmBgYHtyIGVjaG89RkFMU0V9CnBhcihtYXI9YygxLDEyLDEsMSkpCmIzID0gYmFycGxvdCh0b3AzZ3JhbSRjb3VudCwgbWFpbj0iVG9wIFRyaWdyYW0iLG5hbWVzLmFyZz10b3AzZ3JhbSR3b3JkLCBob3Jpej1UUlVFLCB4bGFiID0gIiIsICB4YXh0PSduJywgbGFzPTIpCnRleHQoeT1iMywgeD10b3AzZ3JhbSRjb3VudCwgY29sPSJ3aGl0ZSIsIHBvcz0yLCBsYWJlbHM9dG9wM2dyYW0kY291bnQpCnRpdGxlKHhsYWI9IkZyZXF1ZW5jeSIsIGxpbmU9MCwgY2V4LmxhYj0xLjIpCmBgYAoKCgojIyBOZXh0IHN0ZXBzCldlIHdpbGwgY3JlYXRlIGEgcHJlZGljdGlvbiBhbGdvcml0aG0gYW5kIGEgU2hpbnkgYXBwIGluIGEgd2F5IHRoYXQgd291bGQgYmUgdW5kZXJzdGFuZGFibGUgdG8gYSBub24tZGF0YSBzY2llbnRpc3QgbWFuYWdlci4KV2Ugd2lsbCBiZSB1c2luZyB0aGUgbi1ncmFtIG1vZGVsIHdpdGggdG8gY29tcHV0ZSB0aGUgcHJvYmFiaWxpdHkgb2YgdGhlIG5leHQgd29yZCBvY2N1cmluZy4gVGhlIGlucHV0IHdpbGwgYmUgdG9rZW5pemVkIGFuZCB0aGUgbGFzdCB3b3JkcyB3aWxsIGJlIGlzb2xhdGVkIHRvIGdldCB0aGUgaGlnaHRlc3QgcHJvYmFiaWxpdHkgbmV4dCB3b3JkLiAKT3VyIFNoaW55IGFwcCB3aWxsIGJlIGludGVyYWN0aXZlIGFuZCBhbnkgbm9uLWRhdGEgc2NpZW50aXN0IHVzZXJzIHdpbGwgYmUgYWJsZSB0byBlYXNpbHkgbmF2aWdhdGUgdGhyb3VnaCBvdXIgaXQuIAoKCgo=