It is often useful to classify new “test” documents using already classified “training” documents. A common example is using a corpus of labeled spam and ham (non-spam) e-mails to predict whether or not a new document is spam.
For this project, you can start with a spam/ham dataset, then predict the class of new documents (either withheld from the training dataset or from another source, such as your own spam folder).
The SMS Spam Collection v.1 (hereafter the corpus) is a set of SMS messages collected for SMS spam research. It contains 5,574 SMS messages in English, each tagged as ham (legitimate) or spam.
Preview of data
## [1] "ham\tGo until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."
## [2] "ham\tOk lar... Joking wif u oni..."
## [3] "spam\tFree entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's"
## [4] "ham\tU dun say so early hor... U c already then say..."
## [5] "ham\tNah I don't think he goes to usf, he lives around here though"
## [6] "spam\tFreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv"
## [7] "ham\tEven my brother is not like to speak with me. They treat me like aids patent."
## [8] "ham\tAs per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune"
## [9] "spam\tWINNER!! As a valued network customer you have been selected to receivea £900 prize reward! To claim call 09061701461. Claim code KL341. Valid 12 hours only."
## [10] "spam\tHad your mobile 11 months or more? U R entitled to Update to the latest colour mobiles with camera for Free! Call The Mobile Update Co FREE on 08002986030"
Loading packages and data
library(tm)
## Warning: package 'tm' was built under R version 4.2.3
## Loading required package: NLP
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.2.3
## Loading required package: RColorBrewer
library(RColorBrewer)
# Loading data
# data is in a CSV file with columns 'label' and 'text'
RawSMS <- read.csv("C:/Users/aleja/Desktop/SMSSpamCollection.csv", sep="\t", header=TRUE)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
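The “EOF within quoted string” warning is worth taking seriously: unescaped quote characters cut the import short, which is why only 3,183 of the corpus’s 5,574 messages appear below. A minimal sketch of a common fix, assuming the same file path, is to disable quote handling (re-running the analysis would then use the full corpus, so the counts shown below would change):
# Disable quote handling so stray quotes in messages don't truncate the import
RawSMS <- read.csv("C:/Users/aleja/Desktop/SMSSpamCollection.csv",
                   sep = "\t", header = TRUE, quote = "",
                   stringsAsFactors = FALSE)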
#Selecting & renaming appropriate columns of the dataset
RawSMS <- RawSMS[, 1:2]
colnames(RawSMS) <- c("Tag", "Msg")
str(RawSMS)
## 'data.frame': 3183 obs. of 2 variables:
## $ Tag: chr "ham" "spam" "ham" "ham" ...
## $ Msg: chr "Ok lar... Joking wif u oni..." "Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question("| __truncated__ "U dun say so early hor... U c already then say..." "Nah I don't think he goes to usf, he lives around here though" ...
# Converting 'Tag' column to a factor
RawSMS$Tag <- factor(RawSMS$Tag)
# Creating the corpus
text_corpus <- Corpus(VectorSource(RawSMS$Msg))
# Viewing the content of the first five texts using lapply
lapply(text_corpus[1:5], as.character)
## [[1]]
## [1] "Ok lar... Joking wif u oni..."
##
## [[2]]
## [1] "Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's"
##
## [[3]]
## [1] "U dun say so early hor... U c already then say..."
##
## [[4]]
## [1] "Nah I don't think he goes to usf, he lives around here though"
##
## [[5]]
## [1] "FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv"
# Cleaning the corpus
cleanCorpus <- tm_map(text_corpus, content_transformer(tolower)) # lowercase all texts
## Warning in tm_map.SimpleCorpus(text_corpus, content_transformer(tolower)):
## transformation drops documents
cleanCorpus <- tm_map(cleanCorpus, removeNumbers) # remove digits
## Warning in tm_map.SimpleCorpus(cleanCorpus, removeNumbers): transformation
## drops documents
cleanCorpus <- tm_map(cleanCorpus, removeWords, stopwords('english')) # remove common English stopwords
## Warning in tm_map.SimpleCorpus(cleanCorpus, removeWords, stopwords("english")):
## transformation drops documents
cleanCorpus <- tm_map(cleanCorpus, removePunctuation) # remove punctuation
## Warning in tm_map.SimpleCorpus(cleanCorpus, removePunctuation): transformation
## drops documents
cleanCorpus <- tm_map(cleanCorpus, stripWhitespace) # collapse extra whitespace
## Warning in tm_map.SimpleCorpus(cleanCorpus, stripWhitespace): transformation
## drops documents
# Create Document-Term Matrix
text_dtm <- DocumentTermMatrix(cleanCorpus)
# Inspect the Document-Term Matrix
inspect(text_dtm)
## <<DocumentTermMatrix (documents: 3183, terms: 7970)>>
## Non-/sparse entries: 34619/25333891
## Sparsity : 100%
## Maximal term length: 40
## Weighting : term frequency (tf)
## Sample :
## Terms
## Docs call can free get ham just ltgt now spam will
## 1517 0 5 1 1 23 0 8 5 4 1
## 1875 18 17 6 9 157 15 14 17 23 10
## 2452 15 8 9 23 185 6 15 20 26 11
## 2471 8 15 4 10 93 5 16 8 12 5
## 2550 30 9 9 12 145 10 9 21 33 12
## 2963 35 19 9 24 269 16 9 24 39 16
## 3183 53 38 27 36 434 30 25 36 58 45
## 472 0 0 0 1 0 0 0 0 0 11
## 670 15 13 12 21 247 27 9 29 30 17
## 98 70 45 33 37 528 37 25 50 85 40
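The matrix is almost entirely zeros. Later we keep only frequent terms with findFreqTerms(); an alternative worth knowing is tm’s removeSparseTerms(), which drops terms missing from most documents. A minimal sketch (the 0.999 threshold is an illustrative choice, not from the original analysis):
# Drop terms absent from more than 99.9% of documents
text_dtm_small <- removeSparseTerms(text_dtm, sparse = 0.999)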
A split of 75% for training and 25% for testing is a common choice, but the “right” proportion depends on the size of the dataset and the goals of the analysis.
# Creating train and test portions
train_size <- 0.75 # Proportion for training
total_rows <- nrow(text_dtm)
# Calculating the split index
split_index <- round(train_size * total_rows)
# Splitting the data
train_dtm <- text_dtm[1:split_index, ]
test_dtm <- text_dtm[(split_index + 1):total_rows, ]
# Extracting labels for training and testing sets
train_labels <- RawSMS$Tag[1:split_index]
test_labels <- RawSMS$Tag[(split_index + 1):total_rows]
# Verifying the proportions in training and testing sets
tbl_train <- prop.table(table(train_labels))
tbl_test <- prop.table(table(test_labels))
# Printing proportions
cat("Training set proportions:\n", tbl_train, "\n\n")## Training set proportions:
## 0.8638458 0.1361542
## Testing set proportions:
## 0.8580402 0.1419598
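Note that the split above takes the first 75% of rows in file order, which is only safe if the messages are not sorted by class. A more defensive sketch uses a random permutation of the row indices (the seed value is arbitrary):
set.seed(123) # arbitrary seed, for reproducibility
shuffled <- sample(nrow(text_dtm)) # random permutation of row indices
split_index <- round(0.75 * nrow(text_dtm))
train_dtm <- text_dtm[shuffled[1:split_index], ]
test_dtm <- text_dtm[shuffled[(split_index + 1):nrow(text_dtm)], ]
train_labels <- RawSMS$Tag[shuffled[1:split_index]]
test_labels <- RawSMS$Tag[shuffled[(split_index + 1):nrow(text_dtm)]]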
We’ll convert the sparse matrix into a format suitable for training the Naive Bayes model. This involves identifying the most frequent words in the texts using the findFreqTerms() function.
# Subsetting the spam messages
spamText <- subset(RawSMS, Tag == "spam")
# Defining a vector of darker colors
custom_colors <- c("#8B0000", "#006400", "#00008B", "#FF8C00", "#800080", "#FF1493", "#008B8B", "#8B4513", "#FFD700")
# Creating a word cloud with darker colors
wordcloud(words = spamText$Msg, max.words = 50, scale = c(5, 0.3),
          random.order = FALSE, rot.per = 0.15, colors = custom_colors)
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
# Subsetting the ham messages
hamText <- subset(RawSMS, Tag == "ham")
# Define a vector of darker colors
custom_colors <- c("#8B0000", "#006400", "#00008B", "#FF8C00", "#800080", "#FF1493", "#008B8B", "#8B4513", "#FFD700")
# Creating a word cloud with darker colors for ham messages
wordcloud(words = hamText$Msg, max.words = 50, scale = c(5, 0.3),
          random.order = FALSE, rot.per = 0.15, colors = custom_colors)
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
# Identifying words that appear frequently in the training data
# (a minimum frequency of 5 is assumed here; the threshold is not stated in the original)
freq_words <- findFreqTerms(train_dtm, 5)
str(freq_words)
## chr [1:1087] "lar" "wif" "apply" "comp" "cup" "entry" "final" "free" "may" ...
# Selecting only the frequent words from the train and test datasets
freq_words_train <- train_dtm[, freq_words]
freq_words_test <- test_dtm[, freq_words]
# Creating a function for conversion
convert <- function(x) {
  ifelse(x > 0, "y", "n") # mark a term as present ("y") or absent ("n")
}
# Applying the conversion function to train and test datasets
train <- apply(freq_words_train, MARGIN = 2, convert)
test <- apply(freq_words_test, MARGIN = 2, convert)
# Verifying the conversion
str(train)
## chr [1:2387, 1:1087] "y" "n" "n" "n" "n" "n" "n" "n" "n" "n" "n" "n" "n" ...
## - attr(*, "dimnames")=List of 2
## ..$ Docs : chr [1:2387] "1" "2" "3" "4" ...
## ..$ Terms: chr [1:1087] "lar" "wif" "apply" "comp" ...
library(e1071) # provides naiveBayes()
## Warning: package 'e1071' was built under R version 4.2.3
# Creating a Naive Bayes classifier
sms_classifier <- naiveBayes(train, train_labels)
# Making predictions on the test set
test_predictions <- predict(sms_classifier, test)
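e1071’s naiveBayes() also accepts a laplace argument for additive smoothing, which keeps a term that never appears in one class of the training data from zeroing out that class’s posterior. A minimal sketch (laplace = 1 is an illustrative value, not part of the original model):
# Refit with Laplace smoothing, adding 1 to each term/class count
sms_classifier_smooth <- naiveBayes(train, train_labels, laplace = 1)
smooth_predictions <- predict(sms_classifier_smooth, test)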
# Evaluating the model
library(gmodels) # Load the gmodels library for CrossTable
## Warning: package 'gmodels' was built under R version 4.2.3
# Creating a confusion table
confusion_table <- CrossTable(test_predictions, test_labels,
                              prop.chisq = FALSE, prop.t = FALSE,
                              dnn = c('Predicted', 'Actual'))
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 796
##
##
## | Actual
## Predicted | ham | spam | Row Total |
## -------------|-----------|-----------|-----------|
## ham | 680 | 26 | 706 |
## | 0.963 | 0.037 | 0.887 |
## | 0.996 | 0.230 | |
## -------------|-----------|-----------|-----------|
## spam | 3 | 87 | 90 |
## | 0.033 | 0.967 | 0.113 |
## | 0.004 | 0.770 | |
## -------------|-----------|-----------|-----------|
## Column Total | 683 | 113 | 796 |
## | 0.858 | 0.142 | |
## -------------|-----------|-----------|-----------|
##
##
# Inspecting the list of counts and proportions CrossTable returned
confusion_table
## $t
## y
## x ham spam
## ham 680 26
## spam 3 87
##
## $prop.row
## y
## x ham spam
## ham 0.96317280 0.03682720
## spam 0.03333333 0.96666667
##
## $prop.col
## y
## x ham spam
## ham 0.995607613 0.230088496
## spam 0.004392387 0.769911504
##
## $prop.tbl
## y
## x ham spam
## ham 0.854271357 0.032663317
## spam 0.003768844 0.109296482
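From the table, the classifier is correct on (680 + 87) of 796 test messages, roughly 96.4% accuracy, misclassifying 26 spam messages as ham and only 3 ham messages as spam. A small sketch pulling the overall accuracy from the list CrossTable returned:
# Overall accuracy from the confusion counts: (680 + 87) / 796 ~ 0.964
accuracy <- sum(diag(confusion_table$t)) / sum(confusion_table$t)
accuracy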