Loading Library

Get file names

## Directories that hold the raw ham and spam messages
ham_dir <- "./easy_ham/"
spam_dir <- "./spam_2/"

## Names of every entry found in each directory
ham_file_ls <- list.files(ham_dir)
spam_file_ls <- list.files(spam_dir)

## Drop the 'cmds' entry from both listings
ham_file_ls <- ham_file_ls[ham_file_ls != "cmds"]
spam_file_ls <- spam_file_ls[spam_file_ls != "cmds"]

Sample Ham file names

head(ham_file_ls, n = 6L)  # first six ham file names
## [1] "00001.7c53336b37003a9286aba55d2945844c"
## [2] "00002.9c4069e25e1ef370c078db7ee85ff9ac"
## [3] "00003.860e3c3cee1b42ead714c5c874fe25f7"
## [4] "00004.864220c5b6930b209cc287c361c99af1"
## [5] "00005.bf27cdeaf0b8c4647ecd61b1d09da613"
## [6] "00006.253ea2f9a9cc36fa0b1129b04b806608"

Sample spam file names

head(spam_file_ls, n = 6L)  # first six spam file names
## [1] "00001.317e78fa8ee2f54cd4890fdc09ba8176"
## [2] "00002.9438920e9a55591b18e60d1ed37d992b"
## [3] "00003.590eff932f8704d8b0fcbe69d023b54d"
## [4] "00004.bdcc075fa4beb5157b5dd6cd41d8887b"
## [5] "00005.ed0aba4d386c5e62bc737cf3f0ed9589"
## [6] "00006.3ca1f399ccda5d897fecb8c57669a283"

Total 2500 ham files.

Total 1396 spam files.

Read File content

Define Text extraction function

### Extract the body of a raw e-mail file.
###
### The file is read as Latin-1 text. Everything after the first empty line
### (the separator between headers and body) is returned as a single string
### with the lines joined by "\n".
###
### path:    path to one message file.
### returns: a length-one character vector. It is "" when the file is empty,
###          has no empty separator line, or has nothing after the separator.
extractText <- function(path) {
  ### Set connection; on.exit guarantees it is closed even if reading fails
  con <- file(path, open = "rt", encoding = "latin1")
  on.exit(close(con), add = TRUE)

  ### Readlines
  text <- readLines(con, encoding = "UTF-8")

  ### Locate the first empty line; without one there is no body to return
  blank_idx <- which(text == "")
  if (length(blank_idx) == 0L) {
    return("")
  }

  ### Keep only the lines after the separator. Negative indexing yields
  ### character(0) when the separator is the last line, so no error is raised.
  msg <- text[-seq_len(blank_idx[1L])]

  ### Concatenate strings
  paste(msg, collapse = "\n")
}

Extract content

### Read the body of every ham and spam file. vapply() guarantees a character
### vector (named by file name) even when a directory listing is empty,
### whereas sapply() would return an empty list in that case.
ham_all <- vapply(
  ham_file_ls,
  function(p) extractText(paste0(ham_dir, p)),
  character(1)
)
spam_all <- vapply(
  spam_file_ls,
  function(p) extractText(paste0(spam_dir, p)),
  character(1)
)

Create Dataframe

### Spam label: 0 for every ham file, followed by 1 for every spam file
spam_label <- rep(
  c(0, 1),
  times = c(length(ham_file_ls), length(spam_file_ls))
)

### Ham bodies first, then spam bodies, in the same order as the labels
content <- c(ham_all, spam_all)

### One row per message: label column plus content column
all_df <- data.frame(spam_label, content)

### Reset index
rownames(all_df) <- NULL

head(all_df, n = 6L)
##   spam_label
## 1          0
## 2          0
## 3          0
## 4          0
## 5          0
## 6          0
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       content
## 1                                                                                                                                            Date:        Wed, 21 Aug 2002 10:54:46 -0500\n    From:        Chris Garrigues <cwg-dated-1030377287.06fa6d@DeepEddy.Com>\n    Message-ID:  <1029945287.4797.TMDA@deepeddy.vircio.com>\n\n\n  | I can't reproduce this error.\n\nFor me it is very repeatable... (like every time, without fail).\n\nThis is the debug log of the pick happening ...\n\n18:19:03 Pick_It {exec pick +inbox -list -lbrace -lbrace -subject ftp -rbrace -rbrace} {4852-4852 -sequence mercury}\n18:19:03 exec pick +inbox -list -lbrace -lbrace -subject ftp -rbrace -rbrace 4852-4852 -sequence mercury\n18:19:04 Ftoc_PickMsgs {{1 hit}}\n18:19:04 Marking 1 hits\n18:19:04 tkerror: syntax error in expression "int ...\n\nNote, if I run the pick command by hand ...\n\ndelta$ pick +inbox -list -lbrace -lbrace -subject ftp -rbrace -rbrace  4852-4852 -sequence mercury\n1 hit\n\nThat's where the "1 hit" comes from (obviously).  The version of nmh I'm\nusing is ...\n\ndelta$ pick -version\npick -- nmh-1.0.4 [compiled on fuchsia.cs.mu.OZ.AU at Sun Mar 17 14:55:56 ICT 2002]\n\nAnd the relevant part of my .mh_profile ...\n\ndelta$ mhparam pick\n-seq sel -list\n\n\nSince the pick command works, the sequence (actually, both of them, the\none that's explicit on the command line, from the search popup, and the\none that comes from .mh_profile) do get created.\n\nkre\n\nps: this is still using the version of the code form a day ago, I haven't\nbeen able to reach the cvs repository today (local routing issue I think).\n\n\n\n_______________________________________________\nExmh-workers mailing list\nExmh-workers@redhat.com\nhttps://listman.redhat.com/mailman/listinfo/exmh-workers\n
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Martin A posted:\nTassos Papadopoulos, the Greek sculptor behind the plan, judged that the\n limestone of Mount Kerdylio, 70 miles east of Salonika and not far from the\n Mount Athos monastic community, was ideal for the patriotic sculpture. \n \n As well as Alexander's granite features, 240 ft high and 170 ft wide, a\n museum, a restored amphitheatre and car park for admiring crowds are\nplanned\n---------------------\nSo is this mountain limestone or granite?\nIf it's limestone, it'll weather pretty fast.\n\n------------------------ Yahoo! Groups Sponsor ---------------------~-->\n4 DVDs Free +s&p Join Now\nhttp://us.click.yahoo.com/pt6YBB/NXiEAA/mG3HAA/7gSolB/TM\n---------------------------------------------------------------------~->\n\nTo unsubscribe from this group, send an email to:\nforteana-unsubscribe@egroups.com\n\n \n\nYour use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ \n\n\n
## 3 Man Threatens Explosion In Moscow \n\nThursday August 22, 2002 1:40 PM\nMOSCOW (AP) - Security officers on Thursday seized an unidentified man who\nsaid he was armed with explosives and threatened to blow up his truck in\nfront of Russia's Federal Security Services headquarters in Moscow, NTV\ntelevision reported.\nThe officers seized an automatic rifle the man was carrying, then the man\ngot out of the truck and was taken into custody, NTV said. No other details\nwere immediately available.\nThe man had demanded talks with high government officials, the Interfax and\nITAR-Tass news agencies said. Ekho Moskvy radio reported that he wanted to\ntalk with Russian President Vladimir Putin.\nPolice and security forces rushed to the Security Service building, within\nblocks of the Kremlin, Red Square and the Bolshoi Ballet, and surrounded the\nman, who claimed to have one and a half tons of explosives, the news\nagencies said. Negotiations continued for about one and a half hours outside\nthe building, ITAR-Tass and Interfax reported, citing witnesses.\nThe man later drove away from the building, under police escort, and drove\nto a street near Moscow's Olympic Penta Hotel, where authorities held\nfurther negotiations with him, the Moscow police press service said. The\nmove appeared to be an attempt by security services to get him to a more\nsecure location. \n\n------------------------ Yahoo! Groups Sponsor ---------------------~-->\n4 DVDs Free +s&p Join Now\nhttp://us.click.yahoo.com/pt6YBB/NXiEAA/mG3HAA/7gSolB/TM\n---------------------------------------------------------------------~->\n\nTo unsubscribe from this group, send an email to:\nforteana-unsubscribe@egroups.com\n\n \n\nYour use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ \n\n\n
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Klez: The Virus That Won't Die\n \nAlready the most prolific virus ever, Klez continues to wreak havoc.\n\nAndrew Brandt\n>>From the September 2002 issue of PC World magazine\nPosted Thursday, August 01, 2002\n\n\nThe Klez worm is approaching its seventh month of wriggling across \nthe Web, making it one of the most persistent viruses ever. And \nexperts warn that it may be a harbinger of new viruses that use a \ncombination of pernicious approaches to go from PC to PC.\n\nAntivirus software makers Symantec and McAfee both report more than \n2000 new infections daily, with no sign of letup at press time. The \nBritish security firm MessageLabs estimates that 1 in every 300 \ne-mail messages holds a variation of the Klez virus, and says that \nKlez has already surpassed last summer's SirCam as the most prolific \nvirus ever.\n\nAnd some newer Klez variants aren't merely nuisances--they can carry \nother viruses in them that corrupt your data.\n\n...\n\nhttp://www.pcworld.com/news/article/0,aid,103259,00.asp\n_______________________________________________\nIrregulars mailing list\nIrregulars@tb.tf\nhttp://tb.tf/mailman/listinfo/irregulars\n
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   >  in adding cream to spaghetti carbonara, which has the same effect on pasta as\n>  making a pizza a deep-pie; \n\nI just had to jump in here as Carbonara is one of my favourites to make and ask \nwhat the hell are you supposed to use instead of cream?  I've never seen a \nrecipe that hasn't used this.  Personally I use low fat creme fraiche because it \nworks quite nicely but the only time I've seen an supposedly authentic recipe \nfor carbonara  it was identical to mine (cream, eggs and lots of fresh parmesan) \nexcept for the creme fraiche.\n\nStew\n-- \nStewart Smith\nScottish Microelectronics Centre, University of Edinburgh.\nhttp://www.ee.ed.ac.uk/~sxs/\n\n\n------------------------ Yahoo! Groups Sponsor ---------------------~-->\n4 DVDs Free +s&p Join Now\nhttp://us.click.yahoo.com/pt6YBB/NXiEAA/mG3HAA/7gSolB/TM\n---------------------------------------------------------------------~->\n\nTo unsubscribe from this group, send an email to:\nforteana-unsubscribe@egroups.com\n\n \n\nYour use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ \n\n\n
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              \n> I just had to jump in here as Carbonara is one of my favourites to make and \n> ask \n> what the hell are you supposed to use instead of cream? \n\nIsn't it just basically a mixture of beaten egg and bacon (or pancetta, \nreally)? You mix in the raw egg to the cooked pasta and the heat of the pasta \ncooks the egg. That's my understanding.\n\nMartin\n\n------------------------ Yahoo! Groups Sponsor ---------------------~-->\n4 DVDs Free +s&p Join Now\nhttp://us.click.yahoo.com/pt6YBB/NXiEAA/mG3HAA/7gSolB/TM\n---------------------------------------------------------------------~->\n\nTo unsubscribe from this group, send an email to:\nforteana-unsubscribe@egroups.com\n\n \n\nYour use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ \n\n\n

Clean Content

### Build a corpus with one document per message body
content_corp <- VCorpus(VectorSource(all_df$content))

### Transformer that replaces every match of `pattern` with a single space.
### It is defined here but not applied in the pipeline below.
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))

### Order matters: lower-case first so stop-word matching is not defeated by
### capitalisation, and remove stop words BEFORE stripping punctuation.
### Otherwise contractions such as "don't" turn into "dont" and no longer
### match the stop-word list. Whitespace is collapsed last because the
### earlier steps leave gaps behind.
content_corp <- content_corp %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

Create Document-Term Matrix

### Count term occurrences per document (rows = documents, columns = terms)
dtm <- DocumentTermMatrix(content_corp)
dtm
## <<DocumentTermMatrix (documents: 3896, terms: 87511)>>
## Non-/sparse entries: 452779/340490077
## Sparsity           : 100%
## Maximal term length: 880
## Weighting          : term frequency (tf)

Remove sparse terms

To obtain a more reasonable number of terms, we limit dtm to the terms that are present in more than, let’s say, 5% of the documents.
### Share of documents used as the cut-off for keeping a term
sparse <- 0.05

### Pass 1 - sparse (0.95) as the maximal allowed sparsity
freq_dtm <- removeSparseTerms(dtm, sparse = 1 - sparse)
freq_dtm
## <<DocumentTermMatrix (documents: 3896, terms: 278)>>
## Non-/sparse entries: 101920/981168
## Sparsity           : 91%
## Maximal term length: 43
## Weighting          : term frequency (tf)

Build a data frame of the frequently occurring terms, to be merged with the labels in all_df

### Densify the document-term matrix, then turn it into a data frame
freq_dtm_df <- as.data.frame(
  as.matrix(freq_dtm)
)

Assign the spam label

### Attach the 0/1 label from all_df as a factor column
freq_dtm_df[["spam_label"]] <- as.factor(all_df[["spam_label"]])
head(freq_dtm_df, n = 6L)
##   100 2002 able access actually add address already also always another
## 1   0    2    1      0        1   0       0       0    0      0       0
## 2   0    0    0      0        0   0       0       0    0      0       0
## 3   0    1    0      0        0   0       0       0    0      0       0
## 4   0    2    0      0        0   0       0       2    0      0       0
## 5   0    0    0      0        0   0       0       0    0      0       0
## 6   0    0    0      0        0   0       0       0    0      0       0
##   anyone anything around available back based best better body border0
## 1      0        0      0         0    0     0    0      0    0       0
## 2      0        0      0         0    0     0    0      0    0       0
## 3      0        0      0         1    0     0    0      0    0       0
## 4      0        0      0         0    0     0    0      0    0       0
## 5      0        0      0         0    0     0    0      0    0       0
## 6      0        0      0         0    0     0    0      0    0       0
##   border3d0 build business bythinkgeek call can cant case cellspacing3d0
## 1         0     0        0           0    0   0    1    0              0
## 2         0     0        0           0    0   0    0    0              0
## 3         0     0        0           0    0   0    0    0              0
## 4         0     0        0           0    0   1    0    0              0
## 5         0     0        0           0    0   0    0    0              0
## 6         0     0        0           0    0   0    0    0              0
##   center change charsetiso88591 check click code come communications
## 1      0      0               0     0     0    1    0              0
## 2      0      0               0     0     0    0    0              0
## 3      0      0               0     0     0    0    0              0
## 4      0      0               0     0     0    0    0              0
## 5      0      0               0     0     0    0    0              0
## 6      0      0               0     0     0    0    0              0
##   companies company computer contact contenttransferencoding contenttype
## 1         0       0        0       0                       0           0
## 2         0       0        0       0                       0           0
## 3         0       0        0       0                       0           0
## 4         0       0        0       0                       0           0
## 5         0       0        0       0                       0           0
## 6         0       0        0       0                       0           0
##   cost course credit current data date day days didnt different div doesnt
## 1    0      0      0       0    0    1   1    0     0         0   0      0
## 2    0      0      0       0    0    0   0    0     0         0   0      0
## 3    0      0      0       0    0    0   0    0     0         0   0      0
## 4    0      0      0       0    1    0   0    0     0         0   0      0
## 5    0      0      0       0    0    0   0    0     0         0   0      0
## 6    0      0      0       0    0    0   0    0     0         0   0      0
##   done dont easy else email end enough error even ever every everything
## 1    0    0    0    0     0   0      0     2    0    0     1          0
## 2    0    0    0    0     1   0      0     0    0    0     0          0
## 3    0    0    0    0     1   0      0     0    0    0     0          0
## 4    0    0    0    0     1   0      0     0    0    3     1          0
## 5    0    0    0    0     1   0      0     0    0    0     0          0
## 6    0    0    0    0     1   0      0     0    0    0     0          0
##   experience face3darial file files find first following font fontfont
## 1          0           0    0     0    0     0         0    0        0
## 2          0           0    0     0    0     0         0    0        0
## 3          0           0    0     0    0     0         0    0        0
## 4          0           0    0     0    0     0         0    0        0
## 5          0           0    0     0    0     0         0    0        0
## 6          0           0    0     0    0     0         0    0        0
##   form format found free full future geek get getting give going good got
## 1    1      0     0    0    0      0    0   1       0    0     0    0   0
## 2    0      0     0    1    0      0    0   0       0    0     0    0   0
## 3    0      0     0    1    0      0    0   1       0    0     0    0   1
## 4    0      0     0    0    0      0    0   0       0    0     0    0   0
## 5    0      0     0    1    0      0    0   0       0    0     0    0   0
## 6    0      0     0    1    0      0    0   0       0    0     0    0   0
##   great group head heaven help helvetica high home hours however html
## 1     0     0    0      0    0         0    0    0     0       0    0
## 2     0     1    0      0    0         0    1    0     0       0    0
## 3     0     1    0      0    0         0    1    0     1       0    0
## 4     0     0    0      0    0         0    0    0     0       0    0
## 5     0     1    0      0    0         0    0    0     0       0    0
## 6     0     1    0      0    0         0    0    0     0       0    0
##   httplistsfreshrpmsnetmailmanlistinforpmlist httpthinkgeekcomsf ill img
## 1                                           0                  0   0   0
## 2                                           0                  0   0   0
## 3                                           0                  0   0   0
## 4                                           0                  0   0   0
## 5                                           0                  0   0   0
## 6                                           0                  0   0   0
##   include information instead internet isnt ive just keep know last least
## 1       0           0       0        0    0   0    0    0    0    0     0
## 2       0           0       0        0    0   0    0    0    0    0     0
## 3       0           0       0        0    0   0    0    0    0    0     0
## 4       0           0       0        0    0   0    0    0    0    1     0
## 5       0           0       1        0    0   2    1    0    0    0     0
## 6       0           0       1        0    1   0    2    0    0    0     0
##   less let life like line link linux list long look looking lot low made
## 1    0   0    0    1    1    0     0    5    0    0       0   0   0    0
## 2    0   0    0    0    0    0     0    0    0    0       0   0   0    0
## 3    0   0    0    0    0    0     0    0    0    0       0   0   0    0
## 4    0   0    0    0    0    0     0    1    0    0       0   0   0    0
## 5    0   0    0    0    0    0     0    0    0    0       0   0   1    0
## 6    0   0    0    0    0    0     0    0    0    0       0   0   0    0
##   mail mailing make making many marketing may maybe message messages meta
## 1    0       1    0      0    0         0   0     0       0        0    0
## 2    0       0    0      0    0         0   0     0       0        0    0
## 3    0       0    0      0    0         0   0     0       0        0    0
## 4    0       1    0      1    0         0   1     0       0        1    0
## 5    0       0    1      1    0         0   0     0       0        0    0
## 6    0       0    1      0    0         0   0     0       0        0    0
##   might million money month much must name need network never new news
## 1     0       0     0     0    0    0    0    0       0     0   0    0
## 2     0       0     0     0    0    0    0    0       0     0   0    0
## 3     0       0     0     0    0    0    0    0       0     0   0    2
## 4     0       0     0     1    0    0    0    0       0     0   2    0
## 5     0       0     0     0    0    0    0    0       0     1   0    0
## 6     0       0     0     0    0    0    0    0       0     0   0    0
##   next nothing now number offer offers old one online order original
## 1    0       0   0      0     0      0   0   2      0     0        0
## 2    0       0   1      0     0      0   0   0      0     0        0
## 3    0       0   1      0     0      0   0   2      0     0        0
## 4    0       0   0      0     0      0   0   1      0     0        0
## 5    0       0   1      0     0      0   0   1      0     0        0
## 6    0       0   1      0     0      0   0   1      0     0        0
##   others page part people per pfont phone place please point possible
## 1      0    0    1      0   0     0     0     0      0     0        0
## 2      0    0    0      0   0     0     0     0      0     0        0
## 3      0    0    0      0   0     0     0     0      0     0        0
## 4      0    0    0      0   0     0     0     0      0     0        0
## 5      0    0    0      0   0     0     0     0      0     0        0
## 6      0    0    0      0   0     0     0     0      0     0        0
##   probably problem problems product program provide public put rates read
## 1        0       0        0       0       0       0      0   0     0    0
## 2        0       0        0       0       0       0      0   0     0    0
## 3        0       0        0       0       0       0      0   0     0    0
## 4        0       0        0       0       0       0      0   0     0    0
## 5        0       0        0       0       0       0      0   0     0    0
## 6        0       0        0       0       0       0      0   0     0    0
##   real really receive received receiving remove removed reply right rights
## 1    0      0       0        0         0      0       0     0     0      0
## 2    0      0       0        0         0      0       0     0     0      0
## 3    0      0       0        0         0      0       0     0     0      0
## 4    0      0       0        0         0      0       0     0     0      0
## 5    0      0       0        0         0      0       0     0     0      0
## 6    0      1       0        0         0      0       0     0     0      0
##   rpmlist rpmlistfreshrpmsnet run running said sansserif say see seems
## 1       0                   0   1       0    0         0   0   0     0
## 2       0                   0   0       0    0         0   0   0     0
## 3       0                   0   0       0    5         0   0   0     0
## 4       0                   0   0       0    0         0   0   0     0
## 5       0                   0   0       0    0         0   0   0     0
## 6       0                   0   0       0    0         0   0   0     0
##   seen send sent sep service set sfnet simple simply since site software
## 1    0    0    0   0       0   0     0      0      0     1    0        0
## 2    0    1    0   0       0   0     0      0      0     0    0        0
## 3    0    1    0   0       2   0     0      0      0     0    0        0
## 4    0    0    0   0       0   0     0      0      0     0    0        1
## 5    2    1    0   0       0   0     0      0      0     0    0        0
## 6    0    1    0   0       0   0     0      0      0     0    0        0
##   someone something spam special sponsored start state still subject
## 1       0         0    0       0         0     0     0     1       3
## 2       0         0    0       0         0     0     0     0       1
## 3       0         0    0       0         0     0     0     0       1
## 4       0         0    0       0         0     0     0     0       0
## 5       0         0    0       0         0     0     0     0       1
## 6       0         0    0       0         0     0     0     0       1
##   supplied support sure system table take tell texthtml textplain thanks
## 1        0       0    0      0     0    0    0        0         0      0
## 2        0       0    0      0     0    0    0        0         0      0
## 3        0       0    0      0     0    0    0        0         0      0
## 4        0       0    0      0     0    0    0        0         0      0
## 5        0       0    0      0     0    0    0        0         0      0
## 6        0       0    0      0     0    0    0        0         0      0
##   thats theres thing things think though three time times today top true
## 1     2      0     0      0     1      0     0    1     0     1   0    0
## 2     0      0     0      0     0      0     0    0     0     0   0    0
## 3     0      0     0      0     0      0     0    0     0     0   0    0
## 4     0      0     0      0     0      0     0    1     0     0   0    0
## 5     0      0     0      0     0      0     0    1     0     0   0    0
## 6     1      0     0      0     0      0     0    0     0     0   0    0
##   try trying two type unsubscribe url use used users using version visit
## 1   0      0   0    0           0   0   0    0     0     2       3     0
## 2   0      0   0    0           1   0   1    0     0     0       0     0
## 3   0      0   0    0           1   0   1    0     0     0       0     0
## 4   0      0   0    0           0   0   1    0     0     0       0     0
## 5   0      0   0    0           1   0   3    1     0     0       0     0
## 6   0      0   0    0           1   0   2    0     0     0       0     0
##   want way web week welcome well will wish within without work working
## 1    0   0   0    0       0    0    0    0      0       1    0       0
## 2    0   0   0    0       0    1    0    0      0       0    0       0
## 3    0   0   0    0       0    0    0    0      1       0    0       0
## 4    0   0   1    0       0    0    0    0      0       0    0       0
## 5    0   0   0    0       0    0    0    0      0       0    0       0
## 6    0   0   0    0       0    0    0    0      0       0    0       0
##   works world wrote year years yes youre spam_label
## 1     1     0     0    0     0   0     0          0
## 2     0     0     0    0     0   0     0          0
## 3     0     0     0    0     0   0     0          0
## 4     0     1     0    0     0   0     0          0
## 5     1     0     0    0     0   0     0          0
## 6     0     0     0    0     0   0     0          0

Prepare Training and Testing dataset

### Fix the RNG seed so the split below is reproducible
set.seed(123)

### Drop the term columns "100", "2002", "else" and "next".
### NOTE(review): presumably because these are not syntactic R names and
### would trip the formula interface used for modelling - confirm.
df <- freq_dtm_df %>%
  select(-c("100", "2002", "else", "next"))

### Split 70/30 on the class label. The full column name is required:
### `df$spam` exactly matches the term-count column for the word "spam",
### so it would split on that count instead of on spam_label.
spl <- sample.split(df$spam_label, SplitRatio = 0.7)
train <- subset(df, spl == TRUE)
test <- subset(df, spl == FALSE)

Check Split proportion

prop.table(table(train[["spam_label"]]))  # class shares in the training set
## 
##         0         1 
## 0.6423598 0.3576402
prop.table(table(test[["spam_label"]]))  # class shares in the test set
## 
##         0         1 
## 0.6401028 0.3598972

Training the random forest model

### Fit a random forest with spam_label as the response and every other
### column of train as a predictor
spamRF <- randomForest(spam_label ~ ., data = train)

### List the components stored in the fitted object
summary(spamRF)
##                 Length Class  Mode     
## call               3   -none- call     
## type               1   -none- character
## predicted       2729   factor numeric  
## err.rate        1500   -none- numeric  
## confusion          6   -none- numeric  
## votes           5458   matrix numeric  
## oob.times       2729   -none- numeric  
## classes            2   -none- character
## importance       274   -none- numeric  
## importanceSD       0   -none- NULL     
## localImportance    0   -none- NULL     
## proximity          0   -none- NULL     
## ntree              1   -none- numeric  
## mtry               1   -none- numeric  
## forest            14   -none- list     
## y               2729   factor numeric  
## test               0   -none- NULL     
## inbag              0   -none- NULL     
## terms              3   terms  call

Make the prediction using Random Forest Model.

### True labels of the held-out messages
target <- test$spam_label

### Per-class probabilities for the test set
result.predicted.prob <- predict(spamRF, test, type = "prob")

### Build the ROC object from the true labels and the second probability
### column, then draw it with the "closest.topleft" best threshold marked
result.roc <- roc(test$spam_label, result.predicted.prob[, 2])
plot(
  result.roc,
  print.thres = "best",
  print.thres.best.method = "closest.topleft"
)

### Threshold and accuracy at that same "best" point.
### NOTE(review): the threshold is chosen on the test set itself, so the
### reported accuracy is likely optimistic.
result.coords <- coords(
  result.roc,
  "best",
  best.method = "closest.topleft",
  ret = c("threshold", "accuracy")
)
print(result.coords)
## threshold  accuracy 
## 0.5020000 0.9811482