Reading the first set of spam files
library(stringr)
# Collect the full paths of the entries directly inside the spam folder
# (no recursion into sub-folders).
spamFiles <- list.files(
  path = "C:/Data/spam",
  pattern = "*.*",
  full.names = TRUE,
  recursive = FALSE
)
# Discard the first listed path before reading.
# NOTE(review): presumably a non-message entry - confirm against the folder.
spamFiles <- spamFiles[-1]
# Read every remaining file whole: one character string per message, sized
# from the file's byte count.
spamList <- lapply(spamFiles, function(path) {
  readChar(path, file.info(path)$size)
})
Pulling email addresses
# First "From: ...@..." match of every spam message.
spamListAddress <- lapply(spamList, str_extract, pattern = "From:.*@.*\\..*\\b")
# Turn each From line into lower-case sender tokens:
#   1. skip the "From: " prefix and keep the address starting at the word
#      immediately before "@";
#   2. replace every non-letter, non-whitespace character with a space and
#      collapse runs of whitespace;
#   3. drop the final word (the last part of the domain).
spamListAddress1 <- lapply(spamListAddress, function(from_line) {
  sender <- str_sub(from_line, start = 7)
  address <- str_extract(sender, pattern = "\\b\\w*@.*\\..*\\b")
  letters_only <- str_replace_all(tolower(address), "[^a-zA-Z\\s]", " ")
  tokens <- str_replace_all(letters_only, "\\s{2,}", " ")
  gsub("\\s*\\w*$", "", tokens)
})
# Full paths of the entries directly inside the easy_ham folder, followed by a
# preview of the first few.
hamFiles <- list.files(
  path = "C:/Data/easy_ham",
  pattern = "*.*",
  full.names = TRUE,
  recursive = FALSE
)
head(hamFiles)
## [1] "C:/Data/easy_ham/0001.ea7e79d3153e7469e7a9c3e0af6a357e"
## [2] "C:/Data/easy_ham/0002.b3120c4bcbf3101e661161ee7efcb8bf"
## [3] "C:/Data/easy_ham/0003.acfc5ad94bbd27118a0d8685d18c89dd"
## [4] "C:/Data/easy_ham/0004.e8d5727378ddde5c3be181df593f1712"
## [5] "C:/Data/easy_ham/0005.8c3b9e9c0f3f183ddaf7592a11b99957"
## [6] "C:/Data/easy_ham/0006.ee8b0dba12856155222be180ba122058"
# Unlike the spam folder, the first listed file is kept here (disabled step):
# hamFiles <- hamFiles[-1]
# Read every ham file whole, one character string per message.
hamList <- lapply(hamFiles, function(path) {
  readChar(path, file.info(path)$size)
})
# First "From: ...@..." match of every ham message.
hamListAddress <- lapply(hamList, str_extract, pattern = "From:.*@.*\\..*\\b")
head(hamListAddress)
## [[1]]
## [1] "From: Robert Elz <kre@munnari.OZ.AU"
##
## [[2]]
## [1] "From: Steve Burt <steve.burt@cursor-system.com"
##
## [[3]]
## [1] "From: \"Tim Chapman\" <timc@2ubh.com"
##
## [[4]]
## [1] "From: Monty Solomon <monty@roscom.com"
##
## [[5]]
## [1] "From: Tony Nugent <tony@linuxworks.com.au"
##
## [[6]]
## [1] "From: Stewart Smith <Stewart.Smith@ee.ed.ac.uk"
# Turn each ham From line into lower-case sender tokens: skip the "From: "
# prefix, keep the address starting at the word right before "@", blank out
# everything that is not a letter or whitespace, collapse repeated whitespace,
# then drop the final word (the last part of the domain).
hamListAddress1 <- lapply(hamListAddress, function(from_line) {
  sender <- str_sub(from_line, start = 7)
  address <- str_extract(sender, pattern = "\\b\\w*@.*\\..*\\b")
  letters_only <- str_replace_all(tolower(address), "[^a-zA-Z\\s]", " ")
  tokens <- str_replace_all(letters_only, "\\s{2,}", " ")
  gsub("\\s*\\w*$", "", tokens)
})
head(hamListAddress1)
## [[1]]
## [1] "kre munnari oz"
##
## [[2]]
## [1] "burt cursor system"
##
## [[3]]
## [1] "timc ubh"
##
## [[4]]
## [1] "monty roscom"
##
## [[5]]
## [1] "tony linuxworks com"
##
## [[6]]
## [1] "smith ee ed ac"
Doing statistical analyses
library(tm)
## Loading required package: NLP
# Build the labelled corpus of sender tokens: spam documents first, then ham.
# Every text becomes a one-document VCorpus that is appended to email_corpus
# and tagged with a document-level "spamFlag" of "Yes" (spam) or "No" (ham).
spamListV <- unlist(spamListAddress1)
# Seed the corpus with the first spam sender.
tmp <- VCorpus(VectorSource(spamListV[1]))
email_corpus <- tmp
flag <- c("Yes")
meta(email_corpus[[1]])
## author : character(0)
## datetimestamp: 2018-11-07 04:57:08
## description : character(0)
## heading : character(0)
## id : 1
## language : en
## origin : character(0)
inspect(email_corpus[1])
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 1
##
## [[1]]
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 14
meta(email_corpus[[1]], tag = "spamFlag") <- flag
# Remaining spam senders. seq_along(...)[-1] is empty when there is a single
# spam message, whereas 2:length(...) would count backwards as c(2, 1).
for (i in seq_along(spamListAddress1)[-1]) {
  tmp <- VCorpus(VectorSource(spamListAddress1[i]))
  email_corpus <- c(email_corpus, tmp)
  meta(email_corpus[[i]], "spamFlag") <- "Yes"
}
# Ham senders follow directly after the spam documents; n walks the ham list
# while i is the position of the newly appended document in the corpus.
n <- 0
for (n in seq_along(hamListAddress1)) {
  i <- length(spamListAddress1) + n
  tmp <- VCorpus(VectorSource(hamListAddress1[n]))
  email_corpus <- c(email_corpus, tmp)
  meta(email_corpus[[i]], "spamFlag") <- "No"
}
email_corpus
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 3051
# Shuffle the document order so that the positional train/test split taken
# later mixes spam and ham, then count terms per document.
email_corpus1 <- sample(email_corpus)
dtm <- DocumentTermMatrix(email_corpus1)
dtm
## <<DocumentTermMatrix (documents: 3051, terms: 1354)>>
## Non-/sparse entries: 6747/4124307
## Sparsity : 100%
## Maximal term length: 35
## Weighting : term frequency (tf)
library(RTextTools)
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
# True labels in the shuffled document order; show the first 100.
flag <- unlist(meta(email_corpus1, "spamFlag"))
flag[seq_len(100)]
## 1 1 1 1 1 1 1 1 1 1 1 1
## "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## 1 1 1 1 1 1 1 1 1 1 1 1
## "No" "Yes" "No" "No" "Yes" "No" "No" "No" "Yes" "No" "No" "No"
## 1 1 1 1 1 1 1 1 1 1 1 1
## "No" "Yes" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No"
## 1 1 1 1 1 1 1 1 1 1 1 1
## "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes"
## 1 1 1 1 1 1 1 1 1 1 1 1
## "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
## 1 1 1 1 1 1 1 1 1 1 1 1
## "No" "No" "No" "No" "No" "No" "Yes" "Yes" "No" "No" "No" "No"
## 1 1 1 1 1 1 1 1 1 1 1 1
## "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No"
## 1 1 1 1 1 1 1 1 1 1 1 1
## "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No"
## 1 1 1 1
## "No" "No" "No" "No"
# Total number of labelled documents.
N <- length(flag)
N
## [1] 3051
# Rows 1-2000 of the shuffled matrix train the models; rows 2001-N are held
# out for testing. virgin = FALSE: the held-out rows carry known labels.
container <- create_container(
  dtm,
  labels = flag,
  trainSize = 1:2000,
  testSize = 2001:N,
  virgin = FALSE
)
email_corpus[[2016]]
## <<PlainTextDocument>>
## Metadata: 8
## Content: chars: 32
# Fit the three classifiers on the training rows ...
svm_model <- train_model(container, "SVM")
tree_model <- train_model(container, "TREE")
maxent_model <- train_model(container, "MAXENT")
# ... and label the held-out rows with each of them.
svm_out <- classify_model(container, svm_model)
tree_out <- classify_model(container, tree_model)
maxent_out <- classify_model(container, maxent_model)
head(svm_out)
## SVM_LABEL SVM_PROB
## 1 No 0.9571617
## 2 No 0.9571177
## 3 No 0.6980404
## 4 No 0.9571064
## 5 Yes 0.8443809
## 6 No 0.9571570
# Put the true labels of the held-out rows (2001..N) next to the label each
# model predicted, one column per model. Labels are kept as plain character
# strings; FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
labels_out <- data.frame(
  correct_label = flag[2001:N],
  svm = as.character(svm_out[, 1]),
  tree = as.character(tree_out[, 1]),
  maxent = as.character(maxent_out[, 1]),
  stringsAsFactors = FALSE
)
head(labels_out, 100)
## correct_label svm tree maxent
## 1 No No No No
## 2 No No No No
## 3 No No No Yes
## 4 No No No No
## 5 Yes Yes No Yes
## 6 No No No No
## 7 Yes Yes Yes Yes
## 8 No No No No
## 9 No No No No
## 10 No No No No
## 11 No No No No
## 12 No No No No
## 13 No No No No
## 14 No No No No
## 15 No No No No
## 16 No No No No
## 17 No No No No
## 18 No No No No
## 19 No No No No
## 20 No No No No
## 21 No No No No
## 22 No No No No
## 23 No No No No
## 24 No No No No
## 25 No No No No
## 26 No No No No
## 27 No No No No
## 28 No No No No
## 29 No No No No
## 30 No No No No
## 31 No No No No
## 32 No No No No
## 33 No No No No
## 34 No No No No
## 35 No No No No
## 36 Yes Yes No Yes
## 37 No No No No
## 38 No No No No
## 39 No No No No
## 40 No No No No
## 41 Yes Yes Yes Yes
## 42 No No No No
## 43 Yes Yes No Yes
## 44 No No No No
## 45 Yes Yes No Yes
## 46 No No No No
## 47 No No No No
## 48 No No No No
## 49 No No No No
## 50 No No No No
## 51 No No No No
## 52 No No No No
## 53 No No No No
## 54 No No No No
## 55 Yes Yes Yes Yes
## 56 No No No No
## 57 No No No No
## 58 No No No No
## 59 No No No No
## 60 No No No No
## 61 No No No No
## 62 No No No No
## 63 No No No No
## 64 Yes No No Yes
## 65 No No No No
## 66 No No No No
## 67 No No No No
## 68 No No No No
## 69 No No No No
## 70 No No No No
## 71 No No No No
## 72 No No No No
## 73 No No No No
## 74 Yes No No No
## 75 No No No No
## 76 No No No No
## 77 No No No No
## 78 No No No No
## 79 No No No No
## 80 No No No No
## 81 No No No No
## 82 No No No No
## 83 No No No No
## 84 No No No No
## 85 No No No No
## 86 No No No No
## 87 No No No No
## 88 Yes No No No
## 89 No No No No
## 90 No No No No
## 91 No No No No
## 92 No No No No
## 93 No No No No
## 94 Yes Yes No Yes
## 95 No No No No
## 96 No No No No
## 97 No No No No
## 98 No No No No
## 99 No No No No
## 100 Yes No No No
# SVM: held-out predictions that agree (TRUE) or disagree (FALSE) with the
# true label, as counts and then as proportions.
table(labels_out[["correct_label"]] == labels_out[["svm"]])
##
## FALSE TRUE
## 73 978
prop.table(table(labels_out[["correct_label"]] == labels_out[["svm"]]))
##
## FALSE TRUE
## 0.06945766 0.93054234
# SVM on the uncleansed sender tokens: about 93% agreement in the run recorded above.
# Decision tree: about 89% agreement in the recorded run.
table(labels_out[["correct_label"]] == labels_out[["tree"]])
##
## FALSE TRUE
## 113 938
prop.table(table(labels_out[["correct_label"]] == labels_out[["tree"]]))
##
## FALSE TRUE
## 0.1075167 0.8924833
# Maximum entropy: about 95% agreement in the recorded run.
table(labels_out[["correct_label"]] == labels_out[["maxent"]])
##
## FALSE TRUE
## 52 999
prop.table(table(labels_out[["correct_label"]] == labels_out[["maxent"]]))
##
## FALSE TRUE
## 0.04947669 0.95052331
Loading additional spam/ham files to see if our model works
# Step 2, additional testing: full paths of the entries directly inside the
# second spam folder, followed by a preview of the first few.
spamFiles1 <- list.files(
  path = "C:/Data/spam2/spam_2",
  pattern = "*.*",
  full.names = TRUE,
  recursive = FALSE
)
head(spamFiles1)
## [1] "C:/Data/spam2/spam_2/00001.317e78fa8ee2f54cd4890fdc09ba8176"
## [2] "C:/Data/spam2/spam_2/00002.9438920e9a55591b18e60d1ed37d992b"
## [3] "C:/Data/spam2/spam_2/00003.590eff932f8704d8b0fcbe69d023b54d"
## [4] "C:/Data/spam2/spam_2/00004.bdcc075fa4beb5157b5dd6cd41d8887b"
## [5] "C:/Data/spam2/spam_2/00005.ed0aba4d386c5e62bc737cf3f0ed9589"
## [6] "C:/Data/spam2/spam_2/00006.3ca1f399ccda5d897fecb8c57669a283"
# The first listed file is kept for this folder (disabled step):
# spamFiles1 <- spamFiles1[-1]
# Read every file of the second spam set whole, one string per message.
spamList1 <- lapply(spamFiles1, function(path) {
  readChar(path, file.info(path)$size)
})
# First "From: ...@..." match of every message.
spamListAddressA <- lapply(spamList1, str_extract, pattern = "From:.*@.*\\..*\\b")
head(spamListAddressA)
## [[1]]
## [1] "From: \"Start Now\" <startnow2002@hotmail.com"
##
## [[2]]
## [1] "From: lmrn@mailexcite.com"
##
## [[3]]
## [1] "From: amknight@mailexcite.com"
##
## [[4]]
## [1] "From: jordan23@mailexcite.com"
##
## [[5]]
## [1] "From: yyyy@pluriproj.pt"
##
## [[6]]
## [1] "From: 3b3fke@ms10.hinet.net"
# Lower-case sender tokens for the second spam set: skip the "From: " prefix,
# keep the address starting at the word right before "@", blank out everything
# that is not a letter or whitespace, collapse repeated whitespace, then drop
# the final word (the last part of the domain).
spamListAddressA1 <- lapply(spamListAddressA, function(from_line) {
  sender <- str_sub(from_line, start = 7)
  address <- str_extract(sender, pattern = "\\b\\w*@.*\\..*\\b")
  letters_only <- str_replace_all(tolower(address), "[^a-zA-Z\\s]", " ")
  tokens <- str_replace_all(letters_only, "\\s{2,}", " ")
  gsub("\\s*\\w*$", "", tokens)
})
head(spamListAddressA1)
## [[1]]
## [1] "startnow hotmail"
##
## [[2]]
## [1] "lmrn mailexcite"
##
## [[3]]
## [1] "amknight mailexcite"
##
## [[4]]
## [1] "jordan mailexcite"
##
## [[5]]
## [1] "yyyy pluriproj"
##
## [[6]]
## [1] " b fke ms hinet"
# Full paths of the entries directly inside the hard_ham folder, followed by a
# preview of the first few.
hamFiles1 <- list.files(
  path = "C:/Data/ham1/hard_ham",
  pattern = "*.*",
  full.names = TRUE,
  recursive = FALSE
)
head(hamFiles1)
## [1] "C:/Data/ham1/hard_ham/00001.7c7d6921e671bbe18ebb5f893cd9bb35"
## [2] "C:/Data/ham1/hard_ham/00002.ca96f74042d05c1a1d29ca30467cfcd5"
## [3] "C:/Data/ham1/hard_ham/00003.268fd170a3fc73bee2739d8204856a53"
## [4] "C:/Data/ham1/hard_ham/00004.68819fc91d34c82433074d7bd3127dcc"
## [5] "C:/Data/ham1/hard_ham/00005.34bcaad58ad5f598f5d6af8cfa0c0465"
## [6] "C:/Data/ham1/hard_ham/00006.3409dec8ca4fcf2d6e0582554473b5c9"
# Read every hard-ham file whole, one character string per message.
hamList1 <- lapply(hamFiles1, function(path) {
  readChar(path, file.info(path)$size)
})
# First "From: ...@..." match of every message.
hamListAddressA <- lapply(hamList1, str_extract, pattern = "From:.*@.*\\..*\\b")
head(hamListAddressA)
## [[1]]
## [1] "From: The Motley Fool <Fool@motleyfool.com"
##
## [[2]]
## [1] "From: malcolm-sweeps@mrichi.com"
##
## [[3]]
## [1] "From: \"Starflung NIC\" <nic@starflung.com"
##
## [[4]]
## [1] "From: \"John Levine\" <johnl@cauce.org"
##
## [[5]]
## [1] "From: \"The ISO17799 Newsletter\" <iso17799@securityrisk.co.uk"
##
## [[6]]
## [1] "From: \"jobfair24\" <newsletter@jobfair24.de"
# Lower-case sender tokens for the hard-ham set: skip the "From: " prefix,
# keep the address starting at the word right before "@", blank out everything
# that is not a letter or whitespace, collapse repeated whitespace, then drop
# the final word (the last part of the domain).
hamListAddressA1 <- lapply(hamListAddressA, function(from_line) {
  sender <- str_sub(from_line, start = 7)
  address <- str_extract(sender, pattern = "\\b\\w*@.*\\..*\\b")
  letters_only <- str_replace_all(tolower(address), "[^a-zA-Z\\s]", " ")
  tokens <- str_replace_all(letters_only, "\\s{2,}", " ")
  gsub("\\s*\\w*$", "", tokens)
})
head(hamListAddressA1, 20)
## [[1]]
## [1] "fool motleyfool"
##
## [[2]]
## [1] "sweeps mrichi"
##
## [[3]]
## [1] "nic starflung"
##
## [[4]]
## [1] "johnl cauce"
##
## [[5]]
## [1] "iso securityrisk co"
##
## [[6]]
## [1] "newsletter jobfair"
##
## [[7]]
## [1] " a f c xmr"
##
## [[8]]
## [1] "michaelr lindows"
##
## [[9]]
## [1] "newsletter jobfair"
##
## [[10]]
## [1] "michaelr lindows"
##
## [[11]]
## [1] " newsletter online"
##
## [[12]]
## [1] " newsletter online"
##
## [[13]]
## [1] " newsletter online"
##
## [[14]]
## [1] "update list theregister co"
##
## [[15]]
## [1] "subscriptions lockergnome"
##
## [[16]]
## [1] "subscriptions lockergnome"
##
## [[17]]
## [1] " ummail unitedmedia"
##
## [[18]]
## [1] " newsletter online"
##
## [[19]]
## [1] "subscriptions lockergnome"
##
## [[20]]
## [1] " newsletter online"
# Append the second spam set ("Yes") and the hard-ham set ("No") to the
# already shuffled corpus email_corpus1, one single-document corpus at a time.
#
# The last index of a run of k documents starting at `start` is
# start + k - 1. Using start + k made each loop run one time too many and
# append an empty extra document carrying a label.
# NOTE(review): the printed value of `end` below was adjusted by hand to match
# the corrected arithmetic; document counts printed further down still come
# from the earlier run and are too high by one per loop until re-knitted.
start <- length(spamListAddress1) + length(hamListAddress1) + 1
start
## [1] 3052
end <- start + length(spamListAddressA1) - 1
end
## [1] 4448
n <- 0
# seq(..., length.out = k) is empty for k == 0, unlike start:end.
for (i in seq(start, length.out = length(spamListAddressA1))) {
  n <- n + 1
  tmp <- VCorpus(VectorSource(spamListAddressA1[n]))
  email_corpus1 <- c(email_corpus1, tmp)
  meta(email_corpus1[[i]], "spamFlag") <- "Yes"
}
start <- end + 1
end <- start + length(hamListAddressA1) - 1
n <- 0
for (i in seq(start, length.out = length(hamListAddressA1))) {
  n <- n + 1
  tmp <- VCorpus(VectorSource(hamListAddressA1[n]))
  email_corpus1 <- c(email_corpus1, tmp)
  meta(email_corpus1[[i]], "spamFlag") <- "No"
}
email_corpus1
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 4701
# Shuffle the enlarged corpus and rebuild the document-term matrix from it.
email_corpus2 <- sample(email_corpus1)
dtm <- DocumentTermMatrix(email_corpus2)
dtm
## <<DocumentTermMatrix (documents: 4701, terms: 2974)>>
## Non-/sparse entries: 10292/13970482
## Sparsity : 100%
## Maximal term length: 38
## Weighting : term frequency (tf)
# True labels in the shuffled order and their count.
flag <- unlist(meta(email_corpus2, "spamFlag"))
N <- length(flag)
N
## [1] 4701
# Rows 1-3052 train the models; rows 3053-N are held out for testing.
container <- create_container(
  dtm,
  labels = flag,
  trainSize = 1:3052,
  testSize = 3053:N,
  virgin = FALSE
)
# Fit the three classifiers on the training rows ...
svm_model <- train_model(container, "SVM")
tree_model <- train_model(container, "TREE")
maxent_model <- train_model(container, "MAXENT")
# ... and label the held-out rows with each of them.
svm_out <- classify_model(container, svm_model)
tree_out <- classify_model(container, tree_model)
maxent_out <- classify_model(container, maxent_model)
head(svm_out)
## SVM_LABEL SVM_PROB
## 1 Yes 0.7534378
## 2 No 0.9892547
## 3 Yes 0.6189336
## 4 No 0.9892605
## 5 Yes 0.9912976
## 6 No 0.9892725
# Put the true labels of the held-out rows (3053..N) next to the label each
# model predicted, one column per model. Labels are kept as plain character
# strings; FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
labels_out <- data.frame(
  correct_label = flag[3053:N],
  svm = as.character(svm_out[, 1]),
  tree = as.character(tree_out[, 1]),
  maxent = as.character(maxent_out[, 1]),
  stringsAsFactors = FALSE
)
head(labels_out, 100)
## correct_label svm tree maxent
## 1 Yes Yes No Yes
## 2 No No No No
## 3 No Yes No Yes
## 4 No No No No
## 5 Yes Yes Yes Yes
## 6 No No No No
## 7 Yes Yes No No
## 8 No No No No
## 9 No No No No
## 10 Yes Yes Yes Yes
## 11 No No No No
## 12 Yes Yes No Yes
## 13 No No No No
## 14 Yes Yes Yes Yes
## 15 No No No No
## 16 No No No No
## 17 No No No No
## 18 No No No No
## 19 No No No No
## 20 No No No No
## 21 No Yes No Yes
## 22 No Yes No No
## 23 No No No No
## 24 No No No No
## 25 No No No No
## 26 Yes Yes No Yes
## 27 No No No No
## 28 No No No No
## 29 No No No No
## 30 Yes Yes No Yes
## 31 No Yes No Yes
## 32 No No No No
## 33 No Yes No Yes
## 34 Yes Yes No Yes
## 35 Yes Yes No Yes
## 36 Yes Yes Yes Yes
## 37 No No No No
## 38 No No No No
## 39 No No No No
## 40 No No No No
## 41 No No No No
## 42 No No No No
## 43 Yes Yes No Yes
## 44 Yes Yes Yes Yes
## 45 No No No No
## 46 No Yes No Yes
## 47 No No No No
## 48 Yes Yes No Yes
## 49 No Yes No Yes
## 50 No No No No
## 51 No No No No
## 52 Yes Yes Yes Yes
## 53 No No No No
## 54 Yes Yes No Yes
## 55 No No No No
## 56 Yes Yes No Yes
## 57 Yes Yes No Yes
## 58 No No No No
## 59 No No No No
## 60 No No No No
## 61 No No No No
## 62 Yes Yes No Yes
## 63 No No No No
## 64 No No No No
## 65 Yes Yes No Yes
## 66 No No No No
## 67 Yes Yes No Yes
## 68 Yes Yes No Yes
## 69 Yes Yes No Yes
## 70 Yes Yes No Yes
## 71 Yes Yes Yes Yes
## 72 No No No No
## 73 No No No No
## 74 No Yes No Yes
## 75 Yes Yes Yes Yes
## 76 Yes Yes No Yes
## 77 Yes Yes No Yes
## 78 Yes Yes Yes Yes
## 79 No No No No
## 80 Yes Yes No Yes
## 81 No No No No
## 82 No No No No
## 83 Yes Yes No Yes
## 84 No No No No
## 85 Yes Yes No Yes
## 86 Yes Yes No Yes
## 87 Yes Yes No Yes
## 88 Yes Yes Yes Yes
## 89 No No No No
## 90 No No No No
## 91 No No No No
## 92 Yes Yes Yes Yes
## 93 Yes Yes No Yes
## 94 Yes Yes No Yes
## 95 No No No No
## 96 No Yes No Yes
## 97 Yes Yes Yes Yes
## 98 No No No No
## 99 No No No No
## 100 No No No No
# SVM: held-out predictions that agree (TRUE) or disagree (FALSE) with the
# true label, as counts and then as proportions.
table(labels_out[["correct_label"]] == labels_out[["svm"]])
##
## FALSE TRUE
## 142 1507
prop.table(table(labels_out[["correct_label"]] == labels_out[["svm"]]))
##
## FALSE TRUE
## 0.0861128 0.9138872
# SVM on the uncleansed sender tokens: about 91% agreement in the run recorded above.
# Decision tree: about 72% agreement in the recorded run.
table(labels_out[["correct_label"]] == labels_out[["tree"]])
##
## FALSE TRUE
## 464 1185
prop.table(table(labels_out[["correct_label"]] == labels_out[["tree"]]))
##
## FALSE TRUE
## 0.2813827 0.7186173
# Maximum entropy: about 93% agreement in the recorded run.
table(labels_out[["correct_label"]] == labels_out[["maxent"]])
##
## FALSE TRUE
## 108 1541
prop.table(table(labels_out[["correct_label"]] == labels_out[["maxent"]]))
##
## FALSE TRUE
## 0.06549424 0.93450576
# Preview the cleaned sender tokens of the first spam set.
head(spamListAddress1)
## [[1]]
## [1] " a mailbot web"
##
## [[2]]
## [1] "taylor s serveimage"
##
## [[3]]
## [1] "sabrina mx premio"
##
## [[4]]
## [1] "wsup playful"
##
## [[5]]
## [1] "yenene mx premio"
##
## [[6]]
## [1] "thecashsystem firemail"
Testing subject
# First "Subject:" line of every message in the first spam set.
spamListSubject <- lapply(spamList, str_extract, pattern = "Subject:.*")
head(spamListSubject)
## [[1]]
## [1] "Subject: Life Insurance - Why Pay More?"
##
## [[2]]
## [1] "Subject: [ILUG] Guaranteed to lose 10-12 lbs in 30 days 10.206"
##
## [[3]]
## [1] "Subject: Guaranteed to lose 10-12 lbs in 30 days 11.150"
##
## [[4]]
## [1] "Subject: Re: Fw: User Name & Password to Membership To 5 Sites zzzz@example.com pviqg"
##
## [[5]]
## [1] "Subject: [ILUG-Social] re: Guaranteed to lose 10-12 lbs in 30 days 10.148"
##
## [[6]]
## [1] "Subject: RE: Your Bank Account Information "
# Normalise each subject: skip the "Subject: " prefix, replace everything that
# is not a letter or whitespace with a space, lower-case, collapse repeated
# whitespace, and strip one leading and one trailing whitespace character.
spamListSubject1 <- lapply(spamListSubject, function(subject_line) {
  text <- str_sub(subject_line, start = 10)
  text <- tolower(str_replace_all(text, "[^a-zA-Z\\s]", " "))
  text <- str_replace_all(text, "\\s{2,}", " ")
  text <- str_replace_all(text, "^\\s", "")
  str_replace(text, "\\s$", "")
})
head(spamListSubject1)
## [[1]]
## [1] "life insurance why pay more"
##
## [[2]]
## [1] "ilug guaranteed to lose lbs in days"
##
## [[3]]
## [1] "guaranteed to lose lbs in days"
##
## [[4]]
## [1] "re fw user name password to membership to sites zzzz example com pviqg"
##
## [[5]]
## [1] "ilug social re guaranteed to lose lbs in days"
##
## [[6]]
## [1] "re your bank account information"
# First "Subject:" line of every message in the first ham set.
hamListSubject <- lapply(hamList, str_extract, pattern = "Subject:.*")
head(hamListSubject)
## [[1]]
## [1] "Subject: Re: New Sequences Window"
##
## [[2]]
## [1] "Subject: [zzzzteana] RE: Alexander"
##
## [[3]]
## [1] "Subject: [zzzzteana] Moscow bomber"
##
## [[4]]
## [1] "Subject: [IRR] Klez: The Virus That Won't Die"
##
## [[5]]
## [1] "Subject: Re: Insert signature"
##
## [[6]]
## [1] "Subject: Re: [zzzzteana] Nothing like mama used to make"
# Normalise each subject: skip the "Subject: " prefix, replace everything that
# is not a letter or whitespace with a space, lower-case, collapse repeated
# whitespace, and strip one leading and one trailing whitespace character.
hamListSubject1 <- lapply(hamListSubject, function(subject_line) {
  text <- str_sub(subject_line, start = 10)
  text <- tolower(str_replace_all(text, "[^a-zA-Z\\s]", " "))
  text <- str_replace_all(text, "\\s{2,}", " ")
  text <- str_replace_all(text, "^\\s", "")
  str_replace(text, "\\s$", "")
})
head(hamListSubject1)
## [[1]]
## [1] "re new sequences window"
##
## [[2]]
## [1] "zzzzteana re alexander"
##
## [[3]]
## [1] "zzzzteana moscow bomber"
##
## [[4]]
## [1] "irr klez the virus that won t die"
##
## [[5]]
## [1] "re insert signature"
##
## [[6]]
## [1] "re zzzzteana nothing like mama used to make"
# First "Subject:" line of every message in the second spam set.
spamListSubjectA <- lapply(spamList1, str_extract, pattern = "Subject:.*")
head(spamListSubjectA)
## [[1]]
## [1] "Subject: [ILUG] STOP THE MLM INSANITY"
##
## [[2]]
## [1] "Subject: Real Protection, Stun Guns! Free Shipping! Time:2:01:35 PM"
##
## [[3]]
## [1] "Subject: New Improved Fat Burners, Now With TV Fat Absorbers! Time:6:25:49 PM"
##
## [[4]]
## [1] "Subject: New Improved Fat Burners, Now With TV Fat Absorbers! Time:7:20:54 AM"
##
## [[5]]
## [1] "Subject: Never Repay Cash Grants, $500 - $50,000, Secret Revealed!"
##
## [[6]]
## [1] "Subject: ÁÙ¦b¥Î20%ªº«H¥Î¥d´`Àô¶Ü??? Time:PM 05:36:34"
# Normalise each subject: skip the "Subject: " prefix, replace everything that
# is not a letter or whitespace with a space, lower-case, collapse repeated
# whitespace, and strip one leading and one trailing whitespace character.
spamListSubjectA1 <- lapply(spamListSubjectA, function(subject_line) {
  text <- str_sub(subject_line, start = 10)
  text <- tolower(str_replace_all(text, "[^a-zA-Z\\s]", " "))
  text <- str_replace_all(text, "\\s{2,}", " ")
  text <- str_replace_all(text, "^\\s", "")
  str_replace(text, "\\s$", "")
})
head(spamListSubjectA1)
## [[1]]
## [1] "ilug stop the mlm insanity"
##
## [[2]]
## [1] "real protection stun guns free shipping time pm"
##
## [[3]]
## [1] "new improved fat burners now with tv fat absorbers time pm"
##
## [[4]]
## [1] "new improved fat burners now with tv fat absorbers time am"
##
## [[5]]
## [1] "never repay cash grants secret revealed"
##
## [[6]]
## [1] "b h d time pm"
# First "Subject:" line of every message in the hard-ham set.
hamListSubjectA <- lapply(hamList1, str_extract, pattern = "Subject:.*")
head(hamListSubjectA)
## [[1]]
## [1] "Subject: Personal Finance: Resolutions You Can Keep"
##
## [[2]]
## [1] "Subject: Malcolm in the Middle Sweepstakes Prize Notification"
##
## [[3]]
## [1] "Subject: Automated 30 day renewal reminder 2002-05-27"
##
## [[4]]
## [1] "Subject: CAUCE NEWS, Vol 6, No 2, June 2002"
##
## [[5]]
## [1] "Subject: The ISO17799 Newsletter - Issue 4"
##
## [[6]]
## [1] "Subject: Virtueller Messetag der jobfair24 am Mittwoch, 03. Juli 2002"
# Normalise each subject: skip the "Subject: " prefix, replace everything that
# is not a letter or whitespace with a space, lower-case, collapse repeated
# whitespace, and strip one leading and one trailing whitespace character.
hamListSubjectA1 <- lapply(hamListSubjectA, function(subject_line) {
  text <- str_sub(subject_line, start = 10)
  text <- tolower(str_replace_all(text, "[^a-zA-Z\\s]", " "))
  text <- str_replace_all(text, "\\s{2,}", " ")
  text <- str_replace_all(text, "^\\s", "")
  str_replace(text, "\\s$", "")
})
head(hamListSubjectA1)
## [[1]]
## [1] "personal finance resolutions you can keep"
##
## [[2]]
## [1] "malcolm in the middle sweepstakes prize notification"
##
## [[3]]
## [1] "automated day renewal reminder"
##
## [[4]]
## [1] "cauce news vol no june"
##
## [[5]]
## [1] "the iso newsletter issue"
##
## [[6]]
## [1] "virtueller messetag der jobfair am mittwoch juli"
# Start a fresh corpus for the subject experiment, seeded with the first
# cleaned spam subject and tagged as spam.
spamListV <- unlist(spamListSubject1)
email_corpus <- NULL
tmp <- VCorpus(VectorSource(spamListV[1]))
email_corpus <- tmp
flag <- c("Yes")
# Default metadata of the seed document, before the spam flag is attached.
meta(email_corpus[[1]])
## author : character(0)
## datetimestamp: 2018-11-07 04:57:32
## description : character(0)
## heading : character(0)
## id : 1
## language : en
## origin : character(0)
inspect(email_corpus[1])
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 1
##
## [[1]]
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 27
meta(email_corpus[[1]], tag = "spamFlag") <- flag
# Fill the subject corpus in four runs: first spam set ("Yes"), first ham set
# ("No"), second spam set ("Yes"), hard-ham set ("No"). Each text is wrapped in
# a one-document corpus, appended, and flagged at its new position i.
#
# The last index of a run of k documents starting at `start` is
# start + k - 1. Using start + k made the last three loops run one time too
# many, each appending an empty extra document carrying a label.
# NOTE(review): the printed values of `start`/`end` for the second spam set
# were adjusted by hand to match the corrected arithmetic; document counts
# printed further down still come from the earlier run until re-knitted.
start <- 2
start
## [1] 2
end <- length(spamListSubject1)
end
## [1] 500
n <- 1
# Document 1 already exists; [-1] yields an empty sequence when there is only
# one spam subject, whereas 2:1 would count backwards.
for (i in seq_along(spamListSubject1)[-1]) {
  n <- n + 1
  tmp <- VCorpus(VectorSource(spamListSubject1[n]))
  email_corpus <- c(email_corpus, tmp)
  meta(email_corpus[[i]], "spamFlag") <- "Yes"
}
start <- end + 1
end <- start + length(hamListSubject1) - 1
n <- 0
# seq(..., length.out = k) is empty for k == 0, unlike start:end.
for (i in seq(start, length.out = length(hamListSubject1))) {
  n <- n + 1
  tmp <- VCorpus(VectorSource(hamListSubject1[n]))
  email_corpus <- c(email_corpus, tmp)
  meta(email_corpus[[i]], "spamFlag") <- "No"
}
start <- end + 1
start
## [1] 3052
end <- start + length(spamListSubjectA1) - 1
end
## [1] 4448
n <- 0
for (i in seq(start, length.out = length(spamListSubjectA1))) {
  n <- n + 1
  tmp <- VCorpus(VectorSource(spamListSubjectA1[n]))
  email_corpus <- c(email_corpus, tmp)
  meta(email_corpus[[i]], "spamFlag") <- "Yes"
}
start <- end + 1
end <- start + length(hamListSubjectA1) - 1
n <- 0
for (i in seq(start, length.out = length(hamListSubjectA1))) {
  n <- n + 1
  tmp <- VCorpus(VectorSource(hamListSubjectA1[n]))
  email_corpus <- c(email_corpus, tmp)
  meta(email_corpus[[i]], "spamFlag") <- "No"
}
email_corpus
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 4702
# Shuffle the subject corpus and build its document-term matrix.
email_corpus1 <- sample(email_corpus)
dtm <- DocumentTermMatrix(email_corpus1)
dtm
## <<DocumentTermMatrix (documents: 4702, terms: 5190)>>
## Non-/sparse entries: 21744/24381636
## Sparsity : 100%
## Maximal term length: 31
## Weighting : term frequency (tf)
# True labels in the shuffled order and their count.
flag <- unlist(meta(email_corpus1, "spamFlag"))
N <- length(flag)
N
## [1] 4702
# Rows 1-3052 train the models; rows 3053-N are held out for testing.
container <- create_container(
  dtm,
  labels = flag,
  trainSize = 1:3052,
  testSize = 3053:N,
  virgin = FALSE
)
# Fit the three classifiers on the training rows ...
svm_model <- train_model(container, "SVM")
tree_model <- train_model(container, "TREE")
maxent_model <- train_model(container, "MAXENT")
# ... and label the held-out rows with each of them.
svm_out <- classify_model(container, svm_model)
tree_out <- classify_model(container, tree_model)
maxent_out <- classify_model(container, maxent_model)
head(svm_out)
## SVM_LABEL SVM_PROB
## 1 No 0.9694348
## 2 No 0.6200844
## 3 No 0.7448862
## 4 No 0.9024603
## 5 No 0.7249917
## 6 Yes 0.9722772
# Put the true labels of the held-out rows (3053..N) next to the label each
# model predicted, one column per model. Labels are kept as plain character
# strings; FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
labels_out <- data.frame(
  correct_label = flag[3053:N],
  svm = as.character(svm_out[, 1]),
  tree = as.character(tree_out[, 1]),
  maxent = as.character(maxent_out[, 1]),
  stringsAsFactors = FALSE
)
head(labels_out)
## correct_label svm tree maxent
## 1 No No No No
## 2 No No No Yes
## 3 No No No No
## 4 No No No No
## 5 No No No No
## 6 Yes Yes Yes Yes
# SVM: held-out predictions that agree (TRUE) or disagree (FALSE) with the
# true label, as counts and then as proportions.
table(labels_out[["correct_label"]] == labels_out[["svm"]])
##
## FALSE TRUE
## 242 1408
prop.table(table(labels_out[["correct_label"]] == labels_out[["svm"]]))
##
## FALSE TRUE
## 0.1466667 0.8533333
# SVM on subjects: about 85% agreement in the run recorded above.
# Decision tree: about 73% agreement in the recorded run.
table(labels_out[["correct_label"]] == labels_out[["tree"]])
##
## FALSE TRUE
## 453 1197
prop.table(table(labels_out[["correct_label"]] == labels_out[["tree"]]))
##
## FALSE TRUE
## 0.2745455 0.7254545
# Maximum entropy: about 88% agreement in the recorded run.
table(labels_out[["correct_label"]] == labels_out[["maxent"]])
##
## FALSE TRUE
## 196 1454
prop.table(table(labels_out[["correct_label"]] == labels_out[["maxent"]]))
##
## FALSE TRUE
## 0.1187879 0.8812121
Testing Address + Subject
# Sender tokens + subject text as one feature string per message.
# Pool both data sets per class, keeping sender and subject lists aligned.
spamListAddressC <- c(spamListAddress1, spamListAddressA1)
spamListSubjectC <- c(spamListSubject1, spamListSubjectA1)
hamListAddressC <- c(hamListAddress1, hamListAddressA1)
hamListSubjectC <- c(hamListSubject1, hamListSubjectA1)
# Join sender tokens and subject with a single space between them.
spamListAS <- str_c(spamListAddressC, spamListSubjectC, sep = " ")
head(spamListAS)
## [1] " a mailbot web life insurance why pay more"
## [2] "taylor s serveimage ilug guaranteed to lose lbs in days"
## [3] "sabrina mx premio guaranteed to lose lbs in days"
## [4] "wsup playful re fw user name password to membership to sites zzzz example com pviqg"
## [5] "yenene mx premio ilug social re guaranteed to lose lbs in days"
## [6] "thecashsystem firemail re your bank account information"
# Same join for ham: sender tokens, one space, subject text.
hamListAS <- str_c(hamListAddressC, hamListSubjectC, sep = " ")
head(hamListAS)
## [1] "kre munnari oz re new sequences window"
## [2] "burt cursor system zzzzteana re alexander"
## [3] "timc ubh zzzzteana moscow bomber"
## [4] "monty roscom irr klez the virus that won t die"
## [5] "tony linuxworks com re insert signature"
## [6] "smith ee ed ac re zzzzteana nothing like mama used to make"
# Start a fresh corpus for the sender + subject experiment, seeded with the
# first combined spam string and tagged as spam.
spamListV <- unlist(spamListAS)
email_corpus <- NULL
tmp <- VCorpus(VectorSource(spamListV[1]))
email_corpus <- tmp
flag <- c("Yes")
# Default metadata of the seed document, before the spam flag is attached.
meta(email_corpus[[1]])
## author : character(0)
## datetimestamp: 2018-11-07 04:57:53
## description : character(0)
## heading : character(0)
## id : 1
## language : en
## origin : character(0)
inspect(email_corpus[1])
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 1
##
## [[1]]
## <<PlainTextDocument>>
## Metadata: 7
## Content: chars: 42
meta(email_corpus[[1]], tag = "spamFlag") <- flag
# Fill the sender + subject corpus: remaining spam strings ("Yes"), then all
# ham strings ("No"). Each text is wrapped in a one-document corpus, appended,
# and flagged at its new position i.
#
# The last index of a run of k documents starting at `start` is
# start + k - 1. Using start + k made the ham loop run one time too many and
# append an empty extra document flagged "No".
# NOTE(review): document counts printed further down still come from the
# earlier run and are too high by one until re-knitted.
start <- 2
start
## [1] 2
end <- length(spamListAS)
end
## [1] 1897
n <- 1
# Document 1 already exists; [-1] yields an empty sequence when there is only
# one spam string, whereas 2:1 would count backwards.
for (i in seq_along(spamListAS)[-1]) {
  n <- n + 1
  tmp <- VCorpus(VectorSource(spamListAS[n]))
  email_corpus <- c(email_corpus, tmp)
  meta(email_corpus[[i]], "spamFlag") <- "Yes"
}
start <- end + 1
end <- start + length(hamListAS) - 1
n <- 0
# seq(..., length.out = k) is empty for k == 0, unlike start:end.
for (i in seq(start, length.out = length(hamListAS))) {
  n <- n + 1
  tmp <- VCorpus(VectorSource(hamListAS[n]))
  email_corpus <- c(email_corpus, tmp)
  meta(email_corpus[[i]], "spamFlag") <- "No"
}
email_corpus
## <<VCorpus>>
## Metadata: corpus specific: 0, document level (indexed): 0
## Content: documents: 4700
# Shuffle the sender + subject corpus and build its document-term matrix.
email_corpus1 <- sample(email_corpus)
dtm <- DocumentTermMatrix(email_corpus1)
dtm
## <<DocumentTermMatrix (documents: 4700, terms: 7769)>>
## Non-/sparse entries: 31759/36482541
## Sparsity : 100%
## Maximal term length: 38
## Weighting : term frequency (tf)
# True labels in the shuffled order and their count.
flag <- unlist(meta(email_corpus1, "spamFlag"))
N <- length(flag)
N
## [1] 4700
# Rows 1-3052 train the models; rows 3053-N are held out for testing.
container <- create_container(
  dtm,
  labels = flag,
  trainSize = 1:3052,
  testSize = 3053:N,
  virgin = FALSE
)
# Fit the three classifiers on the training rows ...
svm_model <- train_model(container, "SVM")
tree_model <- train_model(container, "TREE")
maxent_model <- train_model(container, "MAXENT")
# ... and label the held-out rows with each of them.
svm_out <- classify_model(container, svm_model)
tree_out <- classify_model(container, tree_model)
maxent_out <- classify_model(container, maxent_model)
head(svm_out)
## SVM_LABEL SVM_PROB
## 1 No 0.7371761
## 2 Yes 0.6101042
## 3 No 0.9993536
## 4 Yes 0.9998919
## 5 No 0.5100162
## 6 No 0.9949306
# Put the true labels of the held-out rows (3053..N) next to the label each
# model predicted, one column per model. Labels are kept as plain character
# strings; FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
labels_out <- data.frame(
  correct_label = flag[3053:N],
  svm = as.character(svm_out[, 1]),
  tree = as.character(tree_out[, 1]),
  maxent = as.character(maxent_out[, 1]),
  stringsAsFactors = FALSE
)
head(labels_out)
## correct_label svm tree maxent
## 1 Yes No No No
## 2 Yes Yes No Yes
## 3 No No No No
## 4 Yes Yes Yes Yes
## 5 Yes No No Yes
## 6 No No No No
#svm
table(labels_out[,1]==labels_out[,2])
##
## FALSE TRUE
## 86 1562
prop.table(table(labels_out[,1]==labels_out[,2]))
##
## FALSE TRUE
## 0.05218447 0.94781553
# SVM model 94% correct rate - not bad
# tree - 74% correct rate - bad
table(labels_out[,1]==labels_out[,3])
##
## FALSE TRUE
## 429 1219
prop.table(table(labels_out[,1]==labels_out[,3]))
##
## FALSE TRUE
## 0.2603155 0.7396845
# maximum entropy 95% correct rate - not bad
table(labels_out[,1]==labels_out[,4])
##
## FALSE TRUE
## 72 1576
prop.table(table(labels_out[,1]==labels_out[,4]))
##
## FALSE TRUE
## 0.04368932 0.95631068
The next step is to extract the body text of each message.
# Concatenate the raw message lists of each class into one list per class.
# NOTE(review): spamList1 / hamList1 are created outside this excerpt --
# presumably a second set of files; confirm.
spamListC <- c(spamList, spamList1)
hamListC <- c(hamList, hamList1)
head(spamListC)
## [[1]]
## [1] "From 12a1mailbot1@web.de Thu Aug 22 13:17:22 2002\r\nReturn-Path: <12a1mailbot1@web.de>\r\nDelivered-To: zzzz@localhost.example.com\r\nReceived: from localhost (localhost [127.0.0.1])\r\n\tby phobos.labs.example.com (Postfix) with ESMTP id 136B943C32\r\n\tfor <zzzz@localhost>; Thu, 22 Aug 2002 08:17:21 -0400 (EDT)\r\nReceived: from mail.webnote.net [193.120.211.219]\r\n\tby localhost with POP3 (fetchmail-5.9.0)\r\n\tfor zzzz@localhost (single-drop); Thu, 22 Aug 2002 13:17:21 +0100 (IST)\r\nReceived: from dd_it7 ([210.97.77.167])\r\n\tby webnote.net (8.9.3/8.9.3) with ESMTP id NAA04623\r\n\tfor <zzzz@example.com>; Thu, 22 Aug 2002 13:09:41 +0100\r\nFrom: 12a1mailbot1@web.de\r\nReceived: from r-smtp.korea.com - 203.122.2.197 by dd_it7 with Microsoft SMTPSVC(5.5.1775.675.6);\r\n\t Sat, 24 Aug 2002 09:42:10 +0900\r\nTo: <dcek1a1@netsgo.com>\r\nSubject: Life Insurance - Why Pay More?\r\nDate: Wed, 21 Aug 2002 20:31:57 -1600\r\nMIME-Version: 1.0\r\nMessage-ID: <0103c1042001882DD_IT7@dd_it7>\r\nContent-Type: text/html; charset=\"iso-8859-1\"\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n<HTML><HEAD>\r\n<META content=3D\"text/html; charset=3Dwindows-1252\" http-equiv=3DContent-T=\r\nype>\r\n<META content=3D\"MSHTML 5.00.2314.1000\" name=3DGENERATOR></HEAD>\r\n<BODY><!-- Inserted by Calypso -->\r\n<TABLE border=3D0 cellPadding=3D0 cellSpacing=3D2 id=3D_CalyPrintHeader_ r=\r\nules=3Dnone \r\nstyle=3D\"COLOR: black; DISPLAY: none\" width=3D\"100%\">\r\n <TBODY>\r\n <TR>\r\n <TD colSpan=3D3>\r\n <HR color=3Dblack noShade SIZE=3D1>\r\n </TD></TR></TD></TR>\r\n <TR>\r\n <TD colSpan=3D3>\r\n <HR color=3Dblack noShade SIZE=3D1>\r\n </TD></TR></TBODY></TABLE><!-- End Calypso --><!-- Inserted by Calypso=\r\n --><FONT \r\ncolor=3D#000000 face=3DVERDANA,ARIAL,HELVETICA size=3D-2><BR></FONT></TD><=\r\n/TR></TABLE><!-- End Calypso --><FONT color=3D#ff0000 \r\nface=3D\"Copperplate Gothic Bold\" size=3D5 
PTSIZE=3D\"10\">\r\n<CENTER>Save up to 70% on Life Insurance.</CENTER></FONT><FONT color=3D#ff=\r\n0000 \r\nface=3D\"Copperplate Gothic Bold\" size=3D5 PTSIZE=3D\"10\">\r\n<CENTER>Why Spend More Than You Have To?\r\n<CENTER><FONT color=3D#ff0000 face=3D\"Copperplate Gothic Bold\" size=3D5 PT=\r\nSIZE=3D\"10\">\r\n<CENTER>Life Quote Savings\r\n<CENTER>\r\n<P align=3Dleft></P>\r\n<P align=3Dleft></P></FONT></U></I></B><BR></FONT></U></B></U></I>\r\n<P></P>\r\n<CENTER>\r\n<TABLE border=3D0 borderColor=3D#111111 cellPadding=3D0 cellSpacing=3D0 wi=\r\ndth=3D650>\r\n <TBODY></TBODY></TABLE>\r\n<TABLE border=3D0 borderColor=3D#111111 cellPadding=3D5 cellSpacing=3D0 wi=\r\ndth=3D650>\r\n <TBODY>\r\n <TR>\r\n <TD colSpan=3D2 width=3D\"35%\"><B><FONT face=3DVerdana size=3D4>Ensurin=\r\ng your \r\n family's financial security is very important. Life Quote Savings ma=\r\nkes \r\n buying life insurance simple and affordable. We Provide FREE Access =\r\nto The \r\n Very Best Companies and The Lowest Rates.</FONT></B></TD></TR>\r\n <TR>\r\n <TD align=3Dmiddle vAlign=3Dtop width=3D\"18%\">\r\n <TABLE borderColor=3D#111111 width=3D\"100%\">\r\n <TBODY>\r\n <TR>\r\n <TD style=3D\"PADDING-LEFT: 5px; PADDING-RIGHT: 5px\" width=3D\"100=\r\n%\"><FONT \r\n face=3DVerdana size=3D4><B>Life Quote Savings</B> is FAST, EAS=\r\nY and \r\n SAVES you money! Let us help you get started with the best val=\r\nues in \r\n the country on new coverage. You can SAVE hundreds or even tho=\r\nusands \r\n of dollars by requesting a FREE quote from Lifequote Savings. =\r\nOur \r\n service will take you less than 5 minutes to complete. Shop an=\r\nd \r\n compare. SAVE up to 70% on all types of Life insurance! 
\r\n</FONT></TD></TR>\r\n <TR><BR><BR>\r\n <TD height=3D50 style=3D\"PADDING-LEFT: 5px; PADDING-RIGHT: 5px\" \r\n width=3D\"100%\">\r\n <P align=3Dcenter><B><FONT face=3DVerdana size=3D5><A \r\n href=3D\"http://website.e365.cc/savequote/\">Click Here For Your=\r\n \r\n Free Quote!</A></FONT></B></P></TD>\r\n <P><FONT face=3DVerdana size=3D4><STRONG>\r\n <CENTER>Protecting your family is the best investment you'll eve=\r\nr \r\n make!<BR></B></TD></TR>\r\n <TR><BR><BR></STRONG></FONT></TD></TR></TD></TR>\r\n <TR></TR></TBODY></TABLE>\r\n <P align=3Dleft><FONT face=3D\"Arial, Helvetica, sans-serif\" size=3D2=\r\n></FONT></P>\r\n <P></P>\r\n <CENTER><BR><BR><BR>\r\n <P></P>\r\n <P align=3Dleft><BR></B><BR><BR><BR><BR></P>\r\n <P align=3Dcenter><BR></P>\r\n <P align=3Dleft><BR></B><BR><BR></FONT>If you are in receipt of this=\r\n email \r\n in error and/or wish to be removed from our list, <A \r\n href=3D\"mailto:coins@btamail.net.cn\">PLEASE CLICK HERE</A> AND TYPE =\r\nREMOVE. If you \r\n reside in any state which prohibits e-mail solicitations for insuran=\r\nce, \r\n please disregard this \r\n email.<BR></FONT><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR=\r\n><BR><BR><BR></FONT></P></CENTER></CENTER></TR></TBODY></TABLE></CENTER></=\r\nCENTER></CENTER></CENTER></CENTER></BODY></HTML>\r\n\r\n\r\n\r\n"
##
## [[2]]
## [1] "From ilug-admin@linux.ie Thu Aug 22 13:27:39 2002\r\nReturn-Path: <ilug-admin@linux.ie>\r\nDelivered-To: zzzz@localhost.example.com\r\nReceived: from localhost (localhost [127.0.0.1])\r\n\tby phobos.labs.example.com (Postfix) with ESMTP id A7FD7454F6\r\n\tfor <zzzz@localhost>; Thu, 22 Aug 2002 08:27:38 -0400 (EDT)\r\nReceived: from phobos [127.0.0.1]\r\n\tby localhost with IMAP (fetchmail-5.9.0)\r\n\tfor zzzz@localhost (single-drop); Thu, 22 Aug 2002 13:27:38 +0100 (IST)\r\nReceived: from lugh.tuatha.org (root@lugh.tuatha.org [194.125.145.45]) by\r\n dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id g7MCJiZ06043 for\r\n <zzzz-ilug@jmason.org>; Thu, 22 Aug 2002 13:19:44 +0100\r\nReceived: from lugh (root@localhost [127.0.0.1]) by lugh.tuatha.org\r\n (8.9.3/8.9.3) with ESMTP id NAA29323; Thu, 22 Aug 2002 13:18:52 +0100\r\nReceived: from email.qves.com ([67.104.83.251]) by lugh.tuatha.org\r\n (8.9.3/8.9.3) with ESMTP id NAA29282 for <ilug@linux.ie>; Thu,\r\n 22 Aug 2002 13:18:37 +0100\r\nX-Authentication-Warning: lugh.tuatha.org: Host [67.104.83.251] claimed to\r\n be email.qves.com\r\nReceived: from qvp0091 ([169.254.6.22]) by email.qves.com with Microsoft\r\n SMTPSVC(5.0.2195.2966); Thu, 22 Aug 2002 06:18:18 -0600\r\nFrom: \"Slim Down\" <taylor@s3.serveimage.com>\r\nTo: <ilug@linux.ie>\r\nDate: Thu, 22 Aug 2002 06:18:18 -0600\r\nMessage-Id: <59e6301c249d5$ffb7ea20$1606fea9@freeyankeedom.com>\r\nMIME-Version: 1.0\r\nContent-Type: text/plain; charset=\"iso-8859-1\"\r\nContent-Transfer-Encoding: 7bit\r\nX-Mailer: Microsoft CDO for Windows 2000\r\nThread-Index: AcJJ1f+3FWdz11AmR6uWbmQN5gGxxw==\r\nContent-Class: urn:content-classes:message\r\nX-Mimeole: Produced By Microsoft MimeOLE V6.00.2462.0000\r\nX-Originalarrivaltime: 22 Aug 2002 12:18:18.0699 (UTC) FILETIME=[FFB949B0:01C249D5]\r\nSubject: [ILUG] Guaranteed to lose 10-12 lbs in 30 days 10.206\r\nSender: ilug-admin@linux.ie\r\nErrors-To: ilug-admin@linux.ie\r\nX-Mailman-Version: 1.1\r\nPrecedence: 
bulk\r\nList-Id: Irish Linux Users' Group <ilug.linux.ie>\r\nX-Beenthere: ilug@linux.ie\r\n\r\n1) Fight The Risk of Cancer!\r\nhttp://www.adclick.ws/p.cfm?o=315&s=pk007\r\n\r\n2) Slim Down - Guaranteed to lose 10-12 lbs in 30 days\r\nhttp://www.adclick.ws/p.cfm?o=249&s=pk007\r\n\r\n3) Get the Child Support You Deserve - Free Legal Advice\r\nhttp://www.adclick.ws/p.cfm?o=245&s=pk002\r\n\r\n4) Join the Web's Fastest Growing Singles Community\r\nhttp://www.adclick.ws/p.cfm?o=259&s=pk007\r\n\r\n5) Start Your Private Photo Album Online!\r\nhttp://www.adclick.ws/p.cfm?o=283&s=pk007\r\n\r\nHave a Wonderful Day,\r\nOffer Manager\r\nPrizeMama\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nIf you wish to leave this list please use the link below.\r\nhttp://www.qves.com/trim/?ilug@linux.ie%7C17%7C114258\r\n\r\n\r\n-- \r\nIrish Linux Users' Group: ilug@linux.ie\r\nhttp://www.linux.ie/mailman/listinfo/ilug for (un)subscription information.\r\nList maintainer: listmaster@linux.ie\r\n\r\n"
##
## [[3]]
## [1] "From sabrina@mx3.1premio.com Thu Aug 22 14:44:07 2002\r\nReturn-Path: <sabrina@mx3.1premio.com>\r\nDelivered-To: zzzz@localhost.example.com\r\nReceived: from localhost (localhost [127.0.0.1])\r\n\tby phobos.labs.example.com (Postfix) with ESMTP id 1E90847C66\r\n\tfor <zzzz@localhost>; Thu, 22 Aug 2002 09:44:02 -0400 (EDT)\r\nReceived: from mail.webnote.net [193.120.211.219]\r\n\tby localhost with POP3 (fetchmail-5.9.0)\r\n\tfor zzzz@localhost (single-drop); Thu, 22 Aug 2002 14:44:03 +0100 (IST)\r\nReceived: from email.qves.com (email1.qves.net [209.63.151.251] (may be forged))\r\n\tby webnote.net (8.9.3/8.9.3) with ESMTP id OAA04953\r\n\tfor <zzzz@example.com>; Thu, 22 Aug 2002 14:37:23 +0100\r\nReceived: from qvp0086 ([169.254.6.17]) by email.qves.com with Microsoft SMTPSVC(5.0.2195.2966);\r\n\t Thu, 22 Aug 2002 07:36:20 -0600\r\nFrom: \"Slim Down\" <sabrina@mx3.1premio.com>\r\nTo: <zzzz@example.com>\r\nSubject: Guaranteed to lose 10-12 lbs in 30 days 11.150\r\nDate: Thu, 22 Aug 2002 07:36:19 -0600\r\nMessage-ID: <9a63c01c249e0$e5a9d610$1106fea9@freeyankeedom.com>\r\nMIME-Version: 1.0\r\nContent-Type: text/plain;\r\n\tcharset=\"iso-8859-1\"\r\nContent-Transfer-Encoding: 7bit\r\nX-Mailer: Microsoft CDO for Windows 2000\r\nThread-Index: AcJJ4OWpowGq7rdNSwCz5HE3x9ZZDQ==\r\nContent-Class: urn:content-classes:message\r\nX-MimeOLE: Produced By Microsoft MimeOLE V6.00.2462.0000\r\nX-OriginalArrivalTime: 22 Aug 2002 13:36:20.0969 (UTC) FILETIME=[E692FD90:01C249E0]\r\n\r\n1) Fight The Risk of Cancer!\r\nhttp://www.adclick.ws/p.cfm?o=315&s=pk007\r\n\r\n2) Slim Down - Guaranteed to lose 10-12 lbs in 30 days\r\nhttp://www.adclick.ws/p.cfm?o=249&s=pk007\r\n\r\n3) Get the Child Support You Deserve - Free Legal Advice\r\nhttp://www.adclick.ws/p.cfm?o=245&s=pk002\r\n\r\n4) Join the Web's Fastest Growing Singles Community\r\nhttp://www.adclick.ws/p.cfm?o=259&s=pk007\r\n\r\n5) Start Your Private Photo Album Online!\r\nhttp://www.adclick.ws/p.cfm?o=283&s=pk007\r\n\r\nHave a 
Wonderful Day,\r\nOffer Manager\r\nPrizeMama\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nIf you wish to leave this list please use the link below.\r\nhttp://www.qves.com/trim/?zzzz@example.com%7C17%7C308417\r\n\r\n"
##
## [[4]]
## [1] "From wsup@playful.com Thu Aug 22 16:17:00 2002\r\nReturn-Path: <wsup@playful.com>\r\nDelivered-To: zzzz@localhost.example.com\r\nReceived: from localhost (localhost [127.0.0.1])\r\n\tby phobos.labs.example.com (Postfix) with ESMTP id B8E8D43F99\r\n\tfor <zzzz@localhost>; Thu, 22 Aug 2002 11:16:59 -0400 (EDT)\r\nReceived: from mail.webnote.net [193.120.211.219]\r\n\tby localhost with POP3 (fetchmail-5.9.0)\r\n\tfor zzzz@localhost (single-drop); Thu, 22 Aug 2002 16:16:59 +0100 (IST)\r\nReceived: from smtp.easydns.com (smtp.easydns.com [205.210.42.30])\r\n\tby webnote.net (8.9.3/8.9.3) with ESMTP id QAA05397\r\n\tfor <zzzz@example.com>; Thu, 22 Aug 2002 16:13:20 +0100\r\nReceived: from 200.161.16.132 (unknown [210.19.113.130])\r\n\tby smtp.easydns.com (Postfix) with SMTP id 694632EE5A\r\n\tfor <zzzz@example.com>; Thu, 22 Aug 2002 11:12:57 -0400 (EDT)\r\nReceived: from unknown (52.127.142.42) by rly-xl04.mx.aol.com with smtp; Aug, 22 2002 8:02:13 AM +0400\r\nReceived: from [176.244.234.14] by smtp-server6.tampabay.rr.com with local; Aug, 22 2002 6:50:51 AM +1200\r\nReceived: from [106.226.127.61] by n7.groups.yahoo.com with local; Aug, 22 2002 5:50:53 AM +0300\r\nReceived: from 177.139.227.166 ([177.139.227.166]) by sparc.isl.net with QMQP; Aug, 22 2002 4:47:16 AM -0000\r\nFrom: Account Services <wsup@playful.com>\r\nTo: zzzz@example.com\r\nCc: \r\nSubject: Re: Fw: User Name & Password to Membership To 5 Sites zzzz@example.com pviqg\r\nSender: Account Services <wsup@playful.com>\r\nMime-Version: 1.0\r\nContent-Type: text/plain; charset=\"iso-8859-1\"\r\nDate: Thu, 22 Aug 2002 08:13:35 -0700\r\nX-Mailer: Microsoft Outlook Express 5.00.2615.200\r\nX-Priority: 1\r\nMessage-Id: <20020822151301.694632EE5A@smtp.easydns.com>\r\n\r\n##################################################\r\n# #\r\n# Adult Club #\r\n# Offers FREE Membership #\r\n# #\r\n##################################################\r\n\r\n>>>>> INSTANT ACCESS TO ALL SITES NOW\r\n>>>>> Your User Name And 
Password is.\r\n>>>>> User Name: zzzz@example.com\r\n>>>>> Password: 760382\r\n\r\n5 of the Best Adult Sites on the Internet for FREE!\r\n---------------------------------------\r\nNEWS 08/18/02\r\nWith just over 2.9 Million Members that signed up for FREE, Last month there were 721,184 New\r\nMembers. Are you one of them yet???\r\n---------------------------------------\r\nOur Membership FAQ\r\n\r\nQ. Why are you offering free access to 5 adult membership sites for free?\r\nA. I have advertisers that pay me for ad space so you don't have to pay for membership.\r\n\r\nQ. Is it true my membership is for life?\r\nA. Absolutely you'll never have to pay a cent the advertisers do.\r\n\r\nQ. Can I give my account to my friends and family?\r\nA. Yes, as long they are over the age of 18.\r\n\r\nQ. Do I have to sign up for all 5 membership sites?\r\nA. No just one to get access to all of them.\r\n\r\nQ. How do I get started?\r\nA. Click on one of the following links below to become a member.\r\n\r\n- These are multi million dollar operations with policies and rules.\r\n- Fill in the required info and they won't charge you for the Free pass!\r\n- If you don't believe us, just read their terms and conditions.\r\n\r\n---------------------------\r\n\r\n# 5. > Adults Farm\r\nhttp://80.71.66.8/farm/?aid=760382\r\nGirls and Animals Getting Freaky....FREE Lifetime Membership!!\r\n\r\n# 4. > Sexy Celebes\r\nhttp://80.71.66.8/celebst/?aid=760382\r\nThousands Of XXX Celebes doing it...FREE Lifetime Membership!!\r\n\r\n# 3. > Play House Porn\r\nhttp://80.71.66.8/mega/?aid=760382\r\nLive Feeds From 60 Sites And Web Cams...FREE Lifetime Membership!!\r\n\r\n# 2. > Asian Sex Fantasies\r\nhttp://80.71.66.8/asian/?aid=760382\r\nJapanese Schoolgirls, Live Sex Shows ...FREE Lifetime Membership!!\r\n\r\n# 1. > Lesbian Lace\r\nhttp://80.71.66.8/lesbian/?aid=760382\r\nGirls and Girls Getting Freaky! 
...FREE Lifetime Membership!!\r\n\r\n--------------------------\r\n\r\nJennifer Simpson, Miami, FL\r\nYour FREE lifetime membership has entertained my boyffriend and I for\r\nthe last two years! Your Adult Sites are the best on the net!\r\n\r\nJoe Morgan Manhattan, NY\r\nYour live sex shows and live sex cams are unbelievable. The best part\r\nabout your porn sites, is that they're absolutely FREE!\r\n\r\n--------------------------\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nRemoval Instructions:\r\n\r\nYou have received this advertisement because you have opted in to receive free adult internet\r\noffers and specials through our affiliated websites. If you do not wish to receive further emails\r\nor have received the email in error you may opt-out of our database here\r\nhttp://80.71.66.8/optout/ . Please allow 24 hours for removal.\r\n\r\nvonolmosatkirekpups\r\n\r\n"
##
## [[5]]
## [1] "From social-admin@linux.ie Thu Aug 22 16:37:34 2002\r\nReturn-Path: <social-admin@linux.ie>\r\nDelivered-To: zzzz@localhost.example.com\r\nReceived: from localhost (localhost [127.0.0.1])\r\n\tby phobos.labs.example.com (Postfix) with ESMTP id 30B2143F99\r\n\tfor <zzzz@localhost>; Thu, 22 Aug 2002 11:37:34 -0400 (EDT)\r\nReceived: from phobos [127.0.0.1]\r\n\tby localhost with IMAP (fetchmail-5.9.0)\r\n\tfor zzzz@localhost (single-drop); Thu, 22 Aug 2002 16:37:34 +0100 (IST)\r\nReceived: from lugh.tuatha.org (root@lugh.tuatha.org [194.125.145.45]) by\r\n dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id g7MFYOZ12548 for\r\n <zzzz+ilug-social@jmason.org>; Thu, 22 Aug 2002 16:34:25 +0100\r\nReceived: from lugh (root@localhost [127.0.0.1]) by lugh.tuatha.org\r\n (8.9.3/8.9.3) with ESMTP id QAA07692; Thu, 22 Aug 2002 16:33:43 +0100\r\nReceived: from email.qves.com ([67.104.83.251]) by lugh.tuatha.org\r\n (8.9.3/8.9.3) with ESMTP id QAA07662 for <social@linux.ie>; Thu,\r\n 22 Aug 2002 16:33:37 +0100\r\nX-Authentication-Warning: lugh.tuatha.org: Host [67.104.83.251] claimed to\r\n be email.qves.com\r\nReceived: from qvp0080 ([169.254.6.11]) by email.qves.com with Microsoft\r\n SMTPSVC(5.0.2195.2966); Thu, 22 Aug 2002 09:33:08 -0600\r\nFrom: \"Slim n Trim\" <yenene@mx2.1premio.com>\r\nTo: <social@linux.ie>\r\nDate: Thu, 22 Aug 2002 09:33:07 -0600\r\nMessage-Id: <104c1101c249f1$36e098b0$0b06fea9@freeyankeedom.com>\r\nMIME-Version: 1.0\r\nContent-Type: text/plain; charset=\"iso-8859-1\"\r\nContent-Transfer-Encoding: 7bit\r\nX-Mailer: Microsoft CDO for Windows 2000\r\nThread-Index: AcJJ8TbZoOKEj0AtTsKxJ7ZmOA0e/w==\r\nContent-Class: urn:content-classes:message\r\nX-Mimeole: Produced By Microsoft MimeOLE V6.00.2462.0000\r\nX-Originalarrivaltime: 22 Aug 2002 15:33:08.0313 (UTC) FILETIME=[3746D490:01C249F1]\r\nSubject: [ILUG-Social] re: Guaranteed to lose 10-12 lbs in 30 days 10.148\r\nSender: social-admin@linux.ie\r\nErrors-To: 
social-admin@linux.ie\r\nX-Mailman-Version: 1.1\r\nPrecedence: bulk\r\nList-Id: Irish Linux Users' Group social events <social.linux.ie>\r\nX-Beenthere: social@linux.ie\r\n\r\nI thought you might like these:\r\n1) Slim Down - Guaranteed to lose 10-12 lbs in 30 days\r\nhttp://www.freeyankee.com/cgi/fy2/to.cgi?l=822slim1\r\n\r\n2) Fight The Risk of Cancer! \r\nhttp://www.freeyankee.com/cgi/fy2/to.cgi?l=822nic1 \r\n\r\n3) Get the Child Support You Deserve - Free Legal Advice \r\nhttp://www.freeyankee.com/cgi/fy2/to.cgi?l=822ppl1\r\n\r\nOffer Manager\r\nDaily-Deals\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nIf you wish to leave this list please use the link below.\r\nhttp://www.qves.com/trim/?social@linux.ie%7C29%7C134077\r\n\r\n\r\n-- \r\nIrish Linux Users' Group Social Events: social@linux.ie\r\nhttp://www.linux.ie/mailman/listinfo/social for (un)subscription information.\r\nList maintainer: listmaster@linux.ie\r\n\r\n"
##
## [[6]]
## [1] "From Thecashsystem@firemail.de Thu Aug 22 16:58:24 2002\r\nReturn-Path: <Thecashsystem@firemail.de>\r\nDelivered-To: zzzz@localhost.example.com\r\nReceived: from localhost (localhost [127.0.0.1])\r\n\tby phobos.labs.example.com (Postfix) with ESMTP id 3453043F99\r\n\tfor <zzzz@localhost>; Thu, 22 Aug 2002 11:58:24 -0400 (EDT)\r\nReceived: from mail.webnote.net [193.120.211.219]\r\n\tby localhost with POP3 (fetchmail-5.9.0)\r\n\tfor zzzz@localhost (single-drop); Thu, 22 Aug 2002 16:58:24 +0100 (IST)\r\nReceived: from mailbox-13.st1.spray.net (mailbox-13.st1.spray.net [212.78.202.113])\r\n\tby webnote.net (8.9.3/8.9.3) with ESMTP id QAA05573\r\n\tfor <zzzz@example.com>; Thu, 22 Aug 2002 16:55:29 +0100\r\nReceived: from freesource (user-24-214-168-210.knology.net [24.214.168.210])\r\n\tby mailbox-13.st1.spray.net (Postfix) with ESMTP\r\n\tid ADDD03E25C; Thu, 22 Aug 2002 17:50:55 +0200 (DST)\r\nMessage-ID: <413-220028422154219900@freesource>\r\nX-Priority: 1\r\nTo: \"1\" <thecashsystem@firemail.de>\r\nFrom: \"TheCashSystem\" <Thecashsystem@firemail.de>\r\nSubject: RE: Your Bank Account Information \r\nDate: Thu, 22 Aug 2002 10:42:19 -0500\r\nMIME-Version: 1.0\r\nContent-type: text/plain; charset=US-ASCII\r\nX-MIME-Autoconverted: from quoted-printable to 8bit by webnote.net id QAA05573\r\nContent-Transfer-Encoding: 8bit\r\n\r\nA POWERHOUSE GIFTING PROGRAM You Don't Want To Miss! \r\n \r\n GET IN WITH THE FOUNDERS! 
\r\nThe MAJOR PLAYERS are on This ONE\r\nFor ONCE be where the PlayerS are\r\nThis is YOUR Private Invitation\r\n\r\nEXPERTS ARE CALLING THIS THE FASTEST WAY \r\nTO HUGE CASH FLOW EVER CONCEIVED\r\nLeverage $1,000 into $50,000 Over and Over Again\r\n\r\nTHE QUESTION HERE IS:\r\nYOU EITHER WANT TO BE WEALTHY \r\nOR YOU DON'T!!!\r\nWHICH ONE ARE YOU?\r\nI am tossing you a financial lifeline and for your sake I \r\nHope you GRAB onto it and hold on tight For the Ride of youR life!\r\n\r\nTestimonials\r\n\r\nHear what average people are doing their first few days:\r\nWe've received 8,000 in 1 day and we are doing that over and over again!' Q.S. in AL\r\n I'm a single mother in FL and I've received 12,000 in the last 4 days. D. S. in FL\r\nI was not sure about this when I sent off my $1,000 pledge, but I got back $2,000 the very next day! L.L. in KY\r\nI didn't have the money, so I found myself a partner to work this with. We have received $4,000 over the last 2 days. \r\nI think I made the right decision; don't you? K. C. in FL\r\nI pick up $3,000 my first day and I they gave me free leads and all the training, you can too! J.W. in CA\r\n\r\nANNOUNCING: We will CLOSE your sales for YOU! And Help you get a Fax Blast IMMEDIATELY Upon Your Entry!!! YOU Make the MONEY!!!\r\nFREE LEADS!!! TRAINING!!!\r\n\r\n$$DON'T WAIT!!! CALL NOW $$\r\nFAX BACK TO: 1-800-421-6318 OR Call 1-800-896-6568 \r\n\r\nName__________________________________Phone___________________________________________\r\n\r\nFax_____________________________________Email____________________________________________\r\n\r\nBest Time To Call_________________________Time Zone________________________________________\r\n\r\nThis message is sent in compliance of the new e-mail bill. \"Per Section 301, Paragraph (a)(2)(C) of S. 1618, further transmissions by the sender of this email may be stopped, at no cost to you, by sending a reply to this email address with the word \"REMOVE\" in the subject line. 
Errors, omissions, and exceptions excluded.\r\n \r\nThis is NOT spam! I have compiled this list from our Replicate Database, relative to Seattle Marketing Group, The Gigt, or Turbo Team for the sole purpose of these communications. Your continued inclusion is ONLY by your gracious permission. If you wish to not receive this mail from me, please send an email to tesrewinter@yahoo.com with \"Remove\" in the subject and you will be deleted immediately.\r\n\r\n\r\n\r\n"
# Extract the body of every raw spam message: everything after the first
# blank line (the header/body separator), with each line break replaced by a
# single space. Messages without a blank line yield NA.
#
# This replaces the earlier sentinel-string approach, which
#   * only recognised CRLF line endings,
#   * relied on a magic offset equal to the sentinel's length, and
#   * silently cut the body at the first stray line terminator, because `.`
#     does not match line terminators in stringr regular expressions.
spamBody <- lapply(spamListC, function(msg) {
  # End position of the first blank line; NA when there is none.
  header_end <- str_locate(msg, "\\r?\\n\\r?\\n")[1, "end"]
  if (is.na(header_end)) {
    return(NA_character_)
  }
  body <- str_sub(msg, start = header_end + 1)
  str_replace_all(body, "\\r?\\n", " ")
})
head(spamBody)
## [[1]]
## [1] "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"> <HTML><HEAD> <META content=3D\"text/html; charset=3Dwindows-1252\" http-equiv=3DContent-T= ype> <META content=3D\"MSHTML 5.00.2314.1000\" name=3DGENERATOR></HEAD> <BODY><!-- Inserted by Calypso --> <TABLE border=3D0 cellPadding=3D0 cellSpacing=3D2 id=3D_CalyPrintHeader_ r= ules=3Dnone style=3D\"COLOR: black; DISPLAY: none\" width=3D\"100%\"> <TBODY> <TR> <TD colSpan=3D3> <HR color=3Dblack noShade SIZE=3D1> </TD></TR></TD></TR> <TR> <TD colSpan=3D3> <HR color=3Dblack noShade SIZE=3D1> </TD></TR></TBODY></TABLE><!-- End Calypso --><!-- Inserted by Calypso= --><FONT color=3D#000000 face=3DVERDANA,ARIAL,HELVETICA size=3D-2><BR></FONT></TD><= /TR></TABLE><!-- End Calypso --><FONT color=3D#ff0000 face=3D\"Copperplate Gothic Bold\" size=3D5 PTSIZE=3D\"10\"> <CENTER>Save up to 70% on Life Insurance.</CENTER></FONT><FONT color=3D#ff= 0000 face=3D\"Copperplate Gothic Bold\" size=3D5 PTSIZE=3D\"10\"> <CENTER>Why Spend More Than You Have To? <CENTER><FONT color=3D#ff0000 face=3D\"Copperplate Gothic Bold\" size=3D5 PT= SIZE=3D\"10\"> <CENTER>Life Quote Savings <CENTER> <P align=3Dleft></P> <P align=3Dleft></P></FONT></U></I></B><BR></FONT></U></B></U></I> <P></P> <CENTER> <TABLE border=3D0 borderColor=3D#111111 cellPadding=3D0 cellSpacing=3D0 wi= dth=3D650> <TBODY></TBODY></TABLE> <TABLE border=3D0 borderColor=3D#111111 cellPadding=3D5 cellSpacing=3D0 wi= dth=3D650> <TBODY> <TR> <TD colSpan=3D2 width=3D\"35%\"><B><FONT face=3DVerdana size=3D4>Ensurin= g your family's financial security is very important. Life Quote Savings ma= kes buying life insurance simple and affordable. 
We Provide FREE Access = to The Very Best Companies and The Lowest Rates.</FONT></B></TD></TR> <TR> <TD align=3Dmiddle vAlign=3Dtop width=3D\"18%\"> <TABLE borderColor=3D#111111 width=3D\"100%\"> <TBODY> <TR> <TD style=3D\"PADDING-LEFT: 5px; PADDING-RIGHT: 5px\" width=3D\"100= %\"><FONT face=3DVerdana size=3D4><B>Life Quote Savings</B> is FAST, EAS= Y and SAVES you money! Let us help you get started with the best val= ues in the country on new coverage. You can SAVE hundreds or even tho= usands of dollars by requesting a FREE quote from Lifequote Savings. = Our service will take you less than 5 minutes to complete. Shop an= d compare. SAVE up to 70% on all types of Life insurance! </FONT></TD></TR> <TR><BR><BR> <TD height=3D50 style=3D\"PADDING-LEFT: 5px; PADDING-RIGHT: 5px\" width=3D\"100%\"> <P align=3Dcenter><B><FONT face=3DVerdana size=3D5><A href=3D\"http://website.e365.cc/savequote/\">Click Here For Your= Free Quote!</A></FONT></B></P></TD> <P><FONT face=3DVerdana size=3D4><STRONG> <CENTER>Protecting your family is the best investment you'll eve= r make!<BR></B></TD></TR> <TR><BR><BR></STRONG></FONT></TD></TR></TD></TR> <TR></TR></TBODY></TABLE> <P align=3Dleft><FONT face=3D\"Arial, Helvetica, sans-serif\" size=3D2= ></FONT></P> <P></P> <CENTER><BR><BR><BR> <P></P> <P align=3Dleft><BR></B><BR><BR><BR><BR></P> <P align=3Dcenter><BR></P> <P align=3Dleft><BR></B><BR><BR></FONT>If you are in receipt of this= email in error and/or wish to be removed from our list, <A href=3D\"mailto:coins@btamail.net.cn\">PLEASE CLICK HERE</A> AND TYPE = REMOVE. If you reside in any state which prohibits e-mail solicitations for insuran= ce, please disregard this email.<BR></FONT><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR><BR= ><BR><BR><BR></FONT></P></CENTER></CENTER></TR></TBODY></TABLE></CENTER></= CENTER></CENTER></CENTER></CENTER></BODY></HTML> "
##
## [[2]]
## [1] "1) Fight The Risk of Cancer! http://www.adclick.ws/p.cfm?o=315&s=pk007 2) Slim Down - Guaranteed to lose 10-12 lbs in 30 days http://www.adclick.ws/p.cfm?o=249&s=pk007 3) Get the Child Support You Deserve - Free Legal Advice http://www.adclick.ws/p.cfm?o=245&s=pk002 4) Join the Web's Fastest Growing Singles Community http://www.adclick.ws/p.cfm?o=259&s=pk007 5) Start Your Private Photo Album Online! http://www.adclick.ws/p.cfm?o=283&s=pk007 Have a Wonderful Day, Offer Manager PrizeMama If you wish to leave this list please use the link below. http://www.qves.com/trim/?ilug@linux.ie%7C17%7C114258 -- Irish Linux Users' Group: ilug@linux.ie http://www.linux.ie/mailman/listinfo/ilug for (un)subscription information. List maintainer: listmaster@linux.ie "
##
## [[3]]
## [1] "1) Fight The Risk of Cancer! http://www.adclick.ws/p.cfm?o=315&s=pk007 2) Slim Down - Guaranteed to lose 10-12 lbs in 30 days http://www.adclick.ws/p.cfm?o=249&s=pk007 3) Get the Child Support You Deserve - Free Legal Advice http://www.adclick.ws/p.cfm?o=245&s=pk002 4) Join the Web's Fastest Growing Singles Community http://www.adclick.ws/p.cfm?o=259&s=pk007 5) Start Your Private Photo Album Online! http://www.adclick.ws/p.cfm?o=283&s=pk007 Have a Wonderful Day, Offer Manager PrizeMama If you wish to leave this list please use the link below. http://www.qves.com/trim/?zzzz@example.com%7C17%7C308417 "
##
## [[4]]
## [1] "################################################## # # # Adult Club # # Offers FREE Membership # # # ################################################## >>>>> INSTANT ACCESS TO ALL SITES NOW >>>>> Your User Name And Password is. >>>>> User Name: zzzz@example.com >>>>> Password: 760382 5 of the Best Adult Sites on the Internet for FREE! --------------------------------------- NEWS 08/18/02 With just over 2.9 Million Members that signed up for FREE, Last month there were 721,184 New Members. Are you one of them yet??? --------------------------------------- Our Membership FAQ Q. Why are you offering free access to 5 adult membership sites for free? A. I have advertisers that pay me for ad space so you don't have to pay for membership. Q. Is it true my membership is for life? A. Absolutely you'll never have to pay a cent the advertisers do. Q. Can I give my account to my friends and family? A. Yes, as long they are over the age of 18. Q. Do I have to sign up for all 5 membership sites? A. No just one to get access to all of them. Q. How do I get started? A. Click on one of the following links below to become a member. - These are multi million dollar operations with policies and rules. - Fill in the required info and they won't charge you for the Free pass! - If you don't believe us, just read their terms and conditions. --------------------------- # 5. > Adults Farm http://80.71.66.8/farm/?aid=760382 Girls and Animals Getting Freaky....FREE Lifetime Membership!! # 4. > Sexy Celebes http://80.71.66.8/celebst/?aid=760382 Thousands Of XXX Celebes doing it...FREE Lifetime Membership!! # 3. > Play House Porn http://80.71.66.8/mega/?aid=760382 Live Feeds From 60 Sites And Web Cams...FREE Lifetime Membership!! # 2. > Asian Sex Fantasies http://80.71.66.8/asian/?aid=760382 Japanese Schoolgirls, Live Sex Shows ...FREE Lifetime Membership!! # 1. > Lesbian Lace http://80.71.66.8/lesbian/?aid=760382 Girls and Girls Getting Freaky! ...FREE Lifetime Membership!! 
-------------------------- Jennifer Simpson, Miami, FL Your FREE lifetime membership has entertained my boyffriend and I for the last two years! Your Adult Sites are the best on the net! Joe Morgan Manhattan, NY Your live sex shows and live sex cams are unbelievable. The best part about your porn sites, is that they're absolutely FREE! -------------------------- Removal Instructions: You have received this advertisement because you have opted in to receive free adult internet offers and specials through our affiliated websites. If you do not wish to receive further emails or have received the email in error you may opt-out of our database here http://80.71.66.8/optout/ . Please allow 24 hours for removal. vonolmosatkirekpups "
##
## [[5]]
## [1] "I thought you might like these: 1) Slim Down - Guaranteed to lose 10-12 lbs in 30 days http://www.freeyankee.com/cgi/fy2/to.cgi?l=822slim1 2) Fight The Risk of Cancer! http://www.freeyankee.com/cgi/fy2/to.cgi?l=822nic1 3) Get the Child Support You Deserve - Free Legal Advice http://www.freeyankee.com/cgi/fy2/to.cgi?l=822ppl1 Offer Manager Daily-Deals If you wish to leave this list please use the link below. http://www.qves.com/trim/?social@linux.ie%7C29%7C134077 -- Irish Linux Users' Group Social Events: social@linux.ie http://www.linux.ie/mailman/listinfo/social for (un)subscription information. List maintainer: listmaster@linux.ie "
##
## [[6]]
## [1] "A POWERHOUSE GIFTING PROGRAM You Don't Want To Miss! GET IN WITH THE FOUNDERS! The MAJOR PLAYERS are on This ONE For ONCE be where the PlayerS are This is YOUR Private Invitation EXPERTS ARE CALLING THIS THE FASTEST WAY TO HUGE CASH FLOW EVER CONCEIVED Leverage $1,000 into $50,000 Over and Over Again THE QUESTION HERE IS: YOU EITHER WANT TO BE WEALTHY OR YOU DON'T!!! WHICH ONE ARE YOU? I am tossing you a financial lifeline and for your sake I Hope you GRAB onto it and hold on tight For the Ride of youR life! Testimonials Hear what average people are doing their first few days: We've received 8,000 in 1 day and we are doing that over and over again!' Q.S. in AL I'm a single mother in FL and I've received 12,000 in the last 4 days. D. S. in FL I was not sure about this when I sent off my $1,000 pledge, but I got back $2,000 the very next day! L.L. in KY I didn't have the money, so I found myself a partner to work this with. We have received $4,000 over the last 2 days. I think I made the right decision; don't you? K. C. in FL I pick up $3,000 my first day and I they gave me free leads and all the training, you can too! J.W. in CA ANNOUNCING: We will CLOSE your sales for YOU! And Help you get a Fax Blast IMMEDIATELY Upon Your Entry!!! YOU Make the MONEY!!! FREE LEADS!!! TRAINING!!! $$DON'T WAIT!!! CALL NOW $$ FAX BACK TO: 1-800-421-6318 OR Call 1-800-896-6568 Name__________________________________Phone___________________________________________ Fax_____________________________________Email____________________________________________ Best Time To Call_________________________Time Zone________________________________________ This message is sent in compliance of the new e-mail bill. \"Per Section 301, Paragraph (a)(2)(C) of S. 1618, further transmissions by the sender of this email may be stopped, at no cost to you, by sending a reply to this email address with the word \"REMOVE\" in the subject line. Errors, omissions, and exceptions excluded. This is NOT spam! 
I have compiled this list from our Replicate Database, relative to Seattle Marketing Group, The Gigt, or Turbo Team for the sole purpose of these communications. Your continued inclusion is ONLY by your gracious permission. If you wish to not receive this mail from me, please send an email to tesrewinter@yahoo.com with \"Remove\" in the subject and you will be deleted immediately. "
# Keep only the spam bodies that are not HTML, i.e. whose lower-cased text
# does not contain an "<html>" tag.
# vapply() pins the result to one logical per body, so an empty spamBody
# gives logical(0) rather than an empty list.
cond <- vapply(
  spamBody,
  function(body) !str_detect(tolower(body), "<html>"),
  logical(1)
)
# which() drops bodies for which the test is NA (no body was extracted);
# indexing the list with NA would insert NULL entries instead.
spamTextList <- spamBody[which(cond)]
# Inspect the container type without assigning it back. The previous
# `spamTextList <- typeof(spamTextList)` replaced the filtered list with the
# string "list", which is what the outputs recorded below reflect.
typeof(spamTextList)
length(spamTextList)
## [1] 1
head(spamTextList,1)
## [1] "list"
# Build one text per ham message by joining hamListAddressC and
# hamListSubjectC element-wise with a single space in between.
# NOTE(review): both inputs are created outside this excerpt -- presumably
# the cleaned sender-address tokens and subject tokens; confirm.
hamListAS <- str_c(hamListAddressC, " ", hamListSubjectC)
head(hamListAS)
## [1] "kre munnari oz re new sequences window"
## [2] "burt cursor system zzzzteana re alexander"
## [3] "timc ubh zzzzteana moscow bomber"
## [4] "monty roscom irr klez the virus that won t die"
## [5] "tony linuxworks com re insert signature"
## [6] "smith ee ed ac re zzzzteana nothing like mama used to make"