Our Project Team 4 (Banu Boopalan, Samuel Kigamba, James Mundy, Alain T Kuiete) will submit two separate RPubs documents. In this second document we performed data transformations, exploratory data analysis, and visualizations using word clouds and word-frequency plots, then fit an SVM model and reported its confusion matrix results. We tried to plot the model using plot() but were not successful in finding a way to represent it. The number of support vectors is high, so we need to dive deeper into how to represent and plot the model, whether through plot(), the kernlab package, or Kernfit. Within the modelling workflow we create a document-term matrix and a term-document matrix, split the data into training and test sets, and then run the model and report its summary. The accuracy reported by the SVM will differ for each teammate because each of us reads in our own files from a local directory. Upon first review, the SVM reported higher accuracy than Naive Bayes.
We collaborated via PowerPoint, GitHub, and GoToMeeting, along with weekly meetings on Tuesdays and Fridays.
We have utilized an SVM model in this Project 4 code (our first code used a different approach). Our approach for this project follows:
#loading required Libraries
library(caret)
library(tidyverse)
library(tidyr)
library(dplyr)
library(stringr)
library(tidytext)
library(wordcloud)
library(broom)
library(tm)
library(e1071)
library(quanteda)
library(ggplot2)

#' Build a cleaned, labelled text corpus from a directory of documents
#'
#' Reads every file under `dir` into a `tm` volatile corpus and applies, in
#' this order: conversion to plain-text documents, lower-casing, SMART
#' stop-word removal, punctuation removal, number removal, whitespace
#' collapsing and stemming. The order of the steps is significant and is kept
#' exactly as originally written.
#'
#' @param dir Path to a directory of documents; passed to `DirSource()`.
#' @param label Value stored under the "LABEL" metadata tag of the corpus
#'   (this script passes "Spam" or "Ham").
#' @return The transformed `VCorpus` with its "LABEL" metadata set.
create_corpus <- function(dir, label) {
  # NOTE(review): the printed results in this document show every document id
  # as "character(0)". The `tm_map(PlainTextDocument)` step is a likely cause
  # (it may reset per-document metadata) -- confirm before relying on
  # per-document ids downstream.
  corpus <- VCorpus(DirSource(dir)) %>%
    tm_map(PlainTextDocument) %>%
    tm_map(content_transformer(tolower)) %>%
    tm_map(removeWords, stopwords("SMART")) %>%
    tm_map(removePunctuation) %>%
    tm_map(removeNumbers) %>%
    tm_map(stripWhitespace) %>%
    tm_map(stemDocument)

  # Tag the corpus with its class so it can be recovered after corpora are
  # combined.
  meta(corpus, "LABEL") <- label
  corpus
}
# Locations of the raw e-mail folders (machine-specific paths).
spam_dir <- "C:/Users/Banu/Documents/RScriptfiles/Project4/SpamHam/20050311_spam_2.tar/spam_2"
ham_dir <- "C:/Users/Banu/Documents/RScriptfiles/Project4/SpamHam/easyham/20030228_easy_ham/easy_ham"

# One labelled corpus per class, concatenated with spam first and ham second.
corpus <- c(
  create_corpus(spam_dir, "Spam"),
  create_corpus(ham_dir, "Ham")
)
## <<DocumentTermMatrix (documents: 3898, terms: 84242)>>
## Non-/sparse entries: 557629/327817687
## Sparsity : 100%
## Maximal term length: 855
## Weighting : term frequency (tf)
## # A tibble: 557,629 x 3
## document term count
## <chr> <chr> <dbl>
## 1 character(0) aafcf 1
## 2 character(0) abandon 1
## 3 character(0) accept 1
## 4 character(0) address 1
## 5 character(0) agre 2
## 6 character(0) altern 1
## 7 character(0) altra 1
## 8 character(0) apolog 1
## 9 character(0) aug 8
## 10 character(0) authenticationwarn 1
## # ... with 557,619 more rows
# Take the first 5000 rows of the tidy document-term table and keep only the
# terms that appear in the Bing lexicon, attaching their sentiment label.
dtm_sentiments <- dtm_td %>%
  slice(1:5000) %>%
  inner_join(get_sentiments("bing"), by = c(term = "word"))
dtm_sentiments
## # A tibble: 267 x 4
## document term count sentiment
## <chr> <chr> <dbl> <chr>
## 1 character(0) betray 1 negative
## 2 character(0) burn 1 negative
## 3 character(0) easier 1 positive
## 4 character(0) error 1 negative
## 5 character(0) fail 1 negative
## 6 character(0) fatal 1 negative
## 7 character(0) flaw 1 negative
## 8 character(0) free 3 positive
## 9 character(0) good 1 positive
## 10 character(0) honest 1 positive
## # ... with 257 more rows
# Unnest the corpus text into one word per row, then keep the first 9000 rows.
slice_words <- corpus %>%
  tidy() %>%
  unnest_tokens(word, text) %>%
  slice(1:9000)
library(broom)
# Word frequencies for that sample, restricted to words in the Bing lexicon.
models <- slice_words %>%
  count(word) %>%
  inner_join(get_sentiments("bing"), by = "word")
str(models)
## Classes 'tbl_df', 'tbl' and 'data.frame': 120 obs. of 3 variables:
## $ word : chr "bad" "bankrupt" "benefit" "betray" ...
## $ n : int 4 1 1 1 8 4 1 1 2 2 ...
## $ sentiment: chr "negative" "negative" "positive" "negative" ...
# Net sentiment per document: count-weighted totals of positive and negative
# terms, spread into one column each, then positive minus negative, sorted
# ascending.
dtm_sentiments %>%
  count(document, sentiment, wt = count) %>%
  spread(key = sentiment, value = n, fill = 0) %>%
  mutate(sentiment = positive - negative) %>%
  arrange(sentiment)
## # A tibble: 1 x 4
## document negative positive sentiment
## <chr> <dbl> <dbl> <dbl>
## 1 character(0) 169 258 89
# Plot 1: diverging bar chart of per-term sentiment contribution.
# Terms with a weighted count above 10 are dropped, negative terms are flipped
# below zero, and terms are ordered by their signed count.
dtm_sentiments %>%
  count(sentiment, term, wt = count) %>%
  filter(n <= 10) %>%
  mutate(n = ifelse(sentiment == "negative", -n, n)) %>%
  mutate(term = reorder(term, n)) %>%
  ggplot(aes(term, n, fill = sentiment)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ylab("Contribution to sentiment") +
  ggtitle("Bing Lexicon Sentiment Analysis for corpus")

# Plot 2: the 50 highest-count terms (ties included), one horizontal-bar facet
# per sentiment. The weight column is named explicitly instead of relying on
# top_n() picking the last column.
dtm_sentiments %>%
  count(sentiment, term, wt = count) %>%
  top_n(50, wt = n) %>%
  ungroup() %>%
  mutate(term = reorder(term, n)) %>%
  ggplot(aes(term, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    y = "Contribution to sentiment",
    x = NULL
  ) +
  coord_flip()
#layout(matrix(c(1, 2), nrow=2), heights=c(1, 4))
#par(mar=rep(0, 4))
# Start an empty plot frame and write the wordcloud caption at (0.5, 0.5).
plot.new()
text(0.5, 0.5, labels = "Wordcloud using Bing Lexicon for corpus")
#Only Keep Words found in at least 15 documents
min_docs <- 15  # minimum number of documents a term must occur in
# removeSparseTerms() takes a maximum sparsity, so convert the document count
# into the matching fraction of the corpus.
dtm <- removeSparseTerms(dtm, sparse = 1 - min_docs / length(corpus))
# Dense documents-by-terms matrix used as the modelling input.
model_data <- as.matrix(dtm)
str(model_data)
## num [1:3898, 1:3710] 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, "dimnames")=List of 2
## ..$ Docs : chr [1:3898] "character(0)" "character(0)" "character(0)" "character(0)" ...
## ..$ Terms: chr [1:3710] "aaa" "aaf" "aaronsw" "abandon" ...
# Fix the RNG seed so the partition below is reproducible.
set.seed(12345)
# 75% of rows go to training, sampled on the LABEL column; the rest are held
# out for testing.
# NOTE(review): this expects `model_data` to expose a LABEL column; the step
# that adds it is not shown here -- confirm against the full script.
in_training_set <- createDataPartition(
  model_data[["LABEL"]],
  p = 0.75,
  list = FALSE
)
training_data <- model_data[in_training_set, ]
testing_data <- model_data[-in_training_set, ]
head(training_data, n = 1)
## LABEL aaa aaf aaronsw abandon abf abil absolut absurd abus academ
## 1 Spam 0 0 0 0.003787879 0 0 0 0 0 0
## acc acceler accept acceptablelanguag acceptlanguag access accid
## 1 0 0 0.003787879 0 0 0 0
## accomplish accord account accur accuraci accus acf achiev acknowledg
## 1 0 0 0 0 0 0 0 0 0
## acpi acquir acquisit act action actiondhttpresponseresponseasp activ
## 1 0 0 0 0 0 0 0
## actual adam adamson adapt add addit address addressbr addressfonttd
## 1 0 0 0 0 0 0 0.003787879 0 0
## adf adjust administr admit adopt adsldslhstntxswbellnet
## 1 0 0 0 0 0 0
## adsldslsnfcpacbellnet adsljaxbellsouthnet adult adv advanc advantag
## 1 0 0 0 0 0 0
## advertis advic advis advisor aef aerikssonfastmailfm aff affair affect
## 1 0 0 0 0 0 0 0 0 0
## affili afford afraid africa afternoon age agenc agent aggreg aggress
## 1 0 0 0 0 0 0 0 0 0 0
## agil ago agre agreeabl agreement ahead aid aim air airlin alabama
## 1 0 0 0.007575758 0 0 0 0 0 0 0 0
## alan alaska alcohol alert algorithm align aligncent aligncenterfont
## 1 0 0 0 0 0 0 0 0
## aligncenternbsp alignd aligndcent aligndcenterfont aligndcenterimg
## 1 0 0 0 0 0
## aligndcenternbsp aligndfont aligndleft aligndleftfont aligndmiddl
## 1 0 0 0 0 0
## aligndmiddlefont aligndnbsptd aligndright alignfont alignleft
## 1 0 0 0 0 0
## alignleftfont alignmiddl alli allianc allow alsa alt altd altdbr
## 1 0 0 0 0 0 0 0 0 0
## altdclick alter altern amatthiasrpmforgenet amaz amend america
## 1 0 0 0.003787879 0 0 0 0
## american amount amp analog analysi analyst ancient ander andrew anecdot
## 1 0 0 0 0 0 0 0 0 0 0
## angel angl anglesaminvest angri anim announc annoy annual annuiti anonym
## 1 0 0 0 0 0 0 0 0 0 0
## answer answerspablo anthoni antiabus anticip antiqu anymor anytim aol
## 1 0 0 0 0 0 0 0 0 0
## apach api apolog app appar appeal appear appetit appl appli applic
## 1 0 0 0.003787879 0 0 0 0 0 0 0 0
## applicationpgpsignatur appoint appreci approach approv approxim apr apt
## 1 0 0 0 0 0 0 0 0
## architect architectur archiv area argotech argu argument arial arizona
## 1 0 0 0 0 0 0 0 0 0
## arm armi arrang arrest arriv art articl artist ase asia asian ask asmtp
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## aspect ass assembl assert asset assign assigne assignedto assist associ
## 1 0 0 0 0 0 0 0 0 0 0
## assum assumpt assur attach attack attain attempt attend attent attorney
## 1 0 0 0 0 0 0 0 0 0 0
## attract auction audienc audio aug august austin austinjumpvircio
## 1 0 0 0 0 0.03030303 0 0 0
## australia authent authenticationwarn authnlegwnnet author auto autom
## 1 0 0 0.003787879 0 0 0 0
## automat avail averag avoid await awar award baa babi back background
## 1 0 0 0 0 0 0 0 0 0 0 0
## backgroundcolor backup bad balanc ball ban bandwidth bank bankruptci
## 1 0 0 0 0 0 0 0 0 0
## banner bar barcelona bargain barrera barreraorg barri base basi basic
## 1 0 0 0 0 0 0 0 0 0 0
## bat batch batteri battl baxter bay bbc bcamatthiasrpmforgenet bcf bdf
## 1 0 0 0 0 0 0 0 0 0 0
## beach bear bearer beat beauti beberg bebergmithr bed bee beenther bef
## 1 0 0 0 0 0 0 0 0 0 0.003787879 0
## began begin behalf behavior behaviour belief believ bell belong
## 1 0 0 0 0 0 0 0 0 0
## belphegorehughesfamilyorg ben beneficiari benefit berkeley bet beta
## 1 0 0 0 0 0 0 0
## bgcolor bgcolorcc bgcolorcccccc bgcolord bgcolordcc bgcolordcccccc
## 1 0 0 0 0 0 0
## bgcolordff bgcolordffff bgcolordffffcc bgcolordffffff bgcolordfont
## 1 0 0 0 0 0
## bgcolorff bgcolorffff bgcolorffffff bgcolorwhit bias big bigger
## 1 0 0 0 0 0 0.007575758 0
## biggest bill billion billwstoddard binari bind biolog birth bit bitbitch
## 1 0 0 0 0 0 0 0 0 0 0
## bitbitchmagnesiumnet biz black blackcombpanasa blair blame blank blast
## 1 0 0 0 0 0 0 0 0
## bless blind bliss block blockquot blockquotefont blog blood blow blue
## 1 0 0 0 0 0 0 0 0 0 0
## board bob bodi bodyhtml boingbo bolcer bold bomb bond bone bonus book
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## boom boost boot border borderbr bordercollaps bordercolor bordercolord
## 1 0 0 0 0 0 0 0 0
## bordercolordarkd bordercolordarkffffff bordercolordff
## 1 0 0 0
## bordercolorlightdffffcc bordercolorlightffffff borderd borderdtd
## 1 0 0 0 0
## borderleft bordertd borderwidth bore born borrow boss boston bother
## 1 0 0 0 0 0 0 0 0 0
## bottl bottom bought bounc bound boundari boundaryexmhp boundarynextpart
## 1 0 0 0 0 0 0 0 0
## box boy brain brainsnetnotifi branch brand brbr break.
## 1 0.003787879 0 0 0 0 0 0 0
## breakthrough breast brent brfont brian bridg bright bring britain
## 1 0 0 0 0 0 0 0 0 0
## british brnbsp broad broadcast broke broken broker brother brought brown
## 1 0 0 0 0 0 0 0 0 0 0
## brows browser bst bti btw buck budget buffer bug
## 1 0 0 0 0 0 0 0 0 0
## bugzilladaemonhughesfamilyorg bugzillareason build built bulk
## 1 0 0 0 0 0.003787879
## bulkmail bunch bundl buri burn burner bus bush busi button
## 1 0 0 0 0 0.003787879 0 0 0 0.003787879 0
## buy buyer byrn byte caa cabl cach caf calcul california call
## 1 0 0 0 0 0 0 0 0 0 0 0.003787879
## camelavalon camelbobcatodsorg camelheralddragonsdawnnet
## 1 0 0 0
## camellocalhostlocaldomain camelwanderlustprognet camera camp campaign
## 1 0 0 0 0 0
## canada canadian cancel cancer candid canon capabl capac capit captur car
## 1 0 0 0 0 0 0 0 0 0 0 0
## carbon card care career carri carrier cartridg case cash cat catalog
## 1 0 0 0 0 0 0 0 0 0 0 0
## catch categori catsmxucsc catsucsc caught caus cbf cbs cdale
## 1 0 0 0 0 0 0 0 0 0
## cdaletechmonkeysnet cdf cdo cdr cdrom cds cdt cef celebr cell cellpad
## 1 0 0 0 0 0 0 0 0 0 0 0
## cellpaddingd cellspac cellspacingd cent center centerbr centerfont
## 1 0 0 0 0 0 0 0
## centert centr central centuri certifi cest cff cfont chain challeng
## 1 0 0 0 0 0 0 0 0 0 0
## chanc chang channel chapman char charact characterist charg charsetascii
## 1 0 0 0 0 0 0 0 0 0.003787879
## charsetasciiformatflow charsetbig charsetdiso charsetdwindow charsetgb
## 1 0 0 0 0 0
## charsetiso charsetwindow chart chat cheap cheaper check cheer chemic
## 1 0 0 0 0 0 0 0 0 0
## chicago chick chief child children china chines chip choic choos chosen
## 1 0 0 0 0 0 0 0 0 0 0 0
## chris christian chuck church cindi cipher cipheredhdssdescbcsha circl
## 1 0 0 0 0 0 0 0 0
## circumst cite citi citizen civil civilian claim class classic
## 1 0 0 0 0 0 0 0.003787879 0 0
## classifi claw clean clear clearanc clever click client clientattbi
## 1 0 0 0 0 0 0 0.003787879 0 0
## climat clock close closer cloth club clue cmatthiasrpmforgenet cnn coast
## 1 0 0 0 0 0 0 0 0 0 0
## code coffe col cold collabor collaps colleagu collect collector colleg
## 1 0 0 0 0 0 0 0 0 0 0
## colo color colorblack colorcc colord colorda colordcc colordff
## 1 0 0 0 0 0 0 0 0
## colordffff colordffffff colordfffont colordfffontfont colordfont colordr
## 1 0 0 0 0 0 0
## colorff colorffffff colorffffffclick colorfffont colorfont colspan
## 1 0 0 0 0 0 0
## colspand colspandimg column combin come comfort command comment commerc
## 1 0 0 0 0 0 0 0 0 0
## commerci commiss commit committe common communic communiti compani
## 1 0 0 0 0 0 0 0 0.003787879
## compar comparison compat compel compens compet competit competitor
## 1 0 0 0 0 0 0 0 0
## compil complain complaint complet complex complianc compliant complic
## 1 0 0 0 0 0 0 0 0
## compon composit comprehens compress compromis comput con conceal
## 1 0 0 0 0 0 0.003787879 0 0
## concentr concept concern conclud conclus condit conduct confer confid
## 1 0 0 0 0 0 0 0 0 0
## confidenti config configur confirm conflict confus congratul congress
## 1 0 0 0 0 0 0 0 0
## congression connect consequ conserv consid consider consist
## 1 0 0 0 0 0.003787879 0 0
## consolid constant constitut construct consult consultationfont consum
## 1 0 0 0 0 0 0 0
## contact contain content contentclass contentden contentdescript
## 1 0 0 0 0 0 0
## contentdfrontpageeditordocu contentdisposit contentdmicrosoft
## 1 0 0 0
## contentdmshtml contentdtexthtml contentfrontpageeditordocu
## 1 0 0 0
## contentmicrosoft contentmshtml contenttexthtml contenttransferencod
## 1 0 0 0 0
## contenttyp contest context continu contract contractor contribut
## 1 0.003787879 0 0 0 0 0 0
## control conveni convent convers convert convinc cook cool cooper copi
## 1 0 0 0 0 0 0 0 0 0 0
## copyright coral cordial core corner corpor corpus correct correspond
## 1 0 0 0.003787879 0 0 0 0 0 0
## corrupt cost council count countri coupl court cover coverag
## 1 0 0.003787879 0 0 0 0 0 0 0
## cpunk cpunkseinsteinssz cpunkshqpronsnet cpunkslocalhost cpunksmindernet
## 1 0 0 0 0 0
## cpunkswastemindernet cpuosdn crack craig craigdeersoft crankslacknet
## 1 0 0 0 0 0 0
## crap crash crave crazi creat creation creativ creator credit creditor
## 1 0 0 0 0 0 0 0 0 0 0
## cri crime crimin crisi criteria critic cross crucial cspphtvfi cultur
## 1 0 0 0 0 0 0 0 0 0 0
## cup curious currenc current custom cut cvs cwgexmhdeepeddi cycl
## 1 0 0 0 0 0 0.003787879 0 0 0
## cypherpunksdspronsnet cypherpunkseinsteinssz
## 1 0 0
## cypherpunksforwarddspronsnet cypherpunksoutgo cypherpunksssz daa
## 1 0 0 0 0
## daemonwast daf daili damag damn dan danger daniel dare dark data databas
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## date daughter dave david davlawhotmail day dbf dbsphilodox
## 1 0.003787879 0 0 0 0 0 0 0
## dbtnsubmit dbusphonetdtr dcc dcf dciti dcontactnam ddf dead
## 1 0 0 0 0 0 0 0 0
## deafboxhotmail deal dealer dear death deathtospamdeathtospamdeathtospam
## 1 0 0 0 0 0 0
## debat debian debt debug dec decad decemb decent decid decis declar
## 1 0 0 0 0 0 0 0 0 0 0 0
## declin decod decreas dedic deduct deep deepeddyvircio def default defend
## 1 0 0 0 0 0 0 0 0 0 0
## defens defin definit degre delay delet deliv deliveri deliveryag
## 1 0 0 0 0 0 0 0.007575758 0 0
## deliveryd dell deltacsmuozau demail demand demo democraci democrat
## 1 0 0 0 0 0 0 0 0
## demonstr deni depart depend deploy deposit depress deriv describ
## 1 0 0 0 0 0 0 0 0 0
## descript deserv design desir desk desktop destin destroy destruct detail
## 1 0 0 0 0 0 0 0 0 0 0
## detect determin develop devic devor devot dfb dff dgener dgeneratorhead
## 1 0 0 0 0 0 0 0 0 0 0
## dhcpcseucsc dhdnrecipienttxt dhdnsubjecttxt dhomephonetdtr dial die diet
## 1 0 0 0 0 0 0 0
## diff differ difficult difficulti dig digit dimeboxbmc dinter diploma dir
## 1 0 0 0 0 0 0 0 0 0 0
## direct director directori disabl disagre disappear disappoint disc
## 1 0 0 0 0 0 0 0 0
## disclaim disconnect discount discov discoveri discuss diseas disk
## 1 0 0 0 0 0 0 0 0
## display disrupt distanc distinct distribut distributor div diveintomark
## 1 0 0 0 0 0 0 0 0
## divers divfont divid dlsilcom dma dnametdtr dns doc doctor doctyp
## 1 0 0 0 0 0 0 0 0 0 0
## document doesnt dog dogmaslashnullorg dollar domain domest domin dont
## 1 0 0 0 0.003787879 0 0 0 0 0
## door dot doubl doubt download dozen dphone dprogid draft drag dramat
## 1 0 0 0 0 0 0 0 0 0 0 0
## draw drawn dream drink drive driver drop drug dsentto dslevergonet
## 1 0 0 0.01136364 0 0 0 0 0 0 0
## dsltelespnetbr dslwdcdslspeakeasynet dstate dstatetdtr dsubmit
## 1 0 0 0 0 0
## due dumb dump duncan duplic duti dvd dvds dynam dysfunct eaa eaf
## 1 0.003787879 0 0 0 0 0 0 0 0 0 0 0
## eager ear earli earlier earn earth eas easi easier easiest
## 1 0 0 0 0 0.007575758 0 0 0 0.003787879 0
## easili east eastern eat ebay ebf ecf echo eclecticklugenet econom
## 1 0 0 0 0 0 0 0 0 0 0
## economi edf edg edificio edit editor edt educ edward eef efa efc
## 1 0 0 0 0 0 0 0.003787879 0 0 0 0 0
## eff effect effici effort egp egroupsreturn egwn egwnnet ehmadscientist
## 1 0 0 0 0 0 0 0 0 0
## einsteinssz eir eirikur ejwcseucsc elect electr electron element elia
## 1 0 0 0 0 0 0 0 0 0
## elig elimin elz emac email emailbr embed emerg emot empir employ
## 1 0 0 0 0 0.02272727 0 0 0 0 0 0
## employe empti emwac enabl enclos encod encodingutf encount encourag
## 1 0 0 0 0 0 0 0 0 0
## encrypt enctypedtextplain end endeavor endors enemi energi enforc engag
## 1 0 0 0 0 0 0 0 0 0
## engin england english enhanc enjoy enorm ensur enter enterpris entertain
## 1 0 0 0 0 0 0 0 0 0 0
## entir entiti entitl entrepreneur entri envelop envelopesend environ
## 1 0 0 0 0 0 0 0 0
## environment epson equal equip equiti equival erect eriksson error
## 1 0 0 0 0 0 0 0 0 0.003787879
## escap esmtp ess essenc essenti est establish estat estim ethic
## 1 0 0.01515152 0 0 0 0 0 0 0 0
## eudora eugen eugenleitlorg eugenlocalhost euro europ european
## 1 0 0 0 0 0 0 0
## eval evalu even event eventu everyday evid evil evildo evolut
## 1 0.003787879 0 0 0 0 0 0 0 0 0
## evolutionmdk evolv exact examin exampl exceed excel except excess
## 1 0 0 0 0 0 0 0 0 0
## exchang excit exclud exclus excus execut exercis exhaust exhibit exim
## 1 0 0 0 0 0 0 0 0 0 0
## exist exit exmh exmhp exmhus exmhusersadminredhat
## 1 0 0 0 0 0 0
## exmhusersadminspamassassintaintorg exmhuserslistmanredhat
## 1 0 0
## exmhuserslistmanspamassassintaintorg exmhusersredhat
## 1 0 0
## exmhusersspamassassintaintorg exmhwork exmhworkersadminredhat
## 1 0 0 0
## exmhworkersadminspamassassintaintorg exmhworkerslistmanredhat
## 1 0 0
## exmhworkerslistmanspamassassintaintorg exmhworkersredhat
## 1 0 0
## exmhworkersspamassassintaintorg expand expect expens experi experienc
## 1 0 0 0 0 0 0
## expert expertis expir explain explan explicit explod exploit explor
## 1 0 0 0 0 0 0.007575758 0 0 0
## explos export expos exposur express ext extend extens extent extern
## 1 0 0 0 0 0.003787879 0 0 0 0 0
## extra extract extraordinari extrem eye ezmlm faa fac face faceari
## 1 0 0 0 0 0 0 0 0 0 0
## facearialfont facedari facedarialfont facedtahoma facedtim facedverdana
## 1 0 0 0 0 0 0
## facetim faceverdana faceverdanaarialhelveticasansserif fact
## 1 0 0 0 0.003787879
## factor factual fail failur fair faith fake fall fals famili
## 1 0 0 0.003787879 0 0 0 0 0 0 0
## familiar familydsansserif famous fantasi fark farm farquhar fashion fast
## 1 0 0 0 0 0 0 0 0 0
## faster fastest fat father fault favor favorit fax fbf fbi fcf fear
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## featur feb fed feder fedex fee feed feedback feel feet fef
## 1 0 0 0 0 0 0 0 0 0.003787879 0 0
## felicityklugenet felicitylocalhost fellow felt femal fetch fetchmail
## 1 0 0 0 0 0 0 0.003787879
## fff ffff ffffff fiction field fieldfont fight figur file fill film
## 1 0 0 0 0 0 0 0 0 0 0 0
## filter final financ financi find fine finger finish finn fire
## 1 0 0.003787879 0 0.003787879 0 0 0 0 0 0
## firewal firm fit fix flag flame flat flawhotmail flexibl fli flight flip
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## flood floor floppi florida flow focus focusclick folder folk follow
## 1 0 0 0 0 0 0 0 0 0 0.003787879
## font fontbr fontdiv fontfamili fontfont fontfontdiv fontfontfont
## 1 0 0 0 0 0 0 0
## fontfonttd fonthtml fontsiz fonttd fonttdtr fontweight food fool foot
## 1 0 0 0 0 0 0 0 0 0
## forc foreign forev forg forget forgot forgotten fork forkadminx
## 1 0 0 0 0 0 0 0 0 0
## forklisthotmail forkspamassassintaintorg forkxent form format
## 1 0 0 0 0.003787879 0
## formatflow formul formula forteana forteanaowneryahoogroup
## 1 0.003787879 0 0 0 0
## forteanaunsubscribeegroup forteanayahoogroup fortun forum forward
## 1 0 0 0.003787879 0 0
## found foundat fourth fox fraction frame framework franc francisco frank
## 1 0 0 0 0 0 0 0 0 0 0
## freak free freebr freebsd freedom freefont freez french frequent
## 1 0 0.01136364 0 0 0 0 0 0 0
## fresh freshrpm freshrpmsnet fri friday friend front frontpag
## 1 0 0 0 0.01893939 0 0.003787879 0 0
## fruit frustrat ftp fuck fuel full fulli fun function. fund fundament
## 1 0 0 0 0 0 0 0 0 0 0 0
## funni futur fwd gaa gain gak gakdogmaslashnullorg gamasutra gambl game
## 1 0 0 0 0 0 0 0 0 0 0
## gap gari garrigu garymcanada garymteledyn gas gate gather gave gay
## 1 0 0 0 0 0 0 0 0 0 0
## gbolcerendeavor gecko geeg geegebarreraorg geek gek gekdogmaslashnullorg
## 1 0 0 0 0 0 0 0
## general generat generic geometri georg german germani giant gibbon
## 1 0 0 0 0 0 0 0 0 0
## gibbsmidrang gif gift girl give gkdogmaslashnullorg glad glass glen
## 1 0 0 0 0 0 0 0 0 0
## global globe gmt gnk gnome gnulinux gnupg gnus goal god gojomousanet gok
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## gold golden gonz good goodby googl gordon gov govern gpg
## 1 0 0 0 0.003787879 0 0 0 0 0 0
## gpgfingerprint gpgkeyfingerprint gpgkeynumb gpgkeyserv gpk grab grade
## 1 0 0 0 0 0 0 0
## graduat gram grand grant graphic great greater greatest green greg
## 1 0 0 0 0 0 0 0 0 0 0
## gregori grep ground group grow grown growth gtk guarante guardian
## 1 0 0 0 0.007575758 0 0 0 0 0 0
## guess guid guidelin guido guidopythonorg gun guy gwesegwnnet gwpanasa
## 1 0 0 0 0 0 0 0 0 0
## haa haataja habea habeassw habeus hack hair hal haldevoreacmorg half
## 1 0 0 0 0 0 0 0 0 0 0
## hall hallgrimsson hallmailmindspringnet ham hammer hand handl hang
## 1 0 0 0 0 0 0 0 0
## hanson happen happi hard hardcor harder hardwar harley harleyargotech
## 1 0 0 0 0 0 0 0 0 0
## harm harri harvest hash hassl hat hate hawaii hcm head header headlin
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## headquart health healthi hear heard heart heat heaven heavi heck
## 1 0 0 0 0 0.003787879 0 0 0 0 0
## height heightd heightdbr heightdfont heightdtd heightfont heightnbsptd
## 1 0 0 0 0 0 0 0
## heighttd held hell helo helobelphegorehughesfamilyorg heloperlorg
## 1 0 0 0 0 0 0
## helopermafrostnet heloregina helouswprcvssourceforgenet
## 1 0 0 0
## helouswsflistsourceforgenet helouswsfnetmiscsourceforgenet help
## 1 0 0 0
## helvetica herbal hesit hettinga hewlett hey hgh hidden hide high higher
## 1 0 0 0 0 0 0 0 0 0 0 0
## highest highlight hill hint hire histor histori hit hmm hmmm
## 1 0 0 0 0 0 0 0 0.003787879 0 0
## hold holder hole holiday home homeown homepag homerperfectpres
## 1 0 0 0 0 0 0 0 0
## honest honor hook hope hormon horni hors hospit host
## 1 0.003787879 0.003787879 0 0 0 0 0 0 0.003787879
## hostaddrbtopenworld hostinsuranceiq hostnam hot hotel hotmail hottest
## 1 0 0 0 0 0 0 0
## hour hous hover hqpronsnet href hrefd hrefdhttp
## 1 0.003787879 0 0 0 0 0 0
## hrefdhttpwwwinsuranceiqlegalhtmleg hrefdhttpwwwinsurancemailnet hspace
## 1 0 0 0
## hspan html htmlbodi htmlfont htmlhead htmlheadtitl http
## 1 0 0 0 0 0 0 0
## httpboingboingnet httpclickyahooptybbnxieaamvfiaagsolbtm
## 1 0 0
## httpdocsyahooinfoterm httpeinsteinsszcdr httpequivcontenttyp
## 1 0 0 0
## httpequivdcontentlanguag httpequivdcontenttyp httpfreshrpmsnet
## 1 0 0 0
## httpjeremyzawodnyblogarchiveshtml
## 1 0
## httplistsfreshrpmsnetmailmanlistinforpmlist
## 1 0
## httplistsfreshrpmsnetmailmanlistinforpmzzzlist
## 1 0
## httplistsfreshrpmsnetpipermailrpmzzzlist httpperlorgarticleplsid
## 1 0 0
## httpperlorgcommentsplsid httpperlorgmessag
## 1 0 0
## httpscriptingnewsuserlandbackissuesam
## 1 0
## httpslistmanredhatmailmanlistinfoexmhus
## 1 0
## httpslistmanredhatmailmanlistinfoexmhwork
## 1 0
## httpslistmanspamassassintaintorgmailmanlistinfoexmhus
## 1 0
## httpslistmanspamassassintaintorgmailmanlistinfoexmhwork
## 1 0
## httpslistmanspamassassintaintorgmailmanprivateexmhus
## 1 0
## httpslistmanspamassassintaintorgmailmanprivateexmhwork
## 1 0
## httpslistssourceforgenetlistslistinforazorus
## 1 0
## httpslistssourceforgenetlistslistinfospamassassincommit
## 1 0
## httpslistssourceforgenetlistslistinfospamassassindevel
## 1 0
## httpslistssourceforgenetlistslistinfospamassassinsight
## 1 0
## httpslistssourceforgenetlistslistinfospamassassintalk
## 1 0
## httpsourceforgenetmailarchivesforumphpforumrazorus
## 1 0
## httpsourceforgenetmailarchivesforumphpforumspamassassindevel
## 1 0
## httpssourceforgenetlistslistinforazorus
## 1 0
## httpssourceforgenetlistslistinfospamassassincommit
## 1 0
## httpssourceforgenetlistslistinfospamassassindevel
## 1 0
## httpssourceforgenetlistslistinfospamassassintalk
## 1 0
## httpswwwinphonicaspsourceforgerefcodev httpteledyn httpthinkgeeksf
## 1 0 0 0
## httpwwwaaronswweblog httpwwwaskbjoernhansenarchiveshtml
## 1 0 0
## httpwwwauracomteledyn httpwwwdeepeddycwg httpwwwdeepeddycwgchrisgif
## 1 0 0 0
## httpwwwgeocitieshaldevoreiihaleyegif
## 1 0
## httpwwwgeocrawlerredirsfphplistrazorus
## 1 0
## httpwwwgeocrawlerredirsfphplistspamassassindevel
## 1 0
## httpwwwgeocrawlerredirsfphplistspamassassintalk httpwwwgnupgorg
## 1 0 0
## httpwwwhabeasreport httpwwwhughesfamilyorgbugzillashowbugcgiid
## 1 0 0
## httpwwwibuc httpwwwinsuranceiqlegalhtm httpwwwinsurancemailnet
## 1 0 0 0
## httpwwwinsurancemailnetfont httpwwwjabberosdnxim httpwwwkeyspgpnet
## 1 0 0 0
## httpwwwlinuxmailmanlistinfoilug httpwwwlinuxpipermaililug
## 1 0.003787879 0
## httpwwwmithralbeberg httpwwwnewsisfreeclick httpwwwpythonorgguido
## 1 0 0 0
## httpwwwteledyn httpwwwvircio httpxentmailmanlistinfofork
## 1 0 0 0
## httpxentpipermailfork huckleberri huge hugh human humbl
## 1 0 0 0.003787879 0 0 0
## hundr hunger hunt hurri hurt husband hydrogenleitlorg iaa ian ibm
## 1 0.003787879 0 0 0 0 0 0 0 0 0
## ice ick icq ict idea ideal ident identifi identwelchmedlicottpanasa
## 1 0 0 0 0 0 0 0 0 0
## idiot ignor iii ill illeg illinoi ilug ilugadminlinux iluglinux
## 1 0 0 0 0 0 0 0.003787879 0.01515152 0.01893939
## ilwsrbcgjzrrgqydhkjpdwqwiwrcv imag imageurl imagin imap img imho
## 1 0 0 0 0 0.003787879 0 0
## immedi imo impact implement impli implic import impos imposs
## 1 0 0 0 0 0 0 0.003787879 0 0
## impress improv inbound inbox inch incid includ incom
## 1 0 0 0 0.007575758 0 0 0 0.003787879
## inconveni incorpor incorrect increas incred incur independ index india
## 1 0 0 0 0 0 0 0 0 0
## indian indic individu industri inevit inexpens influenc info
## 1 0 0 0 0 0 0 0 0.003787879
## inform informationbr informationfont informationfonttd infrastructur
## 1 0.02272727 0 0 0 0
## infring ing ingredi init initi injuri inkjet inlin innov input inquiri
## 1 0 0 0 0 0 0 0 0 0 0 0
## insan insert insid insist inspir instal instanc instant institut
## 1 0.003787879 0 0 0 0 0 0 0 0
## instruct insur insurancemailinsuranceiq int integr intellectu intellig
## 1 0 0 0 0 0 0 0
## intend intens intent interact interest interfac intermail intern
## 1 0 0 0 0 0.003787879 0 0 0
## internet interpret interview intimid intmxcorpredhat
## 1 0 0 0 0 0
## intmxcorpspamassassintaintorg introduc introduct invas invent invest
## 1 0 0.003787879 0 0 0 0
## investig investor invis invit invoic invok involv ion iplanet iraq
## 1 0 0 0 0 0 0 0 0 0 0
## iredhatlinux ireland irish irix iron island iso
## 1 0 0 0.007575758 0 0 0 0
## isolnetsuxtechmonkeysnet isp issu ist item jaa jabber jack jacob
## 1 0 0 0 0.003787879 0 0 0 0 0
## jalapeno jame jamesr jan januari japan japanes jason java jennif jeremi
## 1 0 0 0 0 0 0 0 0 0 0 0
## jersey jftheriaultnetnologia jim jmasonorg jmexmhjmasonorg jmhall
## 1 0 0 0 0 0 0
## jmilugjmasonorg jmjmasonorg jmlocalhost jmnetnoteinc jmnetvig
## 1 0.003787879 0 0.007575758 0 0
## jmperljmasonorg jmrazorjmasonorg jmrpmjmasonorg jmsajmasonorg job joe
## 1 0 0 0 0 0 0
## joebarreraorg john johnhallevergonet join joint joke jone joseph journal
## 1 0 0 0 0 0 0 0 0 0
## joy judg judgment jul juli jump jun june junk justic justifi justin kaa
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## keep ken kernel kevin key keyboard keyword khare kharealumnicaltech kick
## 1 0 0 0 0 0 0 0 0 0 0
## kid kiddi kill killer kind king kmail knew knock know knowledg
## 1 0 0 0 0 0.003787879 0 0 0 0 0 0
## kre kremunnariozau laa lab label labor laboratori lack ladi laid
## 1 0 0 0 0 0 0 0 0 0 0
## lairxent land lang langd languag laptop larg larger largest larri
## 1 0 0.003787879 0 0 0 0 0 0 0 0
## las laser laserjet last late latest laugh launch law lawrenc lawsuit
## 1 0 0 0 0 0 0 0 0 0 0 0
## lawyer lay layer lazi lead leader leadership lean learn leas leav led
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## left leftmargin leftmargind leg legal legisl legitim leitl lend lender
## 1 0 0 0 0 0 0 0 0 0 0
## length lesson let letter level leverag lexmark lgonzepanix lib liber
## 1 0 0 0 0.01515152 0 0 0 0 0 0
## liberti librari licens lie life lifestyl lifetim lifont lift
## 1 0 0 0 0.003787879 0 0 0 0 0
## light like limit line link linkd linkdccc linkdff linux
## 1 0 0 0 0 0.003787879 0 0 0 0.007575758
## linuxiluglocalhost linuxmidrang liquid list listadmin listarch
## 1 0 0 0 0.01515152 0 0
## listen listid listmanredhat listmanspamassassintaintorg
## 1 0 0.003787879 0 0
## listmasterlinux listpost listssecurityfocus listssz listsubscrib
## 1 0.003787879 0 0 0 0
## listunsubscrib liter litig live llc load loan local localhost
## 1 0 0 0 0 0 0 0 0 0.01136364
## localhostlocaldomain locat lock locustmindernet log logic login logo
## 1 0 0 0 0 0 0 0 0
## london long longer longterm look loop loos lord los lose loss lost lot
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## lotus love lover low lowcost lower lowest luca lucid luck lucki
## 1 0 0 0 0 0 0 0 0 0 0 0
## lugh lughtuathaorg lwfdlawhotmailmsn maa mac machin macintosh
## 1 0.003787879 0.01515152 0 0 0 0 0
## made magazin magic magnesiumnet magnitud mail mailbox mailer
## 1 0 0 0 0 0 0.003787879 0 0
## mailevergonet mailfollowup mailinglist mailinsuranceiq maillistsbr
## 1 0 0 0 0 0
## maillocalhost mailmanvers mailnetnoteinc mailscann
## 1 0 0.003787879 0 0
## mailsvccradublineircomnet mailto mailtobitbitchmagnesiumnet
## 1 0 0 0
## mailtoexmhusersrequestredhatsubjectsubscrib
## 1 0
## mailtoexmhusersrequestredhatsubjectunsubscrib
## 1 0
## mailtoexmhusersrequestspamassassintaintorgsubject
## 1 0
## mailtoexmhusersspamassassintaintorg
## 1 0
## mailtoexmhworkersrequestredhatsubjectsubscrib
## 1 0
## mailtoexmhworkersrequestredhatsubjectunsubscrib
## 1 0
## mailtoexmhworkersrequestspamassassintaintorgsubject
## 1 0
## mailtoexmhworkersspamassassintaintorg mailtoforkadminx
## 1 0 0
## mailtoforkrequestxentsubject mailtoforkrequestxentsubjectsubscrib
## 1 0 0
## mailtoforkrequestxentsubjectunsubscrib mailtoforkspamassassintaintorg
## 1 0 0
## mailtoiluglinux mailtoilugrequestlinuxsubject
## 1 0 0
## mailtoilugrequestlinuxsubjectsubscrib
## 1 0
## mailtoilugrequestlinuxsubjectunsubscrib
## 1 0
## mailtorazorusersrequestlistssourceforgenetsubjectsubscrib
## 1 0
## mailtorazorusersrequestlistssourceforgenetsubjectunsubscrib
## 1 0
## mailtorazorusersrequestsourceforgenetsubject
## 1 0
## mailtorazoruserssourceforgenet
## 1 0
## mailtorpmlistrequestfreshrpmsnetsubjectsubscrib
## 1 0
## mailtorpmlistrequestfreshrpmsnetsubjectunsubscrib
## 1 0
## mailtorpmzzzlistfreshrpmsnet mailtorpmzzzlistrequestfreshrpmsnetsubject
## 1 0 0
## mailtosecprogsecurityfocus mailtosecprogsubscribesecurityfocus
## 1 0 0
## mailtosecprogunsubscribesecurityfocus
## 1 0
## mailtospamassassincommitsrequestlistssourceforgenetsubjectsubscrib
## 1 0
## mailtospamassassincommitsrequestlistssourceforgenetsubjectunsubscrib
## 1 0
## mailtospamassassincommitsrequestsourceforgenetsubject
## 1 0
## mailtospamassassincommitssourceforgenet
## 1 0
## mailtospamassassindevelrequestlistssourceforgenetsubjectsubscrib
## 1 0
## mailtospamassassindevelrequestlistssourceforgenetsubjectunsubscrib
## 1 0
## mailtospamassassindevelrequestsourceforgenetsubject
## 1 0
## mailtospamassassindevelsourceforgenet
## 1 0
## mailtospamassassintalkrequestlistssourceforgenetsubjectsubscrib
## 1 0
## mailtospamassassintalkrequestlistssourceforgenetsubjectunsubscrib
## 1 0
## mailtospamassassintalkrequestsourceforgenetsubject
## 1 0
## mailtospamassassintalksourceforgenet
## 1 0
## mailtozzzzteanaunsubscribeyahoogroup mailwebnotenet main maintain
## 1 0 0 0 0.003787879
## mainten major make maker male man manag mandarklabsnetnoteinc mandat
## 1 0 0 0 0 0 0 0 0 0
## manner manual manufactur map mar march margin marginbottom marginheight
## 1 0 0 0 0 0 0 0 0 0
## margintop marginwidth marginwidthd mari mark market marri marriag
## 1 0 0 0 0 0 0.01136364 0 0
## marshal martin mason mass massiv master mastercard match mate materi
## 1 0 0 0 0 0 0 0 0 0 0
## math mathemat matt matter matthia matthiasegwnnet matthiasrpmforgenet
## 1 0 0 0 0 0 0 0
## matur maxim maximum mayadyndnsorg mdomlocalhost mdt mean meant
## 1 0 0 0 0 0 0 0.003787879 0
## measur mechan media medic medicin medium medlicottpanasa meet
## 1 0 0 0 0 0 0 0 0
## member membership memori men mental mention menu merchant mercuri
## 1 0.003787879 0 0 0 0 0 0 0 0
## merg mess messag messageflag messageid messeng messnum met meta
## 1 0 0 0.003787879 0 0.003787879 0 0 0 0
## metabol method methoddpost methodpost mexico mgrpscdyahoo miami
## 1 0 0 0 0 0 0 0
## micalgpgpsha michael microsoft middl mike mile militari millennium
## 1 0 0 0 0 0 0 0 0
## million millionair mime mimeautoconvert mimeol mimetool mimevers mind
## 1 0 0 0 0 0 0 0.003787879 0
## mine minim minimum minist minor minut mirror miss mission mistak
## 1 0 0 0 0 0 0 0 0 0 0.003787879
## mithral mix mlm mobil mode model modem moder modern modifi modul
## 1 0 0 0.03787879 0 0 0 0 0 0 0 0
## mohr moment mon monday money moneyback monitor montanaro month
## 1 0 0 0 0 0.003787879 0 0 0 0
## moral morn mortgag mother motiv mount mountain mous mouth move movement
## 1 0 0 0 0 0 0 0 0 0 0 0
## movi mozilla mplayer msattach msg msgid msgs msmailprior msn
## 1 0 0 0 0 0 0 0 0 0
## mstnefcorrel mta mtagrpscdyahoo mtasnfcpbinet multilevel multipart
## 1 0 0 0 0 0.007575758 0
## multipartaltern multipartmix multipartrel multipartsign multipl
## 1 0 0 0 0 0
## multithread munnariozau murder murphi muscl music muslim mutti mutual
## 1 0 0 0 0 0 0 0 0 0
## mxhotmail mxmailyahoo mxredhat mxspamassassintaintorg mysteri naa name
## 1 0 0 0 0 0 0 0
## narrow nasti nation nationwid nativ natur navig nbc nbsp nbspbr nbspfont
## 1 0 0 0 0 0 0 0 0 0 0 0
## nbspnbsp nbspnbspnbspnbsp nbspnbspnbspnbspnbspnbspnbspnbspnbspnbspnbsp
## 1 0 0 0
## neal neat necessarili need negat negoti neighbor net netscap network
## 1 0 0 0 0 0 0 0 0 0 0
## newer newest newli news newscientist newsgroup newslett newspap
## 1 0 0 0 0.003787879 0 0 0 0
## nextpart ngrpscdyahoo nice nigeria nigerian night nmh nnfmp nobel normal
## 1 0 0 0 0 0 0 0 0 0 0
## nort north northern noshad nospam note notic noticefont notif nov novemb
## 1 0 0 0 0 0 0 0 0 0 0 0
## nowaday nsegwnnet nter null number numer oaa object oblig
## 1 0 0 0 0 0 0.003787879 0 0 0
## obligationfont obscur observ obtain obvious occasion occup occur oct
## 1 0 0 0 0 0 0 0 0 0
## octob odd offens offer offic offici offlin oil older oncontextmenureturn
## 1 0 0 0 0 0 0 0 0 0 0
## ondragstartreturn onlin onmouseoverwindowstatus
## 1 0 0.007575758 0
## onselectstartreturn ont oop open oper operatingsystem opinion
## 1 0 0 0 0 0 0 0
## opportun oppos opposit oprah opt optic optim option optout order
## 1 0.003787879 0 0 0 0 0 0 0 0 0
## organ origin originalarrivaltim originald originatingip originatorcal
## 1 0 0 0 0 0 0
## osdn oss ouch outgoingsecurityfocus outlook output outstand overlook
## 1 0 0 0 0 0 0 0 0
## overnight overwhelm owe owen owenpermafrostnet own owner
## 1 0 0 0 0 0 0 0
## ownercypherpunkseinsteinssz paa pacif pack packag packard pad
## 1 0 0 0 0 0 0 0
## paddingbottom paddingleft paddingtop page paid pain pair
## 1 0 0 0 0.003787879 0 0 0
## palestinian palm panasa panel paper par paragraph parent park pars part
## 1 0 0 0 0 0 0 0 0 0 0 0
## parti particip partit partner partnership pass password past
## 1 0 0 0 0 0 0 0 0.003787879
## patch patent path patient pattern paul pay payabl payment pcs pdf pdt
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## peac peak pegasus peltonen pend peni penil penni peopl percent
## 1 0 0 0 0 0 0 0 0 0.01515152 0
## percentag percept perfect perform period perl perman permiss
## 1 0 0 0 0 0.003787879 0 0 0.003787879
## permit persist person perspect pet peter pgp phd phil phobo
## 1 0 0 0 0 0 0 0 0 0 0.003787879
## phoboslabsnetnoteinc phone phonefontfont phonefonttd photo photograph
## 1 0.003787879 0 0 0 0 0
## phrase physic pic picasso pick pickl pickup pictur piec pill
## 1 0 0 0 0 0 0 0 0 0 0
## pinebsocrankslacknet pinelnxhydrogenleitlorg
## 1 0 0
## pinelnxisolnetsuxtechmonkeysnet pinelnxlocalhostlocaldomain
## 1 0 0
## pinelnxurgentrugac pinelnxwatchermithr pioneer pipe pirat pitch
## 1 0 0 0 0 0 0.003787879
## place plain plan plane planet plant planta plate platform play player
## 1 0 0 0 0 0 0 0 0 0 0 0
## pleas pleasur plenti plug plugin pocket point polic polici polit
## 1 0 0 0 0 0 0 0 0 0 0
## politician poll pool poor pop popul popular porn port portabl portion
## 1 0 0 0 0 0 0 0 0 0 0 0
## posit possess possibl post postal poster postfix
## 1 0 0 0 0 0 0 0.003787879
## postfixlughtuathaorg potenc potent potenti pound poverti power practic
## 1 0 0 0 0 0 0 0 0
## preced precedenceref precis predict prefer premier premium prepar
## 1 0.003787879 0 0 0 0 0 0 0
## prescript presenc present preserv presid press pressur prestigi presum
## 1 0 0 0 0 0 0 0 0 0
## pretti prevent previous price pricesfont pride primari primarili
## 1 0 0 0 0 0 0 0 0
## principl print printer prior prioriti privaci privat privileg prize pro
## 1 0 0 0 0 0 0 0 0 0 0
## prob problem procedur proceed process processor procmail procmailrc
## 1 0 0 0 0 0 0 0 0
## produc product profess profession profil profit progid program programm
## 1 0 0 0 0 0 0 0 0 0
## progress prohibit project promis promiscu promot prompt proof
## 1 0 0 0 0.007575758 0 0 0 0
## proper properti propos proprietari prospect prosper protect protest
## 1 0 0 0 0 0 0 0 0
## protocol protocolapplicationpgpsignatur prove proven provid provinc
## 1 0 0 0 0 0 0
## proxi ptsize public publish pudg pudgeperlorg puglisi pull pump purch
## 1 0 0 0 0 0 0 0 0 0 0
## purchas pure purpos pursu push put python qaa qmail qmailldap qmailscann
## 1 0 0 0 0 0 0 0 0 0 0 0
## qmailwebmailyahoo qmqp qualcomm qualif qualifi qualiti quarter queri
## 1 0 0 0 0 0 0 0 0
## question quick quicker quiet quinlan quinlanpathnam quit quot
## 1 0 0 0 0 0 0 0 0
## quotedprint quotefont raa race radio rafael rah
## 1 0 0 0 0 0 0 0
## rahettingapopearthlinknet rahibuc rahshipwright rais ram ran random rang
## 1 0 0 0 0 0 0 0 0
## rank rapid rare rat rate ratio ratreepsuac raw ray razor razorus
## 1 0 0 0 0 0 0 0 0 0 0 0
## razorusersadminlistssourceforgenet razorusersadminsourceforgenet
## 1 0 0
## razoruserslistssourceforgenet razoruserssourceforgenet rcs reach
## 1 0 0 0 0
## read reader readi real realist realiti realiz realtim
## 1 0.007575758 0 0 0.003787879 0 0 0 0
## reason reboot rebuild recal receipt receiv recent reciev recipi
## 1 0 0 0 0 0 0.04924242 0.003787879 0 0
## recipientsnetnoteinc recogn recommend recompil reconnect record recov
## 1 0 0 0 0 0 0 0
## recoveri recruit red redhat reduc reduct refer referr refin reflect
## 1 0 0 0 0 0 0 0 0 0 0
## reform refund refus reg regard regim region regist registr regul regular
## 1 0 0 0 0 0 0 0 0 0 0 0
## regulatori reject relat relationship relax relay relayindigo releas
## 1 0 0 0 0 0 0 0 0
## relev reli reliabl relief religi religion remain remark remedi rememb
## 1 0 0 0 0 0 0 0 0 0 0
## remind remot remov render renew rent rental repair repeat. replac
## 1 0 0 0.003787879 0 0 0 0 0 0 0
## replenish repli report reportabus repositori repres republ
## 1 0 0.003787879 0 0 0 0 0
## republican reput request requir research resel resent resentd
## 1 0 0 0 0 0 0 0 0
## resentmessageid reserv reset resid resist resolut resolv resort resourc
## 1 0 0 0 0 0 0 0 0 0
## respect respond respons rest restaur restor restrict result resum retail
## 1 0 0 0 0 0 0 0 0 0 0
## retain retir retriev return returnpath reuter reveal revenu revers
## 1 0 0 0 0 0.003787879 0 0 0 0
## review revis revolutionari reward rewrit rfc rich richard rick rid ride
## 1 0 0 0 0 0 0 0 0 0 0 0
## right ring rip rise risk rival rlyxlmxaol rlyxwmxaol rmrnetnook road rob
## 1 0 0 0 0 0 0 0 0 0 0 0
## robert robertsnetnom robin robot rock roger rohit roi role roll rom
## 1 0 0 0 0 0 0 0 0 0 0 0
## roman room root rootlocalhost rootlughtuathaorg rose rossum rough round
## 1 0 0 0 0.003787879 0.003787879 0 0 0 0
## rout router routin row rowspan rowspand rpm rpmbuild rpmforgenet rpmlist
## 1 0 0 0 0 0 0 0 0 0 0
## rpmlistadminfreshrpmsnet rpmlistfreshrpmsnet rpms
## 1 0 0 0
## rpmzzzlistadminfreshrpmsnet rpmzzzlistfreshrpmsnet rss rssfeedsjmasonorg
## 1 0 0 0 0
## rssfeedsspamassassintaintorg ruin rule run rush russel russian
## 1 0 0 0 0 0 0 0
## rwcrmhcattbi saa sacv sadev safe safeti sake sale sampl san
## 1 0 0 0 0 0 0 0 0.003787879 0 0
## sansserif sansseriffont saou sat satalk satellit satisfact satisfi
## 1 0 0 0 0 0 0 0 0
## saturday save scalabl scale scam scan sccrmhcattbi scene schedul scheme
## 1 0 0 0 0 0 0 0 0 0 0
## school schuman scienc scientif scientist scoop scope score scott scratch
## 1 0 0 0 0 0 0 0 0 0 0
## scream screen screw script sdwlignet sea seal search season seat seattl
## 1 0 0 0 0 0 0 0 0 0 0 0
## sec second secondari secproglistidsecurityfocus secprogsecurityfocus
## 1 0 0 0 0 0
## secreci secret section sector secur sed see seed seek seiz select sell
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## seminar senat send sender sendmail sendmailb senior sens
## 1 0 0 0.01515152 0.003787879 0 0 0 0
## sensit sentenc senttojmjmasonorgreturnsgroupsyahoo
## 1 0 0 0
## senttoyyyyspamassassintaintorgreturnsgroupsyahoo
## 1 0
## senttozzzzspamassassintaintorgreturnsgroupsyahoo sep separ sept septemb
## 1 0 0 0 0 0
## sequenc seri serial serif serv servant server servic servicenbsp
## 1 0 0 0 0 0 0 0 0.003787879 0
## session set settl settlement setup seventh sever sex sexi sexual sfnet
## 1 0 0 0 0 0 0 0 0 0 0 0
## sha shame shape share sheet shell shift ship shit shock shoe shoot
## 1 0 0.003787879 0 0 0 0 0 0 0 0 0 0
## shop shore short shot shout show shown shut sick side sigh sign
## 1 0 0 0 0 0 0 0 0 0 0 0 0.007575758
## signal signatur signific signup silenc silli similar simpl
## 1 0 0 0 0.003787879 0 0 0 0
## simpli sincer singl singledrop sir sister sit site situat siz
## 1 0.003787879 0.003787879 0 0.003787879 0 0 0 0 0 0
## size sizeappli sizebr sizeclick sizedbr sizedcal sizedcityfonttd
## 1 0 0 0 0 0 0 0
## sizedclick sizedcopi sizedear sizedfont sizedfontfont sizedfonttd
## 1 0 0 0 0 0 0
## sizedmailfonttd sizedphonefonttd sizedrequir sizedstatefonttd
## 1 0 0 0 0
## sizedstrong sizefont sizefontfont sizefre sizeinterest sizenbsp skeptic
## 1 0 0 0 0 0 0 0
## skill skin skip skippobox sky slacknet slashdot slashdotorg sleep slight
## 1 0 0 0 0 0 0 0 0 0 0
## slip slow small smaller smart smell smile smith smoke smoker smooth smtp
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## smtpd smtpeasydn smtpgwmsn smtpmailyahoo smtprs smtpsuperbnet
## 1 0.003787879 0 0 0 0 0
## smtpsvc snip social societi softwar solari sold sole solicit solid solut
## 1 0 0 0 0 0 0 0 0 0 0 0
## solv son song sophist sort soul sound sourc south southern space spain
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## spam spamassassin spamassassincommit
## 1 0.007575758 0 0
## spamassassincommitsadminlistssourceforgenet
## 1 0
## spamassassincommitsadminsourceforgenet
## 1 0
## spamassassincommitslistssourceforgenet spamassassincommitssourceforgenet
## 1 0 0
## spamassassindevel spamassassindeveladminlistssourceforgenet
## 1 0 0
## spamassassindeveladminsourceforgenet
## 1 0
## spamassassindevellistssourceforgenet spamassassindevelsourceforgenet
## 1 0 0
## spamassassinsight spamassassinsightingslistssourceforgenet
## 1 0 0
## spamassassintaintorg spamassassintalk
## 1 0 0
## spamassassintalkadminlistssourceforgenet
## 1 0
## spamassassintalkadminsourceforgenet spamassassintalklistssourceforgenet
## 1 0 0
## spamassassintalksourceforgenet spambay spamc spamd spammer span spanfont
## 1 0 0 0 0 0 0 0
## spanish spanspan spare spawnseenorg speak speaker spec speci special
## 1 0 0 0 0 0 0 0 0 0
## specialist specif speech speed spend spent spi spin spirit split
## 1 0 0 0 0 0 0 0 0 0 0
## spokesman sponsor sport spot spous spray spread spring squar src srcd
## 1 0 0 0 0 0 0 0 0 0 0 0
## srcdhttpiiqimagesgif ssh ssymailssykr ssz stabil stabl staff stage stamp
## 1 0 0 0 0 0 0 0 0 0
## stand standard star start startup stat state statement
## 1 0 0 0 0.003787879 0 0 0.003787879 0
## statesroman static station statist status stay steadi steal step stephen
## 1 0 0 0 0 0 0 0 0 0 0
## stepstep steve steven stick stimul stir stock stoddard stolen
## 1 0 0 0 0 0 0 0 0 0
## stop storag store stori straight strang strategi stream
## 1 0.003787879 0 0 0.003787879 0 0 0 0
## street strength strenuous stress strict strike string strip strong
## 1 0 0 0 0 0 0 0 0 0
## stronger strongfont structur struggl stuck student studi stuff stupid
## 1 0 0 0 0 0 0 0 0 0
## style stylebackgroundcolor stylebordercollaps styleborderleftstyl
## 1 0 0 0 0
## styleborderstyl styledbackgroundcolor styledcolor styledfonts stylefonts
## 1 0 0 0 0 0
## styleposit stylestyl styletextdecor stylus subject subliminalmessag
## 1 0 0 0 0 0.007575758 0
## submiss submit subscrib subscript subsequ substanc substanti succeed
## 1 0 0 0 0.003787879 0 0 0 0
## success suck sudden sue suffer suffici suggest suit sum summari summer
## 1 0 0 0 0 0 0 0 0 0 0 0
## sun sunday sunserverpermafrostnet super superior supplement suppli
## 1 0 0 0 0 0 0 0
## support suppos sure surfac surpris surround survey surviv suse suspect
## 1 0 0 0 0 0 0 0 0 0 0
## sustain swap swe sweet switch sylphe symbol system taa tab tabl
## 1 0 0 0 0 0 0 0 0.007575758 0 0 0
## tablecent tactic tag taglin take talk tall tape target targetnewwin task
## 1 0 0 0 0 0 0 0 0 0 0 0
## tast taught tax tbodi tci tcl tdfont tdimg tdinput tdtd tdtr tdtrtabl
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## tea teach team tech technic techniqu technolog teen teenag tel telecom
## 1 0 0 0 0 0 0 0 0 0 0 0
## teledynam telephon televis tell temperatur templ templat temporari ten
## 1 0 0 0 0 0 0 0 0 0
## tend term termin terribl terror terrorist test testimoni texa
## 1 0 0.003787879 0 0 0 0 0 0 0
## text textd textdecor texthtml textplain thank theft theo theori
## 1 0 0 0 0 0.003787879 0 0 0 0
## thermal thing think thinkgeek thirti thoma thought thousand thread
## 1 0 0 0 0 0 0 0 0.003787879 0
## threadindex threadtop threat threaten throw thu thursday ticket tie till
## 1 0 0 0 0 0 0 0 0 0 0
## tim timcomcastnet timcubh time timubh tini tip tire titl titlefre
## 1 0 0 0 0.003787879 0 0 0 0 0 0
## titletitl tlsvdescbcsha tmda tmdadeepeddyvircio tobacco today todaybr
## 1 0 0 0 0 0 0 0
## todayfont token told toler toll tollfre tom tomorrow tomwhoreslacknet
## 1 0 0 0 0 0 0 0 0 0
## ton toner toni tonight tool top topic topmargin topmargind total touch
## 1 0 0 0 0 0 0 0 0 0 0 0
## tough town toy trace traceback track trade trademark tradit traffic
## 1 0 0 0 0 0 0 0 0 0 0
## trail train transact transfer transform transit transitionalen translat
## 1 0 0 0 0 0 0 0 0
## transmiss transmit trap travel treat treatment tree tremend trend trial
## 1 0 0 0 0 0 0 0 0 0 0
## trick trigger trillion trip trivial troubl trtd truck true trust
## 1 0 0 0 0 0 0 0 0 0 0
## truth tue tuesday tune turn turpin tweak twenti tycho
## 1 0.003787879 0.01136364 0 0 0 0 0 0 0
## type typedhidden typedsubmit typedtext typehidden
## 1 0.003787879 0 0 0 0
## typemultipartaltern typesubmit typetext typetextcss typic uaa
## 1 0 0 0 0 0 0
## ucnombresttd ucsccatsmailscann ugli uid uidgid ulfont ultim unabl uncl
## 1 0 0 0 0 0 0 0 0 0
## undeliver underlin understand understood underwrit undisclos
## 1 0 0 0 0 0 0
## undisclosedrecipi undisclosedrecipientsmandarklabsnetnoteinc unemploy
## 1 0 0 0
## unhappi unhfxeastlinkca union uniqu unit univers unix unknown unlik
## 1 0 0 0 0 0 0 0 0 0
## unlimit unseen unsolicit unsubscrib unsubscriptioninfo unusu unverifi
## 1 0 0 0.007575758 0 0 0 0
## unwant updat upgrad urg urgent url urldhttpinternetmail
## 1 0.003787879 0 0 0 0 0 0
## urlhttpinternetmail urncontentclassesmessag usa usabl usag usb usd use
## 1 0 0 0 0 0 0 0 0
## useless user useracbizmindspr userag userid usual
## 1 0 0.007575758 0 0 0 0
## uswprcvssourceforgenet uswsffwsourceforgenet uswsflistsourceforgenet
## 1 0 0 0
## uswsfnetmiscsourceforgenet uswsfsshgatesourceforgenet utc util utter
## 1 0 0 0 0 0
## uvscan vaa vacat valhalla valid valignbottom valigndcent valigndmiddl
## 1 0 0 0 0 0 0 0 0
## valigndmiddleimg valigndtop valignmiddl valigntop valu valuabl vamm van
## 1 0 0 0 0 0 0 0 0
## vari variabl varieti vast vehicl vendor ventur venusphpwebhost ver
## 1 0 0 0 0 0 0 0 0 0
## verdana verif verifi version versiontlsvsslv versus veteran viagra vice
## 1 0 0 0 0 0 0 0 0 0
## victim video view vill violat violenc violent vircio virginia virtu
## 1 0 0 0 0 0 0 0 0 0 0
## virtual virus virusscan visa visibl vision visit visitor visual vlinkd
## 1 0 0 0 0 0 0 0 0 0 0
## vlinkdccc vlinkdff voic void volum vote vspace waa wage wait
## 1 0 0 0 0 0 0 0 0.007575758 0 0
## wake walk wall want war ward warm warn warrant warranti
## 1 0 0 0.003787879 0 0 0 0 0 0 0
## washington wast wastemindernet watch watchermithr water wave way wcdtd
## 1 0 0 0 0 0 0 0 0 0
## weak wealth wealthi weapon wear weather web weblog webmailyahoo webmast
## 1 0 0 0 0 0 0 0 0 0 0
## webmasterefi webnotenet websit webtvnetnook wed wednesday week weekend
## 1 0 0 0 0 0 0 0 0
## weigh weight weird welch welchpanasa west western wet whatsoev wheel
## 1 0 0 0 0 0 0 0 0 0 0
## white whitehead whitelist wholesal wid wide width widthd widthdfont
## 1 0 0 0 0 0 0 0 0 0
## widthdimg widthdnbsptd widthdtd widthfont widthtd wife wild will william
## 1 0 0 0 0 0 0 0 0 0
## win wind window wing wink winner winter wire wireless wisdom wise wish
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## wit wjqcclyurkqjghsvsegzskxhakcoeiul woman women won wonder wood
## 1 0 0 0 0 0 0 0
## word work worker world worldwid worri wors worst worth
## 1 0.003787879 0.01515152 0 0 0 0 0 0 0
## worthwhil wow wrap wrinkl write writer written wrong wrongdoer wrote www
## 1 0 0 0 0 0 0 0 0 0 0 0
## wwwpanasa xaa xeb xent xerox ximian xine xml xxx yahoo yahooprofil yea
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## yeah year yellow yep yesterday yield york young younger youth
## 1 0 0.003787879 0 0 0 0 0 0 0 0
## yyyylocalhostlabsnetnoteinc yyyylocalhostnetnoteinc
## 1 0 0.003787879
## yyyylocalhostspamassassintaintorg yyyynetez yyyynetmagicnet
## 1 0 0 0
## yyyynetmorenet yyyynetnoteinc yyyyperlspamassassintaintorg
## 1 0 0 0
## yyyyspamassassintaintorg zawodni zip zone zzzzilugspamassassintaintorg
## 1 0 0 0 0 0
## zzzzlocalhost zzzzlocalhostnetnoteinc zzzzlocalhostspamassassintaintorg
## 1 0 0 0
## zzzzrpmspamassassintaintorg zzzzspamassassintaintorg zzzzteana
## 1 0 0 0
## zzzzteanayahoogroup
## 1 0
## [1] 974
##
## Call:
## svm(formula = LABEL ~ ., data = training_data)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 1628
# Second SVM fit, this time with a linear kernel (the summary printed above
# reports a radial kernel). `scale = FALSE` is passed so the predictors are
# used as they are rather than rescaled by svm().
model1 <- svm(
  LABEL ~ .,
  data = training_data,
  kernel = "linear",
  scale = FALSE
)
# Auto-print the fitted object: call, parameters and support-vector count.
model1
##
## Call:
## svm(formula = LABEL ~ ., data = training_data, kernel = "linear",
## scale = FALSE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 1493
# Score the held-out rows with both fitted models. The LABEL column is
# dropped first so only the remaining columns are handed to predict().
# `predictions` comes from `model`; `predictions1` from the linear `model1`.
predictions <- predict(model, select(testing_data, -LABEL))
predictions1 <- predict(model1, select(testing_data, -LABEL))
# Radial model: cross-tabulate the predicted labels (rows) against the true
# labels of the test set (columns).
table(
  Prediction = predictions,
  Truth = testing_data[["LABEL"]]
)
## Truth
## Prediction Ham Spam
## Ham 625 31
## Spam 0 318
## Truth
## Prediction Ham Spam
## Ham 625 32
## Spam 0 317
# kableExtra supplies kable_styling(), used to format the table below.
library(kableExtra)

# Render the predicted-vs-actual counts for `predictions` as a styled table
# (striped rows, hover highlight, responsive layout).
table(predictions, testing_data[["LABEL"]]) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "responsive")
  )
| Ham | Spam | |
|---|---|---|
| Ham | 625 | 31 |
| Spam | 0 | 318 |
## Confusion Matrix and Statistics
##
## Reference
## Prediction Ham Spam
## Ham 625 31
## Spam 0 318
##
## Accuracy : 0.9682
## 95% CI : (0.9551, 0.9783)
## No Information Rate : 0.6417
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9294
##
## Mcnemar's Test P-Value : 7.118e-08
##
## Sensitivity : 1.0000
## Specificity : 0.9112
## Pos Pred Value : 0.9527
## Neg Pred Value : 1.0000
## Prevalence : 0.6417
## Detection Rate : 0.6417
## Detection Prevalence : 0.6735
## Balanced Accuracy : 0.9556
##
## 'Positive' Class : Ham
##
## Confusion Matrix and Statistics
##
## Reference
## Prediction Ham Spam
## Ham 625 32
## Spam 0 317
##
## Accuracy : 0.9671
## 95% CI : (0.9539, 0.9774)
## No Information Rate : 0.6417
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9271
##
## Mcnemar's Test P-Value : 4.251e-08
##
## Sensitivity : 1.0000
## Specificity : 0.9083
## Pos Pred Value : 0.9513
## Neg Pred Value : 1.0000
## Prevalence : 0.6417
## Detection Rate : 0.6417
## Detection Prevalence : 0.6745
## Balanced Accuracy : 0.9542
##
## 'Positive' Class : Ham
##