# Set the working directory and list its contents
# Switch the session into the project folder, then print the names of the
# entries it contains (`list.files()` with no arguments lists the current
# working directory).
setwd(dir = "/Users/ntlrsmllghn/Dropbox/Data/Data 607/spam:ham/")
list.files()
## [1] "20021010_easy_ham.tar copy.bz2" "20021010_easy_ham.tar.bz2"
## [3] "20030228_spam.tar.bz2" "Data 607 Project 4.Rmd"
## [5] "easy_ham" "spam"
# List the ham and spam message files
# Absolute paths of the ham and spam message folders.
ham_file <- "/Users/ntlrsmllghn/Dropbox/Data/Data 607/spam:ham/easy_ham/"
spam_file <- "/Users/ntlrsmllghn/Dropbox/Data/Data 607/spam:ham/spam/"

# Character vectors with the names of the files found in each folder.
ham_names <- list.files(path = ham_file)
spam_names <- list.files(path = spam_file)
# Create the ham and spam corpora
# Build a corpus with one document per file in the ham folder.
# Reuses `ham_file` (the ham directory path defined earlier in this file)
# instead of repeating the literal, so the file listing and the corpus are
# guaranteed to read from the same location.
ham_corpus <- Corpus(DirSource(ham_file))
# Auto-print the corpus summary (class, metadata and document count).
ham_corpus
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 2551
# Build a corpus with one document per file in the spam folder.
# Reuses `spam_file` (the spam directory path defined earlier in this file)
# instead of repeating the literal, so the file listing and the corpus are
# guaranteed to read from the same location.
spam_corpus <- Corpus(DirSource(spam_file))
# Auto-print the corpus summary (class, metadata and document count).
spam_corpus
## <<SimpleCorpus>>
## Metadata: corpus specific: 1, document level (indexed): 0
## Content: documents: 501