This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load every .txt file matched by the glob "romney/*.txt" into `romney`,
# then show the first six rows of the result.
romney <- readtext(file = "romney/*.txt")
head(romney, n = 6L)
## readtext object consisting of 6 documents and 0 docvars.
## # Description: df[,2] [6 x 2]
## doc_id text
## <chr> <chr>
## 1 011508-romney-wins-the-michigan-primary.txt "\"Tonight ma\".~
## 2 052312-remarks-on-education-a-chance-for-every-child-2.txt "\"Thanks to \".~
## 3 060712-a-champion-for-free-enterprise-2.txt "\"Thank you.\".~
## 4 062112-remarks-to-naleo-growing-opportunity-for-all-america~ "\"Thank you \".~
## 5 071112-speech-to-the-naacp-convention.txt "\"Thank you,\".~
## 6 072412-remarks-at-the-vfw-national-convention.txt "\"Thank you.\".~
# List the column names of the loaded data frame.
colnames(romney)
## [1] "doc_id" "text"
# Create the corpus
# Build a corpus object from the `romney` data frame loaded above.
my.corpus.romney <- corpus(romney)

# Attach a document variable "Speechno" holding a zero-padded, two-digit
# running number ("01", "02", ...) for each document, in corpus order.
# seq_len() is used rather than 1:ndoc(...) so that an empty corpus yields an
# empty vector instead of the descending sequence c(1, 0).
docvars(my.corpus.romney, "Speechno") <- sprintf(
  "%02d",
  seq_len(ndoc(my.corpus.romney))
)

# Print the corpus to inspect it.
my.corpus.romney
## Corpus consisting of 22 documents and 1 docvar.
## 011508-romney-wins-the-michigan-primary.txt :
## "Tonight marks the beginning of a comeback, a comeback for Am..."
##
## 052312-remarks-on-education-a-chance-for-every-child-2.txt :
## "Thanks to the U.S. Chamber of Commerce for hosting us. This ..."
##
## 060712-a-champion-for-free-enterprise-2.txt :
## "Thank you. It is great to be here. We launched this campaign..."
##
## 062112-remarks-to-naleo-growing-opportunity-for-all-americans-2.txt :
## "Thank you for inviting me to your annual conference. It’s an..."
##
## 071112-speech-to-the-naacp-convention.txt :
## "Thank you, Bishop Graves, for your generous introduction. Th..."
##
## 072412-remarks-at-the-vfw-national-convention.txt :
## "Thank you. Commander Richard DeNoyer, I appreciate the intro..."
##
## [ reached max_ndoc ... 16 more documents ]
# Generate corpus statistics
# Store the summary of the corpus, then print it.
my.corpus.stats.romney <- summary(my.corpus.romney)
my.corpus.stats.romney
## Corpus consisting of 22 documents, showing 22 documents:
##
## Text Types
## 011508-romney-wins-the-michigan-primary.txt 292
## 052312-remarks-on-education-a-chance-for-every-child-2.txt 1026
## 060712-a-champion-for-free-enterprise-2.txt 695
## 062112-remarks-to-naleo-growing-opportunity-for-all-americans-2.txt 802
## 071112-speech-to-the-naacp-convention.txt 1008
## 072412-remarks-at-the-vfw-national-convention.txt 1141
## 072912-mitt-romney-policy-speech-in-jerusalem.txt 831
## 081112-mitt-romney-makes-his-vp-announcement.txt 773
## 081412-mitt-romneys-speech-in-chillicothe-ohio.txt 709
## 083012-romney-accepts-the-gop-nomination-in-tampa.txt 1192
## 091112-the-national-guard-association-conference.txt 815
## 091712-the-clinton-global-initiative.txt 911
## 091712-u-s-hispanic-chamber-of-commerce.txt 964
## 100812-foreign-policy-speech-the-mantel-of-leadership.txt 1069
## 101907-values-voters-summit-2007.txt 896
## 102612-remarks-on-the-american-economy.txt 896
## 11212-real-change-from-day-one.txt 800
## 11712-mitt-romneys-concession-speech.txt 280
## cpac-2007.txt 857
## cpac-2008-romney-suspends-campaign.txt 869
## faith-in-america.txt 890
## rnc-2008-932008.txt 452
## Tokens Sentences Speechno
## 902 56 01
## 3224 185 02
## 1988 97 03
## 2428 141 04
## 3276 162 05
## 3673 196 06
## 2434 122 07
## 2429 133 08
## 2107 141 09
## 4568 269 10
## 2550 125 11
## 2451 136 12
## 3051 166 13
## 3858 168 14
## 2845 154 15
## 3019 139 16
## 2786 167 17
## 787 53 18
## 2480 148 19
## 2749 171 20
## 2901 142 21
## 1073 59 22
# Show the summary rows ordered by the Types column, largest first.
arrange(my.corpus.stats.romney, desc(Types))
## Corpus consisting of 22 documents, showing 22 documents:
##
## Text Types
## 083012-romney-accepts-the-gop-nomination-in-tampa.txt 1192
## 072412-remarks-at-the-vfw-national-convention.txt 1141
## 100812-foreign-policy-speech-the-mantel-of-leadership.txt 1069
## 052312-remarks-on-education-a-chance-for-every-child-2.txt 1026
## 071112-speech-to-the-naacp-convention.txt 1008
## 091712-u-s-hispanic-chamber-of-commerce.txt 964
## 091712-the-clinton-global-initiative.txt 911
## 101907-values-voters-summit-2007.txt 896
## 102612-remarks-on-the-american-economy.txt 896
## faith-in-america.txt 890
## cpac-2008-romney-suspends-campaign.txt 869
## cpac-2007.txt 857
## 072912-mitt-romney-policy-speech-in-jerusalem.txt 831
## 091112-the-national-guard-association-conference.txt 815
## 062112-remarks-to-naleo-growing-opportunity-for-all-americans-2.txt 802
## 11212-real-change-from-day-one.txt 800
## 081112-mitt-romney-makes-his-vp-announcement.txt 773
## 081412-mitt-romneys-speech-in-chillicothe-ohio.txt 709
## 060712-a-champion-for-free-enterprise-2.txt 695
## rnc-2008-932008.txt 452
## 011508-romney-wins-the-michigan-primary.txt 292
## 11712-mitt-romneys-concession-speech.txt 280
## Tokens Sentences Speechno
## 4568 269 10
## 3673 196 06
## 3858 168 14
## 3224 185 02
## 3276 162 05
## 3051 166 13
## 2451 136 12
## 2845 154 15
## 3019 139 16
## 2901 142 21
## 2749 171 20
## 2480 148 19
## 2434 122 07
## 2550 125 11
## 2428 141 04
## 2786 167 17
## 2429 133 08
## 2107 141 09
## 1988 97 03
## 1073 59 22
## 902 56 01
## 787 53 18
# Demonstrate sprintf() vectorisation: "%02d" zero-pads each integer to two
# digits, producing one file name per element of the sequence 1..10.
sprintf("text%02dtest.csv", seq_len(10))
## [1] "text01test.csv" "text02test.csv" "text03test.csv" "text04test.csv"
## [5] "text05test.csv" "text06test.csv" "text07test.csv" "text08test.csv"
## [9] "text09test.csv" "text10test.csv"
# Demonstrate paste() with `collapse`: the pieces of each element are joined
# with the default separator (a space), and the ten resulting strings are then
# collapsed into a single space-separated string.
paste("test", seq_len(10), "phooey", collapse = " ")
## [1] "test 1 phooey test 2 phooey test 3 phooey test 4 phooey test 5 phooey test 6 phooey test 7 phooey test 8 phooey test 9 phooey test 10 phooey"