## Package version: 2.1.2
## Parallel computing: 2 of 2 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
##
## View
## Loading required package: usethis
##
## Attaching package: 'quanteda.textmodels'
## The following object is masked from 'package:quanteda':
##
## data_dfm_lbgexample
##
## Attaching package: 'seededlda'
## The following object is masked from 'package:stats':
##
## terms
##
## Attaching package: 'rsconnect'
## The following object is masked from 'package:devtools':
##
## lint
##
## Attaching package: 'packrat'
## The following objects are masked from 'package:devtools':
##
## install, install_local
R Markdown
#read the PDF files with readtext
## readtext object consisting of 28 documents and 1 docvar.
## # Description: df[,3] [28 × 3]
## doc_id text docvar1
## <chr> <chr> <chr>
## 1 Bodo1.pdf "\" \"..." Bodo1
## 2 Bodo10.pdf "\" A\"..." Bodo10
## 3 Bodo11.pdf "\" \"..." Bodo11
## 4 Bodo12.pdf "\" \"..." Bodo12
## 5 Bodo13.pdf "\" Bodo agre\"..." Bodo13
## 6 Bodo14.pdf "\" Fourteen\"..." Bodo14
## # … with 22 more rows
#create corpus
## [1] 28
## docvar1
## 1 Bodo1
## 2 Bodo10
## 3 Bodo11
## 4 Bodo12
## 5 Bodo13
## 6 Bodo14
## Corpus consisting of 28 documents, showing 28 documents:
##
## Text Types Tokens Sentences docvar1
## Bodo1.pdf 230 758 31 Bodo1
## Bodo10.pdf 372 775 36 Bodo10
## Bodo11.pdf 251 462 14 Bodo11
## Bodo12.pdf 349 806 27 Bodo12
## Bodo13.pdf 186 351 10 Bodo13
## Bodo14.pdf 306 601 30 Bodo14
## Bodo15.pdf 230 417 12 Bodo15
## Bodo16.pdf 368 848 28 Bodo16
## Bodo17.pdf 538 1208 50 Bodo17
## Bodo18.pdf 290 719 17 Bodo18
## Bodo19.pdf 210 718 19 Bodo19
## Bodo2.pdf 245 446 21 Bodo2
## Bodo20.pdf 343 860 32 Bodo20
## Bodo21.pdf 335 598 11 Bodo21
## Bodo22.pdf 239 732 31 Bodo22
## Bodo23.pdf 286 648 21 Bodo23
## Bodo24.pdf 651 1567 58 Bodo24
## Bodo25.pdf 304 678 21 Bodo25
## Bodo26.pdf 1259 4556 164 Bodo26
## Bodo27.pdf 355 718 29 Bodo27
## Bodo28.pdf 394 909 27 Bodo28
## Bodo3.pdf 248 456 15 Bodo3
## Bodo4.pdf 535 1287 42 Bodo4
## Bodo5.pdf 330 710 23 Bodo5
## Bodo6.pdf 276 505 18 Bodo6
## Bodo7.pdf 216 342 14 Bodo7
## Bodo8.pdf 187 312 11 Bodo8
## Bodo9.pdf 273 533 22 Bodo9
#create dfm
## Length Class Mode
## 67648 dfm S4
#Cleaning up using tokens
## Length Class Mode
## Bodo1.pdf 390 -none- character
## Bodo10.pdf 412 -none- character
## Bodo11.pdf 246 -none- character
## Bodo12.pdf 407 -none- character
## Bodo13.pdf 203 -none- character
## Bodo14.pdf 315 -none- character
## Bodo15.pdf 214 -none- character
## Bodo16.pdf 474 -none- character
## Bodo17.pdf 616 -none- character
## Bodo18.pdf 408 -none- character
## Bodo19.pdf 348 -none- character
## Bodo2.pdf 240 -none- character
## Bodo20.pdf 441 -none- character
## Bodo21.pdf 337 -none- character
## Bodo22.pdf 407 -none- character
## Bodo23.pdf 355 -none- character
## Bodo24.pdf 814 -none- character
## Bodo25.pdf 352 -none- character
## Bodo26.pdf 2196 -none- character
## Bodo27.pdf 367 -none- character
## Bodo28.pdf 463 -none- character
## Bodo3.pdf 247 -none- character
## Bodo4.pdf 617 -none- character
## Bodo5.pdf 368 -none- character
## Bodo6.pdf 271 -none- character
## Bodo7.pdf 172 -none- character
## Bodo8.pdf 169 -none- character
## Bodo9.pdf 291 -none- character
#kwic does not work on a dfm (a dfm does not keep word order), so it has to be run on the tokens object
Bodo1.pdf |
13 |
13 |
juli monday uni |
right |
reserv length word |
right |
Bodo10.pdf |
16 |
16 |
ht media ltd |
right |
reserv length word |
right |
Bodo10.pdf |
392 |
392 |
due disput land |
right |
publish ht syndic |
right |
Bodo11.pdf |
14 |
14 |
sunday kashmir monitor |
right |
reserv length word |
right |
Bodo12.pdf |
19 |
19 |
express distribut contify.com |
right |
reserv length word |
right |
Bodo13.pdf |
20 |
20 |
solut privat limit |
right |
reserv length word |
right |
Bodo14.pdf |
19 |
19 |
associat newspap ltd |
right |
reserv section indianews |
right |
Bodo15.pdf |
18 |
18 |
septemb saturday uni |
right |
reserv length word |
right |
Bodo15.pdf |
128 |
128 |
migrant assam champion |
right |
nativ local economi |
right |
Bodo16.pdf |
18 |
18 |
vision distribut contify.com |
right |
reserv length word |
right |
Bodo17.pdf |
21 |
21 |
august wednesday menafn.com |
right |
reserv length word |
right |
Bodo17.pdf |
604 |
604 |
hong kong compani |
right |
reserv pleas contact |
right |
Bodo18.pdf |
23 |
23 |
gmt india today |
right |
reserv length word |
right |
Bodo19.pdf |
13 |
13 |
june tuesday uni |
right |
reserv length word |
right |
Bodo2.pdf |
17 |
17 |
ltd distribut contify.com |
right |
reserv length word |
right |
Bodo20.pdf |
15 |
15 |
januari monday newswing |
right |
reserv length word |
right |
Bodo21.pdf |
19 |
19 |
coleman co ltd |
right |
reserv section times |
right |
Bodo22.pdf |
12 |
12 |
october monday uni |
right |
reserv length word |
right |
Bodo23.pdf |
17 |
17 |
ht media ltd |
right |
reserv length word |
right |
Bodo23.pdf |
201 |
201 |
fund centr land |
right |
bodo btad area |
right |
Bodo24.pdf |
20 |
20 |
media inc syndigate.info |
right |
reserv length word |
right |
Bodo24.pdf |
116 |
116 |
greater recognit bodo |
right |
ident claim sever |
right |
Bodo24.pdf |
166 |
166 |
polit autonomi econom |
right |
date back post-coloni |
right |
Bodo24.pdf |
584 |
584 |
safeguard non-bodo properti |
right |
expens indigen communiti |
right |
Bodo24.pdf |
628 |
628 |
group beyond defend |
right |
properti bodo attack |
right |
Bodo24.pdf |
750 |
750 |
potenti impact might |
right |
push independ bodo |
right |
Bodo25.pdf |
19 |
19 |
solut privat limit |
right |
reserv length word |
right |
Bodo26.pdf |
13 |
13 |
week distribut contify.com |
right |
reserv length word |
right |
Bodo26.pdf |
756 |
756 |
imagin attempt prove |
right |
autochthon land becam |
right |
Bodo26.pdf |
1229 |
1229 |
establish ident polit |
right |
carri histor retribut |
right |
Bodo26.pdf |
1690 |
1690 |
issu exclus land |
right |
control land issu |
right |
Bodo26.pdf |
1877 |
1877 |
customari law communiti |
right |
taken cognis roy |
right |
Bodo26.pdf |
1884 |
1884 |
40-44 format btc |
right |
recognis btc unlik |
right |
Bodo26.pdf |
1929 |
1929 |
schedul preserv land |
right |
section also safeguard |
right |
Bodo26.pdf |
1934 |
1934 |
also safeguard settlement |
right |
transfer inherit properti |
right |
Bodo26.pdf |
1955 |
1955 |
issu especi land |
right |
defin right son |
right |
Bodo26.pdf |
1957 |
1957 |
land right defin |
right |
son soil state |
right |
Bodo26.pdf |
1975 |
1975 |
though provis ensur |
right |
non-bodo bodo govern |
right |
Bodo26.pdf |
1987 |
1987 |
ethnic violenc bodoland |
right |
dichotomi bodo non-bodo |
right |
Bodo27.pdf |
16 |
16 |
ht media ltd |
right |
reserv length word |
right |
Bodo28.pdf |
17 |
17 |
express distribut contify.com |
right |
reserv length word |
right |
Bodo3.pdf |
17 |
17 |
associat newspap ltd |
right |
reserv section indianews |
right |
Bodo4.pdf |
15 |
15 |
tribun p ltd |
right |
reserv length word |
right |
Bodo5.pdf |
18 |
18 |
coleman co ltd |
right |
reserv section india |
right |
Bodo5.pdf |
362 |
362 |
input agenc reprint |
right |
timescontent.com load-dat decemb |
right |
Bodo6.pdf |
14 |
14 |
cmyk printech ltd |
right |
reserv length word |
right |
Bodo7.pdf |
17 |
17 |
coleman co ltd |
right |
reserv section times |
right |
Bodo8.pdf |
28 |
28 |
ltd distribut contify.com |
right |
reserv length word |
right |
Bodo9.pdf |
14 |
14 |
thursday india bloom |
right |
reserv length word |
right |
Bodo10.pdf |
183 |
183 |
carri arm rebellion |
independ |
bodoland ndfb origin |
independ |
Bodo14.pdf |
224 |
224 |
attack group fight |
independ |
homeland region bodo |
independ |
Bodo14.pdf |
296 |
296 |
greater region autonomi |
independ |
homeland indigen group |
independ |
Bodo17.pdf |
57 |
57 |
oppos talk goal |
independ |
bodoland indigen bodo |
independ |
Bodo17.pdf |
563 |
563 |
futil sudha ramachandran |
independ |
journalist research base |
independ |
Bodo24.pdf |
308 |
308 |
expansion occup creation |
independ |
socialist state group |
independ |
Bodo24.pdf |
752 |
752 |
might right push |
independ |
bodo territori might |
independ |
Bodo4.pdf |
232 |
232 |
faction last elect |
independ |
poll vote share |
independ |
Bodo4.pdf |
476 |
476 |
former ulfa leader |
independ |
mp naba sarania-l |
independ |
Bodo2.pdf |
5 |
5 |
assam bodo milit |
free |
press journal india |
free |
Bodo11.pdf |
77 |
77 |
crpc civil author |
decis |
initi fresh action |
decis |
Bodo19.pdf |
45 |
45 |
ministri taken polici |
decis |
elimin milit region |
decis |
Bodo19.pdf |
66 |
66 |
radic record admit |
decis |
strateg made low |
decis |
Bodo19.pdf |
195 |
195 |
ministri taken polici |
decis |
elimin milit region |
decis |
Bodo19.pdf |
216 |
216 |
radic record admit |
decis |
strateg made low |
decis |
Bodo24.pdf |
40 |
40 |
point new delhi |
decis |
privileg militari forc |
decis |
Bodo26.pdf |
1465 |
1465 |
singl largest parti |
decis |
govern form post-elector |
decis |
Bodo4.pdf |
281 |
281 |
apathi two controversi |
decis |
pertain januari third |
decis |
Bodo4.pdf |
300 |
300 |
indigen communiti one |
decis |
sign pact three |
decis |
Bodo17.pdf |
207 |
207 |
tribal brought assam |
coloni |
time work tea |
coloni |
Bodo24.pdf |
510 |
510 |
assam new delhi |
coloni |
era british brought |
coloni |
Bodo26.pdf |
439 |
439 |
histor marginalis communiti |
coloni |
post-independ period attempt |
coloni |
Bodo26.pdf |
447 |
447 |
umbrella assames nation |
coloni |
period bodo defin |
coloni |
Bodo26.pdf |
501 |
501 |
prior absu movement |
coloni |
period bodo particip |
coloni |
Bodo26.pdf |
870 |
870 |
british settl area |
coloni |
period sever dimens |
coloni |
Bodo26.pdf |
936 |
936 |
resist non-bodo sinc |
coloni |
period assames nation |
coloni |
Bodo26.pdf |
1708 |
1708 |
decad 20th centuri |
coloni |
intervent assam north- |
coloni |
Bodo26.pdf |
1721 |
1721 |
pattern demograph structur |
coloni |
saw systemat process |
coloni |
Bodo26.pdf |
1754 |
1754 |
land practis even |
coloni |
period attempt keep |
coloni |
Bodo26.pdf |
1797 |
1797 |
migrat systemat polici |
coloni |
state encourag migrat |
coloni |
Bodo26.pdf |
1818 |
1818 |
assembl land issu |
coloni |
govern introduc line |
coloni |
Bodo26.pdf |
2146 |
2146 |
tribal identiti polit |
coloni |
assam plain tribe |
coloni |
Bodo26.pdf |
153 |
153 |
mosqu claim area |
occupi |
construct mosqu part |
occupi |
Bodo10.pdf |
157 |
157 |
centr ndfb-s unite |
liber |
front asom-independ led |
liber |
Bodo12.pdf |
233 |
233 |
platform unite nation |
liber |
front western south |
liber |
Bodo15.pdf |
116 |
116 |
outlaw ulfa unite |
liber |
front assam essenti |
liber |
Bodo18.pdf |
303 |
303 |
milit group bodo |
liber |
tiger blt lead |
liber |
Bodo23.pdf |
317 |
317 |
deal sign bodo |
liber |
tiger led format |
liber |
Bodo24.pdf |
260 |
260 |
like ndfb bodo |
liber |
tiger blt form |
liber |
Bodo24.pdf |
302 |
302 |
ndfb origin focus |
liber |
bodoland indian expansion |
liber |
Bodo24.pdf |
617 |
617 |
includ adivasi nation |
liber |
armi aanla birsa |
liber |
Bodo25.pdf |
280 |
280 |
milit group bodo |
liber |
tiger lead format |
liber |
Bodo26.pdf |
142 |
142 |
signboard cadr ex-bodo |
liber |
tiger blt piec |
liber |
Bodo26.pdf |
1092 |
1092 |
cobra milit bengal |
liber |
tiger began retali |
liber |
Bodo4.pdf |
124 |
124 |
unite peopl parti |
liber |
uppl aiudf gana |
liber |
Bodo4.pdf |
181 |
181 |
milit outfit bodo |
liber |
tiger blt concern |
liber |
Bodo6.pdf |
142 |
142 |
ulfa ndfb kamtapur |
liber |
organis rebel group |
liber |
Bodo9.pdf |
235 |
235 |
i.k songbijit vow |
liber |
bodoland bodo tribesmen |
liber |
Bodo20.pdf |
311 |
311 |
central state govern |
equal |
particip crore said |
equal |
Bodo25.pdf |
201 |
201 |
next three year |
equal |
contribut rs crore |
equal |
Bodo11.pdf |
106 |
106 |
bow arrow follow |
massacr |
communiti member bodo |
massacr |
Bodo11.pdf |
176 |
176 |
offic assam follow |
massacr |
adivasi bodo milit |
massacr |
Bodo12.pdf |
376 |
376 |
said behind decemb |
massacr |
adivasi assam songbijit |
massacr |
Bodo15.pdf |
151 |
151 |
christian decemb involv |
massacr |
tribal past kill |
massacr |
Bodo17.pdf |
263 |
263 |
kill howev unlik |
massacr |
past target outsid |
massacr |
Bodo21.pdf |
48 |
48 |
friday bodo milit |
massacr |
bengali-speak muslim belief |
massacr |
Bodo26.pdf |
100 |
100 |
memori imag nelli |
massacr |
thousand alleg illeg |
massacr |
Bodo26.pdf |
776 |
776 |
muslim peasant brutal |
massacr |
barpeta lower assam |
massacr |
Bodo26.pdf |
1173 |
1173 |
talk cadr also |
massacr |
koch rajbongshi ndfb-p |
massacr |
Bodo5.pdf |
80 |
80 |
adivasi backlash tuesday |
massacr |
continu second day |
massacr |
Bodo22.pdf |
151 |
151 |
daimari warn bigger |
war |
achiev faction goal |
war |
Bodo22.pdf |
333 |
333 |
daimari warn bigger |
war |
achiev faction goal |
war |
Bodo24.pdf |
700 |
700 |
talk negoti mean |
war |
parti continu lack |
war |
Bodo26.pdf |
844 |
844 |
tacit support ethnic |
cleans |
sever clash soon |
cleans |
Bodo26.pdf |
1043 |
1043 |
bodo endeavour ethnic |
cleans |
non-bodo panic-stricken non-bodo |
cleans |
Bodo27.pdf |
133 |
133 |
muslim target ethnic |
cleans |
bodo-inhabit area along |
cleans |
## Tokens consisting of 28 documents and 1 docvar.
## Bodo1.pdf :
## [1] "bodo" "milit" "kill" "armi" "personnel" "uni"
## [7] "unite" "new" "india" "juli" "monday" "uni"
## [ ... and 378 more ]
##
## Bodo10.pdf :
## [1] "assam" "suspect" "bodo" "milit" "fire" "kokrajhar"
## [7] "market" "kill" "hindustan" "time" "august" "saturday"
## [ ... and 400 more ]
##
## Bodo11.pdf :
## [1] "armi" "begin" "all-out" "offens" "bodo" "milit" "kashmir"
## [8] "monitor" "india" "januari" "sunday" "kashmir"
## [ ... and 234 more ]
##
## Bodo12.pdf :
## [1] "bodo" "milit" "outfit" "ndfb" "s" "shun" "violenc"
## [8] "join" "peac" "talk" "indian" "express"
## [ ... and 395 more ]
##
## Bodo13.pdf :
## [1] "bodo" "agreement" "amit" "shah" "sign" "accord"
## [7] "dread" "bodo" "milit" "group" "ndfb" "publish"
## [ ... and 191 more ]
##
## Bodo14.pdf :
## [1] "fourteen" "kill" "suspect" "bodo" "milit"
## [6] "open" "fire" "crowd" "assam" "marketplac"
## [11] "mailonlin" "august"
## [ ... and 303 more ]
##
## [ reached max_ndoc ... 22 more documents ]
Bodo.tokens2
#create dfm from the tokens produced in the first tokenization step
#word cloud 
#co-occurrence
## btc non-bodo accord land territori tribal muslim demand
## 44225 32524 26122 25595 23229 21638 19127 18602
## communiti conflict
## 17974 17712
## [1] "btc" "non-bodo" "accord" "land" "territori" "tribal"
## [7] "muslim" "demand" "communiti" "conflict" "movement" "immigr"
## [13] "period" "govern" "schedul" "council" "absu" "state"
## [19] "clash" "attack" "coloni" "polit" "said" "bodoland"
## [25] "issu"
