library(tm)
## Warning: package 'tm' was built under R version 4.3.3
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 4.3.3
docs <- Corpus(DirSource("Alice in Wonderland"))
docs
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 12
#docs[[2]]
content(docs[[2]])[1:3]
## [1] "The   Mock   Turtle   sighed   deeply   and   drew   the   back   of   one   flapper   across   his   eyes.   He   looked   at   Alice   and   tried   to   speak   but   for   a   minute   or   two   sobs   choked   his   voice.   <93>Same   as   if   he   had   a   bone   in   his   throat   <94>   said   the   Gryphon:   and   it   set   to   work   shaking   him   and   punching   him   in   the   back.   At   last   the   Mock   Turtle   recovered   his   voice   and   with   tears   running   down   his   cheeks   he   went   on   again:<97>                                                      \n<93>You   may   not   have   lived   much   under   the   sea<97><94>   (<93>I   haven<92>t   <94>   said   Alice)<97><93>and   perhaps   you   were   never   even   introduced   to   a   lobster<97><94>   (Alice   began   to   say   <93>I   once   tasted<97><94>   but   checked   herself   hastily   and   said   <93>No   never<94>)   <93><97>so   you   can   have   no   idea   what   a   delightful   thing   a   Lobster   Quadrille   is!<94>                                                                                                                                       \n<93>No   indeed   <94>   said   Alice.   <93>What   sort   of   a   dance   is   it?<94>                                                                                                                                                                                                                                                               \n<93>Why   <94>   said   the   Gryphon   <93>you   first   form   into   a   line   along   the   sea-shore<97><94>                                                                                                                                                                                                                                                         \n<93>Two   lines!<94>   cried   the   Mock   Turtle.   <93>Seals   turtles   salmon   and   so   on;   then   when   you<92>ve   cleared   all   the   jelly-fish   out   of   the   way<97><94>                                                                                                                                                                                                                              \n<93>That<a0>generally   takes   some   time   <94>   interrupted   the   Gryphon.                                                                                                                                                                                                                                                                           \n<93><97>you   advance   twice<97><94>                                                                                                                                                                                                                                                                                          \n<93>Each   with   a   lobster   as   a   partner!<94>   cried   the   Gryphon.                                                                                                                                                                                                                                                                     \n<93>Of   course   <94>   the   Mock   Turtle   said:   <93>advance   twice   set   to   partners<97><94>                                                                                                                                                                                                                                                               \n<93><97>change   lobsters   and   retire   in   same   order   <94>   continued   the   Gryphon.                                                                                                                                                                                                                                                                  \n<93>Then   you   know   <94>   the   Mock   Turtle   went   on   <93>you   throw   the<97><94>                                                                                                                                                                                                                                                               \n<93>The   lobsters!<94>   shouted   the   Gryphon   with   a   bound   into   the   air.                                                                                                                                                                                                                                                                  \n<93><97>as   far   out   to   sea   as   you   can<97><94>                                                                                                                                                                                                                                                                           \n<93>Swim   after   them!<94>   screamed   the   Gryphon.                                                                                                                                                                                                                                                                                 \n<93>Turn   a   somersault   in   the   sea!<94>   cried   the   Mock   Turtle   capering   wildly   about.                                                                                                                                                                                                                                                            \n<93>Change   lobsters   again!<94>   yelled   the   Gryphon   at   the   top   of   its   voice.                                                                                                                                                                                                                                                               \n<93>Back   to   land   again   and   that<92>s   all   the   first   figure   <94>   said   the   Mock   Turtle   suddenly   dropping   his   voice;   and   the   two   creatures   who   had   been   jumping   about   like   mad   things   all   this   time   sat   down   again   very   sadly   and   quietly   and   looked   at   Alice.                                                                                                                                                            \n<93>It   must   be   a   very   pretty   dance   <94>   said   Alice   timidly.                                                                                                                                                                                                                                                                  \n<93>Would   you   like   to   see   a   little   of   it?<94>   said   the   Mock   Turtle.                                                                                                                                                                                                                                                            \n<93>Very   much   indeed   <94>   said   Alice.                                                                                                                                                                                                                                                                                 \n<93>Come   let<92>s   try   the   first   figure!<94>   said   the   Mock   Turtle   to   the   Gryphon.   <93>We   can   do   without   lobsters   you   know.   Which   shall   sing?<94>                                                                                                                                                                                                                              \n<93>Oh   <a0>you<a0>sing   <94>   said   the   Gryphon.   <93>I<92>ve   forgotten   the   words.<94>                                                                                                                                                                                                                                                                     \nSo   they   began   solemnly   dancing   round   and   round   Alice   every   now   and   then   treading   on   her   toes   when   they   passed   too   close   and   waving   their   forepaws   to   mark   the   time   while   the   Mock   Turtle   sang   this   very   slowly   and   sadly:<97>                                                                                                                                                                           \n                                                                                                                                                                                                                                                                                                \n<93>Will   you   walk   a   little   faster?<94>   said   a   whiting   to   a   snail.                                                                                                                                                                                                                                                               \n<93>There<92>s   a   porpoise   close   behind   us   and   he<92>s   treading   on   my   tail.                                                                                                                                                                                                                                                               \nSee   how   eagerly   the   lobsters   and   the   turtles   all   advance!                                                                                                                                                                                                                                                                     \nThey   are   waiting   on   the   shingle<97>will   you   come   and   join   the   dance?                                                                                                                                                                                                                                                               \nWill   you   won<92>t   you   will   you   won<92>t   you   will   you   join   the   dance?                                                                                                                                                                                                                                                            \nWill   you   won<92>t   you   will   you   won<92>t   you   won<92>t   you   join   the   dance?                                                                                                                                                                                                                                                            \n                                                                                                                                                                                                                                                                                                \n<93>You   can   really   have   no   notion   how   delightful   it   will   be                                                                                                                                                                                                                                                                  \nWhen   they   take   us   up   and   throw   us   with   the   lobsters   out   to   sea!<94>                                                                                                                                                                                                                                                         \nBut   the   snail   replied   <93>Too   far   too   far!<94>   and   gave   a   look   askance<97>                                                                                                                                                                                                                                                            \nSaid   he   thanked   the   whiting   kindly   but   he   would   not   join   the   dance.                                                                                                                                                                                                                                                            \nWould   not   could   not   would   not   could   not   would   not   join   the   dance.                                                                                                                                                                                                                                                            \nWould   not   could   not   would   not   could   not   could   not   join   the   dance.                                                                                                                                                                                                                                                            \n                                                                                                                                                                                                                                                                                                \n<93>What   matters   it   how   far   we   go?<94>   his   scaly   friend   replied.                                                                                                                                                                                                                                                                  \n<93>There   is   another   shore   you   know   upon   the   other   side.                                                                                                                                                                                                                                                                     \nThe   further   off   from   England   the   nearer   is   to   France<97>                                                                                                                                                                                                                                                                     \nThen   turn   not   pale   beloved   snail   but   come   and   join   the   dance.                                                                                                                                                                                                                                                               \nWill   you   won<92>t   you   will   you   won<92>t   you   will   you   join   the   dance?                                                                                                                                                                                                                                                            \nWill   you   won<92>t   you   will   you   won<92>t   you   won<92>t   you   join   the   dance?<94>                                                                                                                                                                                                                                                            \n                                                                                                                                                                                                                                                                                                \n<93>Thank   you   it<92>s   a   very   interesting   dance   to   watch   <94>   said   Alice   feeling   very   glad   that   it   was   over   at   last:   <93>and   I   do   so   like   that   curious   song   about   the   whiting!<94>                                                                                                                                                                                                   \n<93>Oh   as   to   the   whiting   <94>   said   the   Mock   Turtle   <93>they<97>you<92>ve   seen   them   of   course?<94>                                                                                                                                                                                                                                                      \n<93>Yes   <94>   said   Alice   <93>I<92>ve   often   seen   them   at   dinn<97><94>   she   checked   herself   hastily.                                                                                                                                                                                                                                                         \n<93>I   don<92>t   know   where   Dinn   may   be   <94>   said   the   Mock   Turtle   <93>but   if   you<92>ve   seen   them   so   often   of   course   you   know   what   they<92>re   like.<94>                                                                                                                                                                                                                     \n<93>I   believe   so   <94>   Alice   replied   thoughtfully.   <93>They   have   their   tails   in   their   mouths<97>and   they<92>re   all   over   crumbs.<94>                                                                                                                                                                                                                                             \n<93>You<92>re   wrong   about   the   crumbs   <94>   said   the   Mock   Turtle:   <93>crumbs   would   all   wash   off   in   the   sea.   But   they<a0>have<a0>their   tails   in   their   mouths;   and   the   reason   is<97><94>   here   the   Mock   Turtle   yawned   and   shut   his   eyes.<97><93>Tell   her   about   the   reason   and   all   that   <94>   he   said   to   the   Gryphon.                                                                                                                                             \n<93>The   reason   is   <94>   said   the   Gryphon   <93>that   they<a0>would<a0>go   with   the   lobsters   to   the   dance.   So   they   got   thrown   out   to   sea.   So   they   had   to   fall   a   long   way.   So   they   got   their   tails   fast   in   their   mouths.   So   they   couldn<92>t   get   them   out   again.   That<92>s   all.<94>                                                                                                                                                   \n<93>Thank   you   <94>   said   Alice   <93>it<92>s   very   interesting.   I   never   knew   so   much   about   a   whiting   before.<94>                                                                                                                                                                                                                                                \n<93>I   can   tell   you   more   than   that   if   you   like   <94>   said   the   Gryphon.   <93>Do   you   know   why   it<92>s   called   a   whiting?<94>                                                                                                                                                                                                                                 \n<93>I   never   thought   about   it   <94>   said   Alice.   <93>Why?<94>                                                                                                                                                                                                                                                                        \n<93>It   does   the   boots   and   shoes   <94>   the   Gryphon   replied   very   solemnly.                                                                                                                                                                                                                                                               \nAlice   was   thoroughly   puzzled.   <93>Does   the   boots   and   shoes!<94>   she   repeated   in   a   wondering   tone.                                                                                                                                                                                                                                                      \n<93>Why   what   are<a0>your<a0>shoes   done   with?<94>   said   the   Gryphon.   <93>I   mean   what   makes   them   so   shiny?<94>                                                                                                                                                                                                                                                      \nAlice   looked   down   at   them   and   considered   a   little   before   she   gave   her   answer.   <93>They<92>re   done   with   blacking   I   believe.<94>                                                                                                                                                                                                                                       \n<93>Boots   and   shoes   under   the   sea   <94>   the   Gryphon   went   on   in   a   deep   voice   <93>are   done   with   a   whiting.   Now   you   know.<94>                                                                                                                                                                                                                              \n<93>And   what   are   they   made   of?<94>   Alice   asked   in   a   tone   of   great   curiosity.                                                                                                                                                                                                                                                         \n<93>Soles   and   eels   of   course   <94>   the   Gryphon   replied   rather   impatiently:   <93>any   shrimp   could   have   told   you   that.<94>                                                                                                                                                                                                                                             \n<93>If   I<92>d   been   the   whiting   <94>   said   Alice   whose   thoughts   were   still   running   on   the   song   <93>I<92>d   have   said   to   the   porpoise   <91>Keep   back   please:   we   don<92>t   want<a0>you<a0>with   us!<92><94>                                                                                                                                                                                                            \n<93>They   were   obliged   to   have   him   with   them   <94>   the   Mock   Turtle   said:   <93>no   wise   fish   would   go   anywhere   without   a   porpoise.<94>                                                                                                                                                                                                                                 \n<93>Wouldn<92>t   it   really?<94>   said   Alice   in   a   tone   of   great   surprise.                                                                                                                                                                                                                                                                  \n<93>Of   course   not   <94>   said   the   Mock   Turtle:   <93>why   if   a   fish   came   to<a0>me   and   told   me   he   was   going   a   journey   I   should   say   <91>With   what   porpoise?<92><94>                                                                                                                                                                                                               \n<93>Don<92>t   you   mean   <91>purpose<92>?<94>   said   Alice.                                                                                                                                                                                                                                                                                 \n<93>I   mean   what   I   say   <94>   the   Mock   Turtle   replied   in   an   offended   tone.   And   the   Gryphon   added   <93>Come   let<92>s   hear   some   of<a0>your<a0>adventures.<94>                                                                                                                                                                                                                              \n<93>I   could   tell   you   my   adventures<97>beginning   from   this   morning   <94>   said   Alice   a   little   timidly:   <93>but   it<92>s   no   use   going   back   to   yesterday   because   I   was   a   different   person   then.<94>                                                                                                                                                                                                         \n<93>Explain   all   that   <94>   said   the   Mock   Turtle.                                                                                                                                                                                                                                                                           \n<93>No   no!   The   adventures   first   <94>   said   the   Gryphon   in   an   impatient   tone:   <93>explanations   take   such   a   dreadful   time.<94>                                                                                                                                                                                                                                          \nSo   Alice   began   telling   them   her   adventures   from   the   time   when   she   first   saw   the   White   Rabbit.   She   was   a   little   nervous   about   it   just   at   first   the   two   creatures   got   so   close   to   her   one   on   each   side   and   opened   their   eyes   and   mouths   so<a0>very<a0>wide   but   she   gained   courage   as   she   went   on.   Her   listeners   were   perfectly   quiet   till   she   got   to   the   part   about   her   repeating   <93>You   are   old   Father   William   <94>   to   the   Caterpillar   and   the   words   all   coming   different   and   then   the   Mock   Turtle   drew   a   long   breath   and   said   <93>That<92>s   very   curious.<94>\n<93>It<92>s   all   about   as   curious   as   it   can   be   <94>   said   the   Gryphon.                                                                                                                                                                                                                                                            \n<93>It   all   came   different!<94>   the   Mock   Turtle   repeated   thoughtfully.   <93>I   should   like   to   hear   her   try   and   repeat   something   now.   Tell   her   to   begin.<94>   He   looked   at   the   Gryphon   as   if   he   thought   it   had   some   kind   of   authority   over   Alice.                                                                                                                                                                        \n<93>Stand   up   and   repeat   <91><92>Tis   the   voice   of   the   sluggard   <92><94>   said   the   Gryphon.                                                                                                                                                                                                                                                         \n<93>How   the   creatures   order   one   about   and   make   one   repeat   lessons!<94>   thought   Alice;   <93>I   might   as   well   be   at   school   at   once.<94>   However   she   got   up   and   began   to   repeat   it   but   her   head   was   so   full   of   the   Lobster   Quadrille   that   she   hardly   knew   what   she   was   saying   and   the   words   came   very   queer   indeed:<97>                                                                                                                           \n                                                                                                                                                                                                                                                                                                \n<93><92>Tis   the   voice   of   the   Lobster;   I   heard   him   declare                                                                                                                                                                                                                                                                     \n<93>You   have   baked   me   too   brown   I   must   sugar   my   hair.<94>                                                                                                                                                                                                                                                                  \nAs   a   duck   with   its   eyelids   so   he   with   his   nose                                                                                                                                                                                                                                                                  \nTrims   his   belt   and   his   buttons   and   turns   out   his   toes.<94>                                                                                                                                                                                                                                                                  \n                                                                                                                                                                                                                                                                                                \n[later   editions   continued   as   follows                                                                                                                                                                                                                                                                                    \nWhen   the   sands   are   all   dry   he   is   gay   as   a   lark                                                                                                                                                                                                                                                               \nAnd   will   talk   in   contemptuous   tones   of   the   Shark                                                                                                                                                                                                                                                                        \nBut   when   the   tide   rises   and   sharks   are   around                                                                                                                                                                                                                                                                        \nHis   voice   has   a   timid   and   tremulous   sound.]                                                                                                                                                                                                                                                                           \n                                                                                                                                                                                                                                                                                                \n<93>That<92>s   different   from   what<a0>I<a0>used   to   say   when   I   was   a   child   <94>   said   the   Gryphon.                                                                                                                                                                                                                                                      \n<93>Well   I   never   heard   it   before   <94>   said   the   Mock   Turtle;   <93>but   it   sounds   uncommon   nonsense.<94>                                                                                                                                                                                                                                                   \nAlice   said   nothing;   she   had   sat   down   with   her   face   in   her   hands   wondering   if   anything   would<a0>ever<a0>happen   in   a   natural   way   again.                                                                                                                                                                                                                                 \n<93>I   should   like   to   have   it   explained   <94>   said   the   Mock   Turtle.                                                                                                                                                                                                                                                               \n<93>She   can<92>t   explain   it   <94>   said   the   Gryphon   hastily.   <93>Go   on   with   the   next   verse.<94>                                                                                                                                                                                                                                                      \n<93>But   about   his   toes?<94>   the   Mock   Turtle   persisted.   <93>How<a0>could<a0>he   turn   them   out   with   his   nose   you   know?<94>                                                                                                                                                                                                                                                \n<93>It<92>s   the   first   position   in   dancing.<94>   Alice   said;   but   was   dreadfully   puzzled   by   the   whole   thing   and   longed   to   change   the   subject.                                                                                                                                                                                                                                 \n<93>Go   on   with   the   next   verse   <94>   the   Gryphon   repeated   impatiently:   <93>it   begins   <91>I   passed   by   his   garden.<92><94>                                                                                                                                                                                                                                             \nAlice   did   not   dare   to   disobey   though   she   felt   sure   it   would   all   come   wrong   and   she   went   on   in   a   trembling   voice:<97>                                                                                                                                                                                                                              \n                                                                                                                                                                                                                                                                                                \n<93>I   passed   by   his   garden   and   marked   with   one   eye                                                                                                                                                                                                                                                                     \nHow   the   Owl   and   the   Panther   were   sharing   a   pie<97><94>                                                                                                                                                                                                                                                                     \n                                                                                                                                                                                                                                                                                                \n[later   editions   continued   as   follows                                                                                                                                                                                                                                                                                    \nThe   Panther   took   pie-crust   and   gravy   and   meat                                                                                                                                                                                                                                                                           \nWhile   the   Owl   had   the   dish   as   its   share   of   the   treat.                                                                                                                                                                                                                                                               \nWhen   the   pie   was   all   finished   the   Owl   as   a   boon                                                                                                                                                                                                                                                                  \nWas   kindly   permitted   to   pocket   the   spoon:                                                                                                                                                                                                                                                                              \nWhile   the   Panther   received   knife   and   fork   with   a   growl                                                                                                                                                                                                                                                                     \nAnd   concluded   the   banquet<97>]                                                                                                                                                                                                                                                                                       \n                                                                                                                                                                                                                                                                                                \n<93>What<a0>is<a0>the   use   of   repeating   all   that   stuff   <94>   the   Mock   Turtle   interrupted   <93>if   you   don<92>t   explain   it   as   you   go   on?   It<92>s   by   far   the   most   confusing   thing<a0>I<a0>ever   heard!<94>                                                                                                                                                                                                            \n<93>Yes   I   think   you<92>d   better   leave   off   <94>   said   the   Gryphon:   and   Alice   was   only   too   glad   to   do   so.                                                                                                                                                                                                                                       \n<93>Shall   we   try   another   figure   of   the   Lobster   Quadrille?<94>   the   Gryphon   went   on.   <93>Or   would   you   like   the   Mock   Turtle   to   sing   you   a   song?<94>                                                                                                                                                                                                                        \n<93>Oh   a   song   please   if   the   Mock   Turtle   would   be   so   kind   <94>   Alice   replied   so   eagerly   that   the   Gryphon   said   in   a   rather   offended   tone   <93>Hm!   No   accounting   for   tastes!   Sing   her   <91>Turtle   Soup   <92>   will   you   old   fellow?<94>                                                                                                                                                                           \nThe   Mock   Turtle   sighed   deeply   and   began   in   a   voice   sometimes   choked   with   sobs   to   sing   this:<97>                                                                                                                                                                                                                                                \n                                                                                                                                                                                                                                                                                                \n<93>Beautiful   Soup   so   rich   and   green                                                                                                                                                                                                                                                                                 \nWaiting   in   a   hot   tureen!                                                                                                                                                                                                                                                                                    \nWho   for   such   dainties   would   not   stoop?                                                                                                                                                                                                                                                                              \nSoup   of   the   evening   beautiful   Soup!                                                                                                                                                                                                                                                                                 \nSoup   of   the   evening   beautiful   Soup!                                                                                                                                                                                                                                                                                 \n<a0><a0><a0><a0>Beau<97>ootiful   Soo<97>oop!                                                                                                                                                                                                                                                                                             \n<a0><a0><a0><a0>Beau<97>ootiful   Soo<97>oop!                                                                                                                                                                                                                                                                                             \nSoo<97>oop   of   the   e<97>e<97>evening                                                                                                                                                                                                                                                                                       \n<a0><a0><a0><a0>Beautiful   beautiful   Soup!                                                                                                                                                                                                                                                                                          \n                                                                                                                                                                                                                                                                                                \n<93>Beautiful   Soup!   Who   cares   for   fish                                                                                                                                                                                                                                                                                 \nGame   or   any   other   dish?                                                                                                                                                                                                                                                                                    \nWho   would   not   give   all   else   for   two   p                                                                                                                                                                                                                                                                        \nennyworth   only   of   beautiful   Soup?                                                                                                                                                                                                                                                                                    \nPennyworth   only   of   beautiful   Soup?                                                                                                                                                                                                                                                                                    \n<a0><a0><a0><a0>Beau<97>ootiful   Soo<97>oop!                                                                                                                                                                                                                                                                                             \n<a0><a0><a0><a0>Beau<97>ootiful   Soo<97>oop!                                                                                                                                                                                                                                                                                             \nSoo<97>oop   of   the   e<97>e<97>evening                                                                                                                                                                                                                                                                                       \n<a0><a0><a0><a0>Beautiful   beauti<97>FUL   SOUP!<94>                                                                                                                                                                                                                                                                                          \n                                                                                                                                                                                                                                                                                                \n<93>Chorus   again!<94>   cried   the   Gryphon   and   the   Mock   Turtle   had   just   begun   to   repeat   it   when   a   cry   of   <93>The   trial<92>s   beginning!<94>   was   heard   in   the   distance.                                                                                                                                                                                                                  \n<93>Come   on!<94>   cried   the   Gryphon   and   taking   Alice   by   the   hand   it   hurried   off   without   waiting   for   the   end   of   the   song.                                                                                                                                                                                                                                 \n<93>What   trial   is   it?<94>   Alice   panted   as   she   ran;   but   the   Gryphon   only   answered   <93>Come   on!<94>   and   ran   the   faster   while   more   and   more   faintly   came   carried   on   the   breeze   that   followed   them   the   melancholy   words:<97>                                                                                                                                                                                       \n                                                                                                                                                                                                                                                                                                \n<93>Soo<97>oop   of   the   e<97>e<97>evening                                                                                                                                                                                                                                                                                       \n<a0><a0><a0><a0>Beautiful   beautiful   Soup!<94>                                                                                                                                                                                                                                                                                          "
## [2] NA                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
## [3] NA
#install.packages("magrittr")
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.3.3
library(tm)
docs <- docs %>%
        tm_map(removePunctuation) %>%
        tm_map(removeNumbers) %>%
        tm_map(removeWords, stopwords("en")) %>%
        tm_map(stripWhitespace) %>%
        tm_map(stemDocument)
content(docs[[2]])[1:3]
## [1] "The Mock Turtl sigh deepli drew back one flapper across eye He look Alice tri speak minut two sob choke voic Same bone throat said Gryphon set work shake punch back At last Mock Turtl recov voic tear run cheek went You may live much sea I havent said Aliceand perhap never even introduc lobster Alice began say I tast check hastili said No never can idea delight thing Lobster Quadrill No inde said Alice What sort danc Whi said Gryphon first form line along seashor Two line cri Mock Turtl Seal turtl salmon youv clear jellyfish way Thatagener take time interrupt Gryphon advanc twice Each lobster partner cri Gryphon Of cours Mock Turtl said advanc twice set partner chang lobster retir order continu Gryphon Then know Mock Turtl went throw The lobster shout Gryphon bound air far sea can Swim scream Gryphon Turn somersault sea cri Mock Turtl caper wild Chang lobster yell Gryphon top voic Back land that first figur said Mock Turtl sudden drop voic two creatur jump like mad thing time sat sad quiet look Alice It must pretti danc said Alice timid Would like see littl said Mock Turtl Veri much inde said Alice Come let tri first figur said Mock Turtl Gryphon We can without lobster know Which shall sing Oh ayouas said Gryphon Ive forgotten word So began solemn danc round round Alice everi now tread toe pass close wave forepaw mark time Mock Turtl sang slowli sad Will walk littl faster said white snail There porpois close behind us hes tread tail See eager lobster turtl advanc They wait shinglewil come join danc Will wont will wont will join danc Will wont will wont wont join danc You can realli notion delight will When take us throw us lobster sea But snail repli Too far far gave look askanc Said thank white kind join danc Would join danc Would join danc What matter far go scali friend repli There anoth shore know upon side The England nearer Franc Then turn pale belov snail come join danc Will wont will wont will join danc Will wont will wont wont join danc Thank interest danc watch said Alice feel glad last I like curious song white Oh white said Mock Turtl theyyouv seen cours Yes said Alice Ive often seen dinn check hastili I dont know Dinn may said Mock Turtl youv seen often cours know theyr like I believ Alice repli thought They tail mouthsand theyr crumb Your wrong crumb said Mock Turtl crumb wash sea But theyahaveatheir tail mouth reason Mock Turtl yawn shut eyesTel reason said Gryphon The reason said Gryphon theyawouldago lobster danc So got thrown sea So fall long way So got tail fast mouth So couldnt get That Thank said Alice interest I never knew much white I can tell like said Gryphon Do know call white I never thought said Alice Whi It boot shoe Gryphon repli solemn Alice thorough puzzl Doe boot shoe repeat wonder tone Whi areayourasho done said Gryphon I mean make shini Alice look consid littl gave answer Theyr done black I believ Boot shoe sea Gryphon went deep voic done white Now know And made Alice ask tone great curios Sole eel cours Gryphon repli rather impati shrimp told If Id white said Alice whose thought still run song Id said porpois Keep back pleas dont wantayouawith us They oblig Mock Turtl said wise fish go anywher without porpois Wouldnt realli said Alice tone great surpris Of cours said Mock Turtl fish came toam told go journey I say With porpois Dont mean purpos said Alice I mean I say Mock Turtl repli offend tone And Gryphon ad Come let hear ofayouraadventur I tell adventuresbegin morn said Alice littl timid use go back yesterday I differ person Explain said Mock Turtl No The adventur first said Gryphon impati tone explan take dread time So Alice began tell adventur time first saw White Rabbit She littl nervous just first two creatur got close one side open eye mouth soaveryawid gain courag went Her listen perfect quiet till got part repeat You old Father William Caterpillar word come differ Mock Turtl drew long breath said That curious Its curious can said Gryphon It came differ Mock Turtl repeat thought I like hear tri repeat someth now Tell begin He look Gryphon thought kind author Alice Stand repeat Tis voic sluggard said Gryphon How creatur order one make one repeat lesson thought Alice I might well school Howev got began repeat head full Lobster Quadrill hard knew say word came queer inde Tis voic Lobster I heard declar You bake brown I must sugar hair As duck eyelid nose Trim belt button turn toe later edit continu follow When sand dri gay lark And will talk contemptu tone Shark But tide rise shark around His voic timid tremul sound That differ whataIaus say I child said Gryphon Well I never heard said Mock Turtl sound uncommon nonsens Alice said noth sat face hand wonder anyth wouldaeverahappen natur way I like explain said Mock Turtl She cant explain said Gryphon hastili Go next vers But toe Mock Turtl persist Howacouldah turn nose know Its first posit danc Alice said dread puzzl whole thing long chang subject Go next vers Gryphon repeat impati begin I pass garden Alice dare disobey though felt sure come wrong went trembl voic I pass garden mark one eye How Owl Panther share pie later edit continu follow The Panther took piecrust gravi meat While Owl dish share treat When pie finish Owl boon Was kind permit pocket spoon While Panther receiv knife fork growl And conclud banquet Whataisath use repeat stuff Mock Turtl interrupt dont explain go Its far confus thingaIaev heard Yes I think youd better leav said Gryphon Alice glad Shall tri anoth figur Lobster Quadrill Gryphon went Or like Mock Turtl sing song Oh song pleas Mock Turtl kind Alice repli eager Gryphon said rather offend tone Hm No account tast Sing Turtl Soup will old fellow The Mock Turtl sigh deepli began voic sometim choke sob sing Beauti Soup rich green Wait hot tureen Who dainti stoop Soup even beauti Soup Soup even beauti Soup aaaaBeauooti Soooop aaaaBeauooti Soooop Soooop eeeven aaaaBeauti beauti Soup Beauti Soup Who care fish Game dish Who give els two p ennyworth beauti Soup Pennyworth beauti Soup aaaaBeauooti Soooop aaaaBeauooti Soooop Soooop eeeven aaaaBeauti beautiFUL SOUP Chorus cri Gryphon Mock Turtl just begun repeat cri The trial begin heard distanc Come cri Gryphon take Alice hand hurri without wait end song What trial Alice pant ran Gryphon answer Come ran faster faint came carri breez follow melancholi word Soooop eeeven aaaaBeauti beauti Soup"
## [2] NA                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [3] NA
data <- DocumentTermMatrix(docs, control=list(weighting=weightTfIdf))
## Warning in TermDocumentMatrix.SimpleCorpus(x, control): custom functions are
## ignored
data
## <<DocumentTermMatrix (documents: 12, terms: 2074)>>
## Non-/sparse entries: 5058/19830
## Sparsity           : 80%
## Maximal term length: 702
## Weighting          : term frequency - inverse document frequency (normalized) (tf-idf)
inspect(data[1:2,1:5])
## <<DocumentTermMatrix (documents: 2, terms: 5)>>
## Non-/sparse entries: 6/4
## Sparsity           : 40%
## Maximal term length: 13
## Weighting          : term frequency - inverse document frequency (normalized) (tf-idf)
## Sample             :
##                 Terms
## Docs             aaveryagood      across actuallyatook      afraid   afterward
##   Chapter 1.csv  0.004818498 0.005376344   0.004818498 0.001045171 0.003474412
##   Chapter 10.csv 0.000000000 0.001897533   0.000000000 0.000000000 0.000000000
findFreqTerms(data,0.1)
##  [1] "mock"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
##  [2] "turtl"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
##  [3] "hatter"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
##  [4] "aaaahowneatlyspreadhisclaw"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
##  [5] "aaaaimprovehisshiningtail"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
##  [6] "aaaaoneverygoldenscal"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
##  [7] "aaaawithgentlysmilingjaw"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
##  [8] "afteratimesheheardalittlepatteringoffeetinthedistanceandshehastilydriedhereyestoseewhatwascomingitwasthewhiterabbitreturningsplendidlydressedwithapairofwhitekidglovesinonehandandalargefanintheotherhecametrottingalonginagreathurrymutteringtohimselfashecameohtheduchesstheduchessohwontshebesavageifivekeptherwaitingalicefeltsodesperatethatshewasreadytoaskhelpofanyonesowhentherabbitcamenearhershebeganinalowtimidvoiceifyoupleasesirtherabbitstartedviolentlydroppedthewhitekidglovesandthefanandskurriedawayintothedarknessashardashecouldgo"                                                                                                                                                                        
##  [9] "alicesrightfootesq"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [10] "alicetookupthefanandglovesandasthehallwasveryhotshekeptfanningherselfallthetimeshewentontalkingdeardearhowqueereverythingistodayandyesterdaythingswentonjustasusualiwonderifivebeenchangedinthenightletmethinkwasithesamewhenigotupthismorningialmostthinkicanrememberfeelingalittledifferentbutifimnotthesamethenextquestioniswhointheworldamiahathatsathegreatpuzzleandshebeganthinkingoverallthechildrensheknewthatwereofthesameageasherselftoseeifshecouldhavebeenchangedforanyofthem"                                                                                                                                                                                                                                     
## [11] "andpourthewatersofthenil"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
## [12] "andshewentonplanningtoherselfhowshewouldmanageittheymustgobythecarriershethoughtandhowfunnyitllseemsendingpresentstoonesownfeetandhowoddthedirectionswilllook"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
## [13] "andwelcomelittlefishesin"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
## [14] "asshesaidthesewordsherfootslippedandinanothermomentsplashshewasuptoherchininsaltwaterherfirstideawasthatshehadsomehowfallenintotheseaandinthatcaseicangobackbyrailwayshesaidtoherselfalicehadbeentotheseasideonceinherlifeandhadcometothegeneralconclusionthatwhereveryougotoontheenglishcoastyoufindanumberofbathingmachinesintheseasomechildrendigginginthesandwithwoodenspadesthenarowoflodginghousesandbehindthemarailwaystationhowevershesoonmadeoutthatshewasinthepooloftearswhichshehadweptwhenshewasninefeethigh"                                                                                                                                                                                                      
## [15] "asshesaidthisshelookeddownatherhandsandwassurprisedtoseethatshehadputononeoftherabbitslittlewhitekidgloveswhileshewastalkinghowacanaihavedonethatshethoughtimustbegrowingsmallagainshegotupandwenttothetabletomeasureherselfbyitandfoundthatasnearlyasshecouldguessshewasnowabouttwofeethighandwasgoingonshrinkingrapidlyshesoonfoundoutthatthecauseofthiswasthefanshewasholdingandshedroppedithastilyjustintimetoavoidshrinkingawayaltogeth"                                                                                                                                                                                                                                                                                  
## [16] "curiouserandcuriousercriedaliceshewassomuchsurprisedthatforthemomentshequiteforgothowtospeakgoodenglishnowimopeningoutlikethelargesttelescopethateverwasgoodbyefeetforwhenshelookeddownatherfeettheyseemedtobealmostoutofsighttheyweregettingsofaroffohmypoorlittlefeetiwonderwhowillputonyourshoesandstockingsforyounowdearsimsureaiashantbeableishallbeagreatdealtoofarofftotroublemyselfaboutyouyoumustmanagethebestwayyoucanbutimustbekindtothemthoughtaliceorperhapstheywontwalkthewayiwanttogoletmeseeillgivethemanewpairofbootseverychristma"                                                                                                                                                                           
## [17] "hearthrug"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
## [18] "howcheerfullyheseemstogrin"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
## [19] "howdoththelittlecrocodil"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
## [20] "imsureimnotadashesaidforherhairgoesinsuchlongringletsandminedoesntgoinringletsatallandimsureicantbemabelforiknowallsortsofthingsandsheohsheknowssuchaverylittlebesidesashesasheandaimaiandohdearhowpuzzlingitallisilltryifiknowallthethingsiusedtoknowletmeseefourtimesfiveistwelveandfourtimessixisthirteenandfourtimessevenisohdearishallnevergettotwentyatthatratehoweverthemultiplicationtabledoesntsignifyletstrygeographylondonisthecapitalofparisandparisisthecapitalofromeandromenoathatsaallwrongimcertainimusthavebeenchangedformabelilltryandsayhowdoththelittleandshecrossedherhandsonherlapasifsheweresayinglessonsandbegantorepeatitbuthervoicesoundedhoarseandstrangeandthewordsdidnotcomethesameastheyusedtodo"
## [21] "imsurethosearenottherightwordssaidpooraliceandhereyesfilledwithtearsagainasshewentonimustbemabelafterallandishallhavetogoandliveinthatpokylittlehouseandhavenexttonotoystoplaywithandoheversomanylessonstolearnnoivemadeupmymindaboutitifimmabelillstaydownhereitllbenousetheirputtingtheirheadsdownandsayingcomeupagaindearishallonlylookupandsaywhoamithentellmethatfirstandthenifilikebeingthatpersonillcomeupifnotillstaydownheretillimsomebodyelsebutohdearcriedalicewithasuddenburstoftearsidowishtheyawouldaputtheirheadsdowniamsoaveryatiredofbeingallaloneher"                                                                                                                                                        
## [22] "itwashightimetogoforthepoolwasgettingquitecrowdedwiththebirdsandanimalsthathadfallenintoittherewereaduckandadodoaloryandaneagletandseveralothercuriouscreaturesaliceledthewayandthewholepartyswamtotheshor"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
## [23] "iwishihadntcriedsomuchsaidaliceassheswamabouttryingtofindherwayoutishallbepunishedforitnowisupposebybeingdrownedinmyowntearsthatawillabeaqueerthingtobesurehowevereverythingisqueertoday"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
## [24] "iwontindeedsaidaliceinagreathurrytochangethesubjectofconversationareyouareyoufondofofdogsthemousedidnotanswersoalicewentoneagerlythereissuchanicelittledognearourhouseishouldliketoshowyoualittlebrighteyedterrieryouknowwithohsuchlongcurlybrownhairanditllfetchthingswhenyouthrowthemanditllsitupandbegforitsdinnerandallsortsofthingsicantrememberhalfofthemanditbelongstoafarmeryouknowandhesaysitssousefulitsworthahundredpoundshesaysitkillsalltheratsandohdearcriedaliceinasorrowfultoneimafraidiveoffendeditagainforthemousewasswimmingawayfromherashardasitcouldgoandmakingquiteacommotioninthepoolasitw"                                                                                                             
## [25] "justthenherheadstruckagainsttheroofofthehallinfactshewasnowmorethanninefeethighandsheatoncetookupthelittlegoldenkeyandhurriedofftothegardendoor"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
## [26] "justthensheheardsomethingsplashingaboutinthepoolalittlewayoffandsheswamnearertomakeoutwhatitwasatfirstshethoughtitmustbeawalrusorhippopotamusbutthensherememberedhowsmallshewasnowandshesoonmadeoutthatitwasonlyamousethathadslippedinlikeherself"                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
## [27] "nearthefend"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
## [28] "notlikecatscriedthemouseinashrillpassionatevoicewouldayoualikecatsifyouwerem"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
## [29] "ohdearwhatnonsenseimtalk"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
## [30] "perhapsitdoesntunderstandenglishthoughtaliceidaresayitsafrenchmousecomeoverwithwilliamtheconquerorforwithallherknowledgeofhistoryalicehadnoveryclearnotionhowlongagoanythinghadhappenedsoshebeganagainofestmachattewhichwasthefirstsentenceinherfrenchlessonbookthemousegaveasuddenleapoutofthewaterandseemedtoquiveralloverwithfrightohibegyourpardoncriedalicehastilyafraidthatshehadhurtthepooranimalsfeelingsiquiteforgotyoudidntlikecat"                                                                                                                                                                                                                                                                                  
## [31] "pooraliceitwasasmuchasshecoulddolyingdownononesidetolookthroughintothegardenwithoneeyebuttogetthroughwasmorehopelessthanevershesatdownandbegantocryagain"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
## [32] "soshecalledsoftlyafteritmousedeardocomebackagainandwewonttalkaboutcatsordogseitherifyoudontlikethemwhenthemouseheardthisitturnedroundandswamslowlybacktoheritsfacewasquitepalewithpassionalicethoughtanditsaidinalowtremblingvoiceletusgettotheshoreandthenilltellyoumyhistoryandyoullunderstandwhyitisihatecatsanddog"                                                                                                                                                                                                                                                                                                                                                                                                        
## [33] "thatawasaanarrowescapesaidaliceagooddealfrightenedatthesuddenchangebutverygladtofindherselfstillinexistenceandnowforthegardenandsheranwithallspeedbacktothelittledoorbutalasthelittledoorwasshutagainandthelittlegoldenkeywaslyingontheglasstableasbeforeandthingsareworsethaneverthoughtthepoorchildforineverwassosmallasthisbeforeneverandideclareitstoobadthat"                                                                                                                                                                                                                                                                                                                                                             
## [34] "weindeedcriedthemousewhowastremblingdowntotheendofhistailasifaiawouldtalkonsuchasubjectourfamilyalwaysahatedacatsnastylowvulgarthingsdontletmehearthenameagain"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
## [35] "wellperhapsnotsaidaliceinasoothingtonedontbeangryaboutitandyetiwishicouldshowyouourcatdinahithinkyoudtakeafancytocatsifyoucouldonlyseehersheissuchadearquietthingalicewentonhalftoherselfassheswamlazilyaboutinthepoolandshesitspurringsonicelybythefirelickingherpawsandwashingherfaceandsheissuchanicesoftthingtonurseandshessuchacapitaloneforcatchingmiceohibegyourpardoncriedaliceagainforthistimethemousewasbristlingalloverandshefeltcertainitmustbereallyoffendedwewonttalkaboutheranymoreifyoudrathernot"                                                                                                                                                                                                             
## [36] "withaliceslov"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
## [37] "woulditbeofanyusenowthoughtalicetospeaktothismouseeverythingissooutofthewaydownherethatishouldthinkverylikelyitcantalkatanyratetheresnoharmintryingsoshebeganomousedoyouknowthewayoutofthispooliamverytiredofswimmingabouthereomousealicethoughtthismustbetherightwayofspeakingtoamouseshehadneverdonesuchathingbeforebutsherememberedhavingseeninherbrotherslatingrammaramouseofamousetoamouseamouseomousethemouselookedatherratherinquisitivelyandseemedtohertowinkwithoneofitslittleeyesbutitsaidnoth"                                                                                                                                                                                                                      
## [38] "yououghttobeashamedofyourselfsaidaliceagreatgirllikeyoushemightwellsaythistogooncryinginthiswaystopthismomentitellyoubutshewentonallthesamesheddinggallonsoftearsuntiltherewasalargepoolallroundheraboutfourinchesdeepandreachinghalfdownthehal"
findAssocs(data,"alice",0.3)
## $alice
##                                 dont                                might 
##                                 0.83                                 0.77 
##                               remark                                 took 
##                                 0.71                                 0.67 
##                                wasnt                                 what 
##                                 0.67                                 0.66 
##                                  ask                                  you 
##                                 0.65                                 0.64 
##                                 want                                taken 
##                                 0.63                                 0.63 
##                                  eye                                  now 
##                                 0.61                                 0.61 
##                                  say                                 well 
##                                 0.61                                 0.61 
##                             advantag                                  day 
##                                 0.61                                 0.60 
##                                 time                               answer 
##                                 0.59                                 0.58 
##                                break                                front 
##                                 0.58                                 0.58 
##                                 nine                                 said 
##                                 0.58                                 0.57 
##                                 live                                 make 
##                                 0.57                                 0.56 
##                                  use                                 went 
##                                 0.56                                 0.56 
##                              subject                                 tone 
##                                 0.56                                 0.56 
##                              angrili                                  fun 
##                                 0.56                                 0.56 
##                                 ladi                                music 
##                                 0.56                                 0.56 
##                                savag                                  and 
##                                 0.56                                 0.55 
##                                begin                                 come 
##                                 0.55                                 0.55 
##                                 much                             shoulder 
##                                 0.55                                 0.55 
##                                  but                                 keep 
##                                 0.54                                 0.54 
##                                 rest                                stori 
##                                 0.54                                 0.54 
##                                least                             cautious 
##                                 0.54                                 0.54 
##                                 have                                 once 
##                                 0.54                                 0.54 
##                                 rude                           understand 
##                                 0.54                                 0.54 
##                                 mani                               perhap 
##                                 0.53                                 0.53 
##                                thing                               togeth 
##                                 0.53                                 0.53 
##                               temper                                 like 
##                                 0.53                                 0.52 
##                              thought                                learn 
##                                 0.52                                 0.52 
##                                right                                  saw 
##                                 0.51                                 0.51 
##                                 talk                                 sigh 
##                                 0.51                                 0.51 
##                               indign                                  new 
##                                 0.51                                 0.50 
##                                think                                  yes 
##                                 0.50                                 0.50 
##                                 life                                polit 
##                                 0.50                                 0.50 
##                               bottom                              convers 
##                                 0.50                                 0.49 
##                               slowli                                 yawn 
##                                 0.49                                 0.49 
##                                  ive                                 last 
##                                 0.48                                 0.48 
##                                young                                youll 
##                                 0.48                                 0.48 
##                                everi                                  whi 
##                                 0.47                                 0.47 
##                                  its                                 wash 
##                                 0.47                                 0.47 
##                                 tree                               though 
##                                 0.47                                 0.46 
##                               ventur                                 cant 
##                                 0.46                                 0.46 
##                                minut                                chose 
##                                 0.46                                 0.46 
##                              patient                             question 
##                                 0.46                                 0.45 
##                               troubl                              duchess 
##                                 0.45                                 0.45 
##                             otherwis                                  end 
##                                 0.45                                 0.44 
##                                 felt                                clear 
##                                 0.44                                 0.44 
##                                 dare                               reason 
##                                 0.44                                 0.44 
##                                repli                               someth 
##                                 0.44                                 0.44 
##                             encourag                                  not 
##                                 0.44                                 0.44 
##                                 idea                                 tell 
##                                 0.43                                 0.43 
##                                 walk                                  old 
##                                 0.43                                 0.43 
##                                speak                                alway 
##                                 0.43                                 0.43 
##                               branch                             constant 
##                                 0.43                                 0.43 
##                                drive                              iathink 
##                                 0.43                                 0.43 
##                             occasion                               startl 
##                                 0.43                                 0.43 
##                              quarrel                            contemptu 
##                                 0.43                                 0.42 
##                                 veri                                 alon 
##                                 0.42                                 0.42 
##                                 hour                               height 
##                                 0.42                                 0.42 
##                                 less                               silent 
##                                 0.42                                 0.42 
##                                 glad                                never 
##                                 0.41                                 0.41 
##                                 nose                                plead 
##                                 0.41                                 0.41 
##                               promis                                 sinc 
##                                 0.41                                 0.41 
##                                 aand                               absenc 
##                                 0.41                                 0.41 
##                            affection                               afford 
##                                 0.41                                 0.41 
##                                 agre                               aheawa 
##                                 0.41                                 0.41 
##                              ancient                      arithmeticambit 
##                                 0.41                                 0.41 
##                                asham                         ataoursathey 
##                                 0.41                                 0.41 
##                             athatsaa                          barleysugar 
##                                 0.41                                 0.41 
##                             beautifi                                 beor 
##                                 0.41                                 0.41 
##                             birthday                                 bite 
##                                 0.41                                 0.41 
##                         bitterandand                                 bore 
##                                 0.41                                 0.41 
##                                 calm                              camomil 
##                                 0.41                                 0.41 
##                      canaeverafinish                                cheap 
##                                 0.41                                 0.41 
##                                 chin                                choic 
##                                 0.41                                 0.41 
##                               chuckl                              classic 
##                                 0.41                                 0.41 
##                               closer                                 coil 
##                                 0.41                                 0.41 
##                              compani                            congereel 
##                                 0.41                                 0.41 
##                                 cost                                count 
##                                 0.41                                 0.41 
##                              custodi                            dayschool 
##                                 0.41                                 0.41 
##                                delay                                deris 
##                                 0.41                                 0.41 
##                             distract                                drawl 
##                                 0.41                                 0.41 
##                         drawlingmast                           drawlingth 
##                                 0.41                                 0.41 
##                         educationsin                             eleventh 
##                                 0.41                                 0.41 
##                               exclam                               experi 
##                                 0.41                                 0.41 
##                                extra                             favourit 
##                                 0.41                                 0.41 
##                                flock                               french 
##                                 0.41                                 0.41 
##                                grief                                hasnt 
##                                 0.41                                 0.41 
##                                  hid                              hjckrrh 
##                                 0.41                                 0.41 
##                              holiday                               hollow 
##                                 0.41                                 0.41 
##                              hottemp                               inquir 
##                                 0.41                                 0.41 
##                                  isb                               isbird 
##                                 0.41                                 0.41 
##                                 isoh                                 isth 
##                                 0.41                                 0.41 
##                             iveabeen                           kitchenaat 
##                                 0.41                                 0.41 
##                            knewathat                                 lazi 
##                                 0.41                                 0.41 
##                                 ledg                               lessen 
##                                 0.41                                 0.41 
##                                 lift                                 link 
##                                 0.41                                 0.41 
##                                maynt            meanstomakeanythingpretti 
##                                 0.41                                 0.41 
##                                 mere                                miner 
##                                 0.41                                 0.41 
##                               modern                                moral 
##                                 0.41                                 0.41 
##                              mustard                           mustardmin 
##                                 0.41                                 0.41 
##                              mysteri                        ofathataistak 
##                                 0.41                                 0.41 
##                                 pray                                 reel 
##                                 0.41                                 0.41 
##                                 rock                           seaographi 
##                                 0.41                                 0.41 
##                                shade                            simpleton 
##                                 0.41                                 0.41 
##                            simplynev                                 sink 
##                                 0.41                                 0.41 
##                              sourand                                stiff 
##                                 0.41                                 0.41 
##                               stingi                            sweettemp 
##                                 0.41                                 0.41 
##                         thereamustab                                those 
##                                 0.41                                 0.41 
##                         thunderstorm                              tortois 
##                                 0.41                                 0.41 
##                              turtlew                                  tut 
##                                 0.41                                 0.41 
##                              twelfth                               uglifi 
##                                 0.41                                 0.41 
##                              uglific                                veget 
##                                 0.41                                 0.41 
##                              vinegar                                waist 
##                                 0.41                                 0.41 
##                                 warn                         wasathatalik 
##                                 0.41                                 0.41 
##                          wasaveryaug                         washingextra 
##                                 0.41                                 0.41 
##                                 weak                       weekaheataught 
##                                 0.41                                 0.41 
##                            whenaimaa                          withoutmayb 
##                                 0.41                                 0.41 
##                                writh                            youaareaa 
##                                 0.41                                 0.41 
##                                 good                                 hair 
##                                 0.40                                 0.40 
##                               school                                exact 
##                                 0.40                                 0.40 
##                                 wood                              couldnt 
##                                 0.40                                 0.39 
##                               lesson                                which 
##                                 0.39                                 0.39 
##                                 youd                                 this 
##                                 0.39                                 0.39 
##                                alarm                        extraordinari 
##                                 0.39                                 0.39 
##                                 hint                              instanc 
##                                 0.39                                 0.39 
##                               manner                               murder 
##                                 0.39                                 0.39 
##                              neither                                nibbl 
##                                 0.39                                 0.39 
##                                today                               better 
##                                 0.39                                 0.38 
##                               asleep                                  fli 
##                                 0.38                                 0.38 
##                                 case                            uncomfort 
##                                 0.38                                 0.38 
##                             mushroom                        croquetground 
##                                 0.38                                 0.38 
##                                 fair                                 fine 
##                                 0.38                                 0.38 
##                                 fond                               player 
##                                 0.38                                 0.38 
##                                stamp                                 back 
##                                 0.38                                 0.37 
##                                littl                                puzzl 
##                                 0.37                                 0.37 
##                                 then                                annoy 
##                                 0.37                                 0.37 
##                                 argu           aaaaaaaaaaaaaaaaaaaatwinkl 
##                                 0.37                                 0.37 
##                           andathensh                             armchair 
##                                 0.37                                 0.37 
##                   askedayouraopinion                                 bawl 
##                                 0.37                                 0.37 
##                                beaon                                 bear 
##                                 0.37                                 0.37 
##                          beforeaheaw                            breadknif 
##                                 0.37                                 0.37 
##                                choos                                civil 
##                                 0.37                                 0.37 
##                                clean                                clock 
##                                 0.37                                 0.37 
##                              consent                                  cup 
##                                 0.37                                 0.37 
##                               dinner                              disgust 
##                                 0.37                                 0.37 
##                      doesayourawatch                                 draw 
##                                 0.37                                 0.37 
##                                elsie                              flowerb 
##                                 0.37                                 0.37 
##                               fourth                                given 
##                                 0.37                                 0.37 
##                             gloomili                       goathereaagain 
##                                 0.37                                 0.37 
##                               grumbl                             halfpast 
##                                 0.37                                 0.37 
##                           heasaidawa                                  hed 
##                                 0.37                                 0.37 
##                             itaisath                              itsahim 
##                                 0.37                                 0.37 
##                                 laci                                 lead 
##                                 0.37                                 0.37 
##                              leastat                            marchjust 
##                                 0.37                                 0.37 
##                               memori                              milkjug 
##                                 0.37                                 0.37 
##                                month                                 moon 
##                                 0.37                                 0.37 
##                                mourn                            mousetrap 
##                                 0.37                                 0.37 
##                          muchnessdid                          muchnessyou 
##                                 0.37                                 0.37 
##                                  nor                               oclock 
##                                 0.37                                 0.37 
##                                 pour                               propos 
##                                 0.37                                 0.37 
##                                raven                              readili 
##                                 0.37                                 0.37 
##                                riddl                             riddlesi 
##                                 0.37                                 0.37 
##                              saythat                          sistersthey 
##                                 0.37                                 0.37 
##                                  six                            slightest 
##                                 0.37                                 0.37 
##                            stupidest                            takealess 
##                                 0.37                                 0.37 
##                       takeamoreathan                             teaparti 
##                                 0.37                                 0.37 
##                               teapot                                teath 
##                                 0.37                                 0.37 
##                              teatray                                 term 
##                                 0.37                                 0.37 
##                        theabestabutt                                theni 
##                                 0.37                                 0.37 
##                     theresaplentyaof                                theyv 
##                                 0.37                                 0.37 
##                        thingseveryth                                tilli 
##                                 0.37                                 0.37 
##                           treaclewel                        treaclewelleh 
##                                 0.37                                 0.37 
##                               unlock                                unwil 
##                                 0.37                                 0.37 
##                              veryail                                 vote 
##                                 0.37                                 0.37 
##                           wasayourat                           wastingait 
##                                 0.37                                 0.37 
##                             waterwel                        wayayouamanag 
##                                 0.37                                 0.37 
##                              wearili                           wereainath 
##                                 0.37                                 0.37 
##                                 whos                                 wide 
##                                 0.37                                 0.37 
##                                 wine                             withamin 
##                                 0.37                                 0.37 
##                                 woke                          writingdesk 
##                                 0.37                                 0.37 
##                               afraid                                close 
##                                 0.36                                 0.36 
##                                  ear                                  tri 
##                                 0.36                                 0.36 
##                               nearer                                wrong 
##                                 0.36                                 0.36 
##                                  rub                                  bit 
##                                 0.36                                 0.35 
##                                 inde                                  see 
##                                 0.35                                 0.35 
##                                twice                                 your 
##                                 0.35                                 0.35 
##                                chang                             interest 
##                                 0.35                                 0.35 
##                                  yet                                spoke 
##                                 0.35                                 0.35 
##                                 stay                               master 
##                                 0.35                                 0.35 
##                                aaaai                              areayou 
##                                 0.35                                 0.35 
##                              brought                                coupl 
##                                 0.35                                 0.35 
##                                 ever                                 know 
##                                 0.34                                 0.34 
##                                 side                               taught 
##                                 0.34                                 0.34 
##                                 wait                              wouldnt 
##                                 0.34                                 0.34 
##                               pepper                               suppos 
##                                 0.34                                 0.34 
##                                doubt                             shouldnt 
##                                 0.34                                 0.34 
##                              kitchen                              cushion 
##                                 0.34                                 0.34 
##                                 laid                                 open 
##                                 0.34                                 0.33 
##                              present                                 that 
##                                 0.33                                 0.33 
##                          caterpillar                                dread 
##                                 0.33                                 0.33 
##                                  tis                              exclaim 
##                                 0.33                                 0.33 
##                             frighten   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 
##                                 0.33                                 0.33 
## aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa                            aaaaallow 
##                                 0.33                                 0.33 
##                                aaaab                               aaaado 
##                                 0.33                                 0.33 
##                               aaaaha                             aaaapray 
##                                 0.33                                 0.33 
##                             aaaasaid                             aaaathat 
##                                 0.33                                 0.33 
##                              aaaawhi                       backsomersault 
##                                 0.33                                 0.33 
##                               balanc                                brain 
##                                 0.33                                 0.33 
##                             contempt                              content 
##                                 0.33                                 0.33 
##                           contradict                              deepest 
##                                 0.33                                 0.33 
##                                  egg                       explainamyself 
##                                 0.33                                 0.33 
##                                  fat                                hatch 
##                                 0.33                                 0.33 
##                              highest                          iadontaknow 
##                                 0.33                                 0.33 
##                          iahaveatast                                  iim 
##                                 0.33                                 0.33 
##                             imanotaa                               incess 
##                                 0.33                                 0.33 
##                                injur                                irrit 
##                                 0.33                                 0.33 
##                                 limb                             muscular 
##                                 0.33                                 0.33 
##                      notaquitearight                           ofawhatath 
##                                 0.33                                 0.33 
##                           ointmenton                               pigeon 
##                                 0.33                                 0.33 
##                              piteous                     rememberawhatath 
##                                 0.33                                 0.33 
##                                 root                              serpent 
##                                 0.33                                 0.33 
##                                sorri                               steadi 
##                                 0.33                                 0.33 
##                                stern                             strength 
##                                 0.33                                 0.33 
##                      suchaveryashort                                 suet 
##                                 0.33                                 0.33 
##                                suppl                              tougher 
##                                 0.33                                 0.33 
##                         wellawhataar                          whatacanaal 
##                                 0.33                                 0.33 
##                        whereahaveami                                 wife 
##                                 0.33                                 0.33 
##                                youth                               ground 
##                                 0.33                                 0.33 
##                                 just                                  the 
##                                 0.32                                 0.32 
##                               doesnt                                sleep 
##                                 0.32                                 0.32 
##                               attend                                  box 
##                                 0.32                                 0.32 
##                               hookah                                  sky 
##                                 0.32                                 0.32 
##                                 arch                               curios 
##                                 0.32                                 0.31 
##                                 care                                  eel 
##                                 0.31                                 0.31 
##                               father                                green 
##                                 0.31                                 0.31 
##                                 tear                                 toam 
##                                 0.31                                 0.31 
##                             uncommon                                  paw 
##                                 0.31                                 0.31 
##                                 real                                 sens 
##                                 0.31                                 0.31 
##                                sharp                                 head 
##                                 0.31                                 0.30 
##                                world                                  can 
##                                 0.30                                 0.30 
##                                  hes                                shake 
##                                 0.30                                 0.30 
##                                 fold                                 hare 
##                                 0.30                                 0.30 
##                                 beat 
##                                 0.30
newData <- removeSparseTerms(data,0.1)
newData
## <<DocumentTermMatrix (documents: 12, terms: 26)>>
## Non-/sparse entries: 286/26
## Sparsity           : 8%
## Maximal term length: 7
## Weighting          : term frequency - inverse document frequency (normalized) (tf-idf)
library(tibble)
as_tibble(as.matrix(newData))
## # A tibble: 12 × 26
##      alice      and     come      eye    first      get      got    head     ive
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>   <dbl>   <dbl>
##  1 0.00354 0.000337 0.000337 0.000169 0.000506 0.00152  0.000337 5.06e-4 3.37e-4
##  2 0.00345 0.000476 0.000953 0.000357 0.000834 0.000119 0.000595 1.19e-4 2.38e-4
##  3 0.00219 0.000410 0.000273 0.000137 0.000683 0.000820 0.000546 2.73e-4 4.10e-4
##  4 0.00269 0.000122 0.000244 0.000367 0.000367 0.000244 0.000244 9.78e-4 1.22e-4
##  5 0       0        0        0        0        0        0        0       0      
##  6 0.00340 0.00104  0.000444 0.000148 0.000444 0.000888 0.000444 2.96e-4 1.48e-4
##  7 0.00311 0.000601 0.000701 0.000401 0.000301 0.000902 0.000601 6.01e-4 3.01e-4
##  8 0.00523 0.00229  0.000654 0.000327 0.000327 0.000654 0.000327 6.54e-4 9.81e-4
##  9 0.00434 0.000505 0.000303 0.000404 0.000505 0.00101  0.000202 4.04e-4 2.02e-4
## 10 0.00545 0.000535 0.000321 0.000535 0.000535 0.000535 0.000321 6.42e-4 2.14e-4
## 11 0.00400 0.000411 0.000513 0.000205 0.000411 0.000616 0.000616 1.64e-3 2.05e-4
## 12 0.00565 0.000761 0.000761 0.000217 0.000217 0.000109 0.000217 3.26e-4 3.26e-4
## # ℹ 17 more variables: like <dbl>, littl <dbl>, look <dbl>, make <dbl>,
## #   much <dbl>, never <dbl>, now <dbl>, one <dbl>, said <dbl>, see <dbl>,
## #   that <dbl>, the <dbl>, thought <dbl>, time <dbl>, turn <dbl>, use <dbl>,
## #   well <dbl>