Load libraries.

library(readtext)
library(tidyverse)
library(quanteda)
library(quanteda.textstats)

Build a corpus of the students’ book reports.

# Read every file in the current working directory with readtext() and
# combine the texts into a single quanteda corpus, one document per file.
x <- corpus(readtext(list.files()))

Check the format of the in-text citations with the students.

# List every in-text citation of the form "( Author , 2008 )".
# The corpus is tokenised explicitly because calling kwic() on a corpus is
# deprecated since quanteda 3.0. Each space-separated piece of the phrase()
# pattern is a regular expression matched against one token.
x %>%
  tokens() %>%
  kwic(pattern = phrase("\\( . , \\d{4} \\)"), valuetype = "regex") %>%
  as.data.frame() %>%
  # select() rather than summarise(): summarise() calls that return several
  # rows are deprecated since dplyr 1.1.0.
  select(intext_citation = keyword)
          intext_citation
1   ( Shakespear , 1993 )
2        ( Spyri , 2008 )
3       ( Bladon , 2011 )
4      ( Burnett , 2008 )
5        ( Milne , 2002 )
6       ( Bladon , 2011 )
7  ( Shakespeare , 2007 )
8         ( Baum , 2007 )
9         ( Baum , 2007 )
10     ( Dickens , 2009 )
11      ( Bladon , 2012 )
12      ( Leroux , 2005 )
13       ( Spyri , 2008 )
14    ( Colbourn , 2008 )
15       ( Doyle , 2002 )
16      ( Stoker , 2002 )
17        ( Baum , 2007 )
18      ( Bladon , 2011 )

In-text citations placed outside the sentence, after the period (incorrect).

# List in-text citations that follow a period, i.e. the token sequence
# ". ( Author , 2008 )".
# The corpus is tokenised explicitly because calling kwic() on a corpus is
# deprecated since quanteda 3.0.
x %>%
  tokens() %>%
  kwic(pattern = phrase("\\. \\( . , \\d{4} \\)"), valuetype = "regex") %>%
  as.data.frame() %>%
  # select() rather than summarise(): summarise() calls that return several
  # rows are deprecated since dplyr 1.1.0.
  select(intext_citation = keyword)
       intext_citation
1 . ( Burnett , 2008 )
2    . ( Baum , 2007 )
3  . ( Bladon , 2012 )

In-text citations within the sentence (correct).

# List in-text citations that are followed by a period, i.e. the token
# sequence "( Author , 2008 ) .".
# The corpus is tokenised explicitly because calling kwic() on a corpus is
# deprecated since quanteda 3.0.
x %>%
  tokens() %>%
  kwic(pattern = phrase("\\( . , \\d{4} \\) \\."), valuetype = "regex") %>%
  as.data.frame() %>%
  # select() rather than summarise(): summarise() calls that return several
  # rows are deprecated since dplyr 1.1.0. The column keeps its display name.
  select("in-text citation" = keyword)
           in-text citation
1   ( Shakespear , 1993 ) .
2        ( Spyri , 2008 ) .
3       ( Bladon , 2011 ) .
4      ( Burnett , 2008 ) .
5        ( Milne , 2002 ) .
6       ( Bladon , 2011 ) .
7  ( Shakespeare , 2007 ) .
8         ( Baum , 2007 ) .
9         ( Baum , 2007 ) .
10     ( Dickens , 2009 ) .
11      ( Bladon , 2012 ) .
12      ( Leroux , 2005 ) .
13       ( Spyri , 2008 ) .
14    ( Colbourn , 2008 ) .
15       ( Doyle , 2002 ) .
16      ( Stoker , 2002 ) .
17        ( Baum , 2007 ) .
18      ( Bladon , 2011 ) .

Check the APA formatting of the references with the class.

# Show the text that follows the phrase "in APA format" in each report
# (up to 40 tokens), which is where the students wrote their reference.
# The corpus is tokenised explicitly because calling kwic() on a corpus is
# deprecated since quanteda 3.0.
x %>%
  tokens() %>%
  kwic(pattern = phrase("in APA format"), window = 40) %>%
  as.data.frame() %>%
  # pull() extracts the context column directly; the former
  # summarise() %>% unlist relied on multi-row summarise() results, which are
  # deprecated since dplyr 1.1.0.
  pull(post) %>%
  str_squish() %>%
  as.factor()
 [1] ( Shakespear , 1993 ) .                                                                                                                                                                                               
 [2] Spyri , J . ( 2008 ) . Heidi . Tokyo , Japan : Macmillan Language House .                                                                                                                                             
 [3] Bladon , R . ( 2011 ) . Gandhi . Tokyo , Japan : Macmillan Readers .                                                                                                                                                  
 [4] Burnett , F . H . ( 2008 ) . The secret garden . Tokyo , Japan : Macmillan Language House .                                                                                                                           
 [5] Milne , J . ( 2008 ) . The black cat . Tokyo , Japan : Macmillan Language House .                                                                                                                                     
 [6] Bladon , R . ( 2011 ) . Gandhi . Tokyo , Japan : Macmillan Language House .                                                                                                                                           
 [7] Shakespeare , W . ( 2007 ) . A midsummer night's dream . Tokyo , Japan : Macmillan Language House . https://elib.maruzen.co.jp/elib/html/BookDetail/Id/3000006397?6                                                   
 [8] Baum , F . ( 2007 ) . The wizard of Oz . Tokyo , Japon : Macmillan Language House .                                                                                                                                   
 [9] Baum , L . F . ( 2007 ) . The wizard of Oz . Tokyo , Japan : Macmillan Language House .                                                                                                                               
[10]                                                                                                                                                                                                                       
[11] Dickens , C . ( 2009 ) . A Christmas Carol . Oxford , England : Oxford Bookworms .                                                                                                                                    
[12] Bladon , R . ( 2012 ) . The story of the Olympics -an unofficial history- . Tokyo , Japan . Macmillan Language House . Retrieved June 15 , 2021 , from https://elib.maruzen.co.jp/elib/html/BookDetail/Id/3000006413?3
[13] Leroux , G . ( 2005 ) . The phantom of the opera . Tokyo , Japan : Macmillan Language House .                                                                                                                         
[14] Spyri , J . ( 2008 ) . Heidi . Tokyo , Japan : Macmillan Language House .                                                                                                                                             
[15] Colbourn , S . ( 2008 ) . King Arthur and the knights of the round table . Tokyo , Japan : Macmillan Language House .                                                                                                 
[16] Doyle , A . C . ( 2002 ) . The speckled band and other stories . Tokyo , Japan : Macmillan Language House .                                                                                                           
[17] Stoker , B . ( 2002 ) . Dracula . Tokyo , Japan : Macmillan Language House .                                                                                                                                          
[18] Baum , F . ( 2007 ) . The wizard of OZ . Tokyo , Japan : Macmillan Language House .                                                                                                                                   
[19] Bladon , R . ( 2011 ) . Gandhi . Japan : Macmillan Language House .                                                                                                                                                   
18 Levels:  ... Stoker , B . ( 2002 ) . Dracula . Tokyo , Japan : Macmillan Language House .

Extract summaries and opinions.

# The summary is the text between the end of the first "citation" and the
# start of the first "Opinion" in each report.
summary_start <- str_locate(x, "citation")[, "end"]
summary_end <- str_locate(x, "Opinion")[, "start"]
summary <- str_squish(str_sub(x, summary_start + 1, summary_end - 1))

# The opinion is the text between the end of the first "Opinion" and the
# start of the first "Reference" in each report.
opinion_start <- str_locate(x, "Opinion")[, "end"]
opinion_end <- str_locate(x, "Reference")[, "start"]
opinion <- str_squish(
  str_sub(x, start = opinion_start + 1, end = opinion_end - 1)
)

Common words in summaries

# Words used at least five times across all summaries, most frequent first
# (ties in alphabetical order).
# The text is tokenised explicitly and stopwords are dropped with
# dfm_remove(): calling dfm() on a character vector and passing `remove` to
# dfm() are both deprecated since quanteda 3.0.
summary %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE) %>%
  dfm() %>%
  dfm_remove(stopwords("en")) %>%
  textstat_frequency() %>%
  as.data.frame() %>%
  filter(frequency >= 5) %>%
  arrange(desc(frequency), feature) %>%
  # select() rather than summarise(): summarise() calls that return several
  # rows are deprecated since dplyr 1.1.0.
  select(word = feature, frequency)
         word frequency
1     dorothy        22
2         one        21
3        went        20
4       witch        20
5      holmes        17
6        back        15
7         day        15
8      gandhi        15
9          go        15
10      house        15
11      heidi        14
12        man        13
13      story        13
14    however        12
15    started        12
16       also        11
17       died        11
18       home        11
19      india        11
20         mr        11
21         oz        11
22      romeo        11
23     wanted        11
24     london        10
25        met        10
26     wizard        10
27       city         9
28  demetrius         9
29       good         9
30     kansas         9
31       room         9
32        saw         9
33      black         8
34    british         8
35  christine         8
36   criminal         8
37        got         8
38      heard         8
39     juliet         8
40       made         8
41       said         8
42  scarecrow         8
43       came         7
44     castle         7
45      clara         7
46     cubitt         7
47    decided         7
48    emerald         7
49     father         7
50     garden         7
51      ghost         7
52     killed         7
53      lived         7
54     people         7
55      uncle         7
56       woke         7
57       born         6
58        cat         6
59  christmas         6
60       eric         6
61      going         6
62      later         6
63       love         6
64        new         6
65    scrooge         6
66      south         6
67       stop         6
68      three         6
69       time         6
70       told         6
71      tried         6
72       walk         6
73        war         6
74       work         6
75      angry         5
76       aunt         5
77       away         5
78  beautiful         5
79     became         5
80       come         5
81    country         5
82       east         5
83       felt         5
84      first         5
85     friend         5
86        get         5
87     helena         5
88     knight         5
89        law         5
90       left         5
91       life         5
92       lion         5
93     living         5
94      loved         5
95   lysander         5
96       meet         5
97   mountain         5
98       name         5
99   olympics         5
100     opera         5
101    person         5
102   phantom         5
103    secret         5
104     shoes         5
105   strange         5
106       tin         5
107       two         5
108       way         5
109     world         5
110     years         5

Common words in opinions

# Words used at least five times across all opinions, most frequent first
# (ties in alphabetical order).
# The text is tokenised explicitly and stopwords are dropped with
# dfm_remove(): calling dfm() on a character vector and passing `remove` to
# dfm() are both deprecated since quanteda 3.0.
opinion %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE) %>%
  dfm() %>%
  dfm_remove(stopwords("en")) %>%
  textstat_frequency() %>%
  as.data.frame() %>%
  filter(frequency >= 5) %>%
  arrange(desc(frequency), feature) %>%
  # select() rather than summarise(): summarise() calls that return several
  # rows are deprecated since dplyr 1.1.0.
  select(word = feature, frequency)
        word frequency
1      story        30
2       like        18
3       book        16
4      think        16
5    thought        15
6       read        13
7     people        12
8    reading        10
9  important         9
10  japanese         8
11      want         8
12      also         7
13   however         7
14     japan         7
15      life         7
16       man         7
17      many         7
18      good         6
19     heidi         6
20      love         6
21      make         6
22 surprised         6
23       can         5
24   dracula         5
25      felt         5
26   history         5
27      just         5
28   learned         5
29      lost         5
30      mary         5
31       one         5
32    wanted         5
33    wizard         5

Rare words (used two to four times across all reports)

# Alphabetical list of words that occur two to four times in the whole
# corpus, ignoring single-character features.
# The corpus is tokenised explicitly and stopwords are dropped with
# dfm_remove(): calling dfm() on a corpus and passing `remove` to dfm() are
# both deprecated since quanteda 3.0.
x %>%
  tokens(
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_url = TRUE,
    remove_numbers = TRUE
  ) %>%
  dfm() %>%
  dfm_remove(stopwords("en")) %>%
  textstat_frequency() %>%
  as.data.frame() %>%
  filter(frequency <= 4 & frequency >= 2) %>%
  filter(nchar(feature) > 1) %>%
  # select() rather than summarise(): summarise() calls that return several
  # rows are deprecated since dplyr 1.1.0.
  select(word = feature) %>%
  arrange(word)
             word
1            able
2   advertisement
3             age
4         allowed
5          always
6         amazing
7           anger
8         animals
9         another
10       antiques
11         anyone
12       anything
13      apartment
14    aphrodisiac
15         appear
16     appearance
17       appeared
18         around
19       arrested
20        arrived
21      assistant
22         athens
23       attacked
24            bad
25          badly
26           bank
27         banned
28         become
29            bed
30          began
31        believe
32           best
33          blown
34          brain
35          brave
36     breathless
37          broke
38          broom
39         buried
40        burnett
41           busy
42         called
43       campaign
44     campaigned
45       canceled
46        capulet
47      capulet's
48           care
49      carefully
50           case
51          caste
52         caught
53        century
54          chain
55         chance
56         change
57        changed
58       changing
59          clues
60       colbourn
61          conan
62     confidence
63      consulted
64      continued
65      countries
66         course
67      cratchits
68         craven
69            cry
70         crying
71          curse
72        cyclone
73         danger
74       daughter
75           days
76           dead
77          deeds
78      delighted
79      depiction
80         desire
81      detective
82     determined
83          detie
84        dickens
85         dickon
86     difference
87      difficult
88    disappeared
89   disobedience
90         doctor
91            dog
92           door
93      dorothy's
94          doyle
95       drawings
96          dream
97            due
98         easily
99        eastern
100          easy
101         elsie
102         enemy
103       england
104       enjoyed
105       entered
106        eric's
107        escape
108    especially
109        europe
110           eve
111          ever
112         every
113    everything
114       examine
115       example
116     excalibur
117      exciting
118    experience
119          face
120          fact
121       fairies
122        fairly
123          fall
124      families
125        family
126        famous
127           far
128          fear
129          feel
130      feelings
131          find
132      followed
133        forest
134     frankfurt
135    frightened
136         front
137       funeral
138        future
139          gave
140        gewain
141          glad
142        glinda
143          gold
144    government
145    graduating
146         grand
147   grandfather
148         great
149         greed
150         green
151          grew
152       guessed
153     happening
154         happy
155         hated
156        headed
157          held
158         helen
159        helped
160        hermia
161      hermia's
162          high
163        hilton
164         hindi
165         hindu
166        hindus
167           hit
168        hitler
169        honest
170          hope
171        horror
172      imagined
173    importance
174     impressed
175        indian
176       indians
177     inspector
178    interested
179       invited
180           ioc
181           job
182        joined
183      jonathan
184         julia
185          kill
186          knew
187       knights
188          know
189          lady
190          lake
191          land
192          late
193      lawrence
194        lawyer
195         leave
196       leaving
197        leroux
198        letter
199         liked
200          live
201         lives
202        looked
203           lot
204      managers
205        marley
206      marriage
207       married
208         marry
209        martin
210    masquerade
211      material
212           may
213       meeting
214           men
215         might
216         milne
217       mistake
218         money
219      montague
220      moreover
221       morning
222          move
223         moved
224          much
225        murder
226       musical
227        muslim
228       muslims
229          must
230    mysterious
231       nations
232        nature
233        nephew
234          next
235         night
236           non
237   nonviolence
238       norfolk
239       noticed
240           now
241        oberon
242       offered
243         often
244        opened
245         order
246       ordered
247        orders
248      overcame
249        oxford
250       painted
251      pakistan
252         paper
253       parents
254         paris
255 participating
256         party
257        passed
258          past
259      peaceful
260     pearson's
261      people's
262   performance
263         peter
264      peterson
265        places
266      planning
267       pleased
268          poor
269         power
270        prayer
271      problems
272      promised
273         proud
274     provision
275          puck
276       pursued
277           put
278       quickly
279        racial
280         raoul
281          raul
282      realized
283        really
284      received
285           red
286    red-headed
287       refused
288  relationship
289     religions
290      remember
291       request
292      resisted
293      returned
294         round
295          ruby
296          rule
297           run
298           sad
299     salahadin
300          salt
301         scary
302         scene
303        scenes
304        second
305        seeing
306       seeking
307        seemed
308          seen
309       selfish
310          sent
311        series
312       serious
313      servants
314         seven
315       several
316   shakespeare
317      sherlock
318       shocked
319          shop
320          shot
321        showed
322          sick
323       similar
324         since
325          sing
326        sister
327     situation
328         sleep
329         small
330         snake
331           son
332         sound
333     spaulding
334       spirits
335        sports
336        spring
337         spyri
338         stage
339         stand
340         state
341        states
342  stepfather's
343        stingy
344        stoker
345        stolen
346       stopped
347       stories
348        strong
349       student
350       studied
351  subordinates
352    substitute
353     succeeded
354      suddenly
355     supremacy
356          sure
357  surprisingly
358         sword
359        system
360         table
361          take
362         taken
363        talent
364          task
365        taught
366          tell
367         tells
368      terrible
369        thanks
370     therefore
371        though
372         tired
373      together
374          took
375          toto
376         train
377          true
378          ugly
379            uk
380 unfortunately
381        united
382    university
383  unreasonable
384       unusual
385           use
386       various
387        verona
388        violin
389         visit
390       visited
391         voice
392         vowed
393        walked
394         wants
395          wars
396        watson
397       wedding
398          well
399          west
400       whether
401         white
402        wicked
403          wife
404        wilson
405          wish
406       without
407         woman
408           won
409     wonderful
410          word
411         words
412       written
413           ww2
414         young

Punctuation point: You need to put a space after a period.

# Find tokens in which a period is immediately followed by a non-whitespace
# character, e.g. "death.The". URLs and forms such as "Mr.Sesemann" match the
# same pattern, so the hits still have to be judged by eye.
# The corpus is tokenised explicitly because calling kwic() on a corpus is
# deprecated since quanteda 3.0.
x %>%
  tokens() %>%
  kwic(pattern = "\\.\\S", window = 10, valuetype = "regex") %>%
  as.data.frame() %>%
  # select() rather than summarise(): summarise() calls that return several
  # rows are deprecated since dplyr 1.1.0.
  select(keyword)
                                                           keyword
1                                                        death.The
2                                                    other.However
3                                                  father.Moreover
4                                                        Athens.So
5                                                    them.Moreover
6                                                           too.So
7                                                        there.One
8                                                        Helena.So
9                                                   Helena.However
10                                                    Helena.After
11                                                        too.Then
12                                                    violently.To
13                                                    dangerous.He
14                                             magically.Meanwhile
15                                              Lysander.Afterward
16                                                         died.In
17                                                   brave.Finally
18                                                         happy.I
19                                                          on.Its
20                                                        story.So
21 https://elib.maruzen.co.jp/elib/html/BookDetail/Id/3000006397?6
22                                                       other.Not
23 https://elib.maruzen.co.jp/elib/html/BookDetail/Id/3000006413?3
24                                                     Mr.Sesemann
25                                                   Dracula.There

Summary word count

# Density plot of the number of tokens in each summary.
df <- data.frame(summary)
df[["words"]] <- ntoken(summary)
ggplot(df, aes(x = words)) +
  geom_density() +
  # Hide the y axis labels and ticks.
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

Opinion word count

# Density plot of the number of tokens in each opinion.
df <- data.frame(opinion)
df[["words"]] <- ntoken(opinion)
ggplot(df, aes(x = words)) +
  geom_density() +
  # Hide the y axis labels and ticks.
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )