Load libraries.

library(readtext)
library(tidyverse)
library(quanteda)
library(quanteda.textstats)

Create a corpus of the students’ essays.

# Build a quanteda corpus from the Word documents in the working directory.
# The pattern is a regular expression, so it is anchored to the ".docx"
# extension; a bare "docx" would also match any file or folder whose name
# merely contains those letters.
x <- list.files(pattern = "\\.docx$") %>%
  readtext(ignore_missing_files = TRUE) %>%
  corpus()

Common Words

# The 50 most frequent words, ignoring punctuation, numbers, English
# stopwords, single-character tokens and words that occur only once.
# The corpus is tokenised explicitly because passing tokens()/removal
# arguments through dfm() is deprecated in quanteda >= 3.
x %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE) %>%
  tokens_remove(stopwords("en")) %>%
  dfm() %>%
  textstat_frequency() %>%
  as.data.frame() %>%
  filter(frequency >= 2, nchar(feature) > 1) %>%
  # Most frequent first; ties are broken alphabetically.
  arrange(desc(frequency), feature) %>%
  # select() rather than summarise(): this keeps one row per word, and
  # multi-row summarise() results are deprecated in dplyr >= 1.1.
  select(word = feature, frequency) %>%
  head(50)
         word frequency
1       works        82
2        work        37
3      famous        33
4       wrote        28
5    ishiguro        23
6      series        22
7       lewis        20
8         can        19
9         one        19
10       also        18
11      novel        18
12       made        16
13      story        16
14      books        15
15    england        15
16       many        15
17      kazuo        14
18  published        14
19      three        14
20        age        13
21      harry        13
22      later        13
23    stories        13
24    written        13
25       book        12
26    fantasy        12
27         go        12
28      japan        12
29 literature        12
30     narnia        12
31     potter        12
32 references        12
33       four        11
34 ishiguro's        11
35     novels        11
36     people        11
37      world        11
38 chronicles        10
39      first        10
40     prince        10
41  character         9
42  different         9
43      faber         9
44      group         9
45      henry         9
46       life         9
47      never         9
48 nonfiction         9
49       play         9
50      plays         9

Classification Word: Classified

# Keyword-in-context lines for "classified".
# kwic() is applied to tokens because calling it on a corpus is deprecated
# in quanteda >= 3; as.data.frame() + select() replaces the multi-row
# summarise(), which is deprecated in dplyr >= 1.1.
x %>%
  tokens() %>%
  kwic(pattern = "classified") %>%
  as.data.frame() %>%
  select(pre, keyword, post)
                              pre    keyword                         post
1               . His work can be classified      into 3 sections . First
2            genres and it can be classified       into three : fantasy ,
3            genres and it can be classified       into three : fantasy ,
4              , his works can be classified     into three groups by the
5                 " but it is not classified as a ShakeSpearean tragedy .
6 Shakespeare's works , which are classified  into comedies , tragedies ,
7       When Ishiguro's works are classified by chronological order , the

Classification Word: Divided

# Keyword-in-context lines for "divided".
# kwic() is applied to tokens because calling it on a corpus is deprecated
# in quanteda >= 3; as.data.frame() + select() replaces the multi-row
# summarise(), which is deprecated in dplyr >= 1.1.
x %>%
  tokens() %>%
  kwic(pattern = "divided") %>%
  as.data.frame() %>%
  select(pre, keyword, post)
                             pre keyword                          post
1 work A Shakespeare's works are divided into four categories : comedy
2              , his work can be divided      as before heyday works .
3  Kazuo Ishiguro's works can be divided  into three categories by age

In-text citation in APA format

# Find in-text citations shaped like "( Author , 2005 )".
# phrase() splits the pattern on spaces, so each piece is a regular
# expression matched against one consecutive token: "(", any token, ",",
# a token with four digits, ")".
# kwic() is applied to tokens because calling it on a corpus is deprecated
# in quanteda >= 3.
x %>%
  tokens() %>%
  kwic(
    pattern = phrase("\\( . , \\d{4} \\)"),
    valuetype = "regex"
  ) %>%
  as.data.frame() %>%
  select(intext_citation = keyword)
          intext_citation
1        ( Lewis , 2005 )
2     ( Ishiguro , 2006 )
3     ( Ishiguro , 2010 )
4  ( Shakespeare , 1983 )
5        ( Lewis , 2001 )
6        ( Lewis , 2001 )
7        ( Lewis , 1942 )
8        ( Lewis , 1940 )
9        ( Lewis , 1960 )
10       ( Lewis , 1970 )
11       ( Lewis , 2005 )
12    ( Ishiguro , 2006 )
13    ( Ishiguro , 2010 )
14    ( Ishiguro , 1995 )
15    ( Ishiguro , 2006 )
16       ( Wilde , 1997 )
17       ( Lewis , 2001 )
18       ( Lewis , 1942 )
19       ( Lewis , 1940 )
20       ( Lewis , 1960 )
21       ( Lewis , 1970 )
22    ( Ishiguro , 1995 )

In-text citations placed after the sentence-final period (INCORRECT)

# Find citations that directly follow a period, i.e. ". ( Author , 2005 )".
# Same token-by-token regex as the plain citation search, with a leading
# "\\." token added.
# kwic() is applied to tokens because calling it on a corpus is deprecated
# in quanteda >= 3.
x %>%
  tokens() %>%
  kwic(
    pattern = phrase("\\. \\( . , \\d{4} \\)"),
    valuetype = "regex"
  ) %>%
  as.data.frame() %>%
  select(intext_citation = keyword)
           intext_citation
1       . ( Lewis , 2005 )
2 . ( Shakespeare , 1983 )
3       . ( Lewis , 2001 )
4       . ( Lewis , 2001 )
5       . ( Lewis , 2005 )
6    . ( Ishiguro , 2010 )
7    . ( Ishiguro , 1995 )
8    . ( Ishiguro , 2006 )
9       . ( Lewis , 2001 )

In-text citations within the sentence (CORRECT)

# Find citations that are directly followed by a period, i.e.
# "( Author , 2005 ) .". Same token-by-token regex as the plain citation
# search, with a trailing "\\." token added.
# kwic() is applied to tokens because calling it on a corpus is deprecated
# in quanteda >= 3.
x %>%
  tokens() %>%
  kwic(
    pattern = phrase("\\( . , \\d{4} \\) \\."),
    valuetype = "regex"
  ) %>%
  as.data.frame() %>%
  select(intext_citation = keyword)
            intext_citation
1     ( Ishiguro , 2006 ) .
2     ( Ishiguro , 2010 ) .
3  ( Shakespeare , 1983 ) .
4        ( Lewis , 1970 ) .
5     ( Ishiguro , 2006 ) .
6     ( Ishiguro , 2010 ) .
7     ( Ishiguro , 1995 ) .
8     ( Ishiguro , 2006 ) .
9        ( Wilde , 1997 ) .
10       ( Lewis , 1970 ) .
11    ( Ishiguro , 1995 ) .

References in APA format

# Show up to 40 tokens following every occurrence of "References", which
# surfaces each essay's reference list.
# NOTE(review): kwic() matching is case-insensitive by default, so
# "references" used inside running text is presumably matched as well;
# confirm whether that is wanted.
x %>%
  tokens() %>%
  kwic(pattern = "References", window = 40) %>%
  as.data.frame() %>%
  # pull() extracts the context column directly as a character vector.
  pull(post) %>%
  str_squish() %>%
  as.factor()
 [1] Ishiguro , K . ( 2010 ) . A pale view of hills . London , United Kingdom : Fabre and Faber . Ishiguro , K . ( 1995 ) . The unconsoled . London , United Kingdom : Fabre                                    
 [2] Ishiguro , K . ( 2010 ) . A pale view of hills . London , United Kingdom : Fabre and Faber .                                                                                                               
 [3] Shakespeare , W . Comedy of errors . Tokyo , Japan : Hakusuisya .                                                                                                                                          
 [4] Ishiguro , K . ( 2006 ) . Never let me go . Tokyo , Japan : Hayakawa Publishing , Inc .                                                                                                                    
 [5] Ishiguro , K . ( 1989 ) . The remains of the day . London , England : Faber and Faber .                                                                                                                    
 [6]                                                                                                                                                                                                            
 [7] Ishiguro , K . ( 2010 ) . A Pale View of Hills . London , United Kingdom : Fabre and Faber . Ishiguro , K . ( 1995 ) . The unconsoled . London , United Kingdom : Fabre                                    
 [8] Wilde , O . ( 2015 ) . The happy prince and other tales . Project Gutenberg . Retrieved from The Happy Prince , by Oscar Wilde ( gutenberg.org )                                                           
 [9] https://ja.wikipedia.org/wiki/J%E3%83%BBK%E3%83%BB%E3%83%AD%E3%83%BC%E3%83%AA%E3%83%B3%E3%82%B0                                                                                                            
[10] A review of ' the Four Great Tragedies of ShakeSpeare . ' ( 2019 , Oct 13 ) . Retrieved , from https://loohcs.jp/articles/4300 ShakeSpearean tragedy . ( n.d . ) . Retrieved , from https://ja.eferrit.com/
[11]                                                                                                                                                                                                            
[12] ( " Text of J.K . Rowling's speech " , The Harvard Gazette , Received full URL:https://news.harvard.edu/gazette/story/2008/06/text-of-j-k-rowling-speech/ )                                                
11 Levels:  ...

Rare words

# Words that occur exactly once in the corpus (hapax legomena), sorted
# alphabetically, after removing punctuation, symbols, numbers, URLs,
# English stopwords and single-character tokens.
x %>%
  tokens(
    remove_punct = TRUE,
    remove_symbols = TRUE,
    # Spelled out in full; "remove_number" only worked through partial
    # argument matching.
    remove_numbers = TRUE,
    remove_url = TRUE
  ) %>%
  # Stopwords are removed at the tokens stage because dfm(remove = ) is
  # deprecated in quanteda >= 3.
  tokens_remove(stopwords("en")) %>%
  dfm() %>%
  textstat_frequency() %>%
  as.data.frame() %>%
  filter(frequency == 1) %>%
  select(word = feature) %>%
  filter(nchar(word) > 1) %>%
  arrange(word) %>%
  # NOTE(review): the next two steps drop rows by hard-coded position (the
  # first three rows, then row 368 of what remains). Presumably these are
  # junk tokens specific to this set of essays; different rows will be
  # dropped if the corpus changes. Consider filtering by value instead.
  slice(4:n()) %>%
  slice(-368)
               word
1          absolute
2      accidentally
3       accusations
4           achieve
5            acting
6            active
7          actually
8      additionally
9         admirable
10       admittedly
11          adopted
12     affectionate
13              aim
14           almost
15          already
16       annotators
17       appreciate
18             area
19      aristocracy
20           arrest
21         arrested
22           arrive
23           arthur
24         author's
25          authors
26  autobiographies
27          awarded
28          azkaban
29       bankruptcy
30              bbc
31           bearer
32            beast
33         beatings
34         behavior
35          besides
36            bible
37              big
38          billion
39            birth
40         bisexual
41            black
42             bold
43           booker
44       box-office
45      broadcasted
46         brothers
47     brushstrokes
48             care
49           career
50           caused
51         ceremony
52          chamber
53       characters
54           charge
55     chikumashobo
56    circumstances
57            civil
58        collected
59      collections
60             come
61       comparison
62       completely
63      complicated
64      concealment
65       conditions
66      considering
67          consist
68       consisting
69         consists
70         contains
71     contemporary
72          content
73       continuous
74         contrast
75          courage
76            crime
77           crimes
78        criticism
79            d'arc
80          dealing
81          deathly
82           decade
83          decided
84      decorations
85         defeated
86           degree
87          demonic
88       denouement
89        describes
90         destined
91        detective
92         devotion
93              die
94     disadvantage
95       distinctly
96         document
97        documents
98          dollars
99           dorian
100           drama
101          dramas
102        dramatic
103         earnest
104            easy
105         edition
106          edward
107       effective
108         emotion
109     emotionally
110        emotions
111       empathize
112       encounter
113          enough
114          entire
115       essential
116             etc
117         eternal
118      everything
119         evolved
120          except
121       exception
122      experience
123       explained
124        exposure
125       extremely
126          fables
127       fairytale
128            fall
129           false
130        familiar
131        families
132             fan
133        farcical
134         figured
135            find
136            fire
137            five
138           focus
139       following
140          french
141            full
142     full-length
143     furthermore
144          future
145            gave
146         gazette
147      generation
148     generations
149       gentlemen
150          goblet
151      graduation
152            gray
153          gray's
154     grindelwald
155           gross
156       gutenberg
157   gutenberg.org
158            hair
159      hakusuisya
160      half-blood
161         hallows
162          hamlet
163            hand
164          handed
165        happened
166            hard
167           harsh
168           heart
169          height
170            help
171          helped
172          hero's
173          heyday
174            high
175         highest
176            holy
177            home
178         hundred
179         husband
180           ideal
181       identical
182      identities
183         imagine
184       imitating
185           imply
186      imprisoned
187         include
188        increase
189       indecency
190      infidelity
191       influence
192      influenced
193             inn
194      interested
195      interviews
196      introduced
197         ireland
198           irish
199      ironically
200     j.k.rowling
201          jeanne
202          juliet
203           kafka
204           keeps
205            kind
206        labour's
207        language
208            lear
209           learn
210             led
211            left
212        lifetime
213           lines
214        literary
215          little
216            live
217            lord
218          love's
219         macbeth
220         madness
221          mainly
222        majority
223           marry
224         masters
225             may
226             men
227          method
228          middle
229       miserable
230         mishaps
231        mistaken
232        mistress
233          modern
234         morally
235            much
236           music
237         natural
238       naturally
239  near-seduction
240         neither
241            nice
242       nightfall
243           noble
244       nocturnes
245     occurrences
246             oct
247          origin
248         othello
249         overall
250           part3
251           parts
252          passed
253      perfection
254       performed
255         phoenix
256         picture
257          placed
258          places
259            plan
260        planning
261        platonic
262      playwright
263            plot
264           poems
265            poet
266          poetic
267           point
268        politics
269    pomegranates
270      possession
271       potential
272        potter's
273          praise
274         present
275        prisoner
276          prizes
277        problems
278        produced
279       profundis
280         project
281           proof
282         pursues
283         pursuit
284           quite
285         ravenna
286         reached
287            real
288          reason
289         reasons
290        received
291      recognized
292           refer
293        refernce
294       reflected
295        regarded
296          regret
297    relationship
298         release
299        released
300      represents
301       reshowing
302    respectively
303         revenue
304          review
305        rewarded
306         risking
307        romantic
308           romeo
309           roses
310        savile's
311         secrets
312        sections
313           seems
314            seen
315        selfless
316       sensation
317           sense
318     sensibility
319       sentenced
320          sequel
321           serve
322            sets
323         setting
324         several
325          sexual
326           shake
327     shortlisted
328           shows
329           shrew
330           since
331            skin
332    sonetto-syuu
333            song
334          speare
335         special
336        spectors
337           stage
338           stone
339          strike
340        strike's
341          strong
342     student.one
343      subdivided
344         succeed
345         success
346         summary
347            sung
348         surreal
349      syracusans
350           taken
351          taming
352           theft
353          themes
354           thing
355           tough
356           trial
357          trifle
358            true
359       turbulent
360         turmoil
361          turned
362           turns
363            twin
364         u.k.and
365      understand
366    unforgivable
367         unknown
368            used
369            uses
370          valued
371          verona
372         village
373         vividly
374            wars
375            ways
376         wealthy
377           whose
378         william
379       willpower
380    windermere's
381         without
382         works.a
383        wrongful
384            year

Word Count

# Density plot of essay length, measured as the number of tokens per
# document. The counts are stored in a one-column data frame whose column
# is named ".".
# NOTE(review): ntoken() on the raw corpus presumably counts punctuation
# marks as tokens too; confirm whether a punctuation-free word count is
# intended.
df <- data.frame(. = ntoken(x))

# The y axis (density values) carries no useful information here, so its
# text and tick marks are hidden.
hide_y_axis <- theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank()
)

ggplot(df, aes(x = .)) +
  geom_density() +
  hide_y_axis