source("http://www.linguistics.ucsb.edu/faculty/stgries/exact.matches.2.r")
# Collect the paths of the files in "files" whose names match "sgml_" and keep
# at most the first four of them. head() is used rather than [1:4] so that a
# directory with fewer than four hits does not yield NA entries in the vector.
corpus.files <- head(
   dir(
      "files",
      pattern = "sgml_",
      full.names = TRUE),
   4)
corpus.files
## [1] "files/corp_bnc_sgml_1.txt" "files/corp_bnc_sgml_2.txt"
## [3] "files/corp_bnc_sgml_3.txt" "files/corp_bnc_sgml_4.txt"
# Search every corpus file for the sequence
#    verb + "the" + singular time noun + "away"
# and pool all exact matches in the character vector all.matches.
#
# The matches of each file are stored in a preallocated list and flattened once
# after the loop, instead of growing all.matches with c() on every iteration.
matches.by.file <- vector("list", length(corpus.files))
for (i in seq_along(corpus.files)) {
   # Read the file with one element per line and lower-case everything.
   current.corpus.file <- tolower(
      scan(
         corpus.files[i],
         what=character(),
         sep="\n",
         quote="",
         comment.char="",
         quiet=TRUE))

   # Keep only the lines that contain a sentence tag.
   current.sentences <- grep(
      "<s n=",
      current.corpus.file,
      perl=TRUE,
      value=TRUE)

   # Delete every tag that does not open with "w " or "c " plus at least three
   # further characters, together with the text that follows it up to the next
   # "<"; what remains are the <w ...>/<c ...> tags and their tokens.
   current.sentences <- gsub(
      "<(?![wc] (...|...-...)).*?>[^<]*",
      "",
      current.sentences,
      perl=TRUE)

   # exact.matches.2 is not defined in this script; presumably it is provided
   # by the file source()d at the top, and its first list component holds the
   # exact matches (TODO confirm against that file).
   # The pattern starts with (?x), so literal whitespace inside it is ignored
   # and every real space has to be written as \\s.
   current.matches <- exact.matches.2(
      "(?x)          
      <w\\svv.>        
      [a-z]*          
      \\s              
      <w\\sat0>the
      \\s
      <w\\snn1>
      (morning|noon|afternoon|evening|night|day|week|month|year)
      \\s
      <w\\sav0>away",
      current.sentences)[[1]]

   # Assign via [i] <- list(...) so that a NULL result cannot shrink the list.
   matches.by.file[i] <- list(current.matches)

   # Progress report: proportion of files processed so far.
   cat("\f", i/length(corpus.files))
}
# c(character(), ...) guarantees a character vector even when nothing was found.
all.matches <- c(character(), unlist(matches.by.file))
##  0.25 0.5 0.75 1
# Report how much memory the pooled match vector occupies.
object.size(all.matches)
## 1640 bytes

How many instances are there?

# Build the summary sentence from the number of matches, then print it.
instance.message <- paste0(
   "There are ",
   length(all.matches),
   " instances")
print(instance.message)
## [1] "There are 21 instances"

The four corpus files contain 21 instances of this construction.

What is the distribution of the time nouns?

# Verb of each match: the text after the first tag, up to the space that
# precedes the next tag.
v <- sub("<.*?>([^<]+) <.*", "\\1", all.matches, perl = TRUE)

# Time noun of each match: the text after the <w nn1> tag, up to the
# whitespace that precedes the next tag.
n <- sub(".*?<w\\snn1>(.*?)\\s<.*", "\\1", all.matches, perl = TRUE)

# Show the first ten matches side by side with the extracted verbs and nouns.
head(
   data.frame(
      MATCHES = all.matches,
      VERBS = v,
      NOUNS = n),
   10)
##                                                MATCHES    VERBS   NOUNS
## 1     <w vvb>dance <w at0>the <w nn1>night <w av0>away    dance   night
## 2   <w vvg>dancing <w at0>the <w nn1>night <w av0>away  dancing   night
## 3   <w vvg>dancing <w at0>the <w nn1>night <w av0>away  dancing   night
## 4   <w vvi>waltz <w at0>the <w nn1>evening <w av0>away    waltz evening
## 5     <w vvb>dance <w at0>the <w nn1>night <w av0>away    dance   night
## 6    <w vvd>danced <w at0>the <w nn1>night <w av0>away   danced   night
## 7    <w vvn>talked <w at0>the <w nn1>night <w av0>away   talked   night
## 8     <w vvb>dance <w at0>the <w nn1>night <w av0>away    dance   night
## 9     <w vvd>swung <w at0>the <w nn1>night <w av0>away    swung   night
## 10 <w vvg>prancing <w at0>the <w nn1>night <w av0>away prancing   night
# Cross-tabulate verbs (rows) against time nouns (columns), then report the
# size of the table and show its first rows.
tam <- table(v, n)
dim(tam)
## [1] 12  3
head(tam)
##          n
## v         day evening night
##   dance     0       0     6
##   danced    0       0     1
##   dancing   0       0     5
##   discoed   0       0     1
##   dozing    1       0     0
##   love      0       0     1

Among the 21 instances found, 1 is ‘day,’ 1 is ‘evening,’ and 19 are ‘night.’

# Frequency of each time noun across all matches.
table(n)
## n
##     day evening   night 
##       1       1      19

What time scale do most of them refer to?

Most of the time nouns refer to a part of a day: ‘evening’ and ‘night,’ which together account for 20 of the 21 instances, are both parts of a day.

# Bar chart of the time-noun frequencies.
barplot(
   height = table(n),
   main = "Time distribution")

What is the semantic implication of this construction?

The time-away construction is very interesting. First, the matched verbs (for example, ‘dance,’ ‘spend,’ and ‘talk’) all denote an action or activity, not a state: the subject must actively do something to make the time pass. Second, in terms of their semantics, all these verbs have a positive meaning: the activity or action they denote is enjoyable. This possibly explains why the time just went away.