Setting up our text corpus

First you’ll need to install and load the corpus package for text analysis using the two commands below.

#install.packages('corpus')
require(corpus)
## Loading required package: corpus

We have written the following custom function for the problem set, which will download any FOMC statement URL you supply and scrape the statement text.

# Remove HTML tags from text.
#
# Args:
#   htmlString: Character vector that may contain HTML markup.
#
# Returns:
#   A character vector of the same length in which every "<...>" span has
#   been deleted (shortest match, so each tag is removed separately and the
#   text between tags is kept).
clean_Html <- function(htmlString) {
  tag_pattern <- "<.*?>"
  gsub(tag_pattern, "", htmlString)
}

# Download an FOMC press-release page and return the statement text.
#
# Args:
#   url: Address of one FOMC statement page. Anything `readLines()` can open
#     is accepted, so a local copy of the page works as well.
#
# Returns:
#   A single character string: the text of every line that contains a "<p>"
#   tag, with HTML tags removed, joined by single spaces and trimmed at both
#   ends.
fomc_page_to_statement <- function(url) {
  # `warn = FALSE` silences only readLines()' own complaints (missing final
  # newline, embedded nuls). Warnings about a failed download are no longer
  # hidden, as they were by the previous blanket suppressWarnings().
  page_lines <- readLines(url, encoding = "UTF-8", warn = FALSE)

  paragraph_lines <- grep("<p>", page_lines, fixed = TRUE, value = TRUE)

  # Skip lines that hold nothing but an opening tag, whatever their
  # indentation (the previous test only matched an indent of exactly four
  # spaces).
  is_bare_tag <- trimws(paragraph_lines) == "<p>"
  statement_lines <- paragraph_lines[!is_bare_tag]

  trimws(paste0(clean_Html(statement_lines), collapse = " "))
}

First, add the URLs of the statements you wish to analyze to this vector. All statements from 2019 onwards are included already.

# Statement pages to download. Every address has the same shape, so the list
# is built from the release dates (YYYYMMDD). To include another statement,
# add its date below, or append a full URL to `urls` afterwards with c().
urls <- paste0(
  "https://www.federalreserve.gov/newsevents/pressreleases/monetary",
  c(
    "20190130", "20190320", "20190501",
    "20190619", "20190731", "20190918",
    "20191030", "20191211", "20200129"
  ),
  "a.htm"
)

The following commands call the function fomc_page_to_statement on each URL and load the resulting statements into a single data frame.

# Download every page listed in `urls`; each call returns one string.
statements <- lapply(urls, fomc_page_to_statement)

# Label each statement with the 8-digit release date (YYYYMMDD) found in the
# page's file name. Matching the digits themselves, rather than fixed
# character positions, keeps the labels correct if a URL has a different
# prefix length.
statement_dates <- sub(".*([0-9]{8}).*", "\\1", basename(urls))

statements_df <- data.frame(
  title = statement_dates,
  text = unlist(statements, use.names = FALSE),
  stringsAsFactors = FALSE
)

Analyzing Federal Open Market Committee Communications

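The command corpus_frame turns it into a corpus of text for data analysis. We will also set some helpful options: punctuation is dropped, while letter case is kept as written (map_case is FALSE, so "The" and "the" count as different words; set it to TRUE if you want case to be ignored).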

# Turn the data frame of statements into a corpus object for text analysis.
corpus_statements <- corpus_frame(statements_df)
rownames(corpus_statements) <- statements_df$title #makes it easier to reference the statement by date
text_filter(corpus_statements)$map_case <- FALSE #keeps upper and lower case distinct ("The" and "the" are separate tokens); set to TRUE to ignore case
text_filter(corpus_statements)$drop_punct <- TRUE #will ignore punctuation as words

Instead of being treated as a single string of characters, the package now recognizes the statement as a list of tokens (i.e. words) that are ready for statistical analysis. The following command shows you the statement in tokenized form.

# Show the tokens of the statement stored under row name "20200129".
text_tokens(corpus_statements["20200129",])
## $`20200129`
##   [1] "Information"    "received"       "since"          "the"           
##   [5] "Federal"        "Open"           "Market"         "Committee"     
##   [9] "met"            "in"             "December"       "indicates"     
##  [13] "that"           "the"            "labor"          "market"        
##  [17] "remains"        "strong"         "and"            "that"          
##  [21] "economic"       "activity"       "has"            "been"          
##  [25] "rising"         "at"             "a"              "moderate"      
##  [29] "rate"           "Job"            "gains"          "have"          
##  [33] "been"           "solid"          "on"             "average"       
##  [37] "in"             "recent"         "months"         "and"           
##  [41] "the"            "unemployment"   "rate"           "has"           
##  [45] "remained"       "low"            "Although"       "household"     
##  [49] "spending"       "has"            "been"           "rising"        
##  [53] "at"             "a"              "moderate"       "pace"          
##  [57] "business"       "fixed"          "investment"     "and"           
##  [61] "exports"        "remain"         "weak"           "On"            
##  [65] "a"              "12‑month"       "basis"          "overall"       
##  [69] "inflation"      "and"            "inflation"      "for"           
##  [73] "items"          "other"          "than"           "food"          
##  [77] "and"            "energy"         "are"            "running"       
##  [81] "below"          "2"              "percent"        "Market-based"  
##  [85] "measures"       "of"             "inflation"      "compensation"  
##  [89] "remain"         "low"            "survey-based"   "measures"      
##  [93] "of"             "longer-term"    "inflation"      "expectations"  
##  [97] "are"            "little"         "changed"        "Consistent"    
## [101] "with"           "its"            "statutory"      "mandate"       
## [105] "the"            "Committee"      "seeks"          "to"            
## [109] "foster"         "maximum"        "employment"     "and"           
## [113] "price"          "stability"      "The"            "Committee"     
## [117] "decided"        "to"             "maintain"       "the"           
## [121] "target"         "range"          "for"            "the"           
## [125] "federal"        "funds"          "rate"           "at"            
## [129] "1‑1"            "2"              "to"             "1-3"           
## [133] "4"              "percent"        "The"            "Committee"     
## [137] "judges"         "that"           "the"            "current"       
## [141] "stance"         "of"             "monetary"       "policy"        
## [145] "is"             "appropriate"    "to"             "support"       
## [149] "sustained"      "expansion"      "of"             "economic"      
## [153] "activity"       "strong"         "labor"          "market"        
## [157] "conditions"     "and"            "inflation"      "returning"     
## [161] "to"             "the"            "Committee's"    "symmetric"     
## [165] "2"              "percent"        "objective"      "The"           
## [169] "Committee"      "will"           "continue"       "to"            
## [173] "monitor"        "the"            "implications"   "of"            
## [177] "incoming"       "information"    "for"            "the"           
## [181] "economic"       "outlook"        "including"      "global"        
## [185] "developments"   "and"            "muted"          "inflation"     
## [189] "pressures"      "as"             "it"             "assesses"      
## [193] "the"            "appropriate"    "path"           "of"            
## [197] "the"            "target"         "range"          "for"           
## [201] "the"            "federal"        "funds"          "rate"          
## [205] "In"             "determining"    "the"            "timing"        
## [209] "and"            "size"           "of"             "future"        
## [213] "adjustments"    "to"             "the"            "target"        
## [217] "range"          "for"            "the"            "federal"       
## [221] "funds"          "rate"           "the"            "Committee"     
## [225] "will"           "assess"         "realized"       "and"           
## [229] "expected"       "economic"       "conditions"     "relative"      
## [233] "to"             "its"            "maximum"        "employment"    
## [237] "objective"      "and"            "its"            "symmetric"     
## [241] "2"              "percent"        "inflation"      "objective"     
## [245] "This"           "assessment"     "will"           "take"          
## [249] "into"           "account"        "a"              "wide"          
## [253] "range"          "of"             "information"    "including"     
## [257] "measures"       "of"             "labor"          "market"        
## [261] "conditions"     "indicators"     "of"             "inflation"     
## [265] "pressures"      "and"            "inflation"      "expectations"  
## [269] "and"            "readings"       "on"             "financial"     
## [273] "and"            "international"  "developments"   "Voting"        
## [277] "for"            "the"            "monetary"       "policy"        
## [281] "action"         "were"           "Jerome"         "H"             
## [285] "Powell"         "Chair"          "John"           "C"             
## [289] "Williams"       "Vice"           "Chair"          "Michelle"      
## [293] "W"              "Bowman"         "Lael"           "Brainard"      
## [297] "Richard"        "H"              "Clarida"        "Patrick"       
## [301] "Harker"         "Robert"         "S"              "Kaplan"        
## [305] "Neel"           "Kashkari"       "Loretta"        "J"             
## [309] "Mester"         "and"            "Randal"         "K"             
## [313] "Quarles"        "Implementation" "Note"           "issued"        
## [317] "January"        "29"             "2020"
# optional manual corrections, make it recognize Jerome H. Powell and Lael Brainard as one unit and not independent words
text_tokens(corpus_statements["20200129",], combine = c("Jerome H. Powell", "Lael Brainard")) 
## $`20200129`
##   [1] "Information"      "received"         "since"           
##   [4] "the"              "Federal"          "Open"            
##   [7] "Market"           "Committee"        "met"             
##  [10] "in"               "December"         "indicates"       
##  [13] "that"             "the"              "labor"           
##  [16] "market"           "remains"          "strong"          
##  [19] "and"              "that"             "economic"        
##  [22] "activity"         "has"              "been"            
##  [25] "rising"           "at"               "a"               
##  [28] "moderate"         "rate"             "Job"             
##  [31] "gains"            "have"             "been"            
##  [34] "solid"            "on"               "average"         
##  [37] "in"               "recent"           "months"          
##  [40] "and"              "the"              "unemployment"    
##  [43] "rate"             "has"              "remained"        
##  [46] "low"              "Although"         "household"       
##  [49] "spending"         "has"              "been"            
##  [52] "rising"           "at"               "a"               
##  [55] "moderate"         "pace"             "business"        
##  [58] "fixed"            "investment"       "and"             
##  [61] "exports"          "remain"           "weak"            
##  [64] "On"               "a"                "12‑month"        
##  [67] "basis"            "overall"          "inflation"       
##  [70] "and"              "inflation"        "for"             
##  [73] "items"            "other"            "than"            
##  [76] "food"             "and"              "energy"          
##  [79] "are"              "running"          "below"           
##  [82] "2"                "percent"          "Market-based"    
##  [85] "measures"         "of"               "inflation"       
##  [88] "compensation"     "remain"           "low"             
##  [91] "survey-based"     "measures"         "of"              
##  [94] "longer-term"      "inflation"        "expectations"    
##  [97] "are"              "little"           "changed"         
## [100] "Consistent"       "with"             "its"             
## [103] "statutory"        "mandate"          "the"             
## [106] "Committee"        "seeks"            "to"              
## [109] "foster"           "maximum"          "employment"      
## [112] "and"              "price"            "stability"       
## [115] "The"              "Committee"        "decided"         
## [118] "to"               "maintain"         "the"             
## [121] "target"           "range"            "for"             
## [124] "the"              "federal"          "funds"           
## [127] "rate"             "at"               "1‑1"             
## [130] "2"                "to"               "1-3"             
## [133] "4"                "percent"          "The"             
## [136] "Committee"        "judges"           "that"            
## [139] "the"              "current"          "stance"          
## [142] "of"               "monetary"         "policy"          
## [145] "is"               "appropriate"      "to"              
## [148] "support"          "sustained"        "expansion"       
## [151] "of"               "economic"         "activity"        
## [154] "strong"           "labor"            "market"          
## [157] "conditions"       "and"              "inflation"       
## [160] "returning"        "to"               "the"             
## [163] "Committee's"      "symmetric"        "2"               
## [166] "percent"          "objective"        "The"             
## [169] "Committee"        "will"             "continue"        
## [172] "to"               "monitor"          "the"             
## [175] "implications"     "of"               "incoming"        
## [178] "information"      "for"              "the"             
## [181] "economic"         "outlook"          "including"       
## [184] "global"           "developments"     "and"             
## [187] "muted"            "inflation"        "pressures"       
## [190] "as"               "it"               "assesses"        
## [193] "the"              "appropriate"      "path"            
## [196] "of"               "the"              "target"          
## [199] "range"            "for"              "the"             
## [202] "federal"          "funds"            "rate"            
## [205] "In"               "determining"      "the"             
## [208] "timing"           "and"              "size"            
## [211] "of"               "future"           "adjustments"     
## [214] "to"               "the"              "target"          
## [217] "range"            "for"              "the"             
## [220] "federal"          "funds"            "rate"            
## [223] "the"              "Committee"        "will"            
## [226] "assess"           "realized"         "and"             
## [229] "expected"         "economic"         "conditions"      
## [232] "relative"         "to"               "its"             
## [235] "maximum"          "employment"       "objective"       
## [238] "and"              "its"              "symmetric"       
## [241] "2"                "percent"          "inflation"       
## [244] "objective"        "This"             "assessment"      
## [247] "will"             "take"             "into"            
## [250] "account"          "a"                "wide"            
## [253] "range"            "of"               "information"     
## [256] "including"        "measures"         "of"              
## [259] "labor"            "market"           "conditions"      
## [262] "indicators"       "of"               "inflation"       
## [265] "pressures"        "and"              "inflation"       
## [268] "expectations"     "and"              "readings"        
## [271] "on"               "financial"        "and"             
## [274] "international"    "developments"     "Voting"          
## [277] "for"              "the"              "monetary"        
## [280] "policy"           "action"           "were"            
## [283] "Jerome_H._Powell" "Chair"            "John"            
## [286] "C"                "Williams"         "Vice"            
## [289] "Chair"            "Michelle"         "W"               
## [292] "Bowman"           "Lael_Brainard"    "Richard"         
## [295] "H"                "Clarida"          "Patrick"         
## [298] "Harker"           "Robert"           "S"               
## [301] "Kaplan"           "Neel"             "Kashkari"        
## [304] "Loretta"          "J"                "Mester"          
## [307] "and"              "Randal"           "K"               
## [310] "Quarles"          "Implementation"   "Note"            
## [313] "issued"           "January"          "29"              
## [316] "2020"

The corpus package helps us to answer questions with very simple code. I will give three examples of questions it could answer:

  1. How long and complex are the statements? Have they been getting more complex?
  2. Do the FOMC have a template for communicating? If so, what phrases appear most frequently in their communication?
  3. Have the FOMC become increasingly concerned with inflation as the labor market continued to improve during 2019? What phrases were most used alongside appearances of this term?

1. How long have the statements been? Have they been getting more complex?

# Per-statement summary: number of tokens, distinct types and sentences.
text_stats(corpus_statements)
##          tokens types sentences
## 20190130    335   194        13
## 20190320    351   195        13
## 20190501    327   185        13
## 20190619    364   204        14
## 20190731    412   211        15
## 20190918    398   197        14
## 20191030    357   194        14
## 20191211    319   183        13
## 20200129    319   183        14
# Bar chart of each statement's token count, one bar per statement, labelled
# with the statement date and with the axis labels drawn perpendicular.
barplot(
  text_stats(corpus_statements)$tokens,
  main = "Length of statement (number of words)",
  names.arg = as.Date(rownames(corpus_statements), "%Y%m%d"),
  las = 2
)

2. Do the FOMC have a template for communicating? If so, what phrases appear most frequently in their communication?

# Tabulate the terms used across all statements. `count` is the number of
# occurrences; `support` is presumably the number of statements containing
# the term (it is 9 for every row shown, and there are 9 statements).
term_stats(corpus_statements)
##    term      count support
## 1  the         193       9
## 2  and         143       9
## 3  of          100       9
## 4  inflation    84       9
## 5  to           83       9
## 6  for          56       9
## 7  Committee    52       9
## 8  rate         47       9
## 9  percent      45       9
## 10 2            43       9
## 11 range        40       9
## 12 economic     39       9
## 13 a            34       9
## 14 its          33       9
## 15 target       31       9
## 16 will         31       9
## 17 labor        30       9
## 18 market       30       9
## 19 objective    30       9
## 20 federal      29       9
## ⋮  (295 rows total)
# Which phrases has the FOMC used most frequently? A phrase of n consecutive
# words is an n-gram: `ngrams = 3` restricts the table to three-word phrases,
# and `types = TRUE` adds one column per word (type1..type3 in the output).
term_stats(corpus_statements, ngrams = 3, types = TRUE)
##    term                    type1       type2       type3      count support
## 1  the target range        the         target      range         31       9
## 2  federal funds rate      federal     funds       rate          29       9
## 3  for the federal         for         the         federal       29       9
## 4  range for the           range       for         the           29       9
## 5  target range for        target      range       for           29       9
## 6  the federal funds       the         federal     funds         29       9
## 7  symmetric 2 percent     symmetric   2           percent       21       9
## 8  labor market conditions labor       market      conditions    18       9
## 9  the Committee will      the         Committee   will          13       9
## 10 2 percent objective     2           percent     objective     12       9
## 11 adjustments to the      adjustments to          the           12       9
## 12 future adjustments to   future      adjustments to            12       9
## 13 in recent months        in          recent      months        12       9
## 14 its symmetric 2         its         symmetric   2             12       9
## 15 strong labor market     strong      labor       market        12       9
## 16 to the target           to          the         target        12       9
## 17 recent months and       recent      months      and           11       9
## 18 of economic activity    of          economic    activity      10       9
## 19 2 percent inflation     2           percent     inflation      9       9
## 20 Bowman Lael Brainard    Bowman      Lael        Brainard       9       9
## ⋮  (772 rows total)

3. Where do the FOMC use the term inflation? What phrases have surrounded this word in each communication?

# List every occurrence of "inflation", with the statement it appears in and
# the text immediately before and after it.
text_locate(corpus_statements, "inflation")
##    text                before            instance              after            
## 1  20190130 …-month basis, both overall  inflation  and inflation for items oth…
## 2  20190130 …both overall inflation and  inflation  for items other than food a…
## 3  20190130 …h market-based measures of  inflation  compensation have moved low…
## 4  20190130 …ed measures of longer-term  inflation  expectations are little cha…
## 5  20190130 …bor market conditions, and  inflation  near the Committee's symmet…
## 6  20190130 …ial developments and muted  inflation  pressures, the Committee wi…
## 7  20190130 …nd its symmetric 2 percent  inflation  objective. This assessment …
## 8  20190130 … conditions, indicators of  inflation  pressures and inflation exp…
## 9  20190130 …of inflation pressures and  inflation  expectations, and readings …
## 10 20190320 … a 12-month basis, overall  inflation  has declined, largely as a …
## 11 20190320 …lt of lower energy prices;  inflation  for items other than food a…
## 12 20190320 …, market-based measures of  inflation  compensation have remained …
## 13 20190320 …ed measures of longer-term  inflation  expectations are little cha…
## 14 20190320 …bor market conditions, and  inflation  near the Committee's symmet…
## 15 20190320 …ial developments and muted  inflation  pressures, the Committee wi…
## 16 20190320 …nd its symmetric 2 percent  inflation  objective. This assessment …
## 17 20190320 … conditions, indicators of  inflation  pressures and inflation exp…
## 18 20190320 …of inflation pressures and  inflation  expectations, and readings …
## 19 20190501 … a 12-month basis, overall  inflation  and inflation for items oth…
## 20 20190501 …sis, overall inflation and  inflation  for items other than food a…
## ⋮  (84 rows total)
# How many times is the word "inflation" used in each statement?
# Each located instance carries the name of its statement in `$text`, so
# tabulating that column gives one count per statement.
inflation_counts <- table(
  text_locate(corpus_statements, "inflation")$text
)
barplot(
  inflation_counts,
  main = 'Uses of word: "Inflation" in statement',
  las = 2
)
# What trends appear in selected words across the statements?
# `x` has one row per statement and one column per selected term.
x <- term_matrix(corpus_statements,
                 select = c("inflation", "labor", "growth"))

# One colour per term, shared by the lines and the legend so the two cannot
# drift apart (the legend previously listed the labor and growth colours in
# the wrong order).
term_colours <- c(inflation = "blue", labor = "red", growth = "green")

# Row names are the statement dates in YYYYMMDD form; convert them once.
statement_dates <- as.Date(rownames(x), "%Y%m%d")

plot(x[, "inflation"] ~ statement_dates, ylim = c(0, 13), type = "l",
     col = term_colours[["inflation"]], ylab = "Text frequency",
     xlab = "Month", main = "FOMC Statements")
# The axis limits are set by plot(); repeating `ylim` in lines() has no
# effect on them, so it is not passed again.
lines(x[, "labor"] ~ statement_dates, col = term_colours[["labor"]])
lines(x[, "growth"] ~ statement_dates, col = term_colours[["growth"]])
legend("topright", legend = names(term_colours),
       col = unname(term_colours), lwd = 1)