# July Monetary Policy Report PDFs, newest (2018) first, oldest (1996) last.
# Trailing comments flag notable events around the release of a report.
reportLinks <- c(
  "https://www.federalreserve.gov/monetarypolicy/files/20180713_mprfullreport.pdf",
  "https://www.federalreserve.gov/monetarypolicy/files/20170707_mprfullreport.pdf",
  "https://www.federalreserve.gov/monetarypolicy/files/20160621_mprfullreport.pdf", # released in June; labelled as July
  "https://www.federalreserve.gov/monetarypolicy/files/20150715_mprfullreport.pdf", # July 2015 (before liftoff)
  "https://www.federalreserve.gov/monetarypolicy/files/20140715_mprfullreport.pdf",
  "https://www.federalreserve.gov/monetarypolicy/files/20130717_mprfullreport.pdf", # July 2013 (after Taper Tantrum)
  "https://www.federalreserve.gov/monetarypolicy/files/20120717_mprfullreport.pdf",
  "https://www.federalreserve.gov/monetarypolicy/files/20110713_mprfullreport.pdf", # July 2011 (early recovery)
  "https://www.federalreserve.gov/monetarypolicy/files/20100721_mprfullreport.pdf",
  "https://www.federalreserve.gov/monetarypolicy/files/20090721_mprfullreport.pdf", # July 2009 (end of Great Recession)
  "https://www.federalreserve.gov/monetarypolicy/files/20080715_mprfullreport.pdf",
  "https://www.federalreserve.gov/monetarypolicy/files/20070718_mprfullreport.pdf", # July 2007 (eve of Great Recession)
  "https://www.federalreserve.gov/boarddocs/hh/2006/july/fullreport.pdf",
  "https://www.federalreserve.gov/boarddocs/hh/2005/july/fullreport.pdf", # July 2005 (housing boom)
  "https://www.federalreserve.gov/boarddocs/hh/2004/july/fullreport.pdf",
  "https://www.federalreserve.gov/boarddocs/hh/2003/july/FullReport.pdf", # July 2003 (deflation fears)
  "https://www.federalreserve.gov/boarddocs/hh/2002/july/FullReport.pdf",
  "https://www.federalreserve.gov/boarddocs/hh/2001/july/FullReport.pdf", # July 2001 (dot-com recession)
  "https://www.federalreserve.gov/boarddocs/hh/2000/July/FullReport.pdf",
  "https://www.federalreserve.gov/boarddocs/hh/1999/July/FullReport.pdf", # July 1999 (eve of dot-com recession)
  "https://www.federalreserve.gov/boarddocs/hh/1998/july/FullReport.pdf",
  "https://www.federalreserve.gov/boarddocs/hh/1997/july/FullReport.pdf", # July 1997 (irrational exuberance)
  "https://www.federalreserve.gov/boarddocs/hh/1996/july/FullReport.pdf"
)
# Download every report and extract its text. pdf_text() (pdftools) yields one
# string per page, so fed_reports holds one character vector per report.
fed_reports <- map(reportLinks, pdf_text)
length(fed_reports)

# Build one row per line of text, tagged with the report label, the page the
# line came from, and a running line number within the report. The labels run
# newest to oldest so that they pair up with the order of reportLinks.
fed <- data.frame(
  report = paste0("Jul-", 2018:1996),
  stringsAsFactors = FALSE
) %>%
  mutate(text = fed_reports) %>%
  unnest(text) %>%
  group_by(report) %>%
  mutate(page = row_number()) %>%
  ungroup() %>%
  # Split on "\n" with an optional preceding "\r". Splitting on "\r" alone only
  # separates lines when the extracted text has Windows line endings; with
  # plain "\n" every page stayed a single row and stripping the "\n" afterwards
  # glued the last word of each line onto the first word of the next.
  mutate(text = strsplit(text, "\r?\n")) %>%
  unnest(text) %>%
  group_by(report) %>%
  mutate(line = row_number()) %>%
  # Drop the grouping so later steps do not inherit it by accident.
  ungroup()
# Cache the parsed lines on disk and load them back from the CSV. write.csv()
# stores the row names too, which read.csv() returns as the leading `X` column.
write.csv(x = fed, file = "fed_reports.csv")
fed_r <- read.csv(file = "fed_reports.csv", stringsAsFactors = FALSE)
head(x = fed_r)
## X report page text line
## 1 1 Jul-2018 3 Letter of Transmittal 1
## 2 2 Jul-2018 3 Board of Governors of the 2
## 3 3 Jul-2018 3 Federal Reserve System 3
## 4 4 Jul-2018 3 Washington, D.C., July 13, 2018 4
## 5 5 Jul-2018 3 The President of the Senate 5
## 6 6 Jul-2018 3 The Speaker of the House of Representatives 6
# Keep only the analysis columns; this drops the row-name column `X` that the
# CSV round trip added.
fed_x <- dplyr::select(fed_r, report, page, text, line)
head(x = fed_x)
## report page text line
## 1 Jul-2018 3 Letter of Transmittal 1
## 2 Jul-2018 3 Board of Governors of the 2
## 3 Jul-2018 3 Federal Reserve System 3
## 4 Jul-2018 3 Washington, D.C., July 13, 2018 4
## 5 Jul-2018 3 The President of the Senate 5
## 6 Jul-2018 3 The Speaker of the House of Representatives 6
# Frequency of every word within every report, most frequent first.
words <- fed_x %>%
  unnest_tokens(output = word, input = text) %>%
  count(report, word, sort = TRUE) %>%
  ungroup()

# Total number of words per report (one row per report).
total_words <- words %>%
  group_by(report) %>%
  summarise(total = sum(n))
# Compare the total word count of each report
# Line chart of the total word count per report against the report's year.
# The year is parsed from the "Jul-YYYY" label so that every point is matched
# to its own report, rather than relying on total_words having exactly one row
# per year from 1996 to 2018 in chronological order.
ggplot(
  data = total_words,
  aes(x = as.integer(sub("^Jul-", "", report)), y = total)
) +
  geom_line(color = "#27408b") +
  geom_point(
    shape = 21, fill = "white", color = "#27408b", size = 3, stroke = 1.1
  ) +
  scale_y_continuous(labels = scales::comma) +
  theme_ridges() +
  labs(
    x = "year",
    y = "Words count",
    title = "Words count in Federal Reserve Monetary Policy Reports",
    subtitle = "For July of each year 1996-2018"
  )

# List the most frequently occurring words in each report, after removing stop words, numbers, and special characters
# Tokenise the text column: one row per word, keeping report, page and line.
fed_text <- unnest_tokens(fed_x, output = word, input = text)
head(x = fed_text)
## report page line word
## 1 Jul-2018 3 1 letter
## 1.1 Jul-2018 3 1 of
## 1.2 Jul-2018 3 1 transmittal
## 2 Jul-2018 3 2 board
## 2.1 Jul-2018 3 2 of
## 2.2 Jul-2018 3 2 governors
# Ten most frequent words per report. Every token is reduced to its letters
# (tokens left empty, such as pure numbers, are dropped), stop words are
# removed, and the remaining words are ranked by count within their report.
fedText <- fed_text %>%
  mutate(word = gsub("[^A-Za-z ]", "", word)) %>%
  filter(word != "") %>%
  anti_join(stop_words) %>%
  group_by(report) %>%
  count(word, sort = TRUE) %>%
  mutate(rank = row_number()) %>%
  ungroup() %>%
  arrange(rank, report) %>%
  filter(rank <= 10L)
## Joining, by = "word"
head(x = fedText)
## # A tibble: 6 x 4
## report word n rank
## <chr> <chr> <int> <int>
## 1 Jul-1996 percent 129 1
## 2 Jul-1997 quarter 139 1
## 3 Jul-1998 percent 159 1
## 4 Jul-1999 percent 157 1
## 5 Jul-2000 percent 150 1
## 6 Jul-2001 percent 171 1
# Show the last rows of the ranked table; because fedText is arranged by rank
# and then report, these are the rank-10 words of the most recent reports.
tail(fedText)
## # A tibble: 6 x 4
## report word n rank
## <chr> <chr> <int> <int>
## 1 Jul-2013 reserve 88 10
## 2 Jul-2014 monetary 96 10
## 3 Jul-2015 monetary 96 10
## 4 Jul-2016 policy 69 10
## 5 Jul-2017 participants 125 10
## 6 Jul-2018 participants 129 10
# The stop words have been removed
# Horizontal bar chart of the top words, one facet per report.
# fct_reorder(word, n) orders each word once across all reports, so bars
# inside a facet were not sorted by that report's own counts. reorder_within()
# orders the words separately within each report, and scale_x_reordered()
# strips the per-report suffix it appends so the axis shows the plain word.
ggplot(
  fedText,
  aes(x = tidytext::reorder_within(word, n, report), y = n)
) +
  geom_col(fill = "#27408b") +
  tidytext::scale_x_reordered() +
  facet_wrap(~report, scales = "free", ncol = 5) +
  coord_flip() +
  theme_ridges(font_size = 10) +
  labs(
    x = "",
    y = "",
    title = "Most Frequent Words Federal Reserve Monetary Policy Report"
  )
