# Global knitr chunk option: do not include messages emitted by the code
# (message = FALSE) in the rendered output of any chunk.
knitr::opts_chunk$set(message = FALSE)

Load the required packages and the XML file

library(xml2)
library(ggplot2)
library(tidyverse)
library(kableExtra)
library(dplyr)
# Download and parse the Federal Register bulk-data XML for 2022-08-12,
# then print the parsed document to show its top-level elements.
x <- xml2::read_xml(
  "https://www.govinfo.gov/bulkdata/FR/2022/08/FR-2022-08-12.xml"
)
print(x)
## {xml_document}
## <FEDREG noNamespaceSchemaLocation="FRMergedXML.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
##  [1] <VOL>87</VOL>
##  [2] <NO>155</NO>
##  [3] <DATE>Friday, August 12, 2022</DATE>
##  [4] <UNITNAME>Contents</UNITNAME>
##  [5] <CNTNTS>\n  <AGCY>\n    <EAR>\n                Agency Health\n           ...
##  [6] <VOL>87</VOL>
##  [7] <NO>155</NO>
##  [8] <DATE>Friday, August 12, 2022</DATE>
##  [9] <UNITNAME>Rules and Regulations</UNITNAME>
## [10] <RULES>\n  <RULE>\n    <PREAMB>\n      <PRTPAGE P="49767"/>\n      <AGEN ...
## [11] <VOL>87</VOL>
## [12] <NO>155</NO>
## [13] <DATE>Friday, August 12, 2022</DATE>
## [14] <UNITNAME>Proposed Rules</UNITNAME>
## [15] <PRORULES>\n  <PRORULE>\n    <PREAMB>\n      <PRTPAGE P="49773"/>\n      ...
## [16] <VOL>87</VOL>
## [17] <NO>155</NO>
## [18] <DATE>Friday, August 12, 2022</DATE>
## [19] <UNITNAME>Notices</UNITNAME>
## [20] <NOTICES>\n  <NOTICE>\n    <PREAMB>\n      <PRTPAGE P="49799"/>\n      < ...
## ...

Unique agencies in the CNTNTS element, with their count

# Count the distinct agencies listed in the table of contents (CNTNTS).
# Each agency's name is the <HD> child of its <AGCY> element.
y <- xml_find_all(x, "/FEDREG/CNTNTS/AGCY/HD")
# unique() on a nodeset compares node objects, not their text, so two <HD>
# nodes carrying the same agency name would both be kept. Deduplicate on the
# whitespace-trimmed agency name instead; `y` stays an xml_nodeset.
y <- y[!duplicated(xml_text(y, trim = TRUE))]
length(y)
## [1] 47
# Select the text node of every agency heading in the table of contents
# and print the resulting nodeset.
z <- x %>%
  xml_find_all("/FEDREG/CNTNTS/AGCY/HD/text()")
print(z)
## {xml_nodeset (47)}
##  [1] Agency for Healthcare Research and Quality
##  [2] Agriculture Department
##  [3] Bureau of Consumer Financial Protection
##  [4] Centers for Disease Control and Prevention
##  [5] Children and Families Administration
##  [6] Civil Rights Commission
##  [7] Coast Guard
##  [8] Commerce Department
##  [9] Comptroller of the Currency
## [10] Drug Enforcement Administration
## [11] Economic Development Administration
## [12] Education Department
## [13] Energy Department
## [14] Environmental Protection Agency
## [15] Federal Aviation Administration
## [16] Federal Communications Commission
## [17] Federal Deposit Insurance Corporation
## [18] Federal Energy Regulatory Commission
## [19] Federal Railroad Administration
## [20] Federal Reserve System
## ...

Plot of the number of entries per category

# Collect the category heading (<CAT>/<HD>) of every agency entry in the
# table of contents.
category <- x %>%
  xml_find_all("/FEDREG/CNTNTS/AGCY/CAT/HD") %>%
  xml_text()

# One row per distinct category with its number of occurrences in column `n`.
cat_df <- tibble(category = category) %>%
  count(category)

# Bar chart: one bar per category, bar height = number of occurrences.
ggplot(cat_df, aes(x = category, y = n)) +
  geom_col()

Agencies that have an entry in the “PROPOSED RULES” category

# Select the name (<HD> text node) of every <AGCY> element that has a
# category heading equal to 'PROPOSED RULES', then print the nodeset.
Proposed_rules <- x %>%
  xml_find_all("//AGCY[CAT/HD/text() = 'PROPOSED RULES']/HD/text()")
print(Proposed_rules)
## {xml_nodeset (6)}
## [1] Coast Guard
## [2] Environmental Protection Agency
## [3] Federal Aviation Administration
## [4] Federal Energy Regulatory Commission
## [5] National Oceanic and Atmospheric Administration
## [6] Securities and Exchange Commission

Subject of the third rule

# Extract the SUBJECT text from the preamble of the third <RULE> child of
# each <RULES> element, then print it.
rules <- x %>%
  xml_find_all("//RULES/RULE[3]/PREAMB/SUBJECT") %>%
  xml_text()
print(rules)
## [1] "Modification of Class E Airspace; Coeur D'Alene—Pappy Boyington Field, ID; Correction"

SUBJECT text of the notices issued by the Coast Guard under the NOTICES element

# Select the SUBJECT text nodes of notice preambles whose sub-agency
# (<SUBAGY>) text is exactly 'Coast Guard', then print the nodeset.
SUB <- x %>%
  xml_find_all(
    "//NOTICES/NOTICE/PREAMB[SUBAGY/text()='Coast Guard']/SUBJECT/text()"
  )
print(SUB)
## {xml_nodeset (2)}
## [1] National Chemical Transportation Safety Advisory Committee; Vacancies
## [2] Notification of the Removal of Conditions of Entry on Vessels Arriving Fr ...

Plot of the top 5 agencies by number of notices

# Agency name (<AGENCY> text) from the preamble of every notice.
AGENCY <- xml_text(xml_find_all(x, "//NOTICES/NOTICE/PREAMB/AGENCY/text()"))

# One row per agency with its number of notices in column `n`.
AGENCY_df <- as.data.frame(AGENCY) %>%
  group_by(AGENCY) %>%
  tally()

# Keep the five agencies with the most notices. The previous `n > 5` filter
# was a count threshold, not a top-5 selection, so it could return any number
# of agencies (including none). `with_ties = FALSE` guarantees at most five
# rows even when several agencies share the fifth-highest count.
AGENCY <- slice_max(AGENCY_df, order_by = n, n = 5, with_ties = FALSE)

# Bar chart of the selected agencies; x labels are rotated 90 degrees so the
# long agency names do not overlap.
ggplot(AGENCY, aes(x = AGENCY, y = n)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))