# Global knitr chunk option: do not include messages emitted by the code
# (e.g. package start-up messages) in the rendered document.
knitr::opts_chunk$set(message = FALSE)
Loading required packages and xml file
library(xml2)
library(ggplot2)
library(tidyverse)
library(kableExtra)
library(dplyr)
# Download and parse the Federal Register bulk-data XML file for 2022-08-12,
# then print the document to show its top-level child elements.
x <- "https://www.govinfo.gov/bulkdata/FR/2022/08/FR-2022-08-12.xml" %>%
  read_xml()
x
## {xml_document}
## <FEDREG noNamespaceSchemaLocation="FRMergedXML.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
## [1] <VOL>87</VOL>
## [2] <NO>155</NO>
## [3] <DATE>Friday, August 12, 2022</DATE>
## [4] <UNITNAME>Contents</UNITNAME>
## [5] <CNTNTS>\n <AGCY>\n <EAR>\n Agency Health\n ...
## [6] <VOL>87</VOL>
## [7] <NO>155</NO>
## [8] <DATE>Friday, August 12, 2022</DATE>
## [9] <UNITNAME>Rules and Regulations</UNITNAME>
## [10] <RULES>\n <RULE>\n <PREAMB>\n <PRTPAGE P="49767"/>\n <AGEN ...
## [11] <VOL>87</VOL>
## [12] <NO>155</NO>
## [13] <DATE>Friday, August 12, 2022</DATE>
## [14] <UNITNAME>Proposed Rules</UNITNAME>
## [15] <PRORULES>\n <PRORULE>\n <PREAMB>\n <PRTPAGE P="49773"/>\n ...
## [16] <VOL>87</VOL>
## [17] <NO>155</NO>
## [18] <DATE>Friday, August 12, 2022</DATE>
## [19] <UNITNAME>Notices</UNITNAME>
## [20] <NOTICES>\n <NOTICE>\n <PREAMB>\n <PRTPAGE P="49799"/>\n < ...
## ...
Number of unique agencies in the CNTNTS element, followed by their names
# Select the heading (HD) node of every agency listed in the table of contents.
y <- xml_find_all(x, "/FEDREG/CNTNTS/AGCY/HD")
# Count the distinct agency *names*. Calling unique() on the nodeset itself
# only drops nodes that are the very same node, which xml_find_all() never
# returns twice, so it cannot detect two headings that carry the same text.
# Comparing the trimmed heading text does.
length(unique(xml_text(y, trim = TRUE)))
## [1] 47
# Text nodes of the agency headings in the table of contents; printing the
# nodeset lists the agency names.
z <- x %>%
  xml_find_all("/FEDREG/CNTNTS/AGCY/HD/text()")
z
## {xml_nodeset (47)}
## [1] Agency for Healthcare Research and Quality
## [2] Agriculture Department
## [3] Bureau of Consumer Financial Protection
## [4] Centers for Disease Control and Prevention
## [5] Children and Families Administration
## [6] Civil Rights Commission
## [7] Coast Guard
## [8] Commerce Department
## [9] Comptroller of the Currency
## [10] Drug Enforcement Administration
## [11] Economic Development Administration
## [12] Education Department
## [13] Energy Department
## [14] Environmental Protection Agency
## [15] Federal Aviation Administration
## [16] Federal Communications Commission
## [17] Federal Deposit Insurance Corporation
## [18] Federal Energy Regulatory Commission
## [19] Federal Railroad Administration
## [20] Federal Reserve System
## ...
Plot of the number of entries per category
# Heading text of every category (CAT/HD) listed under an agency in the
# table of contents.
category <- x %>%
  xml_find_all("/FEDREG/CNTNTS/AGCY/CAT/HD") %>%
  xml_text()

# One row per distinct category with its number of occurrences in column `n`.
cat_df <- tibble(category = category) %>%
  count(category)

# Bar chart: one bar per category, bar height = number of occurrences.
ggplot(cat_df, aes(x = category, y = n)) +
  geom_bar(stat = "identity")

Agencies that have an entry in the “PROPOSED RULES” category
# Heading text of each agency (AGCY) that has at least one category whose
# heading is exactly 'PROPOSED RULES'.
Proposed_rules <- x %>%
  xml_find_all("//AGCY[CAT/HD/text() = 'PROPOSED RULES']/HD/text()")
Proposed_rules
## {xml_nodeset (6)}
## [1] Coast Guard
## [2] Environmental Protection Agency
## [3] Federal Aviation Administration
## [4] Federal Energy Regulatory Commission
## [5] National Oceanic and Atmospheric Administration
## [6] Securities and Exchange Commission
Subject of the third rule
# SUBJECT text from the preamble of the third RULE element inside RULES.
rules <- x %>%
  xml_find_all("//RULES/RULE[3]/PREAMB/SUBJECT") %>%
  xml_text()
rules
## [1] "Modification of Class E Airspace; Coeur D'Alene—Pappy Boyington Field, ID; Correction"
Text of the SUBJECT of each notice issued by the Coast Guard under the NOTICES element
# SUBJECT text nodes of every notice whose preamble names 'Coast Guard' as
# the sub-agency (SUBAGY).
SUB <- x %>%
  xml_find_all("//NOTICES/NOTICE/PREAMB[SUBAGY/text()='Coast Guard']/SUBJECT/text()")
SUB
## {xml_nodeset (2)}
## [1] National Chemical Transportation Safety Advisory Committee; Vacancies
## [2] Notification of the Removal of Conditions of Entry on Vessels Arriving Fr ...
Plot of the top 5 agencies by number of notices
# Agency name text from the preamble of every notice in the NOTICES section.
AGENCY <- xml_text(xml_find_all(x, "//NOTICES/NOTICE/PREAMB/AGENCY/text()"))

# One row per agency with its number of notices in column `n`.
AGENCY_df <- tibble(AGENCY = AGENCY) %>%
  count(AGENCY)

# Keep the five agencies with the most notices. The previous `n > 5` filter
# selected agencies with more than five notices, which can yield any number
# of rows (including none) rather than a top five.
AGENCY <- AGENCY_df %>%
  arrange(desc(n)) %>%
  head(5)

# Bar chart of the selected agencies; x labels are rotated 90 degrees so the
# long agency names stay readable.
ggplot(AGENCY, aes(x = AGENCY, y = n)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
