Initialize

library(xml2)
library(tidyverse)
library(ggplot2)
library(knitr)
library(kableExtra)
# Location of the bulk-data XML for the 2022-08-12 issue.
url <- "https://www.govinfo.gov/bulkdata/FR/2022/08/FR-2022-08-12.xml"

# Fetch and parse the document once; the queries below all read `xml_string`.
xml_string <- read_xml(url)

1. Look into the CNTNTS element and show the number of unique agencies involved in this announcement

# Question 1: number of unique agencies in the table of contents.
# Select the HD heading of every AGCY entry directly under /FEDREG/CNTNTS.
q1 <- xml_find_all(xml_string, "/FEDREG/CNTNTS/AGCY/HD")
# Count distinct heading texts rather than nodes, so a heading that occurs
# more than once is only counted once.
length(unique(xml_text(q1, trim = TRUE)))
## [1] 47

2. Look into the CNTNTS element and show a bar plot of the number of announcements in each category (i.e., element CAT), such as RULES, PROPOSED RULES, etc.

# Question 2: tally the category headings (CAT/HD) of all agencies in the
# table of contents and draw the counts as a bar chart.
q2 <- xml_find_all(xml_string, "/FEDREG/CNTNTS/AGCY/CAT/HD")
q2_lst <- q2 %>%
  as_list() %>%
  unlist()

# table() names its dimension after `q2_lst`, so the data frame ends up with
# the columns `q2_lst` and `Freq`, sorted from most to least frequent.
frequency_cat <- table(q2_lst) %>%
  sort(decreasing = TRUE) %>%
  as.data.frame()

ggplot(frequency_cat, aes(x = q2_lst, y = Freq)) +
  geom_col(fill = "dark green") +
  # Print each count just above its bar.
  geom_text(aes(label = Freq), vjust = -0.3, size = 5) +
  labs(
    title = "The numbers of different categories of announcement",
    x = "Category",
    y = "Count"
  )

3. Look into the CNTNTS element and extract/show the names of the agencies that made an announcement in the “PROPOSED RULES” category

# Question 3: agencies that have a "PROPOSED RULES" category in the table of
# contents.
# The XPath matches every CAT heading whose text is 'PROPOSED RULES', climbs
# two levels to the enclosing AGCY, and returns the text of that AGCY's own HD.
q3 <- xml_find_all(xml_string, "//AGCY/CAT/HD[text()='PROPOSED RULES']/../../HD/text()")
# The values are agency names, so the column is labelled "Agency".
# xml_text() returns character(0) when nothing matches, which yields an empty
# table instead of stopping the script.
q3_df <- data.frame(Agency = xml_text(q3))

kbl(q3_df) %>%
  kable_paper()
Agency
Coast Guard
Environmental Protection Agency
Federal Aviation Administration
Federal Energy Regulatory Commission
National Oceanic and Atmospheric Administration
Securities and Exchange Commission

4. Look into the RULES element and extract/show the text of the SUBJECT of the third rule

# Question 4: SUBJECT text of the third rule.
# RULE[position()=3] picks the third RULE child of each RULES element; the
# text nodes of every SUBJECT beneath it are returned.
q4 <- xml_find_all(xml_string, "//RULES/RULE[position()=3]//SUBJECT/text()")

# Flatten the text nodes into a one-column data frame and label the column.
q4_df <- q4 %>%
  as_list() %>%
  unlist() %>%
  matrix() %>%
  data.frame()
names(q4_df) <- "Subject"

q4_df %>%
  kbl() %>%
  kable_paper()
Subject
Modification of Class E Airspace; Coeur D’Alene—Pappy Boyington Field, ID; Correction

5. Look into the NOTICES element and show the text of each SUBJECT published by the Coast Guard

# Question 5: SUBJECT text of the notices issued by the Coast Guard.
# The XPath matches every SUBAGY whose text is 'Coast Guard' anywhere below a
# NOTICE, steps up to its parent, and returns the text of the SUBJECT children
# of that parent.
q5 <- xml_find_all(xml_string, "//NOTICES/NOTICE//SUBAGY[text()='Coast Guard']/../SUBJECT/text()")
# xml_text() returns character(0) when nothing matches, which yields an empty
# table instead of stopping the script.
q5_df <- data.frame(xml_text(q5))
colnames(q5_df) <- "Subject by Coast Guard"

kbl(q5_df) %>%
  kable_paper()
Subject by Coast Guard
National Chemical Transportation Safety Advisory Committee; Vacancies
Notification of the Removal of Conditions of Entry on Vessels Arriving From Cote d’Ivoire

6. Look into the NOTICES element and show a bar plot of the top agencies (AGENCY, not SUBAGY) that made more than 5 notices

# Question 6: agencies (AGENCY, not SUBAGY) with more than 5 notices.
# Collect the text of every AGENCY element found below a NOTICE.
q6 <- xml_find_all(xml_string, "//NOTICES/NOTICE//AGENCY/text()")
q6_lst <- unlist(as_list(q6))

# table() names its dimension after `q6_lst`, so the data frame has the
# columns `q6_lst` and `Freq`, sorted from most to least frequent.
frequency_agency <- as.data.frame(sort(table(q6_lst), decreasing = TRUE))
# Keep only agencies with more than 5 notices. Plain data-frame subsetting is
# used so the script needs no package beyond those loaded at the top.
frequency_agency <- frequency_agency[frequency_agency$Freq > 5, , drop = FALSE]
frequency_agency_df <- frequency_agency

ggplot(frequency_agency_df, aes(x = q6_lst, y = Freq)) +
  geom_col(fill = "darkblue") +
  # Print each count just above its bar.
  geom_text(aes(label = Freq), vjust = -0.3, size = 5) +
  labs(
    title = "Top AGENCY which made more than 5 notices",
    x = "Agency",
    y = "Count"
  )