In this assignment, we will explore data from the Federal Register
Load the required libraries
library(xml2)
library(kableExtra)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Read the xml file
# Download and parse the Federal Register issue named in the URL.
fr_url <- "https://www.govinfo.gov/bulkdata/FR/2022/08/FR-2022-08-12.xml"
fr_data <- read_xml(fr_url)
Q1. Look into the CNTNTS element and show the number of unique agencies involved in this announcement
# Agency names are the HD headings directly under each AGCY entry in CNTNTS.
cntnts_agency_names <- xml_text(xml_find_all(fr_data, "//CNTNTS/AGCY/HD"))

# Deduplicate on the name text; applying unique() to the node set itself
# would only drop repeated nodes, not repeated agency names.
unique_agencies <- unique(cntnts_agency_names)
length(unique_agencies)
## [1] 47
Q2. Look into the CNTNTS element and show a bar plot of the number of announcements in each category (i.e., element CAT), such as RULES, PROPOSED RULES, etc.
# Heading text of every CAT element listed under an agency in CNTNTS.
categories <- xml_text(xml_find_all(fr_data, "//CNTNTS/AGCY/CAT/HD"))

# Number of occurrences of each category heading (column `n`).
categories_df <- as.data.frame(categories) %>%
  group_by(categories) %>%
  summarise(n = n())

# One bar per category, coloured by category; bar height is the count.
ggplot(categories_df, aes(x = categories, y = n, fill = categories)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Bar plot for the number of different categories of announcement",
    x = "Categories",
    y = "Count"
  )

Q3. Look into the CNTNTS element and extract/show the names of the agencies that made a notice in the category of “PROPOSED RULES”
# Agency headings (HD directly under AGCY) for every agency that has a
# category whose heading text is exactly "PROPOSED RULES".
agencies_text <- xml_find_all(
  fr_data,
  "//AGCY[CAT/HD/text() = 'PROPOSED RULES']/HD"
)

# xml_text() returns a character vector (length zero when nothing matches),
# so the data frame always has its single named column.
Agencies_With_ProposedRules_Category <- xml_text(agencies_text)
agencies_category <- as.data.frame(Agencies_With_ProposedRules_Category)

# Render the agency names as a dark-themed table.
agencies_category %>%
  kbl() %>%
  kable_material_dark()
|
Agencies_With_ProposedRules_Category
|
|
Coast Guard
|
|
Environmental Protection Agency
|
|
Federal Aviation Administration
|
|
Federal Energy Regulatory Commission
|
|
National Oceanic and Atmospheric Administration
|
|
Securities and Exchange Commission
|
Q4. Look into the RULES element and extract/show the text of the SUBJECT of the third rule
# SUBJECT elements anywhere inside the third RULE child of RULES.
rules_subject <- xml_find_all(fr_data, "//RULES/RULE[3]//SUBJECT")

# Read the text of the whole SUBJECT element so that a subject containing
# inline markup stays one string instead of being split into text fragments.
Subject <- data.frame(Subject = xml_text(rules_subject))

# Render the subject as a dark-themed table.
Subject %>%
  kbl() %>%
  kable_material_dark()
|
Subject
|
|
Modification of Class E Airspace; Coeur D’Alene—Pappy Boyington Field, ID; Correction
|
Q5. Look into the NOTICES element and show the SUBJECT text of the notices made by the Coast Guard
# SUBJECT elements of notices whose preamble has a SUBAGY with the exact
# text "Coast Guard".
notices_subject <- xml_find_all(
  fr_data,
  "//NOTICES/NOTICE/PREAMB[SUBAGY/text() = 'Coast Guard']/SUBJECT"
)

# Read the text of the whole SUBJECT element so that a subject containing
# inline markup stays one string instead of being split into text fragments.
Subject_by_CoastGuard <- data.frame(
  Subject_by_CoastGuard = xml_text(notices_subject)
)

# Render the subjects as a dark-themed table.
Subject_by_CoastGuard %>%
  kbl() %>%
  kable_material_dark()
|
Subject_by_CoastGuard
|
|
National Chemical Transportation Safety Advisory Committee; Vacancies
|
|
Notification of the Removal of Conditions of Entry on Vessels Arriving From Cote d’Ivoire
|
Q6. Look into the NOTICES element and show a bar plot of the top agencies (AGENCY, not SUBAGY) that made more than 5 notices
# Text of every AGENCY element sitting directly under a notice preamble.
notice_agency_names <- xml_text(
  xml_find_all(fr_data, "//NOTICES/NOTICE/PREAMB/AGENCY/text()")
)

# Notices per agency, largest count first. tally(sort = TRUE) already returns
# the rows in descending order of n, so no further arrange() is needed.
notices_agency_df <- data.frame(notices_agency = notice_agency_names) %>%
  group_by(notices_agency) %>%
  tally(sort = TRUE)

# Keep only the agencies with more than 5 notices.
notices_agency <- filter(notices_agency_df, n > 5)

# Inside aes(), `notices_agency` refers to the column of that name in the
# plotted data frame. reorder() ranks the bars by count, because ggplot
# otherwise orders a character axis alphabetically regardless of row order.
ggplot(notices_agency, aes(x = reorder(notices_agency, n), y = n)) +
  geom_col(fill = "blue", alpha = 0.7, width = 0.4) +
  ggtitle("Bar plot of AGENCIES with more than 5 notices") +
  xlab("Agency") +
  ylab("Count") +
  coord_flip() +
  theme_bw()
