library(xml2)
library(purrr)
library(dplyr)
##
## 次のパッケージを付け加えます: 'dplyr'
## 以下のオブジェクトは 'package:stats' からマスクされています:
##
## filter, lag
## 以下のオブジェクトは 'package:base' からマスクされています:
##
## intersect, setdiff, setequal, union
# Sample XML document used as input for the flattening function.
# It holds three <person> records, each with a <name>, an <age> and an
# <address_SET> containing one, three and four <address> nodes respectively.
# Two addresses are deliberately irregular: one has a <city> but no <state>,
# and one carries an additional <state2> element that no other address has.
# The literal starts with a line break before <root>.
xml_str <- "
<root>
<person>
<name>John</name>
<age>30</age>
<address_SET>
<address>
<city>New York</city>
<state>New York</state>
</address>
</address_SET>
</person>
<person>
<name>Alice</name>
<age>253</age>
<address_SET>
<address>
<city>Los Angeles</city>
<state>California</state>
</address>
<address>
<city>San Francisco</city>
<state>California</state>
</address>
<address>
<city>Osaka</city>
<state>Osaka</state>
</address>
</address_SET>
</person>
<person>
<name>Bob</name>
<age>28</age>
<address_SET>
<address>
<city>Tokyo</city>
<state>Tokyo</state>
</address>
<address>
<city>CIIII</city>
<state>Saitama</state>
</address>
<address>
<city>Chiba</city>
</address>
<address>
<city>Ibaraki</city>
<state>Mito</state>
<state2>Mito</state2>
</address>
</address_SET>
</person>
</root>"
# Parse the sample XML text into a document object at top level.
# NOTE(review): nothing in the visible part of this script reads `xml_data`;
# confirm whether later code needs it before removing.
xml_data <- read_xml(xml_str)
# Despite its name, `xml_file` holds the XML text itself, not a file path.
xml_file <- xml_str
# XPath selecting every <person> node anywhere in the document.
root_element <- "//person"
# XPath, relative to a <person>, locating its <name> descendant.
key_element <- ".//name"
# XPath, relative to a <person>, selecting each <address> directly inside an
# <address_SET> descendant.
child_element <- ".//address_SET/address"
# Element names to read from each <address>; "state2" occurs in only one
# address of the sample data and "state" is absent from another.
required_elements <- c("city", "state","state2")
#' Flatten repeated child nodes of an XML document into one tibble
#'
#' For every node matched by `root_element`, reads a key value and the
#' requested fields of each of its child nodes, and returns one row per
#' child node. Parent nodes without any matching child contribute no rows.
#'
#' @param xml_file Input handed unchanged to `read_xml()`.
#' @param root_element XPath, evaluated on the parsed document, selecting the
#'   parent nodes.
#' @param key_element XPath, evaluated relative to each parent node. The text
#'   of its first match becomes `key_data` (`NA` when nothing matches).
#' @param child_element XPath, evaluated relative to each parent node,
#'   selecting the repeated child nodes.
#' @param required_elements Character vector of element names. Each name is
#'   looked up beneath every child node with the XPath `.//<name>`; the text
#'   of the first match is used, or `NA` when the child has no such element.
#' @param search_method Not read by this function; kept only so that
#'   existing calls that pass it keep working.
#' @return A tibble with a character column `key_data` followed by one
#'   character column per entry of `required_elements`, in that order. When
#'   no parent or no child node is found, a zero-row tibble with the same
#'   columns is returned.
process_xml_data_4 <- function(xml_file, root_element, key_element,
                               child_element, required_elements,
                               search_method = "auto") {
  column_names <- c("key_data", required_elements)
  xml_data <- read_xml(xml_file)
  parent_nodes <- xml_find_all(xml_data, root_element)

  # Without this guard bind_rows() would return a tibble with no columns at
  # all, so callers could not rely on the result's shape.
  if (length(parent_nodes) == 0) {
    empty_columns <- rep(list(character()), length(column_names))
    return(as_tibble(setNames(empty_columns, column_names)))
  }

  per_parent <- map(parent_nodes, function(parent_node) {
    key_value <- xml_text(xml_find_first(parent_node, key_element))
    child_nodes <- xml_find_all(parent_node, child_element)

    # xml_find_first() applied to a nodeset returns one entry per input node
    # (a missing node where nothing matches), so each lookup yields exactly
    # one value per child and absent elements come back as NA.
    field_values <- map(required_elements, function(element_name) {
      xml_text(xml_find_first(child_nodes, paste0(".//", element_name)))
    })

    # Repeat the key explicitly so that a parent with no children produces a
    # zero-row tibble that still carries every column.
    as_tibble(c(
      list(key_data = rep(key_value, length(child_nodes))),
      setNames(field_values, required_elements)
    ))
  })

  bind_rows(per_parent)
}
# Flatten the sample document into one row per person/address pair and
# display the resulting tibble.
result_data <- process_xml_data_4(
  xml_file = xml_file,
  root_element = root_element,
  key_element = key_element,
  child_element = child_element,
  required_elements = required_elements
)
print(result_data)
## # A tibble: 8 × 4
## key_data city state state2
## <chr> <chr> <chr> <chr>
## 1 John New York New York <NA>
## 2 Alice Los Angeles California <NA>
## 3 Alice San Francisco California <NA>
## 4 Alice Osaka Osaka <NA>
## 5 Bob Tokyo Tokyo <NA>
## 6 Bob CIIII Saitama <NA>
## 7 Bob Chiba <NA> <NA>
## 8 Bob Ibaraki Mito Mito