library(xml2)
library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'rvest':
## method from
## read_xml.response xml2
## -- Attaching packages ------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.1.1 √ purrr 0.3.2
## √ tibble 2.1.1 √ dplyr 0.8.0.1
## √ tidyr 0.8.3 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.4.0
## -- Conflicts ---------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr)
#' Normalise the column names of a table to lower snake_case.
#'
#' Every column name is lower-cased, each run of punctuation and/or
#' whitespace is collapsed to a single underscore, leading and trailing
#' underscores are stripped, and duplicates are disambiguated with a
#' numeric suffix ("x", "x_1", ...).
#'
#' @param tblxml A data frame or tibble whose column names should be cleaned.
#' @return `tblxml` with its column names replaced; the data is untouched.
set_colname_xml_tibble <- function(tblxml) {
  cleaned <- tolower(colnames(tblxml))
  # gsub() rewrites EVERY separator run. The previous first-match-only
  # replacement turned "a.b.c" into "a_b.c". Underscores belong to
  # [:punct:], so runs of underscores are collapsed by this step as well.
  cleaned <- gsub("[[:punct:][:space:]]+", "_", cleaned)
  # Strip the underscore at both ends, not just whichever matches first.
  cleaned <- gsub("^_|_$", "", cleaned)
  colnames(tblxml) <- make.unique(cleaned, sep = "_")
  tblxml
}
# Column specification handed to type_convert(): every column is kept as
# character by default, except `value`, which is parsed as a double.
# NOTE(review): the attribute table printed further down shows no `value`
# column, so this entry may never match anything - confirm it is needed.
xdf_cols <- cols(
  value = col_double(),
  .default = col_character()
)
# Build the getData request URL from named parts so the indicator and the
# regions can be changed without editing one long string literal.
api_endpoint <- "https://dashboard.e-stat.go.jp/api/1.0/Xml/getData"
indicator_code <- "0201010000000010000"
region_codes <- c("792", "710", "484")
api <- paste0(
  api_endpoint,
  "?Lang=JP",
  "&IndicatorCode=", indicator_code,
  # The region codes are sent as a single comma-separated list.
  "&RegionCode=", paste(region_codes, collapse = ",")
)
api
## [1] "https://dashboard.e-stat.go.jp/api/1.0/Xml/getData?Lang=JP&IndicatorCode=0201010000000010000&RegionCode=792,710,484"
# Fetch the XML document, then turn the attributes of every <VALUE> node
# into one row of a tibble (one column per attribute name).
doc <- read_xml(api)
xdf <- doc %>%
  xml_find_all(".//VALUE") %>%
  map_df(function(node) as.list(xml_attrs(node))) %>%
  # Clean the column names, then apply the declared column types.
  set_colname_xml_tibble() %>%
  type_convert(col_types = xdf_cols)
print(xdf)
## # A tibble: 204 x 9
## indicator unit stat regioncode time cycle regionrank isseasonal
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 02010100~ 090 1002~ 484 1950~ 3 1 1
## 2 02010100~ 090 1002~ 710 1950~ 3 1 1
## 3 02010100~ 090 1002~ 792 1950~ 3 1 1
## 4 02010100~ 090 1002~ 484 1951~ 3 1 1
## 5 02010100~ 090 1002~ 710 1951~ 3 1 1
## 6 02010100~ 090 1002~ 792 1951~ 3 1 1
## 7 02010100~ 090 1002~ 484 1952~ 3 1 1
## 8 02010100~ 090 1002~ 710 1952~ 3 1 1
## 9 02010100~ 090 1002~ 792 1952~ 3 1 1
## 10 02010100~ 090 1002~ 484 1953~ 3 1 1
## # ... with 194 more rows, and 1 more variable: isprovisional <chr>
# Numeric payload: the text content of every <VALUE> node parsed as a double.
value_xml <- data.frame(value = xml_double(xml_find_all(doc, ".//VALUE")))
# bind_cols() pairs rows purely by position. That is only valid because
# `xdf` was built from the same ".//VALUE" query on `doc`, so both tables
# list the nodes in the same order.
# No group_by() is needed: the year is derived row by row from the first
# four characters of `time`, so grouping (and the as_tibble() that merely
# dropped the grouping again) had no effect on the result.
tblggplot <- bind_cols(xdf, value_xml) %>%
  mutate(year_data = as.numeric(stringr::str_sub(time, 1, 4)))
print(tblggplot)
## # A tibble: 204 x 11
## indicator unit stat regioncode time cycle regionrank isseasonal
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 02010100~ 090 1002~ 484 1950~ 3 1 1
## 2 02010100~ 090 1002~ 710 1950~ 3 1 1
## 3 02010100~ 090 1002~ 792 1950~ 3 1 1
## 4 02010100~ 090 1002~ 484 1951~ 3 1 1
## 5 02010100~ 090 1002~ 710 1951~ 3 1 1
## 6 02010100~ 090 1002~ 792 1951~ 3 1 1
## 7 02010100~ 090 1002~ 484 1952~ 3 1 1
## 8 02010100~ 090 1002~ 710 1952~ 3 1 1
## 9 02010100~ 090 1002~ 792 1952~ 3 1 1
## 10 02010100~ 090 1002~ 484 1953~ 3 1 1
## # ... with 194 more rows, and 3 more variables: isprovisional <chr>,
## # value <dbl>, year_data <dbl>
# One coloured series per region code: `value` scaled down by 1e6 plotted
# against the year, drawn as points joined by lines.
ggplot(tblggplot) +
  aes(x = year_data, y = value / 1e6, colour = regioncode) +
  geom_point() +
  geom_line() +
  theme_gray(base_size = 11)
