library(xml2)
library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2
## -- Attaching packages ------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.1.1       √ purrr   0.3.2  
## √ tibble  2.1.1       √ dplyr   0.8.0.1
## √ tidyr   0.8.3       √ stringr 1.4.0  
## √ readr   1.3.1       √ forcats 0.4.0
## -- Conflicts ---------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(stringr)

# Normalise the column names of a data frame / tibble.
#
# Names are lower-cased, every run of punctuation or whitespace is collapsed
# into a single underscore, leading and trailing underscores are stripped, and
# duplicates are disambiguated with a numeric suffix ("name", "name_1", ...).
#
# Args:
#   tblxml: a data frame or tibble whose column names should be cleaned.
#
# Returns:
#   `tblxml` with the same data and the cleaned column names.
set_colname_xml_tibble <- function(tblxml) {
  cleaned <- tolower(colnames(tblxml))

  # str_replace() only rewrites the FIRST match in each string, which left
  # names such as "a.b.c" half-converted ("a_b.c"); the *_all variants
  # rewrite every match.
  cleaned <- stringr::str_replace_all(cleaned, "[[:punct:][:space:]]+", "_")
  cleaned <- stringr::str_replace_all(cleaned, "_+", "_")
  # Strip the underscore at either end (both ends when both are present).
  cleaned <- stringr::str_replace_all(cleaned, "^_|_$", "")

  # Cleaning can make distinct names collide, so de-duplicate last.
  colnames(tblxml) <- make.unique(cleaned, sep = "_")
  tblxml
}

# Column specification handed to type_convert(): every column stays character
# except `value`, which is parsed as double.
# NOTE(review): the attribute table printed by this script has no `value`
# column (the value is bound on later from the node text), so the `value`
# entry may be unmatched at that call -- confirm whether it is still needed.
xdf_cols <- readr::cols(
  .default = readr::col_character(),
  value = readr::col_double()
)

# Request URL for the e-Stat dashboard `getData` endpoint (XML response).
# The query is assembled from named pieces so the indicator and the region
# list can be edited without touching the rest of the URL; the resulting
# string is identical to the previously hard-coded one.
api_endpoint <- "https://dashboard.e-stat.go.jp/api/1.0/Xml/getData"
indicator_code <- "0201010000000010000"
region_codes <- c("792", "710", "484")

api <- paste0(
  api_endpoint,
  "?Lang=JP",
  "&IndicatorCode=", indicator_code,
  "&RegionCode=", paste(region_codes, collapse = ",")
)
api
## [1] "https://dashboard.e-stat.go.jp/api/1.0/Xml/getData?Lang=JP&IndicatorCode=0201010000000010000&RegionCode=792,710,484"
# Download and parse the XML response, then build one row per <VALUE> node
# from that node's attributes. Column names are normalised with
# set_colname_xml_tibble() and column types applied from `xdf_cols`.
doc <- read_xml(api)
xdf <- doc %>%
  xml_find_all(".//VALUE") %>%
  # xml_attrs() on a node set yields one named character vector per node.
  xml_attrs() %>%
  map_df(as.list) %>%
  set_colname_xml_tibble() %>%
  type_convert(col_types = xdf_cols)
xdf
## # A tibble: 204 x 9
##    indicator unit  stat  regioncode time  cycle regionrank isseasonal
##    <chr>     <chr> <chr> <chr>      <chr> <chr> <chr>      <chr>     
##  1 02010100~ 090   1002~ 484        1950~ 3     1          1         
##  2 02010100~ 090   1002~ 710        1950~ 3     1          1         
##  3 02010100~ 090   1002~ 792        1950~ 3     1          1         
##  4 02010100~ 090   1002~ 484        1951~ 3     1          1         
##  5 02010100~ 090   1002~ 710        1951~ 3     1          1         
##  6 02010100~ 090   1002~ 792        1951~ 3     1          1         
##  7 02010100~ 090   1002~ 484        1952~ 3     1          1         
##  8 02010100~ 090   1002~ 710        1952~ 3     1          1         
##  9 02010100~ 090   1002~ 792        1952~ 3     1          1         
## 10 02010100~ 090   1002~ 484        1953~ 3     1          1         
## # ... with 194 more rows, and 1 more variable: isprovisional <chr>
# Numeric content of every <VALUE> node, in document order, as a one-column
# data frame. The order matches the rows of `xdf`, which is built from the
# same ".//VALUE" query on `doc`.
value_xml <- data.frame(value = xml_double(xml_find_all(doc, ".//VALUE")))


# Attach the values to the attribute table and derive the year from the first
# four characters of `time`.
# The former group_by(regioncode) ... as_tibble() pair was removed: the
# mutate() is element-wise, and as_tibble() discarded the grouping again, so
# the grouping never affected the result.
tblggplot <- bind_cols(xdf, value_xml) %>%
  mutate(year_data = as.numeric(stringr::str_sub(time, 1, 4)))
print(tblggplot)
## # A tibble: 204 x 11
##    indicator unit  stat  regioncode time  cycle regionrank isseasonal
##    <chr>     <chr> <chr> <chr>      <chr> <chr> <chr>      <chr>     
##  1 02010100~ 090   1002~ 484        1950~ 3     1          1         
##  2 02010100~ 090   1002~ 710        1950~ 3     1          1         
##  3 02010100~ 090   1002~ 792        1950~ 3     1          1         
##  4 02010100~ 090   1002~ 484        1951~ 3     1          1         
##  5 02010100~ 090   1002~ 710        1951~ 3     1          1         
##  6 02010100~ 090   1002~ 792        1951~ 3     1          1         
##  7 02010100~ 090   1002~ 484        1952~ 3     1          1         
##  8 02010100~ 090   1002~ 710        1952~ 3     1          1         
##  9 02010100~ 090   1002~ 792        1952~ 3     1          1         
## 10 02010100~ 090   1002~ 484        1953~ 3     1          1         
## # ... with 194 more rows, and 3 more variables: isprovisional <chr>,
## #   value <dbl>, year_data <dbl>
# Point-and-line chart of value (divided by 1e6) against year, with one
# colour per region code.
tblggplot %>%
  ggplot(
    mapping = aes(
      x = year_data,
      y = value / 1e6,
      colour = regioncode
    )
  ) +
  geom_point() +
  geom_line() +
  theme_gray(base_size = 11)