readxlでの読み込みを試してみました。

library(readxl)
library(dplyr)
library(gistr)

元データ

http://www.city.osaka.lg.jp/toshikeikaku/page/0000015211.htmlにあるデータを使います。

url <- "http://www.city.osaka.lg.jp/toshikeikaku/cmsfiles/contents/0000015/15211/suikei-nennrei2013-2014.xls"
tmp <- tempfile(fileext = ".xls")
download.file(url, tmp)

こんな感じのデータです。

エクセル

ざっくり言って

という雰囲気です。

読み込む

書くシートのタイトルは、区の名前になっています。「大阪市」は合計値なので、今回は省いて3枚目のシート以降を使います。 なんか謎のエラーっぽいのが出ますが怯まず先に進みます。。

( districts <- excel_sheets(tmp) )
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 15 00 00 00 00 00 00 07 3b 13 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 12 00 00 00 00 00 00 07 3b 10 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 09 00 00 00 00 00 00 07 3b 06 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 06 00 00 00 00 00 00 07 3b 03 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 17 00 00 00 00 00 00 07 3b 15 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 16 00 00 00 00 00 00 07 3b 14 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 13 00 00 00 00 00 00 07 3b 11 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 11 00 00 00 00 00 00 07 3b 0f 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 08 00 00 00 00 00 00 07 3b 05 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 1a 00 00 00 00 00 00 07 3b 18 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 0d 00 00 00 00 00 00 07 3b 0b 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 02 00 00 00 00 00 00 07 3b 07 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 0a 00 00 00 00 00 00 07 3b 08 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 07 00 00 00 00 00 00 07 3b 04 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 14 00 00 00 00 00 00 07 3b 12 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 0b 00 00 00 00 00 00 07 3b 09 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 04 00 00 00 00 00 00 07 3b 01 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 18 00 00 00 00 00 00 07 3b 16 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 10 00 00 00 00 00 00 07 3b 0e 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 0f 00 00 00 00 00 00 07 3b 0d 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 05 00 00 00 00 00 00 07 3b 02 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 19 00 00 00 00 00 00 07 3b 17 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 03 00 00 00 00 00 00 07 3b 00 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 0e 00 00 00 00 00 00 07 3b 0c 00 00 00 01 00 00 00 ff 00 
## DEFINEDNAME: 20 00 00 01 0b 00 00 00 0c 00 00 00 00 00 00 07 3b 0a 00 00 00 01 00 00 00 ff 00
##  [1] "利用上の注意" "大阪市"       "北"           "都島"        
##  [5] "福島"         "此花"         "中央"         "西"          
##  [9] "港"           "大正"         "天王寺"       "浪速"        
## [13] "西淀川"       "淀川"         "東淀川"       "東成"        
## [17] "生野"         "旭"           "城東"         "鶴見"        
## [21] "阿倍野"       "住之江"       "住吉"         "東住吉"      
## [25] "平野"         "西成"
data_list <- list()

# 使うのは3枚目のシート以降
for (i in 3:length(districts)) {
  district <- districts[i]
  
  # 上3行のデータはスキップ。列名は自分で設定する。
  data_part <- read_excel(tmp, sheet = district, skip = 3, col_names = FALSE)
  names(data_part) <- c("age", "total", "male", "female")

  # 100歳以上は「100~」にまとめられている。
  # 下でas.numericできないものはfilterしてしまうので、数字にしておく
  data_part[data_part$age == "100~", "age"] <- "100"
  
  # 「X~Y」になっているものは、as.numeric(age)がNAになる。
  data_list[[district]] <- data_part %>%
    mutate(age = as.numeric(age),
           district = district) %>%
    filter(!is.na(age))
}

これをCSVとして書き出します。

data_all <- bind_rows(data_list)
write.csv(data_all, file = "osaka_age_composition.csv", row.names = FALSE)

書き出したものは、https://github.com/yutannihilation/osaka_age_composition に置きました。