やっつけ間がハンパなくて欠けてる選手もいたりで正確な値ではないですが、googleVizライブラリ使って、結果をGoogle Visualisation APIで描写する
library(hash, quietly = TRUE)
## hash-2.2.3 provided by Decision Patterns
suppressPackageStartupMessages(library(googleVis))
# チーム名から国一覧取得
get.countries.fromName <- function(fc = "FC_Barcelona") {
url <- paste("http://en.wikipedia.org/w/api.php?format=xmlfm&action=query&export&titles=",
fc, sep = "")
return(get.countries.fromURL(url))
}
# urlから国一覧取得
get.countries.fromURL <- function(url.str) {
allxml <- readLines(con = url.str, n = -1L, warn = FALSE)
if (length(grep("= *Out on loan", allxml)) > 0) {
allxml <- allxml[1:grep("= *Out on loan", allxml)]
}
players <- allxml[grep("(player.*(nat=|nb=)|nac=.*|pos=)", allxml)]
if (length(grep("( in| out| ss| disc| mfs)", players)) > 0) {
players <- players[-grep("( in| out| ss| disc| mfs)", players)]
} else {
}
countries <- unlist(sapply(strsplit(players, "\\|"), function(x) {
x[grep("(nb=|nat=|nac=)", x)]
}))
return(countries)
}
# リーグ名からurlを構築(今のところリーガしかテストしてない)
get.url.fromLeagueName <- function(league = "La_Liga") {
base.url <- "http://en.wikipedia.org/w/api.php?format=xmlfm&action=query&export&titles="
mk.url <- paste(base.url, league, sep = "")
return(URLencode(mk.url))
}
# urlから、そのurl中のチーム名を取得
get.teamNames.fromURL <- function(test.url) {
# 一行ずつ読み込んでいく
allxml <- readLines(con = test.url, n = -1L, warn = FALSE)
teams.pre <- allxml[grep("\\{\\{Location map~", allxml)]
teams.pre2 <- sapply(strsplit(teams.pre, "\\|"), function(x) {
return(strsplit(x[5], "\\[\\["))
})
teams <- sapply(teams.pre2, function(x) {
return(x[2])
})
teams <- sapply(strsplit(teams, "\\]\\]"), function(x) {
return(x[1])
})
return(teams)
}
# wikipediaから国名の対応表をgetする
get.countries.name.tbl <- function(url.str = "http://en.wikipedia.org/w/api.php?format=xmlfm&action=query&export&titles=ISO_3166-1") {
allxml <- readLines(con = url.str, n = -1L, warn = FALSE)
# フル表記を含む文字列を取得
idx <- grep("\\{\\{flag\\|.*\\}\\}", allxml)
fullname <- allxml[idx]
# 2文字、3文字表記を含む文字列を取得
twoname <- allxml[idx + 1]
# データフレーム化
countries.name <- data.frame(fullname = fullname, twoname = sapply(strsplit(twoname,
"\\|"), function(x) {
x[9]
}), threename = sapply(strsplit(twoname, "\\|"), function(x) {
x[5]
}))
# フル表記を作る
countries.name <- countries.name[-grep("hide=y", countries.name$fullname),
]
countries.name$fullname <- substr(iconv(countries.name$fullname, "UTF-8"),
as.vector(regexpr("\\{\\{flag\\|", countries.name$fullname)), 1000)
countries.name$fullname <- gsub("(\\{\\{|\\}\\})", "", countries.name$fullname)
countries.name$fullname <- sapply(strsplit(countries.name$fullname, "\\|"),
function(x) {
x[2]
})
countries.name <- countries.name[grep("^[A-Za-z]", countries.name$fullname),
]
# 2文字を作る
countries.name$twoname <- sapply(strsplit(as.character(countries.name$twoname),
":"), function(x) {
substring(x[2], 1, 2)
})
# 3文字を作る
countries.name$threename <- sapply(strsplit(as.character(countries.name$threename),
";"), function(x) {
substring(x[7], 1, 3)
})
return(countries.name)
}
get.countries.fromLeagueName <- function(league = "La_Liga") {
test.url <- get.url.fromLeagueName(league = league)
# team名のみ取得
team.names <- get.teamNames.fromURL(test.url)
this.year.list <- list()
for (i in 1:length(team.names)) {
# team情報を取得するurlを構築
team.year.url <- get.url.fromLeagueName(league = team.names[i])
# 国名を含む文字列を抽出
team.countries <- get.countries.fromURL(team.year.url)
if (!is.null(team.countries)) {
this.year.list[[length(this.year.list) + 1]] <- team.countries
names(this.year.list)[i] <- team.names[i]
}
Sys.sleep(3) #念のため間隔を空ける
}
# listをベクトル化して
pre.countries <- unlist(this.year.list)
# 国名だけを抽出(空白が含まれる)
return(gsub(" ", "", sapply(strsplit(pre.countries, "="), function(x) {
return(x[length(x)])
})))
}
############################# 国のカウント数を取得
countries <- get.countries.fromLeagueName("La_Liga")
### 国名の変換表を取得
c.name.tbl <- get.countries.name.tbl()
# hash化
h <- hash(c(c.name.tbl$fullname, c.name.tbl$threename), c(c.name.tbl$twoname,
c.name.tbl$twoname))
# 2文字表記に変える
countries2 <- countries
for (i in 1:length(countries)) {
countries2[i] <- ifelse(!is.null(h[[countries[i]]]), h[[countries[i]]],
countries[i])
}
## 表記揺れはメンドいから今回は無視 国別選手数のベクトル
c.players <- table(countries2[nchar(countries2) == 2])
# googleVisする
exp.data <- data.frame(c.players)
names(exp.data) <- c("Country", "Counts")
T <- gvisTable(exp.data, options = list(width = 200, height = 280))
G <- gvisGeoChart(exp.data, locationvar = "Country", colorvar = "Counts",
options = list(width = 720, height = 560, dataMode = "regions"))
TG <- gvisMerge(T, G, horizontal = TRUE, tableOptions = "bgcolor=\"#CCCCCC\" cellspacing=10")
print(TG, "chart")
|
|
|