メジャーリーガーの月別打率の推移をrChartsで可視化します.
Retrosheet (http://www.retrosheet.org/game.htm) から打席結果データを取得して, パースして整形したものを利用します.
データは, githubにcsvであります(https://github.com/gghatano/baseball_data).
コードの中では, PostgreSQLから読んでいます.
library(data.table)
library(dplyr)
library(RPostgreSQL)
library(magrittr)
library(rCharts)
# Open a connection to the "baseball_data" PostgreSQL database and load the
# "all2013" table as a data.table. `conn` stays open because a later step
# reads another table through it.
conn <- dbConnect(PostgreSQL(), dbname = "baseball_data")
dat <- as.data.table(dbReadTable(conn, "all2013"))
# Monthly batting average (AVG = HITS / ATBAT, rounded to 3 digits) per batter.
#
# GAME_ID is assumed to follow the Retrosheet layout -- home team (3 chars),
# year (4), month (2), day (2), game number (1) -- so the month occupies
# characters 8-9. Parsing both digits as an integer avoids two traps of taking
# only character 9: October..December collapsing to "0".."2", and the
# `month >= 4` test silently becoming a string comparison.
#
# Only April..September are kept: March openers and anything from October on
# are excluded, which is also the range the chart's axis ticks cover.
dat_m <- dat %>%
  select(GAME_ID, BAT_ID, AB_FL, H_FL) %>%
  # Keep plate appearances that count as an official at-bat.
  filter(AB_FL == "T") %>%
  mutate(
    month = as.integer(substr(GAME_ID, 8, 9)),
    # Turn the hit value into a hit / no-hit flag so it can be summed.
    H_FL = (H_FL > 0)
  ) %>%
  filter(month >= 4L & month <= 9L) %>%
  group_by(BAT_ID, month) %>%
  dplyr::summarise(ATBAT = n(), HITS = sum(H_FL)) %>%
  mutate(AVG = round(HITS / ATBAT, 3)) %>%
  # summarise() leaves the BAT_ID grouping in place; drop it so later steps
  # start from an ungrouped table.
  ungroup()
# Batters whose at-bats, summed over the months present in dat_m, exceed 500.
# Only the BAT_ID column is kept so the result can be used as a join filter.
dat_over_500 <- dat_m %>%
  group_by(BAT_ID) %>%
  dplyr::summarise(ATBATS = sum(ATBAT)) %>%
  filter(ATBATS > 500) %>%
  select(BAT_ID)
# Restrict the monthly averages to the qualifying batters and attach each
# player's full name via BAT_ID.
fullname <- dbReadTable(conn, "fullname_id")
# No later read through `conn` is visible in this script, so release the
# connection here instead of leaking it. invisible() keeps the TRUE status
# out of the rendered output.
# NOTE(review): move this further down if a later step still needs `conn`.
invisible(dbDisconnect(conn))
dat_m <- dat_over_500 %>%
  inner_join(dat_m, by = "BAT_ID")
dat_m <- fullname %>%
  as.data.table() %>%
  inner_join(dat_m, by = "BAT_ID")
# Narrow the data down to the three players to be plotted, matched on FULLNAME.
asians <- c("Norichika Aoki", "Ichiro Suzuki", "Shin-Soo Choo")
dat_m <- filter(dat_m, FULLNAME %in% asians)
# Represent each month as a Date on the first day of that month in 2013
# (the month value is appended after a literal leading "0").
dat_m <- dat_m %>%
  mutate(date = as.Date(paste0("2013-0", month, "-01")))
# Line chart of monthly AVG, one line per player (rCharts / NVD3).
n1 <- nPlot(AVG ~ date, group = "FULLNAME", data = dat_m, type = "lineChart")
# The tick formatter multiplies x by 86400000 (ms per day), i.e. x values are
# treated as days since 1970-01-01. The tick values are the first day of
# April..September 2013 on that scale.
# The formatter uses d3.time.format.utc: the day counts mark UTC midnight, and
# the local-time formatter would show the previous month for viewers west of
# UTC (e.g. "03" for April 1st).
n1$xAxis(
  tickValues = "#![15796, 15826, 15857, 15887, 15918,15949]!#",
  tickFormat = "#!function(d) {return d3.time.format.utc('%m')(new Date(d * 86400000));}!#",
  axisLabel = "MONTH"
)
# Always include 0 on the y axis so the averages are not visually exaggerated.
n1$chart(forceY = "#![0]!#")
n1$yAxis(axisLabel = "AVG")
# Emit the chart HTML for element id "mychart1", pulling JS/CSS assets from a CDN.
n1$print("mychart1", include_assets = TRUE, cdn = TRUE)
この後, 所属チームでグループ化して, インタラクティブになんやかんやできるようにします.