library(plyr)
# setwd("directory containing the csv file")
# Load the pre-formatted 2013 data.
data2013 <- read.csv(file = "all2013.csv", header = TRUE)

# Player ids of the three pitchers, as stored in the PIT_ID column.
dar.id <- "darvy001"
iwakuma.id <- "iwakh001"
verland.id <- "verlj001"

# One data frame per pitcher: keep the rows whose PIT_ID equals the id.
# which() discards NA comparisons, which is also what subset() does.
dar.data <- data2013[with(data2013, which(PIT_ID == dar.id)), , drop = FALSE]
iwakuma.data <- data2013[with(data2013, which(PIT_ID == iwakuma.id)), ,
                         drop = FALSE]
verland.data <- data2013[with(data2013, which(PIT_ID == verland.id)), ,
                         drop = FALSE]
# Build the cumulative strikeout series for one pitcher.
#
# Args:
#   d: data frame of event rows for a single pitcher. Must contain GAME_ID
#      (characters 4-11 hold the game date as YYYYMMDD) and EVENT_CD.
#
# Returns:
#   A data frame with one row per input row, ordered by game date, with the
#   columns Date (class Date) and cumSO (running strikeout count).
createdata <- function(d) {
  # The game date is embedded in GAME_ID.
  d$Date <- as.Date(substr(d$GAME_ID, 4, 11), format = "%Y%m%d")

  # Sort chronologically with base R so the function does not depend on
  # which package's arrange() happens to be attached. order() is stable, so
  # rows of the same date keep their input order; row names are reset to
  # match what plyr::arrange() returned.
  d <- d[order(d$Date), , drop = FALSE]
  rownames(d) <- NULL

  # EVENT_CD 3 marks a strikeout. %in% never yields NA, so a missing event
  # code counts as "not a strikeout" instead of turning the rest of the
  # cumulative sum into NA.
  d$SO <- as.numeric(d$EVENT_CD %in% 3)
  d$cumSO <- cumsum(d$SO)

  d[, c("Date", "cumSO")]
}
# Cumulative strikeout series (columns Date and cumSO) for each pitcher.
dar.SOdata <- createdata(dar.data)
iwakuma.SOdata <- createdata(iwakuma.data)
verland.SOdata <- createdata(verland.data)

# Axis limits have to cover all three series. Left to itself, plot() sizes
# the axes from Darvish's data only and clips any other line that starts
# earlier, ends later or climbs higher.
date.range <- range(c(dar.SOdata$Date, iwakuma.SOdata$Date,
                      verland.SOdata$Date), na.rm = TRUE)
so.range <- range(c(dar.SOdata$cumSO, iwakuma.SOdata$cumSO,
                    verland.SOdata$cumSO), na.rm = TRUE)

# Darvish sets up the plot; the other two pitchers are drawn on top.
plot(dar.SOdata, type = "l", lwd = 2,
     xlim = date.range, ylim = so.range,
     xlab = "Date(Month)", ylab = "SO")
lines(iwakuma.SOdata, lwd = 2, col = "grey")
lines(verland.SOdata, lwd = 2, col = "red")

# Each legend entry shows the pitcher's season total (final cumSO value).
legend("topleft",
       legend = c(paste0("Darvish (", max(dar.SOdata$cumSO), ")"),
                  paste0("Iwakuma (", max(iwakuma.SOdata$cumSO), ")"),
                  paste0("Verlander (", max(verland.SOdata$cumSO), ")")),
       lwd = 2, col = c("black", "grey", "red"))
文字列の切り出し: midw(文字列, 開始位置 [0始まり], 文字数)
累積和: maccum
#!/bin/bash
# Build KO_2013.csv: cumulative season strikeouts per pitcher and game date.
#
# Pipeline steps:
#   1. mcut    - keep only the columns needed (read straight from the file).
#   2. mselstr - keep the rows of the three pitchers.
#   3. mcal    - KO_FLG = 1 when EVENT_CD is 3 (strikeout), else 0.
#   4. mcal    - DATE = 8 characters of GAME_ID from 0-based position 3.
#   5. msum    - strikeouts per pitcher AND date. The key has to include
#                PIT_ID; with DATE alone, pitchers who threw on the same
#                date are merged into a single row.
#   6. maccum  - running total per pitcher in date order.
#   7. mcut    - final column selection.
set -euo pipefail

mcut f=PIT_ID,GAME_ID,EVENT_CD i=./all2013.csv |
mselstr f=PIT_ID -sub v=darvy001,iwakh001,verlj001 |
mcal c='if(${EVENT_CD}==3, 1, 0)' a="KO_FLG" |
mcal c='midw($s{GAME_ID}, 3,8)' a="DATE" |
msum k=PIT_ID,DATE f=KO_FLG:KO |
maccum k=PIT_ID s=PIT_ID,DATE f=KO:KO_SEASON |
mcut f=PIT_ID,DATE,KO_SEASON > KO_2013.csv
library(ggplot2)
library(data.table)
library(dplyr)
# Read the per-pitcher cumulative strikeout table written to KO_2013.csv.
dat <- fread("KO_2013.csv")

# Convert DATE (YYYYMMDD) to a Date, then draw one line per pitcher.
# This stays a bare top-level expression so the plot is auto-printed.
ggplot(
  mutate(dat, DATE = as.Date(as.character(DATE), format = "%Y%m%d")),
  aes(x = DATE, y = KO_SEASON, color = PIT_ID)
) +
  geom_line(size = 2)