library(magrittr)
p.combination = character(1000)
for(i in 1:length(p.combination)){
p.combination[i] = LETTERS[sample(1:26,5)] %>% paste0(collapse = ", ")
}
p.com.allowed = character(1000)
for(i in 1:length(p.com.allowed)){
p.com.allowed[i] = LETTERS[sample(1:26,5)] %>% paste0(collapse = ", ")
}
data = data.frame(
stringsAsFactors = FALSE,
p.combination = p.combination,
p.com.allowed = p.com.allowed)
player = LETTERS[1:26]
input.matrix0 = function(data, player, off){
X = matrix(ncol = length(player), nrow = dim(data)[1])
for(i in 1:dim(data)[1]){
if(off) {
colnames(X) = paste0("O_",player)
coding = 1
pp = data$p.combination
} else {
colnames(X) = paste0("D_",player)
coding = -1
pp = data$p.com.allowed
}
player.temp = pp[i] %>% gsub(", ", "|",.)
index = grep(player.temp, player)
X[i,index] = coding
X[i,-index] = 0
}
return(X)
}
mowgur = function(data, player){
X.off = input.matrix0(data, player, T)
X.def = input.matrix0(data, player, F)
return(cbind(X.off, X.def))
}
w2long=function(array){
lapply(c(1:length(array)),function(i) {
data.table("id"=i,"player"=as.character(array[i]) %>%
strsplit(", ") %>% .[[1]])
}) %>% rbindlist()
}
tan800630 <- function(data, player) {
out=merge(
w2long(data$p.combination) %>% mutate(show=1) %>%
dcast(id~player,value.var="show"),
w2long(data$p.com.allowed) %>% mutate(show=-1) %>%
dcast(id~player,value.var="show"),by="id",suffix=c("_O","_D")
)
out[is.na(out)]=0
out
}
library(magrittr)
library(plyr)
library(data.table)
andrew <- function(data, name.player) {
colnames(data) <- c("p.attack", "p.defence")
dt <- data.table(data)
out.attack <-
strsplit(dt$p.attack, ", ") %>%
sapply(., function(x) {
name.player %in% x
}) %>%
t %>%
set_colnames(paste0("att_", name.player)) %>%
mapvalues(., c(T, F), c(1L, 0L))
out.defence <-
strsplit(dt$p.attack, ", ") %>%
sapply(., function(x) {
name.player %in% x
}) %>%
t %>%
set_colnames(paste0("def_", name.player)) %>%
mapvalues(., c(T, F), c(-1L, 0L))
cbind(out.attack, out.defence)
}
library(Matrix)
library(text2vec)
wush <- function(data, player) {
it <- itoken(data[[1]], tokenizer = word_tokenizer, progressbar = FALSE, n_chunks = 10)
it2 <- itoken(data[[2]], tokenizer = word_tokenizer, progressbar = FALSE, n_chunks = 10)
vocab <- create_vocabulary(player)
vectorizer <- vocab_vectorizer(vocab)
m1 <- create_dtm(it, vectorizer)
m2 <- create_dtm(it2, vectorizer)
m2@x[] <- -1
cbind(m1, m2)
}
library(rbenchmark)
result <- benchmark(
replications = 10,
out1 <- mowgur(data, player),
out2 <- tan800630(data, player),
out3 <- andrew(data, player),
out4 <- wush(data, player)
)
knitr::kable(result)
| test | replications | elapsed | relative | user.self | sys.self | user.child | sys.child |
|---|---|---|---|---|---|---|---|
| out1 <- mowgur(data, player) | 10 | 4.050 | 30.916 | 4.032 | 0.008 | 0 | 0 |
| out2 <- tan800630(data, player) | 10 | 8.272 | 63.145 | 8.204 | 0.024 | 0 | 0 |
| out3 <- andrew(data, player) | 10 | 0.131 | 1.000 | 0.128 | 0.000 | 0 | 0 |
| out4 <- wush(data, player) | 10 | 0.490 | 3.740 | 0.456 | 0.028 | 0 | 0 |
out2 <- out2[,-1]
colnames(out1) <- colnames(out2) <- colnames(out3) <- colnames(out4) <- rep(player, 2)
which(out1[1,] == 1)
D P Q S T 4 16 17 19 20
which(out2[1,] == 1)
[1] 4 16 17 19 20
which(out3[1,] == 1)
D P Q S T 4 16 17 19 20
which(out4[1,] == 1)
D P Q S T 4 16 17 19 20
which(out1[1,] == -1)
D I N X Z 30 35 40 50 52
which(out2[1,] == -1)
[1] 30 35 40 50 52
which(out3[1,] == -1)
D P Q S T 30 42 43 45 46
which(out4[1,] == -1)
D I N X Z 30 35 40 50 52
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
registerDoParallel(8)
wush <- function(data, player) {
it <- itoken_parallel(data[[1]], tokenizer = word_tokenizer, progressbar = FALSE, n_chunks = 10)
it2 <- itoken_parallel(data[[2]], tokenizer = word_tokenizer, progressbar = FALSE, n_chunks = 10)
vocab <- create_vocabulary(player)
vectorizer <- vocab_vectorizer(vocab)
m1 <- create_dtm(it, vectorizer)
m2 <- create_dtm(it2, vectorizer)
m2@x[] <- -1
cbind(m1, m2)
}
n <- 1e+5
p.combination = character(n)
for(i in 1:length(p.combination)){
p.combination[i] = LETTERS[sample(1:26,5)] %>% paste0(collapse = ", ")
}
p.com.allowed = character(n)
for(i in 1:length(p.com.allowed)){
p.com.allowed[i] = LETTERS[sample(1:26,5)] %>% paste0(collapse = ", ")
}
data = data.frame(
stringsAsFactors = FALSE,
p.combination = p.combination,
p.com.allowed = p.com.allowed)
player = LETTERS[1:26]
result2 <- benchmark(
replications = 10,
# out1 <- mowgur(data, player),
# out2 <- tan800630(data, player),
out3 <- andrew(data, player),
out4 <- wush(data, player)
)
knitr::kable(result2)
| test | replications | elapsed | relative | user.self | sys.self | user.child | sys.child |
|---|---|---|---|---|---|---|---|
| out3 <- andrew(data, player) | 10 | 16.916 | 1.528 | 16.78 | 0.128 | 0.000 | 0.000 |
| out4 <- wush(data, player) | 10 | 11.071 | 1.000 | 2.04 | 1.556 | 39.584 | 7.348 |
| sparse 比較省記憶體 |
object.size(out3)
## 20803320 bytes
object.size(out4)
## 13283376 bytes