【 選用資料集介紹 】



【 套件載入 】

# 載入所需的套件(Loading package)
library(dplyr)
library(plyr)
library(ggplot2)
library(readr)
library(reshape)
library(dplyr)
library(plyr)
library(ggplot2)
library(readr)
library(reshape)
library(ROCR)
library(caret)
library(randomForest)
library(pROC)
# Switch every locale category of this R session to the "C" locale
Sys.setlocale(category = "LC_ALL", locale = "C")
[1] "C/C/C/C/C/en_US.UTF-8"



【 敘述統計 】

# Read the match data; readr guesses the column types and reports them
LOL <- readr::read_csv(file = "LOLgamedata.csv")
Parsed with column specification:
cols(
  .default = col_character(),
  gameId = col_double(),
  creationTime = col_double(),
  gameDuration = col_integer(),
  seasonId = col_integer(),
  winner = col_integer(),
  firstBlood = col_integer(),
  firstTower = col_integer(),
  firstInhibitor = col_integer(),
  firstBaron = col_integer(),
  firstDragon = col_integer(),
  firstRiftHerald = col_integer(),
  t1_towerKills = col_integer(),
  t1_inhibitorKills = col_integer(),
  t1_baronKills = col_integer(),
  t1_dragonKills = col_integer(),
  t1_riftHeraldKills = col_integer(),
  t2_towerKills = col_integer(),
  t2_inhibitorKills = col_integer(),
  t2_baronKills = col_integer(),
  t2_dragonKills = col_integer()
  # ... with 1 more columns
)
See spec(...) for full column specifications.
# Count how often each champion is picked in each of the ten pick slots
LOL$seasonId <- NULL
pick_cols <- paste0(rep(c("t1", "t2"), each = 5), "_champ", 1:5, "id")
# One count table per slot, stacked as rows, then flipped so champions are
# rows and the slots become columns V1..V10.
# NOTE(review): stacking assumes every slot's table lists the same champions
# in the same order - confirm that no champion is missing from a slot.
hero <- do.call(rbind, lapply(pick_cols, function(col) table(LOL[[col]])))
hero <- as.data.frame(t(hero))
# Per-team totals: V1..V5 are the t1 slots, V6..V10 the t2 slots
hero$red1 <- rowSums(hero[, paste0("V", 1:5)])
hero$blue1 <- rowSums(hero[, paste0("V", 6:10)])
hero$heroname <- rownames(hero)
# mdata <- melt(select(hero,c(1:10,13)),id = "heroname")
# rowSums(hero)
# hero <- rbind(hero, Totals = colSums(hero))
# Horizontal bar chart of the t1 pick totals (column 11, red1)
barplot(hero$red1, names.arg = hero$heroname, horiz = TRUE, las = 1)

# Keep the winner column plus the ten champion-pick columns (positions refer
# to LOL after seasonId was dropped) and tag every game with a row id.
winlo <- select(LOL, c(4, 11, 14, 17, 20, 23, 36, 39, 42, 45, 48))
winlo$rowname <- rownames(winlo)
# Team 1 picks: all five slots share one name so that melt() stacks them
# into a single long `value` column.
m <- winlo %>% select(2:6)
colnames(m) <- rep("v", 5)
m <- cbind(m, winlo[, 12])
mdata <- melt(m, id = "rowname")
# Champion names are discrete, so they are counted with geom_bar() rather
# than geom_histogram(stat = "count"). The outline colour is a constant and
# is therefore set outside aes(); inside aes() the string 'red' was mapped
# as data, which produced a legend instead of red outlines.
ggplot(data = mdata, aes(x = value)) + geom_bar(colour = "red")
Ignoring unknown parameters: binwidth, bins, pad

# Team 2 picks (columns 7-11 of winlo), stacked into one long `value` column
m2 <- winlo %>% select(7:11)
colnames(m2) <- rep("v", 5)
m2 <- cbind(m2, winlo[, 12])
mdata2 <- melt(m2, id = "rowname")
# Discrete x, so count with geom_bar(); the constant outline colour is set
# outside aes() so the bars are actually outlined in red.
ggplot(data = mdata2, aes(x = value)) + geom_bar(colour = "red")
Ignoring unknown parameters: binwidth, bins, pad

# Picks of both teams (columns 2-11 of winlo), stacked into one long column
m3 <- winlo %>% select(2:11)
colnames(m3) <- rep("v", 10)
m3 <- cbind(m3, winlo[, 12])
mdata3 <- melt(m3, id = "rowname")
# Discrete x, so count with geom_bar(); the constant outline colour is set
# outside aes() so the bars are actually outlined in red.
ggplot(data = mdata3, aes(x = value)) + geom_bar(colour = "red")
Ignoring unknown parameters: binwidth, bins, pad



# Per-champion wins, losses and win rate
hero <- as.data.frame(hero)
wint1 <- subset(winlo, winner == 1)
wint2 <- subset(winlo, winner == 2)

# Stack one pick-count table per slot, then flip so champions become rows.
# NOTE(review): assumes every slot's table lists the same champions in the
# same order as the rows of `hero` - confirm.
slot_counts <- function(games, slots) {
  stacked <- Reduce(rbind, lapply(slots, function(count) table(games[, count])))
  t(as.data.frame(stacked))
}

# winlo columns 2-6 are the t1 picks and 7-11 the t2 picks
wincount1 <- slot_counts(wint1, 2:6)
losecount2 <- slot_counts(wint1, 7:11)
wincount2 <- slot_counts(wint2, 7:11)
losecount1 <- slot_counts(wint2, 2:6)

wincount <- cbind(wincount1, wincount2)
losecount <- cbind(losecount1, losecount2)
hero$win <- rowSums(wincount)
hero$lose <- rowSums(losecount)
rm(losecount1, losecount2, wincount1, wincount2, winlo, wint1, wint2)

hero$winrate <- hero$win / (hero$win + hero$lose)
max(hero$winrate)
[1] 0.5563472
# Show the statistics of a hand-picked set of champions
watch_list <- c("Yasuo", "Zed", "Janna", "Darius", "Cho'Gath", "Udyr", "Skarner", "Ryze")
filter(hero, heroname %in% watch_list)
# Games in which either team destroyed at least ten inhibitors
filter(LOL, t1_inhibitorKills >= 10 | t2_inhibitorKills >= 10)



# Count how many times each champion was banned
# Columns 31-35 and 56-60 of LOL are taken as the ten ban slots
bancount <- select(LOL, c(31:35,56:60))
# One count table per ban slot, stacked row-wise
# NOTE(review): assumes all ten tables list the same champions in the same order - confirm
bancount <- Reduce(rbind, Map(function(count)table(bancount[,count]) , c(1:10)))
# Flip so champions are rows, then sum across the ten slots
bancount <- bancount %>% t() %>% rowSums() %>% as.data.frame()
bancount$name <- bancount %>% rownames()
# Keep only the extremes: at least 20000 bans or at most 100
# NOTE(review): `.` presumably refers to the single count column created by as.data.frame() above - verify
drawdraw <- bancount %>% filter(.>=20000 | .<=100)
# Horizontal bar chart of the retained ban counts
barplot(drawdraw[,1], names.arg = drawdraw$name, horiz = TRUE, las=1)



# Cross-tabulate each champion's type against the summoner spells it carried
# (description taken from the original comment). The selected columns come in
# ten triples: one type column followed by that slot's two spell columns.
chtotag <- select(LOL, c(61, 12, 13, 62, 15, 16, 63, 18, 19, 64, 21, 22, 65, 24, 25, 66, 37, 38, 67, 40, 41, 68, 43, 44, 69, 46, 47, 70, 49, 50))
# For every triple build two 2-way tables (type x first spell, type x second
# spell) and bind all twenty tables side by side. An earlier rbind-based
# variant of this step was superseded by the cbind form.
triple_starts <- seq(1, 28, by = 3)
pair_tables <- lapply(triple_starts, function(tag_col) {
  cbind(
    table(chtotag[, c(tag_col, tag_col + 1)]),
    table(chtotag[, c(tag_col, tag_col + 2)])
  )
})
chtotag <- as.data.frame(do.call(cbind, pair_tables))
# apply(chtotag, MARGIN = 2, FUN = table)
# for(i in 1:9){
#   for(j in 1:6)
#    chtotag[j,i]= sum(chtotag[j,i],chtotag[j,i+9],chtotag[j,i+18],chtotag[j,i+27],chtotag[j,i+36],chtotag[j,i+45],chtotag[j,i+54],chtotag[j,i+63],chtotag[j,i+72],chtotag[j,i+80],chtotag[j,i+82],chtotag[j,i+90],chtotag[j,i+99],chtotag[j,i+108],chtotag[j,i+117],chtotag[j,i+126],chtotag[j,i+135],chtotag[j,i+144],chtotag[j,i+153],chtotag[j,i+162],chtotag[j,i+171])
# }
# chtotag[,c(10:180)]=NULL



# Heat map of champion type vs. carried summoner spell, using only the first
# nine columns of chtotag
heatmap(as.matrix(chtotag[, 1:9]))

# Kernel density estimates of four team-1 objective counts and firstRiftHerald
d <- density(LOL$t1_towerKills)
d2 <- density(LOL$t1_inhibitorKills)
d3 <- density(LOL$t1_baronKills)
d4 <- density(LOL$t1_dragonKills)
d5 <- density(LOL$firstRiftHerald)

# Draw the five density curves one after another
for (dens in list(d, d2, d3, d4, d5)) {
  plot(dens)
}

# Game duration against team-1 tower kills: first plain, then coloured by
# the number of team-1 inhibitor kills
p <- ggplot(LOL, aes(x = gameDuration, y = t1_towerKills))
p + geom_point()

p + geom_point(aes(colour = factor(t1_inhibitorKills)))



# Dragon kills per champion, separately for each team
redkilldrag <- select(LOL, c(11, 14, 17, 20, 23, 29))
bluekilldrag <- select(LOL, c(36, 39, 42, 45, 48, 54))

# For each of the five pick slots, average the sixth column (the value that
# is aggregated) by the champion in that slot.
red_slots <- unname(as.list(redkilldrag[paste0("t1_champ", 1:5, "id")]))
agg <- lapply(red_slots, function(num) {
  aggregate(redkilldrag[, 6], by = list(num), FUN = mean)
})
# Bind the five (champion, mean) results side by side; the means sit in the
# even columns.
# NOTE(review): the five per-slot means are summed, not averaged, and the
# side-by-side bind assumes every slot lists the same champions in the same
# order as `hero` - confirm both are intended.
meandrapkill <- Reduce(cbind, agg[1:5])
hero$redmeandragkill <- rowSums(meandrapkill[, seq(2, 10, by = 2)])

blue_slots <- unname(as.list(bluekilldrag[paste0("t2_champ", 1:5, "id")]))
agg2 <- lapply(blue_slots, function(num) {
  aggregate(bluekilldrag[, 6], by = list(num), FUN = mean)
})
meandrapkill2 <- Reduce(cbind, agg2[1:5])
hero$bluemeandragkill <- rowSums(meandrapkill2[, seq(2, 10, by = 2)])
# ggplot(redkilldrag,aes(t1_champ5id,fill=as.factor(t1_dragonKills)))+geom_bar(position ="stack")
# ggplot(bluekilldrag,aes(t2_champ5id,fill=as.factor(t2_dragonKills)))+geom_bar(position ="stack")

# One horizontal bar per champion, filled by the plotted value
x <- ggplot(data = hero, aes(x = heroname, y = redmeandragkill)) +
  geom_bar(aes(fill = redmeandragkill), stat = "identity") +
  coord_flip()
y <- ggplot(data = hero, aes(x = heroname, y = bluemeandragkill)) +
  geom_bar(aes(fill = bluemeandragkill), stat = "identity") +
  coord_flip()
x

y
# NOTE(review): the files are called "tower*.png" although the plots show
# dragon kills - confirm the intended file names.
ggsave(filename = "tower.png", plot = y, width = 10, height = 10, dpi = 300)
ggsave(filename = "tower2.png", plot = x, width = 10, height = 10, dpi = 300)



# Bar chart of redmeandragkill per champion.
# NOTE(review): the original heading announced tower kills, but the plotted
# column is redmeandragkill - confirm which statistic is intended.
# Fixes: the x aesthetic referred to `heronames`, which is not a column of
# hero (the column is `heroname`); the bars are mapped to `fill` so the
# white-to-red fill gradient below actually applies; `width` is a constant
# and is set outside aes(), where it is honoured.
y <- ggplot(data = hero, aes(x = heroname, y = redmeandragkill)) +
  geom_bar(stat = "identity", aes(fill = redmeandragkill), width = 1) +
  coord_flip() +
  theme(text = element_text(size = 10),
        axis.text.x = element_text(angle = 0, hjust = 2)) +
  scale_fill_gradient(low = "white", high = "red")
Ignoring unknown aesthetics: width
  # scale_colour_gradient(low = "green", high = "yellow")
  
# Bar chart of redmeandragkill per champion with a white-to-red fill gradient.
# `width` is a constant bar width, so it is passed to geom_bar() directly;
# inside aes() it was reported as an unknown aesthetic and ignored.
y <- ggplot(data = hero, aes(x = heroname, y = redmeandragkill)) +
  geom_bar(stat = "identity", aes(fill = redmeandragkill), width = 1) +
  coord_flip() +
  theme(text = element_text(size = 10),
        axis.text.x = element_text(angle = 0, hjust = 2)) +
  scale_fill_gradient(low = "white", high = "red")
Ignoring unknown aesthetics: width
# Show the red-side chart and write it to disk
y
ggsave(plot = y, width = 10, height = 10, dpi = 300, filename = "haha.png")

# Same chart for bluemeandragkill, using the default fill scale
x <- ggplot(data = hero, aes(x = heroname, y = bluemeandragkill)) +
  geom_bar(stat = "identity", aes(fill = bluemeandragkill), width = 1) +
  coord_flip() +
  theme(text = element_text(size = 10),
        axis.text.x = element_text(angle = 0, hjust = 2))
x
# `y` was already written to "haha.png" above, so only `x` is saved here
# (the original saved the unchanged `y` a second time).
ggsave(plot = x, width = 10, height = 10, dpi = 300, filename = "haha2.png")



# Section goal (original note): when the blue side wins, compare how often
# Flash is bound to the D key versus the F key.
# NOTE(review): the statement below instead fits a logistic regression of
# winner on firstDragon. lol_train is only created in the train/test split
# further down this file, so that chunk has to run first.
summary(glm(winner ~ firstDragon, data= lol_train, family = "binomial"))

Call:
glm(formula = winner ~ firstDragon, family = "binomial", data = lol_train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.4984  -0.8730  -0.8730   0.8873   1.5161  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -0.27006    0.08775  -3.078  0.00209 ** 
firstDragon1 -0.49825    0.08908  -5.593 2.23e-08 ***
firstDragon2  0.99897    0.08906  11.217  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 55288  on 39883  degrees of freedom
Residual deviance: 50130  on 39881  degrees of freedom
AIC: 50136

Number of Fisher Scoring iterations: 4
# Keep only the training rows whose firstDragon is not 0
# NOTE(review): presumably 0 means that neither team took a dragon - confirm
test1 <- subset(lol_train,lol_train$firstDragon!=0)
# Logistic regression of winner on firstDragon within test1; the columns are
# referenced with `$` inside the formula instead of through a data argument
summary(glm(test1$winner ~ as.factor(test1$firstDragon),family = "binomial"))

Call:
glm(formula = test1$winner ~ as.factor(test1$firstDragon), family = "binomial")

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.4984  -0.8730  -0.8730   0.8873   1.5161  

Coefficients:
                              Estimate Std. Error z value Pr(>|z|)    
(Intercept)                   -0.76831    0.01535  -50.05   <2e-16 ***
as.factor(test1$firstDragon)2  1.49723    0.02159   69.34   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 54555  on 39354  degrees of freedom
Residual deviance: 49406  on 39353  degrees of freedom
AIC: 49410

Number of Fisher Scoring iterations: 4
# Logistic regression of winner on the t1_champ1_sum1 column of lol_train
summary(glm(lol_train$winner ~ lol_train$t1_champ1_sum1,family = "binomial"))

Call:
glm(formula = lol_train$winner ~ lol_train$t1_champ1_sum1, family = "binomial")

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-1.202  -1.170  -1.150   1.185   1.247  

Coefficients:
                                 Estimate Std. Error z value Pr(>|z|)
(Intercept)                      -0.06454    0.08717  -0.740    0.459
lol_train$t1_champ1_sum1Cleanse  -0.09710    0.20017  -0.485    0.628
lol_train$t1_champ1_sum1Exhaust   0.01274    0.09496   0.134    0.893
lol_train$t1_champ1_sum1Flash     0.04615    0.08821   0.523    0.601
lol_train$t1_champ1_sum1Ghost     0.12308    0.12037   1.022    0.307
lol_train$t1_champ1_sum1Heal      0.06510    0.09337   0.697    0.486
lol_train$t1_champ1_sum1Ignite    0.04496    0.09459   0.475    0.635
lol_train$t1_champ1_sum1Smite     0.04236    0.09317   0.455    0.649
lol_train$t1_champ1_sum1Teleport  0.06080    0.09310   0.653    0.514

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 55288  on 39883  degrees of freedom
Residual deviance: 55285  on 39875  degrees of freedom
AIC: 55303

Number of Fisher Scoring iterations: 3
# winner plus the five t1_champ*_sum1 columns (names as printed by summary)
# NOTE(review): `lol` is read with read.csv() further down this file, so that
# chunk has to run before this one.
a <- lol[,c(5,13,16,19,22,25)]
summary(a)
 winner     t1_champ1_sum1   t1_champ2_sum1   t1_champ3_sum1   t1_champ4_sum1   t1_champ5_sum1 
 1:25211   Flash   :27295   Flash   :27144   Flash   :27117   Flash   :27104   Flash   :27085  
 2:24645   Teleport: 4784   Heal    : 4763   Heal    : 4737   Heal    : 4801   Teleport: 4899  
           Smite   : 4556   Smite   : 4626   Smite   : 4649   Smite   : 4519   Smite   : 4636  
           Heal    : 4439   Teleport: 4426   Teleport: 4529   Teleport: 4515   Heal    : 4352  
           Ignite  : 3700   Exhaust : 3770   Exhaust : 3808   Exhaust : 3863   Ignite  : 3794  
           Exhaust : 3544   Ignite  : 3697   Ignite  : 3656   Ignite  : 3668   Exhaust : 3634  
           (Other) : 1538   (Other) : 1430   (Other) : 1360   (Other) : 1386   (Other) : 1456  
# winner plus the five t2_champ*_sum1 columns (names as printed by summary)
b <- lol[,c(5,38,41,44,47,50)]
summary(b)
 winner     t2_champ1_sum1   t2_champ2_sum1   t2_champ3_sum1   t2_champ4_sum1   t2_champ5_sum1 
 1:25211   Flash   :27312   Flash   :27150   Flash   :27280   Flash   :27166   Flash   :26937  
 2:24645   Teleport: 4790   Heal    : 4704   Heal    : 4687   Heal    : 4757   Teleport: 4880  
           Smite   : 4451   Smite   : 4561   Teleport: 4576   Smite   : 4743   Smite   : 4624  
           Heal    : 4325   Teleport: 4533   Smite   : 4470   Teleport: 4456   Heal    : 4428  
           Ignite  : 3799   Exhaust : 3782   Exhaust : 3813   Exhaust : 3734   Ignite  : 3840  
           Exhaust : 3660   Ignite  : 3693   Ignite  : 3642   Ignite  : 3583   Exhaust : 3759  
           (Other) : 1519   (Other) : 1433   (Other) : 1388   (Other) : 1417   (Other) : 1388  
# Restrict both tables to the games where winner is "1"
# NOTE(review): the names `c` and `d` shadow base::c and overwrite the
# density object `d` created earlier in this file.
c <- a[a$winner=="1",]
d <- b[b$winner=="1",]
# Spell frequencies in the t1_champ*_sum1 columns for those games
summary(c)
 winner     t1_champ1_sum1   t1_champ2_sum1   t1_champ3_sum1   t1_champ4_sum1   t1_champ5_sum1 
 1:25211   Flash   :13819   Flash   :13715   Flash   :13708   Flash   :13614   Flash   :13681  
 2:    0   Teleport: 2428   Heal    : 2392   Smite   : 2398   Heal    : 2482   Teleport: 2543  
           Smite   : 2296   Smite   : 2296   Heal    : 2381   Smite   : 2310   Smite   : 2334  
           Heal    : 2230   Teleport: 2215   Teleport: 2241   Teleport: 2251   Heal    : 2135  
           Ignite  : 1870   Exhaust : 1940   Exhaust : 1926   Exhaust : 1984   Ignite  : 1926  
           Exhaust : 1792   Ignite  : 1924   Ignite  : 1866   Ignite  : 1832   Exhaust : 1858  
           (Other) :  776   (Other) :  729   (Other) :  691   (Other) :  738   (Other) :  734  
# Spell frequencies in the t2_champ*_sum1 columns for the winner == "1" games
summary(d)
 winner     t2_champ1_sum1   t2_champ2_sum1   t2_champ3_sum1   t2_champ4_sum1   t2_champ5_sum1 
 1:25211   Flash   :13769   Flash   :13727   Flash   :13857   Flash   :13659   Flash   :13572  
 2:    0   Teleport: 2475   Heal    : 2415   Teleport: 2361   Smite   : 2394   Teleport: 2450  
           Smite   : 2249   Smite   : 2295   Heal    : 2330   Heal    : 2363   Smite   : 2394  
           Heal    : 2241   Teleport: 2240   Smite   : 2181   Teleport: 2337   Heal    : 2269  
           Ignite  : 1855   Exhaust : 1926   Exhaust : 1938   Exhaust : 1954   Ignite  : 1939  
           Exhaust : 1841   Ignite  : 1875   Ignite  : 1856   Ignite  : 1810   Exhaust : 1882  
           (Other) :  781   (Other) :  733   (Other) :  688   (Other) :  694   (Other) :  705  
# Number of "Flash" entries across the five spell columns of a subset. The
# counts are computed from the data instead of being copied by hand from the
# summary() printouts, so they cannot drift from the data set.
count_flash <- function(spell_table) {
  # The first column is winner; the remaining five are spell columns
  sum(vapply(spell_table[-1], function(spell) sum(spell == "Flash"), integer(1)))
}
s1 <- count_flash(c)   # t1_champ*_sum1 columns, games with winner == "1"
s2 <- count_flash(d)   # t2_champ*_sum1 columns, games with winner == "1"
# Share of Flash among the five t1_champ*_sum1 entries of those games
# (original note: share of Flash on the D key when the blue side wins)
s1 / (nrow(c) * 5)                                # 0.5437071
[1] 0.5437071
# Original note: share of Flash on the F key when the blue side wins.
# NOTE(review): s2 is taken from `d`, whose columns are t2_champ*_sum1 (the
# other team's first spell column), not a second spell column - confirm that
# this measures what the note claims.
s2 / (25211 * 5)                                  # 0.54408
[1] 0.54408



【 資料前處理 】



【 (處理後)資料集資訊 】



【 載入資料集(Loading dataset):[LOLgamedata.csv] 】

我們透過組員自身的遊戲經驗,挑出我們認為較顯著之資料欄位進行變數型態轉換

# Load the data set again as a base data.frame.
# stringsAsFactors is spelled out because the factor summaries and models in
# this file rely on text columns being factors, which is no longer the
# read.csv() default from R 4.0 on. `TRUE` replaces the reassignable `T`.
lol <- read.csv("LOLgamedata.csv", header = TRUE, stringsAsFactors = TRUE)
# View(lol)
# Convert the outcome and the "first objective" indicator columns to factors
outcome_cols <- c(
  "winner", "firstBlood", "firstTower", "firstInhibitor",
  "firstBaron", "firstDragon", "firstRiftHerald"
)
lol[outcome_cols] <- lapply(lol[outcome_cols], as.factor)



【 創造變數 】

# Derived features: team 1 count minus team 2 count for each objective
lol$tower_gap <- with(lol, t1_towerKills - t2_towerKills)                 # towers
# lol_train$tower_gap <- (lol_train$t1_towerKills - lol_train$t2_towerKills)
# summary(glm(winner ~ lol_train$tower_gap, data= lol_train, family = "binomial"))
lol$dragon_gap <- with(lol, t1_dragonKills - t2_dragonKills)              # dragons
lol$baron_gap <- with(lol, t1_baronKills - t2_baronKills)                 # barons
lol$inhibitorKills_gap <- with(lol, t1_inhibitorKills - t2_inhibitorKills) # inhibitors



【建模】 【邏輯式迴歸(glm) 】

# Split the data into a training set (lol_train) and a testing set (lol_test)
set.seed(2018)
# Draw 80% of the row indices without replacement. seq_len() and FALSE
# replace 1:nrow() and the reassignable `F`; the draw itself is unchanged.
train_idx <- sample(seq_len(nrow(lol)), size = 0.8 * nrow(lol), replace = FALSE)
lol_train <- lol[train_idx, ]                                         # 39884 obs. of 75 variables
lol_test <- lol[-train_idx, ]                                         # 9972 obs. of 75 variables
# Baseline logistic regression on the six "first objective" factors
summary(glm(winner ~ firstBlood + firstTower + firstInhibitor + firstBaron + firstDragon + firstRiftHerald, data = lol_train, family = "binomial"))

Call:
glm(formula = winner ~ firstBlood + firstTower + firstInhibitor + 
    firstBaron + firstDragon + firstRiftHerald, family = "binomial", 
    data = lol_train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.9057  -0.3409  -0.1809   0.3495   2.8710  

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)      -0.72064    0.11624  -6.199 5.67e-10 ***
firstBlood2       0.28354    0.03497   8.108 5.14e-16 ***
firstTower2       0.82213    0.03631  22.644  < 2e-16 ***
firstInhibitor1  -2.02436    0.05042 -40.153  < 2e-16 ***
firstInhibitor2   1.91971    0.05078  37.805  < 2e-16 ***
firstBaron1      -0.91436    0.04685 -19.518  < 2e-16 ***
firstBaron2       1.18904    0.04549  26.141  < 2e-16 ***
firstDragon1     -0.26268    0.11836  -2.219   0.0265 *  
firstDragon2      0.54305    0.11849   4.583 4.58e-06 ***
firstRiftHerald1 -0.18285    0.04329  -4.224 2.40e-05 ***
firstRiftHerald2  0.16980    0.04365   3.890   0.0001 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 55288  on 39883  degrees of freedom
Residual deviance: 23077  on 39873  degrees of freedom
AIC: 23099

Number of Fisher Scoring iterations: 6
# glm prediction with the "common sense" predictors only
model1 <- glm(
  winner ~ firstBlood + firstTower + firstInhibitor + firstBaron +
    firstDragon + firstRiftHerald,
  family = "binomial",
  data = lol_train
)
result <- predict(model1, newdata = lol_test, type = "response")
# Test accuracy at a 0.5 threshold: diagonal of the confusion table over its total
confusion <- table(lol_test$winner, result > 0.5)
sum(diag(confusion)) / sum(confusion)                                # ACC = 0.9044324
[1] 0.9044324
# glm prediction after adding tower_gap
model1 <- glm(
  winner ~ firstBlood + firstTower + firstInhibitor + firstBaron +
    firstDragon + firstRiftHerald + tower_gap,
  family = "binomial",
  data = lol_train
)
result <- predict(model1, newdata = lol_test, type = "response")
# Test accuracy at a 0.5 threshold
confusion <- table(lol_test$winner, result > 0.5)
sum(diag(confusion)) / sum(confusion)                                # ACC = 0.9736262
[1] 0.9736262
# glm prediction with tower_gap, after dropping the predictors judged
# not significant
model1 <- glm(
  winner ~ firstTower + firstInhibitor + firstRiftHerald + tower_gap,
  family = "binomial",
  data = lol_train
)
result <- predict(model1, newdata = lol_test, type = "response")
# Test accuracy at a 0.5 threshold
confusion <- table(lol_test$winner, result > 0.5)
sum(diag(confusion)) / sum(confusion)                                # ACC = 0.9731247
[1] 0.9731247
# glm prediction with every predictor: the six "first objective" factors plus
# tower_gap, dragon_gap, baron_gap, inhibitorKills_gap and gameDuration
model1 <- glm(
  winner ~ firstBlood + firstTower + firstInhibitor + firstBaron +
    firstDragon + firstRiftHerald + tower_gap + dragon_gap + baron_gap +
    inhibitorKills_gap + gameDuration,
  family = "binomial",
  data = lol_train
)
result <- predict(model1, newdata = lol_test, type = "response")
# Test accuracy at a 0.5 threshold
confusion <- table(lol_test$winner, result > 0.5)
sum(diag(confusion)) / sum(confusion)                                # ACC = 0.9742278
[1] 0.9742278
# Coefficient table of the most recently fitted model1
summary(model1)

Call:
glm(formula = winner ~ firstBlood + firstTower + firstInhibitor + 
    firstBaron + firstDragon + firstRiftHerald + tower_gap + 
    dragon_gap + baron_gap + inhibitorKills_gap + gameDuration, 
    family = "binomial", data = lol_train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-3.8432  -0.0338  -0.0023   0.0325   4.7229  

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)         0.37382    0.27022   1.383 0.166540    
firstBlood2        -0.12787    0.07033  -1.818 0.069042 .  
firstTower2        -0.87222    0.07553 -11.548  < 2e-16 ***
firstInhibitor1     0.51610    0.13912   3.710 0.000207 ***
firstInhibitor2    -0.58941    0.14017  -4.205 2.61e-05 ***
firstBaron1         0.52945    0.13253   3.995 6.47e-05 ***
firstBaron2        -0.08496    0.13143  -0.646 0.518002    
firstDragon1        0.47420    0.26300   1.803 0.071388 .  
firstDragon2        0.59231    0.26366   2.246 0.024673 *  
firstRiftHerald1    0.22080    0.08751   2.523 0.011634 *  
firstRiftHerald2   -0.59453    0.08857  -6.713 1.91e-11 ***
tower_gap          -1.28958    0.02374 -54.313  < 2e-16 ***
dragon_gap          0.04511    0.02163   2.086 0.037020 *  
baron_gap          -0.62911    0.05491 -11.456  < 2e-16 ***
inhibitorKills_gap  0.10260    0.03776   2.717 0.006591 ** 
gameDuration       -0.01023    0.00544  -1.880 0.060134 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 55287.7  on 39883  degrees of freedom
Residual deviance:  5838.7  on 39868  degrees of freedom
AIC: 5870.7

Number of Fisher Scoring iterations: 9



【 邏輯式迴歸ROC(ROC curve of Logistic Regression) 】

# ROCR objects for the logistic-regression scores on the test set
pred <- prediction(result, lol_test$winner)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
auc <- performance(pred, "auc")
# Draw the ROC curve and print the AUC on it. The x axis shows the false
# positive rate, which is 1 - specificity; the previous label called it
# "Specificity".
plot(perf, main = "ROC curve(Logistic Regression)", xlab = "1 - Specificity(FPR)", ylab = "Sensitivity(TPR)")
abline(0, 1)   # diagonal reference line
text(0.5, 0.5, as.character(auc@y.values[[1]]))           # AUC = 0.9966



【 邏輯式迴歸交叉驗證(Cross-Validation(CV) of Logistic Regession) 】

# Run k-fold cross-validation for a logistic regression and return the pooled
# confusion matrix of all held-out predictions.
#
# Args:
#   f:      model formula; its first variable is taken as the response.
#   d:      data frame (or tibble) holding every variable used in `f`.
#   k:      number of folds, a single number >= 2.
#   cutoff: a single number; a held-out row is labelled "Yes" when its
#           predicted probability is greater than this value, else "No".
#
# Returns:
#   A table with the predicted class ("No"/"Yes") in rows and the observed
#   response values in columns.
k_fold_CV_logit <- function(f, d, k, cutoff) {
  # Fail early with a clear message; k = 1 would leave no training rows
  stopifnot(
    is.numeric(k), length(k) == 1, k >= 2,
    is.numeric(cutoff), length(cutoff) == 1
  )
  n_rows <- nrow(d)
  response_var <- all.vars(f)[1]
  # `[[` yields a plain vector for data frames and tibbles alike, whereas
  # d[rows, name] returns a one-column tibble when d is a tibble
  actual <- d[[response_var]]
  # One random permutation of the fold labels, recycled down the rows
  fold_id <- rep_len(sample(seq_len(k)), n_rows)

  # Fit on all other folds, then classify the held-out fold
  per_fold <- lapply(seq_len(k), function(fold) {
    held_out <- fold_id == fold
    fit <- glm(f, d[!held_out, ], family = "binomial")
    predicted_prob <- predict(fit, d[held_out, ], type = "response")
    data.frame(
      predictedClass = ifelse(predicted_prob > cutoff, 1, 0),
      actualClass = actual[held_out]
    )
  })

  # Pool the folds and cross-tabulate predicted against actual classes
  output_DF <- do.call(rbind, per_fold)
  output_DF$predictedClass <- factor(output_DF$predictedClass,
                                     levels = c(0, 1), labels = c("No", "Yes"))
  table(output_DF$predictedClass, output_DF$actualClass)
}
# 10-fold CV confusion matrices of the baseline model on the training rows,
# one per cutoff (the original trailing note singled out 0.5)
cutoffs <- c(0.9, 0.8, 0.7, 0.6, 0.5, 0.45, 0.4, 0.3, 0.2, 0.1)
lapply(cutoffs, function(cutoff) {
  k_fold_CV_logit(
    winner ~ firstBlood + firstTower + firstInhibitor + firstBaron +
      firstDragon + firstRiftHerald,
    lol[train_idx, ], 10, cutoff
  )
})
[[1]]
     
          1     2
  No  19504  7456
  Yes   618 12306

[[2]]
     
          1     2
  No  19129  4829
  Yes   993 14933

[[3]]
     
          1     2
  No  18822  3285
  Yes  1300 16477

[[4]]
     
          1     2
  No  18674  2637
  Yes  1448 17125

[[5]]
     
          1     2
  No  18233  1978
  Yes  1889 17784

[[6]]
     
          1     2
  No  17860  1664
  Yes  2262 18098

[[7]]
     
          1     2
  No  17692  1587
  Yes  2430 18175

[[8]]
     
          1     2
  No  16964  1335
  Yes  3158 18427

[[9]]
     
          1     2
  No  15230   975
  Yes  4892 18787

[[10]]
     
          1     2
  No  12439   526
  Yes  7683 19236



【 隨機森林(Random Forest) 】

# Random forest on the predictors the team picked from its own game experience
set.seed(2018)
lol_tree <- randomForest(
  winner ~ firstBlood + firstTower + firstInhibitor + firstBaron +
    firstDragon + firstRiftHerald,
  data = lol_train,
  ntree = 500
)
result_tree <- predict(lol_tree, newdata = lol_test)
lol_tree

Call:
 randomForest(formula = winner ~ firstBlood + firstTower + firstInhibitor +      firstBaron + firstDragon + firstRiftHerald, data = lol_train,      ntree = 500) 
               Type of random forest: classification
                     Number of trees: 500
No. of variables tried at each split: 2

        OOB estimate of  error rate: 9.36%
Confusion matrix:
      1     2 class.error
1 18342  1780  0.08846039
2  1954 17808  0.09887663
    # Confusion matrix:
    #       1     2 class.error
    # 1 18345  1777  0.08831130
    # 2  1973 17789  0.09983807
# Use importance() to see which predictors matter most
importance(lol_tree)
                MeanDecreaseGini
firstBlood               49.4587
firstTower             1481.5744
firstInhibitor         8485.8823
firstBaron             2454.6815
firstDragon             718.0792
firstRiftHerald         210.8131
    #                 MeanDecreaseGini
    # firstTower            1059.24050
    # firstInhibitor        9253.06155
    # firstBaron            2451.38878
    # firstDragon            745.84541
    # firstRiftHerald         91.45326 (最低)
# Refit without firstBlood, the predictor with the lowest MeanDecreaseGini
# in the importance() output of the six-predictor forest.
# (The original comment named firstRiftHerald, but the formula below keeps
# firstRiftHerald and leaves out firstBlood.)
lol_tree <- randomForest(winner ~ firstTower+firstInhibitor+firstBaron+firstDragon+firstRiftHerald,lol_train, ntree=500)
result_tree <- predict(lol_tree,newdata = lol_test)
lol_tree

Call:
 randomForest(formula = winner ~ firstTower + firstInhibitor +      firstBaron + firstDragon + firstRiftHerald, data = lol_train,      ntree = 500) 
               Type of random forest: classification
                     Number of trees: 500
No. of variables tried at each split: 2

        OOB estimate of  error rate: 9.42%
Confusion matrix:
      1     2 class.error
1 18316  1806  0.08975251
2  1953 17809  0.09882603
    # Confusion matrix:
    #       1     2 class.error
    # 1 18322  1800  0.08945433
    # 2  1966 17796  0.09948386
# Random forest with every predictor: the six "first objective" factors plus
# tower_gap, dragon_gap, baron_gap, inhibitorKills_gap and gameDuration
lol_tree <- randomForest(
  winner ~ firstBlood + firstTower + firstInhibitor + firstBaron +
    firstDragon + firstRiftHerald + tower_gap + dragon_gap + baron_gap +
    inhibitorKills_gap + gameDuration,
  data = lol_train,
  ntree = 500
)
result_tree <- predict(lol_tree, newdata = lol_test)
lol_tree

Call:
 randomForest(formula = winner ~ firstBlood + firstTower + firstInhibitor +      firstBaron + firstDragon + firstRiftHerald + tower_gap +      dragon_gap + baron_gap + inhibitorKills_gap + gameDuration,      data = lol_train, ntree = 500) 
               Type of random forest: classification
                     Number of trees: 500
No. of variables tried at each split: 3

        OOB estimate of  error rate: 2.4%
Confusion matrix:
      1     2 class.error
1 19649   473  0.02350661
2   484 19278  0.02449145
    # Confusion matrix:
    #       1     2 class.error
    # 1 19655   467  0.02320843
    # 2   488 19274  0.02469386
# List the components stored in the fitted randomForest object
summary(lol_tree)
                Length Class  Mode     
call                4  -none- call     
type                1  -none- character
predicted       39884  factor numeric  
err.rate         1500  -none- numeric  
confusion           6  -none- numeric  
votes           79768  matrix numeric  
oob.times       39884  -none- numeric  
classes             2  -none- character
importance         11  -none- numeric  
importanceSD        0  -none- NULL     
localImportance     0  -none- NULL     
proximity           0  -none- NULL     
ntree               1  -none- numeric  
mtry                1  -none- numeric  
forest             14  -none- list     
y               39884  factor numeric  
test                0  -none- NULL     
inbag               0  -none- NULL     
terms               3  terms  call     



【 隨機森林ROC(ROC curve of Random Forest) 】

# ROC curve of the random forest on the test set, annotated with its AUC
rf.pred <- predict(lol_tree, lol_test, type = "prob")
# Score every test row with the second probability column
rf.roc <- prediction(rf.pred[, 2], lol_test$winner)
# Despite its name, rf.auc holds the TPR/FPR curve, not the AUC
rf.auc <- performance(rf.roc, "tpr", "fpr")
# The AUC announced for this section was never computed (the disabled text()
# call would have read the TPR values out of rf.auc); compute it explicitly.
rf.auc.value <- performance(rf.roc, "auc")@y.values[[1]]
plot(rf.auc)
abline(0, 1)   # diagonal reference line
text(0.5, 0.5, as.character(rf.auc.value))



【 計算平均平方誤差MSE(Mean Square Error) 】

# Disabled MSE attempt: mean((result-lol_test$winner)^2)
dim(lol_test)       # 9972 rows, 75 columns (see the printed result)
[1] 9972   75
dim(lol_train)      # 39884 rows, 75 columns (see the printed result)
[1] 39884    75
# Coefficient table of the last fitted logistic model (model1)
summary(model1)

Call:
glm(formula = winner ~ firstBlood + firstTower + firstInhibitor + 
    firstBaron + firstDragon + firstRiftHerald + tower_gap + 
    dragon_gap + baron_gap + inhibitorKills_gap + gameDuration, 
    family = "binomial", data = lol_train)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-3.8432  -0.0338  -0.0023   0.0325   4.7229  

Coefficients:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)         0.37382    0.27022   1.383 0.166540    
firstBlood2        -0.12787    0.07033  -1.818 0.069042 .  
firstTower2        -0.87222    0.07553 -11.548  < 2e-16 ***
firstInhibitor1     0.51610    0.13912   3.710 0.000207 ***
firstInhibitor2    -0.58941    0.14017  -4.205 2.61e-05 ***
firstBaron1         0.52945    0.13253   3.995 6.47e-05 ***
firstBaron2        -0.08496    0.13143  -0.646 0.518002    
firstDragon1        0.47420    0.26300   1.803 0.071388 .  
firstDragon2        0.59231    0.26366   2.246 0.024673 *  
firstRiftHerald1    0.22080    0.08751   2.523 0.011634 *  
firstRiftHerald2   -0.59453    0.08857  -6.713 1.91e-11 ***
tower_gap          -1.28958    0.02374 -54.313  < 2e-16 ***
dragon_gap          0.04511    0.02163   2.086 0.037020 *  
baron_gap          -0.62911    0.05491 -11.456  < 2e-16 ***
inhibitorKills_gap  0.10260    0.03776   2.717 0.006591 ** 
gameDuration       -0.01023    0.00544  -1.880 0.060134 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 55287.7  on 39883  degrees of freedom
Residual deviance:  5838.7  on 39868  degrees of freedom
AIC: 5870.7

Number of Fisher Scoring iterations: 9
