library(devtools)
library(DBI)
library(tidyverse)
library(RSQLite)
library(DescTools)
library(DT)
library(Datatablethemes)
library(scales)
library(stringi)
library(downloadthis)
library(ggplot2)
library(lubridate)
library(hrbrthemes)
library(patchwork)
library(viridis)
library(ggridges)
library(RColorBrewer)
library(gplots)
library(neuralnet)
library(caret)
library(fastDummies)
library(e1071)
## | library(devtools) |
## | library(DBI) |
## | library(tidyverse) |
## | library(RSQLite) |
## | library(DescTools) |
## | library(DT) |
## | library(Datatablethemes) |
## | library(stringi) |
## | library(downloadthis) |
# Read the per-game centipawn metrics and inspect size and missingness.
Chess_project <- read_csv(file = "10480_games_with_centipawn_metrics.csv")
dim(Chess_project)
naniar::gg_miss_var(Chess_project)

# Drop the auto-named `...1` column, the PGN column and the two
# "expected rating" columns, then remove games without a recorded Result.
Chess_project <- Chess_project %>%
  select(
    -c(
      `...1`,
      PGN,
      `White Expected Rating by ACPL`,
      `Black Expected Rating by ACPL`
    )
  ) %>%
  drop_na(Result)

# Re-check missingness and column types after the clean-up.
naniar::gg_miss_var(Chess_project)
glimpse(Chess_project)
## [1] 10622 20
## Rows: 10,478
## Columns: 16
## $ Date <chr> "3/28/2018", "3/29/2018", "3/29/2018", "3/30/2018…
## $ `Event Name` <chr> "Philadelphia op 12th", "Philadelphia op 12th", "…
## $ `Event Rounds` <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9…
## $ Round <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "1",…
## $ `White Name` <chr> "Niemann, Hans Moke", "Niemann, Hans Moke", "Goro…
## $ `Black Name` <chr> "Oberoi, Shelev", "Popilski, Gil", "Niemann, Hans…
## $ Result <dbl> 0, 1, 2, 0, 2, 1, 0, 1, 0, 1, 2, 1, 1, 2, 1, 1, 0…
## $ `White ELO` <dbl> 2302, 2302, 2496, 2509, 2302, 2134, 2302, 2162, 2…
## $ `Black ELO` <dbl> 1924, 2502, 2302, 2302, 2405, 2302, 2145, 2302, 2…
## $ Moves <dbl> 39, 36, 50, 38, 39, 46, 44, 46, 56, 38, 57, 60, 6…
## $ `White Av CP Loss` <dbl> 22, 12, 46, 22, 23, 34, 24, 50, 32, 16, 37, 11, 1…
## $ `Black Av CP Loss` <dbl> 46, 13, 28, 38, 7, 34, 45, 50, 49, 12, 21, 11, 21…
## $ `Evaluations List` <chr> "[47, 43, 44, -9, 17, 10, 5, 0, 12, -30, -13, -51…
## $ `White CP Loss List` <chr> "[4, 53, 7, 5, 42, 38, 0, 13, 135, 0, 0, 5, 0, 22…
## $ `Black CP Loss List` <chr> "[1, 26, 0, 12, 17, 10, 19, 14, 35, 12, 10, 11, 1…
## $ `Analysis Depth` <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2…
# Per-column summary of the cleaned data, before Date/Round/Result are converted.
summary(Chess_project)
## Date Event Name Event Rounds Round
## Length:10478 Length:10478 Min. : 1.000 Length:10478
## Class :character Class :character 1st Qu.: 9.000 Class :character
## Mode :character Mode :character Median : 9.000 Mode :character
## Mean : 9.408
## 3rd Qu.:11.000
## Max. :22.000
## NA's :12
## White Name Black Name Result White ELO
## Length:10478 Length:10478 Min. :0.0000 Min. : 893
## Class :character Class :character 1st Qu.:0.0000 1st Qu.:2439
## Mode :character Mode :character Median :1.0000 Median :2558
## Mean :0.8846 Mean :2535
## 3rd Qu.:1.0000 3rd Qu.:2688
## Max. :2.0000 Max. :2882
##
## Black ELO Moves White Av CP Loss Black Av CP Loss
## Min. : 700 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.:2430 1st Qu.: 33.00 1st Qu.: 15.00 1st Qu.: 15.0
## Median :2553 Median : 41.00 Median : 23.00 Median : 25.0
## Mean :2526 Mean : 44.28 Mean : 26.84 Mean : 28.5
## 3rd Qu.:2686 3rd Qu.: 54.00 3rd Qu.: 35.00 3rd Qu.: 39.0
## Max. :2882 Max. :182.00 Max. :458.00 Max. :476.0
##
## Evaluations List White CP Loss List Black CP Loss List Analysis Depth
## Length:10478 Length:10478 Length:10478 Min. :20
## Class :character Class :character Class :character 1st Qu.:20
## Mode :character Mode :character Mode :character Median :20
## Mean :20
## 3rd Qu.:20
## Max. :22
## NA's :1
# Show which distinct Round values cannot be read as numbers. These become NA
# when Round is converted with as.numeric(). Looking them up by content avoids
# depending on a hard-coded position in unique(Round), which shifts whenever
# the data changes.
round_values <- unique(Chess_project$Round)
round_values[
  !is.na(round_values) & is.na(suppressWarnings(as.numeric(round_values)))
]
## [1] "?"
# Convert the text columns to proper types.
# Dates are written month/day/four-digit year (e.g. "3/28/2018"), so the
# format needs %Y. With %y only the first two year digits are consumed and
# "3/28/2018" is parsed as 2020-03-28.
Chess_project$Date <- as.Date(Chess_project$Date, format = "%m/%d/%Y")
# "?" marks an unknown round; set it to NA explicitly so the numeric
# conversion only warns about values that are unexpectedly non-numeric.
Chess_project$Round[Chess_project$Round %in% "?"] <- NA
Chess_project$Round <- as.numeric(Chess_project$Round)
# Result is a coded outcome (0, 1, 2), so treat it as categorical.
Chess_project$Result <- as.factor(Chess_project$Result)
summary(Chess_project)
## Date Event Name Event Rounds Round
## Min. :2019-01-01 Length:10478 Min. : 1.000 Min. : 1.000
## 1st Qu.:2020-03-26 Class :character 1st Qu.: 9.000 1st Qu.: 3.000
## Median :2020-06-30 Mode :character Median : 9.000 Median : 5.000
## Mean :2020-06-21 Mean : 9.408 Mean : 5.275
## 3rd Qu.:2020-09-21 3rd Qu.:11.000 3rd Qu.: 7.800
## Max. :2020-12-31 Max. :22.000 Max. :15.800
## NA's :12 NA's :29
## White Name Black Name Result White ELO Black ELO
## Length:10478 Length:10478 0:3703 Min. : 893 Min. : 700
## Class :character Class :character 1:4281 1st Qu.:2439 1st Qu.:2430
## Mode :character Mode :character 2:2494 Median :2558 Median :2553
## Mean :2535 Mean :2526
## 3rd Qu.:2688 3rd Qu.:2686
## Max. :2882 Max. :2882
##
## Moves White Av CP Loss Black Av CP Loss Evaluations List
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Length:10478
## 1st Qu.: 33.00 1st Qu.: 15.00 1st Qu.: 15.0 Class :character
## Median : 41.00 Median : 23.00 Median : 25.0 Mode :character
## Mean : 44.28 Mean : 26.84 Mean : 28.5
## 3rd Qu.: 54.00 3rd Qu.: 35.00 3rd Qu.: 39.0
## Max. :182.00 Max. :458.00 Max. :476.0
##
## White CP Loss List Black CP Loss List Analysis Depth
## Length:10478 Length:10478 Min. :20
## Class :character Class :character 1st Qu.:20
## Mode :character Mode :character Median :20
## Mean :20
## 3rd Qu.:20
## Max. :22
## NA's :1
## ELO histograms (White and Black)
# Dark theme with white axis text, shared by both histograms.
elo_hist_theme <- list(
  hrbrthemes::theme_ft_rc(),
  theme(
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white")
  )
)

## White ELO
# Freedman-Diaconis bin width: 2 * IQR / n^(1/3) over the non-missing ratings.
white_elo <- Chess_project$`White ELO`
white_elo <- white_elo[!is.na(white_elo)]
bw <- 2 * IQR(white_elo) / length(white_elo)^(1 / 3)
g1 <- ggplot(Chess_project, aes(x = `White ELO`)) +
  geom_histogram(
    binwidth = bw,
    fill = "green",
    color = "black",
    alpha = 0.5,
    show.legend = FALSE
  ) +
  labs(
    title = "(g1) Distribution of White ELO",
    x = "White ELO",
    y = "Frequency"
  ) +
  elo_hist_theme

## Black ELO
# Same bin-width rule, recomputed for the Black ratings.
black_elo <- Chess_project$`Black ELO`
black_elo <- black_elo[!is.na(black_elo)]
bw <- 2 * IQR(black_elo) / length(black_elo)^(1 / 3)
g2 <- ggplot(Chess_project, aes(x = `Black ELO`)) +
  geom_histogram(
    binwidth = bw,
    fill = "green",
    color = "black",
    alpha = 0.5,
    show.legend = FALSE
  ) +
  labs(
    title = "(g2) Distribution of Black ELO",
    x = "Black ELO",
    y = "Frequency"
  ) +
  elo_hist_theme

## Combining
# Stack the two histograms vertically (patchwork `/`).
(g1 / g2) +
  plot_annotation(
    theme = theme(plot.title = element_text(size = 18, colour = "black"))
  ) +
  theme(text = element_text(family = "mono"))
## |        | White ELO | Black ELO |
## |--------|-----------|-----------|
## | Mean   | 2,535     | 2,526     |
## | Median | 2,558     | 2,553     |
## Boxplots: average centipawn loss per side and total moves
# Colours, boxplot layer and theme shared by the three plots.
box_fill <- "#FF8C00"
box_line <- "#0DDBCC"
box_layer <- geom_boxplot(
  fill = box_fill,
  color = box_line,
  alpha = 0.5,
  show.legend = FALSE
)
box_theme <- list(
  hrbrthemes::theme_modern_rc(),
  theme(
    axis.title = element_text(color = box_line),
    axis.text = element_text(color = box_line),
    plot.title = element_text(color = box_fill)
  )
)

## White AVG CP Loss
g3 <- ggplot(Chess_project, aes(x = `White Av CP Loss`)) +
  box_layer +
  labs(title = "(g3) White Av CP Loss", x = "White Av CP Loss") +
  box_theme

## Black AVG CP Loss
g4 <- ggplot(Chess_project, aes(x = `Black Av CP Loss`)) +
  box_layer +
  labs(title = "(g4) Black Av CP Loss", x = "Black Av CP Loss") +
  box_theme

## Total Moves
g5 <- ggplot(Chess_project, aes(x = Moves)) +
  box_layer +
  labs(title = "(g5) Moves", x = "Moves") +
  box_theme

## Combine
# g3 and g4 side by side on the top row, g5 across the bottom row.
((g3 | g4) / g5) +
  plot_annotation(
    theme = theme(plot.title = element_text(size = 18, colour = "black"))
  ) +
  theme(text = element_text(family = "mono"))
## Total Moves aggregated by Result of game
# Horizontal boxplots of Moves for each outcome, with an "X" drawn at the
# per-outcome mean.
Chess_project %>%
  mutate(Result = recode(Result,
                         "0" = "Black win",
                         "1" = "Draw",
                         "2" = "White win")) %>%
  ggplot(aes(Moves, Result, fill = Result)) +
  geom_boxplot(alpha = 0.5, color = "white") +
  # `fun.y` was deprecated in ggplot2 3.3.0 in favour of `fun`; using `fun`
  # gives the same summary without the deprecation warning.
  stat_summary(fun = mean, geom = "text", label = "X", color = "white") +
  scale_fill_manual(values = c("Black win" = "#ffe042",
                               "Draw" = "#e71989",
                               "White win" = "#00e1d9")) +
  labs(title = "(g6) Total moves made",
       subtitle = "disagregated by result of game",
       x = "Total Moves",
       y = "Result of game") +
  hrbrthemes::theme_modern_rc() +
  theme(legend.position = "none") +
  # NOTE(review): these labels and positions are hard-coded and will not
  # follow the data if it changes - confirm which statistic they represent.
  annotate("text", x = 38, y = 1, label = "42", color = "white") +
  annotate("text", x = 36, y = 2, label = "40", color = "white") +
  annotate("text", x = 39, y = 3, label = "43", color = "white")
# ggsave(file = "Total moves made aggregated by result of game.png",
# units = c("in"),
# width = 8,
# height = 5.5,
# dpi=700,
# g6)
## Scatter plots with a smoothed trend: each side's ELO vs. its own AVG CP loss
# NOTE(review): geom_smooth() is called without `method`, so ggplot2 picks its
# default smoother; the "linear model" wording in the subtitles may not match.
own_points <- geom_point(color = "#2b6be4", size = 2, alpha = 0.7)
own_trend <- geom_smooth(color = "#00008b")

## White ELO VS White AVG CP Loss
g7 <- ggplot(Chess_project, aes(x = `White ELO`, y = `White Av CP Loss`)) +
  own_points +
  own_trend +
  labs(
    title = "(g7) White ELO vs. AVG CP Loss",
    subtitle = "with smoothed linear model",
    x = "White ELO",
    y = "White AVG CP Loss"
  ) +
  ggthemes::theme_solarized()

## Black ELO Vs Black AVG CP Loss
g8 <- ggplot(Chess_project, aes(x = `Black ELO`, y = `Black Av CP Loss`)) +
  own_points +
  own_trend +
  labs(
    title = "(g8) Black ELO vs. AVG CP Loss",
    subtitle = "with smoothed linear model",
    x = "Black ELO",
    y = "Black AVG CP Loss"
  ) +
  ggthemes::theme_solarized()

## Combine graphs
# Stack g7 above g8.
(g7 / g8) +
  plot_annotation(
    theme = theme(plot.title = element_text(size = 18, colour = "black"))
  ) +
  theme(text = element_text(family = "mono"))
## White ELO vs Black AVG CP Loss
# Scatter plot of one side's rating against the opponent's average centipawn
# loss. The title is labelled (g9) to match the object name; it previously
# reused "(g7)".
g9 <- Chess_project %>%
  ggplot(aes(x = `White ELO`,
             y = `Black Av CP Loss`)) +
  geom_point(color = "#2b6be4", size = 2,
             alpha = 0.7) +
  geom_smooth(color = "#00008b") +
  labs(title = "(g9) White ELO vs. Black AVG CP Loss",
       subtitle = "with smoothed linear model",
       x = "White ELO",
       y = "Black AVG CP Loss") +
  ggthemes::theme_solarized()
## Black ELO vs White AVG CP Loss
# Same plot with the colours swapped; labelled (g10) instead of the
# duplicated "(g7)".
g10 <- Chess_project %>%
  ggplot(aes(x = `Black ELO`,
             y = `White Av CP Loss`)) +
  geom_point(color = "#2b6be4", size = 2,
             alpha = 0.7) +
  geom_smooth(color = "#00008b") +
  labs(title = "(g10) Black ELO vs. White AVG CP Loss",
       subtitle = "with smoothed linear model",
       x = "Black ELO",
       y = "White AVG CP Loss") +
  ggthemes::theme_solarized()
## Combine Graph
# Stack g9 above g10.
(g9 / g10) +
  plot_annotation(theme = theme(plot.title = element_text(size = 18,
                                                          colour = "black"))) +
  theme(text = element_text('mono'))
## White ELO vs. White AVG CP loss, coloured by game result
# Label the coded outcomes first, then plot points and connecting lines
# per outcome.
games_by_result <- mutate(
  Chess_project,
  Result = recode(
    Result,
    "0" = "Black win",
    "1" = "Draw",
    "2" = "White win"
  )
)
ggplot(
  games_by_result,
  aes(x = `White ELO`, y = `White Av CP Loss`, color = Result)
) +
  geom_point(size = 5, alpha = 0.3) +
  geom_line(size = 1) +
  labs(
    title = "(g11) White ELO vs. AVG CP Loss",
    subtitle = "Aggregated by Result of game",
    x = "White ELO",
    y = "White AVG CP Loss"
  ) +
  ggthemes::theme_solarized()
## Checking to make sure the avg cp Loss matches (Need to run code below for this to work)
# Chess_project %>%
# left_join(.,
# Chess_project %>%
# group_by(`White CP Loss List`) %>%
# summarise(White_AVG = round(mean(`White CP Loss List`[[1]]), digits = 0)),
# by = "White CP Loss List") %>%
# select(`White Av CP Loss`, White_AVG) %>%
# mutate(example = case_when(
# `White Av CP Loss` == White_AVG ~ "Yes",
# TRUE ~ "No"
# )) %>%
# filter(example == "Yes")
## Could use mutate but the group by gets rid of 4 rows because there are duplicates
# Chess_project %>%
# group_by(`White CP Loss List`) %>%
# mutate(White_AVG = round(mean(`White CP Loss List`[[1]]), digits = 0))
# Convert a character vector of bracketed list strings, e.g.
# "[47, 43, 44, -9, ...]", into a list of numeric vectors (one per element).
#
# x: character vector, one bracketed list per element.
# Returns a list the same length as `x`; each entry is the numeric vector
# parsed from the corresponding string.
parse_numeric_list <- function(x) {
  # Remove the opening bracket and the separators so that only
  # whitespace-separated numbers remain; the extraction pattern below would
  # otherwise keep the commas as part of each token.
  stripped <- gsub("[", "", x, fixed = TRUE)
  stripped <- gsub(",", "", stripped, fixed = TRUE)
  tokens <- str_extract_all(stripped, "\\(?[0-9,.-]+\\)?")
  # lapply() also copes with zero rows, unlike a `1:length(x)` index loop.
  lapply(tokens, as.numeric)
}

# Apply the same parsing to the three per-move list columns.
Chess_project <- Chess_project %>%
  mutate(
    `Evaluations List` = parse_numeric_list(`Evaluations List`),
    `White CP Loss List` = parse_numeric_list(`White CP Loss List`),
    `Black CP Loss List` = parse_numeric_list(`Black CP Loss List`)
  )
glimpse(Chess_project)
## Rows: 10,478
## Columns: 16
## $ Date <date> 2020-03-28, 2020-03-29, 2020-03-29, 2020-03-30, …
## $ `Event Name` <chr> "Philadelphia op 12th", "Philadelphia op 12th", "…
## $ `Event Rounds` <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9…
## $ Round <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 1…
## $ `White Name` <chr> "Niemann, Hans Moke", "Niemann, Hans Moke", "Goro…
## $ `Black Name` <chr> "Oberoi, Shelev", "Popilski, Gil", "Niemann, Hans…
## $ Result <fct> 0, 1, 2, 0, 2, 1, 0, 1, 0, 1, 2, 1, 1, 2, 1, 1, 0…
## $ `White ELO` <dbl> 2302, 2302, 2496, 2509, 2302, 2134, 2302, 2162, 2…
## $ `Black ELO` <dbl> 1924, 2502, 2302, 2302, 2405, 2302, 2145, 2302, 2…
## $ Moves <dbl> 39, 36, 50, 38, 39, 46, 44, 46, 56, 38, 57, 60, 6…
## $ `White Av CP Loss` <dbl> 22, 12, 46, 22, 23, 34, 24, 50, 32, 16, 37, 11, 1…
## $ `Black Av CP Loss` <dbl> 46, 13, 28, 38, 7, 34, 45, 50, 49, 12, 21, 11, 21…
## $ `Evaluations List` <list> <47, 43, 44, -9, 17, 10, 5, 0, 12, -30, -13, -51…
## $ `White CP Loss List` <list> <4, 53, 7, 5, 42, 38, 0, 13, 135, 0, 0, 5, 0, 22…
## $ `Black CP Loss List` <list> <1, 26, 0, 12, 17, 10, 19, 14, 35, 12, 10, 11, 1…
## $ `Analysis Depth` <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2…
# mean(Chess_project$`White CP Loss List`[[1]])
## g1
# Bar chart of the six players with the most games as White, with the game
# count printed on each bar.
Chess_project %>%
  count(`White Name`, name = "Count") %>%
  arrange(desc(Count)) %>%
  # Order the factor by Count so the bars are sorted by number of games.
  mutate(`White Name` = fct_reorder(`White Name`, Count)) %>%
  head() %>%
  ggplot(aes(x = `White Name`,
             y = Count,
             fill = `White Name`,
             label = Count)) +
  geom_bar(stat = "identity",
           alpha = 0.5,
           show.legend = FALSE,
           color = "black") +
  scale_fill_brewer(palette = "YlGnBu", direction = 1) +
  labs(title = "(g1) Barplot of Amount of Games Played",
       x = "Player",
       y = "How many games played by player") +
  # The complete theme must come before any theme() tweaks: a theme() call
  # placed ahead of it is replaced wholesale. The earlier
  # legend.position/axis.text.x call was dropped for that reason.
  ggthemes::theme_solarized() +
  geom_text(nudge_y = 2,
            vjust = 3,
            color = "black",
            size = 4.3) +
  theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1))
## Correlation heatmap of the numeric columns
## (brewer.pal() comes from RColorBrewer, heatmap.2() from gplots)
corrmatrix <- as.data.frame(Chess_project)
colfunc <- colorRampPalette(brewer.pal(9, "BrBG"))

# Pairwise correlations over rows that are complete in every numeric column;
# computed once and used for both the cell colours and the printed values.
cor_values <- cor(Filter(is.numeric, corrmatrix), use = "complete.obs")

heatmap.2(
  cor_values,
  Rowv = FALSE,
  Colv = FALSE,
  dendrogram = "none",
  lwid = c(.1, 2),
  lhei = c(.1, 2),
  col = colfunc(15),
  cellnote = round(cor_values, 2),
  notecol = "black",
  key = FALSE,
  trace = "none",
  margins = c(10, 10)
)
# The parsed per-move list columns are not used beyond this point; drop them.
Chess_project <- select(
  Chess_project,
  -c(`Evaluations List`, `White CP Loss List`, `Black CP Loss List`)
)

# Mean number of moves for each game outcome, smallest first.
Chess_project %>%
  select(Result, Moves) %>%
  mutate(
    Result = recode(
      Result,
      "0" = "Black win",
      "1" = "Draw",
      "2" = "White win"
    )
  ) %>%
  group_by(Result) %>%
  summarise(mean_moves = mean(Moves)) %>%
  arrange(mean_moves)
## # A tibble: 3 × 2
## Result mean_moves
## <fct> <dbl>
## 1 Draw 42.8
## 2 Black win 45.0
## 3 White win 45.8
# One-way ANOVA: does the number of moves differ between game outcomes?
Chess_project %>%
  select(Result, Moves) %>%
  mutate(
    Result = recode(
      Result,
      "0" = "Black win",
      "1" = "Draw",
      "2" = "White win"
    )
  ) %>%
  aov(Moves ~ Result, data = .) %>%
  summary() # ANOVA table (F test across the three outcomes)
## Df Sum Sq Mean Sq F value Pr(>F)
## Result 2 16884 8442 26.39 3.7e-12 ***
## Residuals 10475 3351206 320
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise follow-up to the ANOVA: which outcomes differ in number of moves?
Chess_project %>%
  select(Result, Moves) %>%
  mutate(
    Result = recode(
      Result,
      "0" = "Black win",
      "1" = "Draw",
      "2" = "White win"
    )
  ) %>%
  aov(Moves ~ Result, data = .) %>%
  TukeyHSD() # Tukey's honestly significant difference
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Moves ~ Result, data = .)
##
## $Result
## diff lwr upr p adj
## Draw-Black win -2.1514386 -3.0923492 -1.210528 0.0000003
## White win-Black win 0.8585284 -0.2275301 1.944587 0.1526681
## White win-Draw 3.0099670 1.9538273 4.066107 0.0000000
# Give the modelling columns names without spaces and relabel the outcome
# codes with space-free level names.
Chess_project <- Chess_project %>%
  rename(
    White_ELO = `White ELO`,
    Black_ELO = `Black ELO`,
    Event_Rounds = `Event Rounds`,
    White_Av_CP_Loss = `White Av CP Loss`,
    Black_Av_CP_Loss = `Black Av CP Loss`
  ) %>%
  mutate(
    Result = recode(
      Result,
      "0" = "Black_win",
      "1" = "Draw",
      "2" = "White_win"
    )
  )

# Replace Result with dummy columns; the first dummy is dropped and the
# original Result column is removed.
Chess_project <- dummy_cols(
  Chess_project,
  select_columns = "Result",
  remove_first_dummy = TRUE,
  remove_selected_columns = TRUE
)
# Names of the numeric columns (taken before rows with missing values go).
num.cols <- names(Chess_project)[vapply(Chess_project, is.numeric, logical(1))]

# Keep only complete rows, then print each numeric column's skewness.
Chess_project <- drop_na(Chess_project)
for (col_name in num.cols) {
  print(col_name)
  print(skewness(Chess_project[[col_name]]))
}
## [1] "Event_Rounds"
## [1] -0.4466047
## [1] "Round"
## [1] 0.3724137
## [1] "White_ELO"
## [1] -1.373876
## [1] "Black_ELO"
## [1] -1.77574
## [1] "Moves"
## [1] 1.048866
## [1] "White_Av_CP_Loss"
## [1] 3.558416
## [1] "Black_Av_CP_Loss"
## [1] 2.976917
## [1] "Analysis Depth"
## [1] NaN
## [1] "Result_Draw"
## [1] 0.3714695
## [1] "Result_White_win"
## [1] 1.227654
# Analysis Depth gave NaN skewness above, so it is removed.
Chess_project <- select(Chess_project, -`Analysis Depth`)

# log(x + 1) transform of four columns; White_ELO is left on its
# original scale.
for (col_name in c("Black_ELO", "Moves", "White_Av_CP_Loss", "Black_Av_CP_Loss")) {
  Chess_project[[col_name]] <- log(Chess_project[[col_name]] + 1)
}

# Keep only the numeric columns for modelling.
Chess_project <- select_if(Chess_project, is.numeric)
# Random 70/30 train/validation split.
# Fix the RNG seed so the split, and every number computed from it further
# down, can be reproduced from run to run.
set.seed(42)
# sample() needs a whole number of rows; make the truncation explicit.
n_train <- floor(nrow(Chess_project) * 0.7)
train.index <- sample(rownames(Chess_project), n_train)
chess.train <- Chess_project[train.index, ]
# Every row not drawn for training goes to the validation set.
valid.index <- setdiff(rownames(Chess_project), train.index)
chess.valid <- Chess_project[valid.index, ]
# Min-max scale every column with the TRAINING set's minimum and range, so the
# validation set is scaled by the same constants (and may fall outside [0, 1]).
chess.train.norm <- chess.train
chess.valid.norm <- chess.valid
cols <- colnames(chess.train)
for (col_name in cols) {
  train_min <- min(chess.train[[col_name]])
  train_span <- max(chess.train[[col_name]]) - train_min
  chess.valid.norm[[col_name]] <-
    (chess.valid.norm[[col_name]] - train_min) / train_span
  chess.train.norm[[col_name]] <-
    (chess.train.norm[[col_name]] - train_min) / train_span
}
summary(chess.train.norm)
## Event_Rounds Round White_ELO Black_ELO
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3810 1st Qu.:0.1351 1st Qu.:0.7778 1st Qu.:0.8803
## Median :0.3810 Median :0.2703 Median :0.8376 Median :0.9154
## Mean :0.4009 Mean :0.2877 Mean :0.8257 Mean :0.9036
## 3rd Qu.:0.4762 3rd Qu.:0.4257 3rd Qu.:0.9030 3rd Qu.:0.9502
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Moves White_Av_CP_Loss Black_Av_CP_Loss Result_Draw
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.6769 1st Qu.:0.2923 1st Qu.:0.3302 1st Qu.:0.0000
## Median :0.7220 Median :0.3778 Median :0.4260 Median :0.0000
## Mean :0.7176 Mean :0.3765 Mean :0.4168 Mean :0.4127
## 3rd Qu.:0.7692 3rd Qu.:0.4633 3rd Qu.:0.5060 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Result_White_win
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2341
## 3rd Qu.:0.0000
## Max. :1.0000
# Summary of the validation set after scaling with the training-set min and
# max; values outside [0, 1] mean the validation data exceeds the training range.
summary(chess.valid.norm)
## Event_Rounds Round White_ELO Black_ELO
## Min. :0.0000 Min. :0.0000 Min. :0.1403 Min. :0.05715
## 1st Qu.:0.3810 1st Qu.:0.1351 1st Qu.:0.7755 1st Qu.:0.87882
## Median :0.3810 Median :0.2703 Median :0.8346 Median :0.91403
## Mean :0.4012 Mean :0.2925 Mean :0.8248 Mean :0.90324
## 3rd Qu.:0.4762 3rd Qu.:0.4730 3rd Qu.:0.9005 3rd Qu.:0.95021
## Max. :1.0000 Max. :0.9730 Max. :1.0000 Max. :1.00000
## Moves White_Av_CP_Loss Black_Av_CP_Loss Result_Draw
## Min. :0.0000 Min. :-0.2923 Min. :-0.2167 Min. :0.0000
## 1st Qu.:0.6712 1st Qu.: 0.2923 1st Qu.: 0.3422 1st Qu.:0.0000
## Median :0.7175 Median : 0.3778 Median : 0.4260 Median :0.0000
## Mean :0.7132 Mean : 0.3787 Mean : 0.4196 Mean :0.3992
## 3rd Qu.:0.7657 3rd Qu.: 0.4691 3rd Qu.: 0.5110 3rd Qu.:1.0000
## Max. :0.9472 Max. : 0.8311 Max. : 0.8088 Max. :1.0000
## Result_White_win
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2485
## 3rd Qu.:0.0000
## Max. :1.0000
# Map the predictions back to the ELO scale by inverting the min-max scaling
# with the training-set range, then compare against the actual validation
# ratings.
# NOTE(review): predict.nn.3.3 is defined outside the visible code; presumably
# the network's normalised White_ELO predictions for chess.valid - confirm.
min_White_ELO <- min(chess.train$White_ELO)
max_White_ELO <- max(chess.train$White_ELO)
elo_span <- max_White_ELO - min_White_ELO
predicted_elo <- min_White_ELO + predict.nn.3.3 * elo_span
actpred <- data.frame(
  actual = chess.valid$White_ELO,
  predicted = predicted_elo
)
RMSE(actpred$predicted, actpred$actual)
## [1] 136.7458
# First rows of predict.nn.3.3 (defined outside the visible code; presumably
# predictions on the normalised scale - TODO confirm).
head(predict.nn.3.3)
## [,1]
## [1,] 0.6339028
## [2,] 0.7424240
## [3,] 0.7297766
## [4,] 0.6922631
## [5,] 0.6294488
## [6,] 0.7131989
# First rows of actual vs. back-transformed predicted White ELO.
head(actpred)
## actual predicted
## 1 2496 2153.833
## 2 2302 2369.681
## 3 2162 2344.526
## 4 2302 2269.911
## 5 1853 2144.974
## 6 2313 2311.553