Learning Objective
- This tutorial demonstrates how to build a reinforcement learning model in R
- After this tutorial, students will have gained some insight into how AlphaGo was built.
- This is a low-code tutorial in R
Package Installation and Loading
# Install the package only when it is not already available, so re-running
# the notebook does not trigger a fresh download every time.
if (!requireNamespace("ReinforcementLearning", quietly = TRUE)) {
  install.packages("ReinforcementLearning")
}
library("ReinforcementLearning")
Data Preparation
# Load the `tictactoe` data set and print how many rows it has.
data(list = "tictactoe")
print(nrow(tictactoe))
[1] 406541
# Preview the first 30 rows of the data set.
head(tictactoe, n = 30)
The table above shows the first 30 observations of a representative dataset containing game states of randomly sampled tic-tac-toe games. In this dataset, the first column contains a representation of the current board state in a match. The second column denotes the observed action of player X in this state, whereas the third column contains a representation of the resulting board state after performing the action. The fourth column specifies the resulting reward for player X. This dataset is thus sufficient as input for training the agent.
Display Tic-Tac-Toe Board
#' Print a tic-tac-toe board to the console
#'
#' @param x A single string of exactly nine characters describing the board
#'   in row-major order: "." for an empty cell, "X" for player X and "B" for
#'   the other player.
#' @param action Optional action label such as "c4". Its second character is
#'   read as the 1-based cell index (1-9) where an "X" is placed before the
#'   board is printed. The default "" prints the board unchanged.
#' @return `NULL`, invisibly. The function is called for its printed output:
#'   a blank line followed by three rows ("x", "o", "*" for X, B and empty)
#'   separated by dashed rules.
plot_board <- function(x, action = "") {
  stopifnot(
    is.character(x), length(x) == 1L,
    is.character(action), length(action) == 1L, !is.na(action)
  )
  # Any other character would silently shift the cells and break the layout.
  if (!grepl("^[.XB]{9}$", x)) {
    stop("`x` must be a 9-character string made of '.', 'X' and 'B'.",
         call. = FALSE)
  }

  if (action != "") {
    # Action labels look like "c4": the digit after the prefix is the cell.
    cell <- match(substr(action, 2, 2), as.character(1:9))
    if (is.na(cell)) {
      stop("`action` must carry a cell digit 1-9 as its second character.",
           call. = FALSE)
    }
    substr(x, cell, cell) <- "X"
  }

  # Translate every board character to its display symbol in one lookup.
  symbols <- c("." = "*", "X" = "x", "B" = "o")
  cells <- unname(symbols[strsplit(x, "", fixed = TRUE)[[1]]])

  grid <- matrix(cells, nrow = 3, byrow = TRUE)
  rows <- paste0(" ", paste(grid[, 1], grid[, 2], grid[, 3], sep = " | "))
  # The first two rows end with a space; the last row does not.
  rows[1:2] <- paste0(rows[1:2], " ")

  rule <- "-----------"
  writeLines(c("", rows[1], rule, rows[2], rule, rows[3]))
  invisible(NULL)
}
# Print a board with X in cell 7 and the opponent (B) in cell 9.
plot_board("......X.B")
* | * | *
-----------
* | * | *
-----------
x | * | o
# Print the same board after applying action "c4" (X placed in cell 4).
plot_board("......X.B", "c4")
* | * | *
-----------
x | * | *
-----------
x | * | o
Build the model
# Fit the model on the sampled transitions. The `s`, `a`, `r` and `s_new`
# arguments name the state, action, reward and next-state columns of
# `tictactoe`; `iter = 1` requests a single iteration.
model <- ReinforcementLearning(
  data = tictactoe,
  s = "State",
  a = "Action",
  r = "Reward",
  s_new = "NextState",
  iter = 1
)
Calculate optimal policy
# Compute the policy from the fitted model.
pol <- computePolicy(model)
# Show the first few entries of the policy.
head(pol)
.XXBB..XB XXBB.B.X. .XBB..BXX BXX...B.. ..XB..... XBXBXB...
"c1" "c5" "c5" "c4" "c5" "c9"
Let’s play tic-tac-toe
# Wrap a single board state in a data frame, then ask the model which
# action it would take for that state.
data_unseen <- data.frame(State = "B.....X..", stringsAsFactors = FALSE)
predict(model, data_unseen$State)
[1] "c5"
LS0tCnRpdGxlOiAi5Lq65bel5pm66IO955+l6K2Y5Y+K5oeJ55So6K2J5pu477yI5YW86K6A5Yi277yJIgpzdWJ0aXRsZTogJ0FJIHRvIFBsYXkgVGljLVRhYy1Ub2UgJwpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyMgTGVhcm5pbmcgT2JqZWN0aXZlCiogVGhpcyB0dXRvcmlhbCBpcyB0byBkZW1vbnN0cmF0ZSBob3cgdG8gYnVpbGQgdGhlIHJlaW5mb3JjZW1lbnQgbGVhcm5pbmcgbW9kZWwgaW4gUjxicj4KKiBBZnRlciB0aGlzIHR1dG9yaWFsLCBzdHVkZW50cyB3aWxsIGdhaW4gc29tZSBpbnNpZ2h0IG9mIGhvdyBBbHBoYUdvIHdhcyBidWlsdC4KKiBUaGlzIGlzIGEgbG93LWNvZGUgdHV0b3JpYWwgaW4gUgoKPGJyPjxicj4KCiMjIyBQYWNrYWdlIEluc3RhbGxhdGlvbiBhbmQgTG9hZApgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcygnUmVpbmZvcmNlbWVudExlYXJuaW5nJykKYGBgCgoKYGBge3J9CmxpYnJhcnkoJ1JlaW5mb3JjZW1lbnRMZWFybmluZycpCmBgYAoKPGJyPjxicj4KCiMjIyBEYXRhIFByZXBhcmF0aW9uCmBgYHtyfQpkYXRhKCJ0aWN0YWN0b2UiKQpwcmludChucm93KHRpY3RhY3RvZSkpCmhlYWQodGljdGFjdG9lLCAzMCkKYGBgCnRoZSBmaXJzdCBmaXZlIG9ic2VydmF0aW9ucyBvZiBhIHJlcHJlc2VudGF0aXZlIGRhdGFzZXQgY29udGFpbmluZyBnYW1lIHN0YXRlcyBvZiByYW5kb21seSBzYW1wbGVkIHRpYy10YWMtdG9lIGdhbWVzLiBJbiB0aGlzIGRhdGFzZXQsIHRoZSBmaXJzdCBjb2x1bW4gY29udGFpbnMgYSByZXByZXNlbnRhdGlvbiBvZiB0aGUgY3VycmVudCBib2FyZCBzdGF0ZSBpbiBhIG1hdGNoLiBUaGUgc2Vjb25kIGNvbHVtbiBkZW5vdGVzIHRoZSBvYnNlcnZlZCBhY3Rpb24gb2YgcGxheWVyIFggaW4gdGhpcyBzdGF0ZSwgd2hlcmVhcyB0aGUgdGhpcmQgY29sdW1uIGNvbnRhaW5zIGEgcmVwcmVzZW50YXRpb24gb2YgdGhlIHJlc3VsdGluZyBib2FyZCBzdGF0ZSBhZnRlciBwZXJmb3JtaW5nIHRoZSBhY3Rpb24uIFRoZSBmb3VydGggY29sdW1uIHNwZWNpZmllcyB0aGUgcmVzdWx0aW5nIHJld2FyZCBmb3IgcGxheWVyIFguIFRoaXMgZGF0YXNldCBpcyB0aHVzIHN1ZmZpY2llbnQgYXMgaW5wdXQgZm9yIGxlYXJuaW5nIHRoZSBhZ2VudC4KCjxicj48YnI+CgojIyMgRGlzcGxheSBUaWNUYWtUb2UgQm9hcmQKYGBge3J9CnBsb3RfYm9hcmQgPC0gZnVuY3Rpb24oeCxhY3Rpb249IiIpIHsKICBpZiAoYWN0aW9uIT0iIikgCiAgewogICAgYWN0aW9uTnVtYmVyPXN1YnN0cihhY3Rpb24sMiwyKQogICAgc3Vic3RyKHgsIGFjdGlvbk51bWJlciAsIGFjdGlvbk51bWJlcikgPC0gIlgiCiAgfQogIHN0cmluZz1OVUxMCiAgZm9yIChpIGluIDE6bmNoYXIoeCkpCiAgewogICAgaWYgKHN1YnN0cih4LGksaSk9PScuJykgc3RyaW5nPWMoc3RyaW5nLCcwJykKICAgIGlmIChzdWJzdHIoeCxpLGkpPT0nWCcpIHN0cmluZz1jKHN0cmluZywnMScpCiAgICBpZiAoc3Vic3RyKHgsaSxpKT09J0InKSBzdHJpbmc9
YyhzdHJpbmcsJy0xJykKICB9CiAgcGllY2VkIDwtIHJlcCgiIiwgbGVuZ3RoKHN0cmluZykpCiAgcGllY2VkW3doaWNoKHN0cmluZyA9PSAxKV0gPC0gIngiCiAgcGllY2VkW3doaWNoKHN0cmluZyA9PSAtMSldIDwtICJvIgogIHBpZWNlZFt3aGljaChzdHJpbmcgPT0gMCldIDwtICIqIgogIGJvYXJkIDwtIGdzdWIoIiBcXHwkIiwgIiIsIHBhc3RlKHBpZWNlZCwgInwiLCBjb2xsYXBzZSA9ICIgIikpCiAgYm9hcmRfbGluZXMgPC0gZ3N1YigiKC4gXFx8IC4gXFx8IC4gKVxcfCggLiBcXHwgLiBcXHwgLiApXFx8KCAuIFxcfCAuIFxcfCAuKSIsIAogICAgICAgICAgICAgICAgICAgICAgIlxuIFxcMVxuLS0tLS0tLS0tLS1cblxcMlxuLS0tLS0tLS0tLS1cblxcMyIsCiAgICAgICAgICAgICAgICAgICAgICBib2FyZAogICkKICByZXR1cm4od3JpdGVMaW5lcyhib2FyZF9saW5lcykpCn0KCmBgYAoKCmBgYHtyfQpwbG90X2JvYXJkKCcuLi4uLi5YLkInKQpgYGAKYGBge3J9CnBsb3RfYm9hcmQoJy4uLi4uLlguQicsICdjNCcpCmBgYAoKCjxicj48YnI+CgojIyMgQnVpbGQgdGhlIG1vZGVsIAoKYGBge3J9Cm1vZGVsIDwtIFJlaW5mb3JjZW1lbnRMZWFybmluZyhkYXRhID0gdGljdGFjdG9lLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHMgPSAiU3RhdGUiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGEgPSAiQWN0aW9uIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByID0gIlJld2FyZCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc19uZXcgPSAiTmV4dFN0YXRlIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpdGVyID0gMSkKYGBgCgoKIyMjIENhbGN1bGF0ZSBvcHRpbWFsIHBvbGljeQpgYGB7cn0KcG9sID0gY29tcHV0ZVBvbGljeShtb2RlbCkKIyBQcmludCBwb2xpY3kKaGVhZChwb2wpCmBgYAojIyMgTGV0J3MgcGxheSB0aWMtdGFrLXRvZSAKYGBge3J9CmRhdGFJbnB1dCA8LSBkYXRhLmZyYW1lKFN0YXRlID0nQi4uLi4uWC4uJywgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQpwcmVkaWN0KG1vZGVsLCBkYXRhSW5wdXQkU3RhdGUpCmBgYAoKCgojIyMgUkVGCi0gaHR0cHM6Ly9jcmFuLnItcHJvamVjdC5vcmcvd2ViL3BhY2thZ2VzL1JlaW5mb3JjZW1lbnRMZWFybmluZy92aWduZXR0ZXMvUmVpbmZvcmNlbWVudExlYXJuaW5nLmh0bWwKLSBodHRwczovL3d3dy5yLWJsb2dnZXJzLmNvbS8yMDE5LzExL3ItaW5mb3JjZW1lbnQtbGVhcm5pbmctcGFydC1vbmUtdGljLXRhYy10b2Uv