Learning Objective



Package Installation and Load

# Install ReinforcementLearning only when it is not already available, then
# attach it. Re-installing on every run is slow and needs network access.
if (!requireNamespace("ReinforcementLearning", quietly = TRUE)) {
  install.packages("ReinforcementLearning")
}
library("ReinforcementLearning")



Data Preparation

# Load the example tic-tac-toe dataset and report its number of rows.
data(list = "tictactoe")
print(dim(tictactoe)[1L])
[1] 406541
# Preview the first 30 rows of the dataset.
head(tictactoe, n = 30)

The call above displays the first 30 observations of a representative dataset containing game states of randomly sampled tic-tac-toe games. In this dataset, the first column contains a representation of the current board state in a match. The second column denotes the observed action of player X in this state, whereas the third column contains a representation of the resulting board state after performing the action. The fourth column specifies the resulting reward for player X. This dataset is thus sufficient as input for training the agent.



Display Tic-Tac-Toe Board

# Print a tic-tac-toe board to the console.
#
# Args:
#   x:      Board state as a single string, one character per cell:
#           "." for an empty cell, "X" for player X, "B" for the opponent.
#           Any other character is skipped.
#   action: Optional action label such as "c4". Its second character is the
#           1-based position in `x` that is overwritten with "X" before the
#           board is printed. The default "" leaves `x` untouched.
#
# Returns:
#   NULL, invisibly; the function is called for its printed output.
plot_board <- function(x, action = "") {
  if (action != "") {
    # The target cell is encoded as the second character of the label.
    cell <- as.integer(substr(action, 2, 2))
    substr(x, cell, cell) <- "X"
  }

  # Translate every cell in one vectorised lookup instead of growing a
  # vector inside a loop; unrecognised characters are dropped.
  symbols <- c("." = "*", "X" = "x", "B" = "o")
  cells <- strsplit(as.character(x), "")[[1]]
  pieces <- unname(symbols[cells[cells %in% names(symbols)]])

  # Join the cells as "a | b | c | ..." and strip the trailing separator.
  board <- gsub(" \\|$", "", paste(pieces, "|", collapse = " "))

  # Break a nine-cell board into three rows separated by rules. The first
  # row gets a leading space so it lines up with the rows that follow.
  board_lines <- gsub(
    "(. \\| . \\| . )\\|( . \\| . \\| . )\\|( . \\| . \\| .)",
    "\n \\1\n-----------\n\\2\n-----------\n\\3",
    board
  )

  writeLines(board_lines)
  invisible(NULL)
}
# Draw a board where X occupies cell 7 and the opponent occupies cell 9.
plot_board(x = "......X.B")

 * | * | * 
-----------
 * | * | * 
-----------
 x | * | o
# Draw the same board after X additionally plays action "c4" (cell 4).
plot_board(x = "......X.B", action = "c4")

 * | * | * 
-----------
 x | * | * 
-----------
 x | * | o



Build the model

# Fit the reinforcement learning model on the recorded game transitions.
# The s, a, r and s_new arguments name the dataset columns that hold the
# state, action, reward and next state.
model <- ReinforcementLearning(
  data = tictactoe,
  s = "State",
  a = "Action",
  r = "Reward",
  s_new = "NextState",
  iter = 1
)

Calculate optimal policy

# Derive the policy from the fitted model: a named character vector whose
# names are board states and whose values are actions.
pol <- computePolicy(model)
# Show the first few state -> action entries of the policy.
head(pol)
.XXBB..XB XXBB.B.X. .XBB..BXX BXX...B.. ..XB..... XBXBXB... 
     "c1"      "c5"      "c5"      "c4"      "c5"      "c9" 

Let’s play tic-tac-toe

# Ask the fitted model which action X should take in a hand-picked state.
data_unseen <- data.frame(State = "B.....X..", stringsAsFactors = FALSE)
predict(model, data_unseen[["State"]])
[1] "c5"

REF

LS0tCnRpdGxlOiAi5Lq65bel5pm66IO955+l6K2Y5Y+K5oeJ55So6K2J5pu477yI5YW86K6A5Yi277yJIgpzdWJ0aXRsZTogJ0FJIHRvIFBsYXkgVGljLVRhYy1Ub2UgJwpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIyMgTGVhcm5pbmcgT2JqZWN0aXZlCiogVGhpcyB0dXRvcmlhbCBpcyB0byBkZW1vbnN0cmF0ZSBob3cgdG8gYnVpbGQgdGhlIHJlaW5mb3JjZW1lbnQgbGVhcm5pbmcgbW9kZWwgaW4gUjxicj4KKiBBZnRlciB0aGlzIHR1dG9yaWFsLCBzdHVkZW50cyB3aWxsIGdhaW4gc29tZSBpbnNpZ2h0IG9mIGhvdyBBbHBoYUdvIHdhcyBidWlsdC4KKiBUaGlzIGlzIGEgbG93LWNvZGUgdHV0b3JpYWwgaW4gUgoKPGJyPjxicj4KCiMjIyBQYWNrYWdlIEluc3RhbGxhdGlvbiBhbmQgTG9hZApgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcygnUmVpbmZvcmNlbWVudExlYXJuaW5nJykKYGBgCgoKYGBge3J9CmxpYnJhcnkoJ1JlaW5mb3JjZW1lbnRMZWFybmluZycpCmBgYAoKPGJyPjxicj4KCiMjIyBEYXRhIFByZXBhcmF0aW9uCmBgYHtyfQpkYXRhKCJ0aWN0YWN0b2UiKQpwcmludChucm93KHRpY3RhY3RvZSkpCmhlYWQodGljdGFjdG9lLCAzMCkKYGBgCnRoZSBmaXJzdCBmaXZlIG9ic2VydmF0aW9ucyBvZiBhIHJlcHJlc2VudGF0aXZlIGRhdGFzZXQgY29udGFpbmluZyBnYW1lIHN0YXRlcyBvZiByYW5kb21seSBzYW1wbGVkIHRpYy10YWMtdG9lIGdhbWVzLiBJbiB0aGlzIGRhdGFzZXQsIHRoZSBmaXJzdCBjb2x1bW4gY29udGFpbnMgYSByZXByZXNlbnRhdGlvbiBvZiB0aGUgY3VycmVudCBib2FyZCBzdGF0ZSBpbiBhIG1hdGNoLiBUaGUgc2Vjb25kIGNvbHVtbiBkZW5vdGVzIHRoZSBvYnNlcnZlZCBhY3Rpb24gb2YgcGxheWVyIFggaW4gdGhpcyBzdGF0ZSwgd2hlcmVhcyB0aGUgdGhpcmQgY29sdW1uIGNvbnRhaW5zIGEgcmVwcmVzZW50YXRpb24gb2YgdGhlIHJlc3VsdGluZyBib2FyZCBzdGF0ZSBhZnRlciBwZXJmb3JtaW5nIHRoZSBhY3Rpb24uIFRoZSBmb3VydGggY29sdW1uIHNwZWNpZmllcyB0aGUgcmVzdWx0aW5nIHJld2FyZCBmb3IgcGxheWVyIFguIFRoaXMgZGF0YXNldCBpcyB0aHVzIHN1ZmZpY2llbnQgYXMgaW5wdXQgZm9yIGxlYXJuaW5nIHRoZSBhZ2VudC4KCjxicj48YnI+CgojIyMgRGlzcGxheSBUaWNUYWtUb2UgQm9hcmQKYGBge3J9CnBsb3RfYm9hcmQgPC0gZnVuY3Rpb24oeCxhY3Rpb249IiIpIHsKICBpZiAoYWN0aW9uIT0iIikgCiAgewogICAgYWN0aW9uTnVtYmVyPXN1YnN0cihhY3Rpb24sMiwyKQogICAgc3Vic3RyKHgsIGFjdGlvbk51bWJlciAsIGFjdGlvbk51bWJlcikgPC0gIlgiCiAgfQogIHN0cmluZz1OVUxMCiAgZm9yIChpIGluIDE6bmNoYXIoeCkpCiAgewogICAgaWYgKHN1YnN0cih4LGksaSk9PScuJykgc3RyaW5nPWMoc3RyaW5nLCcwJykKICAgIGlmIChzdWJzdHIoeCxpLGkpPT0nWCcpIHN0cmluZz1jKHN0cmluZywnMScpCiAgICBpZiAoc3Vic3RyKHgsaSxpKT09J0InKSBzdHJpbmc9
YyhzdHJpbmcsJy0xJykKICB9CiAgcGllY2VkIDwtIHJlcCgiIiwgbGVuZ3RoKHN0cmluZykpCiAgcGllY2VkW3doaWNoKHN0cmluZyA9PSAxKV0gPC0gIngiCiAgcGllY2VkW3doaWNoKHN0cmluZyA9PSAtMSldIDwtICJvIgogIHBpZWNlZFt3aGljaChzdHJpbmcgPT0gMCldIDwtICIqIgogIGJvYXJkIDwtIGdzdWIoIiBcXHwkIiwgIiIsIHBhc3RlKHBpZWNlZCwgInwiLCBjb2xsYXBzZSA9ICIgIikpCiAgYm9hcmRfbGluZXMgPC0gZ3N1YigiKC4gXFx8IC4gXFx8IC4gKVxcfCggLiBcXHwgLiBcXHwgLiApXFx8KCAuIFxcfCAuIFxcfCAuKSIsIAogICAgICAgICAgICAgICAgICAgICAgIlxuIFxcMVxuLS0tLS0tLS0tLS1cblxcMlxuLS0tLS0tLS0tLS1cblxcMyIsCiAgICAgICAgICAgICAgICAgICAgICBib2FyZAogICkKICByZXR1cm4od3JpdGVMaW5lcyhib2FyZF9saW5lcykpCn0KCmBgYAoKCmBgYHtyfQpwbG90X2JvYXJkKCcuLi4uLi5YLkInKQpgYGAKYGBge3J9CnBsb3RfYm9hcmQoJy4uLi4uLlguQicsICdjNCcpCmBgYAoKCjxicj48YnI+CgojIyMgQnVpbGQgdGhlIG1vZGVsIAoKYGBge3J9Cm1vZGVsIDwtIFJlaW5mb3JjZW1lbnRMZWFybmluZyhkYXRhID0gdGljdGFjdG9lLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHMgPSAiU3RhdGUiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGEgPSAiQWN0aW9uIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICByID0gIlJld2FyZCIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc19uZXcgPSAiTmV4dFN0YXRlIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpdGVyID0gMSkKYGBgCgoKIyMjIENhbGN1bGF0ZSBvcHRpbWFsIHBvbGljeQpgYGB7cn0KcG9sID0gY29tcHV0ZVBvbGljeShtb2RlbCkKIyBQcmludCBwb2xpY3kKaGVhZChwb2wpCmBgYAojIyMgTGV0J3MgcGxheSB0aWMtdGFrLXRvZSAKYGBge3J9CmRhdGFJbnB1dCA8LSBkYXRhLmZyYW1lKFN0YXRlID0nQi4uLi4uWC4uJywgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKQpwcmVkaWN0KG1vZGVsLCBkYXRhSW5wdXQkU3RhdGUpCmBgYAoKCgojIyMgUkVGCi0gaHR0cHM6Ly9jcmFuLnItcHJvamVjdC5vcmcvd2ViL3BhY2thZ2VzL1JlaW5mb3JjZW1lbnRMZWFybmluZy92aWduZXR0ZXMvUmVpbmZvcmNlbWVudExlYXJuaW5nLmh0bWwKLSBodHRwczovL3d3dy5yLWJsb2dnZXJzLmNvbS8yMDE5LzExL3ItaW5mb3JjZW1lbnQtbGVhcm5pbmctcGFydC1vbmUtdGljLXRhYy10b2Uv