Set up
# Transition probabilities as a 3 x 3 x 2 array: one 3 x 3 matrix per action.
# Each matrix is written row by row so that every row visibly sums to 1,
# which is consistent with rows being the current state and columns the next
# state (the [S, S, A] layout used by MDPtoolbox -- confirm against its docs).
transition1 <- array(
  c(
    # action 1
    matrix(c(.8, .15, .05,
             .3, .5, .2,
             .7, .2, .1), nrow = 3, byrow = TRUE),
    # action 2
    matrix(c(.1, .2, .7,
             0, .5, .5,
             .2, .3, .5), nrow = 3, byrow = TRUE)
  ),
  dim = c(3, 3, 2)
)
# Reward both actions equally: every entry of the reward array is 100.
reward3 <- array(100, dim = c(3, 3, 2))
Discount Factor = 0.05
# Solve the MDP by policy iteration with a discount factor of 0.05,
# then show the policy it returns.
disc <- 0.05
MDP_Computation <- mdp_policy_iteration(
  P = transition1,
  R = reward3,
  discount = disc
)
print(MDP_Computation[["policy"]])
## [1] 1 1 1
# Value function of the "slacking off" policy, c(1, 1, 1).
# With every reward equal to 100, the value is 100 / (1 - disc) in each state.
mdp_eval_policy_iterative(
  P = transition1,
  R = reward3,
  discount = disc,
  policy = rep(1, 3)
)
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
## [,1]
## [1,] 105.2632
## [2,] 105.2632
## [3,] 105.2632
# Value function of the "working" policy, c(2, 2, 2), for comparison.
mdp_eval_policy_iterative(
  P = transition1,
  R = reward3,
  discount = disc,
  policy = rep(2, 3)
)
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
## [,1]
## [1,] 105.2632
## [2,] 105.2632
## [3,] 105.2632
Discount Factor = 0.65
# Solve the MDP by policy iteration with a discount factor of 0.65,
# then show the policy it returns.
disc <- 0.65
MDP_Computation <- mdp_policy_iteration(
  P = transition1,
  R = reward3,
  discount = disc
)
print(MDP_Computation[["policy"]])
## [1] 1 1 1
# Value function of the "slacking off" policy, c(1, 1, 1).
# With every reward equal to 100, the value is 100 / (1 - disc) in each state.
mdp_eval_policy_iterative(
  P = transition1,
  R = reward3,
  discount = disc,
  policy = rep(1, 3)
)
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
## [,1]
## [1,] 285.7142
## [2,] 285.7142
## [3,] 285.7142
# Value function of the "working" policy, c(2, 2, 2), for comparison.
mdp_eval_policy_iterative(
  P = transition1,
  R = reward3,
  discount = disc,
  policy = rep(2, 3)
)
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
## [,1]
## [1,] 285.7142
## [2,] 285.7142
## [3,] 285.7142
Discount Factor = 0.95
# Solve the MDP by policy iteration with a discount factor of 0.95,
# then show the policy it returns.
disc <- 0.95
MDP_Computation <- mdp_policy_iteration(
  P = transition1,
  R = reward3,
  discount = disc
)
print(MDP_Computation[["policy"]])
## [1] 2 1 1
# Value function of the "slacking off" policy, c(1, 1, 1).
# With every reward equal to 100, the value is 100 / (1 - disc) in each state.
mdp_eval_policy_iterative(
  P = transition1,
  R = reward3,
  discount = disc,
  policy = rep(1, 3)
)
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
## [,1]
## [1,] 1999.998
## [2,] 1999.998
## [3,] 1999.998
# Value function of the "working" policy, c(2, 2, 2), for comparison.
mdp_eval_policy_iterative(
  P = transition1,
  R = reward3,
  discount = disc,
  policy = rep(2, 3)
)
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
## [,1]
## [1,] 1999.998
## [2,] 1999.998
## [3,] 1999.998