Setup

library(MDPtoolbox)

# transition probabilities: 3 states x 3 states x 2 actions
# (each 3 x 3 slice is row-stochastic)
transition1 <- array(c(.8, .3, .7, .15, .5, .2, .05, .2, .1, .1, 0, .2, .2, .5,
                       .3, .7, .5, .5), dim = c(3, 3, 2))
# reward both actions equally: a flat 100 for every state-action pair
reward3 <- array(rep(100, 18), dim = c(3, 3, 2))
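Before solving anything, it's worth a quick sanity check that these arrays form a valid MDP. A minimal sketch, using a manual row-sum check plus MDPtoolbox's mdp_check() validator (which returns an empty string when P and R are consistent):

# each action's 3 x 3 transition slice should be row-stochastic
apply(transition1, 3, rowSums)   # expect a matrix of 1s

# MDPtoolbox's built-in consistency check on P and R
mdp_check(transition1, reward3)  # "" means no problems found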

Discount Factor = 0.05

# discount factor = 0.05
disc <- .05
MDP_Computation <- mdp_policy_iteration(transition1, reward3, disc)
print(MDP_Computation[["policy"]])
## [1] 1 1 1
# get value function for the goofing-off policy (action 1 in every state)
mdp_eval_policy_iterative(transition1, reward3, disc, c(1,1,1))
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
##          [,1]
## [1,] 105.2632
## [2,] 105.2632
## [3,] 105.2632
# get value function for the working policy (action 2 in every state)
mdp_eval_policy_iterative(transition1, reward3, disc, c(2,2,2))
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
##          [,1]
## [1,] 105.2632
## [2,] 105.2632
## [3,] 105.2632
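Both policies evaluate to the same value, and a closed-form check shows why: the reward is a flat 100 no matter the state, action, or transition, so the value of any policy is the geometric series 100 * (1 + gamma + gamma^2 + ...) = 100 / (1 - gamma).

# closed-form value of any policy under a constant reward of 100
100 / (1 - .05)  # 105.2632, matching both evaluations above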

Discount Factor = 0.65

# discount factor = 0.65
disc <- .65
MDP_Computation <- mdp_policy_iteration(transition1, reward3, disc)
print(MDP_Computation[["policy"]])
## [1] 1 1 1
# get value function for the goofing-off policy (action 1 in every state)
mdp_eval_policy_iterative(transition1, reward3, disc, c(1,1,1))
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
##          [,1]
## [1,] 285.7142
## [2,] 285.7142
## [3,] 285.7142
# get value function for the working policy (action 2 in every state)
mdp_eval_policy_iterative(transition1, reward3, disc, c(2,2,2))
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
##          [,1]
## [1,] 285.7142
## [2,] 285.7142
## [3,] 285.7142

Discount Factor = 0.95

# discount factor = 0.95
disc <- .95
MDP_Computation <- mdp_policy_iteration(transition1, reward3, disc)
print(MDP_Computation[["policy"]])
## [1] 2 1 1
# get value function for the goofing-off policy (action 1 in every state)
mdp_eval_policy_iterative(transition1, reward3, disc, c(1,1,1))
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
##          [,1]
## [1,] 1999.998
## [2,] 1999.998
## [3,] 1999.998
# get value function for the working policy (action 2 in every state)
mdp_eval_policy_iterative(transition1, reward3, disc, c(2,2,2))
## [1] "MDP Toolbox: iterations stopped, epsilon-optimal value function"
##          [,1]
## [1,] 1999.998
## [2,] 1999.998
## [3,] 1999.998
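Since both actions earn identical rewards, every policy achieves the same value 100 / (1 - gamma), which is why the policy returned by mdp_policy_iteration is effectively arbitrary (1 1 1 at the lower discounts, 2 1 1 here). Two quick checks under that reasoning: evaluating an arbitrary mixed policy (the choice c(1, 2, 1) is just for illustration), and comparing the closed form across all three discount factors:

# any mixed policy should evaluate to ~2000 at gamma = .95,
# since the reward ignores which action is taken
mdp_eval_policy_iterative(transition1, reward3, .95, c(1, 2, 1))

# closed-form values at each discount factor tried above
sapply(c(.05, .65, .95), function(g) 100 / (1 - g))  # ~105.26, ~285.71, 2000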