Use Bellman Equations to Solve the Hungry-Full Problem¶

In [1]:
import sympy
from sympy import symbols
# Turn on SymPy's rich printing so that expressions returned or displayed by
# later cells are rendered as typeset math rather than plain text.
sympy.init_printing()

Bellman Expectation Equations¶

In [2]:
# State values v(s), one per state (hungry / full).
v_hungry, v_full = symbols('v_hungry v_full')

# Action values q(s, a), one per (state, action) pair; the two actions are
# "eat" and "none".
q_hungry_eat, q_hungry_none, q_full_eat, q_full_none = symbols(
    'q_hungry_eat q_hungry_none q_full_eat q_full_none'
)

# Model parameters, as they are used in the linear system that follows:
#   alpha -- weight on v(full) in the q(hungry, eat) equation
#   beta  -- weight on v(hungry) in the q(full, none) equation
#   gamma -- multiplies every next-state value (presumably the discount
#            factor -- confirm against the problem statement)
alpha, beta, gamma = symbols('alpha beta gamma')

# Policy weights, as they are used in the linear system that follows:
#   x -- weight of q(hungry, eat) in v(hungry); q(hungry, none) gets 1 - x
#   y -- weight of q(full, none) in v(full);    q(full, eat)    gets 1 - y
x, y = symbols('x y')
In [3]:
# Augmented matrix [A | b] of the Bellman expectation equations.
# Columns: v_hungry, v_full, q_hungry_none, q_hungry_eat, q_full_none,
# q_full_eat, followed by the right-hand side.
system = sympy.Matrix([
    # v(hungry) = (1 - x) * q(hungry, none) + x * q(hungry, eat)
    [1, 0, x-1, -x, 0, 0, 0],
    # v(full) = y * q(full, none) + (1 - y) * q(full, eat)
    [0, 1, 0, 0, -y, y-1, 0],
    # q(hungry, none) = -2 + gamma * v(hungry)
    [-gamma, 0, 1, 0, 0, 0, -2],
    # q(hungry, eat) = (4*alpha - 3)
    #                  + gamma * ((1 - alpha) * v(hungry) + alpha * v(full))
    [(alpha-1)*gamma, -alpha*gamma, 0, 1, 0, 0, 4*alpha-3],
    # q(full, none) = (2 - 4*beta)
    #                 + gamma * (beta * v(hungry) + (1 - beta) * v(full))
    [-beta*gamma, (beta-1)*gamma, 0, 0, 1, 0, -4*beta+2],
    # q(full, eat) = 1 + gamma * v(full)
    [0, -gamma, 0, 0, 0, 1, 1],
])

# Solve for all six unknowns symbolically; the symbol order must match the
# column order of the matrix above. Left as the last expression so the
# notebook displays the solution.
sympy.solve_linear_system(
    system,
    v_hungry, v_full,
    q_hungry_none, q_hungry_eat, q_full_none, q_full_eat,
)
Out[3]:
$\displaystyle \left\{ q_{full eat} : \frac{- \alpha \gamma^{2} x y - \alpha \gamma x + \beta \gamma^{2} x y - \beta \gamma^{2} y + 3 \beta \gamma y + \gamma^{2} y - \gamma y + \gamma - 1}{\alpha \gamma^{2} x - \alpha \gamma x + \beta \gamma^{2} y - \beta \gamma y - \gamma^{2} + 2 \gamma - 1}, \ q_{full none} : \frac{- \alpha \gamma^{2} x y + \alpha \gamma^{2} x - 2 \alpha \gamma x + \beta \gamma^{2} x y - \beta \gamma^{2} x - \beta \gamma^{2} y + \beta \gamma^{2} + \beta \gamma x + 3 \beta \gamma y - 5 \beta \gamma + 4 \beta + \gamma^{2} y - \gamma^{2} - \gamma y + 3 \gamma - 2}{\alpha \gamma^{2} x - \alpha \gamma x + \beta \gamma^{2} y - \beta \gamma y - \gamma^{2} + 2 \gamma - 1}, \ q_{hungry eat} : \frac{- \alpha \gamma^{2} x y + \alpha \gamma^{2} x + \alpha \gamma^{2} y - \alpha \gamma^{2} - 2 \alpha \gamma x - \alpha \gamma y + 5 \alpha \gamma - 4 \alpha + \beta \gamma^{2} x y - \beta \gamma^{2} y + 3 \beta \gamma y - \gamma^{2} x + \gamma^{2} + \gamma x - 4 \gamma + 3}{\alpha \gamma^{2} x - \alpha \gamma x + \beta \gamma^{2} y - \beta \gamma y - \gamma^{2} + 2 \gamma - 1}, \ q_{hungry none} : \frac{- \alpha \gamma^{2} x y + \alpha \gamma^{2} x - 2 \alpha \gamma x + \beta \gamma^{2} x y + 2 \beta \gamma y - \gamma^{2} x + \gamma x - 2 \gamma + 2}{\alpha \gamma^{2} x - \alpha \gamma x + \beta \gamma^{2} y - \beta \gamma y - \gamma^{2} + 2 \gamma - 1}, \ v_{full} : \frac{- \alpha \gamma x y - \alpha \gamma x + \beta \gamma x y - 2 \beta \gamma y + 4 \beta y + \gamma y + \gamma - y - 1}{\alpha \gamma^{2} x - \alpha \gamma x + \beta \gamma^{2} y - \beta \gamma y - \gamma^{2} + 2 \gamma - 1}, \ v_{hungry} : \frac{- \alpha \gamma x y + 3 \alpha \gamma x - 4 \alpha x + \beta \gamma x y + 2 \beta \gamma y - \gamma x - 2 \gamma + x + 2}{\alpha \gamma^{2} x - \alpha \gamma x + \beta \gamma^{2} y - \beta \gamma y - \gamma^{2} + 2 \gamma - 1}\right\}$

Bellman Optimality Equations¶

In [4]:
# Enumerate the four deterministic policies (x, y each 0 or 1) and solve the
# Bellman system for each one. Every case is shown; which of them is actually
# optimal depends on alpha, beta and gamma.
#
# The loop variables are deliberately NOT called `x` and `y`: rebinding those
# names would replace the SymPy symbols with plain ints, so re-running the
# symbolic cell afterwards would silently solve the x=1, y=1 special case
# instead of the general system. For the same reason the symbolic `system`
# built in the previous cell is specialised with `subs` (which returns a new
# matrix) rather than being overwritten with a copy-pasted numeric matrix.
xy_tuples = ((0, 0), (1, 0), (0, 1), (1, 1))
for x_val, y_val in xy_tuples:
    # Plug this policy into the symbolic augmented matrix.
    policy_system = system.subs({x: x_val, y: y_val})
    result = sympy.solve_linear_system(policy_system,
            v_hungry, v_full,
            q_hungry_none, q_hungry_eat, q_full_none, q_full_eat,
            simplification=True)
    # x = 1 means v(hungry) = q(hungry, eat); x = 0 means q(hungry, not eat).
    msgx = 'v(hungry) = q(hungry,{}eat)'.format('' if x_val else 'not ')
    # y = 1 means v(full) = q(full, not eat); y = 0 means q(full, eat).
    msgy = 'v(full) = q(full,{}eat)'.format('not ' if y_val else '')
    print('==== {}, {} ==== x = {}, y = {} ===='.format(
            msgx, msgy, x_val, y_val))
    display(result)
==== v(hungry) = q(hungry,not eat), v(full) = q(full,eat) ==== x = 0, y = 0 ====
$\displaystyle \left\{ q_{full eat} : - \frac{1}{\gamma - 1}, \ q_{full none} : \frac{- \beta \gamma + 4 \beta + \gamma - 2}{\gamma - 1}, \ q_{hungry eat} : \frac{\alpha \gamma - 4 \alpha - \gamma + 3}{\gamma - 1}, \ q_{hungry none} : \frac{2}{\gamma - 1}, \ v_{full} : - \frac{1}{\gamma - 1}, \ v_{hungry} : \frac{2}{\gamma - 1}\right\}$
==== v(hungry) = q(hungry,eat), v(full) = q(full,eat) ==== x = 1, y = 0 ====
$\displaystyle \left\{ q_{full eat} : - \frac{1}{\gamma - 1}, \ q_{full none} : \frac{\alpha \gamma^{2} - 2 \alpha \gamma - 4 \beta \gamma + 4 \beta - \gamma^{2} + 3 \gamma - 2}{\alpha \gamma^{2} - \alpha \gamma - \gamma^{2} + 2 \gamma - 1}, \ q_{hungry eat} : \frac{3 \alpha \gamma - 4 \alpha - 3 \gamma + 3}{\alpha \gamma^{2} - \alpha \gamma - \gamma^{2} + 2 \gamma - 1}, \ q_{hungry none} : \frac{\alpha \gamma^{2} - 2 \alpha \gamma - \gamma^{2} - \gamma + 2}{\alpha \gamma^{2} - \alpha \gamma - \gamma^{2} + 2 \gamma - 1}, \ v_{full} : - \frac{1}{\gamma - 1}, \ v_{hungry} : \frac{3 \alpha \gamma - 4 \alpha - 3 \gamma + 3}{\alpha \gamma^{2} - \alpha \gamma - \gamma^{2} + 2 \gamma - 1}\right\}$
==== v(hungry) = q(hungry,not eat), v(full) = q(full,not eat) ==== x = 0, y = 1 ====
$\displaystyle \left\{ q_{full eat} : \frac{- \beta \gamma^{2} + 3 \beta \gamma + \gamma^{2} - 1}{\beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ q_{full none} : \frac{- 2 \beta \gamma + 4 \beta + 2 \gamma - 2}{\beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ q_{hungry eat} : \frac{4 \alpha \gamma - 4 \alpha - \beta \gamma^{2} + 3 \beta \gamma + \gamma^{2} - 4 \gamma + 3}{\beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ q_{hungry none} : \frac{2}{\gamma - 1}, \ v_{full} : \frac{- 2 \beta \gamma + 4 \beta + 2 \gamma - 2}{\beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ v_{hungry} : \frac{2}{\gamma - 1}\right\}$
==== v(hungry) = q(hungry,eat), v(full) = q(full,not eat) ==== x = 1, y = 1 ====
$\displaystyle \left\{ q_{full eat} : \frac{- \alpha \gamma^{2} - \alpha \gamma + 3 \beta \gamma + \gamma^{2} - 1}{\alpha \gamma^{2} - \alpha \gamma + \beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ q_{full none} : \frac{- 2 \alpha \gamma - \beta \gamma + 4 \beta + 2 \gamma - 2}{\alpha \gamma^{2} - \alpha \gamma + \beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ q_{hungry eat} : \frac{2 \alpha \gamma - 4 \alpha + 3 \beta \gamma - 3 \gamma + 3}{\alpha \gamma^{2} - \alpha \gamma + \beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ q_{hungry none} : \frac{- 2 \alpha \gamma + \beta \gamma^{2} + 2 \beta \gamma - \gamma^{2} - \gamma + 2}{\alpha \gamma^{2} - \alpha \gamma + \beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ v_{full} : \frac{- 2 \alpha \gamma - \beta \gamma + 4 \beta + 2 \gamma - 2}{\alpha \gamma^{2} - \alpha \gamma + \beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}, \ v_{hungry} : \frac{2 \alpha \gamma - 4 \alpha + 3 \beta \gamma - 3 \gamma + 3}{\alpha \gamma^{2} - \alpha \gamma + \beta \gamma^{2} - \beta \gamma - \gamma^{2} + 2 \gamma - 1}\right\}$