Unknown information.
Randomness.
-- | A fair six-sided die: samples uniformly from 1..6.
-- 'uniformD' draws from a discrete uniform distribution (monad-bayes).
die ∷ MonadSample m ⇒ m Int
die = uniformD [1..6]
-- | Sum of three independent die rolls.
--
-- NOTE(review): the name says "dice2" but three dice are rolled —
-- confirm whether the third roll or the name is the intended one.
dice2 ∷ MonadSample m ⇒ m Int
dice2 = do
  one   ← die
  two   ← die
  three ← die
  pure (one + two + three)
-- | Roll one die to decide how many dice to roll, then sum that
-- many rolls — a simple two-stage random process.
diceGame ∷ MonadSample m ⇒ m Int
diceGame = do
  n  ← die
  xs ← replicateM n die
  pure (sum xs)
m x
x → m y
m x → (x → m y) → m y
m x
m y
(x → y → z) → m x → m y → m z
How do we handle
uncertainty over time?
Time?
State?
History?
-- | A Markov process over states @s@ in monad @m@:
-- the next state depends only on the current state.
data MarkovProcess m s = MP {
step ∷ s → m s -- ^ transition: current state to (monadic) next state
}
Next depends on current.
Next only depends on current.
[s] → m s
s → m s
∘ s → m s
∘ s → m s
…
-- | Random demand forecast: Poisson-distributed with mean 3.
forecast ∷ MonadSample m ⇒ m Int
forecast = poisson 3
-- | Inventory as a Markov process: each step orders a fixed 3 units,
-- observes random demand, sells what it can, and carries the rest.
--
-- Added the @Monad m@ constraint required by the 'do'/'pure' in the body.
store ∷ Monad m ⇒ m Int → MarkovProcess m Int
store forecast = MP {
  step = \ inventory → do
    let ordered = 3
    demanded ← forecast
    let sold = min inventory demanded
    pure (inventory - sold + ordered)
  }
-- | Run a Markov process from a starting state, yielding the
-- infinite stream of successive states (streaming package).
simulate ∷ Monad m
         ⇒ MarkovProcess m s
         → s
         → Stream (Of s) m ()
simulate process state = do
  next ← lift (step process state)
  yield next
  simulate process next
start, step start, step (step start), …
-- | Prelude's 'iterate', shown for comparison with 'simulate':
-- produces the infinite list x, f x, f (f x), …
iterate ∷ (a → a) → a → [a]
iterate f x = x : iterate f (f x)
-- | 'simulate' rewritten as a monadic 'iterate':
-- stream the states start, step start, step (step start), …
-- (Uses the NamedFieldPuns pattern @MP { step }@.)
simulate ∷ Monad m
⇒ MarkovProcess m s
→ s
→ Stream (Of s) m ()
simulate MP { step } start =
iterateM step (pure start)
-- | Final form: taking the start state already inside @m@ lets
-- 'simulate' be exactly 'iterateM' applied to the step function.
simulate ∷ Monad m
⇒ MarkovProcess m s
→ m s
→ Stream (Of s) m ()
simulate MP { step } = iterateM step
-- | Rewards are real-valued.
type Reward = Double

-- | A Markov reward process: each transition also produces a 'Reward'.
data MarkovRewardProcess m s = MRP {
step ∷ s → m (s, Reward) -- ^ next state paired with the step's reward
}
-- | (Slide repeat of the previous definition, kept for the build-up
-- to the 'WriterT' refactoring below.)
data MarkovRewardProcess m s = MRP {
step ∷ s → m (s, Reward)
}
-- | The writer monad transformer (as in the transformers package):
-- a computation in @m@ whose result @a@ is paired with an
-- accumulated log @w@.
--
-- Fixed from the slide's @data WriterT m a@, which referenced the
-- unbound variable @s@ and lacked the @w@ parameter that the later
-- usage @WriterT Reward m s@ requires.
data WriterT w m a = WriterT {
  runWriterT ∷ m (a, w)
  }
-- | Reward process with the reward moved into the monad:
-- 'WriterT' accumulates the 'Reward' alongside the next state.
data MarkovRewardProcess m s = MRP {
step ∷ s → WriterT Reward m s
}
-- | Punchline: a reward process is just a 'MarkovProcess' whose
-- monad also accumulates a 'Reward'.
-- (Fixed the slide typo @MakrovProcess@ → @MarkovProcess@.)
type MarkovRewardProcess m s =
  MarkovProcess (WriterT Reward m) s
-- | Rewards form a monoid under addition, so 'WriterT' can sum them.
type Reward = Sum Double
-- | Record an integral reward in the writer log.
--
-- The slide's @tell ∘ Sum@ is ill-typed: 'Sum' here needs a 'Double'
-- but the argument is any 'Integral' @n@, so convert first.
reward ∷ (MonadWriter Reward m, Integral n)
⇒ n
→ m ()
reward = tell ∘ Sum ∘ fromIntegral
-- Inventory step with rewards: holding cost, order cost, revenue,
-- and a penalty for missed demand.
step = \ inventory → do
  reward (-1 * inventory)          -- holding cost per unit in stock
  let ordered = 3
  reward (-4 * ordered)            -- purchase cost per unit ordered
  demanded ← lift forecast
  let sold   = min demanded inventory
      missed = demanded - sold
  reward (sold * 8 - missed * 14)  -- revenue minus missed-demand penalty
  pure (inventory - sold + ordered)
-- | A Markov decision process: the transition now also depends on
-- an action @a@ chosen at each state.
data MarkovDecisionProcess m s a = MDP {
step ∷ s → a → m (s, Reward)
}
-- | MDP with the reward folded into the monad, mirroring the
-- 'WriterT' refactoring of the reward process.
data MarkovDecisionProcess m s a = MDP {
act ∷ s → a → WriterT Reward m s -- ^ take action @a@ in state @s@
}
We know what we want: reward.
We know what we can do: actions.
Choose the action that maximizes our reward.
Choose the action that maximizes our total expected reward.
But how?
[a, a, a, a, a, a, a…]
But actions depend on state!
s → a
-- | A (deterministic) policy: choose an action for each state.
type Policy s a = s → a
Find the policy
that maximizes
total expected reward
-- | Fix a policy to turn a decision process into a reward process:
-- at each state, act according to the policy's chosen action.
apply ∷ Policy s a
      → MarkovDecisionProcess m s a
      → MarkovRewardProcess m s
apply policy MDP { act } = MP {
  step = \ s → act s (policy s)
  }
-- | The store as a decision process: the action is how many units
-- to order this step (instead of the hard-coded 3).
storeMDP forecast = MDP {
  act = \ inventory ordered → do
    reward (-1 * inventory)          -- holding cost
    reward (-4 * ordered)            -- purchase cost
    demanded ← lift forecast
    let sold   = min demanded inventory
        missed = demanded - sold
    reward (sold * 6 - missed * 12)  -- revenue minus missed-demand penalty
    pure (inventory - sold + ordered)
  }
-- | A policy that ignores the state and always orders the same amount.
always ∷ Int → Policy Int Int
always order _ = order
always 3
always 4
always 2
always
0–8
-- | Order-up-to policy: top inventory back up to @level@,
-- ordering nothing when stock is already at or above it.
orderUpTo ∷ Int → Policy Int Int
orderUpTo level inventory = max 0 (level - inventory)
orderUpTo 6
orderUpTo 6
orderUpTo
4–16
orderUpTo 10
orderUpTo 10
Lots of policies
Created by Tikhon Jelvis.