import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsx mdx */

import Sketch from '../../components/sketch';
import Layout from '../../components/layout';
import DateConverter from '../../components/dateConverter';
import sketch from '../../scripts/archive/sGridworld';
import sketchGw from '../../scripts/sGw/sPancakes';
import sketchRew from '../../scripts/archive/sGridworldRew';
import sketchStates from '../../scripts/archive/sGridworldStates';
/**
 * Page metadata for this post, exported as a plain (mutable) object.
 * Fields: `title`, `date` (written as YYYY-MM-DD), `slug`, and `author`.
 */
export const _frontmatter = {
  title: 'The (Pancakes) Gridworld Environment',
  date: '2020-01-05',
  slug: 'gridworld',
  author: 'JAN MALTE LICHTENBERG',
};

const makeShortcode = name => function MDXDefaultShortcode(props) {
  console.warn("Component " + name + " was not imported, exported, or provided by MDXProvider as global scope");
  return <div {...props} />;
};

// Element type used for the outermost JSX tag rendered by MDXContent.
// NOTE(review): presumably the `mdx` pragma resolves the string "wrapper"
// to a provider-supplied wrapper component — confirm against @mdx-js/react.
const MDXLayout = 'wrapper';
export default function MDXContent({
  components,
  ...props
}) {
  return <MDXLayout {...props} components={components} mdxType="MDXLayout">
    <Layout mdxType="Layout">
      <div>
        <DateConverter frontmatter={props.pageContext.frontmatter} mdxType="DateConverter" />
        <p className="blogtitle">
          {props.pageContext.frontmatter.title}
        </p>
      </div>
      <Sketch sketch={sketchGw} mdxType="Sketch" />
      <p>{`The sketch below shows an example 5x5 gridworld. You can play it using the arrow
keys on your keyboard or, on mobile, using the four arrowheads.`}</p>
      <h4>{`Rules`}</h4>
      <p>{`The agent (red sphere) can navigate the gridworld environment using
one of four `}
        <em {...{
          "parentName": "p"
        }}>{`actions`}</em>
        {` at each time step. The four actions correspond to the
cardinal directions "north", "east", "south", and "west". The agent receives a
`}
        <em {...{
          "parentName": "p"
        }}>{`reward`}</em>
        {`, `}
        <label htmlFor="mn-demo" className="margin-toggle">{`⊕`}</label>
        {` `}
        <input type="checkbox" id="mn-demo" className="margin-toggle" />
        <span className="marginnote"><strong>{`Reward`}</strong>
          {` received when a cell is entered.`}
          <Sketch sketch={sketchRew} mdxType="Sketch" /></span>
        {` which depends on the type of cell that is entered.`}</p>
      <p>{`The reward is +10 for a gold cell, -10 for a bomb cell, and -1 for an empty cell
(which can be interpreted as the "effort" that is required for moving). When an
action would move the agent into a wall, the agent remains instead in the
current cell and nevertheless receives a reward of -1 (banging your head against
a wall requires effort, too).`}</p>
      <p>{`The gold and bomb cells are `}
        <em {...{
          "parentName": "p"
        }}>{`terminal states`}</em>
        {`. Whenever the agent enters a
terminal state, the current episode ends and the agent restarts at one of the
bottom cells, chosen uniformly at random.`}</p>
      <h4>{`Objective`}</h4>
      <p>{`In reinforcement learning, the agent's goal is usually to
maximize the cumulative reward (also called `}
        <em {...{
          "parentName": "p"
        }}>{`return`}</em>
        {`). In the gridworld
environment this objective is reached (that is, return is maximized) if the
agent finds the shortest path to the gold cell from each possible starting
position, while avoiding the bomb cell.`}</p>
      <blockquote>
        <p><p>{`I saw how RL algorithms learn to play ATARI games from pixel input.
Why are you showing me this boring stuff?`}</p>
          {` `}
          <footer>{`People`}</footer></p>
      </blockquote>
      <h4>{`MDP formulation`}</h4>
      <p>{`The gridworld environment can be formulated as an episodic
Markov Decision Process (MDP), denoted by the tuple`}</p>
      <div {...{
        "className": "math math-display"
      }}><span {...{
          "className": "katex-display",
          "parentName": "div"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "display": "block",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`⟨`}</mo>
                    <mtext {...{
                      "parentName": "mrow"
                    }}>{` `}</mtext>
                    <mi {...{
                      "mathvariant": "script",
                      "parentName": "mrow"
                    }}>{`S`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "mathvariant": "script",
                      "parentName": "mrow"
                    }}>{`A`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <msup {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msup"
                      }}>{`s`}</mi>
                      <mo {...{
                        "mathvariant": "normal",
                        "lspace": "0em",
                        "rspace": "0em",
                        "parentName": "msup"
                      }}>{`′`}</mo></msup>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo>
                    <mtext {...{
                      "parentName": "mrow"
                    }}>{` `}</mtext>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`⟩`}</mo>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\langle \\ \\mathcal{S}, \\mathcal{A}, P(s' | s, a), P(r | s, a) \\ \\rangle,`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1.051892em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`⟨`}</span>
                <span {...{
                  "className": "mspace",
                  "parentName": "span"
                }}>{` `}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathcal",
                    "style": {
                      "marginRight": "0.075em"
                    },
                    "parentName": "span"
                  }}>{`S`}</span></span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathcal",
                    "parentName": "span"
                  }}>{`A`}</span></span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.13889em"
                  },
                  "parentName": "span"
                }}>{`P`}</span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.801892em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-3.113em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mtight",
                                  "parentName": "span"
                                }}>{`′`}</span></span></span></span></span></span></span></span></span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.13889em"
                  },
                  "parentName": "span"
                }}>{`P`}</span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span>
                <span {...{
                  "className": "mspace",
                  "parentName": "span"
                }}>{` `}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`⟩`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span></span></span></span></span></div>
      <p>{`where `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "script",
                      "parentName": "mrow"
                    }}>{`S`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathcal{S}`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.68333em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathcal",
                    "style": {
                      "marginRight": "0.075em"
                    },
                    "parentName": "span"
                  }}>{`S`}</span></span></span></span></span></span>
        {` is the set of states, `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "script",
                      "parentName": "mrow"
                    }}>{`A`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathcal{A}`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.68333em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathcal",
                    "parentName": "span"
                  }}>{`A`}</span></span></span></span></span></span>
        {` is the set of actions,
`}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <msup {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msup"
                      }}>{`s`}</mi>
                      <mo {...{
                        "mathvariant": "normal",
                        "lspace": "0em",
                        "rspace": "0em",
                        "parentName": "msup"
                      }}>{`′`}</mo></msup>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`P(s'| s, a)`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1.001892em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.13889em"
                  },
                  "parentName": "span"
                }}>{`P`}</span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.751892em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-3.063em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mtight",
                                  "parentName": "span"
                                }}>{`′`}</span></span></span></span></span></span></span></span></span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {` is the state-transition function that specifies the probability of
transitioning to state `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><msup {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msup"
                      }}>{`s`}</mi>
                      <mo {...{
                        "mathvariant": "normal",
                        "lspace": "0em",
                        "rspace": "0em",
                        "parentName": "msup"
                      }}>{`′`}</mo></msup></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s'`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.751892em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.751892em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-3.063em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mtight",
                                  "parentName": "span"
                                }}>{`′`}</span></span></span></span></span></span></span></span></span></span></span></span></span>
        {` when taking action `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`a`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span></span></span></span></span>
        {` in state `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span></span></span></span></span>
        {`, and `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`P(r| s,
a)`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.13889em"
                  },
                  "parentName": "span"
                }}>{`P`}</span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {` is the probability distribution of reward `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`r`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span></span></span></span></span>
        {` that the agent receives when
taking action `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`a`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span></span></span></span></span>
        {` in state `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span></span></span></span></span>
        {`.`}</p>
      <p>{`For our 5x5 gridworld we can define a corresponding MDP as follows. -
`}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "script",
                      "parentName": "mrow"
                    }}>{`S`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathcal{S}`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.68333em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathcal",
                    "style": {
                      "marginRight": "0.075em"
                    },
                    "parentName": "span"
                  }}>{`S`}</span></span></span></span></span></span>
        {` is the set of all 25 cells `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><msub {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msub"
                      }}>{`s`}</mi>
                      <mi {...{
                        "parentName": "msub"
                      }}>{`i`}</mi></msub></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s_i`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.58056em",
                    "verticalAlign": "-0.15em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t vlist-t2",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.31166399999999994em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-2.5500000000000003em",
                              "marginLeft": "0em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mathnormal mtight",
                                "parentName": "span"
                              }}>{`i`}</span></span></span></span>
                        <span {...{
                          "className": "vlist-s",
                          "parentName": "span"
                        }}>{`​`}</span></span>
                      <span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.15em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "parentName": "span"
                          }} /></span></span></span></span></span></span></span></span></span>
        {` for `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`i`}</mi>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`0`}</mn>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`1`}</mn>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`.`}</mi>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`.`}</mi>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`.`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`24`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`i = 0, 1, ... , 24`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.65952em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`i`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.8388800000000001em",
                    "verticalAlign": "-0.19444em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`.`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`.`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`.`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`2`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`4`}</span></span></span></span></span>
        {`. `}
        <label htmlFor="mn-state-sketch" className="margin-toggle">{`⊕`}</label>
        {` `}
        <input type="checkbox" id="mn-state-sketch" className="margin-toggle" />
        <span className="marginnote"><strong>{`State`}</strong>
          {` representation used.`}
          <Sketch sketch={sketchStates} mdxType="Sketch" /></span>
        {` The figure on the right shows one arbitrary
assignment of state ids to cells of the gridworld. Under this specification,
states 18 (bomb) and 23 (gold) are terminal states. All other states
are non-terminal states. - The set of actions `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "script",
                      "parentName": "mrow"
                    }}>{`A`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathcal{A}`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.68333em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathcal",
                    "parentName": "span"
                  }}>{`A`}</span></span></span></span></span></span>
        {` is identical in each
state and consists of the four actions `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <msub {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msub"
                      }}>{`a`}</mi>
                      <mn {...{
                        "parentName": "msub"
                      }}>{`0`}</mn></msub>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <msub {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msub"
                      }}>{`a`}</mi>
                      <mn {...{
                        "parentName": "msub"
                      }}>{`1`}</mn></msub>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <msub {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msub"
                      }}>{`a`}</mi>
                      <mn {...{
                        "parentName": "msub"
                      }}>{`2`}</mn></msub>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <msub {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msub"
                      }}>{`a`}</mi>
                      <mn {...{
                        "parentName": "msub"
                      }}>{`3`}</mn></msub>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`(a_0, a_1, a_2, a_3)`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`a`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t vlist-t2",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.30110799999999993em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-2.5500000000000003em",
                              "marginLeft": "0em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}>{`0`}</span></span></span></span>
                        <span {...{
                          "className": "vlist-s",
                          "parentName": "span"
                        }}>{`​`}</span></span>
                      <span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.15em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "parentName": "span"
                          }} /></span></span></span></span></span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`a`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t vlist-t2",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.30110799999999993em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-2.5500000000000003em",
                              "marginLeft": "0em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}>{`1`}</span></span></span></span>
                        <span {...{
                          "className": "vlist-s",
                          "parentName": "span"
                        }}>{`​`}</span></span>
                      <span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.15em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "parentName": "span"
                          }} /></span></span></span></span></span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`a`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t vlist-t2",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.30110799999999993em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-2.5500000000000003em",
                              "marginLeft": "0em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}>{`2`}</span></span></span></span>
                        <span {...{
                          "className": "vlist-s",
                          "parentName": "span"
                        }}>{`​`}</span></span>
                      <span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.15em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "parentName": "span"
                          }} /></span></span></span></span></span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`a`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t vlist-t2",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.30110799999999993em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-2.5500000000000003em",
                              "marginLeft": "0em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}>{`3`}</span></span></span></span>
                        <span {...{
                          "className": "vlist-s",
                          "parentName": "span"
                        }}>{`​`}</span></span>
                      <span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.15em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "parentName": "span"
                          }} /></span></span></span></span></span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {` = `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mtext {...{
                      "parentName": "mrow"
                    }}>{`north, east, south, west`}</mtext>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`(\\text{north,
east, south, west})`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord text",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord",
                    "parentName": "span"
                  }}>{`north, east, south, west`}</span></span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {`. The gridworld described so far is `}
        <em {...{
          "parentName": "p"
        }}>{`deterministic`}</em>
        {` in the
sense that the agent always moves in the intended direction. For example, if the
agent is in state `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><msub {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msub"
                      }}>{`s`}</mi>
                      <mn {...{
                        "parentName": "msub"
                      }}>{`2`}</mn></msub></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s_2`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.58056em",
                    "verticalAlign": "-0.15em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t vlist-t2",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.30110799999999993em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-2.5500000000000003em",
                              "marginLeft": "0em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}>{`2`}</span></span></span></span>
                        <span {...{
                          "className": "vlist-s",
                          "parentName": "span"
                        }}>{`​`}</span></span>
                      <span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.15em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "parentName": "span"
                          }} /></span></span></span></span></span></span></span></span></span>
        {` and takes action `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mtext {...{
                      "parentName": "mrow"
                    }}>{`north`}</mtext></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\text{north}`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.69444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord text",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord",
                    "parentName": "span"
                  }}>{`north`}</span></span></span></span></span></span>
        {`, the agent actually
transitions to state `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><msub {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msub"
                      }}>{`s`}</mi>
                      <mn {...{
                        "parentName": "msub"
                      }}>{`7`}</mn></msub>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`.`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s_7.`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.58056em",
                    "verticalAlign": "-0.15em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t vlist-t2",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.30110799999999993em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-2.5500000000000003em",
                              "marginLeft": "0em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}>{`7`}</span></span></span></span>
                        <span {...{
                          "className": "vlist-s",
                          "parentName": "span"
                        }}>{`​`}</span></span>
                      <span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.15em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "parentName": "span"
                          }} /></span></span></span></span></span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`.`}</span></span></span></span></span>
        {` We can write `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "double-struck",
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`7`}</mn>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`2`}</mn>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mtext {...{
                      "parentName": "mrow"
                    }}>{`north`}</mtext>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`1`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathbb{P}(7 | 2, \\text{north})=1`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathbb",
                    "parentName": "span"
                  }}>{`P`}</span></span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`7`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`2`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord text",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord",
                    "parentName": "span"
                  }}>{`north`}</span></span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span></span></span></span></span>
        {` and
`}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "double-struck",
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <msup {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msup"
                      }}>{`s`}</mi>
                      <mo {...{
                        "mathvariant": "normal",
                        "lspace": "0em",
                        "rspace": "0em",
                        "parentName": "msup"
                      }}>{`′`}</mo></msup>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`2`}</mn>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mtext {...{
                      "parentName": "mrow"
                    }}>{`north`}</mtext>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`0`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathbb{P}(s' | 2, \\text{north})=0`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1.001892em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathbb",
                    "parentName": "span"
                  }}>{`P`}</span></span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.751892em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-3.063em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mtight",
                                  "parentName": "span"
                                }}>{`′`}</span></span></span></span></span></span></span></span></span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`2`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord text",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord",
                    "parentName": "span"
                  }}>{`north`}</span></span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span></span></span></span></span>
        {` for all `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><msup {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msup"
                      }}>{`s`}</mi>
                      <mo {...{
                        "mathvariant": "normal",
                        "lspace": "0em",
                        "rspace": "0em",
                        "parentName": "msup"
                      }}>{`′`}</mo></msup>
                    <mo {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`≠`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`7`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s' \\ne 7`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.946332em",
                    "verticalAlign": "-0.19444em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.751892em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-3.063em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mtight",
                                  "parentName": "span"
                                }}>{`′`}</span></span></span></span></span></span></span></span></span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}><span {...{
                    "className": "mrel",
                    "parentName": "span"
                  }}><span {...{
                      "className": "mord vbox",
                      "parentName": "span"
                    }}><span {...{
                        "className": "thinbox",
                        "parentName": "span"
                      }}><span {...{
                          "className": "rlap",
                          "parentName": "span"
                        }}><span {...{
                            "className": "strut",
                            "style": {
                              "height": "0.8888799999999999em",
                              "verticalAlign": "-0.19444em"
                            },
                            "parentName": "span"
                          }} />
                          <span {...{
                            "className": "inner",
                            "parentName": "span"
                          }}><span {...{
                              "className": "mrel",
                              "parentName": "span"
                            }}>{``}</span></span>
                          <span {...{
                            "className": "fix",
                            "parentName": "span"
                          }} /></span></span></span></span>
                  <span {...{
                    "className": "mrel",
                    "parentName": "span"
                  }}>{`=`}</span></span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`7`}</span></span></span></span></span>
        {`. See further below for
an example of a stochastic transition probability distribution. The reward
function also takes a particularly simple form in this small and deterministic
environment. Because it doesn't matter from which side a cell is entered,
`}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "double-struck",
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`1`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathbb{P}(r|s, a)=1`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathbb",
                    "parentName": "span"
                  }}>{`P`}</span></span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span></span></span></span></span>
        {` for the reward `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <msup {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msup"
                      }}>{`s`}</mi>
                      <mo {...{
                        "mathvariant": "normal",
                        "lspace": "0em",
                        "rspace": "0em",
                        "parentName": "msup"
                      }}>{`′`}</mo></msup>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`r = r(s')`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1.001892em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.751892em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-3.063em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mtight",
                                  "parentName": "span"
                                }}>{`′`}</span></span></span></span></span></span></span></span></span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {` that is given for entering the
state `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><msup {...{
                      "parentName": "mrow"
                    }}><mi {...{
                        "parentName": "msup"
                      }}>{`s`}</mi>
                      <mo {...{
                        "mathvariant": "normal",
                        "lspace": "0em",
                        "rspace": "0em",
                        "parentName": "msup"
                      }}>{`′`}</mo></msup></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s'`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.751892em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathnormal",
                    "parentName": "span"
                  }}>{`s`}</span>
                  <span {...{
                    "className": "msupsub",
                    "parentName": "span"
                  }}><span {...{
                      "className": "vlist-t",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-r",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist",
                          "style": {
                            "height": "0.751892em"
                          },
                          "parentName": "span"
                        }}><span {...{
                            "style": {
                              "top": "-3.063em",
                              "marginRight": "0.05em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "className": "pstrut",
                              "style": {
                                "height": "2.7em"
                              },
                              "parentName": "span"
                            }} />
                            <span {...{
                              "className": "sizing reset-size6 size3 mtight",
                              "parentName": "span"
                            }}><span {...{
                                "className": "mord mtight",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mtight",
                                  "parentName": "span"
                                }}>{`′`}</span></span></span></span></span></span></span></span></span></span></span></span></span>
        {` when taking action `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`a`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span></span></span></span></span>
        {` in state `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`s`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span></span></span></span></span>
        {`, and `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "double-struck",
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`0`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathbb{P}(r|s, a)=0`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathbb",
                    "parentName": "span"
                  }}>{`P`}</span></span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span></span></span></span></span>
        {` for
all other `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`r`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span></span></span></span></span>
        {`. For example, `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "double-struck",
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`−`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`10`}</mn>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`17`}</mn>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mtext {...{
                      "parentName": "mrow"
                    }}>{`east`}</mtext>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`1`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathbb{P}(-10|17, \\text{east})=1`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathbb",
                    "parentName": "span"
                  }}>{`P`}</span></span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`−`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`7`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord text",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord",
                    "parentName": "span"
                  }}>{`east`}</span></span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span></span></span></span></span>
        {` and
`}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "mathvariant": "double-struck",
                      "parentName": "mrow"
                    }}>{`P`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi>
                    <mi {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`∣`}</mi>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`17`}</mn>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mtext {...{
                      "parentName": "mrow"
                    }}>{`east`}</mtext>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`0`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\mathbb{P}(r|17, \\text{east})=0`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord mathbb",
                    "parentName": "span"
                  }}>{`P`}</span></span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`∣`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`7`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord text",
                  "parentName": "span"
                }}><span {...{
                    "className": "mord",
                    "parentName": "span"
                  }}>{`east`}</span></span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span></span></span></span></span>
        {` for all `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`r`}</mi>
                    <mo {...{
                      "mathvariant": "normal",
                      "parentName": "mrow"
                    }}>{`≠`}</mo>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`−`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`10`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`r \\ne -10`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.8888799999999999em",
                    "verticalAlign": "-0.19444em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.02778em"
                  },
                  "parentName": "span"
                }}>{`r`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}><span {...{
                    "className": "mrel",
                    "parentName": "span"
                  }}><span {...{
                      "className": "mord vbox",
                      "parentName": "span"
                    }}><span {...{
                        "className": "thinbox",
                        "parentName": "span"
                      }}><span {...{
                          "className": "rlap",
                          "parentName": "span"
                        }}><span {...{
                            "className": "strut",
                            "style": {
                              "height": "0.8888799999999999em",
                              "verticalAlign": "-0.19444em"
                            },
                            "parentName": "span"
                          }} />
                          <span {...{
                            "className": "inner",
                            "parentName": "span"
                          }}><span {...{
                              "className": "mrel",
                              "parentName": "span"
                            }}>{``}</span></span>
                          <span {...{
                            "className": "fix",
                            "parentName": "span"
                          }} /></span></span></span></span>
                  <span {...{
                    "className": "mrel",
                    "parentName": "span"
                  }}>{`=`}</span></span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.72777em",
                    "verticalAlign": "-0.08333em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`−`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span></span></span></span></span>
        {`.`}</p>
      <h4>{`Possible rule modifications`}</h4>
      <p>{`Common changes to the basic gridworld described above are often made along the
following dimensions: - `}
        <em {...{
          "parentName": "p"
        }}>{`Stochasticity.`}</em>
        {` The agent moves in the intended
direction (e.g., up for action "north") with probability `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mn {...{
                      "parentName": "mrow"
                    }}>{`1`}</mn>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`−`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`ϵ`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`1-\\epsilon`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.72777em",
                    "verticalAlign": "-0.08333em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`1`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2222222222222222em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mbin",
                  "parentName": "span"
                }}>{`−`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2222222222222222em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`ϵ`}</span></span></span></span></span>
        {` and moves
with probability `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`ϵ`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\epsilon`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`ϵ`}</span></span></span></span></span>
        {` in any other direction. In the context of a
gridworld like the one described here, this effect is sometimes referred to as
"wind" or "slipperiness" (of the surface). - Availability of a `}
        <em {...{
          "parentName": "p"
        }}>{`model of the
environment`}</em>
        {`. In the most basic version of the gridworld, the transition
dynamics are generally not known. That is, the agent does `}
        <em {...{
          "parentName": "p"
        }}>{`not`}</em>
        {` know beforehand
that selecting the action "north" leads to an "upward" movement. When transition
dynamics are given, the agent can use this information to plan ahead, and thus
to learn more efficiently.`}</p>
      <h4>{`This really `}
        <em {...{
          "parentName": "h4"
        }}>{`is`}</em>
        {` boring... why should I care?`}</h4>
      <p>{`ATARI games, Starcraft, and DotA are certainly much more fun to look at than the
gridworld. However, it is difficult to get an understanding of how exactly
reinforcement learning algorithms actually learn by looking at videos of
well-performing agents. The simplicity of a gridworld allows us to
`}
        <strong {...{
          "parentName": "p"
        }}>{`visualize`}</strong>
        {`, `}
        <strong {...{
          "parentName": "p"
        }}>{`understand`}</strong>
        {`, and ultimately `}
        <strong {...{
          "parentName": "p"
        }}>{`compare`}</strong>
        {` the `}
        <strong {...{
          "parentName": "p"
        }}>{`learning
behaviours`}</strong>
        {` of various algorithms.`}</p>
      <blockquote>
        <p>
          <q>
            <p>{`Is your entire PhD about Gridworlds?`}</p>
          </q>
        </p>
        <footer>
          <p>{`Ed`}</p>
        </footer>
      </blockquote>
    </Layout>

  </MDXLayout>;
}
;
// Tag the default-exported component with an `isMDXComponent` flag.
// NOTE(review): presumably read by the MDX runtime/tooling to recognise compiled MDX output — confirm.
MDXContent.isMDXComponent = true;
      