import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsx mdx */

import Sketch from '../../components/sketch';
import sketchQ from '../../scripts/sQlearning/sQlearning';
import sketchQExp from '../../scripts/sQlearningExp/sQlearningExp';
import { Link } from 'gatsby';
import Layout from '../../components/blogLayout';
import DateConverter from '../../components/dateConverter';
import { SideNote, MarginNote } from '../../components/sideNote';
import { KatexBlock, KatexInline } from "../../components/Katex";
import Commento from "../../components/commento";
/**
 * Frontmatter metadata exported alongside this post's compiled MDX content:
 * title, subtitle, publication date (ISO `YYYY-MM-DD` string), URL slug and
 * author name.
 *
 * NOTE(review): `subtitle` looks like placeholder text — confirm before publishing.
 */
export const _frontmatter = {
  title: 'Reinforcement learning, line by line: Q-learning',
  subtitle: 'blabla',
  date: '2021-09-13',
  slug: 'qlearning',
  author: 'JAN MALTE LICHTENBERG',
};

const makeShortcode = name => function MDXDefaultShortcode(props) {
  console.warn("Component " + name + " was not imported, exported, or provided by MDXProvider as global scope");
  return <div {...props} />;
};

// Element type of the outermost element returned by MDXContent. It is a plain
// string rather than a component reference; presumably the `mdx` pragma maps
// "wrapper" to a provided wrapper component — confirm against @mdx-js/react.
const MDXLayout = 'wrapper';
export default function MDXContent({
  components,
  ...props
}) {
  return <MDXLayout {...props} components={components} mdxType="MDXLayout">
    <Layout slug={props.pageContext.frontmatter.slug} mdxType="Layout">
      <div>
        <DateConverter frontmatter={props.pageContext.frontmatter} mdxType="DateConverter" />
        <p className="blogtitle">
          {props.pageContext.frontmatter.title}
        </p>
      </div>
      <Sketch sketch={sketchQ} mdxType="Sketch" />
      <p className="intro">
        <p>{`This is the third post of the blog series `}
          <Link to="/posts/rllbl" mdxType="Link">{`Reinforcement learning: line by line`}</Link>
          {`. The interactive
sketch shows an implementation of the tabular `}
          <em {...{
            "parentName": "p"
          }}>{`Q-learning`}</em>
          {` algorithm (Watkins, `}
          <a href="https://scholar.google.de/scholar?hl=en&as_sdt=0%2C5&q=watkins++Learning+from++delayed++rewards.&btnG=">{`1989`}</a>
          {`)
applied to a simple game, called the `}
          <em {...{
            "parentName": "p"
          }}>{`Pancakes Gridworld`}</em>
          {`. See `}
          <Link to="/posts/mdp" mdxType="Link">{`this post`}</Link>
          {` for more information about the Pancakes
Gridworld as well as the notation and foundational concepts required to
understand the algorithm. In case you are completely new to reinforcement
learning (RL), see `}
          <Link to="/posts/rllbl#rl" mdxType="Link">{`here`}</Link>
          {` for an informal
introduction.`}</p>
      </p>
      <h4 id="qpc">
        <p>{`The algorithm: tabular Q-learning`}</p>
      </h4>
      <p>{`The agent's goal is to learn an `}
        <Link to="/posts/mdp#optval" mdxType="Link">{`optimal action
value function`}</Link>
        {` through interaction with the environment. The Pancakes
Gridworld can be `}
        <Link to="/posts/mdp#panmdp" mdxType="Link">{`modeled as a Markov decision
process`}</Link>
        {` (MDP) with finite state and action sets and we can thus represent
the value function as a set of `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`Q`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`Q(s, a)`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`Q`}</span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {`-values, one for each state-action pair
`}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`(s, a)`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {`. The agent starts with a random estimate of each `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`Q`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`Q`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.8777699999999999em",
                    "verticalAlign": "-0.19444em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`Q`}</span></span></span></span></span>
        {`-value (in the
example shown above, we initialize all `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`Q`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`s`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`a`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`Q(s, a)`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`Q`}</span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`s`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`a`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {`-values to `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mn {...{
                      "parentName": "mrow"
                    }}>{`0`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`0`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span></span></span></span></span>
        {`).`}
        <SideNote snId="gray" mdxType="SideNote">{`In the sketch shown above, the Q-values are represented by four
arrows in each cell. In the beginning these arrows are gray but they become blue
(for negative Q-values) or red (for positive Q-values) during the learning
process.`}</SideNote>
        {` During the learning process, the agent uses the current
value estimates to make decisions and uses the reward signals provided by the
environment to continuously improve its value estimates.`}</p>
      <span className="algoLineMainText">
        <KatexInline formula={`0: \\text{Loop for
  each episode: }`} mdxType="KatexInline" />
      </span>
      <ul>

        <li {...{
          "parentName": "ul"
        }}>{`This is the `}
          {`“`}
          {`outer loop`}
          {`”`}
          {`. This simply indicates that the value
function is usually learnt across different episodes.`}</li>

      </ul>
      <span className="algoLineMainText">
        <KatexInline formula={`1: S \\leftarrow
  s_0`} mdxType="KatexInline" />
      </span>
      <ul>

        <li {...{
          "parentName": "ul"
        }}>{`Set the agent to a starting state (in our case, this is always the same state
`}
          <span {...{
            "className": "math math-inline",
            "parentName": "li"
          }}><span {...{
              "className": "katex",
              "parentName": "span"
            }}><span {...{
                "className": "katex-mathml",
                "parentName": "span"
              }}><math {...{
                  "xmlns": "http://www.w3.org/1998/Math/MathML",
                  "parentName": "span"
                }}><semantics {...{
                    "parentName": "math"
                  }}><mrow {...{
                      "parentName": "semantics"
                    }}><msub {...{
                        "parentName": "mrow"
                      }}><mi {...{
                          "parentName": "msub"
                        }}>{`s`}</mi>
                        <mn {...{
                          "parentName": "msub"
                        }}>{`0`}</mn></msub></mrow>
                    <annotation {...{
                      "encoding": "application/x-tex",
                      "parentName": "semantics"
                    }}>{`s_0`}</annotation></semantics></math></span>
              <span {...{
                "className": "katex-html",
                "aria-hidden": "true",
                "parentName": "span"
              }}><span {...{
                  "className": "base",
                  "parentName": "span"
                }}><span {...{
                    "className": "strut",
                    "style": {
                      "height": "0.58056em",
                      "verticalAlign": "-0.15em"
                    },
                    "parentName": "span"
                  }} />
                  <span {...{
                    "className": "mord",
                    "parentName": "span"
                  }}><span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`s`}</span>
                    <span {...{
                      "className": "msupsub",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-t vlist-t2",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist-r",
                          "parentName": "span"
                        }}><span {...{
                            "className": "vlist",
                            "style": {
                              "height": "0.30110799999999993em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "style": {
                                "top": "-2.5500000000000003em",
                                "marginLeft": "0em",
                                "marginRight": "0.05em"
                              },
                              "parentName": "span"
                            }}><span {...{
                                "className": "pstrut",
                                "style": {
                                  "height": "2.7em"
                                },
                                "parentName": "span"
                              }} />
                              <span {...{
                                "className": "sizing reset-size6 size3 mtight",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mtight",
                                  "parentName": "span"
                                }}>{`0`}</span></span></span></span>
                          <span {...{
                            "className": "vlist-s",
                            "parentName": "span"
                          }}>{`​`}</span></span>
                        <span {...{
                          "className": "vlist-r",
                          "parentName": "span"
                        }}><span {...{
                            "className": "vlist",
                            "style": {
                              "height": "0.15em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "parentName": "span"
                            }} /></span></span></span></span></span></span></span></span></span>
          {` in the bottom-left corner of the grid).`}</li>

      </ul>
      <span className="algoLineMainText">
        <KatexInline formula={`2: \\text{Loop until
  a terminal state is reached:}`} mdxType="KatexInline" />
      </span>
      <ul>

        <li {...{
          "parentName": "ul"
        }}>{`This is the `}
          {`“`}
          {`inner loop`}
          {`”`}
          {`; it represents one episode (that is,
until one of the terminal states 🍄 or 🥞 is reached).`}</li>

      </ul>
      <span className="algoLineMainText">
        <KatexInline formula={`3: \\text{Select
  action } A \\text{ from state } S \\text{ using } \\epsilon\\text{-greedy
  policy}`} mdxType="KatexInline" />
      </span>
      <ul>

        <li {...{
          "parentName": "ul"
        }}>

          <p {...{
            "parentName": "li"
          }}>{`The so-called `}
            <em {...{
              "parentName": "p"
            }}>{`greedy policy`}</em>
            {` in a state `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`s`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`s`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.43056em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`s`}</span></span></span></span></span>
            {` is to select any value-maximizing
action in that state, that is, `}
            <KatexInline formula={`\\pi(s) = \\argmax_{a \\in
  \\mathcal{A}(s)} \\ Q(s, a)`} mdxType="KatexInline" />
            {`. In other words, the agent selects an action
that is assumed to be best, according to the current beliefs (`}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`Q`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`Q`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.8777699999999999em",
                        "verticalAlign": "-0.19444em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`Q`}</span></span></span></span></span>
            {`-values).`}</p>

        </li>


        <li {...{
          "parentName": "ul"
        }}>

          <p {...{
            "parentName": "li"
          }}>{`The `}
            <em {...{
              "parentName": "p"
            }}><span {...{
                "className": "math math-inline",
                "parentName": "em"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`ϵ`}</mi></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`\\epsilon`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.43056em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "parentName": "span"
                      }}>{`ϵ`}</span></span></span></span></span>
              {`-greedy policy`}</em>
            {` is a stochastic policy based on the greedy
policy. With probability of `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`(`}</mo>
                        <mn {...{
                          "parentName": "mrow"
                        }}>{`1`}</mn>
                        <mo {...{
                          "parentName": "mrow"
                        }}>{`−`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`ϵ`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`)`}</mo></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`(1-\\epsilon)`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mopen",
                      "parentName": "span"
                    }}>{`(`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`1`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.2222222222222222em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mbin",
                      "parentName": "span"
                    }}>{`−`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.2222222222222222em"
                      },
                      "parentName": "span"
                    }} /></span>
                  <span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`ϵ`}</span>
                    <span {...{
                      "className": "mclose",
                      "parentName": "span"
                    }}>{`)`}</span></span></span></span></span>
            {` it selects a value-maximizing, greedy
action. Yet with probability `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`ϵ`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`\\epsilon`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.43056em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`ϵ`}</span></span></span></span></span>
            {`, the `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`ϵ`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`\\epsilon`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.43056em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`ϵ`}</span></span></span></span></span>
            {`-greedy policy selects
an action uniformly at random!`}
            <SideNote snId="eps" mdxType="SideNote">{`Note that the
`}
              <span {...{
                "className": "math math-inline"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`ϵ`}</mi></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`\\epsilon`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.43056em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "parentName": "span"
                      }}>{`ϵ`}</span></span></span></span></span>
              {`-greedy policy contains the greedy policy (for `}
              <span {...{
                "className": "math math-inline"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`ϵ`}</mi>
                          <mo {...{
                            "parentName": "mrow"
                          }}>{`=`}</mo>
                          <mn {...{
                            "parentName": "mrow"
                          }}>{`0`}</mn></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`\\epsilon = 0`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.43056em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "parentName": "span"
                      }}>{`ϵ`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mrel",
                        "parentName": "span"
                      }}>{`=`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} /></span>
                    <span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.64444em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord",
                        "parentName": "span"
                      }}>{`0`}</span></span></span></span></span>
              {`) and the
uniformly random policy (for `}
              <span {...{
                "className": "math math-inline"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`ϵ`}</mi>
                          <mo {...{
                            "parentName": "mrow"
                          }}>{`=`}</mo>
                          <mn {...{
                            "parentName": "mrow"
                          }}>{`1`}</mn></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`\\epsilon = 1`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.43056em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "parentName": "span"
                      }}>{`ϵ`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mrel",
                        "parentName": "span"
                      }}>{`=`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} /></span>
                    <span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.64444em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord",
                        "parentName": "span"
                      }}>{`1`}</span></span></span></span></span>
              {`) as special cases.`}</SideNote>
            {`
Occasionally selecting non-value-maximizing actions is called `}
            <em {...{
              "parentName": "p"
            }}>{`exploration`}</em>
            {` and
is required for the algorithm to converge to an optimal policy; see also the
FAQs further below.`}</p>

        </li>

      </ul>
      <span className="algoLineMainText">
        <KatexInline formula={`4: \\text{Take
  action } A, \\text{ observe reward } R \\text{ and new state } S'`} mdxType="KatexInline" />
      </span>
      <ul>

        <li {...{
          "parentName": "ul"
        }}>{`The response of the environment to the action executed by the agent.`}</li>

      </ul>
      <span className="algoLineMainText">
        <KatexInline formula={`5: Q(S, A)
  \\leftarrow (1 - \\alpha) \\ Q(S, A) + \\alpha \\ [R + \\gamma \\max_{a} Q(S',
  a)]`} mdxType="KatexInline" />
      </span>
      <ul>

        <li {...{
          "parentName": "ul"
        }}>

          <p {...{
            "parentName": "li"
          }}><span id="qupdate"><em>{`The learning update. This is where all the magic happens!`}</em></span>
            {` The agent
updates the value estimate for `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`Q`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`(`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`S`}</mi>
                        <mo {...{
                          "separator": "true",
                          "parentName": "mrow"
                        }}>{`,`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`A`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`)`}</mo></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`Q(S, A)`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`Q`}</span>
                    <span {...{
                      "className": "mopen",
                      "parentName": "span"
                    }}>{`(`}</span>
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.05764em"
                      },
                      "parentName": "span"
                    }}>{`S`}</span>
                    <span {...{
                      "className": "mpunct",
                      "parentName": "span"
                    }}>{`,`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.16666666666666666em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`A`}</span>
                    <span {...{
                      "className": "mclose",
                      "parentName": "span"
                    }}>{`)`}</span></span></span></span></span>
            {`, where
`}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`S`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`S`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.68333em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.05764em"
                      },
                      "parentName": "span"
                    }}>{`S`}</span></span></span></span></span>
            {` is the current state and `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`A`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`A`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.68333em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`A`}</span></span></span></span></span>
            {` is the action that was selected by the `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`ϵ`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`\\epsilon`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.43056em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`ϵ`}</span></span></span></span></span>
            {`-greedy
policy. Let's start to unpack the update formula a bit:
`}
            <KatexBlock formula={` \\underbrace{Q(S, A)}_{\\text{new estimate}} \\leftarrow \\textcolor{red}{(1 -
  \\alpha)} \\ 
  \\underbrace{Q(S, A)}_{\\text{old estimate}} + \\textcolor{red}{\\alpha} \\ \\underbrace{[R  +
  \\gamma \\max_{a} Q(S', a)]}_{\\text{Bellman estimate}}. `} mdxType="KatexBlock" /></p>

        </li>


        <li {...{
          "parentName": "ul"
        }}>

          <p {...{
            "parentName": "li"
          }}>{`The new estimate of `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`Q`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`(`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`S`}</mi>
                        <mo {...{
                          "separator": "true",
                          "parentName": "mrow"
                        }}>{`,`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`A`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`)`}</mo></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`Q(S, A)`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`Q`}</span>
                    <span {...{
                      "className": "mopen",
                      "parentName": "span"
                    }}>{`(`}</span>
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.05764em"
                      },
                      "parentName": "span"
                    }}>{`S`}</span>
                    <span {...{
                      "className": "mpunct",
                      "parentName": "span"
                    }}>{`,`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.16666666666666666em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`A`}</span>
                    <span {...{
                      "className": "mclose",
                      "parentName": "span"
                    }}>{`)`}</span></span></span></span></span>
            {` is a weighted average of the agent's previous
estimate of `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`Q`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`(`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`S`}</mi>
                        <mo {...{
                          "separator": "true",
                          "parentName": "mrow"
                        }}>{`,`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`A`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`)`}</mo></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`Q(S, A)`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`Q`}</span>
                    <span {...{
                      "className": "mopen",
                      "parentName": "span"
                    }}>{`(`}</span>
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.05764em"
                      },
                      "parentName": "span"
                    }}>{`S`}</span>
                    <span {...{
                      "className": "mpunct",
                      "parentName": "span"
                    }}>{`,`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.16666666666666666em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`A`}</span>
                    <span {...{
                      "className": "mclose",
                      "parentName": "span"
                    }}>{`)`}</span></span></span></span></span>
            {` and the `}
            {`“`}
            {`Bellman estimate`}
            {`”`}
            {` of `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`Q`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`(`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`S`}</mi>
                        <mo {...{
                          "separator": "true",
                          "parentName": "mrow"
                        }}>{`,`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`A`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`)`}</mo></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`Q(S, A)`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`Q`}</span>
                    <span {...{
                      "className": "mopen",
                      "parentName": "span"
                    }}>{`(`}</span>
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.05764em"
                      },
                      "parentName": "span"
                    }}>{`S`}</span>
                    <span {...{
                      "className": "mpunct",
                      "parentName": "span"
                    }}>{`,`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.16666666666666666em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`A`}</span>
                    <span {...{
                      "className": "mclose",
                      "parentName": "span"
                    }}>{`)`}</span></span></span></span></span>
            {` (see below). The
learning rate `}
            <KatexInline formula={`\\textcolor{red}{\\alpha}`} mdxType="KatexInline" />
            {` determines
how much weight we give to either of these estimates. For example, say our
learning rate is `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`α`}</mi>
                        <mo {...{
                          "parentName": "mrow"
                        }}>{`=`}</mo>
                        <mn {...{
                          "parentName": "mrow"
                        }}>{`0.1`}</mn></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`\\alpha = 0.1`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.43056em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.0037em"
                      },
                      "parentName": "span"
                    }}>{`α`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.2777777777777778em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mrel",
                      "parentName": "span"
                    }}>{`=`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.2777777777777778em"
                      },
                      "parentName": "span"
                    }} /></span>
                  <span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.64444em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`0`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`.`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`1`}</span></span></span></span></span>
            {`, then our new value estimate consists of `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mn {...{
                          "parentName": "mrow"
                        }}>{`1`}</mn>
                        <mo {...{
                          "parentName": "mrow"
                        }}>{`−`}</mo>
                        <mn {...{
                          "parentName": "mrow"
                        }}>{`0.1`}</mn>
                        <mo {...{
                          "parentName": "mrow"
                        }}>{`=`}</mo>
                        <mn {...{
                          "parentName": "mrow"
                        }}>{`90`}</mn>
                        <mi {...{
                          "mathvariant": "normal",
                          "parentName": "mrow"
                        }}>{`%`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`1 -
0.1 = 90\\%`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.72777em",
                        "verticalAlign": "-0.08333em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`1`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.2222222222222222em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mbin",
                      "parentName": "span"
                    }}>{`−`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.2222222222222222em"
                      },
                      "parentName": "span"
                    }} /></span>
                  <span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.64444em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`0`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`.`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`1`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.2777777777777778em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mrel",
                      "parentName": "span"
                    }}>{`=`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.2777777777777778em"
                      },
                      "parentName": "span"
                    }} /></span>
                  <span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.80556em",
                        "verticalAlign": "-0.05556em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`9`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`0`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`%`}</span></span></span></span></span>
            {` of the previous estimate of `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`Q`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`(`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`S`}</mi>
                        <mo {...{
                          "separator": "true",
                          "parentName": "mrow"
                        }}>{`,`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`A`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`)`}</mo></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`Q(S, A)`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`Q`}</span>
                    <span {...{
                      "className": "mopen",
                      "parentName": "span"
                    }}>{`(`}</span>
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.05764em"
                      },
                      "parentName": "span"
                    }}>{`S`}</span>
                    <span {...{
                      "className": "mpunct",
                      "parentName": "span"
                    }}>{`,`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.16666666666666666em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`A`}</span>
                    <span {...{
                      "className": "mclose",
                      "parentName": "span"
                    }}>{`)`}</span></span></span></span></span>
            {` and `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mn {...{
                          "parentName": "mrow"
                        }}>{`10`}</mn>
                        <mi {...{
                          "mathvariant": "normal",
                          "parentName": "mrow"
                        }}>{`%`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`10\\%`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.80556em",
                        "verticalAlign": "-0.05556em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`1`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`0`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}>{`%`}</span></span></span></span></span>
            {` of the Bellman
estimate.`}
            <SideNote snId="lr" mdxType="SideNote">{` If the learning rate is too low (in the extreme
case, `}
              <span {...{
                "className": "math math-inline"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`α`}</mi>
                          <mo {...{
                            "parentName": "mrow"
                          }}>{`=`}</mo>
                          <mn {...{
                            "parentName": "mrow"
                          }}>{`0`}</mn></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`\\alpha = 0`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.43056em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "style": {
                          "marginRight": "0.0037em"
                        },
                        "parentName": "span"
                      }}>{`α`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mrel",
                        "parentName": "span"
                      }}>{`=`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} /></span>
                    <span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.64444em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord",
                        "parentName": "span"
                      }}>{`0`}</span></span></span></span></span>
              {`), we never actually learn anything new because the new
estimate always equals the old estimate. If, on the other hand, the learning rate
is too high (say, `}
              <span {...{
                "className": "math math-inline"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`α`}</mi>
                          <mo {...{
                            "parentName": "mrow"
                          }}>{`=`}</mo>
                          <mn {...{
                            "parentName": "mrow"
                          }}>{`1`}</mn></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`\\alpha = 1`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.43056em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "style": {
                          "marginRight": "0.0037em"
                        },
                        "parentName": "span"
                      }}>{`α`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mrel",
                        "parentName": "span"
                      }}>{`=`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} /></span>
                    <span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.64444em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord",
                        "parentName": "span"
                      }}>{`1`}</span></span></span></span></span>
              {`), we `}
              {`“`}
              {`throw away`}
              {`”`}
              {` everything we've
learned about `}
              <span {...{
                "className": "math math-inline"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`Q`}</mi>
                          <mo {...{
                            "stretchy": "false",
                            "parentName": "mrow"
                          }}>{`(`}</mo>
                          <mi {...{
                            "parentName": "mrow"
                          }}>{`S`}</mi>
                          <mo {...{
                            "separator": "true",
                            "parentName": "mrow"
                          }}>{`,`}</mo>
                          <mi {...{
                            "parentName": "mrow"
                          }}>{`A`}</mi>
                          <mo {...{
                            "stretchy": "false",
                            "parentName": "mrow"
                          }}>{`)`}</mo></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`Q(S, A)`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "1em",
                          "verticalAlign": "-0.25em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "parentName": "span"
                      }}>{`Q`}</span>
                      <span {...{
                        "className": "mopen",
                        "parentName": "span"
                      }}>{`(`}</span>
                      <span {...{
                        "className": "mord mathnormal",
                        "style": {
                          "marginRight": "0.05764em"
                        },
                        "parentName": "span"
                      }}>{`S`}</span>
                      <span {...{
                        "className": "mpunct",
                        "parentName": "span"
                      }}>{`,`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.16666666666666666em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "parentName": "span"
                      }}>{`A`}</span>
                      <span {...{
                        "className": "mclose",
                        "parentName": "span"
                      }}>{`)`}</span></span></span></span></span>
              {` during the update.`}</SideNote></p>

        </li>


        <li {...{
          "parentName": "ul"
        }}>

          <p {...{
            "parentName": "li"
          }}>{`The Bellman estimate `}
            <KatexInline formula={`\\hat{Q}(S, A) = [R  + \\gamma
  \\max_{a} Q(S', a)]`} mdxType="KatexInline" />
            {` is based on the `}
            <Link to="/posts/mdp#Bellman" mdxType="Link">{`Bellman equations of the optimal
action-value function`}</Link>
            {`.`}
            <SideNote snId="bellmaneq" mdxType="SideNote">{` In fact, the `}
              <em>{`deterministic`}</em>
              {`
Bellman equations for the optimal value function, given by `}
              <KatexInline formula={`
      q_{\\ast}(s, a) = r + \\gamma q_{\\ast}(s', a_\\ast)
  `} mdxType="KatexInline" />
              {`, for all `}
              <span {...{
                "className": "math math-inline"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`s`}</mi>
                          <mo {...{
                            "parentName": "mrow"
                          }}>{`∈`}</mo>
                          <mi {...{
                            "mathvariant": "script",
                            "parentName": "mrow"
                          }}>{`S`}</mi></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`s \\in \\mathcal{S}`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.5782em",
                          "verticalAlign": "-0.0391em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "parentName": "span"
                      }}>{`s`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mrel",
                        "parentName": "span"
                      }}>{`∈`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} /></span>
                    <span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.68333em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord",
                        "parentName": "span"
                      }}><span {...{
                          "className": "mord mathcal",
                          "style": {
                            "marginRight": "0.075em"
                          },
                          "parentName": "span"
                        }}>{`S`}</span></span></span></span></span></span>
              {` and `}
              <span {...{
                "className": "math math-inline"
              }}><span {...{
                  "className": "katex",
                  "parentName": "span"
                }}><span {...{
                    "className": "katex-mathml",
                    "parentName": "span"
                  }}><math {...{
                      "xmlns": "http://www.w3.org/1998/Math/MathML",
                      "parentName": "span"
                    }}><semantics {...{
                        "parentName": "math"
                      }}><mrow {...{
                          "parentName": "semantics"
                        }}><mi {...{
                            "parentName": "mrow"
                          }}>{`a`}</mi>
                          <mo {...{
                            "parentName": "mrow"
                          }}>{`∈`}</mo>
                          <mi {...{
                            "mathvariant": "script",
                            "parentName": "mrow"
                          }}>{`A`}</mi></mrow>
                        <annotation {...{
                          "encoding": "application/x-tex",
                          "parentName": "semantics"
                        }}>{`a \\in \\mathcal{A}`}</annotation></semantics></math></span>
                  <span {...{
                    "className": "katex-html",
                    "aria-hidden": "true",
                    "parentName": "span"
                  }}><span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.5782em",
                          "verticalAlign": "-0.0391em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord mathnormal",
                        "parentName": "span"
                      }}>{`a`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mrel",
                        "parentName": "span"
                      }}>{`∈`}</span>
                      <span {...{
                        "className": "mspace",
                        "style": {
                          "marginRight": "0.2777777777777778em"
                        },
                        "parentName": "span"
                      }} /></span>
                    <span {...{
                      "className": "base",
                      "parentName": "span"
                    }}><span {...{
                        "className": "strut",
                        "style": {
                          "height": "0.68333em",
                          "verticalAlign": "0em"
                        },
                        "parentName": "span"
                      }} />
                      <span {...{
                        "className": "mord",
                        "parentName": "span"
                      }}><span {...{
                          "className": "mord mathcal",
                          "parentName": "span"
                        }}>{`A`}</span></span></span></span></span></span>
              {`, look almost the same as
the Bellman estimator!`}</SideNote>
            {` Intuitively speaking, the Bellman estimator
decomposes the expected return `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mover {...{
                          "accent": "true",
                          "parentName": "mrow"
                        }}><mi {...{
                            "parentName": "mover"
                          }}>{`Q`}</mi>
                          <mo {...{
                            "parentName": "mover"
                          }}>{`^`}</mo></mover>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`(`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`S`}</mi>
                        <mo {...{
                          "separator": "true",
                          "parentName": "mrow"
                        }}>{`,`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`A`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`)`}</mo></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`\\hat{Q}(S, A)`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1.19677em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord accent",
                      "parentName": "span"
                    }}><span {...{
                        "className": "vlist-t vlist-t2",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist-r",
                          "parentName": "span"
                        }}><span {...{
                            "className": "vlist",
                            "style": {
                              "height": "0.9467699999999999em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "style": {
                                "top": "-3em"
                              },
                              "parentName": "span"
                            }}><span {...{
                                "className": "pstrut",
                                "style": {
                                  "height": "3em"
                                },
                                "parentName": "span"
                              }} />
                              <span {...{
                                "className": "mord",
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord mathnormal",
                                  "parentName": "span"
                                }}>{`Q`}</span></span></span>
                            <span {...{
                              "style": {
                                "top": "-3.25233em"
                              },
                              "parentName": "span"
                            }}><span {...{
                                "className": "pstrut",
                                "style": {
                                  "height": "3em"
                                },
                                "parentName": "span"
                              }} />
                              <span {...{
                                "className": "accent-body",
                                "style": {
                                  "left": "-0.16666em"
                                },
                                "parentName": "span"
                              }}><span {...{
                                  "className": "mord",
                                  "parentName": "span"
                                }}>{`^`}</span></span></span></span>
                          <span {...{
                            "className": "vlist-s",
                            "parentName": "span"
                          }}>{`​`}</span></span>
                        <span {...{
                          "className": "vlist-r",
                          "parentName": "span"
                        }}><span {...{
                            "className": "vlist",
                            "style": {
                              "height": "0.19444em"
                            },
                            "parentName": "span"
                          }}><span {...{
                              "parentName": "span"
                            }} /></span></span></span></span>
                    <span {...{
                      "className": "mopen",
                      "parentName": "span"
                    }}>{`(`}</span>
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.05764em"
                      },
                      "parentName": "span"
                    }}>{`S`}</span>
                    <span {...{
                      "className": "mpunct",
                      "parentName": "span"
                    }}>{`,`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.16666666666666666em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`A`}</span>
                    <span {...{
                      "className": "mclose",
                      "parentName": "span"
                    }}>{`)`}</span></span></span></span></span>
            {` into a) the reward that is
obtained in the following time step, given by `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`R`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`R`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.68333em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.00773em"
                      },
                      "parentName": "span"
                    }}>{`R`}</span></span></span></span></span>
            {`, and b) the return that is expected
`}
            <em {...{
              "parentName": "p"
            }}>{`after`}</em>
            {` that time step (estimated by the currently best `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`Q`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`Q`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.8777699999999999em",
                        "verticalAlign": "-0.19444em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`Q`}</span></span></span></span></span>
            {`-value in the next
state, `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><msub {...{
                          "parentName": "mrow"
                        }}><mo {...{
                            "parentName": "msub"
                          }}><mi {...{
                              "parentName": "mo"
                            }}>{`max`}</mi>
                            <mo {...{
                              "parentName": "mo"
                            }}>{`⁡`}</mo></mo>
                          <mi {...{
                            "parentName": "msub"
                          }}>{`a`}</mi></msub>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`Q`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`(`}</mo>
                        <msup {...{
                          "parentName": "mrow"
                        }}><mi {...{
                            "parentName": "msup"
                          }}>{`S`}</mi>
                          <mo {...{
                            "mathvariant": "normal",
                            "lspace": "0em",
                            "rspace": "0em",
                            "parentName": "msup"
                          }}>{`′`}</mo></msup>
                        <mo {...{
                          "separator": "true",
                          "parentName": "mrow"
                        }}>{`,`}</mo>
                        <mi {...{
                          "parentName": "mrow"
                        }}>{`a`}</mi>
                        <mo {...{
                          "stretchy": "false",
                          "parentName": "mrow"
                        }}>{`)`}</mo></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`\\max_{a} Q(S', a)`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "1.001892em",
                        "verticalAlign": "-0.25em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mop",
                      "parentName": "span"
                    }}><span {...{
                        "className": "mop",
                        "parentName": "span"
                      }}>{`max`}</span>
                      <span {...{
                        "className": "msupsub",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist-t vlist-t2",
                          "parentName": "span"
                        }}><span {...{
                            "className": "vlist-r",
                            "parentName": "span"
                          }}><span {...{
                              "className": "vlist",
                              "style": {
                                "height": "0.151392em"
                              },
                              "parentName": "span"
                            }}><span {...{
                                "style": {
                                  "top": "-2.5500000000000003em",
                                  "marginRight": "0.05em"
                                },
                                "parentName": "span"
                              }}><span {...{
                                  "className": "pstrut",
                                  "style": {
                                    "height": "2.7em"
                                  },
                                  "parentName": "span"
                                }} />
                                <span {...{
                                  "className": "sizing reset-size6 size3 mtight",
                                  "parentName": "span"
                                }}><span {...{
                                    "className": "mord mtight",
                                    "parentName": "span"
                                  }}><span {...{
                                      "className": "mord mathnormal mtight",
                                      "parentName": "span"
                                    }}>{`a`}</span></span></span></span></span>
                            <span {...{
                              "className": "vlist-s",
                              "parentName": "span"
                            }}>{`​`}</span></span>
                          <span {...{
                            "className": "vlist-r",
                            "parentName": "span"
                          }}><span {...{
                              "className": "vlist",
                              "style": {
                                "height": "0.15em"
                              },
                              "parentName": "span"
                            }}><span {...{
                                "parentName": "span"
                              }} /></span></span></span></span></span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.16666666666666666em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`Q`}</span>
                    <span {...{
                      "className": "mopen",
                      "parentName": "span"
                    }}>{`(`}</span>
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}><span {...{
                        "className": "mord mathnormal",
                        "style": {
                          "marginRight": "0.05764em"
                        },
                        "parentName": "span"
                      }}>{`S`}</span>
                      <span {...{
                        "className": "msupsub",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist-t",
                          "parentName": "span"
                        }}><span {...{
                            "className": "vlist-r",
                            "parentName": "span"
                          }}><span {...{
                              "className": "vlist",
                              "style": {
                                "height": "0.751892em"
                              },
                              "parentName": "span"
                            }}><span {...{
                                "style": {
                                  "top": "-3.063em",
                                  "marginRight": "0.05em"
                                },
                                "parentName": "span"
                              }}><span {...{
                                  "className": "pstrut",
                                  "style": {
                                    "height": "2.7em"
                                  },
                                  "parentName": "span"
                                }} />
                                <span {...{
                                  "className": "sizing reset-size6 size3 mtight",
                                  "parentName": "span"
                                }}><span {...{
                                    "className": "mord mtight",
                                    "parentName": "span"
                                  }}><span {...{
                                      "className": "mord mtight",
                                      "parentName": "span"
                                    }}>{`′`}</span></span></span></span></span></span></span></span></span>
                    <span {...{
                      "className": "mpunct",
                      "parentName": "span"
                    }}>{`,`}</span>
                    <span {...{
                      "className": "mspace",
                      "style": {
                        "marginRight": "0.16666666666666666em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "parentName": "span"
                    }}>{`a`}</span>
                    <span {...{
                      "className": "mclose",
                      "parentName": "span"
                    }}>{`)`}</span></span></span></span></span>
            {`). One important difference between these two
components is that `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`R`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`R`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.68333em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.00773em"
                      },
                      "parentName": "span"
                    }}>{`R`}</span></span></span></span></span>
            {` is an actual reward that was just observed, whereas the
value of the next state is an estimate itself (one that, at the beginning of the
learning process, is initialized randomly and thus usually not very accurate).`}</p>

        </li>


        <li {...{
          "parentName": "ul"
        }}>

          <p {...{
            "parentName": "li"
          }}>{`Especially in the beginning, most of the learning progress is thus made for
state-action pairs that lead to an important reward `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`R`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`R`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.68333em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.00773em"
                      },
                      "parentName": "span"
                    }}>{`R`}</span></span></span></span></span>
            {` (for example, an action
that leads the agent onto the 🥞-cell). Over time this knowledge is then
propagated via the `}
            {`“`}
            {`bootstrapping`}
            {`”`}
            {` mechanism that, simply put,
relates the value of a state `}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><mi {...{
                          "parentName": "mrow"
                        }}>{`S`}</mi></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`S`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.68333em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord mathnormal",
                      "style": {
                        "marginRight": "0.05764em"
                      },
                      "parentName": "span"
                    }}>{`S`}</span></span></span></span></span>
            {` to the value of the following state
`}
            <span {...{
              "className": "math math-inline",
              "parentName": "p"
            }}><span {...{
                "className": "katex",
                "parentName": "span"
              }}><span {...{
                  "className": "katex-mathml",
                  "parentName": "span"
                }}><math {...{
                    "xmlns": "http://www.w3.org/1998/Math/MathML",
                    "parentName": "span"
                  }}><semantics {...{
                      "parentName": "math"
                    }}><mrow {...{
                        "parentName": "semantics"
                      }}><msup {...{
                          "parentName": "mrow"
                        }}><mi {...{
                            "parentName": "msup"
                          }}>{`S`}</mi>
                          <mo {...{
                            "mathvariant": "normal",
                            "lspace": "0em",
                            "rspace": "0em",
                            "parentName": "msup"
                          }}>{`′`}</mo></msup></mrow>
                      <annotation {...{
                        "encoding": "application/x-tex",
                        "parentName": "semantics"
                      }}>{`S'`}</annotation></semantics></math></span>
                <span {...{
                  "className": "katex-html",
                  "aria-hidden": "true",
                  "parentName": "span"
                }}><span {...{
                    "className": "base",
                    "parentName": "span"
                  }}><span {...{
                      "className": "strut",
                      "style": {
                        "height": "0.751892em",
                        "verticalAlign": "0em"
                      },
                      "parentName": "span"
                    }} />
                    <span {...{
                      "className": "mord",
                      "parentName": "span"
                    }}><span {...{
                        "className": "mord mathnormal",
                        "style": {
                          "marginRight": "0.05764em"
                        },
                        "parentName": "span"
                      }}>{`S`}</span>
                      <span {...{
                        "className": "msupsub",
                        "parentName": "span"
                      }}><span {...{
                          "className": "vlist-t",
                          "parentName": "span"
                        }}><span {...{
                            "className": "vlist-r",
                            "parentName": "span"
                          }}><span {...{
                              "className": "vlist",
                              "style": {
                                "height": "0.751892em"
                              },
                              "parentName": "span"
                            }}><span {...{
                                "style": {
                                  "top": "-3.063em",
                                  "marginRight": "0.05em"
                                },
                                "parentName": "span"
                              }}><span {...{
                                  "className": "pstrut",
                                  "style": {
                                    "height": "2.7em"
                                  },
                                  "parentName": "span"
                                }} />
                                <span {...{
                                  "className": "sizing reset-size6 size3 mtight",
                                  "parentName": "span"
                                }}><span {...{
                                    "className": "mord mtight",
                                    "parentName": "span"
                                  }}><span {...{
                                      "className": "mord mtight",
                                      "parentName": "span"
                                    }}>{`′`}</span></span></span></span></span></span></span></span></span></span></span></span></span>
            {`.`}
            <SideNote snId="propagation" mdxType="SideNote">{` You can observe this behavior in the sketch
above. The first red arrow (positive value) occurs when the agent hits the
🥞-cell for the first time. After that, the red arrows (that is, the knowledge
that 🥞 are close) slowly but surely propagate to the starting state.`}</SideNote></p>

        </li>

      </ul>
      <span className="algoLineMainText">
        <KatexInline formula={`6: S \\leftarrow
  S'`} mdxType="KatexInline" />
      </span>
      <ul>

        <li {...{
          "parentName": "ul"
        }}>{`The `}
          {`“`}
          {`next state`}
          {`”`}
          {` becomes the `}
          {`“`}
          {`current state`}
          {`”`}
          {`. If the
current state is now a terminal state, then the episode is over and the
algorithm jumps back to line 1. If the episode is not over yet, the algorithm
jumps back to line 3 to select the next action.`}</li>

      </ul>
      <h4 id="faq">
        <p>{`FAQ`}</p>
      </h4>
      <p><strong {...{
          "parentName": "p"
        }}>{`(Why) does the agent need to explore?`}</strong>
        {` An agent that never explores might get stuck in
a sub-optimal policy. Consider the illustrative example shown in the sketch
below. You can see the agent's current Q-value estimates as shown by the red
arrows. The greedy policy with respect to this value function actually leads the
agent to the 🥞-cell, just not in an optimal way (the expected episode return of
this policy is `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mn {...{
                      "parentName": "mrow"
                    }}>{`0`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`0`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span></span></span></span></span>
        {`). The exploration parameter in this sketch is set to
`}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`ϵ`}</mi>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`0`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\epsilon = 0`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`ϵ`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span></span></span></span></span>
        {` by default, so if you press the `}
        <span className="textInlineMaterial">{`play_arrow`}</span>
        {` button, the agent will always
follow the same, inefficient path.`}</p>
      <Sketch sketch={sketchQExp} mdxType="Sketch" />
      <p>{`Now increase the exploration parameter a little bit to, say, `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`ϵ`}</mi>
                    <mo {...{
                      "parentName": "mrow"
                    }}>{`=`}</mo>
                    <mn {...{
                      "parentName": "mrow"
                    }}>{`0.3`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\epsilon = 0.3`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`ϵ`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mrel",
                  "parentName": "span"
                }}>{`=`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.2777777777777778em"
                  },
                  "parentName": "span"
                }} /></span>
              <span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`0`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`.`}</span>
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`3`}</span></span></span></span></span>
        {`
(using the slider next to the bottom-right corner of the gridworld). You will
see that, after some time, the agent finds a shorter route to the pancakes and
updates its action-value estimates accordingly. Once an optimal policy is found
(you can check this using the `}
        {`“`}
        {`Greedy policy`}
        {`”`}
        {` button below the
sketch), you could dial the agent's exploration behavior back down to see that
the agent now yields the optimal episode return of `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mn {...{
                      "parentName": "mrow"
                    }}>{`4`}</mn></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`4`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.64444em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord",
                  "parentName": "span"
                }}>{`4`}</span></span></span></span></span>
        {`.`}</p>
      <p><strong {...{
          "parentName": "p"
        }}>{`Why should I care? Isn't the Pancakes Gridworld way too easy?`}</strong>
        {` Your friends
probably wouldn't be particularly impressed if you showed them that `}
        <em {...{
          "parentName": "p"
        }}>{`you`}</em>
        {` could
`}
        {`“`}
        {`solve`}
        {`”`}
        {` the Pancakes Gridworld. So why should we be impressed by the
Q-learning agent? The difference is that the agent initially knows absolutely
nothing about its environment. Yes, you've read that correctly; the agent finds
itself in the unimaginable situation of not even knowing about the existence of
pancakes, let alone their deliciousness. Almost worse, the agent initially
doesn't even know the concept of a direction, or how the different cells of the
gridworld are connected to each other. All that the agent ever gets is the info
that there are four actions in every state and a reward signal after every step.
We, on the other hand, know that pancakes are good and our eyes help us to
navigate safely to them. The RL agent learns all this from scratch, which, in my
opinion, is quite an impressive achievement.`}</p>
      <span id="bellmanError">
        <p><strong {...{
            "parentName": "p"
          }}>{`What is the `}
            {`“`}
            {`Bellman error`}
            {`”`}
            {`?`}</strong></p>
      </span>
      <p>{`You might have noticed that many papers and books write the Q-learning update
rule as `}
        <KatexBlock formula={` \\underbrace{Q(S, A)}_{\\text{new estimate}} \\leftarrow 
  \\underbrace{Q(S, A)}_{\\text{old estimate}} + \\alpha \\ 
  \\underbrace{[\\textcolor{#7FB069}{R  + \\gamma \\max_{a} Q(S', a)} - \\textcolor{#FA7921}{Q(S, A)}]}_{\\text{Bellman 
  error}}. `} mdxType="KatexBlock" />
        {` This is of course just a re-arrangement of the formula shown and discussed in
the pseudo code above, but it offers another nice interpretation. The `}
        <em {...{
          "parentName": "p"
        }}>{`Bellman
error`}</em>
        {` is simply the difference between the right-hand side (RHS) and the left-hand
side (LHS) of the optimal Bellman equation applied to the current value estimates `}
        <KatexBlock formula={` \\underbrace{\\textcolor{#FA7921}{Q(S, A)}}_{\\text{LHS}} = 
  \\underbrace{\\textcolor{#7FB069}{R + \\gamma \\max_{a} Q(S', a)}}_{\\text{RHS}}. `} mdxType="KatexBlock" />
        {` The Q-learning update is thus simply a way of reducing the Bellman error by adding a
tiny bit of it to the current estimate of `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`Q`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`(`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`S`}</mi>
                    <mo {...{
                      "separator": "true",
                      "parentName": "mrow"
                    }}>{`,`}</mo>
                    <mi {...{
                      "parentName": "mrow"
                    }}>{`A`}</mi>
                    <mo {...{
                      "stretchy": "false",
                      "parentName": "mrow"
                    }}>{`)`}</mo></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`Q(S, A)`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "1em",
                    "verticalAlign": "-0.25em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`Q`}</span>
                <span {...{
                  "className": "mopen",
                  "parentName": "span"
                }}>{`(`}</span>
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.05764em"
                  },
                  "parentName": "span"
                }}>{`S`}</span>
                <span {...{
                  "className": "mpunct",
                  "parentName": "span"
                }}>{`,`}</span>
                <span {...{
                  "className": "mspace",
                  "style": {
                    "marginRight": "0.16666666666666666em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "parentName": "span"
                }}>{`A`}</span>
                <span {...{
                  "className": "mclose",
                  "parentName": "span"
                }}>{`)`}</span></span></span></span></span>
        {` (the amount that is added is
determined by the learning rate `}
        <span {...{
          "className": "math math-inline",
          "parentName": "p"
        }}><span {...{
            "className": "katex",
            "parentName": "span"
          }}><span {...{
              "className": "katex-mathml",
              "parentName": "span"
            }}><math {...{
                "xmlns": "http://www.w3.org/1998/Math/MathML",
                "parentName": "span"
              }}><semantics {...{
                  "parentName": "math"
                }}><mrow {...{
                    "parentName": "semantics"
                  }}><mi {...{
                      "parentName": "mrow"
                    }}>{`α`}</mi></mrow>
                  <annotation {...{
                    "encoding": "application/x-tex",
                    "parentName": "semantics"
                  }}>{`\\alpha`}</annotation></semantics></math></span>
            <span {...{
              "className": "katex-html",
              "aria-hidden": "true",
              "parentName": "span"
            }}><span {...{
                "className": "base",
                "parentName": "span"
              }}><span {...{
                  "className": "strut",
                  "style": {
                    "height": "0.43056em",
                    "verticalAlign": "0em"
                  },
                  "parentName": "span"
                }} />
                <span {...{
                  "className": "mord mathnormal",
                  "style": {
                    "marginRight": "0.0037em"
                  },
                  "parentName": "span"
                }}>{`α`}</span></span></span></span></span>
        {`). The smaller the Bellman
error across all state-action pairs, the closer our value-function estimates are
to the optimal action-value function.`}</p>
      <p className="intro">
        <p>{`If you liked this post, please consider following me on `}
          <Link to="https://twitter.com/JanMalteL" mdxType="Link">{`Twitter`}</Link>
          {` for updates on new blog
posts. In the `}
          <Link to="/posts/sarsa" mdxType="Link">{`next post`}</Link>
          {` we compare Q-learning to the SARSA algorithm.`}</p>
      </p>
      <Commento id={props.pageContext.frontmatter.slug} mdxType="Commento" />
    </Layout>

  </MDXLayout>;
}
;
// Static flag marking this component as MDX-generated content.
// NOTE(review): presumably read by the @mdx-js runtime/tooling — confirm before removing.
MDXContent.isMDXComponent = true;
      