\begin{align*}
u_{\pi}(s) &\triangleq \mathbb{E}_{\pi}\left[G_t \mid S_t = s\right] \\
&= \mathbb{E}_{\pi}\left[R_{t+1} + \gamma G_{t+1} \mid S_t = s\right] \\
&= \mathbb{E}_{\pi}\left[R_{t+1} + \gamma u_{\pi}(S_{t+1}) \mid S_t = s\right]
\end{align*}
\begin{align*}
Q_1(s_t,a_t) &\leftarrow Q_1(s_t,a_t) + \alpha\Bigl[R_{t+1} + \gamma\, Q_2\bigl(s_{t+1}, \operatorname*{arg\,max}_{a} Q_1(s_{t+1},a)\bigr) - Q_1(s_t,a_t)\Bigr] \\
Q_2(s_t,a_t) &\leftarrow Q_2(s_t,a_t) + \alpha\Bigl[R_{t+1} + \gamma\, Q_1\bigl(s_{t+1}, \operatorname*{arg\,max}_{a} Q_2(s_{t+1},a)\bigr) - Q_2(s_t,a_t)\Bigr]
\end{align*}