/* eslint-disable class-methods-use-this */
// import Board from './board';

/**
 * Finds the maximum of a numeric array together with ALL indices at which it occurs.
 *
 * @param {number[]} arr - Values to scan.
 * @returns {{max: number, argMax: number[]}} `max` is the largest value
 *     (`Number.NEGATIVE_INFINITY` for an empty array); `argMax` lists every index holding that
 *     value, in ascending order. When no element qualifies (empty array, or only NaN entries)
 *     `argMax` is the sentinel `[-1]`.
 */
function maxArgMax(arr) {
    let maxValue = Number.NEGATIVE_INFINITY;
    let argMaxes = [];
    for (let ix = 0; ix < arr.length; ix += 1) {
        const value = arr[ix];
        if (value > maxValue) {
            // Strictly better: start a fresh list of ties.
            maxValue = value;
            argMaxes = [ix];
        } else if (value === maxValue) {
            // Tie with the current best. This also covers entries equal to -Infinity, which must
            // yield real indices and never be mixed with the -1 sentinel.
            argMaxes.push(ix);
        }
    }
    return { max: maxValue, argMax: argMaxes.length > 0 ? argMaxes : [-1] };
}

/**
 * Tabular Q-learning agent that is executed one pseudo-code line at a time.
 *
 * Every entry of `algoDict` pairs a rendered pseudo-code line with the method implementing it.
 * `step()` runs the method belonging to the active line and moves the highlight to the line
 * number that method returns. The constructor additionally builds the accompanying DOM: a short
 * manual with playback controls, the pseudo-code listing and one slider per hyper-parameter.
 *
 * NOTE(review): this class never reads the hyper-parameter sliders back; `epsilon`, `alpha` and
 * `gamma` are presumably updated from outside — confirm against the caller.
 */
export default class Q {
    /**
     * @param {object} p - Sketch/DOM factory providing `createDiv`, `createSpan`, `createP`,
     *     `createButton` and `createSlider` (presumably a p5.js instance — confirm).
     * @param {object} d - Shared configuration. Fields read by this class: `xAgent`, `yAgent`,
     *     `xAgentAlgo`, `yAgentAlgo`, `numStates`, `numActions`, `algoLineHeight`, `algoWidth`,
     *     `algoParamWidth`, `hMargin`, `algoTitle`, `maxFramesTillStep`, `framesPlay`,
     *     `framesStill`, `framesLearn`, `framesLearnStill` and `kat` (an object with a
     *     `render(tex, element)` method).
     * @param {object} sharedParams - Shared UI state; only `play` is read here.
     * @param {object} gw - Environment. Used members: `reset(state)`, `step(action)` (returns the
     *     reward), `isTerminal`, `currentState`, `activeAction`, `showLearningUpdates` and
     *     `addLearningUpdate(update)`.
     * @param {object} lc - Receives `updateReturn(value)` after every environment step and
     *     `pushReturn(value)` at the end of an episode.
     */
    constructor(p, d, sharedParams, gw, lc) {
        this.p = p;
        this.d = d;
        this.sharedParams = sharedParams;
        this.gw = gw;
        this.lc = lc;
        this.xPos = d.xAgent;
        this.yPos = d.yAgent;

        // Hyper-parameters: exploration rate, learning rate and discount factor.
        this.epsilon = 0.05;
        this.alpha = 0.2;
        this.gamma = 1;

        this.stateValues = new Array(d.numStates).fill(0);
        // `qTable` holds the learned values; `qTableViz` is the copy that is displayed and may lag
        // behind while learning-update animations are still running.
        this.qTable = this.initizalizeQtable();
        this.qTableViz = this.qTable.map(row => row.slice());
        this.qDelayedVizUpdateList = [];
        this.Delta = 1;
        this.currentStateindex = -1;
        this.converged = false;
        this.isTerminal = false;
        this.resetEpisodeState();

        // Pseudo-code lines. `fn` implements the line and returns the index of the next line.
        this.algoDict = {
            0: {
                depth: 0,
                text: '\\text{Loop for each episode:}',
                fn: this.newEpisode.bind(this),
                comment: undefined
            },
            1: {
                depth: 1,
                text: 'S \\leftarrow s_0',
                fn: this.initState.bind(this),
                comment: undefined
            },
            2: {
                depth: 1,
                text: '\\text{Loop until a terminal state is reached:}',
                fn: this.stepLoop.bind(this),
                comment: undefined
            },
            3: {
                depth: 2,
                text:
                    '\\text{Select action } A \\text{ from state } S \\text{ using } \\epsilon\\text{-greedy policy}',
                fn: this.chooseAction.bind(this)
            },
            4: {
                depth: 2,
                text:
                    "\\text{Take action } A, \\text{ observe reward } R \\text{ and new state } S'",
                fn: this.envStep.bind(this)
            },
            5: {
                depth: 2,
                // Displayed in the convex-combination form; learnUpdate() computes the
                // algebraically equivalent Q + alpha * (R + gamma * max_a Q(S', a) - Q).
                text:
                    "Q(S, A) \\leftarrow (1 - \\alpha) \\ Q(S, A) + \\alpha \\ [R + \\gamma \\max_{a} Q(S', a)]",
                fn: this.learnUpdate.bind(this)
            },
            6: {
                depth: 2,
                text: "S \\leftarrow S'",
                fn: this.updateCurrentS.bind(this)
            }
        };
        this.buildAlgoDict();
        this.numLines = Object.keys(this.algoDict).length;
        this.lineHeightPx = `${d.algoLineHeight}px`;
        this.lineWidthPx = `${d.algoWidth}px`;
        this.algoDivList = new Array(this.numLines);

        // One slider per hyper-parameter.
        this.paramDict = {
            0: {
                id: 'epsilon',
                comment: '// \\textit{ exploration rate}',
                description: 'Exploration',
                lab: '\\epsilon = ',
                minV: 0,
                maxV: 1,
                defaultV: this.epsilon,
                stepSize: 0.05
            },
            1: {
                id: 'alpha',
                comment: '// \\textit{ learning rate}',
                description: 'Learning rate',
                lab: '\\alpha = ',
                minV: 0,
                maxV: 1,
                defaultV: this.alpha,
                stepSize: 0.1
            },
            2: {
                id: 'gamma',
                comment: '// \\textit{ discount factor}',
                description: 'Discount factor',
                lab: '\\gamma = ',
                minV: 0,
                maxV: 1,
                defaultV: this.gamma,
                stepSize: 0.1
            }
        };
        this.numParams = Object.keys(this.paramDict).length;
        this.paramDivList = new Array(this.numParams);
        this.paramLabList = new Array(this.numParams);
        this.paramSliderList = new Array(this.numParams);
        this.paramLabelList = new Array(this.numParams);

        // DOM construction; the order of the calls determines the order of the elements.
        this.algoDiv = p.createDiv().position(this.xPos, this.yPos);
        this.createManualBox();
        this.createPseudoCodeListing();
        this.createParamControls();
    }

    /** Clears every per-episode quantity (S, S', A, R, return, cached max-Q and current Q). */
    resetEpisodeState() {
        this.currentA = undefined;
        this.currentS = undefined;
        this.nextS = undefined;
        this.currentR = undefined;
        this.currentReturn = 0;
        this.maxQvalue = undefined;
        this.currentQ = undefined;
    }

    /**
     * Builds the instruction text with the inline next/play/restart buttons and the speed slider.
     * The button labels ('skip_next', 'play_arrow', ...) together with the 'inlineMaterial' class
     * are presumably rendered as icon ligatures — confirm against the stylesheet.
     */
    createManualBox() {
        const { p } = this;
        this.manualBox = p
            .createDiv()
            .parent(this.algoDiv)
            .id('manualBox')
            .style('width', this.lineWidthPx);

        p.createSpan('Press ').parent(this.manualBox);
        this.buttonNext = p
            .createButton('skip_next')
            .parent(this.manualBox)
            .class('inlineMaterial');

        p.createSpan(
            ' to advance to the next line in the pseudo code. Alternatively, press '
        ).parent(this.manualBox);
        this.buttonPlay = p
            .createButton(this.sharedParams.play ? 'pause' : 'play_arrow')
            .parent(this.manualBox)
            .class('inlineMaterial');

        p.createSpan(' to let the agent play continuously. You can use ').parent(this.manualBox);
        this.speedSlider = p
            .createSlider(2, this.d.maxFramesTillStep, 17)
            .class('mySlider')
            .style('width', '80px')
            .parent(this.manualBox);

        p.createSpan(
            " to control the agent's speed. You can reset the learning progress by pressing "
        ).parent(this.manualBox);
        this.buttonRestart = p
            .createButton('replay')
            .parent(this.manualBox)
            .class('inlineMaterial');
        p.createSpan('.').parent(this.manualBox);
    }

    /** Builds the titled pseudo-code listing and highlights line 0 as the active line. */
    createPseudoCodeListing() {
        const { p } = this;
        this.pseudoCodeDiv = p
            .createDiv()
            .style('margin-top', '1.3em')
            .parent(this.algoDiv);

        p.createP(this.d.algoTitle)
            .parent(this.pseudoCodeDiv)
            .style('width', this.lineWidthPx)
            .class('sketchSubTitle');

        // TODO: add tooltips for variable values.
        for (let lineIx = 0; lineIx < this.numLines; lineIx += 1) {
            const lineDiv = p
                .createDiv()
                .parent(this.pseudoCodeDiv)
                .class('algoLineDiv')
                .style('width', this.lineWidthPx);
            this.algoDivList[lineIx] = lineDiv;
            this.d.kat.render(this.algoDict[lineIx].text, lineDiv.elt);
        }
        this.currentLine = 0;
        this.algoDivList[this.currentLine].class('algoLineDivActive');
    }

    /** Builds the "ALGORITHM PARAMETERS" panel: a label, a rendered symbol and a slider each. */
    createParamControls() {
        const { p, d } = this;
        // The panel width comes from the shared config `d` (the class itself has no
        // `algoParamWidth` property) and needs an explicit CSS unit.
        const panelWidthPx = `${d.algoParamWidth}px`;

        this.paramDiv = p
            .createDiv()
            .style('width', panelWidthPx)
            .position(d.xAgentAlgo, d.yAgentAlgo);
        this.algorithmParamDiv = p
            .createDiv()
            .parent(this.paramDiv)
            .style('display', 'inline-block');

        this.algoParamTitle = p
            .createP('ALGORITHM PARAMETERS')
            .parent(this.algorithmParamDiv)
            .style('width', panelWidthPx)
            .class('sketchSubTitle');

        this.verticalAlignmentDiv = p
            .createDiv()
            .parent(this.algorithmParamDiv)
            .style('display', 'inline-block');

        // Split the panel width evenly between the parameters, leaving hMargin between them.
        const individualParamDivWidth =
            (d.algoParamWidth - (this.numParams - 1) * d.hMargin) / this.numParams;
        const greekSymWidth = 48;
        const greekSymWidthPx = `${greekSymWidth}px`;
        const algoSliderWidthPx = `${individualParamDivWidth - greekSymWidth - 4}px`;

        for (let paramIx = 0; paramIx < this.numParams; paramIx += 1) {
            const param = this.paramDict[paramIx];

            if (paramIx > 0) {
                // Horizontal spacer between neighbouring parameter boxes.
                p.createDiv()
                    .style('display', 'inline-block')
                    .parent(this.verticalAlignmentDiv)
                    .style('width', `${d.hMargin}px`);
            }

            const box = p
                .createDiv()
                .parent(this.verticalAlignmentDiv)
                .style('display', 'inline-block')
                .class('paramLineDiv')
                .style('width', `${individualParamDivWidth}px`);
            this.paramDivList[paramIx] = box;

            this.paramLabelList[paramIx] = p
                .createP(param.description)
                .parent(box)
                .class('indParamLabel');

            this.paramLabList[paramIx] = p
                .createSpan()
                .style('display', 'inline-block')
                .class('indParamGreek')
                .parent(box)
                .style('width', greekSymWidthPx);
            this.d.kat.render(`${param.lab}${param.defaultV}`, this.paramLabList[paramIx].elt);

            this.paramSliderList[paramIx] = p
                .createSlider(param.minV, param.maxV, param.defaultV, param.stepSize)
                .style('width', algoSliderWidthPx)
                .parent(box)
                .class('algoSlider');
        }
    }

    /**
     * Creates a fresh Q-table with every entry set to 0.
     *
     * @returns {number[][]} `d.numStates` independent rows of `d.numActions` zeros.
     */
    initizalizeQtable() {
        return Array.from({ length: this.d.numStates }, () =>
            new Array(this.d.numActions).fill(0)
        );
    }

    /**
     * Rewrites each `algoDict[...].text` in place: prefixes it with its line number and indents
     * it by seven TeX spaces per `depth` level.
     */
    buildAlgoDict() {
        const indentUnit = '\\ \\ \\ \\ \\ \\ \\ ';
        Object.entries(this.algoDict).forEach(([key, line]) => {
            const indented = `\\ ${indentUnit.repeat(line.depth)}${line.text}`;
            line.text = `\\ \\text{${key}: } ${indented}`;
        });
    }

    /**
     * Complete restart: zeroes all q-values (learned and visualized), clears the episode state and
     * jumps back to line 0.
     */
    restart() {
        this.qTable = this.initizalizeQtable();
        this.qTableViz = this.qTable.map(row => row.slice());
        // Drop pending delayed updates; they belong to the discarded table and would otherwise be
        // added onto the freshly zeroed visualization.
        this.qDelayedVizUpdateList.length = 0;
        this.resetEpisodeState();
        this.changeActiveLine(0);
        this.currentLine = 0;
    }

    /** Executes the active pseudo-code line and advances to the line it returns. */
    step() {
        const nextLine = this.algoDict[this.currentLine].fn(this.currentLine);
        this.changeActiveLine(nextLine);
        this.currentLine = nextLine;
        // TODO: process this.qDelayedVizUpdateList
    }

    /**
     * Moves the highlight from the current line to `newLine` (does not change `currentLine`).
     *
     * @param {number} newLine - Index of the line to highlight.
     */
    changeActiveLine(newLine) {
        this.algoDivList[this.currentLine].class('algoLineDiv');
        this.algoDivList[newLine].class('algoLineDivActive');
    }

    /**
     * Line 0: start a new episode by clearing all per-episode values.
     *
     * @param {number} currentLine
     * @returns {number} Next line (initState).
     */
    newEpisode(currentLine) {
        this.resetEpisodeState();
        return currentLine + 1;
    }

    /**
     * Line 1: put the agent and the environment into start state 0.
     *
     * @param {number} currentLine
     * @returns {number} Next line (stepLoop).
     */
    initState(currentLine) {
        this.currentS = 0;
        this.gw.reset(this.currentS);
        return currentLine + 1;
    }

    /**
     * Line 2: loop head. On a terminal state the episode return is reported to `lc` and control
     * goes back to line 0; otherwise it continues with action selection.
     *
     * @param {number} currentLine
     * @returns {number} 0 on a terminal state, otherwise the next line (chooseAction).
     */
    stepLoop(currentLine) {
        if (!this.gw.isTerminal) {
            return currentLine + 1;
        }
        this.currentS = undefined;
        this.gw.currentState = undefined;
        this.lc.pushReturn(this.currentReturn);
        return 0;
    }

    /**
     * Line 3: epsilon-greedy action selection for the environment's current state. With
     * probability `epsilon` a uniformly random action is taken; otherwise a greedy action, with
     * ties between equally good actions broken uniformly at random.
     *
     * @param {number} currentLine
     * @returns {number} Next line (envStep).
     */
    chooseAction(currentLine) {
        const currentQvalues = this.qTable[this.gw.currentState];
        if (Math.random() < this.epsilon) {
            // Explore.
            this.currentA = Math.floor(Math.random() * currentQvalues.length);
        } else {
            // Exploit.
            const { argMax } = maxArgMax(currentQvalues);
            this.currentA = argMax[Math.floor(Math.random() * argMax.length)];
        }

        // Show gridworld which action has been chosen.
        this.gw.activeAction = this.currentA;
        return currentLine + 1;
    }

    /**
     * Line 4: apply the chosen action, record the reward and successor state and report the
     * running return to `lc`.
     *
     * @param {number} currentLine
     * @returns {number} Next line (learnUpdate).
     */
    envStep(currentLine) {
        this.currentR = this.gw.step(this.currentA);
        this.currentReturn += this.currentR;
        this.lc.updateReturn(this.currentReturn);
        this.nextS = this.gw.currentState;

        // Gridworld doesn't have to show the active action anymore.
        this.gw.activeAction = undefined;
        return currentLine + 1;
    }

    /**
     * Line 5: Q-learning update of Q(S, A). The bootstrap value is 0 for a terminal successor.
     * With `gw.showLearningUpdates` set, the update is also handed to the gridworld for animation
     * and the visualized table is only updated later through `processDelayedQUpdates()`;
     * otherwise the visualized table is updated immediately.
     *
     * @param {number} currentLine
     * @returns {number} Next line (updateCurrentS).
     */
    learnUpdate(currentLine) {
        const { gw } = this;
        const visualize = gw.showLearningUpdates;
        const playing = this.sharedParams.play;
        const sleepFrames = playing ? this.d.framesPlay : this.d.framesStill;
        const frames = playing ? this.d.framesLearn : this.d.framesLearnStill;

        if (visualize) {
            // The animation needs one concrete argmax action (randomly chosen among ties);
            // -1 stands for "none" when the successor state is terminal.
            let argMax = [-1];
            if (gw.isTerminal) {
                this.maxQvalue = 0;
            } else {
                const best = maxArgMax(this.qTable[this.nextS]);
                this.maxQvalue = best.max;
                argMax = best.argMax;
            }

            gw.addLearningUpdate({
                reward: this.currentR,
                maxQ: this.maxQvalue,
                maxQaction: argMax[Math.floor(Math.random() * argMax.length)],
                action: this.currentA,
                nextS: this.nextS,
                prevS: this.currentS,
                sleepFrames,
                frames,
                xPosArrow: [],
                yPosArrow: [],
                arrowAngle: [],
                xPosRew: [],
                yPosRew: [],
                rewCol: undefined,
                isTerminal: gw.isTerminal
            });
        } else {
            // No argmax needed without visualization.
            this.maxQvalue = gw.isTerminal ? 0 : Math.max(...this.qTable[this.nextS]);
        }

        // Actual update: alpha * (TD target - old estimate).
        const tdTarget = this.currentR + this.gamma * this.maxQvalue;
        const qUpdate = this.alpha * (tdTarget - this.qTable[this.currentS][this.currentA]);
        this.qTable[this.currentS][this.currentA] += qUpdate;

        if (visualize) {
            // Update visualized q-values only after the value arrow "has arrived".
            this.qDelayedVizUpdateList.push({
                sleepFrames,
                frames,
                qUpdate,
                action: this.currentA,
                prevS: this.currentS
            });
        } else {
            // Update visualized q-values immediately.
            this.qTableViz[this.currentS][this.currentA] += qUpdate;
            this.currentQ = this.qTableViz[this.currentS][this.currentA];
        }
        return currentLine + 1;
    }

    /**
     * Advances every pending delayed visualization update by one frame. An entry first counts
     * down `sleepFrames`, then `frames`; once `frames` reaches 0 its `qUpdate` is added to
     * `qTableViz` exactly once and the entry is removed. Entries are removed individually, so an
     * entry finishing before an older one never evicts or duplicates another update.
     */
    processDelayedQUpdates() {
        const stillPending = [];
        this.qDelayedVizUpdateList.forEach(update => {
            if (update.sleepFrames > 0) {
                update.sleepFrames -= 1;
                stillPending.push(update);
                return;
            }
            update.frames -= 1;
            if (update.frames > 0) {
                stillPending.push(update);
                return;
            }
            this.qTableViz[update.prevS][update.action] += update.qUpdate;
            this.currentQ = this.qTableViz[update.prevS][update.action];
        });
        // Replace the contents in place so the array identity is preserved.
        this.qDelayedVizUpdateList.splice(0, this.qDelayedVizUpdateList.length, ...stillPending);
    }

    /**
     * Line 6: the successor state becomes the current state.
     *
     * @param {number} currentLine - Unused; the loop always jumps back to its head.
     * @returns {number} 2 (stepLoop).
     */
    updateCurrentS(currentLine) {
        this.currentS = this.nextS;
        return 2;
    }
}

// function dotProduct(a, b) {
//     let out = 0;
//     for (let ix = 0; ix < a.length; ix += 1) {
//         out += a[ix] * b[ix];
//     }
//     return out;
// }

// let argmax = [-1];
// if (this.gw.isTerminal) {
//     this.maxQvalue = 0;
// } else {
//     const currentQvalues = this.qTable[this.nextS];
//     const numActions = currentQvalues.length;
//     let maxValue = Number.NEGATIVE_INFINITY;
//     for (let acIx = 0; acIx < numActions; acIx += 1) {
//         const tmpVal = currentQvalues[acIx];
//         if (tmpVal > maxValue) {
//             argmax = [acIx];
//             maxValue = tmpVal;
//         } else if (tmpVal === maxValue) {
//             argmax.push(acIx);
//         }
//     }
//     this.maxQvalue = maxValue;
// }
