// Author: Paul Oliver
// Project: Salis
// Based on the original salis-v1 VM architecture:
// https://git.pauloliver.dev/salis-v1/about/

{% set inst_count = arch_vars.inst_set|length %}

enum {
{% for i in arch_vars.inst_set %}
    {{ i[0]|join(' ') }},
{% endfor %}
};

{% if args.command in ["bench", "new"] and anc_bytes is defined %}
void arch_anc_init(struct Core *core)
{
    assert(core);

{% if arch_vars.mvec_loop %}
    uint64_t addr = {{ uint64_half }};
{% else %}
    uint64_t addr = 0;
{% endif %}

    for (uint64_t i = 0; i < {{ args.clones }}; ++i) {
        uint64_t addr_clone = addr + (({{ mvec_size }} / {{ args.clones }})) * i;
        struct Proc *panc = proc_fetch(core, i);

        panc->mb0a = addr_clone;
        panc->mb0s = {{ anc_bytes|length }};
        panc->ip = addr_clone;
        panc->sp = addr_clone;
    }
}
{% endif %}

uint64_t arch_proc_mb0_addr(const struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->mb0a;
}

uint64_t arch_proc_mb0_size(const struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->mb0s;
}

uint64_t arch_proc_mb1_addr(const struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->mb1a;
}

uint64_t arch_proc_mb1_size(const struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->mb1s;
}

uint64_t arch_proc_ip_addr(const struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->ip;
}

uint64_t arch_proc_sp_addr(const struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return proc_get(core, pix)->sp;
}

uint64_t arch_proc_slice(const struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    (void)core;
    (void)pix;
    return 1;
}

void _free_memory_block(struct Core *core, uint64_t addr, uint64_t size)
{
    assert(core);
    assert(size);

    for (uint64_t i = 0; i < size; ++i) {
        mvec_free(core, addr + i);
    }
}

void arch_on_proc_kill(struct Core *core)
{
    assert(core);
    assert(core->pnum > 1);

    struct Proc *pfst = proc_fetch(core, core->pfst);
    _free_memory_block(core, pfst->mb0a, pfst->mb0s);

    if (pfst->mb1s) {
        _free_memory_block(core, pfst->mb1a, pfst->mb1s);
    }

    memcpy(pfst, &g_dead_proc, sizeof(struct Proc));
}

uint8_t _get_inst(const struct Core *core, uint64_t addr)
{
    assert(core);
    return mvec_get_inst(core, addr) % {{ inst_count }};
}

void _increment_ip(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    proc->ip++;
    proc->sp = proc->ip;
}

bool _is_between(uint8_t inst, uint8_t lo, uint8_t hi)
{
    assert(inst < {{ inst_count }});
    assert(lo < {{ inst_count }});
    assert(hi < {{ inst_count }});
    assert(lo < hi);
    return (inst >= lo) && (inst <= hi);
}

bool _is_key(uint8_t inst)
{
    assert(inst < {{ inst_count }});
    return _is_between(inst, keya, keyp);
}

bool _is_lock(uint8_t inst)
{
    assert(inst < {{ inst_count }});
    return _is_between(inst, loka, lokp);
}

bool _is_rmod(uint8_t inst)
{
    assert(inst < {{ inst_count }});
    return _is_between(inst, nop0, nop3);
}

bool _key_lock_match(uint8_t key, uint8_t lock)
{
    assert(key < {{ inst_count }});
    assert(lock < {{ inst_count }});
    assert(_is_key(key));
    return (key - keya) == (lock - loka);
}
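// Key/lock template matching: jump and address instructions (jmpb/jmpf,
// adrb/adrf) are followed by a key (keya..keyp); the seek pointer (sp) scans
// memory one byte per call to _seek() until it sits on the lock (loka..lokp)
// whose offset matches that key. Register operands are selected with the
// nop0..nop3 modifiers (see _get_reg_addr_list below).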
bool _seek(struct Core *core, uint64_t pix, bool fwrd)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint8_t next = _get_inst(core, proc->ip + 1);

    if (!_is_key(next)) {
        _increment_ip(core, pix);
        return false;
    }

    uint8_t spin = _get_inst(core, proc->sp);

    if (_key_lock_match(next, spin)) {
        return true;
    }

    if (fwrd) {
        proc->sp++;
    } else {
        proc->sp--;
    }

    return false;
}

void _jump(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

{% if not args.optimized %}
    uint8_t next = _get_inst(core, proc->ip + 1);
    uint8_t spin = _get_inst(core, proc->sp);
    assert(_is_key(next));
    assert(_is_lock(spin));
    assert(_key_lock_match(next, spin));
{% endif %}

    proc->ip = proc->sp;
}

void _get_reg_addr_list(struct Core *core, uint64_t pix, uint64_t **rlist, int rcount, bool offset)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    assert(rlist);
    assert(rcount);
    assert(rcount < 4);

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t madr = proc->ip + (offset ? 2 : 1);

    for (int i = 0; i < rcount; ++i) {
        rlist[i] = &proc->r0x;
    }

    for (int i = 0; i < rcount; ++i) {
        uint64_t mnxt = madr + i;
        uint8_t mins = _get_inst(core, mnxt);

        if (!_is_rmod(mins)) {
            break;
        }

        switch (mins) {
        case nop0: rlist[i] = &proc->r0x; break;
        case nop1: rlist[i] = &proc->r1x; break;
        case nop2: rlist[i] = &proc->r2x; break;
        case nop3: rlist[i] = &proc->r3x; break;
        }
    }
}

void _addr(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *reg;

{% if not args.optimized %}
    uint8_t next = _get_inst(core, proc->ip + 1);
    uint8_t spin = _get_inst(core, proc->sp);
    assert(_is_key(next));
    assert(_is_lock(spin));
    assert(_key_lock_match(next, spin));
{% endif %}

    _get_reg_addr_list(core, pix, &reg, 1, true);
    *reg = proc->sp;
    _increment_ip(core, pix);
}

void _ifnz(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *reg;
    _get_reg_addr_list(core, pix, &reg, 1, false);

    uint64_t jmod = _is_rmod(_get_inst(core, proc->ip + 1)) ? 1 : 0;
    uint64_t rmod = *reg ? 1 : 2;

    proc->ip += jmod + rmod;
    proc->sp = proc->ip;
}

void _free_child_memory_of(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    assert(proc->mb1s);

    _free_memory_block(core, proc->mb1a, proc->mb1s);
    proc->mb1a = 0;
    proc->mb1s = 0;
}

// Organisms allocate new memory blocks by means of their seek pointer (sp),
// which sweeps memory 1 byte per simulation step, extending the block as it
// goes. If already-allocated memory is found mid-way, the current allocation
// is discarded.
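// Register convention for allb/allf (see _alloc below): the first register
// operand holds the requested block size; once the child block has grown to
// that size, its address is written into the second register operand.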
void _alloc(struct Core *core, uint64_t pix, bool fwrd)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *regs[2];
    _get_reg_addr_list(core, pix, regs, 2, false);

    uint64_t bsize = *regs[0];

    // Do nothing if block-size is zero
    if (!bsize) {
        _increment_ip(core, pix);
        return;
    }

    // Do nothing if seek pointer is not adjacent to allocated memory block
    // This is an error condition
    if (proc->mb1s) {
        uint64_t exp_addr = proc->mb1a;

        if (fwrd) {
            exp_addr += proc->mb1s;
        } else {
            exp_addr--;
        }

        if (proc->sp != exp_addr) {
            _increment_ip(core, pix);
            return;
        }
    }

    // Allocation was successful
    // Store block address on register
    if (proc->mb1s == bsize) {
        _increment_ip(core, pix);
        *regs[1] = proc->mb1a;
        return;
    }

    // Seek pointer collided with another allocated block
    // Discard and keep trying
    if (mvec_is_alloc(core, proc->sp)) {
        if (proc->mb1s) {
            _free_child_memory_of(core, pix);
        }

        if (fwrd) {
            proc->sp++;
        } else {
            proc->sp--;
        }

        return;
    }

    // Free (non-allocated) byte found
    // Enlarge child block 1 byte
    mvec_alloc(core, proc->sp);

    if (!proc->mb1s || !fwrd) {
        proc->mb1a = proc->sp;
    }

    proc->mb1s++;

    // Move seek pointer
    if (fwrd) {
        proc->sp++;
    } else {
        proc->sp--;
    }
}

void _bswap(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

    if (proc->mb1s) {
        uint64_t tmpa = proc->mb0a;
        uint64_t tmps = proc->mb0s;
        proc->mb0a = proc->mb1a;
        proc->mb0s = proc->mb1s;
        proc->mb1a = tmpa;
        proc->mb1s = tmps;
    }

    _increment_ip(core, pix);
}

void _bclear(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

    if (proc->mb1s) {
        _free_child_memory_of(core, pix);
    }

    _increment_ip(core, pix);
}

void _split(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);

    if (proc->mb1s) {
        struct Proc child = {0};
        child.ip = proc->mb1a;
        child.sp = proc->mb1a;
        child.mb0a = proc->mb1a;
        child.mb0s = proc->mb1s;
        proc->mb1a = 0;
        proc->mb1s = 0;

        // A new organism is born :)
        proc_new(core, &child);
    } else {
        assert(!proc->mb1a);
    }

    _increment_ip(core, pix);
}

void _3rop(struct Core *core, uint64_t pix, uint8_t inst)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    uint64_t *regs[3];
    _get_reg_addr_list(core, pix, regs, 3, false);

    // Organisms can do arithmetic using any sequence of 3 registers
    switch (inst) {
    case addn:
        *regs[0] = *regs[1] + *regs[2];
        break;
    case subn:
        *regs[0] = *regs[1] - *regs[2];
        break;
    case muln:
        *regs[0] = *regs[1] * *regs[2];
        break;
    case divn:
        // Do nothing on division by zero
        if (*regs[2]) {
            *regs[0] = *regs[1] / *regs[2];
        }

        break;
    default:
        assert(false);
    }

    _increment_ip(core, pix);
}

void _1rop(struct Core *core, uint64_t pix, uint8_t inst)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    uint64_t *reg;
    _get_reg_addr_list(core, pix, &reg, 1, false);

    switch (inst) {
    case incn: (*reg)++; break;
    case decn: (*reg)--; break;
    case notn: *reg = !(*reg); break;
    case shfl: *reg <<= 1; break;
    case shfr: *reg >>= 1; break;
    case zero: *reg = 0; break;
    case unit: *reg = 1; break;
    default: assert(false);
    }

    _increment_ip(core, pix);
}
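// pshn/popn use a fixed stack of eight slots (s0..s7) stored in the process
// struct: a push shifts s0..s6 up and discards s7, a pop shifts everything
// down and clears s7.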
void _push(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *reg;
    _get_reg_addr_list(core, pix, &reg, 1, false);

    proc->s7 = proc->s6;
    proc->s6 = proc->s5;
    proc->s5 = proc->s4;
    proc->s4 = proc->s3;
    proc->s3 = proc->s2;
    proc->s2 = proc->s1;
    proc->s1 = proc->s0;
    proc->s0 = *reg;

    _increment_ip(core, pix);
}

void _pop(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *reg;
    _get_reg_addr_list(core, pix, &reg, 1, false);

    *reg = proc->s0;
    proc->s0 = proc->s1;
    proc->s1 = proc->s2;
    proc->s2 = proc->s3;
    proc->s3 = proc->s4;
    proc->s4 = proc->s5;
    proc->s5 = proc->s6;
    proc->s6 = proc->s7;
    proc->s7 = 0;

    _increment_ip(core, pix);
}

// Direction that brings the seek pointer closer to 'dst': 0 when arrived,
// otherwise -1 (backward) or 1 (forward). Unsigned subtraction picks the
// shorter way around on a wrapping address space, preferring backward on ties.
int _sp_dir(uint64_t src, uint64_t dst)
{
    if (src == dst) {
        return 0;
    } else if (src - dst <= dst - src) {
        return -1;
    } else {
        return 1;
    }
}

void _load(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *regs[2];
    _get_reg_addr_list(core, pix, regs, 2, false);

    int sp_dir = _sp_dir(proc->sp, *regs[0]);

    if (sp_dir == 1) {
        proc->sp++;
    } else if (sp_dir == -1) {
        proc->sp--;
    } else {
        *regs[1] = mvec_get_inst(core, *regs[0]);
        _increment_ip(core, pix);
    }
}

bool _is_writeable_by(const struct Core *core, uint64_t addr, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));
    return !mvec_is_alloc(core, addr) || mvec_is_proc_owner(core, addr, pix);
}

void _write(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint64_t *regs[2];
    _get_reg_addr_list(core, pix, regs, 2, false);

    int sp_dir = _sp_dir(proc->sp, *regs[0]);

    if (sp_dir == 1) {
        proc->sp++;
    } else if (sp_dir == -1) {
        proc->sp--;
    } else {
        if (_is_writeable_by(core, *regs[0], pix)) {
            mvec_set_inst(core, *regs[0], *regs[1] % {{ inst_cap }});
        }

        _increment_ip(core, pix);
    }
}

void _2rop(struct Core *core, uint64_t pix, uint8_t inst)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    uint64_t *regs[2];
    _get_reg_addr_list(core, pix, regs, 2, false);

    switch (inst) {
    case dupl:
        *regs[1] = *regs[0];
        break;
    case swap: {
        uint64_t tmp = *regs[0];
        *regs[0] = *regs[1];
        *regs[1] = tmp;
    } break;
    default:
        assert(false);
    }

    _increment_ip(core, pix);
}

void arch_proc_step(struct Core *core, uint64_t pix)
{
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    struct Proc *proc = proc_fetch(core, pix);
    uint8_t inst = _get_inst(core, proc->ip);

    switch (inst) {
    case jmpb:
        if (_seek(core, pix, false)) {
            _jump(core, pix);
        }

        break;
    case jmpf:
        if (_seek(core, pix, true)) {
            _jump(core, pix);
        }

        break;
    case adrb:
        if (_seek(core, pix, false)) {
            _addr(core, pix);
        }

        break;
    case adrf:
        if (_seek(core, pix, true)) {
            _addr(core, pix);
        }

        break;
    case ifnz:
        _ifnz(core, pix);
        break;
    case allb:
        _alloc(core, pix, false);
        break;
    case allf:
        _alloc(core, pix, true);
        break;
    case bswp:
        _bswap(core, pix);
        break;
    case bclr:
        _bclear(core, pix);
        break;
    case splt:
        _split(core, pix);
        break;
    case addn:
    case subn:
    case muln:
    case divn:
        _3rop(core, pix, inst);
        break;
    case incn:
    case decn:
    case notn:
    case shfl:
    case shfr:
    case zero:
    case unit:
        _1rop(core, pix, inst);
        break;
    case pshn:
        _push(core, pix);
        break;
    case popn:
        _pop(core, pix);
        break;
    case load:
        _load(core, pix);
        break;
    case wrte:
        _write(core, pix);
        break;
    case dupl:
    case swap:
        _2rop(core, pix, inst);
        break;
    default:
        _increment_ip(core, pix);
        break;
    }

    return;
}
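// Debug-only consistency check (compiled out when the template is rendered
// with args.optimized): every byte of both memory blocks must be allocated
// in the memory vector and owned by the process being validated.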
{% if not args.optimized %}
void arch_validate_proc(const struct Core *core, uint64_t pix)
{
    assert(core);

    const struct Proc *proc = proc_get(core, pix);
    assert(proc->mb0s);

    if (proc->mb1a) {
        assert(proc->mb1s);
    }

    for (uint64_t i = 0; i < proc->mb0s; ++i) {
        uint64_t addr = proc->mb0a + i;
        assert(mvec_is_alloc(core, addr));
        assert(mvec_is_proc_owner(core, addr, pix));
    }

    for (uint64_t i = 0; i < proc->mb1s; ++i) {
        uint64_t addr = proc->mb1a + i;
        assert(mvec_is_alloc(core, addr));
        assert(mvec_is_proc_owner(core, addr, pix));
    }
}
{% endif %}

wchar_t arch_symbol(uint8_t inst)
{
    switch (inst % {{ inst_count }}) {
{% for i in arch_vars.inst_set %}
    case {{ i[0]|join(' ') }}: return L'{{ i[1] }}';
{% endfor %}
    }

    assert(false);
    return L'\0';
}

const char *arch_mnemonic(uint8_t inst)
{
    switch (inst % {{ inst_count }}) {
{% for i in arch_vars.inst_set %}
    case {{ i[0]|join(' ') }}: return "{{ i[0]|join(' ') }}";
{% endfor %}
    }

    assert(false);
    return NULL;
}

{% if data_push_path is defined %}
void arch_push_data_header()
{
    assert(g_sim_data);

    const char *sql = (
        "create table trend("
        "step int not null, "
{% for i in range(args.cores) %}
        "cycl_{{ i }} int not null, "
        "mall_{{ i }} int not null, "
        "pnum_{{ i }} int not null, "
        "pfst_{{ i }} int not null, "
        "plst_{{ i }} int not null, "
        "avrg_mb0s_{{ i }} real not null, "
        "avrg_mb1s_{{ i }} real not null, "
{% set outer_loop = loop %}
{% for j in arch_vars.inst_set %}
        "inst_{{ j[0]|join(' ') }}_{{ i }} int not null{% if not outer_loop.last or not loop.last %},{% endif %} "
{% endfor %}
{% endfor %}
        ");"
    );

    g_info("Generating 'trend' table in SQLite database");

    int sql_res;
    char *sql_err;

    // Only handle busy database errors
    // Application should fail on all other error conditions
    while ((sql_res = sqlite3_exec(g_sim_data, sql, NULL, NULL, &sql_err)) == SQLITE_BUSY) {
        g_warn("Busy SQLite database returned error '%d' with message:", sql_res);
        g_warn(sql_err);
        sqlite3_free(sql_err);
        g_info("Will retry query...");
    }

    assert(sql_res == 0);
}
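// One row per call: for every core this records its counters (cycl, mall,
// pnum, pfst, plst), the average parent (mb0) and child (mb1) block sizes
// across its processes, and a count of each instruction over the whole
// memory vector.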
void arch_push_data_line()
{
    assert(g_sim_data);

    // Gather data on all cores
    uint64_t inst_total[{{ args.cores }}][{{ inst_count }}] = { 0 };
    double avrg_mb0s[{{ args.cores }}] = { 0 };
    double avrg_mb1s[{{ args.cores }}] = { 0 };

    for (int i = 0; i < {{ args.cores }}; ++i) {
        const struct Core *core = &g_cores[i];

        // Count number of instructions
        for (uint64_t j = 0; j < {{ mvec_size }}; ++j) {
            uint8_t inst = mvec_get_inst(core, j) % {{ inst_count }};
            ++inst_total[i][inst];
        }

        // Average memory block sizes
        for (uint64_t j = core->pfst; j <= core->plst; ++j) {
            const struct Proc *proc = proc_get(core, j);
            avrg_mb0s[i] += (double)proc->mb0s;
            avrg_mb1s[i] += (double)proc->mb1s;
        }

        avrg_mb0s[i] /= core->pnum;
        avrg_mb1s[i] /= core->pnum;
    }

    // Insert new row
    char *sql = NULL;
    asprintf(
        &sql,
        "insert into trend ("
        "step, "
{% for i in range(args.cores) %}
        "cycl_{{ i }}, "
        "mall_{{ i }}, "
        "pnum_{{ i }}, "
        "pfst_{{ i }}, "
        "plst_{{ i }}, "
        "avrg_mb0s_{{ i }}, "
        "avrg_mb1s_{{ i }}, "
{% set outer_loop = loop %}
{% for j in arch_vars.inst_set %}
        "inst_{{ j[0]|join(' ') }}_{{ i }}{% if not outer_loop.last or not loop.last %},{% endif %} "
{% endfor %}
{% endfor %}
        ") values ("
        "%ld, "
{% for i in range(args.cores) %}
        "%ld, %ld, %ld, %ld, %ld, %f, %f, "
{% set outer_loop = loop %}
{% for _ in arch_vars.inst_set %}
        "%ld{% if not outer_loop.last or not loop.last %},{% endif %} "
{% endfor %}
{% endfor %}
        ");",
        g_steps,
{% for i in range(args.cores) %}
        g_cores[{{ i }}].cycl,
        g_cores[{{ i }}].mall,
        g_cores[{{ i }}].pnum,
        g_cores[{{ i }}].pfst,
        g_cores[{{ i }}].plst,
        avrg_mb0s[{{ i }}],
        avrg_mb1s[{{ i }}],
{% set outer_loop = loop %}
{% for j in arch_vars.inst_set %}
        inst_total[{{ i }}][{{ j[0]|join(' ') }}]{% if not outer_loop.last or not loop.last %},{% endif %} // inst
{% endfor %}
{% endfor %}
    );

    g_info("Pushing row to 'trend' table in SQLite database");

    int sql_res;
    char *sql_err;

    // Only handle busy database errors
    // Application should fail on all other error conditions
    while ((sql_res = sqlite3_exec(g_sim_data, sql, NULL, NULL, &sql_err)) == SQLITE_BUSY) {
        g_warn("Busy SQLite database returned error '%d' with message:", sql_res);
        g_warn(sql_err);
        sqlite3_free(sql_err);
        g_info("Will retry query...");
    }

    assert(sql_res == 0);

    // Free query string returned by 'asprintf()'
    free(sql);
}
{% endif %}