// Author:  Paul Oliver <contact@pauloliver.dev>
// Project: salis-v3

// Core template of the salis simulator.
// Different architectures and UIs can be attached in order to
// create a streamlined source file.

{% for include in includes|sort %}
#include <{{ include }}>
{% endfor %}

// Each architecture defines its own process type.
// The members are generated from the (type, name) pairs listed in
// `arch_vars.proc_fields`, one struct member per pair.
struct Proc {
    {% for type, val in arch_vars.proc_fields %}
    {{ type }} {{ val }};
    {% endfor %}
};

// Simulation core
// Each core runs on a separate thread
// Core synchronization and IPC occur at set intervals
struct Core {
    // Incremented by core_step() every time the process queue wraps around
    uint64_t     cycl;
    // Number of bytes in `mvec` that currently carry the allocation flag
    uint64_t     mall;
    // State words of the pseudo-random generator advanced by muta_next()
    uint64_t     muta[4];

    // Number of live processes (plst + 1 - pfst)
    uint64_t     pnum;
    // Capacity of `pvec`, in processes
    uint64_t     pcap;
    // Index of the first (oldest) live process
    uint64_t     pfst;
    // Index of the last (newest) live process
    uint64_t     plst;
    // Index of the process that is currently being executed
    uint64_t     pcur;
    // Steps left in the time slice of the current process
    uint64_t     psli;

    // Handle of the worker thread created by salis_run_thread()
    thrd_t       thrd;
    // Number of steps the worker thread runs on its next launch
    uint64_t     thrd_idx;

    // Position inside `ivav` / `iviv`; advanced once per step, reset on sync
    uint64_t     ivpt;
    // Target addresses of pending IPC messages ({{ sync_interval }} entries)
    uint64_t    *ivav;
    // Instruction bytes of pending IPC messages ({{ sync_interval }} entries)
    uint8_t     *iviv;

    // Architectures may provide custom fields
    {% for type, val in arch_vars.core_fields %}
    {{ type }} {{ val }};
    {% endfor %}

    // Heap-allocated process array; a process `pix` lives at `pix % pcap`
    struct Proc *pvec;
    // Memory vector; each byte holds the allocation flag and an instruction
    uint8_t      mvec[{{ mvec_size }}];
    // NOTE(review): presumably padding that keeps neighbouring cores apart
    // in memory; nothing in this file reads or writes it -- confirm
    uint8_t      tgap[{{ thread_gap }}];
};

// Globals
// g_cores:     one simulation core per worker thread
// g_steps:     total number of steps executed so far (see salis_run_thread)
// g_syncs:     total number of synchronizations so far (see salis_sync)
// g_dead_proc: zero-filled process returned by proc_get() for indices that
//              do not refer to a live process
struct Core       g_cores[{{ args.cores }}];
uint64_t          g_steps;
uint64_t          g_syncs;
const struct Proc g_dead_proc;

{% if args.command in ["load", "new"] %}
// Scratch buffer in which salis_auto_save() builds the auto-save file name
char              g_asav_pbuf[{{ auto_save_name_len }}];
{% endif %}

{% if data_push_path is defined %}
// Handle of the SQLite database that receives simulation data
sqlite3          *g_sim_data;
{% endif %}

// Forward declarations
// Each architecture must define these
{% if args.command in ["bench", "new"] and anc_bytes is defined %}
// Called by core_init() right after the ancestor has been written to memory
void        arch_anc_init(struct Core *core);
{% endif %}

// Address and size of the two memory blocks a process may own; used by
// mvec_is_proc_owner() to decide whether an address belongs to a process
uint64_t    arch_proc_mb0_addr(const struct Core *core, uint64_t pix);
uint64_t    arch_proc_mb0_size(const struct Core *core, uint64_t pix);
uint64_t    arch_proc_mb1_addr(const struct Core *core, uint64_t pix);
uint64_t    arch_proc_mb1_size(const struct Core *core, uint64_t pix);
// NOTE(review): presumably the instruction and stack pointer addresses of a
// process; they are not used by the core code in this file -- confirm
uint64_t    arch_proc_ip_addr(const struct Core *core, uint64_t pix);
uint64_t    arch_proc_sp_addr(const struct Core *core, uint64_t pix);
// Number of steps a process receives when it becomes the current process
uint64_t    arch_proc_slice(const struct Core *core, uint64_t pix);
// Called by proc_kill() before the head of the process queue is advanced
void        arch_on_proc_kill(struct Core *core);
// Executes a single step of process `pix`; called once per core_step()
void        arch_proc_step(struct Core *core, uint64_t pix);

{% if not args.optimized %}
// Called for every live process by salis_validate_core()
void        arch_validate_proc(const struct Core *core, uint64_t pix);
{% endif %}

// NOTE(review): presumably map an instruction to its display symbol and
// mnemonic; they are not used by the core code in this file -- confirm
wchar_t     arch_symbol(uint8_t inst);
const char *arch_mnemonic(uint8_t inst);

{% if data_push_path is defined %}
// Called once by salis_init() after the database has been opened
void        arch_push_data_header();
// Called by salis_init() and then periodically by salis_loop()
void        arch_push_data_line();
{% endif %}

// ----------------------------------------------------------------------------
// Memory vector functions
// ----------------------------------------------------------------------------
{% if arch_vars.mvec_loop %}
// Wraps an arbitrary address into the index range of the memory vector.
uint64_t mvec_loop(uint64_t addr) {
    const uint64_t wrapped = addr % {{ mvec_size }};

    return wrapped;
}
{% endif %}

// Tells whether the byte at `addr` carries the allocation flag.
// When addresses do not wrap, anything outside the memory vector is
// reported as allocated.
bool mvec_is_alloc(const struct Core *core, uint64_t addr) {
    assert(core);

    {% if arch_vars.mvec_loop %}
    const uint8_t cell = core->mvec[mvec_loop(addr)];

    return (cell & {{ mall_flag }}) != 0;
    {% else %}
    if (addr >= {{ mvec_size }}) {
        return true;
    }

    return (core->mvec[addr] & {{ mall_flag }}) != 0;
    {% endif %}
}

void mvec_alloc(struct Core *core, uint64_t addr) {
    assert(core);
    assert(!mvec_is_alloc(core, addr));
    {% if arch_vars.mvec_loop %}
    core->mvec[mvec_loop(addr)] |= {{ mall_flag }};
    {% else %}
    assert(addr < {{ mvec_size }});
    core->mvec[addr] |= {{ mall_flag }};
    {% endif %}
    core->mall++;
}

void mvec_free(struct Core *core, uint64_t addr) {
    assert(core);
    assert(mvec_is_alloc(core, addr));
    {% if arch_vars.mvec_loop %}
    core->mvec[mvec_loop(addr)] ^= {{ mall_flag }};
    {% else %}
    assert(addr < {{ mvec_size }});
    core->mvec[addr] ^= {{ mall_flag }};
    {% endif %}
    core->mall--;
}

// Returns the raw byte (flag and instruction bits) stored at `addr`.
// When addresses do not wrap, out-of-range reads yield zero.
uint8_t mvec_get_byte(const struct Core *core, uint64_t addr) {
    assert(core);
    {% if arch_vars.mvec_loop %}
    const uint64_t idx = mvec_loop(addr);

    return core->mvec[idx];
    {% else %}
    return (addr < {{ mvec_size }}) ? core->mvec[addr] : 0;
    {% endif %}
}

// Returns only the instruction bits of the byte stored at `addr`.
// When addresses do not wrap, out-of-range reads yield zero.
uint8_t mvec_get_inst(const struct Core *core, uint64_t addr) {
    assert(core);
    {% if arch_vars.mvec_loop %}
    const uint8_t cell = core->mvec[mvec_loop(addr)];

    return cell & {{ inst_mask }};
    {% else %}
    return (addr < {{ mvec_size }}) ? (core->mvec[addr] & {{ inst_mask }}) : 0;
    {% endif %}
}

void mvec_set_inst(struct Core *core, uint64_t addr, uint8_t inst) {
    assert(core);
    assert(inst < {{ inst_cap}});
    {% if arch_vars.mvec_loop %}
    core->mvec[mvec_loop(addr)] &= {{ mall_flag }};
    core->mvec[mvec_loop(addr)] |= inst;
    {% else %}
    assert(addr < {{ mvec_size }});
    core->mvec[addr] &= {{ mall_flag }};
    core->mvec[addr] |= inst;
    {% endif %}
}

{% if args.muta_flip %}
void mvec_flip_bit(struct Core *core, uint64_t addr, int bit) {
    assert(core);
    assert(bit < 8);
    core->mvec[addr] ^= (1 << bit) & {{ inst_mask }};
}
{% endif %}

// Tells whether `pix` lies inside the window of live process indices.
bool mvec_proc_is_live(const struct Core *core, uint64_t pix) {
    assert(core);

    if (pix < core->pfst) {
        return false;
    }

    return pix <= core->plst;
}

// Tells whether `addr` falls inside one of the two memory blocks of the
// live process `pix`. The distance from the block start is taken modulo
// the memory size, so blocks may wrap around the end of memory.
bool mvec_is_proc_owner(const struct Core *core, uint64_t addr, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    // First memory block
    const uint64_t blk0_addr = arch_proc_mb0_addr(core, pix);
    const uint64_t blk0_size = arch_proc_mb0_size(core, pix);
    const uint64_t blk0_off  = (addr - blk0_addr) % {{ mvec_size }};

    if (blk0_off < blk0_size) {
        return true;
    }

    // Second memory block, only inspected when the first one did not match
    const uint64_t blk1_addr = arch_proc_mb1_addr(core, pix);
    const uint64_t blk1_size = arch_proc_mb1_size(core, pix);
    const uint64_t blk1_off  = (addr - blk1_addr) % {{ mvec_size }};

    return blk1_off < blk1_size;
}

// Searches the live processes, oldest first, for the one that owns the
// allocated address `addr` and returns its index.
// Not finding an owner is treated as a broken invariant.
uint64_t mvec_get_owner(const struct Core *core, uint64_t addr) {
    assert(core);
    assert(mvec_is_alloc(core, addr));

    uint64_t candidate = core->pfst;

    while (candidate <= core->plst) {
        if (mvec_is_proc_owner(core, addr, candidate)) {
            return candidate;
        }

        ++candidate;
    }

    // Every allocated byte is expected to belong to some live process
    assert(false);
    return -1;
}

// ----------------------------------------------------------------------------
// Mutator functions
// ----------------------------------------------------------------------------
{% if args.command in ["bench", "new"] %}
// Advances `*seed` by a fixed increment and returns a scrambled copy of the
// new seed value. Used to derive the initial mutator state of each core.
uint64_t muta_smix(uint64_t *seed) {
    assert(seed);

    *seed += 0x9e3779b97f4a7c15;

    uint64_t mixed = *seed;

    mixed ^= mixed >> 30;
    mixed *= 0xbf58476d1ce4e5b9;
    mixed ^= mixed >> 27;
    mixed *= 0x94d049bb133111eb;
    mixed ^= mixed >> 31;

    return mixed;
}
{% endif %}

// Rotates `x` left by `k` bits.
// The rotation amount is reduced modulo 64 and both shift counts are masked,
// so that `k == 0` (or any multiple of 64) never produces a shift by the
// full width of the type, which would be undefined behavior.
uint64_t muta_ro64(uint64_t x, int k) {
    const unsigned left  = (unsigned)k & 63u;
    const unsigned right = (64u - left) & 63u;

    return (x << left) | (x >> right);
}

// Produces the next pseudo-random value of the core and advances the four
// state words stored in `core->muta`.
uint64_t muta_next(struct Core *core) {
    assert(core);

    uint64_t *state = core->muta;

    // Output and scratch value are both derived from the state before it
    // gets updated
    const uint64_t result  = muta_ro64(state[1] * 5, 7) * 9;
    const uint64_t shifted = state[1] << 17;

    state[2] ^= state[0];
    state[3] ^= state[1];
    state[1] ^= state[2];
    state[0] ^= state[3];

    state[2] ^= shifted;
    state[3]  = muta_ro64(state[3], 45);

    return result;
}

// Draws a random target inside the mutation range and, if it lands inside
// the memory vector, mutates the instruction stored there.
// Two random values are always consumed, whether or not a mutation occurs.
void muta_cosmic_ray(struct Core *core) {
    assert(core);

    const uint64_t target = muta_next(core) % {{ muta_range }};
    const uint64_t noise  = muta_next(core);

    // Targets beyond the memory vector are misses
    if (target >= {{ mvec_size }}) {
        return;
    }

    {% if args.muta_flip %}
    mvec_flip_bit(core, target, (int)(noise % 8));
    {% else %}
    mvec_set_inst(core, target, noise & {{ inst_mask }});
    {% endif %}
}

// ----------------------------------------------------------------------------
// Process functions
// ----------------------------------------------------------------------------
// Appends a copy of `*proc` to the end of the process queue of the core.
// The process array is a ring indexed by `pix % pcap`; when it is full, its
// capacity is doubled and all live processes are moved to the slots that
// correspond to the new capacity.
void proc_new(struct Core *core, const struct Proc *proc) {
    assert(core);
    assert(proc);

    if (core->pnum == core->pcap) {
        // Reallocate dynamic array
        uint64_t new_pcap = core->pcap * 2;

        // Catches both a zero capacity and an overflowing multiplication
        assert(new_pcap > core->pcap);

        struct Proc *new_pvec = calloc(new_pcap, sizeof(struct Proc));

        // Same allocation policy as the rest of the file: failure is fatal
        assert(new_pvec);

        for (uint64_t pix = core->pfst; pix <= core->plst; ++pix) {
            uint64_t iold = pix % core->pcap;
            uint64_t inew = pix % new_pcap;
            memcpy(&new_pvec[inew], &core->pvec[iold], sizeof(struct Proc));
        }

        free(core->pvec);
        core->pcap = new_pcap;
        core->pvec = new_pvec;
    }

    core->pnum++;
    core->plst++;
    memcpy(&core->pvec[core->plst % core->pcap], proc, sizeof(struct Proc));
}

// Removes the oldest process from the queue of the core.
// The architecture hook runs first, while the process is still live.
// At least one process must always survive.
void proc_kill(struct Core *core) {
    assert(core);
    assert(core->pnum > 1);

    arch_on_proc_kill(core);

    // Drop the head of the queue and move the current process along with it
    core->pnum -= 1;
    core->pfst += 1;
    core->pcur += 1;
}

// Read-only access to process `pix`.
// Indices that do not refer to a live process yield the shared dead process.
const struct Proc *proc_get(const struct Core *core, uint64_t pix) {
    assert(core);

    if (!mvec_proc_is_live(core, pix)) {
        return &g_dead_proc;
    }

    return core->pvec + (pix % core->pcap);
}

// Writable access to process `pix`, which must be live.
struct Proc *proc_fetch(struct Core *core, uint64_t pix) {
    assert(core);
    assert(mvec_proc_is_live(core, pix));

    const uint64_t slot = pix % core->pcap;

    return core->pvec + slot;
}

// ----------------------------------------------------------------------------
// Core functions
// ----------------------------------------------------------------------------
{% if args.command in ["load", "new"] %}
// Serializes one core to `f`: the scalar counters first, followed by the
// IPC buffers, the whole process array and the memory vector.
// Thread bookkeeping and architecture-specific core fields are not written.
void core_save(FILE *f, const struct Core *core) {
    assert(f);
    assert(core);

    // Scalar state
    fwrite(&core->cycl, sizeof core->cycl,    1, f);
    fwrite(&core->mall, sizeof core->mall,    1, f);
    fwrite( core->muta, sizeof core->muta[0], 4, f);
    fwrite(&core->pnum, sizeof core->pnum,    1, f);
    fwrite(&core->pcap, sizeof core->pcap,    1, f);
    fwrite(&core->pfst, sizeof core->pfst,    1, f);
    fwrite(&core->plst, sizeof core->plst,    1, f);
    fwrite(&core->pcur, sizeof core->pcur,    1, f);
    fwrite(&core->psli, sizeof core->psli,    1, f);
    fwrite(&core->ivpt, sizeof core->ivpt,    1, f);

    // Bulk state
    fwrite(core->iviv, sizeof *core->iviv, {{ sync_interval }}, f);
    fwrite(core->ivav, sizeof *core->ivav, {{ sync_interval }}, f);
    fwrite(core->pvec, sizeof *core->pvec, core->pcap, f);
    fwrite(core->mvec, sizeof core->mvec[0], {{ mvec_size }}, f);
}
{% endif %}

{% if args.command in ["bench", "new"] %}
{% if anc_bytes is defined %}
void core_assemble_ancestor(struct Core *core) {
    assert(core);

    {% if arch_vars.mvec_loop %}
    uint64_t addr = {{ uint64_half }};
    {% else %}
    uint64_t addr = 0;
    {% endif %}

    uint8_t anc_bytes[] = {
        {{ anc_bytes|join(",") }}
    };

    for (uint64_t i = 0; i < sizeof(anc_bytes); ++i, ++addr) {
        for (uint64_t j = 0; j < {{ args.clones }}; ++j) {
            uint64_t addr_clone = addr + (({{ mvec_size }} / {{ args.clones }})) * j;

            mvec_alloc(core, addr_clone);
            mvec_set_inst(core, addr_clone, anc_bytes[i]);
        }
    }
}
{% endif %}

// Prepares a fresh core: seeds the mutator (only when the seed is non-zero),
// sizes the process queue for the initial clones, allocates the IPC buffers
// and the process array and, if an ancestor is configured, installs it.
void core_init(struct Core *core, uint64_t *seed) {
    assert(core);
    assert(seed);

    // A zero seed leaves the mutator state untouched
    if (*seed) {
        for (int word = 0; word < 4; ++word) {
            core->muta[word] = muta_smix(seed);
        }
    }

    core->pcap = {{ args.clones }};
    core->pnum = {{ args.clones }};
    core->plst = {{ args.clones }} - 1;

    core->iviv = calloc({{ sync_interval }}, sizeof(uint8_t));
    core->ivav = calloc({{ sync_interval }}, sizeof(uint64_t));
    core->pvec = calloc(core->pcap, sizeof(struct Proc));

    assert(core->iviv);
    assert(core->ivav);
    assert(core->pvec);

    {% if anc_bytes is defined %}
    core_assemble_ancestor(core);
    arch_anc_init(core);
    {% endif %}
}
{% endif %}

{% if args.command in ["load"] %}
// Restores one core from `f`, reading the fields in the order core_save()
// wrote them. The IPC buffers and the process array are allocated here,
// the latter with the capacity found in the file.
void core_load(FILE *f, struct Core *core) {
    assert(f);
    assert(core);

    // Scalar state
    fread(&core->cycl, sizeof core->cycl,    1, f);
    fread(&core->mall, sizeof core->mall,    1, f);
    fread( core->muta, sizeof core->muta[0], 4, f);
    fread(&core->pnum, sizeof core->pnum,    1, f);
    fread(&core->pcap, sizeof core->pcap,    1, f);
    fread(&core->pfst, sizeof core->pfst,    1, f);
    fread(&core->plst, sizeof core->plst,    1, f);
    fread(&core->pcur, sizeof core->pcur,    1, f);
    fread(&core->psli, sizeof core->psli,    1, f);
    fread(&core->ivpt, sizeof core->ivpt,    1, f);

    // Storage for the bulk state
    core->iviv = calloc({{ sync_interval }}, sizeof(uint8_t));
    core->ivav = calloc({{ sync_interval }}, sizeof(uint64_t));
    core->pvec = calloc(core->pcap, sizeof(struct Proc));

    assert(core->iviv);
    assert(core->ivav);
    assert(core->pvec);

    // Bulk state
    fread(core->iviv, sizeof *core->iviv, {{ sync_interval }}, f);
    fread(core->ivav, sizeof *core->ivav, {{ sync_interval }}, f);
    fread(core->pvec, sizeof *core->pvec, core->pcap, f);
    fread(core->mvec, sizeof core->mvec[0], {{ mvec_size }}, f);
}
{% endif %}

// Consumes the IPC message stored in the current slot, if there is one:
// the instruction is written to the target address and the slot is cleared.
// On return the current slot is always empty.
void core_pull_ipcm(struct Core *core) {
    assert(core);
    assert(core->ivpt < {{ sync_interval }});

    const uint64_t slot    = core->ivpt;
    const uint8_t  message = core->iviv[slot];

    if ((message & {{ ipc_flag }}) != 0) {
        mvec_set_inst(core, core->ivav[slot], message & {{ inst_mask }});

        core->iviv[slot] = 0;
        core->ivav[slot] = 0;
    }

    assert(core->iviv[slot] == 0);
    assert(core->ivav[slot] == 0);
}

// Stores an IPC message (instruction plus target address) in the current
// slot, which must be empty. The instruction is tagged with the IPC flag so
// that it can be told apart from an empty slot.
void core_push_ipcm(struct Core *core, uint8_t inst, uint64_t addr) {
    assert(core);
    assert(core->ivpt < {{ sync_interval }});
    assert((inst & {{ ipc_flag }}) == 0);

    const uint64_t slot = core->ivpt;

    assert(core->iviv[slot] == 0);
    assert(core->ivav[slot] == 0);

    core->iviv[slot] = inst | {{ ipc_flag }};
    core->ivav[slot] = addr;
}

// Advances the core by exactly one simulation step.
// If the current process has no time left, the next process is selected
// first; when the end of the queue is reached, a new cycle starts: processes
// are culled while memory is more than half allocated, and one cosmic ray
// is fired. The function then recurses until a step has been executed.
void core_step(struct Core *core) {
    assert(core);

    // Common case: the current process still has time left in its slice
    if (core->psli != 0) {
        core_pull_ipcm(core);
        arch_proc_step(core, core->pcur);

        core->psli--;
        core->ivpt++;

        return;
    }

    // Slice exhausted: move on to the next process in the queue
    if (core->pcur != core->plst) {
        core->psli = arch_proc_slice(core, ++core->pcur);
        core_step(core);
        return;
    }

    // End of the queue reached: wrap around and start a new cycle
    core->pcur = core->pfst;
    core->psli = arch_proc_slice(core, core->pcur);
    core->cycl++;

    // TODO: Implement day-night cycle
    // NOTE(review): `psli` was computed above for the process that was first
    // in the queue before culling; proc_kill() advances `pcur` and `pfst`
    // without recomputing it -- confirm this is intended
    while (core->mall > {{ mvec_size }} / 2 && core->pnum > 1) {
        proc_kill(core);
    }

    muta_cosmic_ray(core);
    core_step(core);
}

// ----------------------------------------------------------------------------
// Main salis functions
// ----------------------------------------------------------------------------
{% if args.command in ["load", "new"] %}
void salis_save(const char *path) {
    {% if args.compress %}
    size_t  size = 0;
    char   *in   = NULL;
    FILE   *f    = open_memstream(&in, &size);
    {% else %}
    FILE   *f    = fopen(path, "wb");
    {% endif %}

    assert(f);

    for (int i = 0; i < {{ args.cores }}; ++i) {
        core_save(f, &g_cores[i]);
    }

    fwrite(&g_steps, sizeof(uint64_t), 1, f);
    fwrite(&g_syncs, sizeof(uint64_t), 1, f);
    fclose(f);

    {% if args.compress %}
    assert(size);

    char *out = malloc(size);
    assert(out);

    z_stream strm = { 0 };
    strm.zalloc   = NULL,
    strm.zfree    = NULL,
    strm.opaque   = NULL,

    deflateInit(&strm, Z_DEFAULT_COMPRESSION);

    strm.avail_in  = size;
    strm.avail_out = size;
    strm.next_in   = (Bytef *)in;
    strm.next_out  = (Bytef *)out;

    deflate(&strm, Z_FINISH);

    FILE *fx = fopen(path, "wb");
    assert(fx);

    fwrite(&size, sizeof(size_t), 1, fx);
    fwrite(out, sizeof(char), strm.total_out, fx);
    fclose(fx);

    deflateEnd(&strm);

    free(in);
    free(out);
    {% endif %}
}

void salis_auto_save() {
    {% if not args.optimized %}
    int rem = snprintf(
    {% else %}
    snprintf(
    {% endif %}
        g_asav_pbuf,
        {{ auto_save_name_len }},
        "%s-%#018lx",
        "{{ sim_path }}",
        g_steps
    );

    assert(rem >= 0);
    assert(rem < {{ auto_save_name_len }});

    salis_save(g_asav_pbuf);
}
{% endif %}

{% if args.command in ["bench", "new"] %}
void salis_init() {
    uint64_t seed = {{ args.seed }};

    for (int i = 0; i < {{ args.cores }}; ++i) {
        core_init(&g_cores[i], &seed);
    }

    {% if args.command in ["new"] %}
    salis_auto_save();
    {% endif %}

    {% if data_push_path is defined %}
    sqlite3_open("{{ data_push_path }}", &g_sim_data);
    assert(g_sim_data);

    // Install busy handler to retry transactions if DB is locked
    sqlite3_busy_timeout(g_sim_data, {{ data_push_busy_timeout }});

    arch_push_data_header();
    arch_push_data_line();
    {% endif %}
}
{% endif %}

{% if args.command in ["load"] %}
void salis_load() {
    {% if args.compress %}
    FILE *fx = fopen("{{ sim_path }}", "rb");
    assert(fx);

    fseek(fx, 0, SEEK_END);
    size_t x_size = ftell(fx) - sizeof(size_t);
    char  *in     = malloc(x_size);
    rewind(fx);
    assert(x_size);
    assert(in);

    size_t size = 0;
    fread(&size, sizeof(size_t), 1, fx);
    fread(in, 1, x_size, fx);
    fclose(fx);
    assert(size);

    char *out = malloc(size);
    assert(out);

    z_stream strm = { 0 };
    strm.next_in  = (Bytef *)in;
    strm.avail_in = x_size;
    strm.zalloc   = NULL;
    strm.zfree    = NULL;
    strm.opaque   = NULL;

    inflateInit(&strm);

    strm.avail_out = size;
    strm.next_out  = (Bytef *)out;

    {% if not args.optimized %}
    assert(inflate(&strm, Z_FINISH));
    {% else %}
    inflate(&strm, Z_FINISH);
    {% endif %}

    inflateEnd(&strm);

    FILE *f = fmemopen(out, size, "rb");
    {% else %}
    FILE *f = fopen("{{ sim_path }}", "rb");
    {% endif %}

    assert(f);

    for (int i = 0; i < {{ args.cores }}; ++i) {
        core_load(f, &g_cores[i]);
    }

    fread(&g_steps, sizeof(uint64_t), 1, f);
    fread(&g_syncs, sizeof(uint64_t), 1, f);
    fclose(f);

    {% if args.compress %}
    free(in);
    free(out);
    {% endif %}

    {% if data_push_path is defined %}
    sqlite3_open("{{ data_push_path }}", &g_sim_data);
    assert(g_sim_data);

    // Install busy handler to retry transactions if DB is locked
    sqlite3_busy_timeout(g_sim_data, {{ data_push_busy_timeout }});
    {% endif %}
}
{% endif %}

// Thread entry point: runs the core for the number of steps that was stored
// in `thrd_idx` before the thread was started. Always returns 0.
int salis_thread(struct Core *core) {
    assert(core);

    uint64_t done = 0;

    while (done < core->thrd_idx) {
        core_step(core);
        ++done;
    }

    return 0;
}

void salis_run_thread(uint64_t ns) {
    for (int i = 0; i < {{ args.cores }}; ++i) {
        g_cores[i].thrd_idx = ns;

        thrd_create(
            &g_cores[i].thrd,
            (thrd_start_t)salis_thread,
            &g_cores[i]
        );
    }

    for (int i = 0; i < {{ args.cores }}; ++i) {
        thrd_join(g_cores[i].thrd, NULL);
    }

    g_steps += ns;
}

void salis_sync() {
    uint8_t  *iviv0 = g_cores[0].iviv;
    uint64_t *ivav0 = g_cores[0].ivav;

    for (int i = 1; i < {{ args.cores }}; ++i) {
        g_cores[i - 1].iviv = g_cores[i].iviv;
        g_cores[i - 1].ivav = g_cores[i].ivav;
    }

    g_cores[{{ args.cores }} - 1].iviv = iviv0;
    g_cores[{{ args.cores }} - 1].ivav = ivav0;

    for (int i = 0; i < {{ args.cores }}; ++i) {
        g_cores[i].ivpt = 0;
    }

    g_syncs++;
}

// Runs the simulation for `ns` steps, where `dt` is the number of steps
// until the next synchronization point. Whole intervals are executed one
// after the other, each followed by a sync and the periodic auto-save and
// data push; the steps that remain are executed without a final sync.
void salis_loop(uint64_t ns, uint64_t dt) {
    for (;;) {
        assert(dt);

        // Not enough steps left to reach the next synchronization point
        if (ns < dt) {
            break;
        }

        salis_run_thread(dt);
        salis_sync();

        {% if args.command in ["load", "new"] %}
        if (g_steps % {{ auto_save_interval }} == 0) {
            salis_auto_save();
        }
        {% endif %}

        {% if data_push_path is defined %}
        if (g_steps % {{ data_push_interval }} == 0) {
            arch_push_data_line();
        }
        {% endif %}

        // After the first sync, every interval has the full length
        ns -= dt;
        dt  = {{ sync_interval }};
    }

    salis_run_thread(ns);
}

{% if not args.optimized %}
// Checks the invariants of one core: consistency of the process queue
// counters, agreement between `mall` and the allocation flags in memory,
// validity of every live process, emptiness of unused IPC slots and the
// position of the IPC pointer.
void salis_validate_core(const struct Core *core) {
    // Process queue bookkeeping
    assert(core->cycl <= g_steps);
    assert(core->plst >= core->pfst);
    assert(core->pnum == core->plst + 1 - core->pfst);
    assert(core->pnum <= core->pcap);
    assert(core->pcur >= core->pfst && core->pcur <= core->plst);

    // Recount the allocated bytes
    uint64_t counted = 0;

    for (uint64_t addr = 0; addr < {{ mvec_size }}; ++addr) {
        if (mvec_is_alloc(core, addr)) {
            ++counted;
        }
    }

    assert(core->mall == counted);

    // Architecture-specific process checks
    for (uint64_t pix = core->pfst; pix <= core->plst; ++pix) {
        arch_validate_proc(core, pix);
    }

    // Slots without the IPC flag must be completely empty
    for (uint64_t slot = 0; slot < {{ sync_interval }}; ++slot) {
        uint8_t iinst = core->iviv[slot];

        if ((iinst & {{ ipc_flag }}) != 0) {
            continue;
        }

        uint64_t iaddr = core->ivav[slot];

        assert(iinst == 0);
        assert(iaddr == 0);
    }

    assert(core->ivpt == g_steps % {{ sync_interval }});
}

void salis_validate() {
    assert(g_steps / {{ sync_interval }} == g_syncs);

    for (int i = 0; i < {{ args.cores }}; ++i) {
        salis_validate_core(&g_cores[i]);
    }
}
{% endif %}

void salis_step(uint64_t ns) {
    assert(ns);
    salis_loop(ns, {{ sync_interval }} - (g_steps % {{ sync_interval }}));

    {% if not args.optimized %}
    salis_validate();
    {% endif %}
}

void salis_free() {
    {% if data_push_path is defined %}
    assert(g_sim_data);
    sqlite3_close(g_sim_data);
    {% endif %}

    for (int i = 0; i < {{ args.cores }}; ++i) {
        assert(g_cores[i].pvec);
        assert(g_cores[i].iviv);
        assert(g_cores[i].ivav);

        free(g_cores[i].pvec);
        free(g_cores[i].iviv);
        free(g_cores[i].ivav);

        g_cores[i].pvec = NULL;
        g_cores[i].iviv = NULL;
        g_cores[i].ivav = NULL;
    }
}

// ----------------------------------------------------------------------------
// Architecture
// ----------------------------------------------------------------------------
{% include "arch/" ~ args.arch ~ "/arch.j2.c" %}

// ----------------------------------------------------------------------------
// UI
// ----------------------------------------------------------------------------
{% if args.command in ["load", "new"] %}
    {% include "ui/" ~ args.ui ~ "/ui.j2.c" %}
{% else %}
    {% include "bench.j2.c" %}
{% endif %}