llama : do a warm-up eval at start for better timings (#1824)

2024-11-14 00:59:43 +00:00 · 2023-06-13 20:20:07 +03:00 · 2023-06-13 20:20:07 +03:00 · 2347e45e7b
commit 2347e45e7b
parent 74d4cfa343
1 changed file with 7 additions and 0 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -331,6 +331,13 @@ int main(int argc, char ** argv) {
    std::vector<llama_token> embd;
    // do one empty run to warm up the model
    {
        const std::vector<llama_token> tmp = { llama_token_bos(), };
        llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
        llama_reset_timings(ctx);
    }
    while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
        // predict
        if (embd.size() > 0) {