llama : do a warm-up eval at start for better timings (#1824)

This commit is contained in:
Georgi Gerganov 2023-06-13 20:20:07 +03:00 committed by GitHub
parent 74d4cfa343
commit 2347e45e7b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -331,6 +331,13 @@ int main(int argc, char ** argv) {
std::vector<llama_token> embd; std::vector<llama_token> embd;
// Warm-up pass: evaluate a single BOS token once before entering the main
// generation loop, then reset the timing counters so that this extra eval
// should not skew the timings gathered afterwards.
{
    // One-element token buffer holding only the beginning-of-sequence token.
    const llama_token warmup_tokens[] = { llama_token_bos() };

    llama_eval(
        ctx,
        warmup_tokens,
        sizeof(warmup_tokens) / sizeof(warmup_tokens[0]),
        0,
        params.n_threads);

    llama_reset_timings(ctx);
}
while ((n_remain != 0 && !is_antiprompt) || params.interactive) { while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
// predict // predict
if (embd.size() > 0) { if (embd.size() > 0) {