mirror of
https://git.adityakumar.xyz/llama.cpp.git
synced 2025-02-22 07:40:00 +00:00
* Workaround for recalculating logits in cached prompts
This commit is contained in:
parent
0e730dd23b
commit
248367605e
1 changed files with 6 additions and 0 deletions
|
@ -360,6 +360,12 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
}
|
||||
if (i > 0) {
|
||||
// check if we've used up all the prompt but not all cached tokens
|
||||
if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
|
||||
// force re-evaluation of the last token to recalculate logits
|
||||
i--;
|
||||
n_past--;
|
||||
}
|
||||
embd.erase(embd.begin(), embd.begin() + i);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue