mirror of
https://git.adityakumar.xyz/llama.cpp.git
synced 2025-02-22 15:40:02 +00:00
* Workaround for recalculating logits in cached prompts
This commit is contained in:
parent
0e730dd23b
commit
248367605e
1 changed files with 6 additions and 0 deletions
|
@ -360,6 +360,12 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
|
// check if we've used up all the prompt but not all cached tokens
|
||||||
|
if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
|
||||||
|
// force re-evaluation of the last token to recalculate logits
|
||||||
|
i--;
|
||||||
|
n_past--;
|
||||||
|
}
|
||||||
embd.erase(embd.begin(), embd.begin() + i);
|
embd.erase(embd.begin(), embd.begin() + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue