We could use std::unordered_map over std::map (#305)

* Improve performance by replacing std::map with std::unordered_map, and std::map<id, token> id_to_token with std::vector<token> id_to_token (a minimal before/after sketch follows the file summary below)

* Fix the previous commit: in gpt_vocab_init, add vocab.id_to_token.resize(vocab.token_to_id.size());

* Remove the now-unneeded #include <map>

* Nest struct token_score inside gpt_vocab

* Rename the token member to tok
Fabio R. Sluzala 2023-03-21 14:21:50 -03:00 committed by GitHub
parent 89d5d90f3b
commit 353ec251a4
4 changed files with 36 additions and 24 deletions
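
The core of the change is the layout of the vocabulary tables. Token ids are dense integers from 0 to n_vocab-1, so the id-to-token table can be a plain vector indexed by id instead of a tree-based std::map. A minimal before/after sketch with hypothetical toy data and simplified types, not the actual llama.cpp code:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
        // Hypothetical toy data standing in for the real vocab.
        // Before: id -> token went through a red-black tree, O(log n) per lookup.
        std::map<int32_t, std::string> id_to_token_old = {{0, "<unk>"}, {1, "<s>"}};

        // After: ids are dense (0 .. n_vocab-1), so a vector gives O(1) indexed
        // access with contiguous storage and no per-node allocations.
        std::vector<std::string> id_to_token_new = {"<unk>", "<s>"};

        printf("%s %s\n", id_to_token_old.at(1).c_str(), id_to_token_new[1].c_str());
        return 0;
    }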

main.cpp

@@ -9,7 +9,6 @@
 #include <cstring>
 #include <fstream>
 #include <iostream>
-#include <map>
 #include <string>
 #include <vector>
@@ -69,7 +68,7 @@ void set_console_state(console_state new_st)
 static const int EOS_TOKEN_ID = 2;

 // determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
     { 4096, 1 },
     { 5120, 2 },
     { 6656, 4 },
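
For this small static table either container is effectively free; the switch to std::unordered_map is mainly for consistency, trading a tree walk for a hash probe. A hypothetical sketch of how such a table is consulted (the call site is not part of this hunk):

    #include <cstdio>
    #include <unordered_map>

    static const std::unordered_map<int, int> LLAMA_N_PARTS = {
        { 4096, 1 }, { 5120, 2 }, { 6656, 4 },
    };

    int main() {
        // The embedding dimension selects how many files the model is split
        // into; .at() throws for an unknown dimension rather than silently
        // inserting a default entry the way operator[] would.
        const int n_embd = 5120;  // hypothetical value
        printf("n_parts = %d\n", LLAMA_N_PARTS.at(n_embd));
        return 0;
    }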
@@ -123,7 +122,7 @@ struct llama_model {
     //
     struct ggml_context * ctx;

-    std::map<std::string, struct ggml_tensor *> tensors;
+    std::unordered_map<std::string, struct ggml_tensor *> tensors;
 };

 // load the model's weights from a file
@@ -208,6 +207,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
     // load vocab
     {
         std::string word;
+        vocab.id_to_token.resize(model.hparams.n_vocab);

         std::vector<char> tmp(64);
         for (int i = 0; i < model.hparams.n_vocab; i++) {
@@ -227,8 +227,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
             fin.read((char *) &score, sizeof(score));

             vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.tok = word;
+            tok_score.score = score;
         }
     }
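
Note the pattern across these two hunks: the vector is resized once up front, then each slot is filled through operator[]. Indexing a std::vector past its size is undefined behavior, so the resize added above is what makes the direct indexing here safe. A self-contained sketch of the same pattern, with hypothetical sample words:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct token_score { std::string tok; float score; };

    int main() {
        const int n_vocab = 3;  // hypothetical; really read from the model file
        std::vector<token_score> id_to_token;
        id_to_token.resize(n_vocab);  // must come before any id_to_token[i]

        const char *words[] = {"<unk>", "<s>", "</s>"};
        for (int i = 0; i < n_vocab; i++) {
            auto &tok_score = id_to_token[i];
            tok_score.tok   = words[i];
            tok_score.score = 0.0f;  // scores would also come from the file
        }

        printf("%s\n", id_to_token[1].tok.c_str());  // prints "<s>"
        return 0;
    }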
@@ -1028,7 +1030,7 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
     for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str());
     }
     fprintf(stderr, "\n");
     if (params.interactive) {
@@ -1154,7 +1156,7 @@ int main(int argc, char ** argv) {
         // display text
         if (!input_noecho) {
             for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
+                printf("%s", vocab.id_to_token[id].tok.c_str());
             }
             fflush(stdout);
         }
@@ -1169,7 +1171,7 @@ int main(int argc, char ** argv) {
            // check for reverse prompt
            std::string last_output;
            for (auto id : last_n_tokens) {
-               last_output += vocab.id_to_token[id];
+               last_output += vocab.id_to_token[id].tok;
            }
            // Check if each of the reverse prompts appears at the end of the output.

quantize.cpp

@@ -8,7 +8,6 @@
 #include <cstdio>
 #include <cstring>
 #include <fstream>
-#include <map>
 #include <string>
 #include <vector>
 #include <regex>
@@ -130,6 +129,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
     }

     std::string word;
+    vocab.id_to_token.resize(n_vocab);
     for (int i = 0; i < n_vocab; i++) {
         uint32_t len;
         finp.read ((char *) &len, sizeof(len));
@@ -144,8 +144,10 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
         fout.write((char *) &score, sizeof(score));

         vocab.token_to_id[word] = i;
-        vocab.id_to_token[i] = word;
-        vocab.score[i] = score;
+
+        auto &tok_score = vocab.id_to_token[i];
+        tok_score.tok = word;
+        tok_score.score = score;
     }
 }

utils.cpp

@@ -155,8 +155,8 @@ void replace(std::string & str, const std::string & needle, const std::string &
     }
 }

-std::map<std::string, int32_t> json_parse(const std::string & fname) {
-    std::map<std::string, int32_t> result;
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname) {
+    std::unordered_map<std::string, int32_t> result;

     // read file into string
     std::string json;
@@ -360,16 +360,16 @@ private:
             return;
         }

-        auto score = vocab_.score.find((*token).second);
-        if (score == vocab_.score.end()) {
+        if (static_cast<size_t>((*token).second) >= vocab_.id_to_token.size()) {
             return;
         }

+        const auto &tok_score = vocab_.id_to_token[(*token).second];
+
         llama_sp_bigram bigram;
         bigram.left = left;
         bigram.right = right;
-        bigram.score = (*score).second;
+        bigram.score = tok_score.score;
         bigram.size = text.size();
         work_queue_.push(bigram);
     }
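
The existence check changes shape with the container: a std::map answers "is this id present?" via find()/end(), while a dense vector answers it with a bounds check on the index, after which the lookup itself is a single O(1) index. A reduced sketch of that translation (hypothetical helper, not the tokenizer code itself):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct token_score { std::string tok; float score; };

    // Hypothetical helper: with a std::map we would call find() and compare
    // against end(); with a vector indexed by id, the same "missing entry"
    // test is a bounds check, followed by a direct index.
    static float lookup_score(const std::vector<token_score> &id_to_token, int id) {
        if (static_cast<size_t>(id) >= id_to_token.size()) {
            return 0.0f;  // out of range: the caller bails out here
        }
        return id_to_token[id].score;
    }

    int main() {
        std::vector<token_score> v = {{"a", -1.5f}};
        printf("%.1f %.1f\n", lookup_score(v, 0), lookup_score(v, 7));
        return 0;
    }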
@@ -393,6 +393,8 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
     std::string word;
     std::vector<char> tmp(64);

+    vocab.id_to_token.resize(n_vocab);
+
     for (int i = 0; i < n_vocab; i++) {
         uint32_t len;
         fin.read((char *) &len, sizeof(len));
@@ -410,8 +412,10 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) {
         fin.read((char *) &score, sizeof(score));

         vocab.token_to_id[word] = i;
-        vocab.id_to_token[i] = word;
-        vocab.score[i] = score;
+
+        auto &tok_score = vocab.id_to_token[i];
+        tok_score.tok = word;
+        tok_score.score = score;
     }

     return true;

utils.h

@@ -3,7 +3,7 @@
 #pragma once

 #include <string>
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include <random>
 #include <thread>
@@ -65,15 +65,19 @@ struct llama_vocab {
     using id    = int32_t;
     using token = std::string;

-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
-    std::map<id, float> score;
+    struct token_score {
+        token tok;
+        float score;
+    };
+
+    std::unordered_map<token, id> token_to_id;
+    std::vector<token_score> id_to_token;
 };

 void replace(std::string & str, const std::string & needle, const std::string & replacement);

 // poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);

 // TODO: temporary until #77 is merged, need this now for some tokenizer tests
 bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
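
Taken together, the new llama_vocab keeps the two directions of the mapping in the containers that fit them: token -> id in a hash map, id -> (token, score) in a dense vector. A minimal round-trip sketch with hypothetical entries:

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct llama_vocab {
        using id    = int32_t;
        using token = std::string;

        struct token_score {
            token tok;
            float score;
        };

        std::unordered_map<token, id> token_to_id;
        std::vector<token_score> id_to_token;
    };

    int main() {
        // Hypothetical sample entries; real ones come from the model file.
        llama_vocab vocab;
        vocab.id_to_token = {{"hello", -1.0f}, {"world", -2.0f}};
        for (llama_vocab::id i = 0; i < (llama_vocab::id) vocab.id_to_token.size(); i++) {
            vocab.token_to_id[vocab.id_to_token[i].tok] = i;
        }

        // string -> id is one hash lookup; id -> (token, score) is one vector
        // index, so the round trip is O(1) in both directions.
        const auto id = vocab.token_to_id.at("world");
        printf("%d -> '%s' (score %.1f)\n", id,
               vocab.id_to_token[id].tok.c_str(), vocab.id_to_token[id].score);
        return 0;
    }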