From eaa6ca5a61b8c9501df9ebe3d264f45b75a5f8aa Mon Sep 17 00:00:00 2001
From: David Yang
Date: Tue, 27 Jun 2023 03:45:32 +0800
Subject: [PATCH] ggml : increase max tensor name + clean up compiler warnings
 in train-text (#1988)

* Clean up compiler warnings in train-text

Some brackets to disambiguate order of operations

* Increase GGML_MAX_NAME

Avoiding strncpy danger in train-text-from-scratch and reducing potential
future name length issues
---
 .../train-text-from-scratch.cpp | 23 +++++--------------
 ggml.h                          |  2 +-
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 61c829e..5c6fd57 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -294,20 +294,9 @@ void init_model(struct my_llama_model * model) {
 
         ggml_set_name(layer.ffn_norm, (layers_i + ".ffn_norm.weight").c_str());
 
-        // 'layers.10.feed_forward.w1.weight' has length of 32.
-        // ggml_tensor->name only has 32 characters, but we need one more for the '\0' terminator.
-        // ggml_set_name will set the last character to '\0', so we can only store 'layers.10.feed_forward.w1.weigh'.
-        // when saving llama compatible model the tensors names will miss a character.
-        // ggml_set_name(layer.w1, (layers_i + ".feed_forward.w1.weight").c_str());
-        // ggml_set_name(layer.w2, (layers_i + ".feed_forward.w2.weight").c_str());
-        // ggml_set_name(layer.w3, (layers_i + ".feed_forward.w3.weight").c_str());
-
-        strncpy(layer.w1->name, (layers_i + ".feed_forward.w1.weight").c_str(), sizeof(layer.w1->name));
-        strncpy(layer.w2->name, (layers_i + ".feed_forward.w2.weight").c_str(), sizeof(layer.w2->name));
-        strncpy(layer.w3->name, (layers_i + ".feed_forward.w3.weight").c_str(), sizeof(layer.w3->name));
-        layer.w1->padding[0] = 0;
-        layer.w2->padding[0] = 0;
-        layer.w3->padding[0] = 0;
+        ggml_format_name(layer.w1, "%s.feed_forward.w1.weight", layers_i.c_str());
+        ggml_format_name(layer.w2, "%s.feed_forward.w2.weight", layers_i.c_str());
+        ggml_format_name(layer.w3, "%s.feed_forward.w3.weight", layers_i.c_str());
     }
 }
 
@@ -2368,7 +2357,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
         file->write_u32(0);
         file->write_u32(0);
         file->write_u32(GGML_TYPE_F32);
-        file->seek(0-file->tell() & 31, SEEK_CUR);
+        file->seek((0-file->tell()) & 31, SEEK_CUR);
         return;
     }
     const char * name = ggml_get_name(tensor);
@@ -2383,7 +2372,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     file->write_u32(tensor->type);
     file->write_raw(ne, sizeof(ne[0]) * nd);
     file->write_raw(name, name_len);
-    file->seek(0-file->tell() & 31, SEEK_CUR);
+    file->seek((0-file->tell()) & 31, SEEK_CUR);
     file->write_raw(tensor->data, ggml_nbytes(tensor));
 }
 
@@ -2404,7 +2393,7 @@ void read_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     std::string name = file->read_string(name_len);
     GGML_ASSERT(strncmp(ggml_get_name(tensor), name.c_str(), sizeof(tensor->name)-1) == 0);
 
-    file->seek(0-file->tell() & 31, SEEK_CUR);
+    file->seek((0-file->tell()) & 31, SEEK_CUR);
     file->read_raw(tensor->data, ggml_nbytes(tensor));
 }
 
diff --git a/ggml.h b/ggml.h
index 6b106b1..08025e5 100644
--- a/ggml.h
+++ b/ggml.h
@@ -198,7 +198,7 @@
 #define GGML_MAX_PARAMS        256
 #define GGML_MAX_CONTEXTS      64
 #define GGML_MAX_OPT           4
-#define GGML_MAX_NAME          32
+#define GGML_MAX_NAME          48
 #define GGML_DEFAULT_N_THREADS 4
 
 #define GGML_ASSERT(x) \
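
Note on the GGML_MAX_NAME change (a standalone sketch, not part of the patch):
"layers.10.feed_forward.w1.weight" is exactly 32 characters, so with
GGML_MAX_NAME = 32 there is no room for the terminating '\0'. strncpy with a
count equal to the buffer size copies 32 bytes and never writes a terminator,
which is why the old code also zeroed padding[0], seemingly relying on the
byte after name in struct ggml_tensor to terminate the string.
ggml_format_name formats with snprintf-style semantics, which always
NUL-terminates, and the bump to 48 leaves headroom for long layer names. A
minimal C++ sketch of the difference (buffer names are illustrative only):

    #include <cstdio>
    #include <cstring>

    int main() {
        const char * name = "layers.10.feed_forward.w1.weight";
        printf("strlen = %zu\n", strlen(name)); // 32: exactly fills a 32-byte buffer

        char buf32[32];
        strncpy(buf32, name, sizeof(buf32));    // copies 32 bytes, writes no '\0'
        (void) buf32;                           // buf32 is NOT a valid C string here

        char buf48[48];                         // GGML_MAX_NAME after this patch
        snprintf(buf48, sizeof(buf48), "%s.feed_forward.w1.weight", "layers.10");
        printf("%s\n", buf48);                  // snprintf always NUL-terminates
        return 0;
    }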
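
Note on the seek parenthesization (a standalone sketch, not part of the patch):
(0 - file->tell()) & 31 computes how many padding bytes advance the file
position to the next 32-byte boundary. Binary '-' binds tighter than '&' in C
and C++, so the added parentheses do not change the result; they make the
intended grouping explicit and silence the compiler's suggest-parentheses
warning (e.g. -Wparentheses). A minimal C++ check of the identity (the
pad_to_32 helper name is hypothetical):

    #include <cassert>
    #include <cstdio>

    // Bytes to skip so that (pos + pad) is a multiple of 32. Unsigned
    // wraparound makes (0 - pos) & 31 equal to (32 - pos % 32) % 32.
    static unsigned long pad_to_32(unsigned long pos) {
        return (0 - pos) & 31;  // same value as 0 - pos & 31, by precedence
    }

    int main() {
        for (unsigned long pos : {0UL, 1UL, 31UL, 32UL, 33UL, 100UL}) {
            unsigned long pad = pad_to_32(pos);
            assert((pos + pad) % 32 == 0);      // always lands on a boundary
            printf("pos=%3lu pad=%2lu\n", pos, pad);
        }
        return 0;
    }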