diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 81a975c..7b6daf4 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -662,6 +662,15 @@ static void ggml_cl_pool_free(cl_mem mem, size_t size) {
     clReleaseMemObject(mem);
 }
 
+void ggml_cl_free_data(const struct ggml_tensor* tensor) {
+    if (tensor->backend != GGML_BACKEND_GPU) {
+        return;
+    }
+
+    cl_mem mem = (cl_mem)tensor->data;
+    clReleaseMemObject(mem);
+}
+
 static cl_int ggml_cl_h2d_tensor_2d(cl_command_queue queue, cl_mem dst, size_t offset, const struct ggml_tensor * src, uint64_t i3, uint64_t i2, cl_event* ev) {
     cl_int err;
     const uint64_t ne0 = src->ne[0];
diff --git a/ggml-opencl.h b/ggml-opencl.h
index c850bb8..bf95e5c 100644
--- a/ggml-opencl.h
+++ b/ggml-opencl.h
@@ -16,6 +16,8 @@ void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor
 void * ggml_cl_host_malloc(size_t size);
 void ggml_cl_host_free(void * ptr);
 
+void ggml_cl_free_data(const struct ggml_tensor* tensor);
+
 void ggml_cl_transform_tensor(struct ggml_tensor * tensor);
 
 void ggml_cl_load_data(const char * fname, struct ggml_tensor * tensor, size_t offset);
diff --git a/llama.cpp b/llama.cpp
index 16d6f6e..f40c5af 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -210,7 +210,11 @@ struct llama_model {
         for (size_t i = 0; i < tensors_by_name.size(); ++i) {
             ggml_cuda_free_data(tensors_by_name[i].second);
         }
-#endif // GGML_USE_CUBLAS
+#elif defined(GGML_USE_CLBLAST)
+        for (size_t i = 0; i < tensors_by_name.size(); ++i) {
+            ggml_cl_free_data(tensors_by_name[i].second);
+        }
+#endif
     }
 };
 