mirror of
https://git.adityakumar.xyz/llama.cpp.git
synced 2024-11-09 15:29:43 +00:00
ggml : remove src0 and src1 from ggml_tensor and rename opt to src (#2178)
* Add ggml changes
* Update train-text-from-scratch for change
* mpi : adapt to new ggml_tensor->src
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
c9c74b4e3f
commit
5bf2a27718
6 changed files with 371 additions and 421 deletions
|
@ -1354,17 +1354,9 @@ struct ggml_tensor * expand(struct ggml_cgraph * g, struct ggml_tensor * t) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (t->src0) {
|
for (int i = 0; i < GGML_MAX_SRC; ++i) {
|
||||||
expand(g, t->src0);
|
if (t->src[i]) {
|
||||||
}
|
expand(g, t->src[i]);
|
||||||
|
|
||||||
if (t->src1) {
|
|
||||||
expand(g, t->src1);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < GGML_MAX_OPT; ++i) {
|
|
||||||
if (t->opt[i]) {
|
|
||||||
expand(g, t->opt[i]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
28
ggml-cuda.cu
28
ggml-cuda.cu
|
@ -3200,36 +3200,36 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bo
|
||||||
}
|
}
|
||||||
|
|
||||||
// recursively assign CUDA buffers until a compute tensor is found
|
// recursively assign CUDA buffers until a compute tensor is found
|
||||||
if (tensor->src0 != nullptr && tensor->src0->backend == GGML_BACKEND_CPU) {
|
if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_CPU) {
|
||||||
const ggml_op src0_op = tensor->src0->op;
|
const ggml_op src0_op = tensor->src[0]->op;
|
||||||
if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW) {
|
if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW) {
|
||||||
ggml_cuda_assign_buffers_impl(tensor->src0, scratch, force_inplace);
|
ggml_cuda_assign_buffers_impl(tensor->src[0], scratch, force_inplace);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (tensor->op == GGML_OP_CPY && tensor->src1->backend == GGML_BACKEND_CPU) {
|
if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_CPU) {
|
||||||
ggml_cuda_assign_buffers_impl(tensor->src1, scratch, force_inplace);
|
ggml_cuda_assign_buffers_impl(tensor->src[1], scratch, force_inplace);
|
||||||
}
|
}
|
||||||
|
|
||||||
tensor->backend = GGML_BACKEND_GPU;
|
tensor->backend = GGML_BACKEND_GPU;
|
||||||
struct ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
|
struct ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
|
||||||
memset(extra, 0, sizeof(*extra));
|
memset(extra, 0, sizeof(*extra));
|
||||||
|
|
||||||
const bool inplace = (tensor->src0 != nullptr && tensor->src0->data == tensor->data) ||
|
const bool inplace = (tensor->src[0] != nullptr && tensor->src[0]->data == tensor->data) ||
|
||||||
tensor->op == GGML_OP_VIEW ||
|
tensor->op == GGML_OP_VIEW ||
|
||||||
force_inplace;
|
force_inplace;
|
||||||
const size_t size = ggml_nbytes(tensor);
|
const size_t size = ggml_nbytes(tensor);
|
||||||
|
|
||||||
CUDA_CHECK(cudaSetDevice(g_main_device));
|
CUDA_CHECK(cudaSetDevice(g_main_device));
|
||||||
if (inplace && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT)) {
|
if (inplace && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) {
|
||||||
struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src0->extra;
|
struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src[0]->extra;
|
||||||
char * src0_ddc = (char *) src0_extra->data_device[g_main_device];
|
char * src0_ddc = (char *) src0_extra->data_device[g_main_device];
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
if (tensor->op == GGML_OP_VIEW) {
|
if (tensor->op == GGML_OP_VIEW) {
|
||||||
memcpy(&offset, tensor->opt[0]->data, sizeof(size_t));
|
memcpy(&offset, tensor->src[2]->data, sizeof(size_t));
|
||||||
}
|
}
|
||||||
extra->data_device[g_main_device] = src0_ddc + offset;
|
extra->data_device[g_main_device] = src0_ddc + offset;
|
||||||
} else if (tensor->op == GGML_OP_CPY) {
|
} else if (tensor->op == GGML_OP_CPY) {
|
||||||
struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu * ) tensor->src1->extra;
|
struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu * ) tensor->src[1]->extra;
|
||||||
void * src1_ddv = src1_extra->data_device[g_main_device];
|
void * src1_ddv = src1_extra->data_device[g_main_device];
|
||||||
extra->data_device[g_main_device] = src1_ddv;
|
extra->data_device[g_main_device] = src1_ddv;
|
||||||
} else if (scratch) {
|
} else if (scratch) {
|
||||||
|
@ -3300,8 +3300,8 @@ void ggml_cuda_free_scratch() {
|
||||||
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
|
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
|
||||||
ggml_cuda_func_t func;
|
ggml_cuda_func_t func;
|
||||||
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|
||||||
|| (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
|
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
|
||||||
|| (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);
|
|| (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
|
||||||
|
|
||||||
switch (tensor->op) {
|
switch (tensor->op) {
|
||||||
case GGML_OP_ADD:
|
case GGML_OP_ADD:
|
||||||
|
@ -3329,7 +3329,7 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
|
||||||
func = ggml_cuda_rms_norm;
|
func = ggml_cuda_rms_norm;
|
||||||
break;
|
break;
|
||||||
case GGML_OP_MUL_MAT:
|
case GGML_OP_MUL_MAT:
|
||||||
if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src0, tensor->src1, tensor)) {
|
if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src[0], tensor->src[1], tensor)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
func = ggml_cuda_mul_mat;
|
func = ggml_cuda_mul_mat;
|
||||||
|
@ -3383,6 +3383,6 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
|
||||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
func(tensor->src0, tensor->src1, tensor);
|
func(tensor->src[0], tensor->src[1], tensor);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -393,8 +393,8 @@ void ggml_metal_graph_compute(
|
||||||
for (int i = node_start; i < node_end; ++i) {
|
for (int i = node_start; i < node_end; ++i) {
|
||||||
metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
|
metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
|
||||||
|
|
||||||
struct ggml_tensor * src0 = gf->nodes[i]->src0;
|
struct ggml_tensor * src0 = gf->nodes[i]->src[0];
|
||||||
struct ggml_tensor * src1 = gf->nodes[i]->src1;
|
struct ggml_tensor * src1 = gf->nodes[i]->src[1];
|
||||||
struct ggml_tensor * dst = gf->nodes[i];
|
struct ggml_tensor * dst = gf->nodes[i];
|
||||||
|
|
||||||
const int64_t ne00 = src0 ? src0->ne[0] : 0;
|
const int64_t ne00 = src0 ? src0->ne[0] : 0;
|
||||||
|
|
|
@ -175,11 +175,11 @@ void ggml_mpi_graph_compute_pre(
|
||||||
// attach the input data to all nodes that need it
|
// attach the input data to all nodes that need it
|
||||||
// TODO: not great - should be able to do this without modifying the compute graph (see next TODO below)
|
// TODO: not great - should be able to do this without modifying the compute graph (see next TODO below)
|
||||||
for (int i = idx_l0; i < idx_l1; i++) {
|
for (int i = idx_l0; i < idx_l1; i++) {
|
||||||
if (gf->nodes[i]->src0 == gf->nodes[idx_l0]) {
|
if (gf->nodes[i]->src[0] == gf->nodes[idx_l0]) {
|
||||||
gf->nodes[i]->src0 = inp0;
|
gf->nodes[i]->src[0] = inp0;
|
||||||
}
|
}
|
||||||
if (gf->nodes[i]->src1 == gf->nodes[idx_l0]) {
|
if (gf->nodes[i]->src[1] == gf->nodes[idx_l0]) {
|
||||||
gf->nodes[i]->src1 = inp0;
|
gf->nodes[i]->src[1] = inp0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
10
ggml.h
10
ggml.h
|
@ -132,10 +132,10 @@
|
||||||
// {
|
// {
|
||||||
// struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
|
// struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
|
||||||
//
|
//
|
||||||
// // a[1, 2] = 1.0f;
|
// // a[2, 1] = 1.0f;
|
||||||
// *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
|
// *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
|
||||||
//
|
//
|
||||||
// // a[2, 0] = 2.0f;
|
// // a[0, 2] = 2.0f;
|
||||||
// *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
|
// *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
|
||||||
//
|
//
|
||||||
// ...
|
// ...
|
||||||
|
@ -197,7 +197,7 @@
|
||||||
#define GGML_MAX_NODES 4096
|
#define GGML_MAX_NODES 4096
|
||||||
#define GGML_MAX_PARAMS 256
|
#define GGML_MAX_PARAMS 256
|
||||||
#define GGML_MAX_CONTEXTS 64
|
#define GGML_MAX_CONTEXTS 64
|
||||||
#define GGML_MAX_OPT 4
|
#define GGML_MAX_SRC 6
|
||||||
#define GGML_MAX_NAME 48
|
#define GGML_MAX_NAME 48
|
||||||
#define GGML_DEFAULT_N_THREADS 4
|
#define GGML_DEFAULT_N_THREADS 4
|
||||||
|
|
||||||
|
@ -414,9 +414,7 @@ extern "C" {
|
||||||
bool is_param;
|
bool is_param;
|
||||||
|
|
||||||
struct ggml_tensor * grad;
|
struct ggml_tensor * grad;
|
||||||
struct ggml_tensor * src0;
|
struct ggml_tensor * src[GGML_MAX_SRC];
|
||||||
struct ggml_tensor * src1;
|
|
||||||
struct ggml_tensor * opt[GGML_MAX_OPT];
|
|
||||||
|
|
||||||
// performance
|
// performance
|
||||||
int perf_runs;
|
int perf_runs;
|
||||||
|
|
Loading…
Reference in a new issue