From 5bf2a2771886ee86137e01dbc7492f78fb392066 Mon Sep 17 00:00:00 2001
From: Spencer Sutton
Date: Tue, 11 Jul 2023 12:31:10 -0400
Subject: [PATCH] ggml : remove src0 and src1 from ggml_tensor and rename opt
 to src (#2178)

* Add ggml changes

* Update train-text-from-scratch for change

* mpi : adapt to new ggml_tensor->src

---------

Co-authored-by: Georgi Gerganov
---
 .../train-text-from-scratch.cpp               |  14 +-
 ggml-cuda.cu                                  |  28 +-
 ggml-metal.m                                  |   4 +-
 ggml-mpi.c                                    |   8 +-
 ggml.c                                        | 728 +++++++++---------
 ggml.h                                        |  10 +-
 6 files changed, 371 insertions(+), 421 deletions(-)

diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index b96fdcd..afbb4a7 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1354,17 +1354,9 @@ struct ggml_tensor * expand(struct ggml_cgraph * g, struct ggml_tensor * t) {
         }
     }

-    if (t->src0) {
-        expand(g, t->src0);
-    }
-
-    if (t->src1) {
-        expand(g, t->src1);
-    }
-
-    for (int i = 0; i < GGML_MAX_OPT; ++i) {
-        if (t->opt[i]) {
-            expand(g, t->opt[i]);
+    for (int i = 0; i < GGML_MAX_SRC; ++i) {
+        if (t->src[i]) {
+            expand(g, t->src[i]);
         }
     }

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index fd36f17..1673e7e 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -3200,36 +3200,36 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bo
     }

     // recursively assign CUDA buffers until a compute tensor is found
-    if (tensor->src0 != nullptr && tensor->src0->backend == GGML_BACKEND_CPU) {
-        const ggml_op src0_op = tensor->src0->op;
+    if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_CPU) {
+        const ggml_op src0_op = tensor->src[0]->op;
         if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW) {
-            ggml_cuda_assign_buffers_impl(tensor->src0, scratch, force_inplace);
+            ggml_cuda_assign_buffers_impl(tensor->src[0], scratch, force_inplace);
         }
     }

-    if (tensor->op == GGML_OP_CPY && tensor->src1->backend == GGML_BACKEND_CPU) {
-        ggml_cuda_assign_buffers_impl(tensor->src1, scratch, force_inplace);
+    if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_CPU) {
+        ggml_cuda_assign_buffers_impl(tensor->src[1], scratch, force_inplace);
     }

     tensor->backend = GGML_BACKEND_GPU;
     struct ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
     memset(extra, 0, sizeof(*extra));

-    const bool inplace = (tensor->src0 != nullptr && tensor->src0->data == tensor->data) ||
+    const bool inplace = (tensor->src[0] != nullptr && tensor->src[0]->data == tensor->data) ||
         tensor->op == GGML_OP_VIEW ||
         force_inplace;
     const size_t size = ggml_nbytes(tensor);

     CUDA_CHECK(cudaSetDevice(g_main_device));
-    if (inplace && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT)) {
-        struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src0->extra;
+    if (inplace && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) {
+        struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src[0]->extra;
         char * src0_ddc = (char *) src0_extra->data_device[g_main_device];
         size_t offset = 0;
         if (tensor->op == GGML_OP_VIEW) {
-            memcpy(&offset, tensor->opt[0]->data, sizeof(size_t));
+            memcpy(&offset, tensor->src[2]->data, sizeof(size_t));
         }
         extra->data_device[g_main_device] = src0_ddc + offset;
     } else if (tensor->op == GGML_OP_CPY) {
-        struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu * ) tensor->src1->extra;
+        struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu * ) tensor->src[1]->extra;
         void * src1_ddv = src1_extra->data_device[g_main_device];
         extra->data_device[g_main_device] = src1_ddv;
     } else if (scratch) {
@@ -3300,8 +3300,8 @@ void ggml_cuda_free_scratch() {
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
     ggml_cuda_func_t func;
     const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
-        || (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
-        || (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);
+        || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
+        || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);

     switch (tensor->op) {
         case GGML_OP_ADD:
@@ -3329,7 +3329,7 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
             func = ggml_cuda_rms_norm;
             break;
         case GGML_OP_MUL_MAT:
-            if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src0, tensor->src1, tensor)) {
+            if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src[0], tensor->src[1], tensor)) {
                 return false;
             }
             func = ggml_cuda_mul_mat;
@@ -3383,6 +3383,6 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return true;
     }
-    func(tensor->src0, tensor->src1, tensor);
+    func(tensor->src[0], tensor->src[1], tensor);
     return true;
 }
diff --git a/ggml-metal.m b/ggml-metal.m
index 6473644..d7a1693 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -393,8 +393,8 @@ void ggml_metal_graph_compute(
         for (int i = node_start; i < node_end; ++i) {
             metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));

-            struct ggml_tensor * src0 = gf->nodes[i]->src0;
-            struct ggml_tensor * src1 = gf->nodes[i]->src1;
+            struct ggml_tensor * src0 = gf->nodes[i]->src[0];
+            struct ggml_tensor * src1 = gf->nodes[i]->src[1];
             struct ggml_tensor * dst  = gf->nodes[i];

             const int64_t ne00 = src0 ? src0->ne[0] : 0;
diff --git a/ggml-mpi.c b/ggml-mpi.c
index 872e808..ae176d7 100644
--- a/ggml-mpi.c
+++ b/ggml-mpi.c
@@ -175,11 +175,11 @@ void ggml_mpi_graph_compute_pre(
     // attach the input data to all nodes that need it
     // TODO: not great - should be able to do this without modifying the compute graph (see next TODO below)
     for (int i = idx_l0; i < idx_l1; i++) {
-        if (gf->nodes[i]->src0 == gf->nodes[idx_l0]) {
-            gf->nodes[i]->src0 = inp0;
+        if (gf->nodes[i]->src[0] == gf->nodes[idx_l0]) {
+            gf->nodes[i]->src[0] = inp0;
         }
-        if (gf->nodes[i]->src1 == gf->nodes[idx_l0]) {
-            gf->nodes[i]->src1 = inp0;
+        if (gf->nodes[i]->src[1] == gf->nodes[idx_l0]) {
+            gf->nodes[i]->src[1] = inp0;
         }
     }

diff --git a/ggml.c b/ggml.c
index c10877a..8dc30a3 100644
--- a/ggml.c
+++ b/ggml.c
@@ -4584,9 +4584,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
         /*.op           =*/ GGML_OP_NONE,
         /*.is_param     =*/ false,
         /*.grad         =*/ NULL,
-        /*.src0         =*/ NULL,
-        /*.src1         =*/ NULL,
-        /*.opt          =*/ { NULL },
+        /*.src          =*/ { NULL },
         /*.perf_runs    =*/ 0,
         /*.perf_cycles  =*/ 0,
         /*.perf_time_us =*/ 0,
@@ -5012,8 +5010,8 @@ struct ggml_tensor * ggml_dup_impl(

     result->op = GGML_OP_DUP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5049,8 +5047,8 @@ struct ggml_tensor * ggml_add_impl(

     result->op = GGML_OP_ADD;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -5089,8 +5087,8 @@ struct ggml_tensor * ggml_add1_impl(

     result->op = GGML_OP_ADD1;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -5147,9 +5145,9 @@ struct ggml_tensor * ggml_acc_impl(

     result->op = GGML_OP_ACC;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = c;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = c;

     return result;
 }
@@ -5195,8 +5193,8 @@ struct ggml_tensor * ggml_sub_impl(

     result->op = GGML_OP_SUB;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -5242,8 +5240,8 @@ struct ggml_tensor * ggml_mul_impl(

     result->op = GGML_OP_MUL;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -5285,8 +5283,8 @@ struct ggml_tensor * ggml_div_impl(

     result->op = GGML_OP_DIV;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -5321,8 +5319,8 @@ struct ggml_tensor * ggml_sqr_impl(

     result->op = GGML_OP_SQR;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5355,8 +5353,8 @@ struct ggml_tensor * ggml_sqrt_impl(

     result->op = GGML_OP_SQRT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5390,8 +5388,8 @@ struct ggml_tensor * ggml_log_impl(

     result->op = GGML_OP_LOG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5423,8 +5421,8 @@ struct ggml_tensor * ggml_sum(

     result->op = GGML_OP_SUM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5450,8 +5448,8 @@ struct ggml_tensor * ggml_sum_rows(

     result->op = GGML_OP_SUM_ROWS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5473,8 +5471,8 @@ struct ggml_tensor * ggml_mean(

     result->op = GGML_OP_MEAN;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5497,8 +5495,8 @@ struct ggml_tensor * ggml_argmax(

     result->op = GGML_OP_ARGMAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5525,8 +5523,8 @@ struct ggml_tensor * ggml_repeat(

     result->op = GGML_OP_REPEAT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -5553,8 +5551,8 @@ struct ggml_tensor * ggml_repeat_back(

     result->op = GGML_OP_REPEAT_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -5575,8 +5573,8 @@ struct ggml_tensor * ggml_abs_impl(

     result->op = GGML_OP_ABS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5610,8 +5608,8 @@ struct ggml_tensor * ggml_sgn_impl(

     result->op = GGML_OP_SGN;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5644,8 +5642,8 @@ struct ggml_tensor * ggml_neg_impl(

     result->op = GGML_OP_NEG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5678,8 +5676,8 @@ struct ggml_tensor * ggml_step_impl(

     result->op = GGML_OP_STEP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5712,8 +5710,8 @@ struct ggml_tensor * ggml_tanh_impl(

     result->op = GGML_OP_TANH;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5746,8 +5744,8 @@ struct ggml_tensor * ggml_elu_impl(

     result->op = GGML_OP_ELU;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5780,8 +5778,8 @@ struct ggml_tensor * ggml_relu_impl(

     result->op = GGML_OP_RELU;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5814,8 +5812,8 @@ struct ggml_tensor * ggml_gelu_impl(

     result->op = GGML_OP_GELU;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5848,8 +5846,8 @@ struct ggml_tensor * ggml_gelu_quick_impl(

     result->op = GGML_OP_GELU_QUICK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5882,8 +5880,8 @@ struct ggml_tensor * ggml_silu_impl(

     result->op = GGML_OP_SILU;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -5917,8 +5915,8 @@ struct ggml_tensor * ggml_silu_back(

     result->op = GGML_OP_SILU_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -5940,8 +5938,8 @@ struct ggml_tensor * ggml_norm_impl(

     result->op = GGML_OP_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL; // TODO: maybe store epsilon here?
+    result->src[0] = a;
+    result->src[1] = NULL; // TODO: maybe store epsilon here?

     return result;
 }
@@ -5972,8 +5970,8 @@ struct ggml_tensor * ggml_rms_norm_impl(

     result->op = GGML_OP_RMS_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL; // TODO: maybe store epsilon here?
+    result->src[0] = a;
+    result->src[1] = NULL; // TODO: maybe store epsilon here?

     return result;
 }
@@ -6005,8 +6003,8 @@ struct ggml_tensor * ggml_rms_norm_back(

     result->op = GGML_OP_RMS_NORM_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6032,8 +6030,8 @@ struct ggml_tensor * ggml_mul_mat(

     result->op = GGML_OP_MUL_MAT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6058,8 +6056,8 @@ struct ggml_tensor * ggml_out_prod(

     result->op = GGML_OP_OUT_PROD;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6084,8 +6082,8 @@ struct ggml_tensor * ggml_scale_impl(

     result->op = GGML_OP_SCALE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6140,9 +6138,9 @@ struct ggml_tensor * ggml_set_impl(

     result->op = GGML_OP_SET;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = c;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = c;

     return result;
 }
@@ -6229,8 +6227,8 @@ struct ggml_tensor * ggml_cpy_impl(

     result->op = GGML_OP_CPY;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6266,8 +6264,8 @@ struct ggml_tensor * ggml_cont_impl(

     result->op = GGML_OP_CONT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6310,8 +6308,8 @@ struct ggml_tensor * ggml_reshape(

     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6335,8 +6333,8 @@ struct ggml_tensor * ggml_reshape_1d(

     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6361,8 +6359,8 @@ struct ggml_tensor * ggml_reshape_2d(

     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6388,8 +6386,8 @@ struct ggml_tensor * ggml_reshape_3d(

     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6417,8 +6415,8 @@ struct ggml_tensor * ggml_reshape_4d(

     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6450,9 +6448,9 @@ struct ggml_tensor * ggml_view_1d(

     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
-    result->opt[0] = offs;
+    result->src[0] = a;
+    result->src[1] = NULL;
+    result->src[2] = offs;

     return result;
 }
@@ -6492,9 +6490,9 @@ struct ggml_tensor * ggml_view_2d(

     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
-    result->opt[0] = offs;
+    result->src[0] = a;
+    result->src[1] = NULL;
+    result->src[2] = offs;

     return result;
 }
@@ -6536,9 +6534,9 @@ struct ggml_tensor * ggml_view_3d(

     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
-    result->opt[0] = offs;
+    result->src[0] = a;
+    result->src[1] = NULL;
+    result->src[2] = offs;

     return result;
 }
@@ -6582,9 +6580,9 @@ struct ggml_tensor * ggml_view_4d(

     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
-    result->opt[0] = offs;
+    result->src[0] = a;
+    result->src[1] = NULL;
+    result->src[2] = offs;

     return result;
 }
@@ -6644,8 +6642,8 @@ struct ggml_tensor * ggml_permute(

     result->op = GGML_OP_PERMUTE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     if (is_node) {
         ggml_scratch_save(ctx);
@@ -6659,7 +6657,7 @@ struct ggml_tensor * ggml_permute(

         ggml_scratch_load(ctx);

-        result->opt[0] = b;
+        result->src[2] = b;
     }

     return result;
@@ -6687,8 +6685,8 @@ struct ggml_tensor * ggml_transpose(

     result->op = GGML_OP_TRANSPOSE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6713,8 +6711,8 @@ struct ggml_tensor * ggml_get_rows(

     result->op = GGML_OP_GET_ROWS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6741,9 +6739,9 @@ struct ggml_tensor * ggml_get_rows_back(

     result->op = GGML_OP_GET_ROWS_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = c;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = c;

     return result;
 }
@@ -6765,8 +6763,8 @@ struct ggml_tensor * ggml_diag(

     result->op = GGML_OP_DIAG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6798,8 +6796,8 @@ struct ggml_tensor * ggml_diag_mask_inf_impl(

     result->op = GGML_OP_DIAG_MASK_INF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6846,8 +6844,8 @@ struct ggml_tensor * ggml_diag_mask_zero_impl(

     result->op = GGML_OP_DIAG_MASK_ZERO;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6882,8 +6880,8 @@ struct ggml_tensor * ggml_soft_max_impl(

     result->op = GGML_OP_SOFT_MAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
+    result->src[0] = a;
+    result->src[1] = NULL;

     return result;
 }
@@ -6918,8 +6916,8 @@ struct ggml_tensor * ggml_soft_max_back_impl(

     result->op = GGML_OP_SOFT_MAX_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -6970,8 +6968,8 @@ struct ggml_tensor * ggml_rope_impl(

     result->op = GGML_OP_ROPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -7028,8 +7026,8 @@ struct ggml_tensor * ggml_rope_back(

     result->op = GGML_OP_ROPE_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -7067,8 +7065,8 @@ struct ggml_tensor * ggml_alibi(

     result->op = GGML_OP_ALIBI;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -7101,8 +7099,8 @@ struct ggml_tensor * ggml_clamp(

     result->op = GGML_OP_CLAMP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -7144,9 +7142,9 @@ GGML_API struct ggml_tensor * ggml_conv_1d(

     result->op = GGML_OP_CONV_1D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = c;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = c;

     return result;
 }
@@ -7192,9 +7190,9 @@ struct ggml_tensor* ggml_conv_2d(

     result->op = GGML_OP_CONV_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = c;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = c;

     return result;

@@ -7233,10 +7231,10 @@ struct ggml_tensor * ggml_flash_attn(

     result->op = GGML_OP_FLASH_ATTN;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = q;
-    result->src1 = k;
-    result->opt[0] = v;
-    result->opt[1] = ggml_new_i32(ctx, masked ? 1 : 0);
+    result->src[0] = q;
+    result->src[1] = k;
+    result->src[2] = v;
+    result->src[3] = ggml_new_i32(ctx, masked ? 1 : 0);

     return result;
 }
@@ -7264,11 +7262,11 @@ struct ggml_tensor * ggml_flash_ff(

     result->op = GGML_OP_FLASH_FF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b0;
-    result->opt[0] = b1;
-    result->opt[1] = c0;
-    result->opt[2] = c1;
+    result->src[0] = a;
+    result->src[1] = b0;
+    result->src[2] = b1;
+    result->src[3] = c0;
+    result->src[4] = c1;

     return result;
 }
@@ -7328,11 +7326,11 @@ struct ggml_tensor * ggml_flash_attn_back(

     result->op = GGML_OP_FLASH_ATTN_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = q;
-    result->src1 = k;
-    result->opt[0] = v;
-    result->opt[1] = d;
-    result->opt[2] = ggml_new_i32(ctx, masked ? 1 : 0);
+    result->src[0] = q;
+    result->src[1] = k;
+    result->src[2] = v;
+    result->src[3] = d;
+    result->src[4] = ggml_new_i32(ctx, masked ? 1 : 0);

     return result;
 }
@@ -7377,9 +7375,9 @@ struct ggml_tensor * ggml_win_part(

     result->op = GGML_OP_WIN_PART;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
-    result->opt[0] = b;
+    result->src[0] = a;
+    result->src[1] = NULL;
+    result->src[2] = b;

     return result;
 }
@@ -7414,9 +7412,9 @@ struct ggml_tensor * ggml_win_unpart(

     result->op = GGML_OP_WIN_UNPART;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = NULL;
-    result->opt[0] = b;
+    result->src[0] = a;
+    result->src[1] = NULL;
+    result->src[2] = b;

     return result;
 }
@@ -7445,8 +7443,8 @@ struct ggml_tensor * ggml_map_unary_impl_f32(

     result->op = GGML_OP_MAP_UNARY;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->opt[0] = addr_tensor;
+    result->src[0] = a;
+    result->src[2] = addr_tensor;

     return result;
 }
@@ -7492,9 +7490,9 @@ struct ggml_tensor * ggml_map_binary_impl_f32(

     result->op = GGML_OP_MAP_BINARY;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = addr_tensor;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = addr_tensor;

     return result;
 }
@@ -7539,8 +7537,8 @@ struct ggml_tensor * ggml_map_custom1_impl_f32(

     result->op = GGML_OP_MAP_CUSTOM1;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->opt[0] = addr_tensor;
+    result->src[0] = a;
+    result->src[2] = addr_tensor;

     return result;
 }
@@ -7584,9 +7582,9 @@ struct ggml_tensor * ggml_map_custom2_impl_f32(

     result->op = GGML_OP_MAP_CUSTOM2;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = addr_tensor;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = addr_tensor;

     return result;
 }
@@ -7633,10 +7631,10 @@ struct ggml_tensor * ggml_map_custom3_impl_f32(

     result->op = GGML_OP_MAP_CUSTOM3;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = addr_tensor;
-    result->opt[1] = c;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = addr_tensor;
+    result->src[3] = c;

     return result;
 }
@@ -7676,8 +7674,8 @@ struct ggml_tensor * ggml_cross_entropy_loss(

     result->op = GGML_OP_CROSS_ENTROPY_LOSS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src0 = a;
-    result->src1 = b;
+    result->src[0] = a;
+    result->src[1] = b;

     return result;
 }
@@ -7696,9 +7694,9 @@ struct ggml_tensor * ggml_cross_entropy_loss_back(

     result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
     result->grad = NULL;
-    result->src0 = a;
-    result->src1 = b;
-    result->opt[0] = c;
+    result->src[0] = a;
+    result->src[1] = b;
+    result->src[2] = c;

     return result;
 }
@@ -14567,287 +14565,287 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
     if (skip_cpu) {
         return;
     }
-    GGML_ASSERT(tensor->src0 == NULL || tensor->src0->backend == GGML_BACKEND_CPU);
-    GGML_ASSERT(tensor->src1 == NULL || tensor->src1->backend == GGML_BACKEND_CPU);
+    GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
+    GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
 #endif // GGML_USE_CUBLAS

     switch (tensor->op) {
         case GGML_OP_DUP:
             {
-                ggml_compute_forward_dup(params, tensor->src0, tensor);
+                ggml_compute_forward_dup(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_ADD:
             {
-                ggml_compute_forward_add(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_add(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_ADD1:
             {
-                ggml_compute_forward_add1(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_add1(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_ACC:
             {
-                ggml_compute_forward_acc(params, tensor->src0, tensor->src1, tensor->opt[0], tensor);
+                ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
             } break;
         case GGML_OP_SUB:
             {
-                ggml_compute_forward_sub(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_sub(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_MUL:
             {
-                ggml_compute_forward_mul(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_mul(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_DIV:
             {
-                ggml_compute_forward_div(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_div(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_SQR:
             {
-                ggml_compute_forward_sqr(params, tensor->src0, tensor);
+                ggml_compute_forward_sqr(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_SQRT:
             {
-                ggml_compute_forward_sqrt(params, tensor->src0, tensor);
+                ggml_compute_forward_sqrt(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_LOG:
             {
-                ggml_compute_forward_log(params, tensor->src0, tensor);
+                ggml_compute_forward_log(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_SUM:
             {
-                ggml_compute_forward_sum(params, tensor->src0, tensor);
+                ggml_compute_forward_sum(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_SUM_ROWS:
             {
-                ggml_compute_forward_sum_rows(params, tensor->src0, tensor);
+                ggml_compute_forward_sum_rows(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_MEAN:
             {
-                ggml_compute_forward_mean(params, tensor->src0, tensor);
+                ggml_compute_forward_mean(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_ARGMAX:
             {
-                ggml_compute_forward_argmax(params, tensor->src0, tensor);
+                ggml_compute_forward_argmax(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_REPEAT:
             {
-                ggml_compute_forward_repeat(params, tensor->src0, tensor);
+                ggml_compute_forward_repeat(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_REPEAT_BACK:
             {
-                ggml_compute_forward_repeat_back(params, tensor->src0, tensor);
+                ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_ABS:
             {
-                ggml_compute_forward_abs(params, tensor->src0, tensor);
+                ggml_compute_forward_abs(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_SGN:
             {
-                ggml_compute_forward_sgn(params, tensor->src0, tensor);
+                ggml_compute_forward_sgn(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_NEG:
             {
-                ggml_compute_forward_neg(params, tensor->src0, tensor);
+                ggml_compute_forward_neg(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_STEP:
            {
-                ggml_compute_forward_step(params, tensor->src0, tensor);
+                ggml_compute_forward_step(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_TANH:
             {
-                ggml_compute_forward_tanh(params, tensor->src0, tensor);
+                ggml_compute_forward_tanh(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_ELU:
             {
-                ggml_compute_forward_elu(params, tensor->src0, tensor);
+                ggml_compute_forward_elu(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_RELU:
             {
-                ggml_compute_forward_relu(params, tensor->src0, tensor);
+                ggml_compute_forward_relu(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_GELU:
             {
-                ggml_compute_forward_gelu(params, tensor->src0, tensor);
+                ggml_compute_forward_gelu(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_GELU_QUICK:
             {
-                ggml_compute_forward_gelu_quick(params, tensor->src0, tensor);
+                ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_SILU:
             {
-                ggml_compute_forward_silu(params, tensor->src0, tensor);
+                ggml_compute_forward_silu(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_SILU_BACK:
             {
-                ggml_compute_forward_silu_back(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_NORM:
             {
-                ggml_compute_forward_norm(params, tensor->src0, tensor);
+                ggml_compute_forward_norm(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_RMS_NORM:
             {
-                ggml_compute_forward_rms_norm(params, tensor->src0, tensor);
+                ggml_compute_forward_rms_norm(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_RMS_NORM_BACK:
             {
-                ggml_compute_forward_rms_norm_back(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_rms_norm_back(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_MUL_MAT:
             {
-                ggml_compute_forward_mul_mat(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_OUT_PROD:
             {
-                ggml_compute_forward_out_prod(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_out_prod(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_SCALE:
             {
-                ggml_compute_forward_scale(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_scale(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_SET:
             {
-                ggml_compute_forward_set(params, tensor->src0, tensor->src1, tensor->opt[0], tensor);
+                ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
             } break;
         case GGML_OP_CPY:
             {
-                ggml_compute_forward_cpy(params, tensor->src0, tensor);
+                ggml_compute_forward_cpy(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_CONT:
             {
-                ggml_compute_forward_cont(params, tensor->src0, tensor);
+                ggml_compute_forward_cont(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_RESHAPE:
             {
-                ggml_compute_forward_reshape(params, tensor->src0, tensor);
+                ggml_compute_forward_reshape(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_VIEW:
             {
-                ggml_compute_forward_view(params, tensor->src0);
+                ggml_compute_forward_view(params, tensor->src[0]);
             } break;
         case GGML_OP_PERMUTE:
             {
-                ggml_compute_forward_permute(params, tensor->src0);
+                ggml_compute_forward_permute(params, tensor->src[0]);
             } break;
         case GGML_OP_TRANSPOSE:
             {
-                ggml_compute_forward_transpose(params, tensor->src0);
+                ggml_compute_forward_transpose(params, tensor->src[0]);
             } break;
         case GGML_OP_GET_ROWS:
             {
-                ggml_compute_forward_get_rows(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_get_rows(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_GET_ROWS_BACK:
             {
-                ggml_compute_forward_get_rows_back(params, tensor->src0, tensor->src1, tensor->opt[0], tensor);
+                ggml_compute_forward_get_rows_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
             } break;
         case GGML_OP_DIAG:
             {
-                ggml_compute_forward_diag(params, tensor->src0, tensor);
+                ggml_compute_forward_diag(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_DIAG_MASK_INF:
             {
-                ggml_compute_forward_diag_mask_inf(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_DIAG_MASK_ZERO:
             {
-                ggml_compute_forward_diag_mask_zero(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_SOFT_MAX:
             {
-                ggml_compute_forward_soft_max(params, tensor->src0, tensor);
+                ggml_compute_forward_soft_max(params, tensor->src[0], tensor);
             } break;
         case GGML_OP_SOFT_MAX_BACK:
             {
-                ggml_compute_forward_soft_max_back(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_soft_max_back(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_ROPE:
             {
-                ggml_compute_forward_rope(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_rope(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_ROPE_BACK:
             {
-                ggml_compute_forward_rope_back(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_rope_back(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_ALIBI:
             {
-                ggml_compute_forward_alibi(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_alibi(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_CLAMP:
             {
-                ggml_compute_forward_clamp(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_clamp(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_CONV_1D:
             {
-                ggml_compute_forward_conv_1d(params, tensor->src0, tensor->src1, tensor->opt[0], tensor);
+                ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
             } break;
         case GGML_OP_CONV_2D:
             {
-                ggml_compute_forward_conv_2d(params, tensor->src0, tensor->src1, tensor->opt[0], tensor);
+                ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
             } break;
         case GGML_OP_FLASH_ATTN:
             {
-                const int32_t t = ggml_get_i32_1d(tensor->opt[1], 0);
+                const int32_t t = ggml_get_i32_1d(tensor->src[3], 0);
                 GGML_ASSERT(t == 0 || t == 1);
                 const bool masked = t != 0;
-                ggml_compute_forward_flash_attn(params, tensor->src0, tensor->src1, tensor->opt[0], masked, tensor);
+                ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
             } break;
         case GGML_OP_FLASH_FF:
             {
-                ggml_compute_forward_flash_ff(params, tensor->src0, tensor->src1, tensor->opt[0], tensor->opt[1], tensor->opt[2], tensor);
+                ggml_compute_forward_flash_ff(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], tensor->src[4], tensor);
             } break;
         case GGML_OP_FLASH_ATTN_BACK:
             {
-                int32_t t = ggml_get_i32_1d(tensor->opt[2], 0);
+                int32_t t = ggml_get_i32_1d(tensor->src[4], 0);
                 GGML_ASSERT(t == 0 || t == 1);
                 bool masked = t != 0;
-                ggml_compute_forward_flash_attn_back(params, tensor->src0, tensor->src1, tensor->opt[0], tensor->opt[1], masked, tensor);
+                ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
             } break;
         case GGML_OP_WIN_PART:
             {
-                ggml_compute_forward_win_part(params, tensor->src0, tensor->opt[0], tensor);
+                ggml_compute_forward_win_part(params, tensor->src[0], tensor->src[2], tensor);
             } break;
         case GGML_OP_WIN_UNPART:
             {
-                ggml_compute_forward_win_unpart(params, tensor->src0, tensor->opt[0], tensor);
+                ggml_compute_forward_win_unpart(params, tensor->src[0], tensor->src[2], tensor);
             } break;
         case GGML_OP_MAP_UNARY:
             {
-                const ggml_unary_op_f32_t fun = *((ggml_unary_op_f32_t *)tensor->opt[0]->data);
-                ggml_compute_forward_map_unary(params, tensor->src0, tensor, fun);
+                const ggml_unary_op_f32_t fun = *((ggml_unary_op_f32_t *)tensor->src[2]->data);
+                ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
             }
             break;
         case GGML_OP_MAP_BINARY:
             {
-                const ggml_binary_op_f32_t fun = *((ggml_binary_op_f32_t *)tensor->opt[0]->data);
-                ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun);
+                const ggml_binary_op_f32_t fun = *((ggml_binary_op_f32_t *)tensor->src[2]->data);
+                ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
            }
             break;
         case GGML_OP_MAP_CUSTOM1:
             {
-                const ggml_custom1_op_f32_t fun = *((ggml_custom1_op_f32_t *)tensor->opt[0]->data);
-                ggml_compute_forward_map_custom1(params, tensor->src0, tensor, fun);
+                const ggml_custom1_op_f32_t fun = *((ggml_custom1_op_f32_t *)tensor->src[2]->data);
+                ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
             }
             break;
         case GGML_OP_MAP_CUSTOM2:
             {
-                const ggml_custom2_op_f32_t fun = *((ggml_custom2_op_f32_t *)tensor->opt[0]->data);
-                ggml_compute_forward_map_custom2(params, tensor->src0, tensor->src1, tensor, fun);
+                const ggml_custom2_op_f32_t fun = *((ggml_custom2_op_f32_t *)tensor->src[2]->data);
+                ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
             }
             break;
         case GGML_OP_MAP_CUSTOM3:
             {
-                const ggml_custom3_op_f32_t fun = *((ggml_custom3_op_f32_t *)tensor->opt[0]->data);
-                ggml_compute_forward_map_custom3(params, tensor->src0, tensor->src1, tensor->opt[1], tensor, fun);
+                const ggml_custom3_op_f32_t fun = *((ggml_custom3_op_f32_t *)tensor->src[2]->data);
+                ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[3], tensor, fun);
             }
             break;
         case GGML_OP_CROSS_ENTROPY_LOSS:
             {
-                ggml_compute_forward_cross_entropy_loss(params, tensor->src0, tensor->src1, tensor);
+                ggml_compute_forward_cross_entropy_loss(params, tensor->src[0], tensor->src[1], tensor);
             }
             break;
         case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
             {
-                ggml_compute_forward_cross_entropy_loss_back(params, tensor->src0, tensor->src1, tensor->opt[0], tensor);
+                ggml_compute_forward_cross_entropy_loss_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
             }
             break;
         case GGML_OP_NONE:
@@ -14864,8 +14862,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm

 ////////////////////////////////////////////////////////////////////////////////

 static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) {
-    struct ggml_tensor * src0 = tensor->src0;
-    struct ggml_tensor * src1 = tensor->src1;
+    struct ggml_tensor * src0 = tensor->src[0];
+    struct ggml_tensor * src1 = tensor->src[1];

     switch (tensor->op) {
         case GGML_OP_DUP:
@@ -14901,12 +14899,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                     src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
                 }
                 if (src1->grad) {
-                    GGML_ASSERT(ggml_nelements(tensor->opt[0]) == 5);
-                    GGML_ASSERT(tensor->opt[0]->type == GGML_TYPE_I32);
-                    const size_t nb1    = (( int32_t * ) tensor->opt[0]->data)[0];
-                    const size_t nb2    = (( int32_t * ) tensor->opt[0]->data)[1];
-                    const size_t nb3    = (( int32_t * ) tensor->opt[0]->data)[2];
-                    const size_t offset = (( int32_t * ) tensor->opt[0]->data)[3];
+                    GGML_ASSERT(ggml_nelements(tensor->src[2]) == 5);
+                    GGML_ASSERT(tensor->src[2]->type == GGML_TYPE_I32);
+                    const size_t nb1    = (( int32_t * ) tensor->src[2]->data)[0];
+                    const size_t nb2    = (( int32_t * ) tensor->src[2]->data)[1];
+                    const size_t nb3    = (( int32_t * ) tensor->src[2]->data)[2];
+                    const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];

                     struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
                         tensor->grad,
@@ -15214,12 +15212,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_SET:
             {
-                GGML_ASSERT(ggml_nelements(tensor->opt[0]) == 5);
-                GGML_ASSERT(tensor->opt[0]->type == GGML_TYPE_I32);
-                const size_t nb1    = (( int32_t * ) tensor->opt[0]->data)[0];
-                const size_t nb2    = (( int32_t * ) tensor->opt[0]->data)[1];
-                const size_t nb3    = (( int32_t * ) tensor->opt[0]->data)[2];
-                const size_t offset = (( int32_t * ) tensor->opt[0]->data)[3];
+                GGML_ASSERT(ggml_nelements(tensor->src[2]) == 5);
+                GGML_ASSERT(tensor->src[2]->type == GGML_TYPE_I32);
+                const size_t nb1    = (( int32_t * ) tensor->src[2]->data)[0];
+                const size_t nb2    = (( int32_t * ) tensor->src[2]->data)[1];
+                const size_t nb3    = (( int32_t * ) tensor->src[2]->data)[2];
+                const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];

                 struct ggml_tensor * tensor_grad_view = NULL;

@@ -15296,8 +15294,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 if (src0->grad) {
                     size_t offset;

-                    GGML_ASSERT(sizeof(offset) <= ggml_nbytes(tensor->opt[0]));
-                    memcpy(&offset, tensor->opt[0]->data, sizeof(offset));
+                    GGML_ASSERT(sizeof(offset) <= ggml_nbytes(tensor->src[2]));
+                    memcpy(&offset, tensor->src[2]->data, sizeof(offset));

                     size_t nb1 = tensor->nb[1];
                     size_t nb2 = tensor->nb[2];
@@ -15324,7 +15322,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
-                    int32_t * axes = (int32_t *) tensor->opt[0]->data;
+                    int32_t * axes = (int32_t *) tensor->src[2]->data;
                     int axis0 = axes[0] & 0x3;
                     int axis1 = axes[1] & 0x3;
                     int axis2 = axes[2] & 0x3;
@@ -15487,15 +15485,15 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
         case GGML_OP_FLASH_ATTN:
             {
                 struct ggml_tensor * flash_grad = NULL;
-                if (src0->grad || src1->grad || tensor->opt[0]->grad) {
-                    int32_t t = ggml_get_i32_1d(tensor->opt[1], 0);
+                if (src0->grad || src1->grad || tensor->src[2]->grad) {
+                    int32_t t = ggml_get_i32_1d(tensor->src[3], 0);
                     GGML_ASSERT(t == 0 || t == 1);
                     bool masked = t != 0;
                     flash_grad =
                         ggml_flash_attn_back(ctx,
                             src0,
                             src1,
-                            tensor->opt[0],
+                            tensor->src[2],
                             tensor->grad,
                             masked);
                 }
@@ -15592,7 +15590,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                                 inplace);
                 }

-                struct ggml_tensor * opt0 = tensor->opt[0];
+                struct ggml_tensor * opt0 = tensor->src[2];

                 if (opt0->grad) {
                     struct ggml_tensor * grad_v = NULL;
@@ -15708,17 +15706,9 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
         }
     }

-    if (node->src0) {
-        ggml_visit_parents(cgraph, node->src0);
-    }
-
-    if (node->src1) {
-        ggml_visit_parents(cgraph, node->src1);
-    }
-
-    for (int i = 0; i < GGML_MAX_OPT; ++i) {
-        if (node->opt[i]) {
-            ggml_visit_parents(cgraph, node->opt[i]);
+    for (int i = 0; i < GGML_MAX_SRC; ++i) {
+        if (node->src[i]) {
+            ggml_visit_parents(cgraph, node->src[i]);
         }
     }

@@ -16110,8 +16100,8 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

                     size_t cur = 0;

-                    if (ggml_is_quantized(node->src0->type)) {
-                        cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src0->ne[0] * n_tasks;
+                    if (ggml_is_quantized(node->src[0]->type)) {
+                        cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src[0]->ne[0] * n_tasks;
                     }

                     work_size = MAX(work_size, cur);
@@ -16122,8 +16112,8 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

                     size_t cur = 0;

-                    if (ggml_is_quantized(node->src0->type)) {
-                        cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src1->ne[0] * n_tasks;
+                    if (ggml_is_quantized(node->src[0]->type)) {
+                        cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src[1]->ne[0] * n_tasks;
                     }

                     work_size = MAX(work_size, cur);
@@ -16166,39 +16156,39 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                     n_tasks = n_threads;

                     // TODO: use different scheduling for different matrix sizes
-                    //const int nr0 = ggml_nrows(node->src0);
-                    //const int nr1 = ggml_nrows(node->src1);
+                    //const int nr0 = ggml_nrows(node->src[0]);
+                    //const int nr1 = ggml_nrows(node->src[1]);

                     //n_tasks = MIN(n_threads, MAX(1, nr0/128));
                     //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);

                     size_t cur = 0;
-                    const enum ggml_type vec_dot_type = type_traits[node->src0->type].vec_dot_type;
+                    const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;

 #if defined(GGML_USE_CUBLAS)
-                    if (ggml_cuda_can_mul_mat(node->src0, node->src1, node)) {
+                    if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
                         n_tasks = 1; // TODO: this actually is doing nothing
                                      //       the threads are still spinning
                     } else
 #elif defined(GGML_USE_CLBLAST)
-                    if (ggml_cl_can_mul_mat(node->src0, node->src1, node)) {
+                    if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
                         n_tasks = 1; // TODO: this actually is doing nothing
                                      //       the threads are still spinning
-                        cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node);
+                        cur = ggml_cl_mul_mat_get_wsize(node->src[0], node->src[1], node);
                     } else
 #endif
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-                    if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
+                    if (ggml_compute_forward_mul_mat_use_blas(node->src[0], node->src[1], node)) {
                         n_tasks = 1; // TODO: this actually is doing nothing
                                      //       the threads are still spinning
-                        if (node->src0->type != GGML_TYPE_F32) {
+                        if (node->src[0]->type != GGML_TYPE_F32) {
                             // here we need memory just for single 2D matrix from src0
-                            cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
+                            cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src[0]->ne[0]*node->src[0]->ne[1]);
                         }
                     } else
 #endif
-                    if (node->src1->type != vec_dot_type) {
-                        cur = GGML_TYPE_SIZE[vec_dot_type]*ggml_nelements(node->src1)/GGML_BLCK_SIZE[vec_dot_type];
+                    if (node->src[1]->type != vec_dot_type) {
+                        cur = GGML_TYPE_SIZE[vec_dot_type]*ggml_nelements(node->src[1])/GGML_BLCK_SIZE[vec_dot_type];
                     } else {
                         cur = 0;
                     }
@@ -16242,24 +16232,24 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                 {
                     n_tasks = n_threads;

-                    GGML_ASSERT(node->src0->ne[3] == 1);
-                    GGML_ASSERT(node->src1->ne[2] == 1);
-                    GGML_ASSERT(node->src1->ne[3] == 1);
+                    GGML_ASSERT(node->src[0]->ne[3] == 1);
+                    GGML_ASSERT(node->src[1]->ne[2] == 1);
+                    GGML_ASSERT(node->src[1]->ne[3] == 1);

                     size_t cur = 0;
-                    const int nk = node->src0->ne[0];
+                    const int nk = node->src[0]->ne[0];

-                    if (node->src0->type == GGML_TYPE_F16 &&
-                        node->src1->type == GGML_TYPE_F32) {
+                    if (node->src[0]->type == GGML_TYPE_F16 &&
+                        node->src[1]->type == GGML_TYPE_F32) {
                         cur = sizeof(ggml_fp16_t)*(
-                            nk*ggml_up32(node->src0->ne[1])*node->src0->ne[2] +
-                            ( 2*(nk/2) + node->src1->ne[0])*node->src1->ne[1]
+                            nk*ggml_up32(node->src[0]->ne[1])*node->src[0]->ne[2] +
+                            ( 2*(nk/2) + node->src[1]->ne[0])*node->src[1]->ne[1]
                         );
-                    } else if (node->src0->type == GGML_TYPE_F32 &&
-                               node->src1->type == GGML_TYPE_F32) {
+                    } else if (node->src[0]->type == GGML_TYPE_F32 &&
+                               node->src[1]->type == GGML_TYPE_F32) {
                         cur = sizeof(float)*(
-                            nk*ggml_up32(node->src0->ne[1])*node->src0->ne[2] +
-                            ( 2*(nk/2) + node->src1->ne[0])*node->src1->ne[1]
+                            nk*ggml_up32(node->src[0]->ne[1])*node->src[0]->ne[2] +
+                            ( 2*(nk/2) + node->src[1]->ne[0])*node->src[1]->ne[1]
                         );
                     } else {
                         GGML_ASSERT(false);
@@ -16271,16 +16261,16 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                 {
                     n_tasks = n_threads;

-                    GGML_ASSERT(node->src1->ne[3] == 1);
+                    GGML_ASSERT(node->src[1]->ne[3] == 1);

-                    const int64_t ne00 = node->src0->ne[0]; // W
-                    const int64_t ne01 = node->src0->ne[1]; // H
-                    const int64_t ne02 = node->src0->ne[2]; // C
-                    const int64_t ne03 = node->src0->ne[3]; // N
+                    const int64_t ne00 = node->src[0]->ne[0]; // W
+                    const int64_t ne01 = node->src[0]->ne[1]; // H
+                    const int64_t ne02 = node->src[0]->ne[2]; // C
+                    const int64_t ne03 = node->src[0]->ne[3]; // N

-                    const int64_t ne10 = node->src1->ne[0]; // W
-                    const int64_t ne11 = node->src1->ne[1]; // H
-                    const int64_t ne12 = node->src1->ne[2]; // C
+                    const int64_t ne10 = node->src[1]->ne[0]; // W
+                    const int64_t ne11 = node->src[1]->ne[1]; // H
+                    const int64_t ne12 = node->src[1]->ne[2]; // C

                     const int64_t nk = ne00*ne01;

@@ -16290,11 +16280,11 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

                     size_t cur = 0;

-                    if (node->src0->type == GGML_TYPE_F16 &&
-                        node->src1->type == GGML_TYPE_F32) {
+                    if (node->src[0]->type == GGML_TYPE_F16 &&
+                        node->src[1]->type == GGML_TYPE_F32) {
                         cur = sizeof(ggml_fp16_t)*(ne10*ne11*ne12);
-                    } else if (node->src0->type == GGML_TYPE_F32 &&
-                               node->src1->type == GGML_TYPE_F32) {
+                    } else if (node->src[0]->type == GGML_TYPE_F32 &&
+                               node->src[1]->type == GGML_TYPE_F32) {
                         cur = sizeof(float)* (ne10*ne11*ne12);
                     } else {
                         GGML_ASSERT(false);
@@ -16308,14 +16298,14 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

                     size_t cur = 0;

-                    const int64_t ne11 = ggml_up(node->src1->ne[1], GGML_SOFT_MAX_UNROLL);
+                    const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);

-                    if (node->src1->type == GGML_TYPE_F32) {
+                    if (node->src[1]->type == GGML_TYPE_F32) {
                         cur  = sizeof(float)*ne11*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*ne11*n_tasks; // this is overestimated by x2
                     }

-                    if (node->src1->type == GGML_TYPE_F16) {
+                    if (node->src[1]->type == GGML_TYPE_F16) {
                         cur  = sizeof(float)*ne11*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*ne11*n_tasks; // this is overestimated by x2
                     }
@@ -16328,14 +16318,14 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

                     size_t cur = 0;

-                    if (node->src1->type == GGML_TYPE_F32) {
-                        cur  = sizeof(float)*node->src1->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
-                        cur += sizeof(float)*node->src1->ne[1]*n_tasks; // this is overestimated by x2
+                    if (node->src[1]->type == GGML_TYPE_F32) {
+                        cur  = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
+                        cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
                     }

-                    if (node->src1->type == GGML_TYPE_F16) {
-                        cur  = sizeof(float)*node->src1->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
-                        cur += sizeof(float)*node->src1->ne[1]*n_tasks; // this is overestimated by x2
+                    if (node->src[1]->type == GGML_TYPE_F16) {
+                        cur  = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
+                        cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
                     }

                     work_size = MAX(work_size, cur);
@@ -16346,15 +16336,15 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {

                     size_t cur = 0;

-                    const int64_t    D = node->src0->ne[0];
-                    const int64_t ne11 = ggml_up(node->src1->ne[1], GGML_SOFT_MAX_UNROLL);
+                    const int64_t    D = node->src[0]->ne[0];
+                    const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
                     const int64_t mxDn = MAX(D, ne11) * 2; // *2 because of S and SM in ggml_compute_forward_flash_attn_back
-                    if (node->src1->type == GGML_TYPE_F32) {
+                    if (node->src[1]->type == GGML_TYPE_F32) {
                         cur  = sizeof(float)*mxDn*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*mxDn*n_tasks; // this is overestimated by x2
                     }

-                    if (node->src1->type == GGML_TYPE_F16) {
+                    if (node->src[1]->type == GGML_TYPE_F16) {
                         cur  = sizeof(float)*mxDn*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*mxDn*n_tasks; // this is overestimated by x2
                     }
@@ -16375,7 +16365,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                 {
                     n_tasks = n_threads;

-                    size_t cur = ggml_type_size(node->type)*(n_tasks + node->src0->ne[0]*n_tasks);
+                    size_t cur = ggml_type_size(node->type)*(n_tasks + node->src[0]->ne[0]*n_tasks);

                     work_size = MAX(work_size, cur);
                 } break;
@@ -16383,7 +16373,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                 {
                     n_tasks = n_threads;

-                    size_t cur = ggml_type_size(node->type)*node->src0->ne[0]*n_tasks;
+                    size_t cur = ggml_type_size(node->type)*node->src[0]->ne[0]*n_tasks;

                     work_size = MAX(work_size, cur);
                 } break;
@@ -16593,8 +16583,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
             ggml_graph_export_leaf(cgraph->leafs[i], fout);

             GGML_ASSERT(cgraph->leafs[i]->op == GGML_OP_NONE);
-            GGML_ASSERT(cgraph->leafs[i]->src0 == NULL);
-            GGML_ASSERT(cgraph->leafs[i]->src1 == NULL);
+            GGML_ASSERT(cgraph->leafs[i]->src[0] == NULL);
+            GGML_ASSERT(cgraph->leafs[i]->src[1] == NULL);
         }

         // header
@@ -16605,17 +16595,9 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
         for (int i = 0; i < cgraph->n_nodes; ++i) {
             ggml_graph_export_node(cgraph->nodes[i], "DST", fout);

-            if (cgraph->nodes[i]->src0) {
-                ggml_graph_export_node(cgraph->nodes[i]->src0, "SRC0", fout);
-            }
-
-            if (cgraph->nodes[i]->src1) {
-                ggml_graph_export_node(cgraph->nodes[i]->src1, "SRC1", fout);
-            }
-
-            for (int j = 0; j < GGML_MAX_OPT; ++j) {
-                if (cgraph->nodes[i]->opt[j]) {
-                    ggml_graph_export_node(cgraph->nodes[i]->opt[j], "OPT", fout);
+            for (int j = 0; j < GGML_MAX_SRC; ++j) {
+                if (cgraph->nodes[i]->src[j]) {
+                    ggml_graph_export_node(cgraph->nodes[i]->src[j], "SRC", fout);
                 }
             }

@@ -16706,16 +16688,13 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {

                 // output the op arguments
                 {
-                    struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL };
+                    struct ggml_tensor * args[GGML_MAX_SRC] = { NULL };

-                    args[0] = tensor->src0;
-                    args[1] = tensor->src1;
-
-                    for (int j = 0; j < GGML_MAX_OPT; ++j) {
-                        args[2 + j] = tensor->opt[j];
+                    for (int j = 0; j < GGML_MAX_SRC; ++j) {
+                        args[j] = tensor->src[j];
                     }

-                    for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
+                    for (int j = 0; j < GGML_MAX_SRC; ++j) {
                         if (args[j]) {
                             int32_t idx = -1;

@@ -16933,12 +16912,12 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **

             const char * ptr_name = ptr; ptr += GGML_MAX_NAME;

-            const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += (2 + GGML_MAX_OPT)*sizeof(int32_t);
+            const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t);

-            struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL };
+            struct ggml_tensor * args[GGML_MAX_SRC] = { NULL };

             // parse args
-            for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
+            for (int j = 0; j < GGML_MAX_SRC; ++j) {
                 const int32_t arg_idx = ptr_arg_idx[j];

                 if (arg_idx == -1) {
@@ -16995,11 +16974,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                 tensor->nb[j] = nb[j];
             }

-            tensor->src0 = args[0];
-            tensor->src1 = args[1];
-
-            for (int j = 0; j < GGML_MAX_OPT; ++j) {
-                tensor->opt[j] = args[2 + j];
+            for (int j = 0; j < GGML_MAX_SRC; ++j) {
+                tensor->src[j] = args[j];
             }

             result.nodes[i] = tensor;
@@ -17198,19 +17174,11 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
     for (int i = 0; i < gb->n_nodes; i++) {
         struct ggml_tensor * node = gb->nodes[i];

-        if (node->src0) {
-            ggml_graph_dump_dot_node_edge(fp, gb, node, node->src0, "x");
-        }
-
-        if (node->src1) {
-            ggml_graph_dump_dot_node_edge(fp, gb, node, node->src1, "y");
-        }
-
-        for (int j = 0; j < GGML_MAX_OPT; j++) {
-            if (node->opt[j]) {
+        for (int j = 0; j < GGML_MAX_SRC; j++) {
+            if (node->src[j]) {
                 char label[16];
-                snprintf(label, sizeof(label), "opt %d", j);
-                ggml_graph_dump_dot_node_edge(fp, gb, node, node->opt[j], label);
+                snprintf(label, sizeof(label), "src %d", j);
+                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
             }
         }
     }
@@ -17218,19 +17186,11 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
     for (int i = 0; i < gb->n_leafs; i++) {
         struct ggml_tensor * node = gb->leafs[i];

-        if (node->src0) {
-            ggml_graph_dump_dot_leaf_edge(fp, node, node->src0, "x");
-        }
-
-        if (node->src1) {
-            ggml_graph_dump_dot_leaf_edge(fp, node, node->src1, "y");
-        }
-
-        for (int j = 0; j < GGML_MAX_OPT; j++) {
-            if (node->opt[j]) {
+        for (int j = 0; j < GGML_MAX_SRC; j++) {
+            if (node->src[j]) {
                 char label[16];
-                snprintf(label, sizeof(label), "opt %d", j);
-                ggml_graph_dump_dot_leaf_edge(fp, node, node->opt[j], label);
+                snprintf(label, sizeof(label), "src %d", j);
+                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
             }
         }
     }
diff --git a/ggml.h b/ggml.h
index ab84bef..d7c9e0f 100644
--- a/ggml.h
+++ b/ggml.h
@@ -132,10 +132,10 @@
 // {
 //    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
 //
-//    // a[1, 2] = 1.0f;
+//    // a[2, 1] = 1.0f;
 //    *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
 //
-//    // a[2, 0] = 2.0f;
+//    // a[0, 2] = 2.0f;
 //    *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
 //
 //    ...
@@ -197,7 +197,7 @@
 #define GGML_MAX_NODES         4096
 #define GGML_MAX_PARAMS        256
 #define GGML_MAX_CONTEXTS      64
-#define GGML_MAX_OPT           4
+#define GGML_MAX_SRC           6
 #define GGML_MAX_NAME          48
 #define GGML_DEFAULT_N_THREADS 4

@@ -414,9 +414,7 @@ extern "C" {
         bool is_param;

         struct ggml_tensor * grad;
-        struct ggml_tensor * src0;
-        struct ggml_tensor * src1;
-        struct ggml_tensor * opt[GGML_MAX_OPT];
+        struct ggml_tensor * src[GGML_MAX_SRC];

         // performance
         int     perf_runs;
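
Note: the following is a minimal sketch (not part of the patch) of what the unified operand array means for downstream code, assuming only the fields this change introduces; the helper name `count_parents` is hypothetical. Before the patch a graph walker had to treat `t->src0`, `t->src1` and the `t->opt[]` slots as three separate cases; afterwards a single loop over `GGML_MAX_SRC` visits every operand, which is exactly the simplification applied to `expand()`, `ggml_visit_parents()` and `ggml_graph_dump_dot()` above:

    #include <stddef.h>
    #include "ggml.h"

    // Count the non-NULL parents of a tensor under the new layout.
    // src[0] and src[1] hold what used to be src0 and src1;
    // src[2..GGML_MAX_SRC-1] hold what used to be opt[0..GGML_MAX_OPT-1].
    static int count_parents(const struct ggml_tensor * t) {
        int n = 0;
        for (int i = 0; i < GGML_MAX_SRC; ++i) {
            if (t->src[i] != NULL) {
                n++;
            }
        }
        return n;
    }

The same loop shape is why the export format above can replace the fixed "SRC0"/"SRC1"/"OPT" tags with a single "SRC" tag and an argument array of size GGML_MAX_SRC.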