ggml : fix Q4_3 cuBLAS

2025-02-21 15:30:00 +00:00 · 2023-04-22 16:31:56 +03:00 · 2023-04-22 16:31:56 +03:00 · 0e018fe008
commit 0e018fe008
parent 857308d1e8
2 changed files with 5 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -312,6 +312,7 @@ add_library(ggml OBJECT
 target_include_directories(ggml PUBLIC .)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
+
 if (BUILD_SHARED_LIBS)
    set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()
@@ -324,6 +325,7 @@ add_library(llama
 target_include_directories(llama PUBLIC .)
 target_compile_features(llama PUBLIC cxx_std_11) # don't bump
 target_link_libraries(llama PRIVATE ggml ${LLAMA_EXTRA_LIBS})
+
 if (BUILD_SHARED_LIBS)
    set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
--- a/ggml.c
+++ b/ggml.c
@@ -7992,6 +7992,9 @@ static void ggml_compute_forward_mul_mat_q_f32(
        else if (type == GGML_TYPE_Q4_2) {
            dequantize_row_q_cuda = dequantize_row_q4_2_cuda;
        }
+        else if (type == GGML_TYPE_Q4_3) {
+            dequantize_row_q_cuda = dequantize_row_q4_3_cuda;
+        }
        else {
            GGML_ASSERT(false);
        }