Skip to content

Commit 99c2274

Browse files
authored
fix qwen 14b fp6 abnormal output (#11583)
1 parent c279849 commit 99c2274

File tree

1 file changed

+5
-4
lines changed

python/llm/src/ipex_llm/transformers/convert.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,7 @@ def replace_with_low_bit_linear_for_module(model, qtype, module_name=None,
667667
return model
668668

669669

670-
def _optimize_pre(model):
670+
def _optimize_pre(model, qtype=None):
671671
try:
672672
from sentence_transformers.SentenceTransformer import SentenceTransformer
673673
if isinstance(model, SentenceTransformer):
@@ -743,8 +743,9 @@ def _optimize_pre(model):
743743
if should_apply_merge_qkv:
744744
from ipex_llm.transformers.models.qwen2 import merge_qkv
745745
model.apply(merge_qkv)
746-
from ipex_llm.transformers.models.qwen2 import padding_mlp
747-
model.apply(padding_mlp)
746+
if qtype != ggml_tensor_qtype["fp6"]:
747+
from ipex_llm.transformers.models.qwen2 import padding_mlp
748+
model.apply(padding_mlp)
748749
if model.config.model_type == "qwen2_moe":
749750
from ipex_llm.transformers.models.qwen2_moe import merge_qkv
750751
model.apply(merge_qkv)
@@ -795,7 +796,7 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
795796
return model
796797

797798
if optimize_model:
798-
model = _optimize_pre(model)
799+
model = _optimize_pre(model, qtype)
799800

800801
act_order = False
801802
if getattr(model, "quantization_method", None) == "gptq":

0 commit comments

Comments (0)