File tree: 7 files changed (+7 −7 lines changed)
Original file line number Diff line number Diff line change 47
47
optimize_model = False ,
48
48
trust_remote_code = True ,
49
49
use_cache = True )
50
- model = model .to ('xpu' )
50
+ model = model .half (). to ('xpu' )
51
51
52
52
# Load tokenizer
53
53
tokenizer = CodeLlamaTokenizer .from_pretrained (model_path ,
Original file line number Diff line number Diff line change 47
47
optimize_model = False ,
48
48
trust_remote_code = True ,
49
49
use_cache = True )
50
- model = model .to ('xpu' )
50
+ model = model .half (). to ('xpu' )
51
51
52
52
# Load tokenizer
53
53
tokenizer = AutoTokenizer .from_pretrained (model_path ,
Original file line number Diff line number Diff line change 47
47
load_in_4bit = True ,
48
48
trust_remote_code = True ,
49
49
use_cache = True )
50
- model = model .to ('xpu' )
50
+ model = model .half (). to ('xpu' )
51
51
52
52
# Load tokenizer
53
53
tokenizer = AutoTokenizer .from_pretrained (model_path ,
Original file line number Diff line number Diff line change 50
50
# This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
51
51
model = optimize_model (model )
52
52
53
- model = model .to ('xpu' )
53
+ model = model .half (). to ('xpu' )
54
54
55
55
# Load tokenizer
56
56
tokenizer = CodeLlamaTokenizer .from_pretrained (model_path , trust_remote_code = True )
Original file line number Diff line number Diff line change 46
46
use_cache = True )
47
47
model = optimize_model (model )
48
48
49
- model = model .to ('xpu' )
49
+ model = model .half (). to ('xpu' )
50
50
51
51
# Load tokenizer
52
52
tokenizer = AutoTokenizer .from_pretrained (model_path ,
Original file line number Diff line number Diff line change 49
49
# This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
50
50
model = optimize_model (model )
51
51
52
- model = model .to ('xpu' )
52
+ model = model .half (). to ('xpu' )
53
53
54
54
# Load tokenizer
55
55
tokenizer = AutoTokenizer .from_pretrained (model_path , trust_remote_code = True )
Original file line number Diff line number Diff line change 49
49
# When running LLMs on Intel iGPUs for Windows users, we recommend setting `cpu_embedding=True` in the optimize_model function.
50
50
# This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
51
51
model = optimize_model (model )
52
- model = model .to ('xpu' )
52
+ model = model .half (). to ('xpu' )
53
53
54
54
# Load tokenizer
55
55
tokenizer = AutoTokenizer .from_pretrained (model_path , trust_remote_code = True )
You can’t perform that action at this time.
0 commit comments