Upgrade to Transformers 4.28 (#202)
regisss authored Apr 14, 2023
1 parent b75f3ba commit a77933f
Showing 32 changed files with 421 additions and 97 deletions.
8 changes: 4 additions & 4 deletions Makefile
@@ -22,12 +22,12 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))

 # Run code quality checks
 style_check:
-    black --check .
-    ruff .
+    black --check . setup.py
+    ruff . setup.py

 style:
-    black .
-    ruff . --fix
+    black . setup.py
+    ruff . setup.py --fix

 # Run unit and integration tests
 fast_tests:
2 changes: 1 addition & 1 deletion examples/audio-classification/run_audio_classification.py
@@ -38,7 +38,7 @@
 logger = logging.getLogger(__name__)

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")
2 changes: 1 addition & 1 deletion examples/contrastive-image-text/run_clip.py
@@ -52,7 +52,7 @@
 logger = logging.getLogger(__name__)

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
2 changes: 1 addition & 1 deletion examples/image-classification/run_image_classification.py
@@ -54,7 +54,7 @@
 logger = logging.getLogger(__name__)

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
5 changes: 5 additions & 0 deletions examples/language-modeling/README.md
@@ -271,3 +271,8 @@ Here is a DeepSpeed configuration you can use to train your models on Gaudi:
 ## Streaming

 To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` to the command line. This is currently supported by `run_mlm.py` and `run_clm.py`.
+
+
+## Low CPU Memory Usage
+
+To use low CPU memory mode, which can be very useful for LLMs, add `--low_cpu_mem_usage` to the command line.
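For reference, a minimal sketch of what this flag maps to in plain Transformers (assuming the example script forwards it to `from_pretrained`, as the upstream PyTorch examples do; the checkpoint name is a placeholder):

from transformers import AutoModelForCausalLM

# `low_cpu_mem_usage=True` builds the model with empty weights first and then loads the
# checkpoint incrementally, keeping peak host-memory usage low.
model = AutoModelForCausalLM.from_pretrained(
    "gpt2",  # placeholder checkpoint
    low_cpu_mem_usage=True,
)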
2 changes: 1 addition & 1 deletion examples/language-modeling/run_clm.py
@@ -52,7 +52,7 @@


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
2 changes: 1 addition & 1 deletion examples/language-modeling/run_mlm.py
@@ -50,7 +50,7 @@


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
2 changes: 1 addition & 1 deletion examples/question-answering/run_qa.py
@@ -49,7 +49,7 @@


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
2 changes: 1 addition & 1 deletion examples/speech-recognition/run_speech_recognition_ctc.py
@@ -50,7 +50,7 @@


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
2 changes: 1 addition & 1 deletion examples/summarization/run_summarization.py
@@ -52,7 +52,7 @@


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
2 changes: 1 addition & 1 deletion examples/text-classification/run_glue.py
@@ -47,7 +47,7 @@


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
2 changes: 1 addition & 1 deletion examples/translation/run_translation.py
@@ -51,7 +51,7 @@


 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.27.0")
+check_min_version("4.28.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
2 changes: 1 addition & 1 deletion optimum/habana/distributed/distributed_runner.py
@@ -178,7 +178,7 @@ def create_single_node_setup(self):
        use_env_param = "--use_env" if self._use_env else ""

        self._interpreter = (
-           f"{sys.executable} -um torch.distributed.launch --nproc_per_node={self._world_size} {use_env_param} "
+           f"{sys.executable} -um torch.distributed.run --nproc_per_node={self._world_size} {use_env_param} "
        )

    def create_multi_node_setup(self):
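The change simply swaps `torch.distributed.launch` (deprecated upstream) for `torch.distributed.run`. A small sketch of the interpreter string this now produces (the world size and `--use_env` values are placeholders for whatever the runner computes):

import sys

world_size = 8  # placeholder
use_env_param = "--use_env"  # placeholder
interpreter = (
    f"{sys.executable} -um torch.distributed.run --nproc_per_node={world_size} {use_env_param} "
)
print(interpreter)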
69 changes: 62 additions & 7 deletions optimum/habana/transformers/generation/utils.py
@@ -17,10 +17,11 @@
 import copy
 import inspect
 import warnings
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union

 import torch
 import torch.distributed as dist
+from transformers.deepspeed import is_deepspeed_zero3_enabled
 from transformers.generation.beam_constraints import DisjunctiveConstraint, PhrasalConstraint
 from transformers.generation.beam_search import BeamScorer, BeamSearchScorer, ConstrainedBeamSearchScorer
 from transformers.generation.configuration_utils import GenerationConfig
@@ -50,6 +51,10 @@
 from optimum.utils import logging


+if TYPE_CHECKING:
+    from .streamers import BaseStreamer
+
+
 logger = logging.get_logger(__name__)

@@ -130,7 +135,8 @@ def generate(
        logits_processor: Optional[LogitsProcessorList] = None,
        stopping_criteria: Optional[StoppingCriteriaList] = None,
        prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
-       synced_gpus: Optional[bool] = False,
+       synced_gpus: Optional[bool] = None,
+       streamer: Optional["BaseStreamer"] = None,
        lazy_mode: Optional[bool] = False,
        hpu_graphs: Optional[bool] = False,
        ignore_eos: Optional[bool] = None,
@@ -180,8 +186,13 @@ def generate(
                on the batch ID `batch_id` and the previously generated tokens `inputs_ids`. This argument is useful
                for constrained generation conditioned on the prefix, as described in [Autoregressive Entity
                Retrieval](https://arxiv.org/abs/2010.00904).
-           synced_gpus (`bool`, *optional*, defaults to `False`):
-               Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
+           synced_gpus (`bool`, *optional*):
+               Whether to continue running the while loop until max_length. Unless overridden this flag will be set to
+               `True` under DeepSpeed ZeRO Stage 3 multiple GPUs environment to avoid hanging if one GPU finished
+               generating before other GPUs. Otherwise it'll be set to `False`.
+           streamer (`BaseStreamer`, *optional*):
+               Streamer object that will be used to stream the generated sequences. Generated tokens are passed
+               through `streamer.put(token_ids)` and the streamer is responsible for any further processing.
            lazy_mode (`bool`, *optional*, defaults to `False`):
                Whether the run is executed in lazy mode or not (i.e. eager mode).
            hpu_graphs (`bool`, *optional*, defaults to `False`):
@@ -209,6 +220,12 @@
                - [`transformers.generation.BeamSearchEncoderDecoderOutput`],
                - [`transformers.generation.BeamSampleEncoderDecoderOutput`]
        """
+       if synced_gpus is None:
+           if is_deepspeed_zero3_enabled() and dist.get_world_size() > 1:
+               synced_gpus = True
+           else:
+               synced_gpus = False
+
        # 1. Handle `generation_config` and kwargs that might update it, and validate the `.generate()` call
        self._validate_model_class()
        if hpu_graphs and not lazy_mode:
@@ -307,9 +324,16 @@
                model_kwargs=model_kwargs,
                device=inputs_tensor.device,
            )
+
+           # conditional generation for multi-modal models.
+           if "input_ids" in model_kwargs and model_input_name == "pixel_values":
+               input_ids = torch.cat([input_ids, model_kwargs.pop("input_ids")], dim=-1)
        else:
            input_ids = inputs_tensor if model_input_name == "input_ids" else model_kwargs.pop("input_ids")

+       if streamer is not None:
+           streamer.put(input_ids.cpu())
+
        # 6. Prepare `max_length` depending on other stopping criteria.
        input_ids_seq_length = input_ids.shape[-1]
        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
@@ -350,7 +374,8 @@
        )

        is_contrastive_search_gen_mode = (
-           generation_config.top_k is not None
+           (generation_config.num_beams == 1)
+           and generation_config.top_k is not None
            and generation_config.top_k > 1
            and generation_config.do_sample is False
            and generation_config.penalty_alpha is not None
@@ -399,6 +424,11 @@
                "Diverse beam search cannot be used in sampling mode. Make sure that `do_sample` is set to `False`."
            )

+       if streamer is not None and (generation_config.num_beams > 1):
+           raise ValueError(
+               "`streamer` cannot be used with beam search (yet!). Make sure that `num_beams` is set to 1."
+           )
+
        if self.device.type != input_ids.device.type:
            warnings.warn(
                (
@@ -457,6 +487,7 @@ def generate(
                output_scores=generation_config.output_scores,
                return_dict_in_generate=generation_config.return_dict_in_generate,
                synced_gpus=synced_gpus,
+               streamer=streamer,
                lazy_mode=lazy_mode,
                ignore_eos=ignore_eos,
                **model_kwargs,
@@ -480,6 +511,7 @@
                output_scores=generation_config.output_scores,
                return_dict_in_generate=generation_config.return_dict_in_generate,
                synced_gpus=synced_gpus,
+               streamer=streamer,
                **model_kwargs,
            )

@@ -506,6 +538,7 @@
                output_scores=generation_config.output_scores,
                return_dict_in_generate=generation_config.return_dict_in_generate,
                synced_gpus=synced_gpus,
+               streamer=streamer,
                lazy_mode=lazy_mode,
                **model_kwargs,
            )
@@ -741,6 +774,7 @@ def contrastive_search(
        output_scores: Optional[bool] = None,
        return_dict_in_generate: Optional[bool] = None,
        synced_gpus: Optional[bool] = False,
+       streamer: Optional["BaseStreamer"] = None,
        lazy_mode: Optional[bool] = False,
        **model_kwargs,
    ) -> Union[ContrastiveSearchOutput, torch.LongTensor]:
@@ -789,6 +823,9 @@
                Whether or not to return a [`transformers.generationutils.ModelOutput`] instead of a plain tuple.
            synced_gpus (`bool`, *optional*, defaults to `False`):
                Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
+           streamer (`BaseStreamer`, *optional*):
+               Streamer object that will be used to stream the generated sequences. Generated tokens are passed
+               through `streamer.put(token_ids)` and the streamer is responsible for any further processing.
            lazy_mode (`bool`, *optional*, defaults to `False`):
                Whether the run is executed in lazy mode or not (i.e. eager mode).
            model_kwargs:
@@ -841,6 +878,7 @@ def greedy_search(
        output_scores: Optional[bool] = None,
        return_dict_in_generate: Optional[bool] = None,
        synced_gpus: Optional[bool] = False,
+       streamer: Optional["BaseStreamer"] = None,
        lazy_mode: Optional[bool] = False,
        ignore_eos: Optional[bool] = None,
        **model_kwargs,
@@ -886,6 +924,9 @@
                Whether or not to return a [`transformers.generationutils.ModelOutput`] instead of a plain tuple.
            synced_gpus (`bool`, *optional*, defaults to `False`):
                Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
+           streamer (`BaseStreamer`, *optional*):
+               Streamer object that will be used to stream the generated sequences. Generated tokens are passed
+               through `streamer.put(token_ids)` and the streamer is responsible for any further processing.
            lazy_mode (`bool`, *optional*, defaults to `False`):
                Whether the run is executed in lazy mode or not (i.e. eager mode).
            ignore_eos (`bool`, *optional*):
@@ -982,7 +1023,7 @@ def greedy_search(

        # keep track of which sequences are already finished
        if not ignore_eos:
-           unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
+           unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device)

        this_peer_finished = False  # used by synced_gpus only
        while True:
@@ -1056,6 +1097,8 @@
                )
            else:
                input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
+           if streamer is not None:
+               streamer.put(next_tokens.cpu())
            model_kwargs = self._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
            )
@@ -1073,6 +1116,9 @@
            else:
                this_peer_finished = True

+       if streamer is not None:
+           streamer.end()
+
        if return_dict_in_generate:
            if self.config.is_encoder_decoder:
                return GreedySearchEncoderDecoderOutput(
@@ -1108,6 +1154,7 @@ def sample(
        output_scores: Optional[bool] = None,
        return_dict_in_generate: Optional[bool] = None,
        synced_gpus: Optional[bool] = False,
+       streamer: Optional["BaseStreamer"] = None,
        lazy_mode: Optional[bool] = False,
        **model_kwargs,
    ) -> Union[SampleOutput, torch.LongTensor]:
@@ -1155,6 +1202,9 @@
                Whether or not to return a [`transformers.generationutils.ModelOutput`] instead of a plain tuple.
            synced_gpus (`bool`, *optional*, defaults to `False`):
                Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
+           streamer (`BaseStreamer`, *optional*):
+               Streamer object that will be used to stream the generated sequences. Generated tokens are passed
+               through `streamer.put(token_ids)` and the streamer is responsible for any further processing.
            lazy_mode (`bool`, *optional*, defaults to `False`):
                Whether the run is executed in lazy mode or not (i.e. eager mode).
            model_kwargs:
@@ -1268,7 +1318,7 @@ def sample(
        )

        # keep track of which sequences are already finished
-       unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
+       unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device)

        this_peer_finished = False  # used by synced_gpus only
        # auto-regressive generation
@@ -1345,6 +1395,8 @@
                )
            else:
                input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
+           if streamer is not None:
+               streamer.put(next_tokens.cpu())
            model_kwargs = self._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
            )
@@ -1365,6 +1417,9 @@
            else:
                this_peer_finished = True

+       if streamer is not None:
+           streamer.end()
+
        if return_dict_in_generate:
            if self.config.is_encoder_decoder:
                return SampleEncoderDecoderOutput(
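Taken together, these changes thread a `streamer` through `generate`, `greedy_search`, and `sample`. A minimal sketch of how the argument can be exercised, assuming the stock `TextStreamer` shipped with Transformers 4.28 (checkpoint and prompt are placeholders):

from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Habana Gaudi is", return_tensors="pt")
streamer = TextStreamer(tokenizer)  # prints text as tokens are produced

# The prompt is pushed through streamer.put() first, each new token follows,
# and streamer.end() flushes the buffer; beam search is rejected, so keep num_beams=1.
model.generate(**inputs, streamer=streamer, max_new_tokens=20, num_beams=1)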
2 changes: 2 additions & 0 deletions optimum/habana/transformers/models/bloom/modeling_bloom.py
@@ -474,6 +474,8 @@ def forward(

        loss = None
        if labels is not None:
+           # move labels to correct device to enable model parallelism
+           labels = labels.to(lm_logits.device)
            # Shift so that tokens < n predict n
            shift_logits = lm_logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
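The two added lines matter for model parallelism: the `lm_head` output can sit on a different device than the incoming labels, and `CrossEntropyLoss` needs both operands on the same device. A self-contained sketch with stand-in tensors (shapes are illustrative, not BLOOM's):

import torch
from torch.nn import CrossEntropyLoss

lm_logits = torch.randn(2, 5, 10)      # stand-in for the lm_head output, possibly on another device
labels = torch.randint(0, 10, (2, 5))   # stand-in labels

labels = labels.to(lm_logits.device)    # the line this commit adds

# Shift so that tokens < n predict n, mirroring the surrounding code
shift_logits = lm_logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()
loss = CrossEntropyLoss()(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
print(loss.item())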