Skip to content

Commit

Permalink
[not4land] repro dynamo error
Browse files Browse the repository at this point in the history
Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
  • Loading branch information
jerryzh168 committed Oct 18, 2024
1 parent b56e2ee commit f1d0d67
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 16 deletions.
51 changes: 45 additions & 6 deletions torchbenchmark/util/backends/torchdynamo.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,12 +181,11 @@ def apply_torchdynamo_args(
),
)

print("{args.quantization=}")
if args.quantization:
import torchao
from torchao.quantization import (
change_linear_weights_to_int4_woqtensors,
change_linear_weights_to_int8_dqtensors,
change_linear_weights_to_int8_woqtensors,
quantize_, int8_weight_only, int4_weight_only, int8_dynamic_activation_int8_weight
)

torch._dynamo.config.automatic_dynamic_shapes = False
Expand All @@ -196,12 +195,52 @@ def apply_torchdynamo_args(
module, example_inputs = model.get_module()
if args.quantization == "int8dynamic":
torch._inductor.config.force_fuse_int_mm_with_mul = True
change_linear_weights_to_int8_dqtensors(module)
quantize_(module, int8_dynamic_activation_int8_weight(), set_inductor_config=False)
elif args.quantization == "int8weightonly":
torch._inductor.config.use_mixed_mm = True
change_linear_weights_to_int8_woqtensors(module)
quantize_(module, int8_weight_only(), set_inductor_config=False)
elif args.quantization == "int4weightonly":
change_linear_weights_to_int4_woqtensors(module)
quantize_(module, int4_weight_only(), set_inductor_config=False)
if args.quantization == "autoquant":
print("module:", type(module))

torchao.autoquant(module, example_input=example_inputs, manual=True, error_on_unseen=False, set_inductor_config=False)
# torchao.autoquant(module, error_on_unseen=False, set_inductor_config=False)
if isinstance(example_inputs, dict):
module(**example_inputs)
else:
module(*example_inputs)

module.finalize_autoquant()

# for n, m in model.named_modules():
# if isinstance(m, torch.nn.Linear):
# print(f"name {n}, weight type:, {type(m.weight.data)}")

from torchao.quantization.autoquant import AUTOQUANT_CACHE
assert len(AUTOQUANT_CACHE)>0, f"Err: found no autoquantizable layers in model {type(module)}, stopping autoquantization"

# print("autoquant profile")
# from torchao.utils import benchmark_model, profiler_runner
# model = torch.compile(module, mode="max-autotune")
# inputs = example_inputs
# benchmark_model(model, 20, inputs)
# print("elapsed_time: ", benchmark_model(model, 100, inputs), " milliseconds")
# profiler_runner("quant.json.gz", benchmark_model, model, 5, inputs)

else:
unwrap_tensor_subclass(module)
# else:
# module, example_inputs = model.get_module()
# # noquant profile
# print("noquant profile")
# from torchao.utils import benchmark_model, profiler_runner
# model = torch.compile(module, mode="max-autotune")
# inputs = example_inputs
# benchmark_model(model, 20, inputs)
# print("elapsed_time: ", benchmark_model(model, 100, inputs), " milliseconds")
# profiler_runner("noquant.json.gz", benchmark_model, model, 5, inputs)
>>>>>>> Stashed changes

if args.freeze_prepack_weights:
torch._inductor.config.freezing = True
Expand Down
8 changes: 7 additions & 1 deletion torchbenchmark/util/experiment/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from torchbenchmark.util.experiment.instantiator import TorchBenchModelConfig
from torchbenchmark.util.model import BenchmarkModel

WARMUP_ROUNDS = 10
WARMUP_ROUNDS = 20
BENCHMARK_ITERS = 15
MEMPROF_ITER = 2
NANOSECONDS_PER_MILLISECONDS = 1_000_000.0
Expand Down Expand Up @@ -53,6 +53,12 @@ def get_latencies(
func()
t1 = time.time_ns()
result_summary.append((t1 - t0) / NANOSECONDS_PER_MILLISECONDS)

# from torchao.utils import benchmark_model, profiler_runner
# print("device:", device)
# print("elapsed:", benchmark_model(func, 100, (), device_type="cuda"))
# profiler_runner("quant.json.gz", benchmark_model, func, 5, (), device_type="cuda")

return result_summary


Expand Down
23 changes: 22 additions & 1 deletion userbenchmark/dynamo/dynamobench/torchao_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ def torchao_optimize_ctx(quantization: str):
def inner(model_iter_fn: Callable):
def _torchao_apply(module: torch.nn.Module, example_inputs: Any):
if getattr(module, "_quantized", None) is None:
if quantization == "noquant":
print("noquant run")
from torchao.utils import benchmark_model, profiler_runner
model = torch.compile(module, mode="max-autotune")
inputs = example_inputs
benchmark_model(model, 20, inputs)
print("elapsed_time: ", benchmark_model(model, 100, inputs), " milliseconds")
# profiler_runner("noquant.json.gz", benchmark_model, model, 5, inputs)

if quantization == "int8dynamic":
quantize_(
module,
Expand All @@ -35,21 +44,33 @@ def _torchao_apply(module: torch.nn.Module, example_inputs: Any):
elif quantization == "int4weightonly":
quantize_(module, int4_weight_only(), set_inductor_config=False)
if quantization == "autoquant":
autoquant(module, error_on_unseen=False, set_inductor_config=False)
autoquant(module, example_input=example_inputs, manual=True, error_on_unseen=False, set_inductor_config=False)
if isinstance(example_inputs, dict):
module(**example_inputs)
else:
module(*example_inputs)
module.finalize_autoquant()

from torchao.quantization.autoquant import AUTOQUANT_CACHE

if len(AUTOQUANT_CACHE) == 0:
raise Exception( # noqa: TRY002
"NotAutoquantizable"
f"Found no autoquantizable layers in model {type(module)}, stopping autoquantized run"
)

print("autoquant run")
from torchao.utils import benchmark_model, profiler_runner
model = torch.compile(module, mode="max-autotune")
inputs = example_inputs
benchmark_model(model, 20, inputs)
print("elapsed_time: ", benchmark_model(model, 100, inputs), " milliseconds")
# profiler_runner("quant.json.gz", benchmark_model, model, 5, inputs)
else:
unwrap_tensor_subclass(module)
setattr(module, "_quantized", True) # noqa: B010


model_iter_fn(module, example_inputs)

return _torchao_apply
Expand Down
13 changes: 5 additions & 8 deletions userbenchmark/group_bench/configs/torch_ao.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
model: "*"
extended_models:
- huggingface
- timm
model: "resnet18"
test: eval
device: cuda
extra_args: --precision bf16 --torchdynamo inductor --inductor-compile-mode max-autotune
Expand All @@ -10,7 +7,7 @@ metrics:
test_group:
test_batch_size_default:
subgroup:
- extra_args:
- extra_args: --quantization int8dynamic
- extra_args: --quantization int8weightonly
- extra_args: --quantization int4weightonly
- extra_args: --quantization autoquant
- extra_args: --quantization noquant


2 changes: 2 additions & 0 deletions userbenchmark/torchao/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def run(args: List[str]):
else:
benchmark_args = [pt2_args]

print("benchmark args:", benchmark_args)

output_files = [_run_pt2_args(args) for args in benchmark_args]
# Post-processing
if args.dashboard:
Expand Down

0 comments on commit f1d0d67

Please sign in to comment.