From 2fd914ca47daf54491d216fd51ca10ba922f61c3 Mon Sep 17 00:00:00 2001 From: Logan Adams Date: Fri, 17 Jan 2025 11:03:37 -0800 Subject: [PATCH] Explicitly use the linalg.vector_norm call --- deepspeed/runtime/comm/compressed.py | 4 ++-- deepspeed/runtime/comm/hccl.py | 4 ++-- deepspeed/runtime/comm/mpi.py | 4 ++-- deepspeed/runtime/comm/nccl.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/deepspeed/runtime/comm/compressed.py b/deepspeed/runtime/comm/compressed.py index 7f8c7395451d..ea0bd296cffc 100644 --- a/deepspeed/runtime/comm/compressed.py +++ b/deepspeed/runtime/comm/compressed.py @@ -67,7 +67,7 @@ def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_erro buffer_m = torch.cat([buffer_m, empty_tensor]) buffer_m.add_(worker_error) - worker_scale = torch.linalg.norm(buffer_m) / np.sqrt(torch.numel(buffer_m)) + worker_scale = torch.linalg.vector_norm(buffer_m) / np.sqrt(torch.numel(buffer_m)) worker_error.set_(buffer_m - worker_scale * buffer_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0)) @@ -96,7 +96,7 @@ def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_erro compensated_server_m.add_(server_error) - server_scale = torch.norm(compensated_server_m) / np.sqrt(compensated_server_m.numel()) + server_scale = torch.linalg.vector_norm(compensated_server_m) / np.sqrt(compensated_server_m.numel()) server_error.set_(compensated_server_m - server_scale * compensated_server_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0)) diff --git a/deepspeed/runtime/comm/hccl.py b/deepspeed/runtime/comm/hccl.py index 09fb11a731b8..f8027a3e83b4 100644 --- a/deepspeed/runtime/comm/hccl.py +++ b/deepspeed/runtime/comm/hccl.py @@ -55,7 +55,7 @@ def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_erro buffer_m = torch.cat([buffer_m, empty_tensor]) buffer_m.add_(worker_error) - worker_scale = torch.linalg.norm(buffer_m) / np.sqrt(torch.numel(buffer_m)) + worker_scale = 
torch.linalg.vector_norm(buffer_m) / np.sqrt(torch.numel(buffer_m)) worker_error.set_(buffer_m - worker_scale * buffer_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0)) @@ -83,7 +83,7 @@ def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_erro compensated_server_m.add_(server_error) - server_scale = torch.norm(compensated_server_m) / np.sqrt(compensated_server_m.numel()) + server_scale = torch.linalg.vector_norm(compensated_server_m) / np.sqrt(compensated_server_m.numel()) server_error.set_(compensated_server_m - server_scale * compensated_server_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0)) diff --git a/deepspeed/runtime/comm/mpi.py b/deepspeed/runtime/comm/mpi.py index bc544787aa7a..a59bedba8353 100644 --- a/deepspeed/runtime/comm/mpi.py +++ b/deepspeed/runtime/comm/mpi.py @@ -144,7 +144,7 @@ def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_erro buffer_m = torch.cat([buffer_m, empty_tensor]) buffer_m.add_(worker_error) - worker_scale = torch.linalg.norm(buffer_m) / np.sqrt(torch.numel(buffer_m)) + worker_scale = torch.linalg.vector_norm(buffer_m) / np.sqrt(torch.numel(buffer_m)) worker_error.set_(buffer_m - worker_scale * buffer_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0)) cupy_sign_list_packed = self.compression_backend.compress_by_chunk( @@ -173,7 +173,7 @@ def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_erro (cupy.unpackbits(cupy_recvbuf_sign.flatten())).reshape(self.size, -1)).float().add_(-0.5).mul_(2.0).mul_( self.compression_backend.cupy2torch(cupy_recvbuf_scale).mul_(1 / self.size)).sum(0) compensated_server_m.add_(server_error) - server_scale = torch.linalg.norm(compensated_server_m) / np.sqrt(compensated_server_m.numel()) + server_scale = torch.linalg.vector_norm(compensated_server_m) / np.sqrt(compensated_server_m.numel()) server_error.set_(compensated_server_m - server_scale * compensated_server_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0)) diff 
--git a/deepspeed/runtime/comm/nccl.py b/deepspeed/runtime/comm/nccl.py index a57b7519a295..ace3172ab4da 100644 --- a/deepspeed/runtime/comm/nccl.py +++ b/deepspeed/runtime/comm/nccl.py @@ -63,7 +63,7 @@ def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_erro buffer_m = torch.cat([buffer_m, empty_tensor]) buffer_m.add_(worker_error) - worker_scale = torch.linalg.norm(buffer_m) / np.sqrt(buffer_m.numel()) + worker_scale = torch.linalg.vector_norm(buffer_m) / np.sqrt(buffer_m.numel()) worker_error.set_(buffer_m - worker_scale * buffer_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0)) if self.bool_not_supported: @@ -109,7 +109,7 @@ def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_erro (cupy.unpackbits(cupy_recvbuf_sign.flatten())).reshape(self.size, -1)).float().add_(-0.5).mul_(2.0).mul_( torch.stack(recvbuf_scale).mul_(1 / self.size)).sum(0) compensated_server_m.add_(server_error) - server_scale = torch.linalg.norm(compensated_server_m) / np.sqrt(compensated_server_m.numel()) + server_scale = torch.linalg.vector_norm(compensated_server_m) / np.sqrt(compensated_server_m.numel()) server_error.set_(compensated_server_m - server_scale * compensated_server_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0))