From 0a10c60d641555ec1d42f4819c1fa4bb64dc88db Mon Sep 17 00:00:00 2001 From: Nader Al Awar Date: Mon, 19 Sep 2022 13:10:10 -0500 Subject: [PATCH 01/18] PyKokkos: enable multi gpu usage in pykokkos --- pykokkos/__init__.py | 3 +- pykokkos/core/compile.sh | 13 +-- pykokkos/core/compiler.py | 2 +- pykokkos/core/cpp_setup.py | 90 +++++++++++++++++--- pykokkos/core/module_setup.py | 8 +- pykokkos/core/runtime.py | 26 ++++-- pykokkos/core/translators/bindings.py | 2 +- pykokkos/interface/__init__.py | 2 +- pykokkos/interface/execution_space.py | 14 +++ pykokkos/interface/views.py | 21 +++-- pykokkos/kokkos_manager/__init__.py | 117 +++++++++++++++++++++++++- 11 files changed, 262 insertions(+), 36 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 40a7e690..dd179f9f 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -8,7 +8,8 @@ initialize, finalize, get_default_space, set_default_space, get_default_precision, set_default_precision, - is_uvm_enabled, enable_uvm, disable_uvm + is_uvm_enabled, enable_uvm, disable_uvm, + set_device_id ) initialize() diff --git a/pykokkos/core/compile.sh b/pykokkos/core/compile.sh index cee2c609..df575216 100755 --- a/pykokkos/core/compile.sh +++ b/pykokkos/core/compile.sh @@ -9,6 +9,7 @@ PK_REAL="${6}" KOKKOS_LIB_PATH="${7}" KOKKOS_INCLUDE_PATH="${8}" COMPUTE_CAPABILITY="${9}" +LIB_SUFFIX="${10}" SRC=$(find -name "*.cpp") @@ -34,11 +35,11 @@ if [ "${COMPILER}" == "g++" ]; then -shared \ -fopenmp \ "${SRC}".o -o "${MODULE}" \ - "${KOKKOS_LIB_PATH}/libkokkoscontainers.so" \ - "${KOKKOS_LIB_PATH}/libkokkoscore.so" + "${KOKKOS_LIB_PATH}/libkokkoscontainers${LIB_SUFFIX}.so" \ + "${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so" elif [ "${COMPILER}" == "nvcc" ]; then - "${KOKKOS_LIB_PATH}/../bin/nvcc_wrapper" \ + "${KOKKOS_LIB_PATH}/../../bin/nvcc_wrapper" \ `python3 -m pybind11 --includes` \ -I.. 
\ -O3 \ @@ -54,7 +55,7 @@ elif [ "${COMPILER}" == "nvcc" ]; then -Dpk_exec_space="Kokkos::${EXEC_SPACE}" \ -Dpk_real="${PK_REAL}" - "${KOKKOS_LIB_PATH}/../bin/nvcc_wrapper" \ + "${KOKKOS_LIB_PATH}/../../bin/nvcc_wrapper" \ -I.. \ -O3 \ -shared \ @@ -62,6 +63,6 @@ elif [ "${COMPILER}" == "nvcc" ]; then --expt-extended-lambda \ -fopenmp \ "${SRC}".o -o "${MODULE}" \ - "${KOKKOS_LIB_PATH}/libkokkoscontainers.so" \ - "${KOKKOS_LIB_PATH}/libkokkoscore.so" + "${KOKKOS_LIB_PATH}/libkokkoscontainers${LIB_SUFFIX}.so" \ + "${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so" fi \ No newline at end of file diff --git a/pykokkos/core/compiler.py b/pykokkos/core/compiler.py index 4c985f11..d03f9d5c 100644 --- a/pykokkos/core/compiler.py +++ b/pykokkos/core/compiler.py @@ -178,7 +178,7 @@ def compile_entity( if module_setup.is_compiled(): return - cpp_setup = CppSetup(module_setup.module_file, self.functor_file, self.bindings_file) + cpp_setup = CppSetup(module_setup.module_file, module_setup.gpu_module_files, self.functor_file, self.bindings_file) translator = StaticTranslator(module_setup.name, self.functor_file, members) t_start: float = time.perf_counter() diff --git a/pykokkos/core/cpp_setup.py b/pykokkos/core/cpp_setup.py index 59951936..302640a4 100644 --- a/pykokkos/core/cpp_setup.py +++ b/pykokkos/core/cpp_setup.py @@ -3,10 +3,13 @@ import shutil import subprocess import sys +from types import ModuleType from typing import List, Tuple - -from pykokkos.interface import ExecutionSpace, get_default_layout, get_default_memory_space +from pykokkos.interface import ( + ExecutionSpace, get_default_layout, get_default_memory_space, + is_host_execution_space +) import pykokkos.kokkos_manager as km @@ -15,16 +18,18 @@ class CppSetup: Creates the directory to hold the translation and invokes the compiler """ - def __init__(self, module_file: str, functor: str, bindings: str): + def __init__(self, module_file: str, gpu_module_files: List[str], functor: str, bindings: str): """ 
CppSetup constructor :param module: the name of the file containing the compiled Python module + :param gpu_module_files: the list of names of files containing for each gpu module :param functor: the name of the generated functor file :param bindings: the name of the generated bindings file """ self.module_file: str = module_file + self.gpu_module_files: List[str] = gpu_module_files self.functor_file: str = functor self.bindings_file: str = bindings @@ -58,6 +63,8 @@ def compile( self.write_source(output_dir, functor, bindings) self.copy_script(output_dir) self.invoke_script(output_dir, space, enable_uvm, compiler) + if space is ExecutionSpace.Cuda and km.is_multi_gpu_enabled(): + self.copy_multi_gpu_kernel(output_dir) def initialize_directory(self, name: Path) -> None: @@ -115,13 +122,14 @@ def copy_script(self, output_dir: Path) -> None: print(f"Exception while copying views and makefile: {ex}") sys.exit(1) - def get_kokkos_paths(self) -> Tuple[Path, Path]: + def get_kokkos_paths(self, space: ExecutionSpace) -> Tuple[Path, Path]: """ Get the paths of the Kokkos instal lib and include directories. If the environment variable is set, use that - Kokkos install. If not, fall back to installed pykokkos-base - package. + Kokkos install. If not, fall back to the installed + pykokkos-base package. 
+ :param space: the execution space to compile for :returns: a tuple of paths to the Kokkos lib/ and include/ directories respectively """ @@ -139,8 +147,9 @@ def get_kokkos_paths(self) -> Tuple[Path, Path]: return lib_path, include_path - from pykokkos.bindings import kokkos - install_path = Path(kokkos.__path__[0]).parent + is_cpu: bool = is_host_execution_space(space) + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + install_path = Path(kokkos_lib.__path__[0]) if (install_path / "lib").is_dir(): lib_path = install_path / "lib" @@ -150,10 +159,24 @@ def get_kokkos_paths(self) -> Tuple[Path, Path]: raise RuntimeError("lib/ or lib64/ directories not found in installed pykokkos-base package." f" Try setting {self.lib_path_env} instead.") - include_path = lib_path.parent / "include/kokkos" + include_path = lib_path.parent.parent / "include/kokkos" return lib_path, include_path + def get_kokkos_lib_suffix(self, space: ExecutionSpace) -> str: + """ + Get the suffix of the libkokkoscore and libkokkoscontainers + libraries corresponding to the enabled device + + :param space: the execution space to compile for + :returns: the suffix as a string + """ + + if is_host_execution_space(space) or not km.is_multi_gpu_enabled(): + return "" + + return f"_{km.get_device_id()}" + def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: bool, compiler: str) -> None: """ Invoke the compilation script @@ -176,8 +199,9 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo precision: str = km.get_default_precision().__name__.split(".")[-1] lib_path: Path include_path: Path - lib_path, include_path = self.get_kokkos_paths() + lib_path, include_path = self.get_kokkos_paths(space) compute_capability: str = self.get_cuda_compute_capability(compiler) + lib_suffix: str = self.get_kokkos_lib_suffix(space) command: List[str] = [f"./{self.script}", compiler, # What compiler to use @@ -188,7 +212,8 @@ def invoke_script(self, output_dir: 
Path, space: ExecutionSpace, enable_uvm: boo precision, # Default real precision str(lib_path), # Path to Kokkos install lib/ directory str(include_path), # Path to Kokkos install include/ directory - compute_capability] # Device compute capability + compute_capability, # Device compute capability + lib_suffix] # The libkokkos* suffix identifying the gpu compile_result = subprocess.run(command, cwd=output_dir, capture_output=True, check=False) if compile_result.returncode != 0: @@ -207,6 +232,49 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo print(f"patchelf failed") sys.exit(1) + def copy_multi_gpu_kernel(self, output_dir: Path) -> None: + """ + Copy the kernel .so file once for each device and run patchelf + to point to the right library + + :param output_dir: the base directory + """ + + original_module: Path = output_dir / self.module_file + for id, (kernel_filename, kokkos_gpu_module) in enumerate(zip(self.gpu_module_files, km.get_kokkos_gpu_modules())): + kernel_path: Path = output_dir / kernel_filename + + try: + shutil.copy(original_module, kernel_path) + except Exception as ex: + print(f"Exception while copying kernel: {ex}") + sys.exit(1) + + lib_path: Path = Path(kokkos_gpu_module.__path__[0]) / "lib" + patchelf: List[str] = ["patchelf", + "--set-rpath", + str(lib_path), + kernel_filename] + + patchelf_result = subprocess.run(patchelf, cwd=output_dir, capture_output=True, check=False) + if patchelf_result.returncode != 0: + print(patchelf_result.stderr.decode("utf-8")) + print(f"patchelf failed") + sys.exit(1) + + # Now replace the needed libkokkos* libraries with the correct version + needed_libraries: str = subprocess.run(["patchelf", "--print-needed", kernel_filename], cwd=output_dir, capture_output=True, check=False).stdout.decode("utf-8") + + for line in needed_libraries.splitlines(): + if "libkokkoscore" in line or "libkokkoscontainers" in line: + # Line will be of the form f"libkokkoscore_{id}.so.3.4" + # This 
will extract id + current_id: int = int(line.split("_")[1].split(".")[0]) + to_remove: str = line + to_add: str = line.replace(f"_{current_id}", f"_{id}") + + subprocess.run(["patchelf", "--replace-needed", to_remove, to_add, kernel_filename], cwd=output_dir, capture_output=True, check=False) + def get_cuda_compute_capability(self, compiler: str) -> str: """ Get the compute capability of an Nvidia GPU diff --git a/pykokkos/core/module_setup.py b/pykokkos/core/module_setup.py index 3f294d02..028cec92 100644 --- a/pykokkos/core/module_setup.py +++ b/pykokkos/core/module_setup.py @@ -5,7 +5,7 @@ import sys import sysconfig import time -from typing import Callable, Optional, Union +from typing import Callable, List, Optional, Union from pykokkos.interface import ExecutionSpace import pykokkos.kokkos_manager as km @@ -105,9 +105,15 @@ def __init__( self.main: Path = self.get_main_path() self.output_dir: Optional[Path] = self.get_output_dir(self.main, self.metadata, space) + self.gpu_module_files: List[str] = [] + if km.is_multi_gpu_enabled(): + self.gpu_module_files = [f"kernel{device_id}{suffix}" for device_id in range(km.get_num_gpus())] if self.output_dir is not None: self.path: str = os.path.join(self.output_dir, self.module_file) + if km.is_multi_gpu_enabled(): + self.gpu_module_paths: str = [os.path.join(self.output_dir, module_file) for module_file in self.gpu_module_files] + self.name: str = self.path.replace("/", "_") self.name: str = self.name.replace("-", "_") self.name: str = self.name.replace(".", "_") diff --git a/pykokkos/core/runtime.py b/pykokkos/core/runtime.py index d9f02a13..14a9291a 100644 --- a/pykokkos/core/runtime.py +++ b/pykokkos/core/runtime.py @@ -9,7 +9,8 @@ from pykokkos.core.visitors import visitors_util from pykokkos.interface import ( DataType, ExecutionPolicy, ExecutionSpace, MemorySpace, - RandomPool, RangePolicy, TeamPolicy, View, ViewType + RandomPool, RangePolicy, TeamPolicy, View, ViewType, + is_host_execution_space ) import 
pykokkos.kokkos_manager as km @@ -47,7 +48,7 @@ def run_workload(self, space: ExecutionSpace, workload: object) -> None: if members is None: raise RuntimeError("ERROR: members cannot be none") - self.execute(workload, module_setup, members) + self.execute(workload, module_setup, members, space) self.run_callbacks(workload, members) @@ -82,7 +83,7 @@ def run_workunit( if members is None: raise RuntimeError("ERROR: members cannot be none") - return self.execute(workunit, module_setup, members, policy=policy, name=name, **kwargs) + return self.execute(workunit, module_setup, members, policy.space, policy=policy, name=name, **kwargs) def is_debug(self, space: ExecutionSpace) -> bool: """ @@ -100,6 +101,7 @@ def execute( entity: Union[object, Callable[..., None]], module_setup: ModuleSetup, members: PyKokkosMembers, + space: ExecutionSpace, policy: Optional[ExecutionPolicy] = None, name: Optional[str] = None, **kwargs @@ -110,13 +112,21 @@ def execute( :param entity: the workload or workunit object :param module_path: the path to the compiled module :param members: a collection of PyKokkos related members + :param space: the execution space :param policy: the execution policy for workunits :param name: the name of the kernel :param kwargs: the keyword arguments passed to the workunit :returns: the result of the operation (None for "for" and workloads) """ - module = self.import_module(module_setup.name, module_setup.path) + module_path: str + if is_host_execution_space(space): + module_path = module_setup.path + else: + device_id: int = km.get_device_id() + module_path = module_setup.gpu_module_paths[device_id] + + module = self.import_module(module_setup.name, module_path) args: Dict[str, Any] = self.get_arguments(entity, members, policy, **kwargs) if name is None: @@ -141,12 +151,14 @@ def import_module(self, module_name: str, module_path: str): :returns: the imported module """ - if module_name in sys.modules: - return sys.modules[module_name] + hashed_name: str = 
module_name.replace("kernel", f"kernel_{km.get_device_id()}") + + if hashed_name in sys.modules: + return sys.modules[hashed_name] spec = importlib.util.spec_from_file_location(module_name, module_path) module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module + sys.modules[hashed_name] = module spec.loader.exec_module(module) return module diff --git a/pykokkos/core/translators/bindings.py b/pykokkos/core/translators/bindings.py index 74e889d0..0def6a82 100644 --- a/pykokkos/core/translators/bindings.py +++ b/pykokkos/core/translators/bindings.py @@ -269,7 +269,7 @@ def generate_call(operation: str, functor: str, members: PyKokkosMembers, tag: c if is_hierarchical: args.append(f"Kokkos::TeamPolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.LeagueSize.value},Kokkos::AUTO,{Keywords.VectorLength.value})") else: - args.append(f"Kokkos::RangePolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.ThreadsBegin.value},{Keywords.ThreadsEnd.value})") + args.append(f"Kokkos::RangePolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.DefaultExecSpaceInstance.value}, {Keywords.ThreadsBegin.value},{Keywords.ThreadsEnd.value})") args.append(Keywords.Instance.value) diff --git a/pykokkos/interface/__init__.py b/pykokkos/interface/__init__.py index 8c975b5f..48bdfdc2 100644 --- a/pykokkos/interface/__init__.py +++ b/pykokkos/interface/__init__.py @@ -27,7 +27,7 @@ ExecutionPolicy, RangePolicy, MDRangePolicy, TeamPolicy, TeamThreadRange, ThreadVectorRange, Iterate, Rank ) -from .execution_space import ExecutionSpace +from .execution_space import ExecutionSpace, is_host_execution_space from .layout import Layout, get_default_layout from .hierarchical import ( AUTO, TeamMember, PerTeam, PerThread, single diff --git a/pykokkos/interface/execution_space.py b/pykokkos/interface/execution_space.py index 0d31eae1..51aae703 100644 --- a/pykokkos/interface/execution_space.py +++ 
b/pykokkos/interface/execution_space.py @@ -1,5 +1,6 @@ from enum import Enum +import pykokkos.kokkos_manager as km class ExecutionSpace(Enum): Cuda = "Cuda" @@ -8,3 +9,16 @@ class ExecutionSpace(Enum): Serial = "Serial" Debug = "Debug" Default = "Default" + +def is_host_execution_space(space: ExecutionSpace) -> bool: + """ + Check if the supplied execution space runs on the host + + :param space: the space being checked + :returns: True if the space runs on the host + """ + + if space is ExecutionSpace.Default: + space = km.get_default_space() + + return space in {ExecutionSpace.OpenMP, ExecutionSpace.Pthreads, ExecutionSpace.Serial} \ No newline at end of file diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index add4dacd..50f8ee4c 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -1,9 +1,9 @@ from __future__ import annotations import ctypes -import os import math from enum import Enum import sys +from types import ModuleType from typing import ( Dict, Generic, Iterator, List, Optional, Tuple, TypeVar, Union @@ -215,7 +215,10 @@ def resize(self, dimension: int, size: int) -> None: shape_list[dimension] = size self.shape = tuple(shape_list) - self.array = kokkos.array( + + is_cpu: bool = self.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + self.array = kokkos_lib.array( "", self.shape, None, None, self.dtype.value, self.space.value, self.layout.value, self.trait.value) self.data = np.array(self.array, copy=False) @@ -276,6 +279,9 @@ def _init_view( self.layout: Layout = layout self.trait: Trait = trait + is_cpu: bool = self.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + if self.dtype == pk.float: self.dtype = DataType.float elif self.dtype == pk.double: @@ -285,11 +291,11 @@ def _init_view( elif self.dtype == pk.int64: pass if trait is trait.Unmanaged: - self.array = kokkos.unmanaged_array(array, dtype=self.dtype.value, 
space=self.space.value, layout=self.layout.value) + self.array = kokkos_lib.unmanaged_array(array, dtype=self.dtype.value, space=self.space.value, layout=self.layout.value) else: if len(self.shape) == 0: shape = [1] - self.array = kokkos.array("", shape, None, None, self.dtype.value, space.value, layout.value, trait.value) + self.array = kokkos_lib.array("", shape, None, None, self.dtype.value, space.value, layout.value, trait.value) self.data = np.array(self.array, copy=False) def _get_type(self, dtype: Union[DataType, type]) -> Optional[DataType]: @@ -357,10 +363,15 @@ def __init__(self, parent_view: Union[Subview, View], data_slice: Union[slice, T self.data: np.ndarray = parent_view.data[data_slice] self.dtype = parent_view.dtype - self.array = kokkos.array( + + is_cpu: bool = self.parent_view.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + + self.array = kokkos_lib.array( self.data, dtype=parent_view.dtype.value, space=parent_view.space.value, layout=parent_view.layout.value, trait=kokkos.Unmanaged) self.shape: Tuple[int] = self.data.shape + if self.data.shape == (0,): self.data = np.array([], dtype=self.data.dtype) self.shape = () diff --git a/pykokkos/kokkos_manager/__init__.py b/pykokkos/kokkos_manager/__init__.py index 3d43647b..18e0a270 100644 --- a/pykokkos/kokkos_manager/__init__.py +++ b/pykokkos/kokkos_manager/__init__.py @@ -1,5 +1,6 @@ import os -from typing import Any, Dict +from types import ModuleType +from typing import Any, Dict, List from pykokkos.bindings import kokkos from pykokkos.interface.execution_space import ExecutionSpace @@ -9,7 +10,12 @@ "EXECUTION_SPACE": ExecutionSpace.OpenMP, "REAL_DTYPE": double, "IS_INITIALIZED": False, - "ENABLE_UVM": False + "ENABLE_UVM": False, + "MULTI_GPU": False, + "NUM_GPUS": 0, + "KOKKOS_GPU_MODULE": kokkos, + "KOKKOS_GPU_MODULE_LIST": [], + "DEVICE_ID": 0 } def get_default_space() -> ExecutionSpace: @@ -99,3 +105,110 @@ def finalize() -> None: if 
CONSTANTS["IS_INITIALIZED"] == True: kokkos.finalize() CONSTANTS["IS_INITIALIZED"] = False + +def get_kokkos_module(is_cpu: bool) -> ModuleType: + """ + Get the current kokkos module + + :param is_cpu: is the lib needed for cpu + :returns: the kokkos module + """ + + if is_cpu: + return kokkos + + return CONSTANTS["KOKKOS_GPU_MODULE"] + +def set_device_id(device_id: int) -> None: + """ + Set the current device ID + + :param device_id: the ID of the device to enable + """ + + if not isinstance(device_id, int): + raise TypeError("'device_id' must be of type 'int'") + + num_gpus: int = CONSTANTS["NUM_GPUS"] + if device_id >= num_gpus or device_id < 0: + raise RuntimeError(f"Device {device_id} does not exist (range [0..{num_gpus})") + + if num_gpus == 1: + return + + import cupy + cupy.cuda.runtime.setDevice(device_id) + CONSTANTS["DEVICE_ID"] = device_id + + gpu_lib = CONSTANTS["KOKKOS_GPU_MODULE_LIST"][device_id] + CONSTANTS["KOKKOS_GPU_MODULE"] = gpu_lib + +def get_device_id() -> int: + """ + Get the ID of the currently enabled device + + :returns: the ID of the enabled device + """ + + return CONSTANTS["DEVICE_ID"] + +def is_multi_gpu_enabled() -> bool: + """ + Check if pykokkos has been configured for multi-gpu use + + :returns: True or False + """ + + return CONSTANTS["MULTI_GPU"] + +def get_kokkos_gpu_modules() -> List: + """ + Get the pykokkos-base gpu modules + + :returns: the list of modules + """ + + return CONSTANTS["KOKKOS_GPU_MODULE_LIST"] + +def get_num_gpus() -> bool: + """ + Get the number of gpus pykokkos has been configured for + + :returns: the number of gpus + """ + + return CONSTANTS["NUM_GPUS"] + +try: + # Import multiple kokkos libs to support multiple devices per + # process. This assumes that there are modules named f"gpu{id}" + # that can be imported. 
+ import atexit + import cupy as cp + import importlib + import sys + + NUM_CUDA_GPUS: int = cp.cuda.runtime.getDeviceCount() + CONSTANTS["MULTI_GPU"] = True + CONSTANTS["NUM_GPUS"] = NUM_CUDA_GPUS + KOKKOS_LIBS: List[str] = [f"gpu{id}" for id in range(NUM_CUDA_GPUS)] + + KOKKOS_LIB_INSTANCES: List = [] + for id, lib in enumerate(KOKKOS_LIBS): + module = importlib.import_module(lib) + KOKKOS_LIB_INSTANCES.append(module) + + # Can't pass device id directly to initialize(), so need to + # append argument to select device to sys.argv. + # (see https://github.com/kokkos/pykokkos-base/blob/d3946ed56483f3cbe2e660cc50fe73c50dad19ea/src/libpykokkos.cpp#L65) + sys.argv.append(f"--device-id={id}") + module.initialize() + atexit.register(module.finalize) + sys.argv.pop() + + CONSTANTS["KOKKOS_GPU_MODULE_LIST"] = KOKKOS_LIB_INSTANCES + CONSTANTS["KOKKOS_GPU_MODULE"] = KOKKOS_LIB_INSTANCES[0] + +except Exception: + import traceback + traceback.print_exc() From 4f699bf995ca982f98830dd496b5c3c4793b6b66 Mon Sep 17 00:00:00 2001 From: Nader Al Awar Date: Mon, 19 Sep 2022 13:10:41 -0500 Subject: [PATCH 02/18] Examples: add multi gpu usage example --- examples/pykokkos/multi_gpu.py | 60 ++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 examples/pykokkos/multi_gpu.py diff --git a/examples/pykokkos/multi_gpu.py b/examples/pykokkos/multi_gpu.py new file mode 100644 index 00000000..d20fd2c6 --- /dev/null +++ b/examples/pykokkos/multi_gpu.py @@ -0,0 +1,60 @@ +import pykokkos as pk + +import numpy as np +import cupy as cp + +pk.set_default_space(pk.Cuda) + +size = 10000 + +pk.set_device_id(0) +cp_arr_0 = cp.arange(size).astype(np.int32) + +pk.set_device_id(1) +cp_arr_1 = cp.arange(size).astype(np.int32) + +print(cp_arr_0.device) +print(cp_arr_1.device) + +@pk.workunit(cp_arr = pk.ViewTypeInfo(space=pk.CudaSpace)) +def reduction_cp(i: int, acc: pk.Acc[int], cp_arr: pk.View1D[int]): + acc += cp_arr[i] + +pk.set_device_id(1) +cp_view_0 = 
pk.from_cupy(cp_arr_1) +result_0 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduction_cp, cp_arr=cp_view_0) +print(result_0) + +pk.set_device_id(0) +cp_view_1 = pk.from_cupy(cp_arr_0) +result_1 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduction_cp, cp_arr=cp_view_1) + +print(f"Reducing array 0: {result_0}") +print(f"Reducing array 1: {result_1}") +print(f"Sum: {result_0 + result_1}") + +pk.set_device_id(0) +view_0 = pk.View((size,), dtype=int) + +pk.set_device_id(1) +view_1 = pk.View((size,), dtype=int) + +@pk.workunit +def init_view(i: int, view: pk.View1D[int]): + view[i] = i + +@pk.workunit +def reduce_view(i: int, acc: pk.Acc[int], view: pk.View1D[int]): + acc += view[i] + +pk.set_device_id(0) +pk.parallel_for(pk.RangePolicy(pk.Cuda, 0, size), init_view, view=view_0) +result_0 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduce_view, view=view_0) + +pk.set_device_id(1) +pk.parallel_for(pk.RangePolicy(pk.Cuda, 0, size), init_view, view=view_1) +result_1 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduce_view, view=view_1) + +print(f"Reducing view 0: {result_0}") +print(f"Reducing view 1: {result_1}") +print(f"Sum: {result_0 + result_1}") From bc7661e7f466a0f74311177198a9e901650d501c Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Tue, 20 Sep 2022 16:59:01 -0600 Subject: [PATCH 03/18] BUG: uint32/64 wrapping Fixes #86 * prohibit negative values in `pk.uint32`/`pk.uint64`-specified views, and match the "wrapping" behavior used by NumPy in these cases --- pykokkos/interface/views.py | 4 ++++ tests/test_views.py | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index add4dacd..ce856ebd 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -284,6 +284,10 @@ def _init_view( self.dtype = DataType.int32 elif self.dtype == pk.int64: pass + elif self.dtype == pk.uint32: + self.dtype = DataType.uint32 + elif 
self.dtype == pk.uint64: + self.dtype = DataType.uint64 if trait is trait.Unmanaged: self.array = kokkos.unmanaged_array(array, dtype=self.dtype.value, space=self.space.value, layout=self.layout.value) else: diff --git a/tests/test_views.py b/tests/test_views.py index 686061f8..6fe3aa6a 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -9,7 +9,7 @@ HAS_CUDA = False import numpy as np -from numpy.testing import assert_allclose +from numpy.testing import assert_allclose, assert_equal import pykokkos as pk @@ -357,5 +357,20 @@ def test_asarray_consts_vs_numpy(const, np_dtype, pk_dtype): assert not "int" in pk_type_string + +@pytest.mark.parametrize("pk_dtype, np_dtype", [ + (pk.uint8, np.uint8), + (pk.uint16, np.uint16), + (pk.uint32, np.uint32), + (pk.uint64, np.uint64), + ]) +def test_unsigned_int_overflow(pk_dtype, np_dtype): + # test for gh-86 + actual = pk.View([1], dtype=pk_dtype) + actual[:] = -1 + expected = np.array(-1, dtype=np_dtype) + assert_equal(actual, expected) + + if __name__ == '__main__': unittest.main() From 1eaecd9ddfe1e7256f28fa41274f6799643dcb9c Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Fri, 26 Aug 2022 15:13:06 -0600 Subject: [PATCH 04/18] ENH, TST: more 0-D handling * we can now pass an additional testing module in the array API suite, `test_utils.py`, which has also been added to the CI workflow now * `DataTypeClass` subclasses have been given a `np_equiv` attribute to facilitate interoperation with NumPy types; admittedly, this is probably just a temporary adjustment as the type system matures * `View` instances now have slightly more robust `0-D` array handling because of two adjustments: - lean a bit more heavily on NumPy in these cases - start using a custom `__eq__` for scalar comparisons (which apparently also requires a custom `__hash__` for other parts of `pykokkos`) * my primary metric that this is actually "progress" instead of just leaning more heavily on NumPy is that we can pass more array API tests now;
ultimately, this may just result in deferring proper implementations for some things, though being able to satisfy the array API with some parts falling back to NumPy probably isn't a bad thing anyway, at least at first * no changes to the `pykokkos` test suite were needed --- .github/workflows/array_api.yml | 2 +- pykokkos/interface/data_types.py | 12 ++++++++++++ pykokkos/interface/views.py | 27 ++++++++++++++++++++++++--- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 0ff4a411..e3b57f88 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -45,4 +45,4 @@ jobs: pip install -r requirements.txt export ARRAY_API_TESTS_MODULE=pykokkos # only run a subset of the conformance tests to get started - pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py + pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py diff --git a/pykokkos/interface/data_types.py b/pykokkos/interface/data_types.py index 371e315d..601fd7ad 100644 --- a/pykokkos/interface/data_types.py +++ b/pykokkos/interface/data_types.py @@ -40,40 +40,52 @@ class int8(DataTypeClass): class int16(DataTypeClass): value = kokkos.int16 + np_equiv = np.int16 class int32(DataTypeClass): value = kokkos.int32 + np_equiv = np.int32 class int64(DataTypeClass): value = kokkos.int64 + np_equiv = np.int64 class uint16(DataTypeClass): value = kokkos.uint16 + np_equiv = np.uint16 class uint32(DataTypeClass): value = kokkos.int32 + np_equiv = np.uint32 class uint64(DataTypeClass): value = kokkos.int64 + np_equiv = np.uint64 class float(DataTypeClass): value = 
kokkos.float + np_equiv = np.float32 class double(DataTypeClass): value = kokkos.double + np_equiv = np.float64 class real(DataTypeClass): value = None + np_equiv = None class float32(DataTypeClass): value = kokkos.float + np_equiv = np.float32 class float64(DataTypeClass): value = kokkos.double + np_equiv = np.float64 class bool(DataTypeClass): value = kokkos.int16 + np_equiv = np.bool_ diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index ce856ebd..11a430c8 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -289,7 +289,12 @@ def _init_view( elif self.dtype == pk.uint64: self.dtype = DataType.uint64 if trait is trait.Unmanaged: - self.array = kokkos.unmanaged_array(array, dtype=self.dtype.value, space=self.space.value, layout=self.layout.value) + if array is not None and array.ndim == 0: + # TODO: we don't really support 0-D under the hood--use + # NumPy for now... + self.array = array + else: + self.array = kokkos.unmanaged_array(array, dtype=self.dtype.value, space=self.space.value, layout=self.layout.value) else: if len(self.shape) == 0: shape = [1] @@ -326,6 +331,19 @@ def _get_type(self, dtype: Union[DataType, type]) -> Optional[DataType]: return None + + def __eq__(self, other): + if self.array == other: + return True + else: + return False + + + def __hash__(self): + hash_value = hash(self.array) + return hash_value + + @staticmethod def _get_dtype_name(type_name: str) -> str: """ @@ -472,7 +490,7 @@ def from_numpy(array: np.ndarray, space: Optional[MemorySpace] = None, layout: O # temporary/terrible hack here for array API testing.. 
if array.ndim == 0: ret_list = () - array = np.array(()) + array = np.array(array, dtype=np_dtype) else: ret_list = list((array.shape)) @@ -544,7 +562,10 @@ def asarray(obj, /, *, dtype=None, device=None, copy=None): return view if "bool" in str(dtype): dtype = np.bool_ - arr = np.asarray(obj, dtype=dtype) + if dtype is not None: + arr = np.asarray(obj, dtype=dtype.np_equiv) + else: + arr = np.asarray(obj) ret = from_numpy(arr) return ret From 9458ced702e370cc6014d2fc85f9ea4dee8a4401 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Wed, 21 Sep 2022 09:22:24 -0600 Subject: [PATCH 05/18] MAINT: PR 67 revisions * `bool` is now aliased to a more appropriate `pykokkos` type -- `uint8` * `int8` and `uint8` types have been updated to include appropriate `np_equiv` attributes * fix a broken conditional chain in `from_numpy()` --- pykokkos/interface/data_types.py | 4 +++- pykokkos/interface/views.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pykokkos/interface/data_types.py b/pykokkos/interface/data_types.py index 601fd7ad..cb33ae74 100644 --- a/pykokkos/interface/data_types.py +++ b/pykokkos/interface/data_types.py @@ -34,9 +34,11 @@ class DataTypeClass: class uint8(DataTypeClass): value = kokkos.uint8 + np_equiv = np.uint8 class int8(DataTypeClass): value = kokkos.int8 + np_equiv = np.int8 class int16(DataTypeClass): value = kokkos.int16 @@ -87,5 +89,5 @@ class float64(DataTypeClass): np_equiv = np.float64 class bool(DataTypeClass): - value = kokkos.int16 + value = kokkos.uint8 np_equiv = np.bool_ diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index 11a430c8..414ab364 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -451,7 +451,7 @@ def from_numpy(array: np.ndarray, space: Optional[MemorySpace] = None, layout: O if np_dtype is np.int8: dtype = int8 - if np_dtype is np.int16: + elif np_dtype is np.int16: dtype = int16 elif np_dtype is np.int32: dtype = int32 From 
a6023597038718e5c3655f275353ce4033e0a7bf Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 22 Sep 2022 10:27:38 -0600 Subject: [PATCH 06/18] MAINT, BUG: typing cleanups * this splits off a few cleanups from gh-73 * there is some pointless type remapping code in the `View` class that has been removed * removing the above code causes the test suite to fail because of incorrect `uint32`/`uint64` `value` mappings in `data_types` module, so fix those to allow tests to pass --- pykokkos/interface/data_types.py | 4 ++-- pykokkos/interface/views.py | 8 -------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/pykokkos/interface/data_types.py b/pykokkos/interface/data_types.py index cb33ae74..5e88fad2 100644 --- a/pykokkos/interface/data_types.py +++ b/pykokkos/interface/data_types.py @@ -58,12 +58,12 @@ class uint16(DataTypeClass): class uint32(DataTypeClass): - value = kokkos.int32 + value = kokkos.uint32 np_equiv = np.uint32 class uint64(DataTypeClass): - value = kokkos.int64 + value = kokkos.uint64 np_equiv = np.uint64 diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index 414ab364..ed437239 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -280,14 +280,6 @@ def _init_view( self.dtype = DataType.float elif self.dtype == pk.double: self.dtype = DataType.double - elif self.dtype == pk.int32: - self.dtype = DataType.int32 - elif self.dtype == pk.int64: - pass - elif self.dtype == pk.uint32: - self.dtype = DataType.uint32 - elif self.dtype == pk.uint64: - self.dtype = DataType.uint64 if trait is trait.Unmanaged: if array is not None and array.ndim == 0: # TODO: we don't really support 0-D under the hood--use From 52daba8d290621ba59d70cb375de688e740a6e97 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 29 Aug 2022 11:01:31 -0600 Subject: [PATCH 07/18] ENH: pk.ones() to API standard * implement `pk.ones()` such that `array_api_tests/test_creation_functions.py::test_ones` passes * the Python array API 
standard `test_ones()` makes use of `pk.equal()`, which wasn't implemented, so an early-stage implementation is added here (which should help later on as well, since the need to occasionally do `view1 == view2` should be obvious) * this is no doubt a bit of a "hack," especially in terms of full-featured broadcasting for `pk.equal`, which is currently only supported for "scalar" broadcasting in my work here * I was a bit surprised that I only needed to add limited 1D support for `pk.equal()`, but I imagine the code there will grow substantially to support more scenarios/remove hacks in the future * note that this branch also borrows some of the typing changes from gh-67 (and improves on them slightly I think) * special shims are present for some `0-D` type scenarios, and for now I've doubled down on the adoption of an unsigned integer type as a proxy for a `bool` array--I suggest we at least use a shorter width type for this in the future though, once the pykokkos-base bindings are merged * although I'm leaning on the Python array API standard tests here, I did check that e.g., this works: ```python import pykokkos as pk def main(): a = pk.ones((3, 3)) print(a) if __name__ == "__main__": main() ``` ``` [[1. 1. 1.] [1. 1. 1.] [1. 1. 1.]] ``` * note, however, that the hook-in of `pk.equal()` to `__eq__` wasn't the focus of this work (since the array API tests simply call `pk.equal()` for the isolated test I was working on), so for now: ```python import pykokkos as pk def main(): a = pk.ones((3,)) b = pk.ones((3,)) result = a == b print(result) result2 = pk.equal(a, b) print(result2) if __name__ == "__main__": main() ``` Produces: ``` False [1 1 1] ``` * what we really want is `True` and `[True True True]`, but the latter isn't too bad at least--the `1` values are a proxy for truth in the `pk.uint16` type we're using in place of `bool` for now; I suggest we delay the hook-in to `__eq__` for now...
--- .github/workflows/array_api.yml | 2 +- pykokkos/__init__.py | 5 +- pykokkos/interface/views.py | 41 +++++--- pykokkos/lib/create.py | 12 +++ pykokkos/lib/ufuncs.py | 171 ++++++++++++++++++++++++++++++++ pykokkos/lib/util.py | 2 +- 6 files changed, 217 insertions(+), 16 deletions(-) diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index e3b57f88..1e17ee41 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -45,4 +45,4 @@ jobs: pip install -r requirements.txt export ARRAY_API_TESTS_MODULE=pykokkos # only run a subset of the conformance tests to get started - pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py + pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 40a7e690..c88db687 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -46,9 +46,10 @@ exp, exp2, isinf, - isnan) + isnan, + equal) from pykokkos.lib.info import iinfo, finfo -from pykokkos.lib.create import zeros +from pykokkos.lib.create import zeros, ones from pykokkos.lib.util import all, any from pykokkos.lib.constants import e, pi, inf, nan diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index ed437239..e4d3a26a 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -22,7 +22,7 @@ int16, int32, int64, uint8, uint16, uint32, uint64, - double + double, float64, ) from .layout import get_default_layout, Layout from .memory_space import get_default_memory_space, MemorySpace @@ -138,8 
+138,14 @@ def __len__(self) -> int: :returns: the length of the first dimension """ + # NOTE: careful with 0-D treatments and __bool__ + # related handling; you can have shape () and + # still be True for example... if len(self.shape) == 0: - return 0 + if self.data != 0: + return 1 + else: + return 0 return self.shape[0] def __iter__(self) -> Iterator: @@ -149,7 +155,11 @@ def __iter__(self) -> Iterator: :returns: an iterator over the data """ - return (n for n in self.data) + if self.data.ndim > 0: + return (n for n in self.data) + else: + # 0-D case returns empty generator + return zip() def __str__(self) -> str: """ @@ -171,7 +181,7 @@ def __deepcopy__(self, memo): class View(ViewType): def __init__( self, - shape: List[int], + shape: Tuple[int], dtype: Union[DataTypeClass, type] = real, space: MemorySpace = MemorySpace.MemorySpaceDefault, layout: Layout = Layout.LayoutDefault, @@ -181,7 +191,7 @@ def __init__( """ View constructor. - :param shape: the shape of the view as a list of integers + :param shape: the shape of the view as a tuple of integers :param dtype: the data type of the view, either a pykokkos DataType or "int" or "float". :param space: the memory space of the view. Will be set to the execution space of the view by default. :param layout: the layout of the view in memory. @@ -246,7 +256,7 @@ def _init_view( """ Initialize the view - :param shape: the shape of the view as a list of integers + :param shape: the shape of the view as a tuple of integers :param dtype: the data type of the view, either a pykokkos DataType or "int" or "float". :param space: the memory space of the view. Will be set to the execution space of the view by default. :param layout: the layout of the view in memory. 
@@ -332,7 +342,10 @@ def __eq__(self, other): def __hash__(self): - hash_value = hash(self.array) + try: + hash_value = hash(self.array) + except TypeError: + hash_value = hash(self.array.data.tobytes()) return hash_value @@ -460,9 +473,9 @@ def from_numpy(array: np.ndarray, space: Optional[MemorySpace] = None, layout: O elif np_dtype is np.float32: dtype = DataType.float # PyKokkos float elif np_dtype is np.float64: - dtype = double + dtype = float64 elif np_dtype is np.bool_: - dtype = int16 + dtype = uint16 else: raise RuntimeError(f"ERROR: unsupported numpy datatype {np_dtype}") @@ -482,7 +495,13 @@ def from_numpy(array: np.ndarray, space: Optional[MemorySpace] = None, layout: O # temporary/terrible hack here for array API testing.. if array.ndim == 0: ret_list = () - array = np.array(array, dtype=np_dtype) + if np_dtype == np.bool_: + if array == 1: + array = np.array(1, dtype=np.uint8) + else: + array = np.array(0, dtype=np.uint8) + else: + array = np.array(array, dtype=np_dtype) else: ret_list = list((array.shape)) @@ -552,8 +571,6 @@ def asarray(obj, /, *, dtype=None, device=None, copy=None): view = pk.View([1], dtype=dtype) view[:] = obj return view - if "bool" in str(dtype): - dtype = np.bool_ if dtype is not None: arr = np.asarray(obj, dtype=dtype.np_equiv) else: diff --git a/pykokkos/lib/create.py b/pykokkos/lib/create.py index 6218dcd8..95d7363f 100644 --- a/pykokkos/lib/create.py +++ b/pykokkos/lib/create.py @@ -2,3 +2,15 @@ def zeros(shape, *, dtype=None, device=None): return pk.View([*shape], dtype=dtype) + + +def ones(shape, *, dtype=None, device=None): + if dtype is None: + # NumPy also defaults to a double for ones() + dtype = pk.float64 + view: pk.View = pk.View([*shape], dtype=dtype) + view[:] = 1 + if shape == (0,): + view.shape = (0,) + view.shape = tuple(view.shape) + return view diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 6b920a9f..b3831a93 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -1,5 +1,7 
@@ import pykokkos as pk +import numpy as np + @pk.workunit def reciprocal_impl_1d_double(tid: int, view: pk.View1D[pk.double]): @@ -1429,3 +1431,172 @@ def isinf(view): view=view, out=out) return out +======= +======= +def equal_impl_1d_double(tid: int, + view1: pk.View1D[pk.double], + view2: pk.View1D[pk.double], + view2_size: int, + view_result: pk.View1D[pk.uint16]): + view2_idx: int = 0 + if view2_size == 1: + view2_idx = 0 + else: + view2_idx = tid + if view1[tid] == view2[view2_idx]: + view_result[tid] = 1 + else: + view_result[tid] = 0 + + +@pk.workunit +def equal_impl_1d_uint16(tid: int, + view1: pk.View1D[pk.uint16], + view2: pk.View1D[pk.uint16], + view2_size: int, + view_result: pk.View1D[pk.uint16]): + view2_idx: int = 0 + if view2_size == 1: + view2_idx = 0 + else: + view2_idx = tid + if view1[tid] == view2[view2_idx]: + view_result[tid] = 1 + else: + view_result[tid] = 0 + + +@pk.workunit +def equal_impl_1d_int16(tid: int, + view1: pk.View1D[pk.int16], + view2: pk.View1D[pk.int16], + view2_size: int, + view_result: pk.View1D[pk.uint16]): + view2_idx: int = 0 + if view2_size == 1: + view2_idx = 0 + else: + view2_idx = tid + if view1[tid] == view2[view2_idx]: + view_result[tid] = 1 + else: + view_result[tid] = 0 + + +@pk.workunit +def equal_impl_1d_int32(tid: int, + view1: pk.View1D[pk.int32], + view2: pk.View1D[pk.int32], + view2_size: int, + view_result: pk.View1D[pk.uint16]): + view2_idx: int = 0 + if view2_size == 1: + view2_idx = 0 + else: + view2_idx = tid + if view1[tid] == view2[view2_idx]: + view_result[tid] = 1 + else: + view_result[tid] = 0 + + +@pk.workunit +def equal_impl_1d_int64(tid: int, + view1: pk.View1D[pk.int64], + view2: pk.View1D[pk.int64], + view2_size: int, + view_result: pk.View1D[pk.uint16]): + view2_idx: int = 0 + if view2_size == 1: + view2_idx = 0 + else: + view2_idx = tid + if view1[tid] == view2[view2_idx]: + view_result[tid] = 1 + else: + view_result[tid] = 0 + +def equal(view1, view2): + # TODO: write even more 
dispatching for cases where view1 and view2 + # have different, but comparable, types (like float32 vs. float64?) + # this may "explode" without templating + + if sum(view1.shape) == 0 or sum(view2.shape) == 0: + return np.empty(shape=(0,)) + + if view1.shape != view2.shape: + if not view1.size <= 1 and not view2.size <= 1: + # TODO: supporting __eq__ over broadcasted shapes beyond + # scalar (i.e., matching number of columns) + raise ValueError("view1 and view2 have incompatible shapes") + + # TODO: something more appropriate than uint16 as a proxy + # for the bool type? (a shorter integer like uint8 + # at least?) + view_result = pk.View([*view1.shape], dtype=pk.uint16) + + # NOTE: the blocks below are asymmetric on view1 vs view2, + # and also quite awkward--they evolved from making the array API + # test_ones() test pass, but need refinement or removal eventually + try: + if isinstance(view2.array, np.ndarray): + if view2.size <= 1: + new_shape = (1,) + else: + new_shape = view2.shape + view2r = pk.View([*new_shape], dtype=view2.dtype) + view2r[:] = view2.array + view2 = view2r + except AttributeError: + pass + try: + if isinstance(view1.array, np.ndarray): + if view1.shape == () or view1.shape == (0,): + view1r = pk.View([1], dtype=view1.dtype) + view1r[:] = view1.array + view1 = view1r + except AttributeError: + pass + + if ("double" in str(view1.dtype) or "float64" in str(view1.dtype) and + ("double" in str(view2.dtype) or "float64" in str(view2.dtype))): + pk.parallel_for(view1.size, + equal_impl_1d_double, + view1=view1, + view2=view2, + view2_size=view2.size, + view_result=view_result) + elif (("uint16" in str(view1.dtype) or "bool" in str(view1.dtype)) and + ("uint16" in str(view2.dtype) or "bool" in str(view2.dtype))): + pk.parallel_for(view1.size, + equal_impl_1d_uint16, + view1=view1, + view2=view2, + view2_size=view2.size, + view_result=view_result) + elif "int16" in str(view1.dtype) and "int16" in str(view1.dtype): + pk.parallel_for(view1.size, + 
equal_impl_1d_int16, + view1=view1, + view2=view2, + view2_size=view2.size, + view_result=view_result) + elif "int32" in str(view1.dtype) and "int32" in str(view1.dtype): + pk.parallel_for(view1.size, + equal_impl_1d_int32, + view1=view1, + view2=view2, + view2_size=view2.size, + view_result=view_result) + elif "int64" in str(view1.dtype) and "int64" in str(view1.dtype): + pk.parallel_for(view1.size, + equal_impl_1d_int64, + view1=view1, + view2=view2, + view2_size=view2.size, + view_result=view_result) + else: + # TODO: include the view types in the error message + raise NotImplementedError("equal ufunc not implemented for this comparison") + + return view_result diff --git a/pykokkos/lib/util.py b/pykokkos/lib/util.py index 7c7921a6..778d1d37 100644 --- a/pykokkos/lib/util.py +++ b/pykokkos/lib/util.py @@ -13,7 +13,7 @@ def all(x, /, *, axis=None, keepdims=False): elif x == False: return False np_result = np.all(x) - ret_val = pk.View(pk.from_numpy(np.all(x))) + ret_val = pk.from_numpy(np_result) return ret_val From e47c7737d587e488cbfe7fab3152062786c7cdfc Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Tue, 6 Sep 2022 14:43:57 -0600 Subject: [PATCH 08/18] MAINT: cleanup after rebase --- pykokkos/interface/views.py | 5 ++++- pykokkos/lib/ufuncs.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index e4d3a26a..bad35eee 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -286,6 +286,9 @@ def _init_view( self.layout: Layout = layout self.trait: Trait = trait + # TODO: if ufuncs stop inspecting + # type "strings," we should be able to + # purge these mappings if self.dtype == pk.float: self.dtype = DataType.float elif self.dtype == pk.double: @@ -475,7 +478,7 @@ def from_numpy(array: np.ndarray, space: Optional[MemorySpace] = None, layout: O elif np_dtype is np.float64: dtype = float64 elif np_dtype is np.bool_: - dtype = uint16 + dtype = uint8 else: raise 
RuntimeError(f"ERROR: unsupported numpy datatype {np_dtype}") diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index b3831a93..bcfc6caf 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -1431,8 +1431,8 @@ def isinf(view): view=view, out=out) return out -======= -======= + +@pk.workunit def equal_impl_1d_double(tid: int, view1: pk.View1D[pk.double], view2: pk.View1D[pk.double], From 170a0532daa22dc5bef3b329ae4f70a9d493a699 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 22 Sep 2022 12:06:00 -0600 Subject: [PATCH 09/18] MAINT: PR 73 revisions * updated the `equal()` ufunc code in this branch to correctly use the new `uint8` type instead of `uint16` as a temporary proxy for a `bool` type * removed extraneous shape handling code in `ones()` and `View`, as pointed out by reviewer * `View` `shape` argument type adjusted to list OR tuple based on reviewer feedback * simplified the `equal()` ufunc by removing some of the `np.ndarray()` support shims--it seems we don't have testable evidence that this is currently a requirement --- pykokkos/interface/views.py | 11 +++-------- pykokkos/lib/create.py | 3 --- pykokkos/lib/ufuncs.py | 38 ++++++------------------------------- 3 files changed, 9 insertions(+), 43 deletions(-) diff --git a/pykokkos/interface/views.py b/pykokkos/interface/views.py index bad35eee..01e9ebd0 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -181,7 +181,7 @@ def __deepcopy__(self, memo): class View(ViewType): def __init__( self, - shape: Tuple[int], + shape: Union[List[int], Tuple[int]], dtype: Union[DataTypeClass, type] = real, space: MemorySpace = MemorySpace.MemorySpaceDefault, layout: Layout = Layout.LayoutDefault, @@ -191,7 +191,7 @@ def __init__( """ View constructor. - :param shape: the shape of the view as a tuple of integers + :param shape: the shape of the view as a list or tuple of integers :param dtype: the data type of the view, either a pykokkos DataType or "int" or "float". 
:param space: the memory space of the view. Will be set to the execution space of the view by default. :param layout: the layout of the view in memory. @@ -256,7 +256,7 @@ def _init_view( """ Initialize the view - :param shape: the shape of the view as a tuple of integers + :param shape: the shape of the view as a list or tuple of integers :param dtype: the data type of the view, either a pykokkos DataType or "int" or "float". :param space: the memory space of the view. Will be set to the execution space of the view by default. :param layout: the layout of the view in memory. @@ -265,8 +265,6 @@ def _init_view( """ self.shape: Tuple[int] = tuple(shape) - if self.shape == (0,): - self.shape = () self.size: int = math.prod(shape) self.dtype: Optional[DataType] = self._get_type(dtype) if self.dtype is None: @@ -286,9 +284,6 @@ def _init_view( self.layout: Layout = layout self.trait: Trait = trait - # TODO: if ufuncs stop inspecting - # type "strings," we should be able to - # purge these mappings if self.dtype == pk.float: self.dtype = DataType.float elif self.dtype == pk.double: diff --git a/pykokkos/lib/create.py b/pykokkos/lib/create.py index 95d7363f..cdeefc3e 100644 --- a/pykokkos/lib/create.py +++ b/pykokkos/lib/create.py @@ -10,7 +10,4 @@ def ones(shape, *, dtype=None, device=None): dtype = pk.float64 view: pk.View = pk.View([*shape], dtype=dtype) view[:] = 1 - if shape == (0,): - view.shape = (0,) - view.shape = tuple(view.shape) return view diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index bcfc6caf..0809604b 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -1437,7 +1437,7 @@ def equal_impl_1d_double(tid: int, view1: pk.View1D[pk.double], view2: pk.View1D[pk.double], view2_size: int, - view_result: pk.View1D[pk.uint16]): + view_result: pk.View1D[pk.uint8]): view2_idx: int = 0 if view2_size == 1: view2_idx = 0 @@ -1454,7 +1454,7 @@ def equal_impl_1d_uint16(tid: int, view1: pk.View1D[pk.uint16], view2: pk.View1D[pk.uint16], 
view2_size: int, - view_result: pk.View1D[pk.uint16]): + view_result: pk.View1D[pk.uint8]): view2_idx: int = 0 if view2_size == 1: view2_idx = 0 @@ -1471,7 +1471,7 @@ def equal_impl_1d_int16(tid: int, view1: pk.View1D[pk.int16], view2: pk.View1D[pk.int16], view2_size: int, - view_result: pk.View1D[pk.uint16]): + view_result: pk.View1D[pk.uint8]): view2_idx: int = 0 if view2_size == 1: view2_idx = 0 @@ -1488,7 +1488,7 @@ def equal_impl_1d_int32(tid: int, view1: pk.View1D[pk.int32], view2: pk.View1D[pk.int32], view2_size: int, - view_result: pk.View1D[pk.uint16]): + view_result: pk.View1D[pk.uint8]): view2_idx: int = 0 if view2_size == 1: view2_idx = 0 @@ -1505,7 +1505,7 @@ def equal_impl_1d_int64(tid: int, view1: pk.View1D[pk.int64], view2: pk.View1D[pk.int64], view2_size: int, - view_result: pk.View1D[pk.uint16]): + view_result: pk.View1D[pk.uint8]): view2_idx: int = 0 if view2_size == 1: view2_idx = 0 @@ -1530,33 +1530,7 @@ def equal(view1, view2): # scalar (i.e., matching number of columns) raise ValueError("view1 and view2 have incompatible shapes") - # TODO: something more appropriate than uint16 as a proxy - # for the bool type? (a shorter integer like uint8 - # at least?) 
- view_result = pk.View([*view1.shape], dtype=pk.uint16) - - # NOTE: the blocks below are asymmetric on view1 vs view2, - # and also quite awkward--they evolved from making the array API - # test_ones() test pass, but need refinement or removal eventually - try: - if isinstance(view2.array, np.ndarray): - if view2.size <= 1: - new_shape = (1,) - else: - new_shape = view2.shape - view2r = pk.View([*new_shape], dtype=view2.dtype) - view2r[:] = view2.array - view2 = view2r - except AttributeError: - pass - try: - if isinstance(view1.array, np.ndarray): - if view1.shape == () or view1.shape == (0,): - view1r = pk.View([1], dtype=view1.dtype) - view1r[:] = view1.array - view1 = view1r - except AttributeError: - pass + view_result = pk.View([*view1.shape], dtype=pk.uint8) if ("double" in str(view1.dtype) or "float64" in str(view1.dtype) and ("double" in str(view2.dtype) or "float64" in str(view2.dtype))): From b9e54d9b156d81d51bec133a4e2547010f770504 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Fri, 23 Sep 2022 11:08:59 -0600 Subject: [PATCH 10/18] TST, MAINT: test durations * add an option to report the top `N` slowest tests when running `runtests.py`, which basically just passes through to the built-in pytest option: https://docs.pytest.org/en/7.1.x/how-to/usage.html#profiling-test-execution-duration * I'm hoping not to build many more options in though, I just wanted this one because we'll want a convenient way to keep an eye on what tests are consuming time as our suite grows * some sample incantations/results: - `python runtests.py -d 10` ``` =================================================================================================================================================== slowest 10 durations =================================================================================================================================================== 11.51s call tests/test_ufuncs.py::test_caching 11.21s call 
tests/test_hierarchical.py::TestHierarchical::test_outer_for 9.23s call tests/test_AST_translator.py::TestASTTranslator::test_ann_assign 7.95s call tests/test_kokkosfunctions_translator.py::TestKokkosFunctionsTranslator::test_args_sum 7.83s call tests/test_atomics.py::TestAtomic::test_atomic_add 6.27s call tests/test_ops_translator.py::TestOpsTranslator::test_add_op 5.36s call tests/test_hierarchical.py::TestHierarchical::test_yAx_vector 5.10s call tests/test_ufuncs.py::test_matmul_1d_exposed_ufuncs_vs_numpy[double-float64-matmul-matmul] 5.08s call tests/test_ufuncs.py::test_matmul_1d_exposed_ufuncs_vs_numpy[float-float32-matmul-matmul] 4.92s call tests/test_ufuncs.py::test_multi_array_1d_exposed_ufuncs_vs_numpy[float-float32-greater-greater] ============================================================================================================================ 203 passed, 9 skipped, 9 xfailed, 16 warnings in 450.11s (0:07:30) ============================================================================================================================ ``` - `python runtests.py -d 5 -t tests/test_views.py` ``` =================================================================================================================================================== slowest 5 durations ==================================================================================================================================================== 0.02s call tests/test_views.py::test_asarray_consts_vs_numpy[None-None-nan] 0.02s call tests/test_views.py::test_sizes[input_arr2-view_dims2-View3D] 0.02s call tests/test_views.py::test_asarray_consts_vs_numpy[int32-int32-2.718281828459045] 0.02s call tests/test_views.py::test_sizes[input_arr1-view_dims1-View2D] 0.02s call tests/test_views.py::test_asarray_consts_vs_numpy[int64-int64-2.718281828459045] ============================================================================================================================================== 31 
passed, 9 skipped in 0.78s =============================================================================================================================================== ``` --- runtests.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runtests.py b/runtests.py index e9e53baf..b186e592 100644 --- a/runtests.py +++ b/runtests.py @@ -19,10 +19,13 @@ parser = argparse.ArgumentParser() parser.add_argument('-t', '--specifictests', type=str) +parser.add_argument('-d', '--durations', type=int) args = parser.parse_args() if args.specifictests: pytest_args.append(args.specifictests) +if args.durations: + pytest_args.append(f"--durations={args.durations}") # force pytest to actually import # all the test modules directly From 5878590724e01cca178318adffa1d59f0385ce32 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 29 Aug 2022 11:01:31 -0600 Subject: [PATCH 11/18] ENH: pk.ones() to API standard * implement `pk.ones()` such that `array_api_tests/test_creation_functions.py::test_ones` passes * the Python array API standard `test_ones()` makes use of `pk.equal()`, which wasn't implemented, so an early-stage implementation is added here (which should help later on as well, since the need to occasionally do `view1 == view2` should be obvious) * this is no doubt a bit of a "hack," especially in terms of full-featured broadcasting for `pk.equal`, which is currently only supported for "scalar" broadcasting in my work here * I was a bit surprised that I only needed to add limited 1D support for `pk.equal()`, but I imagine the code there will grow substantially to support more scenarios/remove hacks in the future * note that this branch also borrows some of the typing changes from gh-67 (and improves on them slightly I think) * special shims are present for some `0-D` type scenarios, and for now I've doubled down on the adoption of an unsigned integer type as a proxy for a `bool` array--I suggest we at least use a shorter width type for this in the future though, once the 
pykokkos-base bindings are merged * although I'm leaning on the Python array API standard tests here, I did check that i.e., this works: ```python import pykokkos as pk def main(): a = pk.ones((3, 3)) print(a) if __name__ == "__main__": main() ``` ``` [[1. 1. 1.] [1. 1. 1.] [1. 1. 1.]] ``` * note, however, that the hook-in of `pk.equal()` to `__equal__` wasn't the focus of this work (since the array API tests simply call `pk.equal()` for the isolated test I was working on), so for now: ```python import pykokkos as pk def main(): a = pk.ones((3,)) b = pk.ones((3,)) result = a == b print(result) result2 = pk.equal(a, b) print(result2) if __name__ == "__main__": main() ``` Produces: ``` False [1 1 1] ``` * what we really want is `True` and `[True True True]`, but the latter isn't too bad at least--the `1` values are a proxy for truth in the `pk.uint16` type we're using in place of `bool` for now; I suggest we delay the hook-in to `__equal__` for now... --- pykokkos/lib/ufuncs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 0809604b..7f94a900 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -817,6 +817,7 @@ def logaddexp(viewA, viewB): raise RuntimeError("Incompatible Types") return out + def true_divide(viewA, viewB): """ true_divide is an alias of divide @@ -1432,6 +1433,7 @@ def isinf(view): out=out) return out + @pk.workunit def equal_impl_1d_double(tid: int, view1: pk.View1D[pk.double], From cf996b1e09b647dd58c87c56f130a5875f8dafcc Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Tue, 6 Sep 2022 15:51:53 -0600 Subject: [PATCH 12/18] ENH: more creation API * changes needed to pass `array_api_tests/test_creation_functions.py::test_ones_like` * this builds on gh-73, and is meant to be reviewed after that one --- .github/workflows/array_api.yml | 2 +- pykokkos/__init__.py | 9 ++++- pykokkos/lib/create.py | 19 ++++++++++ pykokkos/lib/info.py | 8 ++++ pykokkos/lib/manipulate.py | 11 ++++++ 
pykokkos/lib/ufuncs.py | 66 +++++++++++++++++++++++++++++++++ 6 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 pykokkos/lib/manipulate.py diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml index 1e17ee41..681f2f7c 100644 --- a/.github/workflows/array_api.yml +++ b/.github/workflows/array_api.yml @@ -45,4 +45,4 @@ jobs: pip install -r requirements.txt export ARRAY_API_TESTS_MODULE=pykokkos # only run a subset of the conformance tests to get started - pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones + pytest array_api_tests/meta/test_broadcasting.py array_api_tests/meta/test_equality_mapping.py array_api_tests/meta/test_signatures.py array_api_tests/meta/test_special_cases.py array_api_tests/test_constants.py array_api_tests/meta/test_utils.py array_api_tests/test_creation_functions.py::test_ones array_api_tests/test_creation_functions.py::test_ones_like diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index c88db687..1d7aa81f 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -47,9 +47,14 @@ exp2, isinf, isnan, - equal) + equal, + isfinite) from pykokkos.lib.info import iinfo, finfo -from pykokkos.lib.create import zeros, ones +from pykokkos.lib.create import (zeros, + ones, + ones_like, + full) +from pykokkos.lib.manipulate import reshape from pykokkos.lib.util import all, any from pykokkos.lib.constants import e, pi, inf, nan diff --git a/pykokkos/lib/create.py b/pykokkos/lib/create.py index cdeefc3e..1c4256f1 100644 --- a/pykokkos/lib/create.py +++ b/pykokkos/lib/create.py @@ -11,3 +11,22 @@ def ones(shape, *, dtype=None, device=None): view: pk.View = pk.View([*shape], dtype=dtype) view[:] = 1 return view + + +def ones_like(x, /, *, dtype=None, 
device=None): + if dtype is None: + dtype = x.dtype + view: pk.View = pk.View([*x.shape], dtype=dtype) + view[:] = 1 + return view + + +def full(shape, fill_value, *, dtype=None, device=None): + if dtype is None: + dtype = fill_value.dtype + try: + view: pk.View = pk.View([*shape], dtype=dtype) + except TypeError: + view: pk.View = pk.View([shape], dtype=dtype) + view[:] = fill_value + return view diff --git a/pykokkos/lib/info.py b/pykokkos/lib/info.py index bb8b088d..b6976e3b 100644 --- a/pykokkos/lib/info.py +++ b/pykokkos/lib/info.py @@ -36,6 +36,14 @@ def iinfo(type_or_arr): return info_type_attrs(bits=64, min=-9223372036854775808, max=9223372036854775807) + elif "uint8" in str(type_or_arr): + return info_type_attrs(bits=8, + min=0, + max=255) + elif "int8" in str(type_or_arr): + return info_type_attrs(bits=8, + min=-128, + max=127) def finfo(type_or_arr): diff --git a/pykokkos/lib/manipulate.py b/pykokkos/lib/manipulate.py new file mode 100644 index 00000000..fcfbab39 --- /dev/null +++ b/pykokkos/lib/manipulate.py @@ -0,0 +1,11 @@ +import pykokkos as pk + +import numpy as np + + +def reshape(x, /, shape, *, copy=None): + view: pk.View = pk.View([*shape], dtype=x.dtype) + # TODO: write in a kernel/workunit and lean + # less on NumPy? 
+ view[:] = np.reshape(x, shape) + return view diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 7f94a900..12a9e380 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -1576,3 +1576,69 @@ def equal(view1, view2): raise NotImplementedError("equal ufunc not implemented for this comparison") return view_result + + +@pk.workunit +def isnan_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8]): + out[tid] = isnan(view[tid]) + + +@pk.workunit +def isnan_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8]): + out[tid] = isnan(view[tid]) + + +def isnan(view): + out = pk.View([*view.shape], dtype=pk.uint8) + if "double" in str(view.dtype) or "float64" in str(view.dtype): + pk.parallel_for(view.shape[0], + isnan_impl_1d_double, + view=view, + out=out) + elif "float" in str(view.dtype): + pk.parallel_for(view.shape[0], + isnan_impl_1d_float, + view=view, + out=out) + return out + + +@pk.workunit +def isfinite_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8]): + out[tid] = isfinite(view[tid]) + + +@pk.workunit +def isfinite_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8]): + out[tid] = isfinite(view[tid]) + + +def isfinite(view): + out = pk.View([*view.shape], dtype=pk.uint8) + if "double" in str(view.dtype) or "float64" in str(view.dtype): + if view.shape == (): + new_view = pk.View([1], dtype=pk.double) + new_view[:] = view + pk.parallel_for(1, + isfinite_impl_1d_double, + view=new_view, + out=out) + else: + pk.parallel_for(view.shape[0], + isfinite_impl_1d_double, + view=view, + out=out) + elif "float" in str(view.dtype): + if view.shape == (): + new_view = pk.View([1], dtype=pk.float) + new_view[:] = view + pk.parallel_for(1, + isfinite_impl_1d_float, + view=new_view, + out=out) + else: + pk.parallel_for(view.shape[0], + isfinite_impl_1d_float, + view=view, + out=out) + return out From d19586eddf68d4bec7df2941bde3cb37beb59003 Mon Sep 17 
00:00:00 2001 From: Tyler Reddy Date: Fri, 23 Sep 2022 12:37:14 -0600 Subject: [PATCH 13/18] MAINT: PR 81 revisions * cleanups after rebase --- pykokkos/__init__.py | 4 ++-- pykokkos/lib/ufuncs.py | 27 --------------------------- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index 1d7aa81f..f2947662 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -47,8 +47,8 @@ exp2, isinf, isnan, - equal, - isfinite) + equal, + isfinite) from pykokkos.lib.info import iinfo, finfo from pykokkos.lib.create import (zeros, ones, diff --git a/pykokkos/lib/ufuncs.py b/pykokkos/lib/ufuncs.py index 12a9e380..5f7e5860 100644 --- a/pykokkos/lib/ufuncs.py +++ b/pykokkos/lib/ufuncs.py @@ -817,7 +817,6 @@ def logaddexp(viewA, viewB): raise RuntimeError("Incompatible Types") return out - def true_divide(viewA, viewB): """ true_divide is an alias of divide @@ -1433,7 +1432,6 @@ def isinf(view): out=out) return out - @pk.workunit def equal_impl_1d_double(tid: int, view1: pk.View1D[pk.double], @@ -1578,31 +1576,6 @@ def equal(view1, view2): return view_result -@pk.workunit -def isnan_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8]): - out[tid] = isnan(view[tid]) - - -@pk.workunit -def isnan_impl_1d_float(tid: int, view: pk.View1D[pk.float], out: pk.View1D[pk.uint8]): - out[tid] = isnan(view[tid]) - - -def isnan(view): - out = pk.View([*view.shape], dtype=pk.uint8) - if "double" in str(view.dtype) or "float64" in str(view.dtype): - pk.parallel_for(view.shape[0], - isnan_impl_1d_double, - view=view, - out=out) - elif "float" in str(view.dtype): - pk.parallel_for(view.shape[0], - isnan_impl_1d_float, - view=view, - out=out) - return out - - @pk.workunit def isfinite_impl_1d_double(tid: int, view: pk.View1D[pk.double], out: pk.View1D[pk.uint8]): out[tid] = isfinite(view[tid]) From e289d48aa8cae1f095f6976d548fdca2f7e65832 Mon Sep 17 00:00:00 2001 From: Nader Al Awar Date: Mon, 19 Sep 2022 13:10:10 
-0500 Subject: [PATCH 14/18] PyKokkos: enable multi gpu usage in pykokkos --- pykokkos/__init__.py | 3 +- pykokkos/core/compile.sh | 13 +-- pykokkos/core/compiler.py | 2 +- pykokkos/core/cpp_setup.py | 90 +++++++++++++++++--- pykokkos/core/module_setup.py | 8 +- pykokkos/core/runtime.py | 26 ++++-- pykokkos/core/translators/bindings.py | 2 +- pykokkos/interface/__init__.py | 2 +- pykokkos/interface/execution_space.py | 14 +++ pykokkos/interface/views.py | 21 +++-- pykokkos/kokkos_manager/__init__.py | 117 +++++++++++++++++++++++++- 11 files changed, 262 insertions(+), 36 deletions(-) diff --git a/pykokkos/__init__.py b/pykokkos/__init__.py index f2947662..40df672c 100644 --- a/pykokkos/__init__.py +++ b/pykokkos/__init__.py @@ -8,7 +8,8 @@ initialize, finalize, get_default_space, set_default_space, get_default_precision, set_default_precision, - is_uvm_enabled, enable_uvm, disable_uvm + is_uvm_enabled, enable_uvm, disable_uvm, + set_device_id ) initialize() diff --git a/pykokkos/core/compile.sh b/pykokkos/core/compile.sh index cee2c609..df575216 100755 --- a/pykokkos/core/compile.sh +++ b/pykokkos/core/compile.sh @@ -9,6 +9,7 @@ PK_REAL="${6}" KOKKOS_LIB_PATH="${7}" KOKKOS_INCLUDE_PATH="${8}" COMPUTE_CAPABILITY="${9}" +LIB_SUFFIX="${10}" SRC=$(find -name "*.cpp") @@ -34,11 +35,11 @@ if [ "${COMPILER}" == "g++" ]; then -shared \ -fopenmp \ "${SRC}".o -o "${MODULE}" \ - "${KOKKOS_LIB_PATH}/libkokkoscontainers.so" \ - "${KOKKOS_LIB_PATH}/libkokkoscore.so" + "${KOKKOS_LIB_PATH}/libkokkoscontainers${LIB_SUFFIX}.so" \ + "${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so" elif [ "${COMPILER}" == "nvcc" ]; then - "${KOKKOS_LIB_PATH}/../bin/nvcc_wrapper" \ + "${KOKKOS_LIB_PATH}/../../bin/nvcc_wrapper" \ `python3 -m pybind11 --includes` \ -I.. \ -O3 \ @@ -54,7 +55,7 @@ elif [ "${COMPILER}" == "nvcc" ]; then -Dpk_exec_space="Kokkos::${EXEC_SPACE}" \ -Dpk_real="${PK_REAL}" - "${KOKKOS_LIB_PATH}/../bin/nvcc_wrapper" \ + "${KOKKOS_LIB_PATH}/../../bin/nvcc_wrapper" \ -I.. 
\ -O3 \ -shared \ @@ -62,6 +63,6 @@ elif [ "${COMPILER}" == "nvcc" ]; then --expt-extended-lambda \ -fopenmp \ "${SRC}".o -o "${MODULE}" \ - "${KOKKOS_LIB_PATH}/libkokkoscontainers.so" \ - "${KOKKOS_LIB_PATH}/libkokkoscore.so" + "${KOKKOS_LIB_PATH}/libkokkoscontainers${LIB_SUFFIX}.so" \ + "${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so" fi \ No newline at end of file diff --git a/pykokkos/core/compiler.py b/pykokkos/core/compiler.py index 4c985f11..d03f9d5c 100644 --- a/pykokkos/core/compiler.py +++ b/pykokkos/core/compiler.py @@ -178,7 +178,7 @@ def compile_entity( if module_setup.is_compiled(): return - cpp_setup = CppSetup(module_setup.module_file, self.functor_file, self.bindings_file) + cpp_setup = CppSetup(module_setup.module_file, module_setup.gpu_module_files, self.functor_file, self.bindings_file) translator = StaticTranslator(module_setup.name, self.functor_file, members) t_start: float = time.perf_counter() diff --git a/pykokkos/core/cpp_setup.py b/pykokkos/core/cpp_setup.py index 59951936..302640a4 100644 --- a/pykokkos/core/cpp_setup.py +++ b/pykokkos/core/cpp_setup.py @@ -3,10 +3,13 @@ import shutil import subprocess import sys +from types import ModuleType from typing import List, Tuple - -from pykokkos.interface import ExecutionSpace, get_default_layout, get_default_memory_space +from pykokkos.interface import ( + ExecutionSpace, get_default_layout, get_default_memory_space, + is_host_execution_space +) import pykokkos.kokkos_manager as km @@ -15,16 +18,18 @@ class CppSetup: Creates the directory to hold the translation and invokes the compiler """ - def __init__(self, module_file: str, functor: str, bindings: str): + def __init__(self, module_file: str, gpu_module_files: List[str], functor: str, bindings: str): """ CppSetup constructor :param module: the name of the file containing the compiled Python module + :param gpu_module_files: the list of names of files containing for each gpu module :param functor: the name of the generated functor file 
:param bindings: the name of the generated bindings file """ self.module_file: str = module_file + self.gpu_module_files: List[str] = gpu_module_files self.functor_file: str = functor self.bindings_file: str = bindings @@ -58,6 +63,8 @@ def compile( self.write_source(output_dir, functor, bindings) self.copy_script(output_dir) self.invoke_script(output_dir, space, enable_uvm, compiler) + if space is ExecutionSpace.Cuda and km.is_multi_gpu_enabled(): + self.copy_multi_gpu_kernel(output_dir) def initialize_directory(self, name: Path) -> None: @@ -115,13 +122,14 @@ def copy_script(self, output_dir: Path) -> None: print(f"Exception while copying views and makefile: {ex}") sys.exit(1) - def get_kokkos_paths(self) -> Tuple[Path, Path]: + def get_kokkos_paths(self, space: ExecutionSpace) -> Tuple[Path, Path]: """ Get the paths of the Kokkos instal lib and include directories. If the environment variable is set, use that - Kokkos install. If not, fall back to installed pykokkos-base - package. + Kokkos install. If not, fall back to the installed + pykokkos-base package. + :param space: the execution space to compile for :returns: a tuple of paths to the Kokkos lib/ and include/ directories respectively """ @@ -139,8 +147,9 @@ def get_kokkos_paths(self) -> Tuple[Path, Path]: return lib_path, include_path - from pykokkos.bindings import kokkos - install_path = Path(kokkos.__path__[0]).parent + is_cpu: bool = is_host_execution_space(space) + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + install_path = Path(kokkos_lib.__path__[0]) if (install_path / "lib").is_dir(): lib_path = install_path / "lib" @@ -150,10 +159,24 @@ def get_kokkos_paths(self) -> Tuple[Path, Path]: raise RuntimeError("lib/ or lib64/ directories not found in installed pykokkos-base package." 
f" Try setting {self.lib_path_env} instead.") - include_path = lib_path.parent / "include/kokkos" + include_path = lib_path.parent.parent / "include/kokkos" return lib_path, include_path + def get_kokkos_lib_suffix(self, space: ExecutionSpace) -> str: + """ + Get the suffix of the libkokkoscore and libkokkoscontainers + libraries corresponding to the enabled device + + :param space: the execution space to compile for + :returns: the suffix as a string + """ + + if is_host_execution_space(space) or not km.is_multi_gpu_enabled(): + return "" + + return f"_{km.get_device_id()}" + def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: bool, compiler: str) -> None: """ Invoke the compilation script @@ -176,8 +199,9 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo precision: str = km.get_default_precision().__name__.split(".")[-1] lib_path: Path include_path: Path - lib_path, include_path = self.get_kokkos_paths() + lib_path, include_path = self.get_kokkos_paths(space) compute_capability: str = self.get_cuda_compute_capability(compiler) + lib_suffix: str = self.get_kokkos_lib_suffix(space) command: List[str] = [f"./{self.script}", compiler, # What compiler to use @@ -188,7 +212,8 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo precision, # Default real precision str(lib_path), # Path to Kokkos install lib/ directory str(include_path), # Path to Kokkos install include/ directory - compute_capability] # Device compute capability + compute_capability, # Device compute capability + lib_suffix] # The libkokkos* suffix identifying the gpu compile_result = subprocess.run(command, cwd=output_dir, capture_output=True, check=False) if compile_result.returncode != 0: @@ -207,6 +232,49 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo print(f"patchelf failed") sys.exit(1) + def copy_multi_gpu_kernel(self, output_dir: Path) -> None: + """ + Copy the kernel 
.so file once for each device and run patchelf + to point to the right library + + :param output_dir: the base directory + """ + + original_module: Path = output_dir / self.module_file + for id, (kernel_filename, kokkos_gpu_module) in enumerate(zip(self.gpu_module_files, km.get_kokkos_gpu_modules())): + kernel_path: Path = output_dir / kernel_filename + + try: + shutil.copy(original_module, kernel_path) + except Exception as ex: + print(f"Exception while copying kernel: {ex}") + sys.exit(1) + + lib_path: Path = Path(kokkos_gpu_module.__path__[0]) / "lib" + patchelf: List[str] = ["patchelf", + "--set-rpath", + str(lib_path), + kernel_filename] + + patchelf_result = subprocess.run(patchelf, cwd=output_dir, capture_output=True, check=False) + if patchelf_result.returncode != 0: + print(patchelf_result.stderr.decode("utf-8")) + print(f"patchelf failed") + sys.exit(1) + + # Now replace the needed libkokkos* libraries with the correct version + needed_libraries: str = subprocess.run(["patchelf", "--print-needed", kernel_filename], cwd=output_dir, capture_output=True, check=False).stdout.decode("utf-8") + + for line in needed_libraries.splitlines(): + if "libkokkoscore" in line or "libkokkoscontainers" in line: + # Line will be of the form f"libkokkoscore_{id}.so.3.4" + # This will extract id + current_id: int = int(line.split("_")[1].split(".")[0]) + to_remove: str = line + to_add: str = line.replace(f"_{current_id}", f"_{id}") + + subprocess.run(["patchelf", "--replace-needed", to_remove, to_add, kernel_filename], cwd=output_dir, capture_output=True, check=False) + def get_cuda_compute_capability(self, compiler: str) -> str: """ Get the compute capability of an Nvidia GPU diff --git a/pykokkos/core/module_setup.py b/pykokkos/core/module_setup.py index 3f294d02..028cec92 100644 --- a/pykokkos/core/module_setup.py +++ b/pykokkos/core/module_setup.py @@ -5,7 +5,7 @@ import sys import sysconfig import time -from typing import Callable, Optional, Union +from typing import 
Callable, List, Optional, Union from pykokkos.interface import ExecutionSpace import pykokkos.kokkos_manager as km @@ -105,9 +105,15 @@ def __init__( self.main: Path = self.get_main_path() self.output_dir: Optional[Path] = self.get_output_dir(self.main, self.metadata, space) + self.gpu_module_files: List[str] = [] + if km.is_multi_gpu_enabled(): + self.gpu_module_files = [f"kernel{device_id}{suffix}" for device_id in range(km.get_num_gpus())] if self.output_dir is not None: self.path: str = os.path.join(self.output_dir, self.module_file) + if km.is_multi_gpu_enabled(): + self.gpu_module_paths: str = [os.path.join(self.output_dir, module_file) for module_file in self.gpu_module_files] + self.name: str = self.path.replace("/", "_") self.name: str = self.name.replace("-", "_") self.name: str = self.name.replace(".", "_") diff --git a/pykokkos/core/runtime.py b/pykokkos/core/runtime.py index d9f02a13..14a9291a 100644 --- a/pykokkos/core/runtime.py +++ b/pykokkos/core/runtime.py @@ -9,7 +9,8 @@ from pykokkos.core.visitors import visitors_util from pykokkos.interface import ( DataType, ExecutionPolicy, ExecutionSpace, MemorySpace, - RandomPool, RangePolicy, TeamPolicy, View, ViewType + RandomPool, RangePolicy, TeamPolicy, View, ViewType, + is_host_execution_space ) import pykokkos.kokkos_manager as km @@ -47,7 +48,7 @@ def run_workload(self, space: ExecutionSpace, workload: object) -> None: if members is None: raise RuntimeError("ERROR: members cannot be none") - self.execute(workload, module_setup, members) + self.execute(workload, module_setup, members, space) self.run_callbacks(workload, members) @@ -82,7 +83,7 @@ def run_workunit( if members is None: raise RuntimeError("ERROR: members cannot be none") - return self.execute(workunit, module_setup, members, policy=policy, name=name, **kwargs) + return self.execute(workunit, module_setup, members, policy.space, policy=policy, name=name, **kwargs) def is_debug(self, space: ExecutionSpace) -> bool: """ @@ -100,6 +101,7 @@ 
def execute( entity: Union[object, Callable[..., None]], module_setup: ModuleSetup, members: PyKokkosMembers, + space: ExecutionSpace, policy: Optional[ExecutionPolicy] = None, name: Optional[str] = None, **kwargs @@ -110,13 +112,21 @@ def execute( :param entity: the workload or workunit object :param module_path: the path to the compiled module :param members: a collection of PyKokkos related members + :param space: the execution space :param policy: the execution policy for workunits :param name: the name of the kernel :param kwargs: the keyword arguments passed to the workunit :returns: the result of the operation (None for "for" and workloads) """ - module = self.import_module(module_setup.name, module_setup.path) + module_path: str + if is_host_execution_space(space): + module_path = module_setup.path + else: + device_id: int = km.get_device_id() + module_path = module_setup.gpu_module_paths[device_id] + + module = self.import_module(module_setup.name, module_path) args: Dict[str, Any] = self.get_arguments(entity, members, policy, **kwargs) if name is None: @@ -141,12 +151,14 @@ def import_module(self, module_name: str, module_path: str): :returns: the imported module """ - if module_name in sys.modules: - return sys.modules[module_name] + hashed_name: str = module_name.replace("kernel", f"kernel_{km.get_device_id()}") + + if hashed_name in sys.modules: + return sys.modules[hashed_name] spec = importlib.util.spec_from_file_location(module_name, module_path) module = importlib.util.module_from_spec(spec) - sys.modules[module_name] = module + sys.modules[hashed_name] = module spec.loader.exec_module(module) return module diff --git a/pykokkos/core/translators/bindings.py b/pykokkos/core/translators/bindings.py index 74e889d0..0def6a82 100644 --- a/pykokkos/core/translators/bindings.py +++ b/pykokkos/core/translators/bindings.py @@ -269,7 +269,7 @@ def generate_call(operation: str, functor: str, members: PyKokkosMembers, tag: c if is_hierarchical: 
args.append(f"Kokkos::TeamPolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.LeagueSize.value},Kokkos::AUTO,{Keywords.VectorLength.value})") else: - args.append(f"Kokkos::RangePolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.ThreadsBegin.value},{Keywords.ThreadsEnd.value})") + args.append(f"Kokkos::RangePolicy<{Keywords.DefaultExecSpace.value},{functor}::{tag_name}>({Keywords.DefaultExecSpaceInstance.value}, {Keywords.ThreadsBegin.value},{Keywords.ThreadsEnd.value})") args.append(Keywords.Instance.value) diff --git a/pykokkos/interface/__init__.py b/pykokkos/interface/__init__.py index 8c975b5f..48bdfdc2 100644 --- a/pykokkos/interface/__init__.py +++ b/pykokkos/interface/__init__.py @@ -27,7 +27,7 @@ ExecutionPolicy, RangePolicy, MDRangePolicy, TeamPolicy, TeamThreadRange, ThreadVectorRange, Iterate, Rank ) -from .execution_space import ExecutionSpace +from .execution_space import ExecutionSpace, is_host_execution_space from .layout import Layout, get_default_layout from .hierarchical import ( AUTO, TeamMember, PerTeam, PerThread, single diff --git a/pykokkos/interface/execution_space.py b/pykokkos/interface/execution_space.py index 0d31eae1..51aae703 100644 --- a/pykokkos/interface/execution_space.py +++ b/pykokkos/interface/execution_space.py @@ -1,5 +1,6 @@ from enum import Enum +import pykokkos.kokkos_manager as km class ExecutionSpace(Enum): Cuda = "Cuda" @@ -8,3 +9,16 @@ class ExecutionSpace(Enum): Serial = "Serial" Debug = "Debug" Default = "Default" + +def is_host_execution_space(space: ExecutionSpace) -> bool: + """ + Check if the supplied execution space runs on the host + + :param space: the space being checked + :returns: True if the space runs on the host + """ + + if space is ExecutionSpace.Default: + space = km.get_default_space() + + return space in {ExecutionSpace.OpenMP, ExecutionSpace.Pthreads, ExecutionSpace.Serial} \ No newline at end of file diff --git a/pykokkos/interface/views.py 
b/pykokkos/interface/views.py index 01e9ebd0..9b50d9c4 100644 --- a/pykokkos/interface/views.py +++ b/pykokkos/interface/views.py @@ -1,9 +1,9 @@ from __future__ import annotations import ctypes -import os import math from enum import Enum import sys +from types import ModuleType from typing import ( Dict, Generic, Iterator, List, Optional, Tuple, TypeVar, Union @@ -225,7 +225,10 @@ def resize(self, dimension: int, size: int) -> None: shape_list[dimension] = size self.shape = tuple(shape_list) - self.array = kokkos.array( + + is_cpu: bool = self.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + self.array = kokkos_lib.array( "", self.shape, None, None, self.dtype.value, self.space.value, self.layout.value, self.trait.value) self.data = np.array(self.array, copy=False) @@ -284,6 +287,9 @@ def _init_view( self.layout: Layout = layout self.trait: Trait = trait + is_cpu: bool = self.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + if self.dtype == pk.float: self.dtype = DataType.float elif self.dtype == pk.double: @@ -294,11 +300,11 @@ def _init_view( # NumPy for now... 
self.array = array else: - self.array = kokkos.unmanaged_array(array, dtype=self.dtype.value, space=self.space.value, layout=self.layout.value) + self.array = kokkos_lib.unmanaged_array(array, dtype=self.dtype.value, space=self.space.value, layout=self.layout.value) else: if len(self.shape) == 0: shape = [1] - self.array = kokkos.array("", shape, None, None, self.dtype.value, space.value, layout.value, trait.value) + self.array = kokkos_lib.array("", shape, None, None, self.dtype.value, space.value, layout.value, trait.value) self.data = np.array(self.array, copy=False) def _get_type(self, dtype: Union[DataType, type]) -> Optional[DataType]: @@ -382,10 +388,15 @@ def __init__(self, parent_view: Union[Subview, View], data_slice: Union[slice, T self.data: np.ndarray = parent_view.data[data_slice] self.dtype = parent_view.dtype - self.array = kokkos.array( + + is_cpu: bool = self.parent_view.space is MemorySpace.HostSpace + kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) + + self.array = kokkos_lib.array( self.data, dtype=parent_view.dtype.value, space=parent_view.space.value, layout=parent_view.layout.value, trait=kokkos.Unmanaged) self.shape: Tuple[int] = self.data.shape + if self.data.shape == (0,): self.data = np.array([], dtype=self.data.dtype) self.shape = () diff --git a/pykokkos/kokkos_manager/__init__.py b/pykokkos/kokkos_manager/__init__.py index 3d43647b..18e0a270 100644 --- a/pykokkos/kokkos_manager/__init__.py +++ b/pykokkos/kokkos_manager/__init__.py @@ -1,5 +1,6 @@ import os -from typing import Any, Dict +from types import ModuleType +from typing import Any, Dict, List from pykokkos.bindings import kokkos from pykokkos.interface.execution_space import ExecutionSpace @@ -9,7 +10,12 @@ "EXECUTION_SPACE": ExecutionSpace.OpenMP, "REAL_DTYPE": double, "IS_INITIALIZED": False, - "ENABLE_UVM": False + "ENABLE_UVM": False, + "MULTI_GPU": False, + "NUM_GPUS": 0, + "KOKKOS_GPU_MODULE": kokkos, + "KOKKOS_GPU_MODULE_LIST": [], + "DEVICE_ID": 0 } def 
get_default_space() -> ExecutionSpace: @@ -99,3 +105,110 @@ def finalize() -> None: if CONSTANTS["IS_INITIALIZED"] == True: kokkos.finalize() CONSTANTS["IS_INITIALIZED"] = False + +def get_kokkos_module(is_cpu: bool) -> ModuleType: + """ + Get the current kokkos module + + :param is_cpu: is the lib needed for cpu + :returns: the kokkos module + """ + + if is_cpu: + return kokkos + + return CONSTANTS["KOKKOS_GPU_MODULE"] + +def set_device_id(device_id: int) -> None: + """ + Set the current device ID + + :param device_id: the ID of the device to enable + """ + + if not isinstance(device_id, int): + raise TypeError("'device_id' must be of type 'int'") + + num_gpus: int = CONSTANTS["NUM_GPUS"] + if device_id >= num_gpus or device_id < 0: + raise RuntimeError(f"Device {device_id} does not exist (range [0..{num_gpus})") + + if num_gpus == 1: + return + + import cupy + cupy.cuda.runtime.setDevice(device_id) + CONSTANTS["DEVICE_ID"] = device_id + + gpu_lib = CONSTANTS["KOKKOS_GPU_MODULE_LIST"][device_id] + CONSTANTS["KOKKOS_GPU_MODULE"] = gpu_lib + +def get_device_id() -> int: + """ + Get the ID of the currently enabled device + + :returns: the ID of the enabled device + """ + + return CONSTANTS["DEVICE_ID"] + +def is_multi_gpu_enabled() -> bool: + """ + Check if pykokkos has been configured for multi-gpu use + + :returns: True or False + """ + + return CONSTANTS["MULTI_GPU"] + +def get_kokkos_gpu_modules() -> List: + """ + Get the pykokkos-base gpu modules + + :returns: the list of modules + """ + + return CONSTANTS["KOKKOS_GPU_MODULE_LIST"] + +def get_num_gpus() -> bool: + """ + Get the number of gpus pykokkos has been configured for + + :returns: the number of gpus + """ + + return CONSTANTS["NUM_GPUS"] + +try: + # Import multiple kokkos libs to support multiple devices per + # process. This assumes that there are modules named f"gpu{id}" + # that can be imported. 
+ import atexit + import cupy as cp + import importlib + import sys + + NUM_CUDA_GPUS: int = cp.cuda.runtime.getDeviceCount() + CONSTANTS["MULTI_GPU"] = True + CONSTANTS["NUM_GPUS"] = NUM_CUDA_GPUS + KOKKOS_LIBS: List[str] = [f"gpu{id}" for id in range(NUM_CUDA_GPUS)] + + KOKKOS_LIB_INSTANCES: List = [] + for id, lib in enumerate(KOKKOS_LIBS): + module = importlib.import_module(lib) + KOKKOS_LIB_INSTANCES.append(module) + + # Can't pass device id directly to initialize(), so need to + # append argument to select device to sys.argv. + # (see https://github.com/kokkos/pykokkos-base/blob/d3946ed56483f3cbe2e660cc50fe73c50dad19ea/src/libpykokkos.cpp#L65) + sys.argv.append(f"--device-id={id}") + module.initialize() + atexit.register(module.finalize) + sys.argv.pop() + + CONSTANTS["KOKKOS_GPU_MODULE_LIST"] = KOKKOS_LIB_INSTANCES + CONSTANTS["KOKKOS_GPU_MODULE"] = KOKKOS_LIB_INSTANCES[0] + +except Exception: + import traceback + traceback.print_exc() From 0ea0589e9e61a945bec57adff6fcf2e9c2c447cc Mon Sep 17 00:00:00 2001 From: Nader Al Awar Date: Mon, 19 Sep 2022 13:10:41 -0500 Subject: [PATCH 15/18] Examples: add multi gpu usage example --- examples/pykokkos/multi_gpu.py | 60 ++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 examples/pykokkos/multi_gpu.py diff --git a/examples/pykokkos/multi_gpu.py b/examples/pykokkos/multi_gpu.py new file mode 100644 index 00000000..d20fd2c6 --- /dev/null +++ b/examples/pykokkos/multi_gpu.py @@ -0,0 +1,60 @@ +import pykokkos as pk + +import numpy as np +import cupy as cp + +pk.set_default_space(pk.Cuda) + +size = 10000 + +pk.set_device_id(0) +cp_arr_0 = cp.arange(size).astype(np.int32) + +pk.set_device_id(1) +cp_arr_1 = cp.arange(size).astype(np.int32) + +print(cp_arr_0.device) +print(cp_arr_1.device) + +@pk.workunit(cp_arr = pk.ViewTypeInfo(space=pk.CudaSpace)) +def reduction_cp(i: int, acc: pk.Acc[int], cp_arr: pk.View1D[int]): + acc += cp_arr[i] + +pk.set_device_id(1) +cp_view_0 = 
pk.from_cupy(cp_arr_1) +result_0 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduction_cp, cp_arr=cp_view_0) +print(result_0) + +pk.set_device_id(0) +cp_view_1 = pk.from_cupy(cp_arr_0) +result_1 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduction_cp, cp_arr=cp_view_1) + +print(f"Reducing array 0: {result_0}") +print(f"Reducing array 1: {result_1}") +print(f"Sum: {result_0 + result_1}") + +pk.set_device_id(0) +view_0 = pk.View((size,), dtype=int) + +pk.set_device_id(1) +view_1 = pk.View((size,), dtype=int) + +@pk.workunit +def init_view(i: int, view: pk.View1D[int]): + view[i] = i + +@pk.workunit +def reduce_view(i: int, acc: pk.Acc[int], view: pk.View1D[int]): + acc += view[i] + +pk.set_device_id(0) +pk.parallel_for(pk.RangePolicy(pk.Cuda, 0, size), init_view, view=view_0) +result_0 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduce_view, view=view_0) + +pk.set_device_id(1) +pk.parallel_for(pk.RangePolicy(pk.Cuda, 0, size), init_view, view=view_1) +result_1 = pk.parallel_reduce(pk.RangePolicy(pk.Cuda, 0, size), reduce_view, view=view_1) + +print(f"Reducing view 0: {result_0}") +print(f"Reducing view 1: {result_1}") +print(f"Sum: {result_0 + result_1}") From 5c73042632de34e3bd1d83bd5b8c1a86350e705d Mon Sep 17 00:00:00 2001 From: Nader Al Awar Date: Tue, 27 Sep 2022 14:05:16 -0500 Subject: [PATCH 16/18] CppSetup: pass compiler path to script and account for differences in kokkos paths when multi gpu is not enabled --- pykokkos/core/compile.sh | 5 +++-- pykokkos/core/cpp_setup.py | 36 +++++++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/pykokkos/core/compile.sh b/pykokkos/core/compile.sh index df575216..1b96e118 100755 --- a/pykokkos/core/compile.sh +++ b/pykokkos/core/compile.sh @@ -10,6 +10,7 @@ KOKKOS_LIB_PATH="${7}" KOKKOS_INCLUDE_PATH="${8}" COMPUTE_CAPABILITY="${9}" LIB_SUFFIX="${10}" +COMPILER_PATH="${11}" SRC=$(find -name "*.cpp") @@ -39,7 +40,7 @@ if [ "${COMPILER}" == "g++" ]; 
then "${KOKKOS_LIB_PATH}/libkokkoscore${LIB_SUFFIX}.so" elif [ "${COMPILER}" == "nvcc" ]; then - "${KOKKOS_LIB_PATH}/../../bin/nvcc_wrapper" \ + "${COMPILER_PATH}" \ `python3 -m pybind11 --includes` \ -I.. \ -O3 \ @@ -55,7 +56,7 @@ elif [ "${COMPILER}" == "nvcc" ]; then -Dpk_exec_space="Kokkos::${EXEC_SPACE}" \ -Dpk_real="${PK_REAL}" - "${KOKKOS_LIB_PATH}/../../bin/nvcc_wrapper" \ + "${COMPILER_PATH}" \ -I.. \ -O3 \ -shared \ diff --git a/pykokkos/core/cpp_setup.py b/pykokkos/core/cpp_setup.py index 302640a4..cb0e49b0 100644 --- a/pykokkos/core/cpp_setup.py +++ b/pykokkos/core/cpp_setup.py @@ -122,7 +122,7 @@ def copy_script(self, output_dir: Path) -> None: print(f"Exception while copying views and makefile: {ex}") sys.exit(1) - def get_kokkos_paths(self, space: ExecutionSpace) -> Tuple[Path, Path]: + def get_kokkos_paths(self, space: ExecutionSpace, compiler: str) -> Tuple[Path, Path, Path]: """ Get the paths of the Kokkos instal lib and include directories. If the environment variable is set, use that @@ -130,8 +130,9 @@ def get_kokkos_paths(self, space: ExecutionSpace) -> Tuple[Path, Path]: pykokkos-base package. 
:param space: the execution space to compile for - :returns: a tuple of paths to the Kokkos lib/ and include/ - directories respectively + :param compiler: what compiler to use + :returns: a tuple of paths to the Kokkos lib/, include/, + and compiler to be used """ lib_path: Path @@ -150,18 +151,29 @@ def get_kokkos_paths(self, space: ExecutionSpace) -> Tuple[Path, Path]: is_cpu: bool = is_host_execution_space(space) kokkos_lib: ModuleType = km.get_kokkos_module(is_cpu) install_path = Path(kokkos_lib.__path__[0]) + lib_parent_path: Path + if km.is_multi_gpu_enabled(): + lib_parent_path = install_path + else: + lib_parent_path = install_path.parent - if (install_path / "lib").is_dir(): - lib_path = install_path / "lib" - elif (install_path / "lib64").is_dir(): - lib_path = install_path / "lib64" + if (lib_parent_path / "lib").is_dir(): + lib_path = lib_parent_path / "lib" + elif (lib_parent_path / "lib64").is_dir(): + lib_path = lib_parent_path / "lib64" else: raise RuntimeError("lib/ or lib64/ directories not found in installed pykokkos-base package." 
f" Try setting {self.lib_path_env} instead.") - include_path = lib_path.parent.parent / "include/kokkos" + include_path = install_path.parent / "include/kokkos" + + compiler_path: Path + if compiler != "nvcc": + compiler_path = Path("g++") + else: + compiler_path = install_path.parent / "bin/nvcc_wrapper" - return lib_path, include_path + return lib_path, include_path, compiler_path def get_kokkos_lib_suffix(self, space: ExecutionSpace) -> str: """ @@ -199,7 +211,8 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo precision: str = km.get_default_precision().__name__.split(".")[-1] lib_path: Path include_path: Path - lib_path, include_path = self.get_kokkos_paths(space) + compiler_path: Path + lib_path, include_path, compiler_path = self.get_kokkos_paths(space, compiler) compute_capability: str = self.get_cuda_compute_capability(compiler) lib_suffix: str = self.get_kokkos_lib_suffix(space) @@ -213,7 +226,8 @@ def invoke_script(self, output_dir: Path, space: ExecutionSpace, enable_uvm: boo str(lib_path), # Path to Kokkos install lib/ directory str(include_path), # Path to Kokkos install include/ directory compute_capability, # Device compute capability - lib_suffix] # The libkokkos* suffix identifying the gpu + lib_suffix, # The libkokkos* suffix identifying the gpu + str(compiler_path)] # The path to the compiler to use compile_result = subprocess.run(command, cwd=output_dir, capture_output=True, check=False) if compile_result.returncode != 0: From eac7334df6548206b02f03004082001e34802a3c Mon Sep 17 00:00:00 2001 From: Nader Al Awar Date: Tue, 27 Sep 2022 14:06:27 -0500 Subject: [PATCH 17/18] kokkos_manager: fix constant values when multi gpu is not enabled --- pykokkos/kokkos_manager/__init__.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pykokkos/kokkos_manager/__init__.py b/pykokkos/kokkos_manager/__init__.py index 18e0a270..d68b1b44 100644 --- a/pykokkos/kokkos_manager/__init__.py +++ 
b/pykokkos/kokkos_manager/__init__.py @@ -189,8 +189,6 @@ def get_num_gpus() -> bool: import sys NUM_CUDA_GPUS: int = cp.cuda.runtime.getDeviceCount() - CONSTANTS["MULTI_GPU"] = True - CONSTANTS["NUM_GPUS"] = NUM_CUDA_GPUS KOKKOS_LIBS: List[str] = [f"gpu{id}" for id in range(NUM_CUDA_GPUS)] KOKKOS_LIB_INSTANCES: List = [] @@ -206,9 +204,10 @@ def get_num_gpus() -> bool: atexit.register(module.finalize) sys.argv.pop() + CONSTANTS["MULTI_GPU"] = True + CONSTANTS["NUM_GPUS"] = NUM_CUDA_GPUS CONSTANTS["KOKKOS_GPU_MODULE_LIST"] = KOKKOS_LIB_INSTANCES CONSTANTS["KOKKOS_GPU_MODULE"] = KOKKOS_LIB_INSTANCES[0] except Exception: - import traceback - traceback.print_exc() + pass From 15837b00874ad074b9a4e2296fdfc16e7d2ab31c Mon Sep 17 00:00:00 2001 From: Nader Al Awar Date: Tue, 27 Sep 2022 14:07:43 -0500 Subject: [PATCH 18/18] Runtime: check if multi gpu is enabled before selecting kokkos gpu module --- pykokkos/core/runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykokkos/core/runtime.py b/pykokkos/core/runtime.py index 14a9291a..a8dbc57e 100644 --- a/pykokkos/core/runtime.py +++ b/pykokkos/core/runtime.py @@ -120,7 +120,7 @@ def execute( """ module_path: str - if is_host_execution_space(space): + if is_host_execution_space(space) or not km.is_multi_gpu_enabled(): module_path = module_setup.path else: device_id: int = km.get_device_id()