From fccf7c7bd0c3aba180cc040c96818c6e951523a2 Mon Sep 17 00:00:00 2001 From: Troy Chiu <114708546+troychiu@users.noreply.github.com> Date: Thu, 21 Dec 2023 17:39:00 +0800 Subject: [PATCH 01/63] async s3fs plugin (#1936) Signed-off-by: troychiu Signed-off-by: Kevin Su Co-authored-by: Kevin Su --- .github/workflows/pythonbuild.yml | 1 + plugins/flytekit-async-fsspec/README.md | 14 + .../flytekitplugins/async_fsspec/__init__.py | 16 ++ .../async_fsspec/s3fs/__init__.py | 0 .../async_fsspec/s3fs/constants.py | 6 + .../flytekitplugins/async_fsspec/s3fs/s3fs.py | 240 ++++++++++++++++++ plugins/flytekit-async-fsspec/setup.py | 37 +++ .../flytekit-async-fsspec/tests/__init__.py | 0 .../flytekit-async-fsspec/tests/test_s3fs.py | 201 +++++++++++++++ plugins/setup.py | 1 + 10 files changed, 516 insertions(+) create mode 100644 plugins/flytekit-async-fsspec/README.md create mode 100644 plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/__init__.py create mode 100644 plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/__init__.py create mode 100644 plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/constants.py create mode 100644 plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/s3fs.py create mode 100644 plugins/flytekit-async-fsspec/setup.py create mode 100644 plugins/flytekit-async-fsspec/tests/__init__.py create mode 100644 plugins/flytekit-async-fsspec/tests/test_s3fs.py diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 0b2ba136d7..54da3c5327 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -174,6 +174,7 @@ jobs: plugin-names: # Please maintain an alphabetical order in the following list - flytekit-airflow + - flytekit-async-fsspec - flytekit-aws-athena - flytekit-aws-batch # TODO: uncomment this when the sagemaker agent is implemented: https://github.com/flyteorg/flyte/issues/4079 diff --git a/plugins/flytekit-async-fsspec/README.md 
b/plugins/flytekit-async-fsspec/README.md new file mode 100644 index 0000000000..7dfc1c6de9 --- /dev/null +++ b/plugins/flytekit-async-fsspec/README.md @@ -0,0 +1,14 @@ +# Flytekit Async fsspec Plugin + +The Flyte async fsspec plugin is a powerful addition to the Flyte ecosystem designed to optimize the performance of object transmission. This plugin focuses on overriding key methods of the file systems in fsspec to introduce efficiency improvements, resulting in accelerated data transfers between Flyte workflows and object storage. + +Currently, the async fsspec plugin improves the following file systems: +1. s3fs + +To install the plugin, run the following command: + +```bash +pip install flytekitplugins-async-fsspec +``` + +Once installed, the plugin will automatically override the original file system and register optimized ones, seamlessly integrating with your Flyte workflows. diff --git a/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/__init__.py b/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/__init__.py new file mode 100644 index 0000000000..3cc0de14e7 --- /dev/null +++ b/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/__init__.py @@ -0,0 +1,16 @@ +""" +.. currentmodule:: flytekitplugins.async_fsspec + +This package contains things that are useful when extending Flytekit. + +.. 
autosummary:: + :template: custom.rst + :toctree: generated/ + + AsyncS3FileSystem +""" +import fsspec + +from .s3fs.s3fs import AsyncS3FileSystem + +fsspec.register_implementation("s3", AsyncS3FileSystem) diff --git a/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/__init__.py b/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/constants.py b/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/constants.py new file mode 100644 index 0000000000..e5058a03c1 --- /dev/null +++ b/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/constants.py @@ -0,0 +1,6 @@ +DEFAULT_UPLOAD_CHUNK_SIZE = 50 * 2**20 # 50MB +DEFAULT_CONCURRENT_UPLOAD = 4 +SINGLE_OBJECT_UPLOAD_LIMIT = 5 * 2**30 # 5GB +DEFAULT_DOWNLOAD_CHUNK_SIZE = 50 * 2**20 # 50MB +DEFAULT_CONCURRENT_DOWNLOAD = 4 +DEFAULT_DOWNLOAD_BODY_READ_SIZE = 2**16 # from s3fs diff --git a/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/s3fs.py b/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/s3fs.py new file mode 100644 index 0000000000..c44d09a23c --- /dev/null +++ b/plugins/flytekit-async-fsspec/flytekitplugins/async_fsspec/s3fs/s3fs.py @@ -0,0 +1,240 @@ +import asyncio +import mimetypes +import os + +from fsspec.callbacks import _DEFAULT_CALLBACK +from s3fs import S3FileSystem +from s3fs.core import S3_RETRYABLE_ERRORS, version_id_kw + +from .constants import ( + DEFAULT_CONCURRENT_DOWNLOAD, + DEFAULT_CONCURRENT_UPLOAD, + DEFAULT_DOWNLOAD_BODY_READ_SIZE, + DEFAULT_DOWNLOAD_CHUNK_SIZE, + DEFAULT_UPLOAD_CHUNK_SIZE, + SINGLE_OBJECT_UPLOAD_LIMIT, +) + + +class AsyncS3FileSystem(S3FileSystem): + def __init__(self, **s3kwargs): + super().__init__(**s3kwargs) + + async def _put_file( + self, + lpath, + rpath, + callback=_DEFAULT_CALLBACK, + chunksize=DEFAULT_UPLOAD_CHUNK_SIZE, + 
concurrent_upload=DEFAULT_CONCURRENT_UPLOAD, + **kwargs, + ): + """ + Put a file from lpath to rpath. + Args: + lpath (str): The local path of the file to be uploaded. + rpath (str): The remote path which the file should be uploaded to. + callback (function, optional): The callback function. + chunksize (int, optional): Upload chunksize. Defaults to 50 * 2**20 (50MB). + concurrent_upload (int, optional): The number of concurrent upload when using multipart upload. Defaults to 4. + """ + bucket, key, _ = self.split_path(rpath) + if os.path.isdir(lpath): + if key: + # don't make remote "directory" + return + else: + await self._mkdir(lpath) + size = os.path.getsize(lpath) + callback.set_size(size) + + if "ContentType" not in kwargs: + content_type, _ = mimetypes.guess_type(lpath) + if content_type is not None: + kwargs["ContentType"] = content_type + + with open(lpath, "rb") as f0: + if size < min(SINGLE_OBJECT_UPLOAD_LIMIT, 2 * chunksize): + chunk = f0.read() + await self._call_s3("put_object", Bucket=bucket, Key=key, Body=chunk, **kwargs) + callback.relative_update(size) + else: + mpu = await self._call_s3("create_multipart_upload", Bucket=bucket, Key=key, **kwargs) + + # async function to upload a single chunk + async def upload_chunk(chunk, part_number): + result = await self._call_s3( + "upload_part", + Bucket=bucket, + PartNumber=part_number, + UploadId=mpu["UploadId"], + Body=chunk, + Key=key, + ) + callback.relative_update(len(chunk)) + return {"PartNumber": part_number, "ETag": result["ETag"]} + + tasks = set() + part_number = 1 + parts = [] + read_end = False + while True: + while len(tasks) < concurrent_upload: + chunk = f0.read(chunksize) + if not chunk: + read_end = True + break + tasks.add(asyncio.create_task(upload_chunk(chunk, part_number))) + part_number += 1 + if read_end: + break + done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED) + parts.extend(map(lambda x: x.result(), done)) + tasks = pending + + parts.extend(await 
asyncio.gather(*tasks)) + parts.sort(key=lambda part: part["PartNumber"]) + await self._call_s3( + "complete_multipart_upload", + Bucket=bucket, + Key=key, + UploadId=mpu["UploadId"], + MultipartUpload={"Parts": parts}, + ) + while rpath: + self.invalidate_cache(rpath) + rpath = self._parent(rpath) + + async def _get_file( + self, + rpath, + lpath, + callback=_DEFAULT_CALLBACK, + version_id=None, + chunksize=DEFAULT_DOWNLOAD_CHUNK_SIZE, + concurrent_download=DEFAULT_CONCURRENT_DOWNLOAD, + ): + """ + Get a file from rpath to lpath. + Args: + rpath (str): The remote path of the file to be downloaded. + lpath (str): The local path which the file should be downloaded to. + callback (function, optional): The callback function. + chunksize (int, optional): Download chunksize. Defaults to 50 * 2**20 (50MB). + version_id (str, optional): The version id of the file. Defaults to None. + """ + if os.path.isdir(lpath): + return + + # get the file size + file_info = await self._info(path=rpath, version_id=version_id) + file_size = file_info["size"] + + bucket, key, vers = self.split_path(rpath) + + # the async function to get a range of the remote file + async def _open_file(start_byte: int, end_byte: int = None): + kw = self.req_kw.copy() + if end_byte: + kw["Range"] = f"bytes={start_byte}-{end_byte}" + else: + kw["Range"] = f"bytes={start_byte}" + resp = await self._call_s3( + "get_object", + Bucket=bucket, + Key=key, + **version_id_kw(version_id or vers), + **kw, + ) + return resp["Body"], resp.get("ContentLength", None) + + # Refer to s3fs's implementation + async def handle_read_error(body, failed_reads, restart_byte, end_byte=None): + if failed_reads >= self.retries: + raise + try: + body.close() + except Exception: + pass + + await asyncio.sleep(min(1.7**failed_reads * 0.1, 15)) + body, _ = await _open_file(restart_byte, end_byte) + return body + + # According to s3fs documentation, some file systems might not be able to measure the file’s size, + # in which case, the 
returned dict will include 'size': None. When we cannot get the file size + # in advance, we keep using the original implementation of s3fs. + if file_size is None: + # From s3fs + body, content_length = await _open_file(start_byte=0) + callback.set_size(content_length) + + failed_reads = 0 + bytes_read = 0 + + try: + with open(lpath, "wb") as f0: + while True: + try: + chunk = await body.read(DEFAULT_DOWNLOAD_BODY_READ_SIZE) + except S3_RETRYABLE_ERRORS: + failed_reads += 1 + body = await handle_read_error(body, failed_reads, bytes_read) + continue + + if not chunk: + break + + f0.write(chunk) + bytes_read += len(chunk) + callback.relative_update(len(chunk)) + finally: + try: + body.close() + except Exception: + pass + else: + callback.set_size(file_size) + with open(lpath, "wb") as f0: + # async function to download a single chunk + async def download_chunk(chunk_index: int): + start_byte = chunk_index * chunksize + end_byte = min(start_byte + chunksize, file_size) - 1 + body, _ = await _open_file(start_byte, end_byte) + failed_reads = 0 + bytes_read = 0 + try: + while True: + try: + chunk = await body.read(DEFAULT_DOWNLOAD_BODY_READ_SIZE) + except S3_RETRYABLE_ERRORS: + failed_reads += 1 + body = await handle_read_error(body, failed_reads, start_byte + bytes_read, end_byte) + continue + + if not chunk: + break + + f0.seek(start_byte + bytes_read) + f0.write(chunk) + bytes_read += len(chunk) + callback.relative_update(len(chunk)) + finally: + try: + body.close() + except Exception: + pass + + chunk_count = file_size // chunksize + if file_size % chunksize > 0: + chunk_count += 1 + + tasks = set() + current_chunk = 0 + while current_chunk < chunk_count: + while current_chunk < chunk_count and len(tasks) < concurrent_download: + tasks.add(asyncio.create_task(download_chunk(current_chunk))) + current_chunk += 1 + _, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED) + tasks = pending + await asyncio.gather(*tasks) diff --git 
a/plugins/flytekit-async-fsspec/setup.py b/plugins/flytekit-async-fsspec/setup.py new file mode 100644 index 0000000000..141f2b6081 --- /dev/null +++ b/plugins/flytekit-async-fsspec/setup.py @@ -0,0 +1,37 @@ +from setuptools import setup + +PLUGIN_NAME = "async_fsspec" + +microlib_name = "flytekitplugins-async-fsspec" + +plugin_requires = ["flytekit"] + +__version__ = "0.0.0+develop" + +setup( + name=microlib_name, + version=__version__, + author="flyteorg", + author_email="admin@flyte.org", + description="This package holds the data persistence plugins for flytekit", + namespace_packages=["flytekitplugins"], + packages=[f"flytekitplugins.{PLUGIN_NAME}", f"flytekitplugins.{PLUGIN_NAME}.s3fs"], + install_requires=plugin_requires, + license="apache2", + python_requires=">=3.8", + classifiers=[ + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + entry_points={"flytekit.plugins": [f"{PLUGIN_NAME}=flytekitplugins.{PLUGIN_NAME}"]}, +) diff --git a/plugins/flytekit-async-fsspec/tests/__init__.py b/plugins/flytekit-async-fsspec/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plugins/flytekit-async-fsspec/tests/test_s3fs.py b/plugins/flytekit-async-fsspec/tests/test_s3fs.py new file mode 100644 index 0000000000..97025ee72d --- /dev/null +++ b/plugins/flytekit-async-fsspec/tests/test_s3fs.py @@ -0,0 +1,201 @@ +import os +from unittest import mock +from unittest.mock import MagicMock, mock_open + +import pytest +from 
flytekitplugins.async_fsspec import AsyncS3FileSystem +from flytekitplugins.async_fsspec.s3fs.constants import DEFAULT_DOWNLOAD_CHUNK_SIZE, DEFAULT_UPLOAD_CHUNK_SIZE + + +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem._parent") +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem.invalidate_cache") +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem._call_s3") +@mock.patch("mimetypes.guess_type") +@mock.patch("os.path.getsize") +@pytest.mark.asyncio +async def test_put_file_single_object_upload( + mock_getsize, mock_guess_type, mock_call_s3, mock_invalidate_cache, mock_parent +): + mock_bucket = "mock-bucket" + mock_file_name = "mock_file_name" + mock_file_size = 32 * 2**20 # 32MB + mock_getsize.return_value = mock_file_size + mock_guess_type.return_value = (None, None) + mock_parent.return_value = None + mock_body = os.urandom(mock_file_size) + m = mock_open(read_data=mock_body) + + with mock.patch("builtins.open", m): + asyncs3fs = AsyncS3FileSystem() + await asyncs3fs._put_file(lpath=f"/{mock_file_name}", rpath=f"s3://{mock_bucket}/{mock_file_name}") + + mock_call_s3.assert_called_once_with("put_object", Bucket=mock_bucket, Key=mock_file_name, Body=mock_body) + + +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem._parent") +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem.invalidate_cache") +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem._call_s3") +@mock.patch("mimetypes.guess_type") +@mock.patch("os.path.getsize") +@pytest.mark.asyncio +async def test_put_file_multipart_upload( + mock_getsize, mock_guess_type, mock_call_s3, mock_invalidate_cache, mock_parent +): + mock_bucket = "mock-bucket" + mock_file_name = "mock_file_name" + mock_upload_id = "mock_upload_id" + mock_ETag = "mock_ETag" + mock_file_size = 256 * 2**20 # 256MB + mock_getsize.return_value = mock_file_size + mock_guess_type.return_value = (None, None) + + def call_s3_side_effect(*args, **kwargs): + if args[0] == 
"create_multipart_upload": + return {"UploadId": mock_upload_id} + elif args[0] == "upload_part": + part_number = kwargs["PartNumber"] + return {"ETag": f"{mock_ETag}{part_number}"} + elif args[0] == "complete_multipart_upload": + return None + + mock_call_s3.side_effect = call_s3_side_effect + + mock_parent.return_value = None + + mock_body = os.urandom(mock_file_size) + m = mock_open(read_data=mock_body) + + with mock.patch("builtins.open", m): + asyncs3fs = AsyncS3FileSystem() + await asyncs3fs._put_file(lpath=f"/{mock_file_name}", rpath=f"s3://{mock_bucket}/{mock_file_name}") + + mock_chunk_count = mock_file_size // DEFAULT_UPLOAD_CHUNK_SIZE + if mock_file_size % DEFAULT_UPLOAD_CHUNK_SIZE > 0: + mock_chunk_count += 1 + put_object_calls = [] + for i in range(mock_chunk_count): + part_number = i + 1 + start_byte = i * DEFAULT_UPLOAD_CHUNK_SIZE + end_byte = min(start_byte + DEFAULT_UPLOAD_CHUNK_SIZE, mock_file_size) + body = mock_body[start_byte:end_byte] + put_object_calls.append( + mock.call( + "upload_part", + Bucket=mock_bucket, + Key=mock_file_name, + PartNumber=part_number, + UploadId=mock_upload_id, + Body=body, + ), + ) + + mock_call_s3.assert_has_calls( + put_object_calls + + [ + mock.call("create_multipart_upload", Bucket=mock_bucket, Key=mock_file_name), + mock.call( + "complete_multipart_upload", + Bucket=mock_bucket, + Key=mock_file_name, + UploadId=mock_upload_id, + MultipartUpload={ + "Parts": [{"PartNumber": i, "ETag": f"{mock_ETag}{i}"} for i in range(1, mock_chunk_count + 1)] + }, + ), + ], + any_order=True, + ) + assert mock_call_s3.call_count == 2 + mock_chunk_count + + +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem._call_s3") +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem._info") +@mock.patch("os.path.isdir") +@pytest.mark.asyncio +async def test_get_file_file_size_is_none(mock_isdir, mock_info, mock_call_s3): + mock_bucket = "mock-bucket" + mock_file_name = "mock_file_name" + mock_file_size = 32 * 2**20 # 32MB + 
mock_isdir.return_value = False + mock_info.return_value = {"size": None} + + file_been_read = 0 + + async def read_side_effect(*args, **kwargs): + read_size = args[0] + nonlocal file_been_read + real_read_size = min(read_size, mock_file_size - file_been_read) + if real_read_size == 0: + return None + file_been_read += real_read_size + return os.urandom(real_read_size) + + mock_chunk = MagicMock() + mock_chunk.read.side_effect = read_side_effect + mock_call_s3.return_value = {"Body": mock_chunk, "ContentLength": mock_file_size} + + m = mock_open() + + with mock.patch("builtins.open", m): + asyncs3fs = AsyncS3FileSystem() + await asyncs3fs._get_file(lpath=f"/{mock_file_name}", rpath=f"s3://{mock_bucket}/{mock_file_name}") + + assert file_been_read == mock_file_size + mock_call_s3.assert_called_once_with("get_object", Bucket=mock_bucket, Key=mock_file_name, Range="bytes=0") + + +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem._call_s3") +@mock.patch("flytekitplugins.async_fsspec.AsyncS3FileSystem._info") +@mock.patch("os.path.isdir") +@pytest.mark.asyncio +async def test_get_file_file_size_is_not_none(mock_isdir, mock_info, mock_call_s3): + mock_bucket = "mock-bucket" + mock_file_name = "mock_file_name" + mock_file_size = 256 * 2**20 # 256MB + mock_isdir.return_value = False + mock_info.return_value = {"size": mock_file_size} + + file_been_read = 0 + + def call_s3_side_effect(*args, **kwargs): + start_byte, end_byte = kwargs["Range"][6:].split("-") + start_byte, end_byte = int(start_byte), int(end_byte) + chunk_size = end_byte - start_byte + 1 + chunk_been_read = 0 + + async def read_side_effect(*args, **kwargs): + nonlocal chunk_been_read + nonlocal file_been_read + read_size = args[0] + real_read_size = min(read_size, chunk_size - chunk_been_read) + if real_read_size == 0: + return None + chunk_been_read += real_read_size + file_been_read += real_read_size + return os.urandom(real_read_size) + + mock_chunk = MagicMock() + mock_chunk.read.side_effect = 
read_side_effect + return {"Body": mock_chunk, "ContentLength": chunk_size} + + mock_call_s3.side_effect = call_s3_side_effect + + m = mock_open() + with mock.patch("builtins.open", m): + asyncs3fs = AsyncS3FileSystem() + await asyncs3fs._get_file(lpath=f"/{mock_file_name}", rpath=f"s3://{mock_bucket}/{mock_file_name}") + + assert file_been_read == mock_file_size + + mock_chunk_count = mock_file_size // DEFAULT_DOWNLOAD_CHUNK_SIZE + if mock_file_size % DEFAULT_DOWNLOAD_CHUNK_SIZE > 0: + mock_chunk_count += 1 + get_object_calls = [] + for i in range(mock_chunk_count): + start_byte = i * DEFAULT_DOWNLOAD_CHUNK_SIZE + end_byte = min(start_byte + DEFAULT_DOWNLOAD_CHUNK_SIZE, mock_file_size) - 1 + get_object_calls.append( + mock.call("get_object", Bucket=mock_bucket, Key=mock_file_name, Range=f"bytes={start_byte}-{end_byte}") + ) + mock_call_s3.assert_has_calls(get_object_calls) + assert mock_call_s3.call_count == len(get_object_calls) diff --git a/plugins/setup.py b/plugins/setup.py index 7933ac43fb..690c4cc94f 100644 --- a/plugins/setup.py +++ b/plugins/setup.py @@ -11,6 +11,7 @@ # Please maintain an alphabetical order in the following list SOURCES = { + "flytekitplugins-async-fsspec": "flytekit-async-fsspec", "flytekitplugins-athena": "flytekit-aws-athena", "flytekitplugins-awsbatch": "flytekit-aws-batch", "flytekitplugins-awssagemaker": "flytekit-aws-sagemaker", From e45fdc2e33ee74e020a61e018a3782a0d4518e5b Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Thu, 21 Dec 2023 16:16:05 -0300 Subject: [PATCH 02/63] Enable python 3.12 (#2005) * Enable python 3.12 Signed-off-by: Eduardo Apolinario * Relax the constraints on tensorflow Signed-off-by: Eduardo Apolinario * wip - split tensorflow tests Signed-off-by: Eduardo Apolinario * Run on python 3.12 Signed-off-by: Eduardo Apolinario * Split tensorflow unit tests in their own tests Signed-off-by: Eduardo Apolinario * Fix typo in dev-requirements.in Signed-off-by: 
Eduardo Apolinario * Run extra unit tests in separate step Signed-off-by: Eduardo Apolinario * Remove windows restriction Signed-off-by: Eduardo Apolinario * Fix unit_test_extras make target Signed-off-by: Eduardo Apolinario * Fix typo in makefile and version restriction in dev-requirements.in Signed-off-by: Eduardo Apolinario * Add support for 3.12 in default_images.py Signed-off-by: Eduardo Apolinario * Modify regex in dataclass tests Signed-off-by: Eduardo Apolinario * Rename step in pythonbuild gh workflow Signed-off-by: Eduardo Apolinario * Enable 3.12 in serialization tests Signed-off-by: Eduardo Apolinario * Read python versions from env var Signed-off-by: Eduardo Apolinario * Revert "Read python versions from env var" This reverts commit 6d0fb12ee63626ad11098681a7d74546ae8d8bc7. Signed-off-by: Eduardo Apolinario * Run integration tests on 3.12 Signed-off-by: Eduardo Apolinario * Use bookworm as base image Signed-off-by: Eduardo Apolinario * Restrict ydata-profiling Signed-off-by: Eduardo Apolinario * Install latest flytekit Signed-off-by: Eduardo Apolinario * Mount .git in dev image and output version Signed-off-by: Eduardo Apolinario * Cat dynamic version in integration test gh workflow Signed-off-by: Eduardo Apolinario * Use solution described in https://github.com/pypa/setuptools_scm/issues/414 Signed-off-by: Eduardo Apolinario * Remove investigation aids Signed-off-by: Eduardo Apolinario * Install pandas in deck plugin Signed-off-by: Eduardo Apolinario * Revert bump of flytekit version in pod plugin Signed-off-by: Eduardo Apolinario * Combine installation of flytekit and plugins in dev image Signed-off-by: Eduardo Apolinario * Test setting fetch-depth: 0 Signed-off-by: Eduardo Apolinario * Use with stanza Signed-off-by: Eduardo Apolinario * Comment need for fetch-depth: 0 Signed-off-by: Eduardo Apolinario --------- Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- .github/workflows/pythonbuild.yml | 18 +++++++++++++++--- 
Dockerfile | 2 +- Dockerfile.agent | 2 +- Dockerfile.dev | 10 +++++----- Makefile | 13 ++++++++++--- dev-requirements.in | 10 ++++------ flytekit/configuration/default_images.py | 2 ++ plugins/flytekit-deck-standard/setup.py | 4 +++- plugins/flytekit-flyin/Dockerfile | 2 +- plugins/flytekit-sqlalchemy/Dockerfile | 2 +- pyproject.toml | 2 +- tests/flytekit/unit/core/test_type_engine.py | 4 ++-- 12 files changed, 46 insertions(+), 25 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 54da3c5327..d9ab7a97c2 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -20,7 +20,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.8", "3.11"] + python-version: ["3.8", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -39,6 +39,14 @@ jobs: make setup pip uninstall -y pandas pip freeze + - name: Run extras unit tests with coverage + # Skip this step if running on python 3.12 due to https://github.com/tensorflow/tensorflow/issues/62003 + # and https://github.com/pytorch/pytorch/issues/110436 + if: ${{ matrix.python-version != '3.12' }} + env: + PYTEST_OPTS: -n2 + run: | + make unit_test_extras_codecov - name: Test with coverage env: PYTEST_OPTS: -n2 @@ -93,7 +101,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ["3.8", "3.11"] + python-version: ["3.8", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -128,10 +136,14 @@ jobs: os: [ubuntu-latest] # python 3.11 has intermittent issues with the docker build + push step # https://github.com/flyteorg/flytekit/actions/runs/5800978835/job/15724237979?pr=1579 - python-version: ["3.8", "3.11"] + python-version: ["3.8", "3.11", "3.12"] steps: - uses: insightsengineering/disk-space-reclaimer@v1 + # As described in https://github.com/pypa/setuptools_scm/issues/414, SCM 
needs git history + # and tags to work. - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: diff --git a/Dockerfile b/Dockerfile index e8dcab237f..a7062bfc0d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ ARG PYTHON_VERSION -FROM python:${PYTHON_VERSION}-slim-buster +FROM python:${PYTHON_VERSION}-slim-bookworm MAINTAINER Flyte Team LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytekit diff --git a/Dockerfile.agent b/Dockerfile.agent index ebf930bf70..fe4ce56290 100644 --- a/Dockerfile.agent +++ b/Dockerfile.agent @@ -1,4 +1,4 @@ -FROM python:3.9-slim-buster +FROM python:3.9-slim-bookworm MAINTAINER Flyte Team LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytekit diff --git a/Dockerfile.dev b/Dockerfile.dev index a2cada316c..97277b6837 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -6,7 +6,7 @@ # $ pyflyte run --image localhost:30000/flytekittest:someversion ARG PYTHON_VERSION -FROM python:${PYTHON_VERSION}-slim-buster +FROM python:${PYTHON_VERSION}-slim-bookworm MAINTAINER Flyte Team LABEL org.opencontainers.image.source https://github.com/flyteorg/flytekit @@ -28,10 +28,10 @@ COPY . /flytekit # 4. Create a non-root user 'flytekit' and set appropriate permissions for directories. 
RUN apt-get update && apt-get install build-essential vim libmagic1 git -y \ && pip install --no-cache-dir -e /flytekit \ - && pip install --no-cache-dir -e /flytekit/plugins/flytekit-k8s-pod \ - && pip install --no-cache-dir -e /flytekit/plugins/flytekit-deck-standard \ - && pip install --no-cache-dir -e /flytekit/plugins/flytekit-flyin \ - && pip install --no-cache-dir scikit-learn \ + -e /flytekit/plugins/flytekit-k8s-pod \ + -e /flytekit/plugins/flytekit-deck-standard \ + -e /flytekit/plugins/flytekit-flyin \ + scikit-learn \ && apt-get clean autoclean \ && apt-get autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log}/ \ diff --git a/Makefile b/Makefile index 3daf0da7da..5d672c21f5 100644 --- a/Makefile +++ b/Makefile @@ -52,12 +52,19 @@ test: lint unit_test unit_test_codecov: $(MAKE) CODECOV_OPTS="--cov=./ --cov-report=xml --cov-append" unit_test +.PHONY: unit_test_extras_codecov +unit_test_extras_codecov: + $(MAKE) CODECOV_OPTS="--cov=./ --cov-report=xml --cov-append" unit_test_extras + .PHONY: unit_test unit_test: - # Skip tensorflow tests and run them with the necessary env var set so that a working (albeit slower) + # Skip all extra tests and run them with the necessary env var set so that a working (albeit slower) # library is used to serialize/deserialize protobufs is used. 
- $(PYTEST) -m "not sandbox_test" tests/flytekit/unit/ --ignore=tests/flytekit/unit/extras/tensorflow --ignore=tests/flytekit/unit/models ${CODECOV_OPTS} && \ - PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python $(PYTEST) tests/flytekit/unit/extras/tensorflow ${CODECOV_OPTS} + $(PYTEST) -m "not sandbox_test" tests/flytekit/unit/ --ignore=tests/flytekit/unit/extras/ --ignore=tests/flytekit/unit/models ${CODECOV_OPTS} + +.PHONY: unit_test_extras +unit_test_extras: + PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python $(PYTEST) tests/flytekit/unit/extras ${CODECOV_OPTS} .PHONY: test_serialization_codecov test_serialization_codecov: diff --git a/dev-requirements.in b/dev-requirements.in index a548af4c42..b89c389736 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -19,16 +19,14 @@ IPython keyrings.alt setuptools_scm -# Only install tensorflow if not running on an arm Mac. -tensorflow==2.8.1; python_version<'3.11' and (platform_machine!='arm64' or platform_system!='Darwin') -# Tensorflow release candidate supports python 3.11 -tensorflow==2.13.0; python_version>='3.11' and (platform_machine!='arm64' or platform_system!='Darwin') - +# Tensorflow is not available for python 3.12 yet: https://github.com/tensorflow/tensorflow/issues/62003 +tensorflow; python_version<'3.12' # Newer versions of torch bring in nvidia dependencies that are not present in windows, so # we put this constraint while we do not have per-environment requirements files torch<=1.12.1; python_version<'3.11' # pytorch 2 supports python 3.11 -torch<=2.0.0; python_version>='3.11' or platform_system!='Windows' +# pytorch 2 does not support 3.12 yet: https://github.com/pytorch/pytorch/issues/110436 +torch; python_version<'3.12' # TODO: Currently, the python-magic library causes build errors on Windows due to its dependency on DLLs for libmagic. # We have temporarily disabled this feature on Windows and are using python-magic for Mac OS and Linux instead. 
diff --git a/flytekit/configuration/default_images.py b/flytekit/configuration/default_images.py index 06af49664f..ea9d162a8d 100644 --- a/flytekit/configuration/default_images.py +++ b/flytekit/configuration/default_images.py @@ -8,6 +8,7 @@ class PythonVersion(enum.Enum): PYTHON_3_9 = (3, 9) PYTHON_3_10 = (3, 10) PYTHON_3_11 = (3, 11) + PYTHON_3_12 = (3, 12) class DefaultImages(object): @@ -20,6 +21,7 @@ class DefaultImages(object): PythonVersion.PYTHON_3_9: "cr.flyte.org/flyteorg/flytekit:py3.9-", PythonVersion.PYTHON_3_10: "cr.flyte.org/flyteorg/flytekit:py3.10-", PythonVersion.PYTHON_3_11: "cr.flyte.org/flyteorg/flytekit:py3.11-", + PythonVersion.PYTHON_3_12: "cr.flyte.org/flyteorg/flytekit:py3.12-", } @classmethod diff --git a/plugins/flytekit-deck-standard/setup.py b/plugins/flytekit-deck-standard/setup.py index b894abb3a3..b0d2c4783d 100644 --- a/plugins/flytekit-deck-standard/setup.py +++ b/plugins/flytekit-deck-standard/setup.py @@ -8,7 +8,9 @@ "flytekit", "markdown", "plotly", - "ydata-profiling", + # ydata-profiling is not compatible with python 3.12 yet: https://github.com/ydataai/ydata-profiling/issues/1510 + "ydata-profiling; python_version<'3.12'", + "pandas", "ipywidgets", "pygments", ] diff --git a/plugins/flytekit-flyin/Dockerfile b/plugins/flytekit-flyin/Dockerfile index 5faf1df710..9d56a84826 100644 --- a/plugins/flytekit-flyin/Dockerfile +++ b/plugins/flytekit-flyin/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-slim-buster +FROM python:3.10-slim-bookworm MAINTAINER Flyte Team LABEL org.opencontainers.image.source https://github.com/flyteorg/flytekit WORKDIR /root diff --git a/plugins/flytekit-sqlalchemy/Dockerfile b/plugins/flytekit-sqlalchemy/Dockerfile index ed1a644d8f..4b29e67dea 100644 --- a/plugins/flytekit-sqlalchemy/Dockerfile +++ b/plugins/flytekit-sqlalchemy/Dockerfile @@ -1,5 +1,5 @@ ARG PYTHON_VERSION -FROM python:${PYTHON_VERSION}-slim-buster +FROM python:${PYTHON_VERSION}-slim-bookworm WORKDIR /root ENV LANG C.UTF-8 diff --git 
a/pyproject.toml b/pyproject.toml index 2b18904e53..e166b337dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [{ name = "Flyte Contributors", email = "admin@flyte.org" }] description = "Flyte SDK for Python" license = { text = "Apache-2.0" } readme = { file = "README.md", content-type = "text/markdown" } -requires-python = ">=3.8,<3.12" +requires-python = ">=3.8,<3.13" dependencies = [ # Please maintain an alphabetical order in the following list "adlfs", diff --git a/tests/flytekit/unit/core/test_type_engine.py b/tests/flytekit/unit/core/test_type_engine.py index 4d3661f329..f3fabdbf42 100644 --- a/tests/flytekit/unit/core/test_type_engine.py +++ b/tests/flytekit/unit/core/test_type_engine.py @@ -1407,7 +1407,7 @@ class Bar(DataClassJsonMixin): pv = Bar(x=3) with pytest.raises( - TypeTransformerFailedError, match="Type of Val '' is not an instance of " + TypeTransformerFailedError, match="Type of Val '' is not an instance of " ): DataclassTransformer().assert_type(gt, pv) @@ -1438,7 +1438,7 @@ class Bar(DataClassJSONMixin): pv = Bar(x=3) with pytest.raises( TypeTransformerFailedError, - match="Type of Val '' is not an instance of ", + match="Type of Val '' is not an instance of ", ): DataclassTransformer().assert_type(gt, pv) From 274549fb7db16092cb76535adaff4b4ca49781d8 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Thu, 21 Dec 2023 14:16:28 -0500 Subject: [PATCH 03/63] Fixes type for plugin (#2065) Signed-off-by: Thomas J. 
Fan --- flytekit/configuration/plugin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flytekit/configuration/plugin.py b/flytekit/configuration/plugin.py index 0eedcc224f..85bc811c44 100644 --- a/flytekit/configuration/plugin.py +++ b/flytekit/configuration/plugin.py @@ -19,7 +19,7 @@ """ from typing import Optional, Protocol, runtime_checkable -from click import Command +from click import Group from importlib_metadata import entry_points from flytekit.configuration import Config, get_config_file @@ -36,7 +36,7 @@ def get_remote( """Get FlyteRemote object for CLI session.""" @staticmethod - def configure_pyflyte_cli(main: Command) -> Command: + def configure_pyflyte_cli(main: Group) -> Group: """Configure pyflyte's CLI.""" @@ -58,7 +58,7 @@ def get_remote( ) @staticmethod - def configure_pyflyte_cli(main: Command) -> Command: + def configure_pyflyte_cli(main: Group) -> Group: """Configure pyflyte's CLI.""" return main From bf726b9ea21ba5a6a25f9d1f9978e5e206989a0b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 21 Dec 2023 16:16:51 -0300 Subject: [PATCH 04/63] Bump apache-airflow from 2.7.3 to 2.8.0 in /plugins/flytekit-airflow (#2066) Bumps [apache-airflow](https://github.com/apache/airflow) from 2.7.3 to 2.8.0. - [Release notes](https://github.com/apache/airflow/releases) - [Changelog](https://github.com/apache/airflow/blob/main/RELEASE_NOTES.rst) - [Commits](https://github.com/apache/airflow/compare/2.7.3...2.8.0) --- updated-dependencies: - dependency-name: apache-airflow dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- plugins/flytekit-airflow/dev-requirements.txt | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/plugins/flytekit-airflow/dev-requirements.txt b/plugins/flytekit-airflow/dev-requirements.txt index a0433fc2cb..744f2c092c 100644 --- a/plugins/flytekit-airflow/dev-requirements.txt +++ b/plugins/flytekit-airflow/dev-requirements.txt @@ -21,7 +21,7 @@ annotated-types==0.6.0 # via pydantic anyio==4.0.0 # via httpx -apache-airflow==2.7.3 +apache-airflow==2.8.0 # via # apache-airflow-providers-apache-beam # apache-airflow-providers-common-sql @@ -89,9 +89,7 @@ cachetools==5.3.2 # apache-beam # google-auth cattrs==23.1.2 - # via - # apache-airflow - # looker-sdk + # via looker-sdk certifi==2023.7.22 # via # httpcore @@ -181,7 +179,7 @@ flask==2.2.5 # flask-session # flask-sqlalchemy # flask-wtf -flask-appbuilder==4.3.6 +flask-appbuilder==4.3.10 # via apache-airflow flask-babel==2.0.0 # via flask-appbuilder @@ -208,7 +206,10 @@ frozenlist==1.4.0 # aiohttp # aiosignal fsspec==2023.10.0 - # via gcsfs + # via + # apache-airflow + # gcsfs + # universal-pathlib gcloud-aio-auth==4.2.3 # via # apache-airflow-providers-google @@ -951,6 +952,8 @@ uc-micro-py==1.0.2 # via linkify-it-py unicodecsv==0.14.1 # via apache-airflow +universal-pathlib==0.1.4 + # via apache-airflow uritemplate==4.1.1 # via google-api-python-client urllib3==2.0.7 @@ -960,13 +963,13 @@ werkzeug==2.2.3 # apache-airflow # connexion # flask + # flask-appbuilder # flask-jwt-extended # flask-login wrapt==1.15.0 # via deprecated wtforms==3.0.1 # via - # apache-airflow # flask-appbuilder # flask-wtf yarl==1.9.2 From 8af01f28d08ce68d5f745f0d9184ca312a0b7557 Mon Sep 17 00:00:00 2001 From: "Fabio M. 
Graetz, Ph.D" Date: Thu, 21 Dec 2023 20:19:23 +0100 Subject: [PATCH 05/63] Fix: Handle SIGTERM in kubeflow pytorch elastic training plugin (#2064) Signed-off-by: Fabio Graetz --- plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py b/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py index cceb0c1cc7..ffe21d90cd 100644 --- a/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py +++ b/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py @@ -386,6 +386,7 @@ def fn_partial(): else: raise Exception("Bad start method") + from torch.distributed.elastic.multiprocessing.api import SignalException from torch.distributed.elastic.multiprocessing.errors import ChildFailedError try: @@ -399,6 +400,9 @@ def fn_partial(): raise FlyteRecoverableException(e.format_msg()) else: raise RuntimeError(e.format_msg()) + except SignalException as e: + logger.exception(f"Elastic launch agent process terminating: {e}") + raise IgnoreOutputs() # `out` is a dictionary of rank (not local rank) -> result # Rank 0 returns the result of the task function From 1087356ec02122711cc041991608e86783d41547 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Fri, 22 Dec 2023 02:21:50 +0530 Subject: [PATCH 06/63] unpin envd and add `extra_path` to imagespec (#2063) Signed-off-by: Samhita Alla Signed-off-by: Kevin Su Co-authored-by: Kevin Su --- .../flytekitplugins/envd/image_builder.py | 21 ++++++++++++++++--- plugins/flytekit-envd/setup.py | 2 +- .../flytekit-envd/tests/test_image_spec.py | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py index b806e76594..0a534a6763 100644 --- a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py +++ b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py @@ -1,9 +1,11 
@@ +import json import os import pathlib import shutil import subprocess import click +from packaging.version import Version from flytekit.configuration import DefaultImages from flytekit.core import context_manager @@ -19,16 +21,22 @@ class EnvdImageSpecBuilder(ImageSpecBuilder): def execute_command(self, command): click.secho(f"Run command: {command} ", fg="blue") p = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + result = [] for line in iter(p.stdout.readline, ""): if p.poll() is not None: break if line.decode().strip() != "": - click.secho(line.decode().strip(), fg="blue") + output = line.decode().strip() + click.secho(output, fg="blue") + result.append(output) if p.returncode != 0: _, stderr = p.communicate() raise Exception(f"failed to run command {command} with error {stderr}") + return result + def build_image(self, image_spec: ImageSpec): cfg_path = create_envd_config(image_spec) @@ -65,7 +73,7 @@ def build(): run(commands=[{', '.join(map(str, map(lambda x: f'"{x}"', commands)))}]) install.python_packages(name=[{', '.join(map(str, map(lambda x: f'"{x}"', packages)))}]) install.apt_packages(name=[{', '.join(map(str, map(lambda x: f'"{x}"', apt_packages)))}]) - runtime.environ(env={env}) + runtime.environ(env={env}, extra_path=['/root']) config.pip_index(url="{pip_index}") """ ctx = context_manager.FlyteContextManager.current_context() @@ -87,8 +95,15 @@ def build(): if image_spec.source_root: shutil.copytree(image_spec.source_root, pathlib.Path(cfg_path).parent, dirs_exist_ok=True) + + version_command = "envd version -s -f json" + envd_version = json.loads(EnvdImageSpecBuilder().execute_command(version_command)[0])["envd"].replace("v", "") + # Indentation is required by envd - envd_config += ' io.copy(host_path="./", envd_path="/root")' + if Version(envd_version) <= Version("0.3.37"): + envd_config += ' io.copy(host_path="./", envd_path="/root")' + else: + envd_config += ' io.copy(source="./", target="/root")' with 
open(cfg_path, "w+") as f: f.write(envd_config) diff --git a/plugins/flytekit-envd/setup.py b/plugins/flytekit-envd/setup.py index 93d03f7ab8..d95a260958 100644 --- a/plugins/flytekit-envd/setup.py +++ b/plugins/flytekit-envd/setup.py @@ -4,7 +4,7 @@ microlib_name = f"flytekitplugins-{PLUGIN_NAME}" -plugin_requires = ["flytekit", "envd<=0.3.36"] +plugin_requires = ["flytekit", "envd"] __version__ = "0.0.0+develop" diff --git a/plugins/flytekit-envd/tests/test_image_spec.py b/plugins/flytekit-envd/tests/test_image_spec.py index 0a4a09b63a..49c8f5085a 100644 --- a/plugins/flytekit-envd/tests/test_image_spec.py +++ b/plugins/flytekit-envd/tests/test_image_spec.py @@ -30,7 +30,7 @@ def build(): run(commands=["echo hello"]) install.python_packages(name=["pandas"]) install.apt_packages(name=["git"]) - runtime.environ(env={{'PYTHONPATH': '/root', '_F_IMG_ID': '{image_name}'}}) + runtime.environ(env={{'PYTHONPATH': '/root', '_F_IMG_ID': '{image_name}'}}, extra_path=['/root']) config.pip_index(url="https://private-pip-index/simple") install.python(version="3.8") """ From ad328a746b87101c3ae265629f0c5a1188b8ee94 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 22 Dec 2023 05:32:20 +0800 Subject: [PATCH 07/63] Change Flyin VScode Default Extensions (#2058) Signed-off-by: Future Outlier Co-authored-by: Future Outlier --- .../flytekitplugins/flyin/vscode_lib/config.py | 6 ++++-- .../flytekitplugins/flyin/vscode_lib/constants.py | 4 ++-- plugins/flytekit-flyin/tests/test_flyin_plugin.py | 6 ++++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/config.py b/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/config.py index 76f79632c6..332e5a7108 100644 --- a/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/config.py +++ b/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/config.py @@ -34,8 +34,10 @@ def add_extensions(self, extensions: Union[str, List[str]]): COPILOT_EXTENSION = ( 
"https://raw.githubusercontent.com/flyteorg/flytetools/master/flytekitplugins/flyin/GitHub.copilot-1.138.563.vsix" ) -VIM_EXTENSION = "https://open-vsx.org/api/vscodevim/vim/1.27.0/file/vscodevim.vim-1.27.0.vsix" -CODE_TOGETHER_EXTENSION = "https://openvsxorg.blob.core.windows.net/resources/genuitecllc/codetogether/2023.2.0/genuitecllc.codetogether-2023.2.0.vsix" +VIM_EXTENSION = ( + "https://raw.githubusercontent.com/flyteorg/flytetools/master/flytekitplugins/flyin/vscodevim.vim-1.27.0.vsix" +) +CODE_TOGETHER_EXTENSION = "https://raw.githubusercontent.com/flyteorg/flytetools/master/flytekitplugins/flyin/genuitecllc.codetogether-2023.2.0.vsix" # Predefined VSCode config with extensions VIM_CONFIG = VscodeConfig( diff --git a/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/constants.py b/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/constants.py index 8de8348a0a..faabbfcaee 100644 --- a/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/constants.py +++ b/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/constants.py @@ -10,8 +10,8 @@ "arm64": "https://github.com/coder/code-server/releases/download/v4.18.0/code-server-4.18.0-linux-arm64.tar.gz", } DEFAULT_CODE_SERVER_EXTENSIONS = [ - "https://open-vsx.org/api/ms-python/python/2023.20.0/file/ms-python.python-2023.20.0.vsix", - "https://open-vsx.org/api/ms-toolsai/jupyter/2023.9.100/file/ms-toolsai.jupyter-2023.9.100.vsix", + "https://raw.githubusercontent.com/flyteorg/flytetools/master/flytekitplugins/flyin/ms-python.python-2023.20.0.vsix", + "https://raw.githubusercontent.com/flyteorg/flytetools/master/flytekitplugins/flyin/ms-toolsai.jupyter-2023.9.100.vsix", ] DEFAULT_CODE_SERVER_DIR_NAMES = { "amd64": "code-server-4.18.0-linux-amd64", diff --git a/plugins/flytekit-flyin/tests/test_flyin_plugin.py b/plugins/flytekit-flyin/tests/test_flyin_plugin.py index 7eb937394d..085532e8fc 100644 --- a/plugins/flytekit-flyin/tests/test_flyin_plugin.py +++ 
b/plugins/flytekit-flyin/tests/test_flyin_plugin.py @@ -237,6 +237,12 @@ def test_vscode_config(): assert vim_config.code_server_dir_names == DEFAULT_CODE_SERVER_DIR_NAMES assert vim_config.extension_remote_paths == DEFAULT_CODE_SERVER_EXTENSIONS + [VIM_EXTENSION] + all_extensions_config = VscodeConfig() + all_extensions_config.add_extensions([CODE_TOGETHER_EXTENSION, COPILOT_EXTENSION, VIM_EXTENSION]) + assert CODE_TOGETHER_EXTENSION in all_extensions_config.extension_remote_paths + assert COPILOT_EXTENSION in all_extensions_config.extension_remote_paths + assert VIM_EXTENSION in all_extensions_config.extension_remote_paths + def test_vscode_config_add_extensions(): additional_extensions = [COPILOT_EXTENSION, VIM_EXTENSION, CODE_TOGETHER_EXTENSION] From 68430db13b2c6bbd9ce5374b101f45dd995192da Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 22 Dec 2023 16:17:59 -0500 Subject: [PATCH 08/63] Allows Secret groups to be optional and configurable (#2062) Signed-off-by: Thomas J. Fan --- flytekit/configuration/plugin.py | 9 ++++++++ flytekit/core/context_manager.py | 20 +++++++++++++----- flytekit/models/security.py | 6 ++++-- .../unit/core/test_context_manager.py | 21 +++++++++++++++++++ .../unit/models/core/test_security.py | 13 ++++++++++++ 5 files changed, 62 insertions(+), 7 deletions(-) diff --git a/flytekit/configuration/plugin.py b/flytekit/configuration/plugin.py index 85bc811c44..aa0a9f79d1 100644 --- a/flytekit/configuration/plugin.py +++ b/flytekit/configuration/plugin.py @@ -39,6 +39,10 @@ def get_remote( def configure_pyflyte_cli(main: Group) -> Group: """Configure pyflyte's CLI.""" + @staticmethod + def secret_requires_group() -> bool: + """Return True if secrets require group entry.""" + class FlytekitPlugin: @staticmethod @@ -62,6 +66,11 @@ def configure_pyflyte_cli(main: Group) -> Group: """Configure pyflyte's CLI.""" return main + @staticmethod + def secret_requires_group() -> bool: + """Return True if secrets require group entry.""" + 
return True + def _get_plugin_from_entrypoint(): """Get plugin from entrypoint.""" diff --git a/flytekit/core/context_manager.py b/flytekit/core/context_manager.py index 833c7d8562..d30eba0918 100644 --- a/flytekit/core/context_manager.py +++ b/flytekit/core/context_manager.py @@ -350,7 +350,11 @@ def __getattr__(self, item: str) -> _GroupSecrets: return self._GroupSecrets(item, self) def get( - self, group: str, key: Optional[str] = None, group_version: Optional[str] = None, encode_mode: str = "r" + self, + group: Optional[str] = None, + key: Optional[str] = None, + group_version: Optional[str] = None, + encode_mode: str = "r", ) -> str: """ Retrieves a secret using the resolution order -> Env followed by file. If not found raises a ValueError @@ -370,7 +374,9 @@ def get( f"in Env Var:{env_var} and FilePath: {fpath}" ) - def get_secrets_env_var(self, group: str, key: Optional[str] = None, group_version: Optional[str] = None) -> str: + def get_secrets_env_var( + self, group: Optional[str] = None, key: Optional[str] = None, group_version: Optional[str] = None + ) -> str: """ Returns a string that matches the ENV Variable to look for the secrets """ @@ -378,7 +384,9 @@ def get_secrets_env_var(self, group: str, key: Optional[str] = None, group_versi l = [k.upper() for k in filter(None, (group, group_version, key))] return f"{self._env_prefix}{'_'.join(l)}" - def get_secrets_file(self, group: str, key: Optional[str] = None, group_version: Optional[str] = None) -> str: + def get_secrets_file( + self, group: Optional[str] = None, key: Optional[str] = None, group_version: Optional[str] = None + ) -> str: """ Returns a path that matches the file to look for the secrets """ @@ -388,8 +396,10 @@ def get_secrets_file(self, group: str, key: Optional[str] = None, group_version: return os.path.join(self._base_dir, *l) @staticmethod - def check_group_key(group: str): - if group is None or group == "": + def check_group_key(group: Optional[str]): + from 
flytekit.configuration.plugin import get_plugin + + if get_plugin().secret_requires_group() and (group is None or group == ""): raise ValueError("secrets group is a mandatory field.") diff --git a/flytekit/models/security.py b/flytekit/models/security.py index 9af90a4b8a..a9ee7e7cb9 100644 --- a/flytekit/models/security.py +++ b/flytekit/models/security.py @@ -35,13 +35,15 @@ class MountType(Enum): Caution: May not be supported in all environments """ - group: str + group: Optional[str] = None key: Optional[str] = None group_version: Optional[str] = None mount_requirement: MountType = MountType.ANY def __post_init__(self): - if self.group is None: + from flytekit.configuration.plugin import get_plugin + + if get_plugin().secret_requires_group() and self.group is None: raise ValueError("Group is a required parameter") def to_flyte_idl(self) -> _sec.Secret: diff --git a/tests/flytekit/unit/core/test_context_manager.py b/tests/flytekit/unit/core/test_context_manager.py index 2ec7eb8e19..ca22f359c9 100644 --- a/tests/flytekit/unit/core/test_context_manager.py +++ b/tests/flytekit/unit/core/test_context_manager.py @@ -1,11 +1,14 @@ import base64 import os from datetime import datetime +from pathlib import Path +from unittest.mock import Mock import mock import py import pytest +import flytekit.configuration.plugin from flytekit.configuration import ( SERIALIZED_CONTEXT_ENV_VAR, FastSerializationSettings, @@ -128,6 +131,24 @@ def test_secrets_manager_get_envvar(): assert sec.get_secrets_env_var("group") == f"{cfg.env_prefix}GROUP" +def test_secret_manager_no_group(monkeypatch): + plugin_mock = Mock() + plugin_mock.secret_requires_group.return_value = False + mock_global_plugin = {"plugin": plugin_mock} + monkeypatch.setattr(flytekit.configuration.plugin, "_GLOBAL_CONFIG", mock_global_plugin) + + sec = SecretsManager() + cfg = SecretsConfig.auto() + sec.check_group_key(None) + sec.check_group_key("") + + assert sec.get_secrets_env_var(key="ABC") == f"{cfg.env_prefix}ABC" 
+ + default_path = Path(cfg.default_dir) + expected_path = default_path / f"{cfg.file_prefix}abc" + assert sec.get_secrets_file(key="ABC") == str(expected_path) + + def test_secrets_manager_get_file(): sec = SecretsManager() with pytest.raises(ValueError): diff --git a/tests/flytekit/unit/models/core/test_security.py b/tests/flytekit/unit/models/core/test_security.py index c2933f9353..a7ed006174 100644 --- a/tests/flytekit/unit/models/core/test_security.py +++ b/tests/flytekit/unit/models/core/test_security.py @@ -1,3 +1,6 @@ +from unittest.mock import Mock + +import flytekit.configuration.plugin from flytekit.models.security import Secret @@ -11,3 +14,13 @@ def test_secret(): obj2 = Secret.from_flyte_idl(obj.to_flyte_idl()) assert obj2.key is None assert obj2.group_version == "v1" + + +def test_secret_no_group(monkeypatch): + plugin_mock = Mock() + plugin_mock.secret_requires_group.return_value = False + mock_global_plugin = {"plugin": plugin_mock} + monkeypatch.setattr(flytekit.configuration.plugin, "_GLOBAL_CONFIG", mock_global_plugin) + + s = Secret(key="key") + assert s.group is None From eab12e65bc06686e0aadd512ed352fa11ff4db2a Mon Sep 17 00:00:00 2001 From: Jason Lai Date: Sat, 23 Dec 2023 05:35:49 +0800 Subject: [PATCH 09/63] [Core feature] Allow dictionaries to be passed to execute workflows that take dataclasses (#2013) Signed-off-by: jason.lai --- flytekit/core/type_engine.py | 70 +++++++++++++++--- tests/flytekit/unit/core/test_type_engine.py | 77 ++++++++++++++++++++ 2 files changed, 135 insertions(+), 12 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 4bcbbeceef..71c630b3b4 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -6,6 +6,7 @@ import datetime as _datetime import enum import inspect +import json import json as _json import mimetypes import textwrap @@ -349,20 +350,61 @@ def assert_type(self, expected_type: Type[DataClassJsonMixin], v: T): for f in 
dataclasses.fields(expected_type): expected_fields_dict[f.name] = f.type - for f in dataclasses.fields(type(v)): # type: ignore - original_type = f.type - expected_type = expected_fields_dict[f.name] + if isinstance(v, dict): + original_dict = v - if UnionTransformer.is_optional_type(original_type): - original_type = UnionTransformer.get_sub_type_in_optional(original_type) - if UnionTransformer.is_optional_type(expected_type): - expected_type = UnionTransformer.get_sub_type_in_optional(expected_type) + # Find the Optional keys in expected_fields_dict + optional_keys = {k for k, t in expected_fields_dict.items() if UnionTransformer.is_optional_type(t)} - val = v.__getattribute__(f.name) - if dataclasses.is_dataclass(val): - self.assert_type(expected_type, val) - elif original_type != expected_type: - raise TypeTransformerFailedError(f"Type of Val '{original_type}' is not an instance of {expected_type}") + # Remove the Optional keys from the keys of original_dict + original_key = set(original_dict.keys()) - optional_keys + expected_key = set(expected_fields_dict.keys()) - optional_keys + + # Check if original_key is missing any keys from expected_key + missing_keys = expected_key - original_key + if missing_keys: + raise TypeTransformerFailedError( + f"The original fields are missing the following keys from the dataclass fields: {list(missing_keys)}" + ) + + # Check if original_key has any extra keys that are not in expected_key + extra_keys = original_key - expected_key + if extra_keys: + raise TypeTransformerFailedError( + f"The original fields have the following extra keys that are not in dataclass fields: {list(extra_keys)}" + ) + + for k, v in original_dict.items(): + if k in expected_fields_dict: + if isinstance(v, dict): + self.assert_type(expected_fields_dict[k], v) + else: + expected_type = expected_fields_dict[k] + original_type = type(v) + if UnionTransformer.is_optional_type(expected_type): + expected_type = 
UnionTransformer.get_sub_type_in_optional(expected_type) + if original_type != expected_type: + raise TypeTransformerFailedError( + f"Type of Val '{original_type}' is not an instance of {expected_type}" + ) + + else: + for f in dataclasses.fields(type(v)): # type: ignore + original_type = f.type + expected_type = expected_fields_dict[f.name] + + if UnionTransformer.is_optional_type(original_type): + original_type = UnionTransformer.get_sub_type_in_optional(original_type) + if UnionTransformer.is_optional_type(expected_type): + expected_type = UnionTransformer.get_sub_type_in_optional(expected_type) + + val = v.__getattribute__(f.name) + if dataclasses.is_dataclass(val): + self.assert_type(expected_type, val) + elif original_type != expected_type: + raise TypeTransformerFailedError( + f"Type of Val '{original_type}' is not an instance of {expected_type}" + ) def get_literal_type(self, t: Type[T]) -> LiteralType: """ @@ -424,6 +466,10 @@ def get_literal_type(self, t: Type[T]) -> LiteralType: return _type_models.LiteralType(simple=_type_models.SimpleType.STRUCT, metadata=schema, structure=ts) def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal: + if isinstance(python_val, dict): + json_str = json.dumps(python_val) + return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct()))) + if not dataclasses.is_dataclass(python_val): raise TypeTransformerFailedError( f"{type(python_val)} is not of type @dataclass, only Dataclasses are supported for " diff --git a/tests/flytekit/unit/core/test_type_engine.py b/tests/flytekit/unit/core/test_type_engine.py index f3fabdbf42..cc0d7d336a 100644 --- a/tests/flytekit/unit/core/test_type_engine.py +++ b/tests/flytekit/unit/core/test_type_engine.py @@ -2,6 +2,7 @@ import datetime import json import os +import re import sys import tempfile import typing @@ -1412,6 +1413,82 @@ class Bar(DataClassJsonMixin): DataclassTransformer().assert_type(gt, pv) 
+@pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.") +def test_assert_dict_type(): + import pandas as pd + + @dataclass + class AnotherDataClass(DataClassJsonMixin): + z: int + + @dataclass + class Args(DataClassJsonMixin): + x: int + y: typing.Optional[str] + file: FlyteFile + dataset: StructuredDataset + another_dataclass: AnotherDataClass + + pv = tempfile.mkdtemp(prefix="flyte-") + df = pd.DataFrame({"Name": ["Tom", "Joseph"], "Age": [20, 22]}) + sd = StructuredDataset(dataframe=df, file_format="parquet") + # Test when v is a dict + vd = {"x": 3, "y": "hello", "file": FlyteFile(pv), "dataset": sd, "another_dataclass": {"z": 4}} + DataclassTransformer().assert_type(Args, vd) + + # Test when v is a dict but missing Optional keys and other keys from dataclass + md = {"x": 3, "file": FlyteFile(pv), "dataset": sd, "another_dataclass": {"z": 4}} + DataclassTransformer().assert_type(Args, md) + + # Test when v is a dict but missing non-Optional keys from dataclass + md = {"y": "hello", "file": FlyteFile(pv), "dataset": sd, "another_dataclass": {"z": 4}} + with pytest.raises( + TypeTransformerFailedError, + match=re.escape("The original fields are missing the following keys from the dataclass fields: ['x']"), + ): + DataclassTransformer().assert_type(Args, md) + + # Test when v is a dict but has extra keys that are not in dataclass + ed = {"x": 3, "y": "hello", "file": FlyteFile(pv), "dataset": sd, "another_dataclass": {"z": 4}, "z": "extra"} + with pytest.raises( + TypeTransformerFailedError, + match=re.escape("The original fields have the following extra keys that are not in dataclass fields: ['z']"), + ): + DataclassTransformer().assert_type(Args, ed) + + # Test when the type of value in the dict does not match the expected_type in the dataclass + td = {"x": "3", "y": "hello", "file": FlyteFile(pv), "dataset": sd, "another_dataclass": {"z": 4}} + with pytest.raises( + TypeTransformerFailedError, match="Type of Val '' is not an 
instance of " + ): + DataclassTransformer().assert_type(Args, td) + + +def test_to_literal_dict(): + @dataclass + class Args(DataClassJsonMixin): + x: int + y: typing.Optional[str] + + ctx = FlyteContext.current_context() + python_type = Args + expected = TypeEngine.to_literal_type(python_type) + + # Test when python_val is a dict + python_val = {"x": 3, "y": "hello"} + literal = DataclassTransformer().to_literal(ctx, python_val, python_type, expected) + literal_json = _json_format.MessageToJson(literal.scalar.generic) + assert json.loads(literal_json) == python_val + + # Test when python_val is not a dict and not a dataclass + python_val = "not a dict or dataclass" + with pytest.raises( + TypeTransformerFailedError, + match="not of type @dataclass, only Dataclasses are supported for user defined datatypes in Flytekit", + ): + DataclassTransformer().to_literal(ctx, python_val, python_type, expected) + + @dataclass class ArgsAssert(DataClassJSONMixin): x: int From 0e8abd908644247a2b8df3c27aeecc41549fd47b Mon Sep 17 00:00:00 2001 From: Troy Chiu <114708546+troychiu@users.noreply.github.com> Date: Tue, 26 Dec 2023 10:18:20 +0800 Subject: [PATCH 10/63] fix context usage (#2070) Signed-off-by: troychiu --- plugins/flytekit-flyin/flytekitplugins/flyin/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-flyin/flytekitplugins/flyin/utils.py b/plugins/flytekit-flyin/flytekitplugins/flyin/utils.py index 4b0a4ee2d4..3c13951a2e 100644 --- a/plugins/flytekit-flyin/flytekitplugins/flyin/utils.py +++ b/plugins/flytekit-flyin/flytekitplugins/flyin/utils.py @@ -52,6 +52,6 @@ def get_task_inputs(task_module_name, task_name, context_working_dir): task_module = load_module_from_path(task_module_name, os.path.join(context_working_dir, f"{task_module_name}.py")) task_def = getattr(task_module, task_name) native_inputs = TypeEngine.literal_map_to_kwargs( - FlyteContextManager(), idl_input_literals, task_def.python_interface.inputs + 
FlyteContextManager().current_context(), idl_input_literals, task_def.python_interface.inputs ) return native_inputs From e107994073c42948554cfdab6f770efdf9df9b07 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 26 Dec 2023 09:20:43 -0500 Subject: [PATCH 11/63] Error when multiple configuration plugins are installed (#2069) * Error when multiple configuration plugins are installed Signed-off-by: Thomas J. Fan * Adds semi-colon Signed-off-by: Thomas J. Fan --------- Signed-off-by: Thomas J. Fan --- flytekit/configuration/plugin.py | 4 +++- tests/flytekit/unit/cli/pyflyte/test_plugin.py | 17 +++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/flytekit/configuration/plugin.py b/flytekit/configuration/plugin.py index aa0a9f79d1..051d421d04 100644 --- a/flytekit/configuration/plugin.py +++ b/flytekit/configuration/plugin.py @@ -82,7 +82,9 @@ def _get_plugin_from_entrypoint(): if len(plugins) >= 2: plugin_names = [p.name for p in plugins] - logger.info(f"Multiple plugins seen for {group}: {plugin_names}") + raise ValueError( + f"Multiple plugins installed: {plugin_names}. flytekit only supports one installed plugin at a time." 
+ ) plugin_to_load = plugins[0] logger.info(f"Loading plugin: {plugin_to_load.name}") diff --git a/tests/flytekit/unit/cli/pyflyte/test_plugin.py b/tests/flytekit/unit/cli/pyflyte/test_plugin.py index 6e3423a1ca..5d7b8d8bf6 100644 --- a/tests/flytekit/unit/cli/pyflyte/test_plugin.py +++ b/tests/flytekit/unit/cli/pyflyte/test_plugin.py @@ -1,6 +1,8 @@ +import re from unittest.mock import Mock, patch import click +import pytest from flytekit.configuration.plugin import FlytekitPlugin, FlytekitPluginProtocol, _get_plugin_from_entrypoint @@ -14,20 +16,19 @@ def test_get_plugin_default(entry_points): @patch("flytekit.configuration.plugin.entry_points") -def test_get_plugin_load_other_plugin(entry_points, caplog): - loaded_plugin_1 = Mock() +def test_get_plugin_errors_with_multiple_plugins(entry_points, caplog): entry_1 = Mock() entry_1.name = "entry_1" - entry_1.load.side_effect = lambda: loaded_plugin_1 entry_2 = Mock() + entry_2.name = "entry_2" entry_points.side_effect = lambda *args, **kwargs: [entry_1, entry_2] - plugin = _get_plugin_from_entrypoint() - assert plugin is loaded_plugin_1 - - assert entry_1.load.call_count == 1 - assert entry_2.load.call_count == 0 + msg = re.escape( + "Multiple plugins installed: ['entry_1', 'entry_2']. flytekit only supports one installed plugin at a time." 
+ ) + with pytest.raises(ValueError, match=msg): + _get_plugin_from_entrypoint() class CustomPlugin(FlytekitPlugin): From 452a2ecde0bf4f9ddd1ed25dc8b446f1ad0e3e7d Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 2 Jan 2024 11:28:49 +0800 Subject: [PATCH 12/63] test (#2077) Signed-off-by: Future Outlier Co-authored-by: Future Outlier --- flytekit/core/context_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/core/context_manager.py b/flytekit/core/context_manager.py index d30eba0918..5b9143fb52 100644 --- a/flytekit/core/context_manager.py +++ b/flytekit/core/context_manager.py @@ -365,7 +365,7 @@ def get( fpath = self.get_secrets_file(group, key, group_version) v = os.environ.get(env_var) if v is not None: - return v + return v.strip() if os.path.exists(fpath): with open(fpath, encode_mode) as f: return f.read().strip() From d1ce05bf46a5dfc9ddadc28f6ebb507a2877f640 Mon Sep 17 00:00:00 2001 From: pryce-turner <31577879+pryce-turner@users.noreply.github.com> Date: Tue, 2 Jan 2024 11:07:43 -0800 Subject: [PATCH 13/63] Generalized subproc_execute and added tests (#2072) Signed-off-by: pryce-turner --- flytekit/extras/tasks/shell.py | 13 +++++++++++-- tests/flytekit/unit/extras/tasks/test_shell.py | 18 +++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/flytekit/extras/tasks/shell.py b/flytekit/extras/tasks/shell.py index 319a927743..57d7eb0109 100644 --- a/flytekit/extras/tasks/shell.py +++ b/flytekit/extras/tasks/shell.py @@ -34,7 +34,7 @@ class OutputLocation: location: typing.Union[os.PathLike, str] -def subproc_execute(command: List[str]) -> Tuple[str, str]: +def subproc_execute(command: typing.Union[List[str], str], **kwargs) -> Tuple[str, str]: """ Execute a command and capture its stdout and stderr. Useful for executing shell commands from within a python task. 
@@ -52,9 +52,18 @@ def subproc_execute(command: List[str]) -> Tuple[str, str]: guidance on specifying a container image in the task definition when using custom dependencies. """ + defaults = { + "stdout": subprocess.PIPE, + "stderr": subprocess.PIPE, + "text": True, + "check": True, + } + + kwargs = {**defaults, **kwargs} + try: # Execute the command and capture stdout and stderr - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True) + result = subprocess.run(command, **kwargs) # Access the stdout and stderr output return result.stdout, result.stderr diff --git a/tests/flytekit/unit/extras/tasks/test_shell.py b/tests/flytekit/unit/extras/tasks/test_shell.py index 08e5bb92af..0ca7ca66bf 100644 --- a/tests/flytekit/unit/extras/tasks/test_shell.py +++ b/tests/flytekit/unit/extras/tasks/test_shell.py @@ -11,7 +11,7 @@ import flytekit from flytekit import kwtypes from flytekit.exceptions.user import FlyteRecoverableException -from flytekit.extras.tasks.shell import OutputLocation, RawShellTask, ShellTask, get_raw_shell_task +from flytekit.extras.tasks.shell import OutputLocation, RawShellTask, ShellTask, get_raw_shell_task, subproc_execute from flytekit.types.directory import FlyteDirectory from flytekit.types.file import CSVFile, FlyteFile @@ -323,3 +323,19 @@ def test_long_run_script(): name="long-running", script=script, )() + + +def test_subproc_execute(): + cmd = ["echo", "hello"] + o, e = subproc_execute(cmd) + assert o == "hello\n" + assert e == "" + + +def test_subproc_execute_with_shell(): + with tempfile.TemporaryDirectory() as tmp: + opth = os.path.join(tmp, "test.txt") + cmd = f"echo hello > {opth}" + subproc_execute(cmd, shell=True) + cont = open(opth).read() + assert "hello" in cont From 76ebae26566cb657343cf60140d9ddc259fbce32 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 2 Jan 2024 17:26:49 -0800 Subject: [PATCH 14/63] Improve error handling for local execution (#2068) Signed-off-by: Kevin Su --- 
flytekit/clis/sdk_in_container/utils.py | 7 ++++--- flytekit/core/base_task.py | 8 ++------ flytekit/core/workflow.py | 1 - flytekit/extend/backend/base_agent.py | 2 +- plugins/flytekit-pandera/tests/test_plugin.py | 11 ++--------- .../unit/core/test_type_conversion_errors.py | 19 +++++-------------- tests/flytekit/unit/core/test_type_hints.py | 4 ++-- 7 files changed, 16 insertions(+), 36 deletions(-) diff --git a/flytekit/clis/sdk_in_container/utils.py b/flytekit/clis/sdk_in_container/utils.py index b89e6b3829..8c59b00c1c 100644 --- a/flytekit/clis/sdk_in_container/utils.py +++ b/flytekit/clis/sdk_in_container/utils.py @@ -83,7 +83,7 @@ def pretty_print_grpc_error(e: grpc.RpcError): def pretty_print_traceback(e): """ - This method will print the Traceback of a error. + This method will print the Traceback of an error. """ if e.__traceback__: stack_list = traceback.format_list(traceback.extract_tb(e.__traceback__)) @@ -115,8 +115,7 @@ def pretty_print_exception(e: Exception): if isinstance(cause, grpc.RpcError): pretty_print_grpc_error(cause) else: - click.secho(f"Underlying Exception: {cause}") - pretty_print_traceback(e) + pretty_print_traceback(cause) return if isinstance(e, grpc.RpcError): @@ -141,6 +140,8 @@ def invoke(self, ctx: click.Context) -> typing.Any: except Exception as e: if verbose > 0: click.secho("Verbose mode on") + if isinstance(e, FlyteException): + raise e.with_traceback(None) raise e pretty_print_exception(e) raise SystemExit(e) from e diff --git a/flytekit/core/base_task.py b/flytekit/core/base_task.py index 21c7279c53..b9171f54b8 100644 --- a/flytekit/core/base_task.py +++ b/flytekit/core/base_task.py @@ -628,12 +628,8 @@ def dispatch_execute( # TODO: Logger should auto inject the current context information to indicate if the task is running within # a workflow or a subworkflow etc logger.info(f"Invoking {self.name} with inputs: {native_inputs}") - try: - with timeit("Execute user level code"): - native_outputs = 
self.execute(**native_inputs) - except Exception as e: - logger.exception(f"Exception when executing {e}") - raise e + with timeit("Execute user level code"): + native_outputs = self.execute(**native_inputs) if inspect.iscoroutine(native_outputs): # If native outputs is a coroutine, then this is an eager workflow. diff --git a/flytekit/core/workflow.py b/flytekit/core/workflow.py index 41e421fe69..190d516721 100644 --- a/flytekit/core/workflow.py +++ b/flytekit/core/workflow.py @@ -291,7 +291,6 @@ def __call__(self, *args, **kwargs) -> Union[Tuple[Promise], Promise, VoidPromis if self.on_failure.python_interface and "err" in self.on_failure.python_interface.inputs: input_kwargs["err"] = FlyteError(failed_node_id="", message=str(exc)) self.on_failure(**input_kwargs) - exc.args = (f"Encountered error while executing workflow '{self.name}':\n {exc}", *exc.args[1:]) raise exc def execute(self, **kwargs): diff --git a/flytekit/extend/backend/base_agent.py b/flytekit/extend/backend/base_agent.py index 72bbcb698e..e184cb88fb 100644 --- a/flytekit/extend/backend/base_agent.py +++ b/flytekit/extend/backend/base_agent.py @@ -183,7 +183,7 @@ def execute(self, **kwargs) -> typing.Any: res = asyncio.run(self._get(resource_meta=res.resource_meta)) if res.resource.state != SUCCEEDED: - raise FlyteUserException(f"Failed to run the task {self._entity.name}") + raise FlyteUserException(f"Failed to run the task {self._entity.name} with error: {res.resource.message}") # Read the literals from a remote file, if agent doesn't return the output literals. 
if task_template.interface.outputs and len(res.resource.outputs.literals) == 0: diff --git a/plugins/flytekit-pandera/tests/test_plugin.py b/plugins/flytekit-pandera/tests/test_plugin.py index 7e73aac932..a3e7c82565 100644 --- a/plugins/flytekit-pandera/tests/test_plugin.py +++ b/plugins/flytekit-pandera/tests/test_plugin.py @@ -57,10 +57,7 @@ def wf_with_df_input(df: pandera.typing.DataFrame[InSchema]) -> pandera.typing.D with pytest.raises( pandera.errors.SchemaError, - match=( - "^Encountered error while executing workflow 'test_plugin.wf_with_df_input':\n" - " expected series 'col2' to have type float64, got object" - ), + match="expected series 'col2' to have type float64, got object", ): wf_with_df_input(df=invalid_df) @@ -75,11 +72,7 @@ def wf_invalid_output(df: pandera.typing.DataFrame[InSchema]) -> pandera.typing. with pytest.raises( TypeError, - match=( - "^Encountered error while executing workflow 'test_plugin.wf_invalid_output':\n" - " Error encountered while executing 'wf_invalid_output':\n" - " Failed to convert outputs of task" - ), + match="Error encountered while executing 'wf_invalid_output':\n" " Failed to convert outputs of task", ): wf_invalid_output(df=valid_df) diff --git a/tests/flytekit/unit/core/test_type_conversion_errors.py b/tests/flytekit/unit/core/test_type_conversion_errors.py index fbdd2c8640..e8aca2570a 100644 --- a/tests/flytekit/unit/core/test_type_conversion_errors.py +++ b/tests/flytekit/unit/core/test_type_conversion_errors.py @@ -82,11 +82,10 @@ def test_workflow_with_task_error(correct_input): with pytest.raises( TypeError, match=( - r"Encountered error while executing workflow '{}':\n" - r" Error encountered while executing 'wf_with_task_error':\n" + r"Error encountered while executing 'wf_with_task_error':\n" r" Failed to convert outputs of task '.+' at position 0:\n" r" Expected value of type \ but got .+ of type .+" - ).format(wf_with_task_error.name), + ).format(), ): wf_with_task_error(a=correct_input) @@ -96,9 +95,7 
@@ def test_workflow_with_task_error(correct_input): def test_workflow_with_input_error(incorrect_input): with pytest.raises( TypeError, - match=(r"Encountered error while executing workflow '{}':\n" r" Failed argument").format( - wf_with_output_error.name - ), + match=r"Failed argument".format(), ): wf_with_output_error(a=incorrect_input) @@ -108,10 +105,7 @@ def test_workflow_with_input_error(incorrect_input): def test_workflow_with_output_error(correct_input): with pytest.raises( TypeError, - match=( - r"Encountered error while executing workflow '{}':\n" - r" Failed to convert output in position 0 of value .+, expected type \" - ).format(wf_with_output_error.name), + match=(r"Failed to convert output in position 0 of value .+, expected type \"), ): wf_with_output_error(a=correct_input) @@ -128,9 +122,6 @@ def test_workflow_with_output_error(correct_input): def test_workflow_with_multioutput_error(workflow, position, correct_input): with pytest.raises( TypeError, - match=( - r"Encountered error while executing workflow '{}':\n " - r"Failed to convert output in position {} of value .+, expected type \" - ).format(workflow.name, position), + match=(r"Failed to convert output in position {} of value .+, expected type \").format(position), ): workflow(a=correct_input, b=correct_input) diff --git a/tests/flytekit/unit/core/test_type_hints.py b/tests/flytekit/unit/core/test_type_hints.py index 53c1e45173..bdbacf246e 100644 --- a/tests/flytekit/unit/core/test_type_hints.py +++ b/tests/flytekit/unit/core/test_type_hints.py @@ -1636,7 +1636,7 @@ def wf2(a: int, b: str) -> typing.Tuple[int, str]: with pytest.raises( ValueError, - match="Encountered error while executing workflow", + match="Error encountered while executing", ): v, s = wf1(a=10, b="hello") assert v == 11 @@ -1646,7 +1646,7 @@ def wf2(a: int, b: str) -> typing.Tuple[int, str]: with pytest.raises( ValueError, - match="Encountered error while executing workflow", + match="Error encountered while executing", 
): v, s = wf2(a=10, b="hello") assert v == 11 From 6f613e778103aa9b802c2176456e774ff2bffb12 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 2 Jan 2024 17:34:06 -0800 Subject: [PATCH 15/63] Download specific files in FlyteDirectory (#2059) Signed-off-by: Kevin Su --- flytekit/core/workflow.py | 2 +- flytekit/types/directory/types.py | 66 +++++++++++++++- flytekit/types/file/file.py | 2 +- .../types/structured/structured_dataset.py | 2 +- .../unit/core/test_flyte_directory.py | 75 ++++++++++++++----- 5 files changed, 122 insertions(+), 25 deletions(-) diff --git a/flytekit/core/workflow.py b/flytekit/core/workflow.py index 190d516721..7c5e0c65f7 100644 --- a/flytekit/core/workflow.py +++ b/flytekit/core/workflow.py @@ -460,7 +460,7 @@ def execute(self, **kwargs): raise FlyteValidationException(f"Workflow not ready, wf is currently {self}") # Create a map that holds the outputs of each node. - intermediate_node_outputs: Dict[Node, Dict[str, Promise]] = {GLOBAL_START_NODE: {}} + intermediate_node_outputs: Dict[Node, Dict[str, Promise]] = {GLOBAL_START_NODE: {}} # type: ignore # Start things off with the outputs of the global input node, i.e. the inputs to the workflow. 
# local_execute should've already ensured that all the values in kwargs are Promise objects diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 14337427c4..169e9d0a6d 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -123,7 +123,7 @@ def __init__( self, path: typing.Union[str, os.PathLike], downloader: typing.Optional[typing.Callable] = None, - remote_directory: typing.Optional[typing.Union[os.PathLike, typing.Literal[False]]] = None, + remote_directory: typing.Optional[typing.Union[os.PathLike, str, typing.Literal[False]]] = None, ): """ :param path: The source path that users are expected to call open() on @@ -138,7 +138,7 @@ def __init__( self._downloader = downloader or noop self._downloaded = False self._remote_directory = remote_directory - self._remote_source = None + self._remote_source: typing.Optional[str] = None def __fspath__(self): """ @@ -192,7 +192,7 @@ def downloaded(self) -> bool: return self._downloaded @property - def remote_directory(self) -> typing.Optional[typing.Union[os.PathLike, bool]]: + def remote_directory(self) -> typing.Optional[typing.Union[os.PathLike, bool, str]]: return self._remote_directory @property @@ -236,6 +236,66 @@ def new_dir(self, name: typing.Optional[str] = None) -> FlyteDirectory: def download(self) -> str: return self.__fspath__() + @classmethod + def listdir(cls, directory: FlyteDirectory) -> typing.List[typing.Union[FlyteDirectory, FlyteFile]]: + """ + This function will list all files and folders in the given directory, but without downloading the contents. + In addition, it will return a list of FlyteFile and FlyteDirectory objects that have ability to lazily download the + contents of the file/folder. For example: + + .. 
code-block:: python + + entity = FlyteDirectory.listdir(directory) + for e in entity: + print("s3 object:", e.remote_source) + # s3 object: s3://test-flytedir/file1.txt + # s3 object: s3://test-flytedir/file2.txt + # s3 object: s3://test-flytedir/sub_dir + + open(entity[0], "r") # This will download the file to the local disk. + open(entity[0], "r") # flytekit will read data from the local disk if you open it again. + """ + + final_path = directory.path + if directory.remote_source: + final_path = directory.remote_source + elif directory.remote_directory: + final_path = typing.cast(os.PathLike, directory.remote_directory) + + paths: typing.List[typing.Union[FlyteDirectory, FlyteFile]] = [] + file_access = FlyteContextManager.current_context().file_access + if not file_access.is_remote(final_path): + for p in os.listdir(final_path): + if os.path.isfile(os.path.join(final_path, p)): + paths.append(FlyteFile(p)) + else: + paths.append(FlyteDirectory(p)) + return paths + + def create_downloader(_remote_path: str, _local_path: str, is_multipart: bool): + return lambda: file_access.get_data(_remote_path, _local_path, is_multipart=is_multipart) + + fs = file_access.get_filesystem_for_path(final_path) + for key in fs.listdir(final_path): + remote_path = os.path.join(final_path, key["name"].split(os.sep)[-1]) + if key["type"] == "file": + local_path = file_access.get_random_local_path() + os.makedirs(pathlib.Path(local_path).parent, exist_ok=True) + downloader = create_downloader(remote_path, local_path, is_multipart=False) + + flyte_file: FlyteFile = FlyteFile(local_path, downloader=downloader) + flyte_file._remote_source = remote_path + paths.append(flyte_file) + else: + local_folder = file_access.get_random_local_directory() + downloader = create_downloader(remote_path, local_folder, is_multipart=True) + + flyte_directory: FlyteDirectory = FlyteDirectory(path=local_folder, downloader=downloader) + flyte_directory._remote_source = remote_path + 
paths.append(flyte_directory) + + return paths + def crawl( self, maxdepth: typing.Optional[int] = None, topdown: bool = True, **kwargs ) -> Generator[Tuple[typing.Union[str, os.PathLike[Any]], typing.Dict[Any, Any]], None, None]: diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index 4729c25e9c..6dac55e818 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -200,7 +200,7 @@ def __init__( self._downloader = downloader self._downloaded = False self._remote_path = remote_path - self._remote_source = None + self._remote_source: typing.Optional[str] = None def __fspath__(self): # This is where a delayed downloading of the file will happen diff --git a/flytekit/types/structured/structured_dataset.py b/flytekit/types/structured/structured_dataset.py index ce5ab52de1..bd01b871bc 100644 --- a/flytekit/types/structured/structured_dataset.py +++ b/flytekit/types/structured/structured_dataset.py @@ -300,7 +300,7 @@ def convert_schema_type_to_structured_dataset_type( def get_supported_types(): import numpy as _np - _SUPPORTED_TYPES: typing.Dict[Type, LiteralType] = { + _SUPPORTED_TYPES: typing.Dict[Type, LiteralType] = { # type: ignore _np.int32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), _np.int64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), _np.uint32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), diff --git a/tests/flytekit/unit/core/test_flyte_directory.py b/tests/flytekit/unit/core/test_flyte_directory.py index 48f778e4c7..206b58ec5d 100644 --- a/tests/flytekit/unit/core/test_flyte_directory.py +++ b/tests/flytekit/unit/core/test_flyte_directory.py @@ -5,6 +5,7 @@ import typing from unittest.mock import MagicMock +import mock import pytest import flytekit.configuration @@ -16,6 +17,7 @@ from flytekit.core.task import task from flytekit.core.type_engine import TypeEngine from flytekit.core.workflow import workflow +from flytekit.exceptions.user import FlyteAssertion 
from flytekit.models.core.types import BlobType from flytekit.models.literals import LiteralMap from flytekit.types.directory.types import FlyteDirectory, FlyteDirToMultipartBlobTransformer @@ -89,25 +91,24 @@ def test_transformer_to_literal_local(): TypeEngine.to_literal(ctx, 3, FlyteDirectory, lt) -# def test_transformer_to_literal_localss(): -# random_dir = context_manager.FlyteContext.current_context().file_access.get_random_local_directory() -# fs = FileAccessProvider(local_sandbox_dir=random_dir, raw_output_prefix=os.path.join(random_dir, "raw")) -# ctx = context_manager.FlyteContext.current_context() -# with context_manager.FlyteContextManager.with_context(ctx.with_file_access(fs)) as ctx: -# -# tf = FlyteDirToMultipartBlobTransformer() -# lt = tf.get_literal_type(FlyteDirectory) -# # Can't use if it's not a directory -# with pytest.raises(FlyteAssertion): -# p = "/tmp/flyte/xyz" -# path = pathlib.Path(p) -# try: -# path.unlink() -# except OSError: -# ... -# with open(p, "w") as fh: -# fh.write("hello world\n") -# tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt) +def test_transformer_to_literal_local_path(): + random_dir = context_manager.FlyteContext.current_context().file_access.get_random_local_directory() + fs = FileAccessProvider(local_sandbox_dir=random_dir, raw_output_prefix=os.path.join(random_dir, "raw")) + ctx = context_manager.FlyteContext.current_context() + with context_manager.FlyteContextManager.with_context(ctx.with_file_access(fs)) as ctx: + tf = FlyteDirToMultipartBlobTransformer() + lt = tf.get_literal_type(FlyteDirectory) + # Can't use if it's not a directory + with pytest.raises(FlyteAssertion): + p = ctx.file_access.get_random_local_path() + path = pathlib.Path(p) + try: + path.unlink() + except OSError: + ... 
+ with open(p, "w") as fh: + fh.write("hello world\n") + tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt) def test_transformer_to_literal_remote(): @@ -279,3 +280,39 @@ def test_directory_guess(): fft = transformer.guess_python_type(lt) assert issubclass(fft, FlyteDirectory) assert fft.extension() == "" + + +@mock.patch("s3fs.core.S3FileSystem._lsdir") +@mock.patch("flytekit.core.data_persistence.FileAccessProvider.get_data") +def test_list_dir(mock_get_data, mock_lsdir): + remote_dir = "s3://test-flytedir" + mock_lsdir.return_value = [ + {"name": os.path.join(remote_dir, "file1.txt"), "type": "file"}, + {"name": os.path.join(remote_dir, "file2.txt"), "type": "file"}, + {"name": os.path.join(remote_dir, "subdir"), "type": "directory"}, + ] + + mock_get_data.side_effect = lambda: Exception("Should not be called") + + temp_dir = tempfile.mkdtemp(prefix="temp_example_") + file1_path = os.path.join(temp_dir, "file1.txt") + sub_dir = os.path.join(temp_dir, "subdir") + os.mkdir(sub_dir) + with open(file1_path, "w") as file1: + file1.write("Content of file1.txt") + + f = FlyteDirectory(temp_dir) + paths = FlyteDirectory.listdir(f) + assert len(paths) == 2 + + f = FlyteDirectory(path=temp_dir, remote_directory=remote_dir) + paths = FlyteDirectory.listdir(f) + assert len(paths) == 3 + + f = FlyteDirectory(path=temp_dir) + f._remote_source = remote_dir + paths = FlyteDirectory.listdir(f) + assert len(paths) == 3 + + with pytest.raises(Exception): + open(paths[0], "r") From 9bf7afae0f63510712d30e2e4fa5fbd219750b81 Mon Sep 17 00:00:00 2001 From: Honnix Date: Wed, 3 Jan 2024 20:58:19 +0100 Subject: [PATCH 16/63] Constraint the full fsspec family (#2087) Signed-off-by: Hongxin Liang --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e166b337dd..f85b00b535 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ readme = { file = "README.md", content-type = "text/markdown" } 
requires-python = ">=3.8,<3.13" dependencies = [ # Please maintain an alphabetical order in the following list - "adlfs", + "adlfs>=2023.3.0,<=2023.9.2", "click>=6.6,<9.0", "cloudpickle>=2.0.0", "cookiecutter>=1.7.3", @@ -23,7 +23,7 @@ dependencies = [ "docstring-parser>=0.9.0", "flyteidl>=1.10.0", "fsspec>=2023.3.0,<=2023.9.2", - "gcsfs", + "gcsfs>=2023.3.0,<=2023.9.2", "googleapis-common-protos>=1.57", "grpcio", "grpcio-status", @@ -44,7 +44,7 @@ dependencies = [ "requests>=2.18.4,<3.0.0", "rich", "rich_click", - "s3fs>=0.6.0", + "s3fs>=2023.3.0,<=2023.9.2", "statsd>=3.0.0,<4.0.0", "typing_extensions", "urllib3>=1.22,<2.0.0", From 5841a1e488a21f3bbd697a7bdf36e1b7bf226334 Mon Sep 17 00:00:00 2001 From: Nikki Everett Date: Thu, 4 Jan 2024 15:58:11 -0600 Subject: [PATCH 17/63] Update default template used by pyflyte init (#2041) Signed-off-by: nikki everett --- flytekit/clis/sdk_in_container/init.py | 2 +- tests/flytekit/unit/cli/pyflyte/test_init.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flytekit/clis/sdk_in_container/init.py b/flytekit/clis/sdk_in_container/init.py index 627b393578..23df6ab62d 100644 --- a/flytekit/clis/sdk_in_container/init.py +++ b/flytekit/clis/sdk_in_container/init.py @@ -5,7 +5,7 @@ @click.command("init") @click.option( "--template", - default="simple-example", + default="basic-template-imagespec", help="cookiecutter template folder name to be used in the repo - https://github.com/flyteorg/flytekit-python-template.git", ) @click.argument("project-name") diff --git a/tests/flytekit/unit/cli/pyflyte/test_init.py b/tests/flytekit/unit/cli/pyflyte/test_init.py index 0a66433625..97d24ca50d 100644 --- a/tests/flytekit/unit/cli/pyflyte/test_init.py +++ b/tests/flytekit/unit/cli/pyflyte/test_init.py @@ -10,7 +10,7 @@ "command", [ ["example"], - ["example", "--template", "simple-example"], + ["example", "--template", "basic-template-imagespec"], ["example", "--template", "bayesian-optimization"], ], ) From 
b2f3b773bd23749d85fe404778dee715018ac309 Mon Sep 17 00:00:00 2001 From: Dan Rammer Date: Fri, 5 Jan 2024 14:29:03 -0600 Subject: [PATCH 18/63] Support PodTemplate in ArrayNode (#2088) * added _cmd_prefix handling Signed-off-by: Daniel Rammer * fixed typing imports Signed-off-by: Daniel Rammer * add get_config Signed-off-by: Kevin Su * updating get_config to use underlying functions config Signed-off-by: Daniel Rammer --------- Signed-off-by: Daniel Rammer Signed-off-by: Kevin Su Co-authored-by: Kevin Su --- flytekit/core/array_node_map_task.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/flytekit/core/array_node_map_task.py b/flytekit/core/array_node_map_task.py index 904ba51f42..e4fef9ed10 100644 --- a/flytekit/core/array_node_map_task.py +++ b/flytekit/core/array_node_map_task.py @@ -82,6 +82,7 @@ def __init__( ).hexdigest() self._name = f"{mod}.map_{f}_{h}-arraynode" + self._cmd_prefix: Optional[List[str]] = None self._concurrency: Optional[int] = concurrency self._min_successes: Optional[int] = min_successes self._min_success_ratio: Optional[float] = min_success_ratio @@ -149,6 +150,9 @@ def prepare_target(self): def get_custom(self, settings: SerializationSettings) -> Dict[str, Any]: return ArrayJob(parallelism=self._concurrency, min_success_ratio=self._min_success_ratio).to_dict() + def get_config(self, settings: SerializationSettings) -> Optional[Dict[str, str]]: + return self.python_function_task.get_config(settings) + def get_container(self, settings: SerializationSettings) -> Container: with self.prepare_target(): return self.python_function_task.get_container(settings) @@ -185,11 +189,13 @@ def get_command(self, settings: SerializationSettings) -> List[str]: *mt.loader_args(settings, self), ] - # TODO: add support for ContainerTask - # if self._cmd_prefix: - # return self._cmd_prefix + container_args + if self._cmd_prefix: + return self._cmd_prefix + container_args return container_args + def set_command_prefix(self, 
cmd: Optional[List[str]]): + self._cmd_prefix = cmd + def __call__(self, *args, **kwargs): """ This call method modifies the kwargs and adds kwargs from partial. From 9c45cb21c619d36461b30cbc2fd0956117ef3b2c Mon Sep 17 00:00:00 2001 From: Douenergy <103009868+douenergy@users.noreply.github.com> Date: Sat, 6 Jan 2024 17:14:26 +0800 Subject: [PATCH 19/63] remove unnecessary newline in plugin readme (#2090) Signed-off-by: DouEnergy --- plugins/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/README.md b/plugins/README.md index abf47b9556..d738c5b5a4 100644 --- a/plugins/README.md +++ b/plugins/README.md @@ -23,8 +23,7 @@ All the Flytekit plugins maintained by the core team are added here. It is not n | Snowflake | ```bash pip install flytekitplugins-snowflake``` | Use Snowflake as a 'data warehouse-as-a-service' within Flyte | [![PyPI version fury.io](https://badge.fury.io/py/flytekitplugins-snowflake.svg)](https://pypi.python.org/pypi/flytekitplugins-snowflake/) | Backend | | dbt | ```bash pip install flytekitplugins-dbt``` | Run dbt within Flyte | [![PyPI version fury.io](https://badge.fury.io/py/flytekitplugins-dbt.svg)](https://pypi.python.org/pypi/flytekitplugins-dbt/) | Flytekit-only | | Huggingface | ```bash pip install flytekitplugins-huggingface``` | Read & write Hugginface Datasets as Flyte StructuredDatasets | [![PyPI version fury.io](https://badge.fury.io/py/flytekitplugins-huggingface.svg)](https://pypi.python.org/pypi/flytekitplugins-huggingface/) | Flytekit-only | -| DuckDB | ```bash pip install flytekitplugins-duckdb``` | Run analytical workloads with ease using DuckDB. 
-| [![PyPI version fury.io](https://badge.fury.io/py/flytekitplugins-duckdb.svg)](https://pypi.python.org/pypi/flytekitplugins-duckdb/) | Flytekit-only | +| DuckDB | ```bash pip install flytekitplugins-duckdb``` | Run analytical workloads with ease using DuckDB | [![PyPI version fury.io](https://badge.fury.io/py/flytekitplugins-duckdb.svg)](https://pypi.python.org/pypi/flytekitplugins-duckdb/) | Flytekit-only | ## Have a Plugin Idea? 💡 Please [file an issue](https://github.com/flyteorg/flyte/issues/new?assignees=&labels=untriaged%2Cplugins&template=backend-plugin-request.md&title=%5BPlugin%5D). From f22cec205314ea2699560ae188dc866c87dd33b2 Mon Sep 17 00:00:00 2001 From: Austin Liu Date: Tue, 9 Jan 2024 04:21:31 +0800 Subject: [PATCH 20/63] add fs path exists checking (fix flyteorg/flyte#4470) (#2079) Signed-off-by: Austin Liu --- flytekit/core/data_persistence.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flytekit/core/data_persistence.py b/flytekit/core/data_persistence.py index d1651e8548..1579ef3f6b 100644 --- a/flytekit/core/data_persistence.py +++ b/flytekit/core/data_persistence.py @@ -33,7 +33,7 @@ from flytekit.configuration import DataConfig from flytekit.core.local_fsspec import FlyteLocalFileSystem from flytekit.core.utils import timeit -from flytekit.exceptions.user import FlyteAssertion +from flytekit.exceptions.user import FlyteAssertion, FlyteValueException from flytekit.interfaces.random import random from flytekit.loggers import logger @@ -254,6 +254,8 @@ def get(self, from_path: str, to_path: str, recursive: bool = False, **kwargs): return to_path except OSError as oe: logger.debug(f"Error in getting {from_path} to {to_path} rec {recursive} {oe}") + if not file_system.exists(from_path): + raise FlyteValueException(from_path, "File not found") file_system = self.get_filesystem(get_protocol(from_path), anonymous=True) if file_system is not None: logger.debug(f"Attempting anonymous get with {file_system}") From 
b5dbd857a5b81757d13d03116574ba705ea8798c Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 8 Jan 2024 16:31:50 -0500 Subject: [PATCH 21/63] add monodocs redirect banner (#2091) Signed-off-by: Niels Bantilan --- docs/source/conf.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index b12e355845..16c5165296 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -106,20 +106,29 @@ html_theme = "furo" html_title = "Flyte" +announcement = """ +📢 This is the old documentation for Flyte. +Please visit the new documentation here. +""" + html_theme_options = { "light_css_variables": { "color-brand-primary": "#4300c9", "color-brand-content": "#4300c9", + "color-announcement-background": "#FEE7B8", + "color-announcement-text": "#535353", }, "dark_css_variables": { "color-brand-primary": "#9D68E4", "color-brand-content": "#9D68E4", + "color-announcement-background": "#493100", }, # custom flyteorg furo theme options "github_repo": "flytekit", "github_username": "flyteorg", "github_commit": "master", "docs_path": "docs/source", # path to documentation source + "announcement": announcement, } templates_path = ["_templates"] From 2fde2edad0ce0f01687dd3df1f49f27741caf5d7 Mon Sep 17 00:00:00 2001 From: Ketan Umare <16888709+kumare3@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:19:15 -0800 Subject: [PATCH 22/63] Accelerator docs (#2076) * Accelerator docs Signed-off-by: Ketan Umare * updating docs Signed-off-by: Ketan Umare * Updated docs Signed-off-by: Ketan Umare * updated docs Signed-off-by: Ketan Umare * more docs update Signed-off-by: Ketan Umare * updated Signed-off-by: Ketan Umare * updated docs Signed-off-by: Ketan Umare --------- Signed-off-by: Ketan Umare Co-authored-by: Ketan Umare --- docs/source/extras.accelerators.rst | 4 + docs/source/index.rst | 1 + flytekit/extras/accelerators.py | 188 ++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+) create mode 100644 
docs/source/extras.accelerators.rst diff --git a/docs/source/extras.accelerators.rst b/docs/source/extras.accelerators.rst new file mode 100644 index 0000000000..2655200a23 --- /dev/null +++ b/docs/source/extras.accelerators.rst @@ -0,0 +1,4 @@ +.. automodule:: flytekit.extras.accelerators + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst index f123248cec..7262d7668e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -80,6 +80,7 @@ Expected output: testing extend deck + extras.accelerators plugins/index tasks.extend types.extend diff --git a/flytekit/extras/accelerators.py b/flytekit/extras/accelerators.py index 3615f32bdb..b58159e1f0 100644 --- a/flytekit/extras/accelerators.py +++ b/flytekit/extras/accelerators.py @@ -1,3 +1,93 @@ +""" +Specifying Accelerators +========================== + +.. tags:: MachineLearning, Advanced, Hardware + +Flyte allows you to specify `gpu` resources for a given task. However, in some cases, you may want to use a different +accelerator type, such as TPU, specific variations of GPUs, or fractional GPUs. You can configure the Flyte backend to +use your preferred accelerators, and those who write workflow code can import the `flytekit.extras.accelerators` module +to specify an accelerator in the task decorator. + + +If you want to use a specific GPU device, you can pass the device name directly to the task decorator, e.g.: + +.. code-block:: + + @task( + limits=Resources(gpu="1"), + accelerator=GPUAccelerator("nvidia-tesla-v100"), + ) + def my_task() -> None: + ... + + +Base Classes +------------ +These classes can be used to create custom accelerator type constants. For example, you can create a TPU accelerator. + + + +.. currentmodule:: flytekit.extras.accelerators + +.. 
autosummary:: + + BaseAccelerator + GPUAccelerator + MultiInstanceGPUAccelerator + +But, often, you may want to use a well known accelerator type, and to simplify this, flytekit provides a set of +predefined accelerator constants, as described in the next section. + + +Predefined Accelerator Constants +-------------------------------- + +The `flytekit.extras.accelerators` module provides some constants for known accelerators, listed below, but this is not +a complete list. If you know the name of the accelerator, you can pass the string name to the task decorator directly. + +If using the constants, you can import them directly from the module, e.g.: + +.. code-block:: + + from flytekit.extras.accelerators import T4 + + @task( + limits=Resources(gpu="1"), + accelerator=T4, + ) + def my_task() -> None: + ... + +if you want to use a fractional GPU, you can use the ``partitioned`` method on the accelerator constant, e.g.: + +.. code-block:: + + from flytekit.extras.accelerators import A100 + + @task( + limits=Resources(gpu="1"), + accelerator=A100.partition_2g_10gb, + ) + def my_task() -> None: + ... + +.. currentmodule:: flytekit.extras.accelerators + +.. autosummary:: + + A10G + L4 + K80 + M60 + P4 + P100 + T4 + V100 + A100 + A100_80GB + +""" import abc import copy from typing import ClassVar, Generic, Optional, Type, TypeVar @@ -9,12 +99,22 @@ class BaseAccelerator(abc.ABC, Generic[T]): + """ + Base class for all accelerator types. This class is not meant to be instantiated directly. + """ + @abc.abstractmethod def to_flyte_idl(self) -> T: ... class GPUAccelerator(BaseAccelerator): + """ + Class that represents a GPU accelerator. The class can be instantiated with any valid GPU device name, but + it is recommended to use one of the pre-defined constants below, as name has to match the name of the device + configured on the cluster. 
+ """ + def __init__(self, device: str) -> None: self._device = device @@ -22,17 +122,46 @@ def to_flyte_idl(self) -> tasks_pb2.GPUAccelerator: return tasks_pb2.GPUAccelerator(device=self._device) +#: use this constant to specify that the task should run on an +#: `NVIDIA A10 Tensor Core GPU `_ A10G = GPUAccelerator("nvidia-a10g") + +#: use this constant to specify that the task should run on an +#: `NVIDIA L4 Tensor Core GPU `_ L4 = GPUAccelerator("nvidia-l4-vws") + +#: use this constant to specify that the task should run on an +#: `NVIDIA Tesla K80 GPU `_ K80 = GPUAccelerator("nvidia-tesla-k80") + +#: use this constant to specify that the task should run on an +#: `NVIDIA Tesla M60 GPU `_ M60 = GPUAccelerator("nvidia-tesla-m60") + +#: use this constant to specify that the task should run on an +#: `NVIDIA Tesla P4 GPU `_ P4 = GPUAccelerator("nvidia-tesla-p4") + +#: use this constant to specify that the task should run on an +#: `NVIDIA Tesla P100 GPU `_ P100 = GPUAccelerator("nvidia-tesla-p100") + +#: use this constant to specify that the task should run on an +#: `NVIDIA T4 Tensor Core GPU `_ T4 = GPUAccelerator("nvidia-tesla-t4") + +#: use this constant to specify that the task should run on an +#: `NVIDIA Tesla V100 GPU `_ V100 = GPUAccelerator("nvidia-tesla-v100") class MultiInstanceGPUAccelerator(BaseAccelerator): + """ + Base class for all multi-instance GPU accelerator types. It is recommended to use one of the pre-defined constants + below, as name has to match the name of the device configured on the cluster. + For example, to specify a 10GB partition of an A100 GPU, use ``A100.partition_2g_10gb``. + """ + device: ClassVar[str] _partition_size: Optional[str] @@ -65,13 +194,44 @@ class _A100_Base(MultiInstanceGPUAccelerator): class _A100(_A100_Base): + """ + Class that represents an `NVIDIA A100 GPU `_. It is possible + to specify a partition of an A100 GPU by using the provided partitions on the class.
For example, to specify a + 10GB partition, use ``A100.partition_2g_10gb``. + Refer to `Partitioned GPUs `_ + """ + partition_1g_5gb = _A100_Base.partitioned("1g.5gb") + """ + 5GB partition of an A100 GPU. + """ partition_2g_10gb = _A100_Base.partitioned("2g.10gb") + """ + 10GB partition of an A100 GPU - 2x5GB slices with 2/7th of the SM. + """ partition_3g_20gb = _A100_Base.partitioned("3g.20gb") + """ + 20GB partition of an A100 GPU - 4x5GB slices, with 3/7th fraction of SM (Streaming multiprocessor). + """ partition_4g_20gb = _A100_Base.partitioned("4g.20gb") + """ + 20GB partition of an A100 GPU - 4x5GB slices, with 4/7th fraction of SM. + """ partition_7g_40gb = _A100_Base.partitioned("7g.40gb") + """ + 40GB partition of an A100 GPU - 8x5GB slices, with 7/7th fraction of SM. + """ +#: Use this constant to specify that the task should run on an entire +#: `NVIDIA A100 GPU `_. Fractional partitions are also available. +#: +#: Use pre-defined partitions (as instance attributes). For example, to specify a 10GB partition, use +#: ``A100.partition_2g_10gb``. +#: All partitions are nested in the class as follows: +#: +#: .. autoclass:: _A100 +#: :members: A100 = _A100() @@ -80,11 +240,39 @@ class _A100_80GB_Base(MultiInstanceGPUAccelerator): class _A100_80GB(_A100_80GB_Base): + """ + Partitions of an `NVIDIA A100 80GB GPU `_. + """ + partition_1g_10gb = _A100_80GB_Base.partitioned("1g.10gb") + """ + 10GB partition of an A100 80GB GPU - 2x5GB slices with 1/7th of the SM. + """ partition_2g_20gb = _A100_80GB_Base.partitioned("2g.20gb") + """ + 20GB partition of an A100 80GB GPU - 4x5GB slices with 2/7th of the SM. + """ partition_3g_40gb = _A100_80GB_Base.partitioned("3g.40gb") + """ + 40GB partition of an A100 80GB GPU - 8x5GB slices with 3/7th of the SM. + """ partition_4g_40gb = _A100_80GB_Base.partitioned("4g.40gb") + """ + 40GB partition of an A100 80GB GPU - 8x5GB slices with 4/7th of the SM.
+ """ partition_7g_80gb = _A100_80GB_Base.partitioned("7g.80gb") + """ + 80GB partition of an A100 80GB GPU - 16x5GB slices with 7/7th of the SM. + """ +#: use this constant to specify that the task should run on an entire +#: `NVIDIA A100 80GB GPU `_. Fractional partitions are also available. +#: +#: Use pre-defined partitions (as instance attributes). For example, to specify a 10GB partition, use +#: ``A100_80GB.partition_1g_10gb``. +#: All available partitions are listed below: +#: +#: .. autoclass:: _A100_80GB +#: :members: A100_80GB = _A100_80GB() From b3f0c408188c4e240a9c593b7a8c734a7ddc6405 Mon Sep 17 00:00:00 2001 From: Ketan Umare <16888709+kumare3@users.noreply.github.com> Date: Tue, 9 Jan 2024 22:19:42 -0800 Subject: [PATCH 23/63] Remote workflow & task execution (#2094) * Add support for running remote-tasks and remote-workflows Signed-off-by: Ketan Umare * updated Signed-off-by: Ketan Umare --------- Signed-off-by: Ketan Umare Co-authored-by: Ketan Umare --- flytekit/clis/sdk_in_container/run.py | 145 ++++++++++++++++++-------- 1 file changed, 102 insertions(+), 43 deletions(-) diff --git a/flytekit/clis/sdk_in_container/run.py b/flytekit/clis/sdk_in_container/run.py index 9c2bb2c17a..79e7f0fce5 100644 --- a/flytekit/clis/sdk_in_container/run.py +++ b/flytekit/clis/sdk_in_container/run.py @@ -32,6 +32,7 @@ from flytekit.core.workflow import PythonFunctionWorkflow, WorkflowBase from flytekit.exceptions.system import FlyteSystemException from flytekit.interaction.click_types import FlyteLiteralConverter, key_value_callback +from flytekit.interaction.string_literals import literal_string_repr from flytekit.loggers import logger from flytekit.models import security from flytekit.models.common import RawOutputDataConfig @@ -240,10 +241,10 @@ class RunLevelParams(PyFlyteParams): param_decls=["--limit", "limit"], required=False, type=int, - default=10, + default=50, + hidden=True, show_default=True, - help="Use this to limit number of launch plans retreived from
the backend, " - "if `from-server` option is used", + help="Use this to limit number of entities to fetch", ) ) cluster_pool: str = make_click_option_field( @@ -553,32 +554,40 @@ def _run(*args, **kwargs): return _run -class DynamicLaunchPlanCommand(click.RichCommand): +class DynamicEntityLaunchCommand(click.RichCommand): """ This is a dynamic command that is created for each launch plan. This is used to execute a launch plan. It will fetch the launch plan from remote and create parameters from all the inputs of the launch plan. """ - def __init__(self, name: str, h: str, lp_name: str, **kwargs): + LP_LAUNCHER = "lp" + TASK_LAUNCHER = "task" + + def __init__(self, name: str, h: str, entity_name: str, launcher: str, **kwargs): super().__init__(name=name, help=h, **kwargs) - self._lp_name = lp_name - self._lp = None + self._entity_name = entity_name + self._launcher = launcher + self._entity = None - def _fetch_launch_plan(self, ctx: click.Context) -> FlyteLaunchPlan: - if self._lp: - return self._lp + def _fetch_entity(self, ctx: click.Context) -> typing.Union[FlyteLaunchPlan, FlyteTask]: + if self._entity: + return self._entity run_level_params: RunLevelParams = ctx.obj r = run_level_params.remote_instance() - self._lp = r.fetch_launch_plan(run_level_params.project, run_level_params.domain, self._lp_name) - return self._lp + if self._launcher == self.LP_LAUNCHER: + entity = r.fetch_launch_plan(run_level_params.project, run_level_params.domain, self._entity_name) + else: + entity = r.fetch_task(run_level_params.project, run_level_params.domain, self._entity_name) + self._entity = entity + return entity def _get_params( self, ctx: click.Context, inputs: typing.Dict[str, Variable], native_inputs: typing.Dict[str, type], - fixed: typing.Dict[str, Literal], - defaults: typing.Dict[str, Parameter], + fixed: typing.Optional[typing.Dict[str, Literal]] = None, + defaults: typing.Optional[typing.Dict[str, Parameter]] = None, ) -> typing.List["click.Parameter"]: params = [] 
flyte_ctx = context_manager.FlyteContextManager.current_context() @@ -586,21 +595,31 @@ def _get_params( if fixed and name in fixed: continue required = True + default_val = None if defaults and name in defaults: - required = False - params.append(to_click_option(ctx, flyte_ctx, name, var, native_inputs[name], None, required)) + if not defaults[name].required: + required = False + default_val = literal_string_repr(defaults[name].default) if defaults[name].default else None + params.append(to_click_option(ctx, flyte_ctx, name, var, native_inputs[name], default_val, required)) return params def get_params(self, ctx: click.Context) -> typing.List["click.Parameter"]: if not self.params: self.params = [] - lp = self._fetch_launch_plan(ctx) - if lp.interface: - if lp.interface.inputs: - types = TypeEngine.guess_python_types(lp.interface.inputs) - self.params = self._get_params( - ctx, lp.interface.inputs, types, lp.fixed_inputs.literals, lp.default_inputs.parameters - ) + entity = self._fetch_entity(ctx) + if entity.interface: + if entity.interface.inputs: + types = TypeEngine.guess_python_types(entity.interface.inputs) + if isinstance(entity, FlyteLaunchPlan): + self.params = self._get_params( + ctx, + entity.interface.inputs, + types, + entity.fixed_inputs.literals, + entity.default_inputs.parameters, + ) + else: + self.params = self._get_params(ctx, entity.interface.inputs, types) return super().get_params(ctx) @@ -611,40 +630,61 @@ def invoke(self, ctx: click.Context) -> typing.Any: """ run_level_params: RunLevelParams = ctx.obj r = run_level_params.remote_instance() - lp = self._fetch_launch_plan(ctx) + entity = self._fetch_entity(ctx) run_remote( r, - lp, + entity, run_level_params.project, run_level_params.domain, ctx.params, run_level_params, - type_hints=lp.python_interface.inputs if lp.python_interface else None, + type_hints=entity.python_interface.inputs if entity.python_interface else None, ) -class RemoteLaunchPlanGroup(click.RichGroup): +class 
RemoteEntityGroup(click.RichGroup): """ click multicommand that retrieves launchplans from a remote flyte instance and executes them. """ - COMMAND_NAME = "remote-launchplan" + LAUNCHPLAN_COMMAND = "remote-launchplan" + WORKFLOW_COMMAND = "remote-workflow" + TASK_COMMAND = "remote-task" - def __init__(self): + def __init__(self, command_name: str): super().__init__( - name="from-server", - help="Retrieve launchplans from a remote flyte instance and execute them.", + name=command_name, + help=f"Retrieve {command_name} from a remote flyte instance and execute them.", params=[ click.Option( - ["--limit"], help="Limit the number of launchplans to retrieve.", default=10, show_default=True + ["--limit", "limit"], + help=f"Limit the number of {command_name}'s to retrieve.", + default=50, + show_default=True, ) ], ) - self._lps = [] + self._command_name = command_name + self._entities = [] + + def _get_entities(self, r: FlyteRemote, project: str, domain: str, limit: int) -> typing.List[str]: + """ + Retrieves the right entities from the remote flyte instance.
+ """ + if self._command_name == self.LAUNCHPLAN_COMMAND: + lps = r.client.list_launch_plan_ids_paginated(project=project, domain=domain, limit=limit) + return [l.name for l in lps[0]] + elif self._command_name == self.WORKFLOW_COMMAND: + wfs = r.client.list_workflow_ids_paginated(project=project, domain=domain, limit=limit) + return [w.name for w in wfs[0]] + elif self._command_name == self.TASK_COMMAND: + tasks = r.client.list_task_ids_paginated(project=project, domain=domain, limit=limit) + return [t.name for t in tasks[0]] + return [] def list_commands(self, ctx): - if self._lps or ctx.obj is None: - return self._lps + if self._entities or ctx.obj is None: + return self._entities run_level_params: RunLevelParams = ctx.obj r = run_level_params.remote_instance() @@ -653,17 +693,28 @@ def list_commands(self, ctx): with progress: progress.start_task(task) try: - lps = r.client.list_launch_plan_ids_paginated( - project=run_level_params.project, domain=run_level_params.domain, limit=run_level_params.limit + self._entities = self._get_entities( + r, run_level_params.project, run_level_params.domain, run_level_params.limit ) - self._lps = [l.name for l in lps[0]] - return self._lps + return self._entities except FlyteSystemException as e: pretty_print_exception(e) return [] def get_command(self, ctx, name): - return DynamicLaunchPlanCommand(name=name, h="Execute a launchplan from remote.", lp_name=name) + if self._command_name in [self.LAUNCHPLAN_COMMAND, self.WORKFLOW_COMMAND]: + return DynamicEntityLaunchCommand( + name=name, + h=f"Execute a {self._command_name}.", + entity_name=name, + launcher=DynamicEntityLaunchCommand.LP_LAUNCHER, + ) + return DynamicEntityLaunchCommand( + name=name, + h=f"Execute a {self._command_name}.", + entity_name=name, + launcher=DynamicEntityLaunchCommand.TASK_LAUNCHER, + ) class WorkflowCommand(click.RichGroup): @@ -789,7 +840,11 @@ def list_commands(self, ctx, add_remote: bool = True): self._files = [str(p) for p in 
pathlib.Path(".").glob("*.py") if str(p) != "__init__.py"] self._files = sorted(self._files) if add_remote: - self._files = self._files + [RemoteLaunchPlanGroup.COMMAND_NAME] + self._files = self._files + [ + RemoteEntityGroup.LAUNCHPLAN_COMMAND, + RemoteEntityGroup.WORKFLOW_COMMAND, + RemoteEntityGroup.TASK_COMMAND, + ] return self._files def get_command(self, ctx, filename): @@ -800,8 +855,12 @@ def get_command(self, ctx, filename): params.update(ctx.params) params.update(ctx.obj) ctx.obj = self._run_params.from_dict(params) - if filename == RemoteLaunchPlanGroup.COMMAND_NAME: - return RemoteLaunchPlanGroup() + if filename == RemoteEntityGroup.LAUNCHPLAN_COMMAND: + return RemoteEntityGroup(RemoteEntityGroup.LAUNCHPLAN_COMMAND) + elif filename == RemoteEntityGroup.WORKFLOW_COMMAND: + return RemoteEntityGroup(RemoteEntityGroup.WORKFLOW_COMMAND) + elif filename == RemoteEntityGroup.TASK_COMMAND: + return RemoteEntityGroup(RemoteEntityGroup.TASK_COMMAND) return WorkflowCommand(filename, name=filename, help=f"Run a [workflow|task] from {filename}") From dc35e7f91dc22b7a124b1984fb134d5beec196ff Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Wed, 10 Jan 2024 15:00:38 -0800 Subject: [PATCH 24/63] Print log links in local execution (#2097) Signed-off-by: Kevin Su --- flytekit/extend/backend/base_agent.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flytekit/extend/backend/base_agent.py b/flytekit/extend/backend/base_agent.py index e184cb88fb..02c28287a1 100644 --- a/flytekit/extend/backend/base_agent.py +++ b/flytekit/extend/backend/base_agent.py @@ -241,7 +241,10 @@ async def _get(self, resource_meta: bytes) -> GetTaskResponse: else: res = self._agent.get(grpc_ctx, resource_meta) state = res.resource.state - logger.info(f"Task state: {state}, State message: {res.resource.message}") + progress.print(f"Task state: {State.Name(state)}, State message: {res.resource.message}") + if hasattr(res.resource, "log_links"): + for link in 
res.resource.log_links: + progress.print(f"{link.name}: {link.uri}") return res def signal_handler(self, resource_meta: bytes, signum: int, frame: FrameType) -> typing.Any: From 949bc1b40e1966b409c821b96011d900241b22fd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 15:02:16 -0800 Subject: [PATCH 25/63] Bump fonttools (#2092) Bumps [fonttools](https://github.com/fonttools/fonttools) from 4.41.1 to 4.43.0. - [Release notes](https://github.com/fonttools/fonttools/releases) - [Changelog](https://github.com/fonttools/fonttools/blob/main/NEWS.rst) - [Commits](https://github.com/fonttools/fonttools/compare/4.41.1...4.43.0) --- updated-dependencies: - dependency-name: fonttools dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../remote/mock_flyte_repo/workflows/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt b/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt index 3d4c3fd037..48ee6ca93c 100644 --- a/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt +++ b/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt @@ -91,7 +91,7 @@ flyteidl==1.5.13 # via flytekit flytekit==1.8.2 # via -r requirements.in -fonttools==4.41.1 +fonttools==4.43.0 # via matplotlib frozenlist==1.4.0 # via From d451ee2171e6ab9bef53b65fbdfac1e54feb00ac Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Thu, 11 Jan 2024 12:38:49 -0500 Subject: [PATCH 26/63] add monodocs index page in flytekit (#2100) * add monodocs index page in flytekit Signed-off-by: Niels Bantilan * updates Signed-off-by: Niels Bantilan --------- Signed-off-by: Niels Bantilan --- docs/source/conf.py | 2 +- docs/source/docs_index.rst | 22 ++++++++++++++++++++++ 
docs/source/index.rst | 5 +++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 docs/source/docs_index.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 16c5165296..09eb276ca0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -96,7 +96,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . -exclude_patterns = [] +exclude_patterns = ["docs_index.rst"] # -- Options for HTML output ------------------------------------------------- diff --git a/docs/source/docs_index.rst b/docs/source/docs_index.rst new file mode 100644 index 0000000000..dbbf95af83 --- /dev/null +++ b/docs/source/docs_index.rst @@ -0,0 +1,22 @@ +********************** +Flytekit API Reference +********************** + +.. toctree:: + :maxdepth: 2 + + design/index + flytekit + configuration + remote + clients + testing + extras.accelerators + extend + deck + plugins/index + tasks.extend + types.extend + experimental + pyflyte + contributing diff --git a/docs/source/index.rst b/docs/source/index.rst index 7262d7668e..d4d0730236 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,3 +1,8 @@ +.. DO NOT EDIT THIS FILE! + This file is the index for the old flytekit documentation. The index for the monodocs is now + at `docs_index.rst`. Please edit that file if you want to add new entries to the flytekit api + documentation. + .. simpleble documentation master file, created by sphinx-quickstart on Fri Mar 9 04:07:53 2018. 
You can adapt this file completely to your liking, but it should at least From 3a2ab2c0e196dbe3e61926f0a5bba9fc69fe10a9 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Thu, 11 Jan 2024 12:42:02 -0500 Subject: [PATCH 27/63] add monodocs redirects to the flytekit readthedocs project (#2095) * add monodocs redirects to the flytekit readthedocs project Signed-off-by: Niels Bantilan * update requirements Signed-off-by: Niels Bantilan * add redirects enabling flag Signed-off-by: Niels Bantilan * update Signed-off-by: Niels Bantilan --------- Signed-off-by: Niels Bantilan --- doc-requirements.in | 32 +- doc-requirements.txt | 1032 +++++++----------------------------------- docs/source/conf.py | 25 + 3 files changed, 188 insertions(+), 901 deletions(-) diff --git a/doc-requirements.in b/doc-requirements.in index 4a30b8afef..3b602e7e52 100644 --- a/doc-requirements.in +++ b/doc-requirements.in @@ -11,6 +11,7 @@ sphinx-autoapi sphinx-copybutton sphinx_fontawesome sphinx-panels +sphinx-reredirects sphinxcontrib-youtube==1.2.0 cryptography google-api-core[grpc] @@ -19,37 +20,8 @@ sphinx-tags sphinx-click retry mashumaro +pydantic<2.0.0 # Packages for Plugin docs # Package name Plugin needing it -botocore # fsspec -fsspec # fsspec -google-cloud # bigquery -google-cloud-bigquery # bigquery -markdown # deck -plotly # deck -pandas_profiling # deck -dolt_integrations # dolt -great-expectations # greatexpectations -datasets # huggingface -kubernetes # k8s-pod -modin # modin -pandera # pandera -papermill # papermill -jupyter # papermill -polars # polars -pyspark # spark -sqlalchemy # sqlalchemy -torch # pytorch -skl2onnx # onnxscikitlearn -tf2onnx # onnxtensorflow -tensorflow # onnxtensorflow -whylogs==1.3.3 # whylogs -whylabs-client # whylogs -ray==2.6.3 # ray -scikit-learn # scikit-learn -dask[distributed] # dask vaex # vaex -mlflow==2.7.0 # mlflow -duckdb # duckdb -snowflake-connector-python # snowflake diff --git a/doc-requirements.txt b/doc-requirements.txt index 
ef8f3cfdcd..e1c2a72dcb 100644 --- a/doc-requirements.txt +++ b/doc-requirements.txt @@ -6,228 +6,138 @@ # -e file:.#egg=flytekit # via -r doc-requirements.in -absl-py==1.4.0 - # via - # tensorboard - # tensorflow -adlfs==2023.8.0 +adlfs==2023.9.0 # via flytekit aiobotocore==2.5.4 # via s3fs -aiohttp==3.8.5 +aiohttp==3.9.1 # via # adlfs # aiobotocore - # datasets - # fsspec # gcsfs # s3fs aioitertools==0.11.0 # via aiobotocore aiosignal==1.3.1 - # via - # aiohttp - # ray -alabaster==0.7.13 + # via aiohttp +alabaster==0.7.16 # via sphinx -alembic==1.12.0 - # via mlflow -altair==4.2.2 - # via great-expectations -ansiwrap==0.8.4 - # via papermill -anyio==3.7.1 +anyio==4.2.0 # via - # fastapi - # jupyter-server + # azure-core # starlette # watchfiles aplus==0.11.0 # via vaex-core -appnope==0.1.3 - # via - # ipykernel - # ipython -argon2-cffi==23.1.0 - # via jupyter-server -argon2-cffi-bindings==21.2.0 - # via argon2-cffi -arrow==1.2.3 - # via - # cookiecutter - # isoduration -asn1crypto==1.5.1 - # via - # oscrypto - # snowflake-connector-python -astroid==2.15.6 +arrow==1.3.0 + # via cookiecutter +astroid==3.0.2 # via sphinx-autoapi -astropy==5.3.3 +astropy==6.0.0 # via vaex-astro -asttokens==2.4.0 +astropy-iers-data==0.2024.1.8.0.30.55 + # via astropy +asttokens==2.4.1 # via stack-data -astunparse==1.6.3 - # via tensorflow -async-lru==2.0.4 - # via jupyterlab async-timeout==4.0.3 # via aiohttp -attrs==23.1.0 - # via - # aiohttp - # jsonschema - # referencing - # visions -azure-core==1.29.1 +attrs==23.2.0 + # via aiohttp +azure-core==1.29.6 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.53 # via adlfs -azure-identity==1.14.0 +azure-identity==1.15.0 # via adlfs -azure-storage-blob==12.18.1 +azure-storage-blob==12.19.0 # via adlfs -babel==2.12.1 - # via - # jupyterlab-server - # sphinx -backcall==0.2.0 - # via ipython +babel==2.14.0 + # via sphinx beautifulsoup4==4.12.2 # via # furo - # nbconvert # sphinx-code-include # sphinx-material 
binaryornot==0.4.4 # via cookiecutter -blake3==0.3.3 +blake3==0.3.4 # via vaex-core -bleach==6.0.0 - # via nbconvert -blinker==1.6.2 - # via flask botocore==1.31.17 - # via - # -r doc-requirements.in - # aiobotocore -bqplot==0.12.40 + # via aiobotocore +bqplot==0.12.42 # via # ipyvolume # vaex-jupyter -branca==0.6.0 +branca==0.7.0 # via ipyleaflet -cachetools==5.3.1 +cachetools==5.3.2 # via # google-auth # vaex-server -certifi==2023.7.22 +certifi==2023.11.17 # via # kubernetes # requests - # snowflake-connector-python -cffi==1.15.1 +cffi==1.16.0 # via - # argon2-cffi-bindings # azure-datalake-store # cryptography - # snowflake-connector-python cfgv==3.4.0 # via pre-commit chardet==5.2.0 # via binaryornot -charset-normalizer==3.2.0 - # via - # aiohttp - # requests - # snowflake-connector-python +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # cookiecutter # dask - # databricks-cli - # distributed - # flask # flytekit - # great-expectations - # mlflow - # papermill - # ray # rich-click # sphinx-click # uvicorn -cloudpickle==2.2.1 +cloudpickle==3.0.0 # via # dask - # distributed # flytekit - # mlflow # vaex-core -colorama==0.4.6 - # via great-expectations -comm==0.1.4 - # via - # ipykernel - # ipywidgets -contourpy==1.1.1 +comm==0.2.1 + # via ipywidgets +contourpy==1.2.0 # via matplotlib -cookiecutter==2.3.0 +cookiecutter==2.5.0 # via flytekit -croniter==1.4.1 +croniter==2.0.1 # via flytekit -cryptography==41.0.3 +cryptography==41.0.7 # via # -r doc-requirements.in # azure-identity # azure-storage-blob - # great-expectations # msal # pyjwt - # snowflake-connector-python css-html-js-minify==2.5.5 # via sphinx-material -cycler==0.11.0 +cycler==0.12.1 # via matplotlib -dacite==1.8.1 - # via ydata-profiling -dask[distributed]==2023.9.2 - # via - # -r doc-requirements.in - # distributed - # vaex-core -databricks-cli==0.17.7 - # via mlflow +dask==2023.12.1 + # via vaex-core dataclasses-json==0.5.9 - # via - # dolt-integrations - # flytekit -datasets==2.14.5 
- # via -r doc-requirements.in -debugpy==1.8.0 - # via ipykernel + # via flytekit decorator==5.1.1 # via # gcsfs # ipython # retry -defusedxml==0.7.1 - # via nbconvert -dill==0.3.7 - # via - # datasets - # multiprocess diskcache==5.6.3 # via flytekit -distlib==0.3.7 +distlib==0.3.8 # via virtualenv -distributed==2023.9.2 - # via dask docker==6.1.3 - # via - # flytekit - # mlflow + # via flytekit docstring-parser==0.15 # via flytekit docutils==0.17.1 @@ -235,83 +145,47 @@ docutils==0.17.1 # sphinx # sphinx-click # sphinx-panels -dolt-integrations==0.1.5 - # via -r doc-requirements.in -doltcli==0.1.18 - # via dolt-integrations -duckdb==0.8.1 - # via -r doc-requirements.in -entrypoints==0.4 - # via - # altair - # mlflow - # papermill -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via # anyio # ipython -executing==1.2.0 +executing==2.0.1 # via stack-data -fastapi==0.103.1 +fastapi==0.108.0 # via vaex-server -fastjsonschema==2.18.0 - # via nbformat -filelock==3.12.4 - # via - # huggingface-hub - # ray - # snowflake-connector-python - # torch +filelock==3.13.1 + # via # vaex-core # virtualenv -flask==2.3.3 - # via mlflow -flatbuffers==23.5.26 - # via tensorflow flyteidl==1.10.6 # via flytekit -fonttools==4.42.1 +fonttools==4.47.0 # via matplotlib -fqdn==1.5.1 - # via jsonschema -frozendict==2.3.8 +frozendict==2.4.0 # via vaex-core -frozenlist==1.4.0 +frozenlist==1.4.1 # via # aiohttp # aiosignal - # ray -fsspec[http]==2023.6.0 +fsspec==2023.9.2 # via - # -r doc-requirements.in # adlfs # dask - # datasets # flytekit # gcsfs - # huggingface-hub - # modin # s3fs furo @ git+https://github.com/flyteorg/furo@main # via -r doc-requirements.in future==0.18.3 # via vaex-core -gast==0.4.0 - # via tensorflow -gcsfs==2023.6.0 +gcsfs==2023.9.2 # via flytekit -gitdb==4.0.10 - # via gitpython -gitpython==3.1.36 - # via - # mlflow -google-api-core[grpc]==2.11.1 +google-api-core[grpc]==2.15.0 # via # -r doc-requirements.in - # google-cloud-bigquery # google-cloud-core # 
google-cloud-storage -google-auth==2.23.0 +google-auth==2.26.1 # via # gcsfs # google-api-core @@ -319,119 +193,69 @@ google-auth==2.23.0 # google-cloud-core # google-cloud-storage # kubernetes - # tensorboard -google-auth-oauthlib==1.0.0 - # via - # gcsfs - # tensorboard -google-cloud==0.34.0 - # via -r doc-requirements.in -google-cloud-bigquery==3.11.4 - # via -r doc-requirements.in -google-cloud-core==2.3.3 - # via - # google-cloud-bigquery - # google-cloud-storage -google-cloud-storage==2.10.0 +google-auth-oauthlib==1.2.0 + # via gcsfs +google-cloud-core==2.4.1 + # via google-cloud-storage +google-cloud-storage==2.14.0 # via gcsfs google-crc32c==1.5.0 - # via google-resumable-media -google-pasta==0.2.0 - # via tensorflow -google-resumable-media==2.6.0 # via - # google-cloud-bigquery # google-cloud-storage -googleapis-common-protos==1.60.0 + # google-resumable-media +google-resumable-media==2.7.0 + # via google-cloud-storage +googleapis-common-protos==1.62.0 # via # flyteidl # flytekit # google-api-core # grpcio-status -great-expectations==0.17.16 - # via -r doc-requirements.in -grpcio==1.53.0 +grpcio==1.60.0 # via # -r doc-requirements.in # flytekit # google-api-core - # google-cloud-bigquery # grpcio-status - # ray - # tensorboard - # tensorflow -grpcio-status==1.53.0 +grpcio-status==1.60.0 # via # flytekit # google-api-core -gunicorn==21.2.0 - # via mlflow h11==0.14.0 # via uvicorn -h5py==3.9.0 - # via - # tensorflow - # vaex-hdf5 -htmlmin==0.1.12 - # via ydata-profiling -httptools==0.6.0 +h5py==3.10.0 + # via vaex-hdf5 +httptools==0.6.1 # via uvicorn -huggingface-hub==0.17.1 - # via datasets -identify==2.5.29 +identify==2.5.33 # via pre-commit -idna==3.4 +idna==3.6 # via # anyio - # jsonschema # requests - # snowflake-connector-python # yarl -imagehash==4.3.1 - # via - # visions - # ydata-profiling imagesize==1.4.1 # via sphinx -importlib-metadata==6.8.0 +importlib-metadata==7.0.1 # via # dask - # flask # flytekit - # jupyter-client - # jupyter-lsp - # 
jupyterlab - # jupyterlab-server # keyring - # markdown - # mlflow - # nbconvert # sphinx -importlib-resources==6.0.1 +importlib-resources==6.1.1 # via matplotlib ipydatawidgets==4.3.5 # via pythreejs -ipykernel==6.25.2 - # via - # jupyter - # jupyter-console - # jupyterlab - # qtconsole -ipyleaflet==0.17.4 +ipyleaflet==0.18.1 # via vaex-jupyter ipympl==0.9.3 # via vaex-jupyter -ipython==8.15.0 +ipython==8.18.1 # via - # great-expectations - # ipykernel # ipympl # ipywidgets - # jupyter-console ipython-genutils==0.2.0 - # via - # ipympl - # qtconsole + # via ipympl ipyvolume==0.6.3 # via vaex-jupyter ipyvue==1.10.1 @@ -447,159 +271,54 @@ ipywebrtc==0.6.0 ipywidgets==8.1.1 # via # bqplot - # great-expectations # ipydatawidgets # ipyleaflet # ipympl # ipyvolume # ipyvue - # jupyter # pythreejs isodate==0.6.1 # via azure-storage-blob -isoduration==20.11.0 - # via jsonschema -itsdangerous==2.1.2 - # via flask jaraco-classes==3.3.0 # via keyring -jedi==0.19.0 +jedi==0.19.1 # via ipython jinja2==3.1.2 # via - # altair # branca # cookiecutter - # distributed - # flask - # great-expectations - # jupyter-server - # jupyterlab - # jupyterlab-server - # mlflow - # nbconvert # sphinx # sphinx-autoapi - # torch # vaex-ml - # ydata-profiling jmespath==1.0.1 # via botocore joblib==1.3.2 # via # flytekit - # phik # scikit-learn -json5==0.9.14 - # via jupyterlab-server -jsonpatch==1.33 - # via great-expectations jsonpickle==3.0.2 # via flytekit -jsonpointer==2.4 - # via - # jsonpatch - # jsonschema -jsonschema[format-nongpl]==4.19.0 - # via - # altair - # great-expectations - # jupyter-events - # jupyterlab-server - # nbformat - # ray -jsonschema-specifications==2023.7.1 - # via jsonschema -jupyter==1.0.0 - # via -r doc-requirements.in -jupyter-client==8.3.1 - # via - # ipykernel - # jupyter-console - # jupyter-server - # nbclient - # qtconsole -jupyter-console==6.6.3 - # via jupyter -jupyter-core==5.3.1 - # via - # ipykernel - # jupyter-client - # jupyter-console - # 
jupyter-server - # jupyterlab - # nbclient - # nbconvert - # nbformat - # qtconsole -jupyter-events==0.7.0 - # via jupyter-server -jupyter-lsp==2.2.0 - # via jupyterlab -jupyter-server==2.7.3 - # via - # jupyter-lsp - # jupyterlab - # jupyterlab-server - # notebook - # notebook-shim -jupyter-server-terminals==0.4.4 - # via jupyter-server -jupyterlab==4.0.6 - # via notebook -jupyterlab-pygments==0.2.2 - # via nbconvert -jupyterlab-server==2.25.0 - # via - # jupyterlab - # notebook jupyterlab-widgets==3.0.9 # via ipywidgets -keras==2.13.1 - # via tensorflow -keyring==24.2.0 +keyring==24.3.0 # via flytekit kiwisolver==1.4.5 # via matplotlib -kubernetes==27.2.0 - # via - # -r doc-requirements.in - # flytekit -lazy-object-proxy==1.9.0 - # via astroid -libclang==16.0.6 - # via tensorflow -lit==16.0.6 - # via triton -llvmlite==0.40.1 +kubernetes==29.0.0 + # via flytekit +llvmlite==0.41.1 # via numba locket==1.0.0 - # via - # distributed - # partd -lxml==4.9.3 + # via partd +lxml==5.1.0 # via sphinx-material -makefun==1.15.1 - # via great-expectations -mako==1.2.4 - # via alembic -markdown==3.4.4 - # via - # -r doc-requirements.in - # mlflow - # tensorboard markdown-it-py==3.0.0 # via rich markupsafe==2.1.3 - # via - # jinja2 - # mako - # nbconvert - # werkzeug -marshmallow==3.20.1 + # via jinja2 +marshmallow==3.20.2 # via # dataclasses-json - # great-expectations # marshmallow-enum # marshmallow-jsonschema marshmallow-enum==1.5.1 @@ -608,299 +327,122 @@ marshmallow-enum==1.5.1 # flytekit marshmallow-jsonschema==0.13.0 # via flytekit -mashumaro==3.10 +mashumaro==3.11 # via # -r doc-requirements.in # flytekit -matplotlib==3.8.0 +matplotlib==3.8.2 # via # ipympl # ipyvolume - # mlflow - # phik - # seaborn # vaex-viz - # wordcloud - # ydata-profiling matplotlib-inline==0.1.6 - # via - # ipykernel - # ipython + # via ipython mdurl==0.1.2 # via markdown-it-py -mistune==3.0.1 - # via - # great-expectations - # nbconvert -mlflow==2.7.0 - # via -r doc-requirements.in 
-modin==0.22.3 - # via -r doc-requirements.in -more-itertools==10.1.0 +more-itertools==10.2.0 # via jaraco-classes -mpmath==1.3.0 - # via sympy -msal==1.24.0 +msal==1.26.0 # via # azure-datalake-store # azure-identity # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.1.0 # via azure-identity -msgpack==1.0.5 - # via - # distributed - # ray multidict==6.0.4 # via # aiohttp # yarl -multimethod==1.9.1 - # via - # pandera - # visions - # ydata-profiling -multiprocess==0.70.15 - # via datasets mypy-extensions==1.0.0 # via typing-inspect -nbclient==0.8.0 - # via - # nbconvert - # papermill -nbconvert==7.8.0 - # via - # jupyter - # jupyter-server -nbformat==5.9.2 - # via - # great-expectations - # jupyter-server - # nbclient - # nbconvert - # papermill nest-asyncio==1.5.8 - # via - # ipykernel - # vaex-core -networkx==3.1 - # via - # torch - # visions + # via vaex-core nodeenv==1.8.0 # via pre-commit -notebook==7.0.3 - # via - # great-expectations - # jupyter -notebook-shim==0.2.3 - # via - # jupyterlab - # notebook -numba==0.57.1 +numba==0.58.1 # via vaex-ml -numpy==1.23.5 +numpy==1.26.3 # via - # altair # astropy # bqplot # contourpy - # datasets - # flytekit - # great-expectations # h5py - # imagehash # ipydatawidgets # ipympl # ipyvolume # matplotlib - # mlflow - # modin # numba - # onnx - # onnxconverter-common - # opt-einsum # pandas - # pandera - # patsy - # phik # pyarrow # pyerfa # pythreejs - # pywavelets - # ray # scikit-learn # scipy - # seaborn - # statsmodels - # tensorboard - # tensorflow - # tf2onnx # vaex-core - # visions - # wordcloud # xarray - # ydata-profiling oauthlib==3.2.2 # via - # databricks-cli # kubernetes # requests-oauthlib -onnx==1.14.1 - # via - # onnxconverter-common - # skl2onnx - # tf2onnx -onnxconverter-common==1.13.0 - # via skl2onnx -opt-einsum==3.3.0 - # via tensorflow -oscrypto==1.3.0 - # via snowflake-connector-python -overrides==7.4.0 - # via jupyter-server -packaging==23.1 +packaging==23.2 # via # astropy # dask - # 
datasets - # distributed # docker - # google-cloud-bigquery - # great-expectations - # gunicorn - # huggingface-hub - # ipykernel - # jupyter-server - # jupyterlab - # jupyterlab-server # marshmallow # matplotlib - # mlflow - # modin - # nbconvert - # onnxconverter-common - # pandera - # plotly - # qtconsole - # qtpy - # ray - # snowflake-connector-python + # msal-extensions # sphinx - # statsmodels - # tensorflow # xarray -pandas==1.5.3 +pandas==2.1.4 # via - # altair # bqplot - # datasets - # dolt-integrations - # flytekit - # great-expectations - # mlflow - # modin - # pandera - # phik - # seaborn - # statsmodels # vaex-core - # visions # xarray - # ydata-profiling -pandas-profiling==3.6.6 - # via -r doc-requirements.in -pandera==0.14.5 - # via -r doc-requirements.in -pandocfilters==1.5.0 - # via nbconvert -papermill==2.4.0 - # via -r doc-requirements.in parso==0.8.3 # via jedi -partd==1.4.0 +partd==1.4.1 # via dask -patsy==0.5.3 - # via statsmodels -pexpect==4.8.0 - # via ipython -phik==0.12.3 - # via ydata-profiling -pickleshare==0.7.5 +pexpect==4.9.0 # via ipython -pillow==10.0.1 +pillow==10.2.0 # via - # imagehash # ipympl # ipyvolume # matplotlib + # sphinx-gallery # vaex-viz - # visions - # wordcloud -platformdirs==3.8.1 - # via - # jupyter-core - # snowflake-connector-python - # virtualenv - # whylogs -plotly==5.17.0 - # via -r doc-requirements.in -polars==0.19.3 - # via -r doc-requirements.in +platformdirs==4.1.0 + # via virtualenv portalocker==2.8.2 # via msal-extensions -pre-commit==3.4.0 +pre-commit==3.6.0 # via sphinx-tags -progressbar2==4.2.0 +progressbar2==4.3.2 # via vaex-core -prometheus-client==0.17.1 - # via jupyter-server -prompt-toolkit==3.0.39 - # via - # ipython - # jupyter-console -proto-plus==1.22.3 - # via google-cloud-bigquery -protobuf==4.24.3 +prompt-toolkit==3.0.43 + # via ipython +protobuf==4.24.4 # via # flyteidl + # flytekit # google-api-core - # google-cloud-bigquery # googleapis-common-protos # grpcio-status - # mlflow - # onnx 
- # onnxconverter-common - # proto-plus # protoc-gen-swagger - # ray - # tensorboard - # tensorflow - # whylogs protoc-gen-swagger==0.1.0 # via flyteidl -psutil==5.9.5 - # via - # distributed - # ipykernel - # modin ptyprocess==0.7.0 - # via - # pexpect - # terminado + # via pexpect pure-eval==0.2.2 # via stack-data py==1.11.0 # via retry -py4j==0.10.9.7 - # via pyspark -pyarrow==10.0.1 +pyarrow==14.0.2 # via - # datasets # flytekit - # mlflow # vaex-core -pyasn1==0.5.0 +pyasn1==0.5.1 # via # pyasn1-modules # rsa @@ -908,64 +450,43 @@ pyasn1-modules==0.3.0 # via google-auth pycparser==2.21 # via cffi -pycryptodomex==3.19.0 - # via snowflake-connector-python -pydantic==1.10.12 +pydantic==1.10.13 # via + # -r doc-requirements.in # fastapi - # great-expectations - # pandera # vaex-core - # ydata-profiling -pyerfa==2.0.0.3 +pyerfa==2.0.1.1 # via astropy -pygments==2.16.1 +pygments==2.17.2 # via # furo # ipython - # jupyter-console - # nbconvert - # qtconsole # rich # sphinx # sphinx-prompt pyjwt[crypto]==2.8.0 # via - # databricks-cli # msal - # snowflake-connector-python -pyopenssl==23.2.0 - # via - # snowflake-connector-python + # pyjwt pyparsing==3.1.1 - # via - # great-expectations - # matplotlib -pyspark==3.4.1 - # via -r doc-requirements.in + # via matplotlib python-dateutil==2.8.2 # via # arrow # botocore # croniter - # google-cloud-bigquery - # great-expectations - # jupyter-client # kubernetes # matplotlib # pandas - # whylabs-client python-dotenv==1.0.0 # via uvicorn python-json-logger==2.0.7 - # via - # flytekit - # jupyter-events + # via flytekit python-slugify[unidecode]==8.0.1 # via # cookiecutter # sphinx-material -python-utils==3.7.0 +python-utils==3.8.1 # via progressbar2 pythreejs==2.4.2 # via ipyvolume @@ -973,167 +494,71 @@ pytimeparse==1.1.8 # via flytekit pytz==2023.3.post1 # via - # great-expectations - # mlflow + # croniter # pandas - # snowflake-connector-python -pywavelets==1.4.1 - # via imagehash pyyaml==6.0.1 # via # astropy # cookiecutter # 
dask - # datasets - # distributed # flytekit - # huggingface-hub - # jupyter-events # kubernetes - # mlflow - # papermill # pre-commit - # ray # sphinx-autoapi # uvicorn # vaex-core - # ydata-profiling -pyzmq==25.1.1 - # via - # ipykernel - # jupyter-client - # jupyter-console - # jupyter-server - # qtconsole -qtconsole==5.4.4 - # via jupyter -qtpy==2.4.0 - # via qtconsole -querystring-parser==1.2.4 - # via mlflow -ray==2.6.3 - # via -r doc-requirements.in -referencing==0.30.2 - # via - # jsonschema - # jsonschema-specifications - # jupyter-events requests==2.31.0 # via # azure-core # azure-datalake-store # cookiecutter - # databricks-cli - # datasets # docker # flytekit - # fsspec # gcsfs # google-api-core - # google-cloud-bigquery # google-cloud-storage - # great-expectations - # huggingface-hub # ipyvolume - # jupyterlab-server # kubernetes - # mlflow # msal - # papermill - # ray # requests-oauthlib - # snowflake-connector-python # sphinx # sphinxcontrib-youtube - # tensorboard - # tf2onnx # vaex-core - # whylogs - # ydata-profiling requests-oauthlib==1.3.1 # via # google-auth-oauthlib # kubernetes retry==0.9.2 # via -r doc-requirements.in -rfc3339-validator==0.1.4 - # via - # jsonschema - # jupyter-events -rfc3986-validator==0.1.1 - # via - # jsonschema - # jupyter-events -rich==13.5.3 +rich==13.7.0 # via # cookiecutter # flytekit # rich-click # vaex-core -rich-click==1.6.1 +rich-click==1.7.3 # via flytekit -rpds-py==0.10.3 - # via - # jsonschema - # referencing rsa==4.9 # via google-auth -ruamel-yaml==0.17.17 - # via great-expectations -ruamel-yaml-clib==0.2.7 - # via ruamel-yaml -s3fs==2023.6.0 +s3fs==2023.9.2 # via flytekit -scikit-learn==1.3.0 - # via - # -r doc-requirements.in - # mlflow - # skl2onnx -scipy==1.11.2 - # via - # great-expectations - # imagehash - # mlflow - # phik - # scikit-learn - # statsmodels - # ydata-profiling -seaborn==0.12.2 - # via ydata-profiling -send2trash==1.8.2 - # via jupyter-server +scikit-learn==1.3.2 + # via -r 
doc-requirements.in +scipy==1.11.4 + # via scikit-learn six==1.16.0 # via # asttokens - # astunparse # azure-core - # bleach - # databricks-cli - # google-pasta # isodate # kubernetes - # patsy # python-dateutil - # querystring-parser - # rfc3339-validator # sphinx-code-include - # tensorflow - # tf2onnx # vaex-core -skl2onnx==1.15.0 - # via -r doc-requirements.in -smmap==5.0.1 - # via gitdb sniffio==1.3.0 # via anyio snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python==3.2.0 - # via -r doc-requirements.in -sortedcontainers==2.4.0 - # via - # distributed - # snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==4.5.0 @@ -1150,13 +575,14 @@ sphinx==4.5.0 # sphinx-material # sphinx-panels # sphinx-prompt + # sphinx-reredirects # sphinx-tags # sphinxcontrib-youtube sphinx-autoapi==2.0.1 # via -r doc-requirements.in sphinx-basic-ng==1.0.0b2 # via furo -sphinx-click==5.0.1 +sphinx-click==5.1.0 # via -r doc-requirements.in sphinx-code-include==1.1.1 # via -r doc-requirements.in @@ -1164,7 +590,7 @@ sphinx-copybutton==0.5.2 # via -r doc-requirements.in sphinx-fontawesome==0.0.6 # via -r doc-requirements.in -sphinx-gallery==0.14.0 +sphinx-gallery==0.15.0 # via -r doc-requirements.in sphinx-material==0.0.36 # via -r doc-requirements.in @@ -1172,6 +598,8 @@ sphinx-panels==0.6.0 # via -r doc-requirements.in sphinx-prompt==1.5.0 # via -r doc-requirements.in +sphinx-reredirects==0.1.3 + # via -r doc-requirements.in sphinx-tags==0.2.1 # via -r doc-requirements.in sphinxcontrib-applehelp==1.0.4 @@ -1188,111 +616,34 @@ sphinxcontrib-serializinghtml==1.1.5 # via sphinx sphinxcontrib-youtube==1.2.0 # via -r doc-requirements.in -sqlalchemy==2.0.20 - # via - # -r doc-requirements.in - # alembic - # mlflow -sqlparse==0.4.4 - # via mlflow -stack-data==0.6.2 +stack-data==0.6.3 # via ipython -starlette==0.27.0 +starlette==0.32.0.post1 # via fastapi statsd==3.3.0 # via flytekit -statsmodels==0.14.0 - # via ydata-profiling -sympy==1.12 - # via torch 
tabulate==0.9.0 - # via - # databricks-cli - # vaex-core -tangled-up-in-unicode==0.2.0 - # via visions -tblib==2.0.0 - # via distributed -tenacity==8.2.3 - # via - # papermill - # plotly -tensorboard==2.13.0 - # via tensorflow -tensorboard-data-server==0.7.1 - # via tensorboard -tensorflow==2.13.0 - # via -r doc-requirements.in -tensorflow-estimator==2.13.0 - # via tensorflow -tensorflow==2.13.0 - # via tensorflow -termcolor==2.3.0 - # via tensorflow -terminado==0.17.1 - # via - # jupyter-server - # jupyter-server-terminals + # via vaex-core text-unidecode==1.3 # via python-slugify -textwrap3==0.9.2 - # via ansiwrap -tf2onnx==1.8.4 - # via -r doc-requirements.in threadpoolctl==3.2.0 # via scikit-learn -tinycss2==1.2.1 - # via nbconvert -tomli==2.0.1 - # via jupyterlab -tomlkit==0.12.1 - # via snowflake-connector-python toolz==0.12.0 # via - # altair # dask - # distributed # partd -torch==2.0.1 - # via -r doc-requirements.in -tornado==6.3.3 - # via - # distributed - # ipykernel - # jupyter-client - # jupyter-server - # jupyterlab - # notebook - # terminado - # vaex-server -tqdm==4.66.1 - # via - # datasets - # great-expectations - # huggingface-hub - # papermill - # ydata-profiling -traitlets==5.10.0 +tornado==6.4 + # via vaex-server +traitlets==5.14.1 # via # bqplot # comm - # ipykernel # ipympl # ipython # ipyvolume # ipywidgets - # jupyter-client - # jupyter-console - # jupyter-core - # jupyter-events - # jupyter-server - # jupyterlab # matplotlib-inline - # nbclient - # nbconvert - # nbformat # pythreejs - # qtconsole # traittypes # vaex-ml traittypes==0.2.1 @@ -1301,67 +652,43 @@ traittypes==0.2.1 # ipydatawidgets # ipyleaflet # ipyvolume -typed-ast==1.5.5 - # via doltcli -typeguard==2.13.3 - # via ydata-profiling -types-requests==2.31.0.2 - # via whylogs -types-urllib3==1.26.25.14 - # via types-requests -typing-extensions==4.5.0 +types-python-dateutil==2.8.19.20240106 + # via arrow +typing-extensions==4.9.0 # via # aioitertools - # alembic + # anyio # astroid 
- # async-lru # azure-core # azure-storage-blob # fastapi # flytekit - # great-expectations - # huggingface-hub # ipython # mashumaro - # onnx # pydantic # python-utils - # snowflake-connector-python - # sqlalchemy + # rich-click # starlette - # tensorflow - # torch # typing-inspect # uvicorn - # whylogs typing-inspect==0.9.0 - # via - # dataclasses-json - # pandera -tzlocal==5.0.1 - # via great-expectations -unidecode==1.3.6 + # via dataclasses-json +tzdata==2023.4 + # via pandas +unidecode==1.3.7 # via # python-slugify # sphinx-autoapi -uri-template==1.3.0 - # via jsonschema -urllib3==1.26.16 +urllib3==1.26.18 # via # botocore - # databricks-cli - # distributed # docker # flytekit - # google-auth - # great-expectations # kubernetes # requests - # snowflake-connector-python - # whylabs-client -uvicorn[standard]==0.23.2 +uvicorn[standard]==0.25.0 # via vaex-server -uvloop==0.17.0 +uvloop==0.19.0 # via uvicorn vaex==4.17.0 # via -r doc-requirements.in @@ -1388,66 +715,29 @@ vaex-viz==0.5.4 # via # vaex # vaex-jupyter -virtualenv==20.24.1 +virtualenv==20.25.0 # via pre-commit -visions[type_image_path]==0.7.5 - # via ydata-profiling -watchfiles==0.20.0 +watchfiles==0.21.0 # via uvicorn -wcwidth==0.2.6 +wcwidth==0.2.13 # via prompt-toolkit -webcolors==1.13 - # via jsonschema -webencodings==0.5.1 - # via - # bleach - # tinycss2 -websocket-client==1.6.3 +websocket-client==1.7.0 # via # docker - # jupyter-server # kubernetes -websockets==11.0.3 +websockets==12.0 # via uvicorn -werkzeug==2.3.7 - # via - # flask - # tensorboard -wheel==0.41.2 - # via - # astunparse - # tensorboard -whylabs-client==0.5.7 - # via - # -r doc-requirements.in - # whylogs -whylogs==1.3.3 - # via -r doc-requirements.in -whylogs-sketching==3.4.1.dev3 - # via whylogs widgetsnbextension==4.0.9 # via ipywidgets -wordcloud==1.9.2 - # via ydata-profiling -wrapt==1.15.0 - # via - # aiobotocore - # astroid - # pandera - # tensorflow -xarray==2023.8.0 +wrapt==1.16.0 + # via aiobotocore +xarray==2023.12.0 # 
via vaex-jupyter -xxhash==3.3.0 - # via datasets -xyzservices==2023.7.0 +xyzservices==2023.10.1 # via ipyleaflet -yarl==1.9.2 +yarl==1.9.4 # via aiohttp -ydata-profiling==4.5.1 - # via pandas-profiling -zict==3.0.0 - # via distributed -zipp==3.16.2 +zipp==3.17.0 # via # importlib-metadata # importlib-resources diff --git a/docs/source/conf.py b/docs/source/conf.py index 09eb276ca0..f86a8d2c06 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -57,6 +57,7 @@ "sphinx-prompt", "sphinx_copybutton", "sphinx_panels", + "sphinx_reredirects", "sphinxcontrib.youtube", "sphinx_tags", "sphinx_click", @@ -239,3 +240,27 @@ tags_create_tags = True tags_page_title = "Tag" tags_overview_title = "All Tags" + + +# Sphinx redirects to the monodocs +page_pattern = "https://docs.flyte.org/en/latest/api/flytekit/$source.html" + +if int(os.environ.get("ENABLE_SPHINX_REDIRECTS", 0)): + redirects = { + "generated/*": page_pattern, + "design/*": page_pattern, + "plugins/*": page_pattern, + "flytekit.html": "https://docs.flyte.org/en/latest/api/flytekit/flytekit.html", + "configuration.html": "https://docs.flyte.org/en/latest/api/flytekit/configuration.html", + "remote.html": "https://docs.flyte.org/en/latest/api/flytekit/remote.html", + "clients.html": "https://docs.flyte.org/en/latest/api/flytekit/clients.html", + "testing.html": "https://docs.flyte.org/en/latest/api/flytekit/testing.html", + "extend.html": "https://docs.flyte.org/en/latest/api/flytekit/extend.html", + "deck.html": "https://docs.flyte.org/en/latest/api/flytekit/deck.html", + "tasks*": page_pattern, + "types*": page_pattern, + "extras*": page_pattern, + "experimental.html": "https://docs.flyte.org/en/latest/api/flytekit/experimental.html", + "pyflyte.html": "https://docs.flyte.org/en/latest/api/flytekit/pyflyte.html", + "contributing.html": "https://docs.flyte.org/en/latest/api/flytekit/contributing.html", + } From ed91dd1f7fdd99cbeed1e34f646223c245d99b30 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 
12 Jan 2024 01:42:57 +0800 Subject: [PATCH 28/63] Local Sync Agent (#2085) Signed-off-by: Future Outlier Co-authored-by: Future Outlier --- Makefile | 1 + flytekit/extend/backend/base_agent.py | 50 ++++++++++++++++-------- flytekit/sensor/base_sensor.py | 2 +- tests/flytekit/unit/extend/test_agent.py | 34 +++++++++++++++- 4 files changed, 68 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 5d672c21f5..6e82bd693e 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ update_boilerplate: .PHONY: setup setup: install-piptools ## Install requirements + pip install flyteidl --pre pip install -r dev-requirements.in .PHONY: fmt diff --git a/flytekit/extend/backend/base_agent.py b/flytekit/extend/backend/base_agent.py index 02c28287a1..eb8e7562b6 100644 --- a/flytekit/extend/backend/base_agent.py +++ b/flytekit/extend/backend/base_agent.py @@ -6,7 +6,7 @@ from abc import ABC from collections import OrderedDict from functools import partial -from types import FrameType +from types import FrameType, coroutine import grpc from flyteidl.admin.agent_pb2 import ( @@ -138,7 +138,8 @@ def convert_to_flyte_state(state: str) -> State: Convert the state from the agent to the state in flyte. """ state = state.lower() - if state in ["failed", "timedout", "canceled"]: + # timedout is the state of Databricks job. https://docs.databricks.com/en/workflows/jobs/jobs-2.0-api.html#runresultstate + if state in ["failed", "timeout", "timedout", "canceled"]: return RETRYABLE_FAILURE elif state in ["done", "succeeded", "success"]: return SUCCEEDED @@ -158,15 +159,27 @@ def get_agent_secret(secret_key: str) -> str: return flytekit.current_context().secrets.get(secret_key) +def _get_grpc_context() -> grpc.ServicerContext: + from unittest.mock import MagicMock + + grpc_ctx = MagicMock(spec=grpc.ServicerContext) + return grpc_ctx + + class AsyncAgentExecutorMixin: """ This mixin class is used to run the agent task locally, and it's only used for local execution. 
Task should inherit from this class if the task can be run in the agent. + It can handle asynchronous tasks and synchronous tasks. + Asynchronous tasks are tasks that take a long time to complete, such as running a query. + Synchronous tasks run quickly and can return their results instantly. Sending a prompt to ChatGPT and getting a response, or retrieving some metadata from a backend system. """ - _is_canceled = None - _agent = None - _entity = None + _clean_up_task: coroutine = None + _agent: AgentBase = None + _entity: PythonTask = None + _ctx: FlyteContext = FlyteContext.current_context() + _grpc_ctx: grpc.ServicerContext = _get_grpc_context() def execute(self, **kwargs) -> typing.Any: ctx = FlyteContext.current_context() @@ -180,10 +193,17 @@ def execute(self, **kwargs) -> typing.Any: self._agent = AgentRegistry.get_agent(task_template.type) res = asyncio.run(self._create(task_template, output_prefix, kwargs)) + + # If the task is synchronous, the agent will return the output from the resource literals. + if res.HasField("resource"): + if res.resource.state != SUCCEEDED: + raise FlyteUserException(f"Failed to run the task {self._entity.name}") + return LiteralMap.from_flyte_idl(res.resource.outputs) + res = asyncio.run(self._get(resource_meta=res.resource_meta)) if res.resource.state != SUCCEEDED: - raise FlyteUserException(f"Failed to run the task {self._entity.name} with error: {res.resource.message}") + raise FlyteUserException(f"Failed to run the task {self._entity.name}") # Read the literals from a remote file, if agent doesn't return the output literals. 
if task_template.interface.outputs and len(res.resource.outputs.literals) == 0: @@ -198,13 +218,11 @@ async def _create( self, task_template: TaskTemplate, output_prefix: str, inputs: typing.Dict[str, typing.Any] = None ) -> CreateTaskResponse: ctx = FlyteContext.current_context() - grpc_ctx = _get_grpc_context() # Convert python inputs to literals literals = inputs or {} for k, v in inputs.items(): literals[k] = TypeEngine.to_literal(ctx, v, type(v), self._entity.interface.inputs[k].type) - literal_map = LiteralMap(literals) if isinstance(self, PythonFunctionTask): @@ -215,9 +233,9 @@ async def _create( task_template = render_task_template(task_template, output_prefix) if self._agent.asynchronous: - res = await self._agent.async_create(grpc_ctx, output_prefix, task_template, literal_map) + res = await self._agent.async_create(self._grpc_ctx, output_prefix, task_template, literal_map) else: - res = self._agent.create(grpc_ctx, output_prefix, task_template, literal_map) + res = self._agent.create(self._grpc_ctx, output_prefix, task_template, literal_map) signal.signal(signal.SIGINT, partial(self.signal_handler, res.resource_meta)) # type: ignore return res @@ -225,7 +243,6 @@ async def _create( async def _get(self, resource_meta: bytes) -> GetTaskResponse: state = RUNNING grpc_ctx = _get_grpc_context() - res = State.PENDING progress = Progress(transient=True) task = progress.add_task(f"[cyan]Running Task {self._entity.name}...", total=None) @@ -235,8 +252,8 @@ async def _get(self, resource_meta: bytes) -> GetTaskResponse: time.sleep(1) if self._agent.asynchronous: res = await self._agent.async_get(grpc_ctx, resource_meta) - if self._is_canceled: - await self._is_canceled + if self._clean_up_task: + await self._clean_up_task sys.exit(1) else: res = self._agent.get(grpc_ctx, resource_meta) @@ -248,12 +265,11 @@ async def _get(self, resource_meta: bytes) -> GetTaskResponse: return res def signal_handler(self, resource_meta: bytes, signum: int, frame: FrameType) -> 
typing.Any: - grpc_ctx = _get_grpc_context() if self._agent.asynchronous: - if self._is_canceled is None: - self._is_canceled = asyncio.create_task(self._agent.async_delete(grpc_ctx, resource_meta)) + if self._clean_up_task is None: + self._clean_up_task = asyncio.create_task(self._agent.async_delete(self._grpc_ctx, resource_meta)) else: - self._agent.delete(grpc_ctx, resource_meta) + self._agent.delete(self._grpc_ctx, resource_meta) sys.exit(1) diff --git a/flytekit/sensor/base_sensor.py b/flytekit/sensor/base_sensor.py index 60beb6aa2b..0e40055ea5 100644 --- a/flytekit/sensor/base_sensor.py +++ b/flytekit/sensor/base_sensor.py @@ -35,7 +35,7 @@ def __init__( type_hints = get_type_hints(self.poke, include_extras=True) signature = inspect.signature(self.poke) inputs = collections.OrderedDict() - for k, v in signature.parameters.items(): # type: ignore + for k, _ in signature.parameters.items(): # type: ignore annotation = type_hints.get(k, None) inputs[k] = annotation diff --git a/tests/flytekit/unit/extend/test_agent.py b/tests/flytekit/unit/extend/test_agent.py index 2c52b70490..0010bccb1f 100644 --- a/tests/flytekit/unit/extend/test_agent.py +++ b/tests/flytekit/unit/extend/test_agent.py @@ -90,6 +90,20 @@ async def async_delete(self, context: grpc.ServicerContext, resource_meta: bytes return DeleteTaskResponse() +class SyncDummyAgent(AgentBase): + def __init__(self): + super().__init__(task_type="sync_dummy", asynchronous=True) + + async def async_create( + self, + context: grpc.ServicerContext, + output_prefix: str, + task_template: TaskTemplate, + inputs: typing.Optional[LiteralMap] = None, + ) -> CreateTaskResponse: + return CreateTaskResponse(resource=Resource(state=SUCCEEDED, outputs=LiteralMap({}).to_flyte_idl())) + + def get_task_template(task_type: str) -> TaskTemplate: @task def simple_task(i: int): @@ -118,6 +132,7 @@ def simple_task(i: int): dummy_template = get_task_template("dummy") async_dummy_template = get_task_template("async_dummy") 
+sync_dummy_template = get_task_template("sync_dummy") def test_dummy_agent(): @@ -161,6 +176,16 @@ async def test_async_dummy_agent(): assert res == DeleteTaskResponse() +@pytest.mark.asyncio +async def test_sync_dummy_agent(): + AgentRegistry.register(SyncDummyAgent()) + ctx = MagicMock(spec=grpc.ServicerContext) + agent = AgentRegistry.get_agent("sync_dummy") + res = await agent.async_create(ctx, "/tmp", sync_dummy_template, task_inputs) + assert res.resource.state == SUCCEEDED + assert res.resource.outputs == LiteralMap({}).to_flyte_idl() + + @pytest.mark.asyncio async def run_agent_server(): service = AsyncAgentService() @@ -171,6 +196,9 @@ async def run_agent_server(): async_request = CreateTaskRequest( inputs=task_inputs.to_flyte_idl(), output_prefix="/tmp", template=async_dummy_template.to_flyte_idl() ) + sync_request = CreateTaskRequest( + inputs=task_inputs.to_flyte_idl(), output_prefix="/tmp", template=sync_dummy_template.to_flyte_idl() + ) fake_agent = "fake" metadata_bytes = json.dumps(asdict(Metadata(job_id=dummy_id))).encode("utf-8") @@ -188,6 +216,10 @@ async def run_agent_server(): res = await service.DeleteTask(DeleteTaskRequest(task_type="async_dummy", resource_meta=metadata_bytes), ctx) assert isinstance(res, DeleteTaskResponse) + res = await service.CreateTask(sync_request, ctx) + assert res.resource.state == SUCCEEDED + assert res.resource.outputs == LiteralMap({}).to_flyte_idl() + res = await service.GetTask(GetTaskRequest(task_type=fake_agent, resource_meta=metadata_bytes), ctx) assert res is None @@ -198,7 +230,7 @@ def test_agent_server(): def test_is_terminal_state(): assert is_terminal_state(SUCCEEDED) - assert is_terminal_state(PERMANENT_FAILURE) + assert is_terminal_state(RETRYABLE_FAILURE) assert is_terminal_state(PERMANENT_FAILURE) assert not is_terminal_state(RUNNING) From ced0daf2c970cadd3008311b48eb4b0bd9261b7e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Jan 
2024 21:07:03 -0800 Subject: [PATCH 29/63] Bump jinja2 from 3.1.2 to 3.1.3 in /plugins/flytekit-airflow (#2103) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.2 to 3.1.3. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.2...3.1.3) --- updated-dependencies: - dependency-name: jinja2 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- plugins/flytekit-airflow/dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-airflow/dev-requirements.txt b/plugins/flytekit-airflow/dev-requirements.txt index 744f2c092c..d865203000 100644 --- a/plugins/flytekit-airflow/dev-requirements.txt +++ b/plugins/flytekit-airflow/dev-requirements.txt @@ -528,7 +528,7 @@ itsdangerous==2.1.2 # connexion # flask # flask-wtf -jinja2==3.1.2 +jinja2==3.1.3 # via # apache-airflow # flask From cc94d3300c0e4467df84cd602dbb747b01a73b64 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Jan 2024 23:59:28 -0800 Subject: [PATCH 30/63] Bump jinja2 from 3.1.2 to 3.1.3 (#2104) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.2 to 3.1.3. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.2...3.1.3) --- updated-dependencies: - dependency-name: jinja2 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- dev-requirements.txt | 30 ++++++++++++++++++------------ doc-requirements.txt | 19 +++++++++---------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index a354b70ea4..0634217be7 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -10,7 +10,7 @@ absl-py==2.0.0 # via # tensorboard # tensorflow -adlfs==2023.10.0 +adlfs==2023.9.0 # via flytekit aiobotocore==2.5.4 # via s3fs @@ -24,8 +24,6 @@ aioitertools==0.11.0 # via aiobotocore aiosignal==1.3.1 # via aiohttp -appnope==0.1.3 - # via ipython arrow==1.3.0 # via cookiecutter asttokens==2.4.1 @@ -97,6 +95,7 @@ cryptography==41.0.5 # azure-storage-blob # msal # pyjwt + # secretstorage dataclasses-json==0.5.9 # via flytekit decorator==5.1.1 @@ -224,7 +223,11 @@ jaraco-classes==3.3.0 # keyrings-alt jedi==0.19.1 # via ipython -jinja2==3.1.2 +jeepney==0.8.0 + # via + # keyring + # secretstorage +jinja2==3.1.3 # via cookiecutter jmespath==1.0.1 # via botocore @@ -297,7 +300,7 @@ nodeenv==1.8.0 # via pre-commit numpy==1.23.5 # via - # flytekit + # -r dev-requirements.in # h5py # keras-preprocessing # opt-einsum @@ -321,7 +324,7 @@ packaging==23.2 # pytest # setuptools-scm pandas==1.5.3 - # via flytekit + # via -r dev-requirements.in parso==0.8.3 # via jedi pexpect==4.8.0 @@ -411,6 +414,8 @@ python-dateutil==2.8.2 # pandas python-json-logger==2.0.7 # via flytekit +python-magic==0.4.27 ; platform_system == "Darwin" or platform_system == "Linux" + # via -r dev-requirements.in python-slugify==8.0.1 # via cookiecutter pytimeparse==1.1.8 @@ -459,6 +464,8 @@ scikit-learn==1.3.2 # via -r dev-requirements.in scipy==1.11.3 # via scikit-learn +secretstorage==3.3.3 + # via keyring setuptools-scm==8.0.4 # via -r dev-requirements.in six==1.16.0 @@ -473,8 +480,7 @@ six==1.16.0 # python-dateutil # tensorflow sortedcontainers==2.4.0 - # via - # hypothesis 
+ # via hypothesis stack-data==0.6.3 # via ipython statsd==3.3.0 @@ -485,7 +491,7 @@ tensorboard-data-server==0.6.1 # via tensorboard tensorboard-plugin-wit==1.8.1 # via tensorboard -tensorflow==2.8.1 ; python_version < "3.11" and (platform_machine != "arm64" or platform_system != "Darwin") +tensorflow==2.8.1 ; python_version < "3.12" # via -r dev-requirements.in tensorflow-estimator==2.8.0 # via tensorflow @@ -503,8 +509,8 @@ tomli==2.0.1 # coverage # mypy # pytest - # . setuptools-scm -torch==1.12.1 ; python_version >= "3.11" or platform_system != "Windows" + # setuptools-scm +torch==1.12.1 ; python_version < "3.12" # via -r dev-requirements.in traitlets==5.13.0 # via @@ -530,7 +536,7 @@ typing-extensions==4.8.0 # mashumaro # mypy # rich-click - # . setuptools-scm + # setuptools-scm # tensorflow # torch # typing-inspect diff --git a/doc-requirements.txt b/doc-requirements.txt index e1c2a72dcb..52cc2a5d29 100644 --- a/doc-requirements.txt +++ b/doc-requirements.txt @@ -119,6 +119,7 @@ cryptography==41.0.7 # azure-storage-blob # msal # pyjwt + # secretstorage css-html-js-minify==2.5.5 # via sphinx-material cycler==0.12.1 @@ -241,9 +242,6 @@ importlib-metadata==7.0.1 # dask # flytekit # keyring - # sphinx -importlib-resources==6.1.1 - # via matplotlib ipydatawidgets==4.3.5 # via pythreejs ipyleaflet==0.18.1 @@ -283,7 +281,11 @@ jaraco-classes==3.3.0 # via keyring jedi==0.19.1 # via ipython -jinja2==3.1.2 +jeepney==0.8.0 + # via + # keyring + # secretstorage +jinja2==3.1.3 # via # branca # cookiecutter @@ -546,6 +548,8 @@ scikit-learn==1.3.2 # via -r doc-requirements.in scipy==1.11.4 # via scikit-learn +secretstorage==3.3.3 + # via keyring six==1.16.0 # via # asttokens @@ -656,19 +660,16 @@ types-python-dateutil==2.8.19.20240106 # via arrow typing-extensions==4.9.0 # via - # aioitertools # anyio # astroid # azure-core # azure-storage-blob # fastapi # flytekit - # ipython # mashumaro # pydantic # python-utils # rich-click - # starlette # typing-inspect # uvicorn 
typing-inspect==0.9.0 @@ -738,9 +739,7 @@ xyzservices==2023.10.1 yarl==1.9.4 # via aiohttp zipp==3.17.0 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools From 76f6db2fd2001809354863d0cdbf5cf64169f50e Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Sat, 13 Jan 2024 16:10:09 +0800 Subject: [PATCH 31/63] Enhance Airflow Plugin (#2099) Signed-off-by: Future Outlier Co-authored-by: Future Outlier --- plugins/flytekit-airflow/flytekitplugins/airflow/task.py | 2 +- plugins/flytekit-airflow/setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/flytekit-airflow/flytekitplugins/airflow/task.py b/plugins/flytekit-airflow/flytekitplugins/airflow/task.py index 04fb848cb5..225115d12d 100644 --- a/plugins/flytekit-airflow/flytekitplugins/airflow/task.py +++ b/plugins/flytekit-airflow/flytekitplugins/airflow/task.py @@ -160,7 +160,7 @@ def _get_airflow_instance( return obj_def(**airflow_obj.parameters) -def _is_deferrable(cls: Type): +def _is_deferrable(cls: Type) -> bool: """ This function is used to check if the Airflow operator is deferrable. """ diff --git a/plugins/flytekit-airflow/setup.py b/plugins/flytekit-airflow/setup.py index 91214e6dbf..682cd72c18 100644 --- a/plugins/flytekit-airflow/setup.py +++ b/plugins/flytekit-airflow/setup.py @@ -7,6 +7,7 @@ plugin_requires = [ "apache-airflow", "flytekit>=1.9.0", + "flyteidl>=1.10.6", ] __version__ = "0.0.0+develop" From 729a925dfe0fb4f8ebcc0547889aa7f04a483667 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 00:10:21 -0800 Subject: [PATCH 32/63] Bump jinja2 from 3.1.2 to 3.1.3 in /plugins/flytekit-onnx-pytorch (#2102) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.2 to 3.1.3. 
- [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.2...3.1.3) --- updated-dependencies: - dependency-name: jinja2 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../dev-requirements.txt | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/plugins/flytekit-onnx-pytorch/dev-requirements.txt b/plugins/flytekit-onnx-pytorch/dev-requirements.txt index ef4045e458..44660a3ba9 100644 --- a/plugins/flytekit-onnx-pytorch/dev-requirements.txt +++ b/plugins/flytekit-onnx-pytorch/dev-requirements.txt @@ -11,7 +11,9 @@ charset-normalizer==3.3.2 coloredlogs==15.0.1 # via onnxruntime filelock==3.13.1 - # via torch + # via + # torch + # triton flatbuffers==23.5.26 # via onnxruntime fsspec==2023.10.0 @@ -20,7 +22,7 @@ humanfriendly==10.0 # via coloredlogs idna==3.4 # via requests -jinja2==3.1.2 +jinja2==3.1.3 # via torch markupsafe==2.1.3 # via jinja2 @@ -32,6 +34,37 @@ numpy==1.23.5 # via # onnxruntime # torchvision +nvidia-cublas-cu12==12.1.3.1 + # via + # nvidia-cudnn-cu12 + # nvidia-cusolver-cu12 + # torch +nvidia-cuda-cupti-cu12==12.1.105 + # via torch +nvidia-cuda-nvrtc-cu12==12.1.105 + # via torch +nvidia-cuda-runtime-cu12==12.1.105 + # via torch +nvidia-cudnn-cu12==8.9.2.26 + # via torch +nvidia-cufft-cu12==11.0.2.54 + # via torch +nvidia-curand-cu12==10.3.2.106 + # via torch +nvidia-cusolver-cu12==11.4.5.107 + # via torch +nvidia-cusparse-cu12==12.1.0.106 + # via + # nvidia-cusolver-cu12 + # torch +nvidia-nccl-cu12==2.18.1 + # via torch +nvidia-nvjitlink-cu12==12.3.101 + # via + # nvidia-cusolver-cu12 + # nvidia-cusparse-cu12 +nvidia-nvtx-cu12==12.1.105 + # via torch onnxruntime==1.16.1 # via -r dev-requirements.in packaging==23.2 @@ -52,6 +85,8 @@ torch==2.1.0 # via torchvision torchvision==0.16.0 # via 
-r dev-requirements.in +triton==2.1.0 + # via torch typing-extensions==4.8.0 # via torch urllib3==2.0.7 From 01e2392a994718d6f7b1f3b82fa416bb69157203 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 00:11:31 -0800 Subject: [PATCH 33/63] Bump jinja2 (#2101) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.2 to 3.1.3. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.2...3.1.3) --- updated-dependencies: - dependency-name: jinja2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../remote/mock_flyte_repo/workflows/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt b/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt index 48ee6ca93c..5f4ef273a3 100644 --- a/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt +++ b/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt @@ -155,7 +155,7 @@ jeepney==0.8.0 # via # keyring # secretstorage -jinja2==3.1.2 +jinja2==3.1.3 # via cookiecutter jmespath==1.0.1 # via botocore From a3a640adb7d83c3ef405a5857d4ad5ff29f0cb56 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Sat, 13 Jan 2024 16:16:31 +0800 Subject: [PATCH 34/63] init (#2078) Signed-off-by: Future Outlier Co-authored-by: Future Outlier --- flytekit/core/container_task.py | 2 -- flytekit/core/node.py | 4 ---- flytekit/core/python_auto_container.py | 2 -- flytekit/core/python_customized_container_task.py | 2 -- flytekit/core/resources.py | 4 ---- flytekit/core/utils.py | 10 ---------- flytekit/models/task.py | 1 - tests/flytekit/common/parameterizers.py | 1 - 
tests/flytekit/unit/core/test_resources.py | 8 ++------ 9 files changed, 2 insertions(+), 32 deletions(-) diff --git a/flytekit/core/container_task.py b/flytekit/core/container_task.py index d606675299..1b078f83a7 100644 --- a/flytekit/core/container_task.py +++ b/flytekit/core/container_task.py @@ -124,12 +124,10 @@ def _get_container(self, settings: SerializationSettings) -> _task_model.Contain args=self._args, data_loading_config=self._get_data_loading_config(), environment=env, - storage_request=self.resources.requests.storage, ephemeral_storage_request=self.resources.requests.ephemeral_storage, cpu_request=self.resources.requests.cpu, gpu_request=self.resources.requests.gpu, memory_request=self.resources.requests.mem, - storage_limit=self.resources.limits.storage, ephemeral_storage_limit=self.resources.limits.ephemeral_storage, cpu_limit=self.resources.limits.cpu, gpu_limit=self.resources.limits.gpu, diff --git a/flytekit/core/node.py b/flytekit/core/node.py index b9edddeec0..6c84877c90 100644 --- a/flytekit/core/node.py +++ b/flytekit/core/node.py @@ -210,10 +210,6 @@ def _convert_resource_overrides( if resources.gpu is not None: resource_entries.append(_resources_model.ResourceEntry(_resources_model.ResourceName.GPU, resources.gpu)) - if resources.storage is not None: - resource_entries.append( - _resources_model.ResourceEntry(_resources_model.ResourceName.STORAGE, resources.storage) - ) if resources.ephemeral_storage is not None: resource_entries.append( _resources_model.ResourceEntry( diff --git a/flytekit/core/python_auto_container.py b/flytekit/core/python_auto_container.py index 2f9d8417fd..c43e3d4d14 100644 --- a/flytekit/core/python_auto_container.py +++ b/flytekit/core/python_auto_container.py @@ -196,12 +196,10 @@ def _get_container(self, settings: SerializationSettings) -> _task_model.Contain args=self.get_command(settings=settings), data_loading_config=None, environment=env, - storage_request=self.resources.requests.storage, 
ephemeral_storage_request=self.resources.requests.ephemeral_storage, cpu_request=self.resources.requests.cpu, gpu_request=self.resources.requests.gpu, memory_request=self.resources.requests.mem, - storage_limit=self.resources.limits.storage, ephemeral_storage_limit=self.resources.limits.ephemeral_storage, cpu_limit=self.resources.limits.cpu, gpu_limit=self.resources.limits.gpu, diff --git a/flytekit/core/python_customized_container_task.py b/flytekit/core/python_customized_container_task.py index 07493886a2..a3d89b0979 100644 --- a/flytekit/core/python_customized_container_task.py +++ b/flytekit/core/python_customized_container_task.py @@ -165,12 +165,10 @@ def get_container(self, settings: SerializationSettings) -> _task_model.Containe args=self.get_command(settings=settings), data_loading_config=None, environment=env, - storage_request=self.resources.requests.storage, ephemeral_storage_request=self.resources.requests.ephemeral_storage, cpu_request=self.resources.requests.cpu, gpu_request=self.resources.requests.gpu, memory_request=self.resources.requests.mem, - storage_limit=self.resources.limits.storage, ephemeral_storage_limit=self.resources.limits.ephemeral_storage, cpu_limit=self.resources.limits.cpu, gpu_limit=self.resources.limits.gpu, diff --git a/flytekit/core/resources.py b/flytekit/core/resources.py index 62b880f6ed..2e8388f986 100644 --- a/flytekit/core/resources.py +++ b/flytekit/core/resources.py @@ -29,7 +29,6 @@ class Resources(object): cpu: Optional[str] = None mem: Optional[str] = None gpu: Optional[str] = None - storage: Optional[str] = None ephemeral_storage: Optional[str] = None def __post_init__(self): @@ -42,7 +41,6 @@ def _check_none_or_str(value): _check_none_or_str(self.cpu) _check_none_or_str(self.mem) _check_none_or_str(self.gpu) - _check_none_or_str(self.storage) _check_none_or_str(self.ephemeral_storage) @@ -64,8 +62,6 @@ def _convert_resources_to_resource_entries(resources: Resources) -> List[_Resour 
resource_entries.append(_ResourceEntry(name=_ResourceName.MEMORY, value=resources.mem)) if resources.gpu is not None: resource_entries.append(_ResourceEntry(name=_ResourceName.GPU, value=resources.gpu)) - if resources.storage is not None: - resource_entries.append(_ResourceEntry(name=_ResourceName.STORAGE, value=resources.storage)) if resources.ephemeral_storage is not None: resource_entries.append(_ResourceEntry(name=_ResourceName.EPHEMERAL_STORAGE, value=resources.ephemeral_storage)) return resource_entries diff --git a/flytekit/core/utils.py b/flytekit/core/utils.py index c24d53c4e0..17cdfb3de9 100644 --- a/flytekit/core/utils.py +++ b/flytekit/core/utils.py @@ -62,20 +62,16 @@ def _get_container_definition( command: List[str], args: Optional[List[str]] = None, data_loading_config: Optional["task_models.DataLoadingConfig"] = None, - storage_request: Optional[str] = None, ephemeral_storage_request: Optional[str] = None, cpu_request: Optional[str] = None, gpu_request: Optional[str] = None, memory_request: Optional[str] = None, - storage_limit: Optional[str] = None, ephemeral_storage_limit: Optional[str] = None, cpu_limit: Optional[str] = None, gpu_limit: Optional[str] = None, memory_limit: Optional[str] = None, environment: Optional[Dict[str, str]] = None, ) -> "task_models.Container": - storage_limit = storage_limit - storage_request = storage_request ephemeral_storage_limit = ephemeral_storage_limit ephemeral_storage_request = ephemeral_storage_request cpu_limit = cpu_limit @@ -89,10 +85,6 @@ def _get_container_definition( # TODO: Use convert_resources_to_resource_model instead of manually fixing the resources. 
requests = [] - if storage_request: - requests.append( - task_models.Resources.ResourceEntry(task_models.Resources.ResourceName.STORAGE, storage_request) - ) if ephemeral_storage_request: requests.append( task_models.Resources.ResourceEntry( @@ -108,8 +100,6 @@ def _get_container_definition( requests.append(task_models.Resources.ResourceEntry(task_models.Resources.ResourceName.MEMORY, memory_request)) limits = [] - if storage_limit: - limits.append(task_models.Resources.ResourceEntry(task_models.Resources.ResourceName.STORAGE, storage_limit)) if ephemeral_storage_limit: limits.append( task_models.Resources.ResourceEntry( diff --git a/flytekit/models/task.py b/flytekit/models/task.py index 48a8abfde1..1da786ea6d 100644 --- a/flytekit/models/task.py +++ b/flytekit/models/task.py @@ -22,7 +22,6 @@ class ResourceName(object): CPU = _core_task.Resources.CPU GPU = _core_task.Resources.GPU MEMORY = _core_task.Resources.MEMORY - STORAGE = _core_task.Resources.STORAGE EPHEMERAL_STORAGE = _core_task.Resources.EPHEMERAL_STORAGE class ResourceEntry(_common.FlyteIdlEntity): diff --git a/tests/flytekit/common/parameterizers.py b/tests/flytekit/common/parameterizers.py index 96c30b69b4..84481a37ad 100644 --- a/tests/flytekit/common/parameterizers.py +++ b/tests/flytekit/common/parameterizers.py @@ -90,7 +90,6 @@ task.Resources.ResourceEntry(task.Resources.ResourceName.CPU, "1"), task.Resources.ResourceEntry(task.Resources.ResourceName.GPU, "1"), task.Resources.ResourceEntry(task.Resources.ResourceName.MEMORY, "1G"), - task.Resources.ResourceEntry(task.Resources.ResourceName.STORAGE, "1G"), task.Resources.ResourceEntry(task.Resources.ResourceName.EPHEMERAL_STORAGE, "1G"), ] diff --git a/tests/flytekit/unit/core/test_resources.py b/tests/flytekit/unit/core/test_resources.py index 25a637b2d6..e11be83462 100644 --- a/tests/flytekit/unit/core/test_resources.py +++ b/tests/flytekit/unit/core/test_resources.py @@ -22,10 +22,9 @@ def test_convert_no_requests_no_limits(): ({"cpu": "2"}, 
_ResourceName.CPU), ({"mem": "1Gi"}, _ResourceName.MEMORY), ({"gpu": "1"}, _ResourceName.GPU), - ({"storage": "100Mb"}, _ResourceName.STORAGE), ({"ephemeral_storage": "123Mb"}, _ResourceName.EPHEMERAL_STORAGE), ), - ids=("CPU", "MEMORY", "GPU", "STORAGE", "EPHEMERAL_STORAGE"), + ids=("CPU", "MEMORY", "GPU", "EPHEMERAL_STORAGE"), ) def test_convert_requests(resource_dict: Dict[str, str], expected_resource_name: _task_models.Resources): assert len(resource_dict) == 1 @@ -48,10 +47,9 @@ def test_convert_requests(resource_dict: Dict[str, str], expected_resource_name: ({"cpu": "2"}, _ResourceName.CPU), ({"mem": "1Gi"}, _ResourceName.MEMORY), ({"gpu": "1"}, _ResourceName.GPU), - ({"storage": "100Mb"}, _ResourceName.STORAGE), ({"ephemeral_storage": "123Mb"}, _ResourceName.EPHEMERAL_STORAGE), ), - ids=("CPU", "MEMORY", "GPU", "STORAGE", "EPHEMERAL_STORAGE"), + ids=("CPU", "MEMORY", "GPU", "EPHEMERAL_STORAGE"), ) def test_convert_limits(resource_dict: Dict[str, str], expected_resource_name: _task_models.Resources): assert len(resource_dict) == 1 @@ -75,7 +73,5 @@ def test_incorrect_type_resources(): Resources(mem=1) # type: ignore with pytest.raises(AssertionError): Resources(gpu=1) # type: ignore - with pytest.raises(AssertionError): - Resources(storage=1) # type: ignore with pytest.raises(AssertionError): Resources(ephemeral_storage=1) # type: ignore From ccc515e9a85ef97afa58fdb3860bc8e901491fba Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Sat, 13 Jan 2024 16:18:06 +0800 Subject: [PATCH 35/63] Fix Flyin Plugin VSCode download bug (#1991) Signed-off-by: Future Outlier Co-authored-by: Future Outlier --- plugins/flytekit-flyin/Dockerfile | 1 + .../flyin/vscode_lib/constants.py | 3 + .../flyin/vscode_lib/decorator.py | 68 +++++++++++------ .../flytekit-flyin/tests/test_flyin_plugin.py | 76 ++++++++++++++++++- 4 files changed, 124 insertions(+), 24 deletions(-) diff --git a/plugins/flytekit-flyin/Dockerfile b/plugins/flytekit-flyin/Dockerfile index 
9d56a84826..765c0f94c0 100644 --- a/plugins/flytekit-flyin/Dockerfile +++ b/plugins/flytekit-flyin/Dockerfile @@ -27,6 +27,7 @@ RUN apt-get update \ && apt-get autoremove --yes \ && rm -rf /var/lib/{apt,dpkg,cache,log}/ \ && useradd -u 1000 flytekit \ + && chown -R flytekit:flytekit /tmp/code-server \ && chown flytekit: /root \ && chown flytekit: /home \ && : diff --git a/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/constants.py b/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/constants.py index faabbfcaee..52d1e8e3ff 100644 --- a/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/constants.py +++ b/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/constants.py @@ -36,3 +36,6 @@ # Context attribute name of the task function's source file path TASK_FUNCTION_SOURCE_PATH = "TASK_FUNCTION_SOURCE_PATH" + +# Subprocess constants +EXIT_CODE_SUCCESS = 0 diff --git a/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/decorator.py b/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/decorator.py index 2bde13ae5a..9a936d7e46 100644 --- a/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/decorator.py +++ b/plugins/flytekit-flyin/flytekitplugins/flyin/vscode_lib/decorator.py @@ -10,7 +10,7 @@ import tarfile import time from threading import Event -from typing import Callable, Optional +from typing import Callable, List, Optional import fsspec from flytekitplugins.flyin.utils import load_module_from_path @@ -23,6 +23,7 @@ from .constants import ( DOWNLOAD_DIR, EXECUTABLE_NAME, + EXIT_CODE_SUCCESS, HEARTBEAT_CHECK_SECONDS, HEARTBEAT_PATH, INTERACTIVE_DEBUGGING_FILE_NAME, @@ -42,7 +43,7 @@ def execute_command(cmd): process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) logger.info(f"cmd: {cmd}") stdout, stderr = process.communicate() - if process.returncode != 0: + if process.returncode != EXIT_CODE_SUCCESS: raise RuntimeError(f"Command {cmd} failed with error: {stderr}") logger.info(f"stdout: 
{stdout}") logger.info(f"stderr: {stderr}") @@ -176,6 +177,27 @@ def get_code_server_info(code_server_info_dict: dict) -> str: ) +def get_installed_extensions() -> List[str]: + """ + Get the list of installed extensions. + + Returns: + List[str]: The list of installed extensions. + """ + logger = flytekit.current_context().logging + + installed_extensions = subprocess.run(["code-server", "--list-extensions"], capture_output=True, text=True) + if installed_extensions.returncode != EXIT_CODE_SUCCESS: + logger.info(f"Command code-server --list-extensions failed with error: {installed_extensions.stderr}") + return [] + + return installed_extensions.stdout.splitlines() + + +def is_extension_installed(extension: str, installed_extensions: List[str]) -> bool: + return any(installed_extension in extension for installed_extension in installed_extensions) + + def download_vscode(config: VscodeConfig): """ Download vscode server and extension from remote to local and add the directory of binary executable to $PATH. 
@@ -190,34 +212,34 @@ def download_vscode(config: VscodeConfig): if executable_path is not None: logger.info(f"Code server binary already exists at {executable_path}") logger.info("Skipping downloading code server...") - return + else: + logger.info("Code server is not in $PATH, start downloading code server...") + # Create DOWNLOAD_DIR if not exist + logger.info(f"DOWNLOAD_DIR: {DOWNLOAD_DIR}") + os.makedirs(DOWNLOAD_DIR, exist_ok=True) - logger.info("Code server is not in $PATH, start downloading code server...") + logger.info(f"Start downloading files to {DOWNLOAD_DIR}") + # Download remote file to local + code_server_remote_path = get_code_server_info(config.code_server_remote_paths) + code_server_tar_path = download_file(code_server_remote_path, DOWNLOAD_DIR) - # Create DOWNLOAD_DIR if not exist - logger.info(f"DOWNLOAD_DIR: {DOWNLOAD_DIR}") - os.makedirs(DOWNLOAD_DIR, exist_ok=True) + # Extract the tarball + with tarfile.open(code_server_tar_path, "r:gz") as tar: + tar.extractall(path=DOWNLOAD_DIR) - logger.info(f"Start downloading files to {DOWNLOAD_DIR}") + code_server_dir_name = get_code_server_info(config.code_server_dir_names) + code_server_bin_dir = os.path.join(DOWNLOAD_DIR, code_server_dir_name, "bin") - # Download remote file to local - code_server_remote_path = get_code_server_info(config.code_server_remote_paths) - code_server_tar_path = download_file(code_server_remote_path, DOWNLOAD_DIR) + # Add the directory of code-server binary to $PATH + os.environ["PATH"] = code_server_bin_dir + os.pathsep + os.environ["PATH"] + # If the extension already exists in the container, skip downloading + installed_extensions = get_installed_extensions() extension_paths = [] for extension in config.extension_remote_paths: - file_path = download_file(extension, DOWNLOAD_DIR) - extension_paths.append(file_path) - - # Extract the tarball - with tarfile.open(code_server_tar_path, "r:gz") as tar: - tar.extractall(path=DOWNLOAD_DIR) - - code_server_dir_name = 
get_code_server_info(config.code_server_dir_names) - code_server_bin_dir = os.path.join(DOWNLOAD_DIR, code_server_dir_name, "bin") - - # Add the directory of code-server binary to $PATH - os.environ["PATH"] = code_server_bin_dir + os.pathsep + os.environ["PATH"] + if not is_extension_installed(extension, installed_extensions): + file_path = download_file(extension, DOWNLOAD_DIR) + extension_paths.append(file_path) for p in extension_paths: logger.info(f"Execute extension installation command to install extension {p}") diff --git a/plugins/flytekit-flyin/tests/test_flyin_plugin.py b/plugins/flytekit-flyin/tests/test_flyin_plugin.py index 085532e8fc..23cde516ce 100644 --- a/plugins/flytekit-flyin/tests/test_flyin_plugin.py +++ b/plugins/flytekit-flyin/tests/test_flyin_plugin.py @@ -14,7 +14,12 @@ jupyter, vscode, ) -from flytekitplugins.flyin.vscode_lib.decorator import get_code_server_info +from flytekitplugins.flyin.vscode_lib.constants import EXIT_CODE_SUCCESS +from flytekitplugins.flyin.vscode_lib.decorator import ( + get_code_server_info, + get_installed_extensions, + is_extension_installed, +) from flytekit import task, workflow from flytekit.configuration import Image, ImageConfig, SerializationSettings @@ -216,6 +221,20 @@ def wf(): mock_exit.assert_called_once() +def test_is_extension_installed(): + installed_extensions = [ + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "ms-toolsai.vscode-jupyter-cell-tags", + "ms-toolsai.vscode-jupyter-slideshow", + ] + config = VscodeConfig() + for extension in config.extension_remote_paths: + assert is_extension_installed(extension, installed_extensions) + + def test_vscode_config(): config = VscodeConfig() assert config.code_server_remote_paths == DEFAULT_CODE_SERVER_REMOTE_PATHS @@ -345,3 +364,58 @@ def test_platform_unsupported(mock_machine, mock_code_server_info_dict): match="Automatic download is only supported on AMD64 and ARM64 architectures. 
If you are using a different architecture, please visit the code-server official website to manually download the appropriate version for your image.", ): get_code_server_info(mock_code_server_info_dict) + + +@mock.patch("subprocess.run") +def test_get_installed_extensions_succeeded(mock_run): + # Set up the mock process + mock_process = mock.Mock() + mock_process.returncode = EXIT_CODE_SUCCESS + mock_process.stdout = ( + "ms-python.python\n" + "ms-toolsai.jupyter\n" + "ms-toolsai.jupyter-keymap\n" + "ms-toolsai.jupyter-renderers\n" + "ms-toolsai.vscode-jupyter-cell-tags\n" + "ms-toolsai.vscode-jupyter-slideshow\n" + ) + mock_run.return_value = mock_process + + installed_extensions = get_installed_extensions() + + # Verify the correct command was called + mock_run.assert_called_once_with(["code-server", "--list-extensions"], capture_output=True, text=True) + + # Assert that the output matches the expected list of extensions + expected_extensions = [ + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "ms-toolsai.vscode-jupyter-cell-tags", + "ms-toolsai.vscode-jupyter-slideshow", + ] + assert installed_extensions == expected_extensions + + +@mock.patch("subprocess.run") +def test_get_installed_extensions_failed(mock_run): + # Set up the mock process + mock_process = mock.Mock() + mock_process.returncode = 1 + mock_process.stdout = ( + "ms-python.python\n" + "ms-toolsai.jupyter\n" + "ms-toolsai.jupyter-keymap\n" + "ms-toolsai.jupyter-renderers\n" + "ms-toolsai.vscode-jupyter-cell-tags\n" + "ms-toolsai.vscode-jupyter-slideshow\n" + ) + mock_run.return_value = mock_process + + installed_extensions = get_installed_extensions() + + mock_run.assert_called_once_with(["code-server", "--list-extensions"], capture_output=True, text=True) + + expected_extensions = [] + assert installed_extensions == expected_extensions From 892b4741d0c38bd61b9cdaf1defb002d729acba2 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Sat, 13 Jan 2024 03:18:49 -0500 Subject: [PATCH 36/63] Adds conda support to envd plugin (#2020) Signed-off-by: Thomas J. Fan --- flytekit/image_spec/image_spec.py | 4 +++ plugins/flytekit-envd/README.md | 18 ++++++++++ .../flytekitplugins/envd/image_builder.py | 24 +++++++++---- .../flytekit-envd/tests/test_image_spec.py | 35 +++++++++++++++++++ tests/flytekit/unit/cli/pyflyte/test_run.py | 4 +-- 5 files changed, 77 insertions(+), 8 deletions(-) diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index 570a2a2952..5b437f0b17 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -29,6 +29,8 @@ class ImageSpec: env: environment variables of the image. registry: registry of the image. packages: list of python packages to install. + conda_packages: list of conda packages to install. + conda_channels: list of conda channels. requirements: path to the requirements.txt file. apt_packages: list of apt packages to install. cuda: version of cuda to install. 
@@ -47,6 +49,8 @@ class ImageSpec: env: Optional[typing.Dict[str, str]] = None registry: Optional[str] = None packages: Optional[List[str]] = None + conda_packages: Optional[List[str]] = None + conda_channels: Optional[List[str]] = None requirements: Optional[str] = None apt_packages: Optional[List[str]] = None cuda: Optional[str] = None diff --git a/plugins/flytekit-envd/README.md b/plugins/flytekit-envd/README.md index 374d2334ff..3b3a168d75 100644 --- a/plugins/flytekit-envd/README.md +++ b/plugins/flytekit-envd/README.md @@ -24,3 +24,21 @@ Example # def t1() -> str: # return "hello" ``` + +This plugin also supports installing packages from `conda`: + +```python +from flytekit import task, ImageSpec + +image_spec = ImageSpec( + base_image="ubuntu:20.04", + python_version="3.11", + packages=["flytekit"], + conda_packages=["pytorch", "pytorch-cuda=12.1"], + conda_channels=["pytorch", "nvidia"] +) + +@task(container_image=image_spec) +def run_pytorch(): + ... +``` diff --git a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py index 0a534a6763..45d0b4676c 100644 --- a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py +++ b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py @@ -50,6 +50,12 @@ def build_image(self, image_spec: ImageSpec): self.execute_command(build_command) +def _create_str_from_package_list(packages): + if packages is None: + return "" + return ", ".join(f'"{name}"' for name in packages) + + def create_envd_config(image_spec: ImageSpec) -> str: base_image = DefaultImages.default_image() if image_spec.base_image is None else image_spec.base_image if image_spec.cuda: @@ -57,10 +63,12 @@ def create_envd_config(image_spec: ImageSpec) -> str: raise Exception("python_version is required when cuda and cudnn are specified") base_image = "ubuntu20.04" - packages = [] if image_spec.packages is None else image_spec.packages - apt_packages = [] if image_spec.apt_packages is 
None else image_spec.apt_packages + python_packages = _create_str_from_package_list(image_spec.packages) + conda_packages = _create_str_from_package_list(image_spec.conda_packages) + run_commands = _create_str_from_package_list(image_spec.commands) + conda_channels = _create_str_from_package_list(image_spec.conda_channels) + apt_packages = _create_str_from_package_list(image_spec.apt_packages) env = {"PYTHONPATH": "/root", _F_IMG_ID: image_spec.image_name()} - commands = [] if image_spec.commands is None else image_spec.commands if image_spec.env: env.update(image_spec.env) @@ -70,9 +78,9 @@ def create_envd_config(image_spec: ImageSpec) -> str: def build(): base(image="{base_image}", dev=False) - run(commands=[{', '.join(map(str, map(lambda x: f'"{x}"', commands)))}]) - install.python_packages(name=[{', '.join(map(str, map(lambda x: f'"{x}"', packages)))}]) - install.apt_packages(name=[{', '.join(map(str, map(lambda x: f'"{x}"', apt_packages)))}]) + run(commands=[{run_commands}]) + install.python_packages(name=[{python_packages}]) + install.apt_packages(name=[{apt_packages}]) runtime.environ(env={env}, extra_path=['/root']) config.pip_index(url="{pip_index}") """ @@ -80,6 +88,10 @@ def build(): cfg_path = ctx.file_access.get_random_local_path("build.envd") pathlib.Path(cfg_path).parent.mkdir(parents=True, exist_ok=True) + if conda_packages: + envd_config += " install.conda(use_mamba=True)\n" + envd_config += f" install.conda_packages(name=[{conda_packages}], channel=[{conda_channels}])\n" + if image_spec.requirements: requirement_path = f"{pathlib.Path(cfg_path).parent}{os.sep}{REQUIREMENTS_FILE_NAME}" shutil.copyfile(image_spec.requirements, requirement_path) diff --git a/plugins/flytekit-envd/tests/test_image_spec.py b/plugins/flytekit-envd/tests/test_image_spec.py index 49c8f5085a..36adebd346 100644 --- a/plugins/flytekit-envd/tests/test_image_spec.py +++ b/plugins/flytekit-envd/tests/test_image_spec.py @@ -1,4 +1,5 @@ from pathlib import Path +from textwrap 
import dedent from flytekitplugins.envd.image_builder import EnvdImageSpecBuilder, create_envd_config @@ -35,3 +36,37 @@ def build(): install.python(version="3.8") """ ) + + +def test_image_spec_conda(): + image_spec = ImageSpec( + base_image="ubuntu:20.04", + python_version="3.11", + packages=["flytekit"], + conda_packages=["pytorch", "cpuonly"], + conda_channels=["pytorch"], + ) + + EnvdImageSpecBuilder().build_image(image_spec) + config_path = create_envd_config(image_spec) + assert image_spec.platform == "linux/amd64" + image_name = image_spec.image_name() + contents = Path(config_path).read_text() + expected_contents = dedent( + f"""\ + # syntax=v1 + + def build(): + base(image="ubuntu:20.04", dev=False) + run(commands=[]) + install.python_packages(name=["flytekit"]) + install.apt_packages(name=[]) + runtime.environ(env={{'PYTHONPATH': '/root', '_F_IMG_ID': '{image_name}'}}, extra_path=['/root']) + config.pip_index(url="https://pypi.org/simple") + install.conda(use_mamba=True) + install.conda_packages(name=["pytorch", "cpuonly"], channel=["pytorch"]) + install.python(version="3.11") + """ + ) + + assert contents == expected_contents diff --git a/tests/flytekit/unit/cli/pyflyte/test_run.py b/tests/flytekit/unit/cli/pyflyte/test_run.py index cb8d8f6bc6..bab8b95b15 100644 --- a/tests/flytekit/unit/cli/pyflyte/test_run.py +++ b/tests/flytekit/unit/cli/pyflyte/test_run.py @@ -287,9 +287,9 @@ def test_list_default_arguments(wf_path): ) ic_result_4 = ImageConfig( - default_image=Image(name="default", fqn="flytekit", tag="tbcFqCcdAEyJqPcyYsQ15A.."), + default_image=Image(name="default", fqn="flytekit", tag="DgQMqIi61py4I4P5iOeS0Q.."), images=[ - Image(name="default", fqn="flytekit", tag="tbcFqCcdAEyJqPcyYsQ15A.."), + Image(name="default", fqn="flytekit", tag="DgQMqIi61py4I4P5iOeS0Q.."), Image(name="xyz", fqn="docker.io/xyz", tag="latest"), Image(name="abc", fqn="docker.io/abc", tag=None), ], From 80ca660e1c22bf014b4cf7335fd362360436690f Mon Sep 17 00:00:00 2001 From: 
Kevin Su Date: Tue, 16 Jan 2024 18:14:54 -0800 Subject: [PATCH 37/63] Fix is_deferrable in airflow agent (#2109) Signed-off-by: Kevin Su --- plugins/flytekit-airflow/dev-requirements.in | 1 + plugins/flytekit-airflow/dev-requirements.txt | 7 +++++-- .../flytekitplugins/airflow/task.py | 17 ++++++++++------- plugins/flytekit-airflow/tests/test_task.py | 4 +++- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/plugins/flytekit-airflow/dev-requirements.in b/plugins/flytekit-airflow/dev-requirements.in index 8ee20b47d1..b3913ef810 100644 --- a/plugins/flytekit-airflow/dev-requirements.in +++ b/plugins/flytekit-airflow/dev-requirements.in @@ -1 +1,2 @@ apache-airflow-providers-apache-beam[google] +apache-airflow[google] diff --git a/plugins/flytekit-airflow/dev-requirements.txt b/plugins/flytekit-airflow/dev-requirements.txt index d865203000..b7b8178724 100644 --- a/plugins/flytekit-airflow/dev-requirements.txt +++ b/plugins/flytekit-airflow/dev-requirements.txt @@ -21,8 +21,9 @@ annotated-types==0.6.0 # via pydantic anyio==4.0.0 # via httpx -apache-airflow==2.8.0 +apache-airflow[google]==2.8.0 # via + # -r dev-requirements.in # apache-airflow-providers-apache-beam # apache-airflow-providers-common-sql # apache-airflow-providers-ftp @@ -40,7 +41,9 @@ apache-airflow-providers-common-sql==1.8.0 apache-airflow-providers-ftp==3.6.0 # via apache-airflow apache-airflow-providers-google==10.11.0 - # via apache-airflow-providers-apache-beam + # via + # apache-airflow + # apache-airflow-providers-apache-beam apache-airflow-providers-http==4.6.0 # via apache-airflow apache-airflow-providers-imap==3.4.0 diff --git a/plugins/flytekit-airflow/flytekitplugins/airflow/task.py b/plugins/flytekit-airflow/flytekitplugins/airflow/task.py index 225115d12d..b49bc061f5 100644 --- a/plugins/flytekit-airflow/flytekitplugins/airflow/task.py +++ b/plugins/flytekit-airflow/flytekitplugins/airflow/task.py @@ -146,11 +146,7 @@ def _get_airflow_instance( obj_module = 
importlib.import_module(name=airflow_obj.module) obj_def = getattr(obj_module, airflow_obj.name) - if ( - issubclass(obj_def, airflow_models.BaseOperator) - and not issubclass(obj_def, airflow_sensors.BaseSensorOperator) - and _is_deferrable(obj_def) - ): + if _is_deferrable(obj_def): try: return obj_def(**airflow_obj.parameters, deferrable=True) except airflow.exceptions.AirflowException as e: @@ -163,12 +159,19 @@ def _get_airflow_instance( def _is_deferrable(cls: Type) -> bool: """ This function is used to check if the Airflow operator is deferrable. + If the operator is not deferrable, we run it in a container instead of the agent. """ + # Only Airflow operators are deferrable. + if not issubclass(cls, airflow_models.BaseOperator): + return False + # Airflow sensors are not deferrable. Sensor is a subclass of BaseOperator. + if issubclass(cls, airflow_sensors.BaseSensorOperator): + return False try: from airflow.providers.apache.beam.operators.beam import BeamBasePipelineOperator # Dataflow operators are not deferrable. - if not issubclass(cls, BeamBasePipelineOperator): + if issubclass(cls, BeamBasePipelineOperator): return False except ImportError: logger.debug("Failed to import BeamBasePipelineOperator") @@ -194,7 +197,7 @@ def _flyte_operator(*args, **kwargs): task_id = kwargs["task_id"] or cls.__name__ config = AirflowObj(module=cls.__module__, name=cls.__name__, parameters=kwargs) - if _is_deferrable(cls): + if not _is_deferrable(cls): # Dataflow operators are not deferrable, so we run them in a container. 
return AirflowContainerTask(name=task_id, task_config=config, container_image=container_image)() return AirflowTask(name=task_id, task_config=config)() diff --git a/plugins/flytekit-airflow/tests/test_task.py b/plugins/flytekit-airflow/tests/test_task.py index 3d6a954b7f..f55bcef5dd 100644 --- a/plugins/flytekit-airflow/tests/test_task.py +++ b/plugins/flytekit-airflow/tests/test_task.py @@ -1,5 +1,6 @@ import jsonpickle from airflow.providers.apache.beam.operators.beam import BeamRunJavaPipelineOperator +from airflow.providers.google.cloud.operators.dataproc import DataprocCreateClusterOperator from airflow.sensors.bash import BashSensor from airflow.utils.context import Context from flytekitplugins.airflow.task import ( @@ -34,8 +35,9 @@ def test_xcom_push(): def test_is_deferrable(): - assert _is_deferrable(BeamRunJavaPipelineOperator) is True + assert _is_deferrable(BeamRunJavaPipelineOperator) is False assert _is_deferrable(BashSensor) is False + assert _is_deferrable(DataprocCreateClusterOperator) is True def test_airflow_task(): From 1127206e826a0378c1ff016f7817f60d2f09e507 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Wed, 17 Jan 2024 18:19:31 +0800 Subject: [PATCH 38/63] Use Latest FlyteIdl in Integration Test (#2105) * Use Latest FlyteIdl in all Dockerfile Signed-off-by: Future Outlier * lint Signed-off-by: Future Outlier * Revert all dockerfile except dev version Signed-off-by: Future-Outlier * remove sqlalchemy dockerfile Signed-off-by: Future-Outlier --------- Signed-off-by: Future Outlier Signed-off-by: Future-Outlier Co-authored-by: Future Outlier --- Dockerfile.dev | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile.dev b/Dockerfile.dev index 97277b6837..8db65d8b9c 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -27,7 +27,9 @@ COPY . /flytekit # 3. Clean up the apt cache to reduce image size. Reference: https://gist.github.com/marvell/7c812736565928e602c4 # 4. 
Create a non-root user 'flytekit' and set appropriate permissions for directories. RUN apt-get update && apt-get install build-essential vim libmagic1 git -y \ - && pip install --no-cache-dir -e /flytekit \ + && pip install --no-cache-dir -U --pre \ + flyteidl \ + -e /flytekit \ -e /flytekit/plugins/flytekit-k8s-pod \ -e /flytekit/plugins/flytekit-deck-standard \ -e /flytekit/plugins/flytekit-flyin \ From 7996c2ebb6892341d31e6efbd7c7f0f8ca2c9b34 Mon Sep 17 00:00:00 2001 From: Ketan Umare <16888709+kumare3@users.noreply.github.com> Date: Wed, 17 Jan 2024 07:26:18 -0800 Subject: [PATCH 39/63] Changes default destination dir to be "working dir" (#2108) --- flytekit/clis/sdk_in_container/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/clis/sdk_in_container/run.py b/flytekit/clis/sdk_in_container/run.py index 79e7f0fce5..d5a96e069a 100644 --- a/flytekit/clis/sdk_in_container/run.py +++ b/flytekit/clis/sdk_in_container/run.py @@ -70,7 +70,7 @@ class RunLevelParams(PyFlyteParams): param_decls=["--destination-dir", "destination_dir"], required=False, type=str, - default="/root", + default=".", show_default=True, help="Directory inside the image where the tar file containing the code will be copied to", ) From aaf69f825625d67ce3dc2b5a9cbc944dc9b87131 Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Wed, 17 Jan 2024 18:42:39 -0800 Subject: [PATCH 40/63] Fix typos and run spellcheck in CI (#2074) * Fix typos and run spellcheck in CI Signed-off-by: Eduardo Apolinario * Remove --interactive Signed-off-by: Eduardo Apolinario * Remove CI check since it's run in pre-commit checks (lint make target) Signed-off-by: Eduardo Apolinario Signed-off-by: Eduardo Apolinario * Fix typo in accelerators.py Signed-off-by: Eduardo Apolinario --------- Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- .pre-commit-config.yaml | 6 ++++++ Makefile | 3 ++- 
docs/source/design/control_plane.rst | 2 +- flytekit/clients/auth/auth_client.py | 2 +- flytekit/clients/auth/authenticator.py | 4 ++-- flytekit/clients/auth_helper.py | 4 ++-- flytekit/clis/flyte_cli/main.py | 4 ++-- flytekit/clis/sdk_in_container/fetch.py | 2 +- flytekit/configuration/__init__.py | 2 +- flytekit/core/context_manager.py | 2 +- flytekit/core/promise.py | 2 +- flytekit/core/type_engine.py | 2 +- flytekit/extras/accelerators.py | 2 +- flytekit/remote/backfill.py | 2 +- flytekit/remote/lazy_entity.py | 2 +- flytekit/remote/remote.py | 2 +- flytekit/tools/fast_registration.py | 2 +- flytekit/tools/script_mode.py | 2 +- flytekit/tools/translator.py | 2 +- flytekit/types/structured/structured_dataset.py | 2 +- plugins/flytekit-identity-aware-proxy/README.md | 8 ++++---- .../flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py | 2 +- .../flytekitplugins/onnxpytorch/schema.py | 2 +- .../flytekitplugins/pydantic/deserialization.py | 2 +- .../flytekit-whylogs/flytekitplugins/whylogs/renderer.py | 4 ++-- pyproject.toml | 4 ++++ tests/flytekit/integration/remote/test_remote.py | 2 +- tests/flytekit/unit/core/test_promise.py | 4 ++-- tests/flytekit/unit/core/test_python_function_task.py | 2 +- tests/flytekit/unit/core/test_type_engine.py | 4 ++-- tests/flytekit/unit/experimental/test_eager_workflows.py | 2 +- 31 files changed, 49 insertions(+), 38 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 822fa71eae..1e91d21bd1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,3 +22,9 @@ repos: rev: 0.0.9 hooks: - id: check_pdb_hook + - repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + additional_dependencies: + - tomli diff --git a/Makefile b/Makefile index 6e82bd693e..95e6d538ca 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,8 @@ lint: ## Run linters .PHONY: spellcheck spellcheck: ## Runs a spellchecker over all code and documentation - codespell -L "te,raison,fo" 
--skip="./docs/build,./.git" + # Configuration is in pyproject.toml + codespell .PHONY: test test: lint unit_test diff --git a/docs/source/design/control_plane.rst b/docs/source/design/control_plane.rst index 4c5357cb87..e05be52129 100644 --- a/docs/source/design/control_plane.rst +++ b/docs/source/design/control_plane.rst @@ -248,7 +248,7 @@ Additional arguments include: * ``project``: the project on which to execute the entity. * ``domain``: the domain on which to execute the entity. * ``type_hints``: a dictionary mapping Python types to their corresponding Flyte types. -* ``options``: options can be configured for a launch plan during registration or overriden during execution. Refer to :py:class:`~flytekit.remote.remote.Options` to know all the acceptable parameters. +* ``options``: options can be configured for a launch plan during registration or overridden during execution. Refer to :py:class:`~flytekit.remote.remote.Options` to know all the acceptable parameters. The following is an example demonstrating how to use the :py:class:`~flytekit.remote.remote.Options` class to configure a Flyte entity: diff --git a/flytekit/clients/auth/auth_client.py b/flytekit/clients/auth/auth_client.py index 8001bf8885..4735a446be 100644 --- a/flytekit/clients/auth/auth_client.py +++ b/flytekit/clients/auth/auth_client.py @@ -172,7 +172,7 @@ def __call__(cls, *args, **kwargs): class AuthorizationClient(metaclass=_SingletonPerEndpoint): """ Authorization client that stores the credentials in keyring and uses oauth2 standard flow to retrieve the - credentials. NOTE: This will open an web browser to retreive the credentials. + credentials. NOTE: This will open a web browser to retrieve the credentials.
""" def __init__( diff --git a/flytekit/clients/auth/authenticator.py b/flytekit/clients/auth/authenticator.py index 6fc08c7140..fdf1d13eae 100644 --- a/flytekit/clients/auth/authenticator.py +++ b/flytekit/clients/auth/authenticator.py @@ -167,7 +167,7 @@ def refresh_credentials(self): class CommandAuthenticator(Authenticator): """ - This Authenticator retreives access_token using the provided command + This Authenticator retrieves access_token using the provided command """ def __init__(self, command: typing.List[str], header_key: str = None): @@ -305,7 +305,7 @@ def refresh_credentials(self): text = f"To Authenticate, navigate in a browser to the following URL: {click.style(resp.verification_uri, fg='blue', underline=True)} and enter code: {click.style(resp.user_code, fg='blue')}" click.secho(text) try: - # Currently the refresh token is not retreived. We may want to add support for refreshTokens so that + # Currently the refresh token is not retrieved. We may want to add support for refreshTokens so that # access tokens can be refreshed for once authenticated machines token, expires_in = token_client.poll_token_endpoint( resp, diff --git a/flytekit/clients/auth_helper.py b/flytekit/clients/auth_helper.py index 5b53856863..04028bc10a 100644 --- a/flytekit/clients/auth_helper.py +++ b/flytekit/clients/auth_helper.py @@ -116,7 +116,7 @@ def get_proxy_authenticator(cfg: PlatformConfig) -> Authenticator: def upgrade_channel_to_proxy_authenticated(cfg: PlatformConfig, in_channel: grpc.Channel) -> grpc.Channel: """ - If activated in the platform config, given a grpc.Channel, preferrably a secure channel, it returns a composed + If activated in the platform config, given a grpc.Channel, preferably a secure channel, it returns a composed channel that uses Interceptor to perform authentication with a proxy infront of Flyte :param cfg: PlatformConfig :param in_channel: grpc.Channel Precreated channel @@ -131,7 +131,7 @@ def upgrade_channel_to_proxy_authenticated(cfg: 
PlatformConfig, in_channel: grpc def upgrade_channel_to_authenticated(cfg: PlatformConfig, in_channel: grpc.Channel) -> grpc.Channel: """ - Given a grpc.Channel, preferrably a secure channel, it returns a composed channel that uses Interceptor to + Given a grpc.Channel, preferably a secure channel, it returns a composed channel that uses Interceptor to perform an Oauth2.0 Auth flow :param cfg: PlatformConfig :param in_channel: grpc.Channel Precreated channel diff --git a/flytekit/clis/flyte_cli/main.py b/flytekit/clis/flyte_cli/main.py index b190243e6e..75386896ce 100644 --- a/flytekit/clis/flyte_cli/main.py +++ b/flytekit/clis/flyte_cli/main.py @@ -1069,7 +1069,7 @@ def update_launch_plan(state, host, insecure, urn=None): else: # If the commandline parameter urn is not supplied, and neither # the input comes from a pipe, it means the user is not using - # this command approperiately + # this command appropriately raise _click.UsageError('Missing option "-u" / "--urn" or missing pipe inputs') except KeyboardInterrupt: _sys.stdout.flush() @@ -1185,7 +1185,7 @@ def terminate_execution(host, insecure, cause, urn=None): def list_executions(project, domain, host, insecure, token, limit, show_all, filter, sort_by, urns_only): """ List the key information of all the executions under the scope specified by {project, domain}. - Users can supply additional filter arguments to show only the desired exeuctions. + Users can supply additional filter arguments to show only the desired executions. 
Note that, when the ``--urns-only`` flag is not set, this command prints out the complete tabular output with key pieces of information such as the URN, the Name and the Status of the executions; diff --git a/flytekit/clis/sdk_in_container/fetch.py b/flytekit/clis/sdk_in_container/fetch.py index 8c83b5d548..841ca3f840 100644 --- a/flytekit/clis/sdk_in_container/fetch.py +++ b/flytekit/clis/sdk_in_container/fetch.py @@ -17,7 +17,7 @@ "--recursive", "-r", is_flag=True, - help="Fetch recursively, all variables in the URI. This is not needed for directrories as they" + help="Fetch recursively, all variables in the URI. This is not needed for directories as they" " are automatically recursively downloaded.", ) @click.argument("flyte-data-uri", type=str, required=True, metavar="FLYTE-DATA-URI (format flyte://...)") diff --git a/flytekit/configuration/__init__.py b/flytekit/configuration/__init__.py index b1794c73c6..aa9c2bf129 100644 --- a/flytekit/configuration/__init__.py +++ b/flytekit/configuration/__init__.py @@ -872,7 +872,7 @@ def with_serialized_context(self) -> SerializationSettings: """ Use this method to create a new SerializationSettings that has an environment variable set with the SerializedContext This is useful in transporting SerializedContext to serialized and registered tasks. - The setting will be availabe in the `env` field with the key `SERIALIZED_CONTEXT_ENV_VAR` + The setting will be available in the `env` field with the key `SERIALIZED_CONTEXT_ENV_VAR` :return: A newly constructed SerializationSettings, or self, if it already has the serializationSettings """ if self._has_serialized_context(): diff --git a/flytekit/core/context_manager.py b/flytekit/core/context_manager.py index 5b9143fb52..0b21ab24d9 100644 --- a/flytekit/core/context_manager.py +++ b/flytekit/core/context_manager.py @@ -672,7 +672,7 @@ def get_deck(self) -> typing.Union[str, "IPython.core.display.HTML"]: # type:ig my_task(...) 
ctx.get_deck() - OR if you wish to explicity display + OR if you wish to explicitly display .. code-block:: python diff --git a/flytekit/core/promise.py b/flytekit/core/promise.py index 46f95f5147..f15923ab5f 100644 --- a/flytekit/core/promise.py +++ b/flytekit/core/promise.py @@ -930,7 +930,7 @@ def create_and_link_node_from_remote( :param entity: RemoteEntity :param _inputs_not_allowed: Set of all variable names that should not be provided when using this entity. Useful for Launchplans with `fixed` inputs - :param _ignorable_inputs: Set of all variable names that are optional, but if provided will be overriden. Useful + :param _ignorable_inputs: Set of all variable names that are optional, but if provided will be overridden. Useful for launchplans with `default` inputs :param kwargs: Dict[str, Any] default inputs passed from the user to this entity. Can be promises. :return: Optional[Union[Tuple[Promise], Promise, VoidPromise]] diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 71c630b3b4..4220d6e7b6 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -1848,7 +1848,7 @@ def _check_and_covert_float(lv: Literal) -> float: def _check_and_convert_void(lv: Literal) -> None: if lv.scalar.none_type is None: - raise TypeTransformerFailedError(f"Cannot conver literal {lv} to None") + raise TypeTransformerFailedError(f"Cannot convert literal {lv} to None") return None diff --git a/flytekit/extras/accelerators.py b/flytekit/extras/accelerators.py index b58159e1f0..6f3fac9ffd 100644 --- a/flytekit/extras/accelerators.py +++ b/flytekit/extras/accelerators.py @@ -196,7 +196,7 @@ class _A100_Base(MultiInstanceGPUAccelerator): class _A100(_A100_Base): """ Class that represents an `NVIDIA A100 GPU `_. It is possible - to specify a partition of an A100 GPU by using the provided paritions on the class. For example, to specify a + to specify a partition of an A100 GPU by using the provided partitions on the class. 
For example, to specify a 10GB partition, use ``A100.partition_2g_10gb``. Refer to `Partitioned GPUs `_ """ diff --git a/flytekit/remote/backfill.py b/flytekit/remote/backfill.py index b36fc7919d..166f2e4745 100644 --- a/flytekit/remote/backfill.py +++ b/flytekit/remote/backfill.py @@ -43,7 +43,7 @@ def create_backfill_workflow( :param start_date: datetime generate a backfill starting at this datetime (exclusive) :param end_date: datetime generate a backfill ending at this datetime (inclusive) - :param for_lp: typing.Union[LaunchPlan, FlyteLaunchPlan] the backfill is generatd for this launchplan + :param for_lp: typing.Union[LaunchPlan, FlyteLaunchPlan] the backfill is generated for this launchplan :param parallel: if the backfill should be run in parallel. False (default) will run each bacfill sequentially :param per_node_timeout: timedelta Timeout to use per node :param per_node_retries: int Retries to user per node diff --git a/flytekit/remote/lazy_entity.py b/flytekit/remote/lazy_entity.py index 4755aad99d..1df2197329 100644 --- a/flytekit/remote/lazy_entity.py +++ b/flytekit/remote/lazy_entity.py @@ -10,7 +10,7 @@ class LazyEntity(RemoteEntity, typing.Generic[T]): """ Fetches the entity when the entity is called or when the entity is retrieved. - The entity is derived from RemoteEntity so that it behaves exactly like the mimiced entity. + The entity is derived from RemoteEntity so that it behaves exactly like the mimicked entity. """ def __init__(self, name: str, getter: typing.Callable[[], T], *args, **kwargs): diff --git a/flytekit/remote/remote.py b/flytekit/remote/remote.py index 9287372bcb..89c91a2719 100644 --- a/flytekit/remote/remote.py +++ b/flytekit/remote/remote.py @@ -1122,7 +1122,7 @@ def _resolve_identifier_kwargs( if not (ident.project and ident.domain and ident.name): raise ValueError( f"Cannot launch an execution with missing project/domain/name {ident} for entity type {type(entity)}." 
- f" Specify them in the execute method or when intializing FlyteRemote" + f" Specify them in the execute method or when initializing FlyteRemote" ) return ident diff --git a/flytekit/tools/fast_registration.py b/flytekit/tools/fast_registration.py index 16ee24084a..0664ebbd8d 100644 --- a/flytekit/tools/fast_registration.py +++ b/flytekit/tools/fast_registration.py @@ -106,7 +106,7 @@ def download_distribution(additional_distribution: str, destination: str): """ if not os.path.isdir(destination): raise ValueError("Destination path is required to download distribution and it should be a directory") - # NOTE the os.path.join(destination, ''). This is to ensure that the given path is infact a directory and all + # NOTE the os.path.join(destination, ''). This is to ensure that the given path is in fact a directory and all # downloaded data should be copied into this directory. We do this to account for a difference in behavior in # fsspec, which requires a trailing slash in case of pre-existing directory. 
FlyteContextManager.current_context().file_access.get_data(additional_distribution, os.path.join(destination, "")) diff --git a/flytekit/tools/script_mode.py b/flytekit/tools/script_mode.py index e5bbde04df..5c9990d819 100644 --- a/flytekit/tools/script_mode.py +++ b/flytekit/tools/script_mode.py @@ -104,7 +104,7 @@ def copy_module_to_destination( # Takes in a TarInfo and returns the modified TarInfo: # https://docs.python.org/3/library/tarfile.html#tarinfo-objects -# intented to be passed as a filter to tarfile.add +# intended to be passed as a filter to tarfile.add # https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.add def tar_strip_file_attributes(tar_info: tarfile.TarInfo) -> tarfile.TarInfo: # set time to epoch timestamp 0, aka 00:00:00 UTC on 1 January 1970 diff --git a/flytekit/tools/translator.py b/flytekit/tools/translator.py index 72ebe7510c..21f9e1b376 100644 --- a/flytekit/tools/translator.py +++ b/flytekit/tools/translator.py @@ -773,7 +773,7 @@ def gather_dependent_entities( The ``get_serializable`` function above takes in an ``OrderedDict`` that helps keep track of dependent entities. For example, when serializing a workflow, all its tasks are also serialized. The ordered dict will also contain serialized entities that aren't as useful though, like nodes and branches. This is just a small helper function - that will pull out the serialzed tasks, workflows, and launch plans. This function is primarily used for testing. + that will pull out the serialized tasks, workflows, and launch plans. This function is primarily used for testing. :param serialized: This should be the filled in OrderedDict used in the get_serializable function above. 
:return: diff --git a/flytekit/types/structured/structured_dataset.py b/flytekit/types/structured/structured_dataset.py index bd01b871bc..1d7af31404 100644 --- a/flytekit/types/structured/structured_dataset.py +++ b/flytekit/types/structured/structured_dataset.py @@ -177,7 +177,7 @@ def __init__(self, python_type: Type[T], protocol: Optional[str] = None, support is capable of handling. :param supported_format: Arbitrary string representing the format. If not supplied then an empty string will be used. An empty string implies that the encoder works with any format. If the format being asked - for does not exist, the transformer enginer will look for the "" endcoder instead and write a warning. + for does not exist, the transformer engine will look for the "" encoder instead and write a warning. """ self._python_type = python_type self._protocol = protocol.replace("://", "") if protocol else None diff --git a/plugins/flytekit-identity-aware-proxy/README.md b/plugins/flytekit-identity-aware-proxy/README.md index c6c631707c..64477e73a8 100644 --- a/plugins/flytekit-identity-aware-proxy/README.md +++ b/plugins/flytekit-identity-aware-proxy/README.md @@ -41,7 +41,7 @@ This flytekit plugin allows users to generate ID tokens via an external command --project < fill in the gcp project id where the secret was saved > ``` - A browser window should open, asking you to login with your GCP account. Then, a succesful log in should be confirmed with *"Successfully logged into accounts.google.com"*. + A browser window should open, asking you to login with your GCP account. Then, a successful log in should be confirmed with *"Successfully logged into accounts.google.com"*. Finally, the token beginning with `eyJhbG..."` should be printed to the console. @@ -97,7 +97,7 @@ Flyte is then deployed behind the Istio ingress gateway and does not need to be ### Deployment -1.
If not already done, deploy the flyte-core helm chart, [activating auth](https://docs.flyte.org/en/latest/deployment/configuration/auth_setup.html#apply-oidc-configuration). Re-use the web app client id created for IAP (see section above). Disable the default ingress in the helm values by setting `common.ingress.enabled` to `false` in the helm values file. +1. If not already done, deploy the flyte-core helm chart, [activating auth](https://docs.flyte.org/en/latest/deployment/configuration/auth_setup.html#apply-oidc-configuration). Reuse the web app client id created for IAP (see section above). Disable the default ingress in the helm values by setting `common.ingress.enabled` to `false` in the helm values file. 2. Deployment of Istio and the Istio ingress gateway ([docs](https://istio.io/latest/docs/setup/install/helm/)) @@ -288,7 +288,7 @@ Flyte is then deployed behind the Istio ingress gateway and does not need to be 7. Connect flyteadmin and flyteconsole to the istio ingress gateway: - So far, we created a GCE ingress (which creates a GCP load balancer). The load balancer is configured to forward all requests to the istio ingress gatway at the edge of the service mesh via http2 and TLS. + So far, we created a GCE ingress (which creates a GCP load balancer). The load balancer is configured to forward all requests to the istio ingress gateway at the edge of the service mesh via http2 and TLS. Next, we configure the Istio service mesh to route requests from the Istio ingress gateway to flyteadmin and flyteconsole. @@ -387,7 +387,7 @@ Flyte is then deployed behind the Istio ingress gateway and does not need to be print(remote.recent_executions()) ``` - A browser window should open and ask you to login with your Google account. You should then see confirmation that you *"Successfully logged into accounts.google.com"* (this was for the IAP), finally followd by confirmation that you *"Successfully logged into 'your flyte domain'"* (this was for Flyte itself). 
+ A browser window should open and ask you to login with your Google account. You should then see confirmation that you *"Successfully logged into accounts.google.com"* (this was for the IAP), finally followed by confirmation that you *"Successfully logged into 'your flyte domain'"* (this was for Flyte itself). diff --git a/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py b/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py index ffe21d90cd..46eb086ad0 100644 --- a/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py +++ b/plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py @@ -310,7 +310,7 @@ def _execute(self, **kwargs) -> Any: inherits from `FlyteRecoverableException`. RuntimeError: If the first exception raised in the local worker group is not and does not inherit from `FlyteRecoverableException`. - IgnoreOutputs: Raised when the task is succesfull in any worker group with index > 0. + IgnoreOutputs: Raised when the task is successful in any worker group with index > 0. """ try: from torch.distributed.launcher.api import LaunchConfig, elastic_launch diff --git a/plugins/flytekit-onnx-pytorch/flytekitplugins/onnxpytorch/schema.py b/plugins/flytekit-onnx-pytorch/flytekitplugins/onnxpytorch/schema.py index 0cca0616c5..78793b84d3 100644 --- a/plugins/flytekit-onnx-pytorch/flytekitplugins/onnxpytorch/schema.py +++ b/plugins/flytekit-onnx-pytorch/flytekitplugins/onnxpytorch/schema.py @@ -33,7 +33,7 @@ class PyTorch2ONNXConfig(DataClassJsonMixin): do_constant_folding: Whether to apply constant folding for optimization. dynamic_axes: Specify axes of tensors as dynamic. keep_initializers_as_inputs: Whether to add the initializers as inputs to the graph. - custom_opsets: A dictionary of opset doman name and version. + custom_opsets: A dictionary of opset domain name and version. export_modules_as_functions: Whether to export modules as functions. 
""" diff --git a/plugins/flytekit-pydantic/flytekitplugins/pydantic/deserialization.py b/plugins/flytekit-pydantic/flytekitplugins/pydantic/deserialization.py index 77a3e44f29..a7d602b402 100644 --- a/plugins/flytekit-pydantic/flytekitplugins/pydantic/deserialization.py +++ b/plugins/flytekit-pydantic/flytekitplugins/pydantic/deserialization.py @@ -26,7 +26,7 @@ class PydanticDeserializationLiteralStore: placeholders that it is trying to deserialize. """ - literal_store: Optional[serialization.LiteralStore] = None # attachement point for the literal map + literal_store: Optional[serialization.LiteralStore] = None # attachment point for the literal map def __init__(self) -> None: raise Exception("This class should not be instantiated") diff --git a/plugins/flytekit-whylogs/flytekitplugins/whylogs/renderer.py b/plugins/flytekit-whylogs/flytekitplugins/whylogs/renderer.py index b0b3acb30f..60cf064871 100644 --- a/plugins/flytekit-whylogs/flytekitplugins/whylogs/renderer.py +++ b/plugins/flytekit-whylogs/flytekitplugins/whylogs/renderer.py @@ -1,7 +1,7 @@ from flytekit import lazy_module why = lazy_module("whylogs") -constaints = lazy_module("whylogs.core.constraints") +constraints = lazy_module("whylogs.core.constraints") pd = lazy_module("pandas") @@ -60,7 +60,7 @@ class WhylogsConstraintsRenderer: """ @staticmethod - def to_html(constraints: constaints.Constraints) -> str: + def to_html(constraints: constraints.Constraints) -> str: viz = why.viz.NotebookProfileVisualizer() report = viz.constraints_report(constraints=constraints) return report.data diff --git a/pyproject.toml b/pyproject.toml index f85b00b535..6c5fd120e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,3 +137,7 @@ ignore = [ # unused-import "F401", ] + +[tool.codespell] +ignore-words-list="ot,te,raison,fo,lits" +skip="./docs/build,./.git,*.txt" diff --git a/tests/flytekit/integration/remote/test_remote.py b/tests/flytekit/integration/remote/test_remote.py index 4e97110bd0..f92489e9c4 100644 
--- a/tests/flytekit/integration/remote/test_remote.py +++ b/tests/flytekit/integration/remote/test_remote.py @@ -95,7 +95,7 @@ def test_monitor_workflow_execution(register): execution = remote.sync_execution(execution, sync_nodes=True) if execution.node_executions: - assert execution.node_executions["start-node"].closure.phase == 3 # SUCCEEEDED + assert execution.node_executions["start-node"].closure.phase == 3 # SUCCEEDED for key in execution.node_executions: assert execution.node_executions[key].closure.phase == 3 diff --git a/tests/flytekit/unit/core/test_promise.py b/tests/flytekit/unit/core/test_promise.py index 3322d4dd59..979b80b45f 100644 --- a/tests/flytekit/unit/core/test_promise.py +++ b/tests/flytekit/unit/core/test_promise.py @@ -87,7 +87,7 @@ def wf(i: int, j: int): # Even if j is not provided it will default create_and_link_node_from_remote(ctx, lp, _inputs_not_allowed={"i"}, _ignorable_inputs={"j"}) - # value of `i` cannot be overriden + # value of `i` cannot be overridden with pytest.raises( FlyteAssertion, match="ixed inputs cannot be specified. 
Please remove the following inputs - {'i'}" ): @@ -195,7 +195,7 @@ def my_workflow() -> (str, str, str): o3 = t2(a=f.a) return o1, o2, o3 - # Run a local execution with promises having atrribute path + # Run a local execution with promises having attribute path o1, o2, o3 = my_workflow() assert o1 == "a" assert o2 == "b" diff --git a/tests/flytekit/unit/core/test_python_function_task.py b/tests/flytekit/unit/core/test_python_function_task.py index 9ba9d4c780..498228f3fe 100644 --- a/tests/flytekit/unit/core/test_python_function_task.py +++ b/tests/flytekit/unit/core/test_python_function_task.py @@ -107,7 +107,7 @@ def foo(i: str): assert foo_metadata.cache_serialize is True assert foo_metadata.cache_version == "1.0" - # test cache, cache_serialize, and cache_version at no unecessarily set + # test cache, cache_serialize, and cache_version at no unnecessarily set @task() def bar(i: str): print(f"{i}") diff --git a/tests/flytekit/unit/core/test_type_engine.py b/tests/flytekit/unit/core/test_type_engine.py index cc0d7d336a..7eed167246 100644 --- a/tests/flytekit/unit/core/test_type_engine.py +++ b/tests/flytekit/unit/core/test_type_engine.py @@ -1745,7 +1745,7 @@ def test_union_of_lists(): structure=TypeStructure(tag="Typed List"), ), ] - # Tags are deliberately NOT unique beacuse they are not required to encode the deep type structure, + # Tags are deliberately NOT unique because they are not required to encode the deep type structure, # only the top-level type transformer choice # # The stored typed will be used to differentiate union variants and must produce a unique choice. 
@@ -2032,7 +2032,7 @@ def constant_hash(df: pd.DataFrame) -> str: pandas_df_transformer.get_literal_type(pd.DataFrame), ) assert literal_with_hash_set.hash == "h4Sh" - # Confirm tha the loaded dataframe is not affected + # Confirm that the loaded dataframe is not affected python_df = TypeEngine.to_python_value(ctx, literal_with_hash_set, pd.DataFrame) expected_df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) assert expected_df.equals(python_df) diff --git a/tests/flytekit/unit/experimental/test_eager_workflows.py b/tests/flytekit/unit/experimental/test_eager_workflows.py index 6b01d2fc7a..d4438f91f7 100644 --- a/tests/flytekit/unit/experimental/test_eager_workflows.py +++ b/tests/flytekit/unit/experimental/test_eager_workflows.py @@ -64,7 +64,7 @@ async def eager_wf(x: int) -> int: @given(x_input=INTEGER_ST) @settings(deadline=DEADLINE, max_examples=5) def test_conditional_eager_workflow(x_input: int): - """Test eager workfow with conditional logic.""" + """Test eager workflow with conditional logic.""" @eager async def eager_wf(x: int) -> int: From 6279b817d5df76e651c866c6c5ba7e12a65e12b2 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Wed, 17 Jan 2024 18:57:22 -0800 Subject: [PATCH 41/63] Add @lru_cache to ImageBuildEngine.build (#2112) Signed-off-by: Kevin Su --- flytekit/image_spec/image_spec.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index 5b437f0b17..cef455acbb 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -197,6 +197,7 @@ def register(cls, builder_type: str, image_spec_builder: ImageSpecBuilder): cls._REGISTRY[builder_type] = image_spec_builder @classmethod + @lru_cache def build(cls, image_spec: ImageSpec): img_name = image_spec.image_name() if img_name in cls._BUILT_IMAGES or image_spec.exist(): From 11232abc09d57eeac009cfeee74242be016c283f Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 18 Jan 2024 13:13:33 -0800 Subject: 
[PATCH 42/63] Skip errors when running pyflyte register (#2111) Signed-off-by: Kevin Su --- flytekit/clis/sdk_in_container/register.py | 10 ++++++++++ flytekit/tools/repo.py | 22 ++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/flytekit/clis/sdk_in_container/register.py b/flytekit/clis/sdk_in_container/register.py index 4f8dddbc18..45e41efe47 100644 --- a/flytekit/clis/sdk_in_container/register.py +++ b/flytekit/clis/sdk_in_container/register.py @@ -117,6 +117,14 @@ callback=key_value_callback, help="Environment variables to set in the container, of the format `ENV_NAME=ENV_VALUE`", ) +@click.option( + "--skip-errors", + "--skip-error", + default=False, + is_flag=True, + help="Skip errors during registration. This is useful when registering multiple packages and you want to skip " + "errors for some packages.", +) @click.argument("package-or-module", type=click.Path(exists=True, readable=True, resolve_path=True), nargs=-1) @click.pass_context def register( @@ -135,6 +143,7 @@ def register( dry_run: bool, activate_launchplans: bool, env: typing.Optional[typing.Dict[str, str]], + skip_errors: bool, ): """ see help @@ -187,6 +196,7 @@ def register( env=env, dry_run=dry_run, activate_launchplans=activate_launchplans, + skip_errors=skip_errors, ) except Exception as e: raise e diff --git a/flytekit/tools/repo.py b/flytekit/tools/repo.py index 195e3eea17..c0055eb87d 100644 --- a/flytekit/tools/repo.py +++ b/flytekit/tools/repo.py @@ -220,6 +220,7 @@ def register( env: typing.Optional[typing.Dict[str, str]], dry_run: bool = False, activate_launchplans: bool = False, + skip_errors: bool = False, ): detected_root = find_common_root(package_or_module) click.secho(f"Detected Root {detected_root}, using this to create deployable package...", fg="yellow") @@ -276,14 +277,19 @@ def register( secho(og_id, "") try: if not dry_run: - i = remote.raw_register( - cp_entity, serialization_settings, version=version, create_default_launchplan=False - 
) - secho(i) - if is_lp and activate_launchplans: - secho(og_id, "", op="Activation") - remote.activate_launchplan(i) - secho(i, reason="activated", op="Activation") + try: + i = remote.raw_register( + cp_entity, serialization_settings, version=version, create_default_launchplan=False + ) + secho(i, state="success") + if is_lp and activate_launchplans: + secho(og_id, "", op="Activation") + remote.activate_launchplan(i) + secho(i, reason="activated", op="Activation") + except Exception as e: + if not skip_errors: + raise e + secho(og_id, state="failed") else: secho(og_id, reason="Dry run Mode!") except RegistrationSkipped: From 475eecd9a1db9df13380526f9e673af5ca4a088e Mon Sep 17 00:00:00 2001 From: bstadlbauer <11799671+bstadlbauer@users.noreply.github.com> Date: Fri, 19 Jan 2024 15:07:41 +0100 Subject: [PATCH 43/63] Option to disable local caching (#2106) * Add `LocalConfig` Signed-off-by: Bernhard Stadlbauer * Use `LocalConfig.cache_enabled` to determine caching Signed-off-by: Bernhard Stadlbauer * Add test to confirm testing can be disabled Signed-off-by: Bernhard Stadlbauer * Rename `LOCAL` to `Local` Signed-off-by: Bernhard Stadlbauer --------- Signed-off-by: Bernhard Stadlbauer --- flytekit/configuration/__init__.py | 16 ++++++++++++++++ flytekit/configuration/internal.py | 5 +++++ flytekit/core/base_task.py | 5 +++-- tests/flytekit/unit/core/test_local_cache.py | 19 +++++++++++++++++++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/flytekit/configuration/__init__.py b/flytekit/configuration/__init__.py index aa9c2bf129..853e08a166 100644 --- a/flytekit/configuration/__init__.py +++ b/flytekit/configuration/__init__.py @@ -611,6 +611,22 @@ def auto(cls, config_file: typing.Union[str, ConfigFile] = None) -> DataConfig: ) +@dataclass(init=True, repr=True, eq=True, frozen=True) +class LocalConfig(object): + """ + Any configuration specific to local runs. 
+ """ + + cache_enabled: bool = True + + @classmethod + def auto(cls, config_file: typing.Union[str, ConfigFile] = None) -> LocalConfig: + config_file = get_config_file(config_file) + kwargs = {} + kwargs = set_if_exists(kwargs, "cache_enabled", _internal.Local.CACHE_ENABLED.read(config_file)) + return LocalConfig(**kwargs) + + @dataclass(init=True, repr=True, eq=True, frozen=True) class Config(object): """ diff --git a/flytekit/configuration/internal.py b/flytekit/configuration/internal.py index e559650e29..66e21b25d7 100644 --- a/flytekit/configuration/internal.py +++ b/flytekit/configuration/internal.py @@ -66,6 +66,11 @@ class AZURE(object): CLIENT_SECRET = ConfigEntry(LegacyConfigEntry(SECTION, "client_secret")) +class Local(object): + SECTION = "local" + CACHE_ENABLED = ConfigEntry(LegacyConfigEntry(SECTION, "cache_enabled", bool)) + + class Credentials(object): SECTION = "credentials" COMMAND = ConfigEntry(LegacyConfigEntry(SECTION, "command", list), YamlConfigEntry("admin.command", list)) diff --git a/flytekit/core/base_task.py b/flytekit/core/base_task.py index b9171f54b8..61b2ca89d4 100644 --- a/flytekit/core/base_task.py +++ b/flytekit/core/base_task.py @@ -27,7 +27,7 @@ from flyteidl.core import tasks_pb2 -from flytekit.configuration import SerializationSettings +from flytekit.configuration import LocalConfig, SerializationSettings from flytekit.core.context_manager import ( ExecutionParameters, ExecutionState, @@ -265,7 +265,8 @@ def local_execute( input_literal_map = _literal_models.LiteralMap(literals=kwargs) # if metadata.cache is set, check memoized version - if self.metadata.cache: + local_config = LocalConfig.auto() + if self.metadata.cache and local_config.cache_enabled: # TODO: how to get a nice `native_inputs` here? 
logger.info( f"Checking cache for task named {self.name}, cache version {self.metadata.cache_version} " diff --git a/tests/flytekit/unit/core/test_local_cache.py b/tests/flytekit/unit/core/test_local_cache.py index c79930148f..53c524e95f 100644 --- a/tests/flytekit/unit/core/test_local_cache.py +++ b/tests/flytekit/unit/core/test_local_cache.py @@ -100,6 +100,25 @@ def check_evenness(n: int) -> bool: assert n_cached_task_calls == 2 +def test_cache_can_be_disabled(monkeypatch): + monkeypatch.setenv("FLYTE_LOCAL_CACHE_ENABLED", "false") + + @task(cache=True, cache_version="v1") + def is_even(n: int) -> bool: + global n_cached_task_calls + n_cached_task_calls += 1 + return n % 2 == 0 + + assert n_cached_task_calls == 0 + # Run once and check that the counter is increased + assert is_even(n=1) is False + assert n_cached_task_calls == 1 + + # Run again and check that the counter is increased again i.e. no caching + assert is_even(n=1) is False + assert n_cached_task_calls == 2 + + def test_shared_tasks_in_two_separate_workflows(): @task(cache=True, cache_version="0.0.1") def is_odd(n: int) -> bool: From 139fa2fa1589bdc93ad1588d3b3e15fd8f55441a Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:21:02 -0800 Subject: [PATCH 44/63] Omit '=' from auto-generated version (#2117) * Omit '=' from auto-generated version Signed-off-by: Eduardo Apolinario * Lint Signed-off-by: Eduardo Apolinario --------- Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- flytekit/remote/remote.py | 4 +++- tests/flytekit/unit/remote/test_remote.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/flytekit/remote/remote.py b/flytekit/remote/remote.py index 89c91a2719..e407ae53f9 100644 --- a/flytekit/remote/remote.py +++ b/flytekit/remote/remote.py @@ -877,7 +877,9 @@ def _version_from_hash( for s in additional_context: h.update(bytes(s, "utf-8")) - return 
base64.urlsafe_b64encode(h.digest()).decode("ascii") + # Omit the character '=' from the version as that's essentially padding used by the base64 encoding + # and does not increase entropy of the hash while making it very inconvenient to copy-and-paste. + return base64.urlsafe_b64encode(h.digest()).decode("ascii").rstrip("=") def register_script( self, diff --git a/tests/flytekit/unit/remote/test_remote.py b/tests/flytekit/unit/remote/test_remote.py index 6a869f4168..e7c296191a 100644 --- a/tests/flytekit/unit/remote/test_remote.py +++ b/tests/flytekit/unit/remote/test_remote.py @@ -207,6 +207,22 @@ def test_more_stuff(mock_client): assert computed_v2 != computed_v3 +@patch("flytekit.remote.remote.SynchronousFlyteClient") +def test_version_hash_special_characters(mock_client): + r = FlyteRemote(config=Config.auto(), default_project="project", default_domain="domain") + + serialization_settings = flytekit.configuration.SerializationSettings( + project="project", + domain="domain", + version="version", + env=None, + image_config=ImageConfig.auto(img_name=DefaultImages.default_image()), + ) + + computed_v = r._version_from_hash(b"", serialization_settings) + assert "=" not in computed_v + + def test_get_extra_headers_azure_blob_storage(): native_url = "abfs://flyte@storageaccount/container/path/to/file" headers = FlyteRemote.get_extra_headers_for_protocol(native_url) From 5ba545cb1b76796c5094649631c55ef1027e42b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20Nordstr=C3=B6m?= <131469540+knordstrom-muon@users.noreply.github.com> Date: Mon, 22 Jan 2024 18:53:15 +0000 Subject: [PATCH 45/63] Pass in env vars in FlyteRemote register_script (#2120) Signed-off-by: Karl Nordstrom --- flytekit/remote/remote.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flytekit/remote/remote.py b/flytekit/remote/remote.py index e407ae53f9..beef4108b0 100644 --- a/flytekit/remote/remote.py +++ b/flytekit/remote/remote.py @@ -894,6 +894,7 @@ def register_script( options: 
typing.Optional[Options] = None, source_path: typing.Optional[str] = None, module_name: typing.Optional[str] = None, + envs: typing.Optional[typing.Dict[str, str]] = None, ) -> typing.Union[FlyteWorkflow, FlyteTask]: """ Use this method to register a workflow via script mode. @@ -908,6 +909,7 @@ def register_script( :param options: Additional execution options that can be configured for the default launchplan :param source_path: The root of the project path :param module_name: the name of the module + :param envs: Environment variables to be passed to the serialization :return: """ if image_config is None: @@ -928,6 +930,7 @@ def register_script( domain=domain, image_config=image_config, git_repo=_get_git_repo_url(source_path), + env=envs, fast_serialization_settings=FastSerializationSettings( enabled=True, destination_dir=destination_dir, From cba830ea0c9844f8079342ae99a2105b507eca2e Mon Sep 17 00:00:00 2001 From: Paul Dittamo <37558497+pvditt@users.noreply.github.com> Date: Mon, 22 Jan 2024 12:01:43 -0800 Subject: [PATCH 46/63] [Bug] Map task caching failures (#2113) * only utilize bounded inputs for map task names instead of entire interface Signed-off-by: Paul Dittamo * add test Signed-off-by: Paul Dittamo * lint Signed-off-by: Paul Dittamo * order container vars for map tasks Signed-off-by: Paul Dittamo --------- Signed-off-by: Paul Dittamo --- flytekit/core/array_node_map_task.py | 5 +++-- flytekit/core/map_task.py | 5 +++-- .../unit/core/test_array_node_map_task.py | 22 ++++++++++++++----- tests/flytekit/unit/core/test_map_task.py | 21 ++++++++++++++---- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/flytekit/core/array_node_map_task.py b/flytekit/core/array_node_map_task.py index e4fef9ed10..5d10cadd45 100644 --- a/flytekit/core/array_node_map_task.py +++ b/flytekit/core/array_node_map_task.py @@ -77,8 +77,9 @@ def __init__( f = actual_task.lhs else: _, mod, f, _ = tracker.extract_task_module(cast(PythonFunctionTask, 
actual_task).task_function) + sorted_bounded_inputs = ",".join(sorted(self._bound_inputs)) h = hashlib.md5( - f"{collection_interface.__str__()}{concurrency}{min_successes}{min_success_ratio}".encode("utf-8") + f"{sorted_bounded_inputs}{concurrency}{min_successes}{min_success_ratio}".encode("utf-8") ).hexdigest() self._name = f"{mod}.map_{f}_{h}-arraynode" @@ -387,7 +388,7 @@ def load_task(self, loader_args: List[str], max_concurrency: int = 0) -> ArrayNo def loader_args(self, settings: SerializationSettings, t: ArrayNodeMapTask) -> List[str]: # type:ignore return [ "vars", - f'{",".join(t.bound_inputs)}', + f'{",".join(sorted(t.bound_inputs))}', "resolver", t.python_function_task.task_resolver.location, *t.python_function_task.task_resolver.loader_args(settings, t.python_function_task), diff --git a/flytekit/core/map_task.py b/flytekit/core/map_task.py index 1201a3ede0..aac31a1ee9 100644 --- a/flytekit/core/map_task.py +++ b/flytekit/core/map_task.py @@ -92,7 +92,8 @@ def __init__( f = actual_task.lhs else: _, mod, f, _ = tracker.extract_task_module(typing.cast(PythonFunctionTask, actual_task).task_function) - h = hashlib.md5(collection_interface.__str__().encode("utf-8")).hexdigest() + sorted_bounded_inputs = ",".join(sorted(self._bound_inputs)) + h = hashlib.md5(sorted_bounded_inputs.encode("utf-8")).hexdigest() name = f"{mod}.map_{f}_{h}" self._cmd_prefix: typing.Optional[typing.List[str]] = None @@ -404,7 +405,7 @@ def load_task(self, loader_args: List[str], max_concurrency: int = 0) -> MapPyth def loader_args(self, settings: SerializationSettings, t: MapPythonTask) -> List[str]: # type:ignore return [ "vars", - f'{",".join(t.bound_inputs)}', + f'{",".join(sorted(t.bound_inputs))}', "resolver", t.run_task.task_resolver.location, *t.run_task.task_resolver.loader_args(settings, t.run_task), diff --git a/tests/flytekit/unit/core/test_array_node_map_task.py b/tests/flytekit/unit/core/test_array_node_map_task.py index aeb727f5f8..40bb864c4f 100644 --- 
a/tests/flytekit/unit/core/test_array_node_map_task.py +++ b/tests/flytekit/unit/core/test_array_node_map_task.py @@ -7,7 +7,7 @@ from flytekit import task, workflow from flytekit.configuration import FastSerializationSettings, Image, ImageConfig, SerializationSettings -from flytekit.core.array_node_map_task import ArrayNodeMapTask +from flytekit.core.array_node_map_task import ArrayNodeMapTask, ArrayNodeMapTaskResolver from flytekit.core.task import TaskMetadata from flytekit.experimental import map_task as array_node_map_task from flytekit.tools.translator import get_serializable @@ -187,7 +187,7 @@ def many_inputs(a: int, b: str, c: float) -> str: assert m.python_interface.inputs == {"a": List[int], "b": List[str], "c": List[float]} assert ( m.name - == "tests.flytekit.unit.core.test_array_node_map_task.map_many_inputs_4ee240ef5cf979dbc133fb30035cb874-arraynode" + == "tests.flytekit.unit.core.test_array_node_map_task.map_many_inputs_bf51001578d0ae197a52c0af0a99dd89-arraynode" ) r_m = ArrayNodeMapTask(many_inputs) assert str(r_m.python_interface) == str(m.python_interface) @@ -197,7 +197,7 @@ def many_inputs(a: int, b: str, c: float) -> str: assert m.python_interface.inputs == {"a": List[int], "b": List[str], "c": float} assert ( m.name - == "tests.flytekit.unit.core.test_array_node_map_task.map_many_inputs_352fcdea8523a83134b51bbf5793f14e-arraynode" + == "tests.flytekit.unit.core.test_array_node_map_task.map_many_inputs_cb470e880fabd6265ec80e29fe60250d-arraynode" ) r_m = ArrayNodeMapTask(many_inputs, bound_inputs=set("c")) assert str(r_m.python_interface) == str(m.python_interface) @@ -207,7 +207,7 @@ def many_inputs(a: int, b: str, c: float) -> str: assert m.python_interface.inputs == {"a": List[int], "b": str, "c": float} assert ( m.name - == "tests.flytekit.unit.core.test_array_node_map_task.map_many_inputs_e224ba3a5b00e08083d541a6ca99b179-arraynode" + == 
"tests.flytekit.unit.core.test_array_node_map_task.map_many_inputs_316e10eb97f5d2abd585951048b807b9-arraynode" ) r_m = ArrayNodeMapTask(many_inputs, bound_inputs={"c", "b"}) assert str(r_m.python_interface) == str(m.python_interface) @@ -217,7 +217,7 @@ def many_inputs(a: int, b: str, c: float) -> str: assert m.python_interface.inputs == {"a": int, "b": str, "c": float} assert ( m.name - == "tests.flytekit.unit.core.test_array_node_map_task.map_many_inputs_f080e60be9d6faedeef0c74834d6812a-arraynode" + == "tests.flytekit.unit.core.test_array_node_map_task.map_many_inputs_758022acd59ad1c8b81670378d4de4f6-arraynode" ) r_m = ArrayNodeMapTask(many_inputs, bound_inputs={"a", "c", "b"}) assert str(r_m.python_interface) == str(m.python_interface) @@ -257,6 +257,18 @@ def task3(c: str, a: int, b: float) -> str: assert m1 == m2 == m3 == ["1 - 0.1 - c", "2 - 0.2 - c", "3 - 0.3 - c"] +def test_bounded_inputs_vars_order(serialization_settings): + @task() + def task1(a: int, b: float, c: str) -> str: + return f"{a} - {b} - {c}" + + mt = array_node_map_task(functools.partial(task1, c=1.0, b="hello", a=1)) + mtr = ArrayNodeMapTaskResolver() + args = mtr.loader_args(serialization_settings, mt) + + assert args[1] == "a,b,c" + + @pytest.mark.parametrize( "min_success_ratio, should_raise_error", [ diff --git a/tests/flytekit/unit/core/test_map_task.py b/tests/flytekit/unit/core/test_map_task.py index 26d1a71c3c..c87d4c6b1f 100644 --- a/tests/flytekit/unit/core/test_map_task.py +++ b/tests/flytekit/unit/core/test_map_task.py @@ -192,31 +192,36 @@ def many_inputs(a: int, b: str, c: float) -> str: m = map_task(many_inputs) assert m.python_interface.inputs == {"a": typing.List[int], "b": typing.List[str], "c": typing.List[float]} - assert m.name == "tests.flytekit.unit.core.test_map_task.map_many_inputs_24c08b3a2f9c2e389ad9fc6a03482cf9" + assert m.name == "tests.flytekit.unit.core.test_map_task.map_many_inputs_d41d8cd98f00b204e9800998ecf8427e" r_m = MapPythonTask(many_inputs) assert 
str(r_m.python_interface) == str(m.python_interface) p1 = functools.partial(many_inputs, c=1.0) m = map_task(p1) assert m.python_interface.inputs == {"a": typing.List[int], "b": typing.List[str], "c": float} - assert m.name == "tests.flytekit.unit.core.test_map_task.map_many_inputs_697aa7389996041183cf6cfd102be4f7" + assert m.name == "tests.flytekit.unit.core.test_map_task.map_many_inputs_4a8a08f09d37b73795649038408b5f33" r_m = MapPythonTask(many_inputs, bound_inputs=set("c")) assert str(r_m.python_interface) == str(m.python_interface) p2 = functools.partial(p1, b="hello") m = map_task(p2) assert m.python_interface.inputs == {"a": typing.List[int], "b": str, "c": float} - assert m.name == "tests.flytekit.unit.core.test_map_task.map_many_inputs_cc18607da7494024a402a5fa4b3ea5c6" + assert m.name == "tests.flytekit.unit.core.test_map_task.map_many_inputs_74aefa13d6ab8e4bfbd241583749dfe8" r_m = MapPythonTask(many_inputs, bound_inputs={"c", "b"}) assert str(r_m.python_interface) == str(m.python_interface) p3 = functools.partial(p2, a=1) m = map_task(p3) assert m.python_interface.inputs == {"a": int, "b": str, "c": float} - assert m.name == "tests.flytekit.unit.core.test_map_task.map_many_inputs_52fe80b04781ea77ef6f025f4b49abef" + assert m.name == "tests.flytekit.unit.core.test_map_task.map_many_inputs_a44c56c8177e32d3613988f4dba7962e" r_m = MapPythonTask(many_inputs, bound_inputs={"a", "c", "b"}) assert str(r_m.python_interface) == str(m.python_interface) + p3_1 = functools.partial(p2, a=1) + m_1 = map_task(p3_1) + assert m_1.python_interface.inputs == {"a": int, "b": str, "c": float} + assert m_1.name == m.name + with pytest.raises(TypeError): m(a=[1, 2, 3]) @@ -348,3 +353,11 @@ def wf(x: typing.List[int]): map_task(my_mappable_task)(a=x).with_overrides(container_image="random:image") assert wf.nodes[0].flyte_entity.run_task.container_image == "random:image" + + +def test_bounded_inputs_vars_order(serialization_settings): + mt = map_task(functools.partial(t3, c=1.0, 
b="hello", a=1)) + mtr = MapTaskResolver() + args = mtr.loader_args(serialization_settings, mt) + + assert args[1] == "a,b,c" From 0006b01e4ba19b165bf6c39c1340908869267bb9 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 23 Jan 2024 00:20:57 -0800 Subject: [PATCH 47/63] Check envd version before building images (#2122) Signed-off-by: Kevin Su --- flytekit/image_spec/image_spec.py | 14 ++++++++++++++ .../flytekitplugins/envd/image_builder.py | 6 ++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index cef455acbb..35001eb31c 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -7,10 +7,14 @@ from abc import abstractmethod from dataclasses import asdict, dataclass from functools import lru_cache +from importlib import metadata from typing import List, Optional, Union import click import requests +from packaging.version import Version + +from flytekit.exceptions.user import FlyteAssertion DOCKER_HUB = "docker.io" _F_IMG_ID = "_F_IMG_ID" @@ -206,6 +210,16 @@ def build(cls, image_spec: ImageSpec): click.secho(f"Image {img_name} not found. Building...", fg="blue") if image_spec.builder not in cls._REGISTRY: raise Exception(f"Builder {image_spec.builder} is not registered.") + if image_spec.builder == "envd": + envd_version = metadata.version("envd") + # flytekit v1.10.2+ copies the workflow code to the WorkDir specified in the Dockerfile. However, envd<0.3.39 + # overwrites the WorkDir when building the image, resulting in a permission issue when flytekit downloads the file. + if Version(envd_version) < Version("0.3.39"): + raise FlyteAssertion( + f"envd version {envd_version} is not compatible with flytekit>v1.10.2." + f" Please upgrade envd to v0.3.39+." 
+ ) + cls._REGISTRY[image_spec.builder].build_image(image_spec) cls._BUILT_IMAGES.add(img_name) diff --git a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py index 45d0b4676c..6c11bd6838 100644 --- a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py +++ b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py @@ -1,8 +1,8 @@ -import json import os import pathlib import shutil import subprocess +from importlib import metadata import click from packaging.version import Version @@ -108,9 +108,7 @@ def build(): if image_spec.source_root: shutil.copytree(image_spec.source_root, pathlib.Path(cfg_path).parent, dirs_exist_ok=True) - version_command = "envd version -s -f json" - envd_version = json.loads(EnvdImageSpecBuilder().execute_command(version_command)[0])["envd"].replace("v", "") - + envd_version = metadata.version("envd") # Indentation is required by envd if Version(envd_version) <= Version("0.3.37"): envd_config += ' io.copy(host_path="./", envd_path="/root")' From 1c8d4bd9d98cc147241537f30b1d3af54ef55d87 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jan 2024 10:25:56 -0800 Subject: [PATCH 48/63] Bump pillow from 10.1.0 to 10.2.0 (#2128) Bumps [pillow](https://github.com/python-pillow/Pillow) from 10.1.0 to 10.2.0. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/10.1.0...10.2.0) --- updated-dependencies: - dependency-name: pillow dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 0634217be7..c7de966245 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -329,7 +329,7 @@ parso==0.8.3 # via jedi pexpect==4.8.0 # via ipython -pillow==10.1.0 +pillow==10.2.0 # via -r dev-requirements.in platformdirs==3.11.0 # via virtualenv From f11515794fc9c83cd438144bd477fcf699ada474 Mon Sep 17 00:00:00 2001 From: Quinten <147648834+quinten-flwls@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:56:24 -0800 Subject: [PATCH 49/63] Support mashumaro DataClassORJSONMixin (#2080) * support DataClassJSONMixin Signed-off-by: Quinten Roets * make union syntax compatible with python3.8 Signed-off-by: Quinten Roets * add datetime attribute Signed-off-by: Quinten Roets * centralize serializable checking Signed-off-by: Quinten Roets * Incorporate feedback Signed-off-by: Eduardo Apolinario --------- Signed-off-by: Quinten Roets Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- dev-requirements.in | 2 + flytekit/core/type_engine.py | 20 +++++---- tests/flytekit/unit/core/test_type_engine.py | 43 ++++++++++++++++++++ 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/dev-requirements.in b/dev-requirements.in index b89c389736..e9077aa353 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -45,3 +45,5 @@ pandas scikit-learn types-requests prometheus-client + +orjson diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index 4220d6e7b6..76f750233b 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -325,6 +325,13 @@ class Test(DataClassJsonMixin): def __init__(self): super().__init__("Object-Dataclass-Transformer", object) + self._serializable_classes = [DataClassJSONMixin, DataClassJsonMixin] + try: + from 
mashumaro.mixins.orjson import DataClassORJSONMixin + + self._serializable_classes.append(DataClassORJSONMixin) + except ModuleNotFoundError: + pass def assert_type(self, expected_type: Type[DataClassJsonMixin], v: T): # Skip iterating all attributes in the dataclass if the type of v already matches the expected_type @@ -417,7 +424,7 @@ def get_literal_type(self, t: Type[T]) -> LiteralType: f"Type {t} cannot be parsed." ) - if not issubclass(t, DataClassJsonMixin) and not issubclass(t, DataClassJSONMixin): + if not self.is_serializable_class(t): raise AssertionError( f"Dataclass {t} should be decorated with @dataclass_json or mixin with DataClassJSONMixin to be " f"serialized correctly" @@ -465,6 +472,9 @@ def get_literal_type(self, t: Type[T]) -> LiteralType: return _type_models.LiteralType(simple=_type_models.SimpleType.STRUCT, metadata=schema, structure=ts) + def is_serializable_class(self, class_: Type[T]) -> bool: + return any(issubclass(class_, serializable_class) for serializable_class in self._serializable_classes) + def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal: if isinstance(python_val, dict): json_str = json.dumps(python_val) @@ -475,9 +485,7 @@ def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], exp f"{type(python_val)} is not of type @dataclass, only Dataclasses are supported for " f"user defined datatypes in Flytekit" ) - if not issubclass(type(python_val), DataClassJsonMixin) and not issubclass( - type(python_val), DataClassJSONMixin - ): + if not self.is_serializable_class(type(python_val)): raise TypeTransformerFailedError( f"Dataclass {python_type} should be decorated with @dataclass_json or inherit DataClassJSONMixin to be " f"serialized correctly" @@ -730,9 +738,7 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type: f"{expected_python_type} is not of type @dataclass, only Dataclasses are supported for " "user defined datatypes 
in Flytekit" ) - if not issubclass(expected_python_type, DataClassJsonMixin) and not issubclass( - expected_python_type, DataClassJSONMixin - ): + if not self.is_serializable_class(expected_python_type): raise TypeTransformerFailedError( f"Dataclass {expected_python_type} should be decorated with @dataclass_json or mixin with DataClassJSONMixin to be " f"serialized correctly" diff --git a/tests/flytekit/unit/core/test_type_engine.py b/tests/flytekit/unit/core/test_type_engine.py index 7eed167246..6673c2fb65 100644 --- a/tests/flytekit/unit/core/test_type_engine.py +++ b/tests/flytekit/unit/core/test_type_engine.py @@ -22,6 +22,7 @@ from marshmallow_enum import LoadDumpOptions from marshmallow_jsonschema import JSONSchema from mashumaro.mixins.json import DataClassJSONMixin +from mashumaro.mixins.orjson import DataClassORJSONMixin from typing_extensions import Annotated, get_args, get_origin from flytekit import kwtypes @@ -2366,6 +2367,10 @@ def test_DataclassTransformer_get_literal_type(): class MyDataClassMashumaro(DataClassJsonMixin): x: int + @dataclass + class MyDataClassMashumaroORJSON(DataClassJsonMixin): + x: int + @dataclass_json @dataclass class MyDataClass: @@ -2379,6 +2384,9 @@ class MyDataClass: literal_type = de.get_literal_type(MyDataClassMashumaro) assert literal_type is not None + literal_type = de.get_literal_type(MyDataClassMashumaroORJSON) + assert literal_type is not None + invalid_json_str = "{ unbalanced_braces" with pytest.raises(Exception): Literal(scalar=Scalar(generic=_json_format.Parse(invalid_json_str, _struct.Struct()))) @@ -2389,6 +2397,10 @@ def test_DataclassTransformer_to_literal(): class MyDataClassMashumaro(DataClassJsonMixin): x: int + @dataclass + class MyDataClassMashumaroORJSON(DataClassORJSONMixin): + x: int + @dataclass_json @dataclass class MyDataClass: @@ -2398,12 +2410,19 @@ class MyDataClass: ctx = FlyteContext.current_context() my_dat_class_mashumaro = MyDataClassMashumaro(5) + my_dat_class_mashumaro_orjson = 
MyDataClassMashumaroORJSON(5) my_data_class = MyDataClass(5) lv_mashumaro = transformer.to_literal(ctx, my_dat_class_mashumaro, MyDataClassMashumaro, MyDataClassMashumaro) assert lv_mashumaro is not None assert lv_mashumaro.scalar.generic["x"] == 5 + lv_mashumaro_orjson = transformer.to_literal( + ctx, my_dat_class_mashumaro_orjson, MyDataClassMashumaroORJSON, MyDataClassMashumaroORJSON + ) + assert lv_mashumaro_orjson is not None + assert lv_mashumaro_orjson.scalar.generic["x"] == 5 + lv = transformer.to_literal(ctx, my_data_class, MyDataClass, MyDataClass) assert lv is not None assert lv.scalar.generic["x"] == 5 @@ -2414,6 +2433,10 @@ def test_DataclassTransformer_to_python_value(): class MyDataClassMashumaro(DataClassJsonMixin): x: int + @dataclass + class MyDataClassMashumaroORJSON(DataClassORJSONMixin): + x: int + @dataclass_json @dataclass class MyDataClass: @@ -2432,8 +2455,18 @@ class MyDataClass: assert isinstance(result, MyDataClassMashumaro) assert result.x == 5 + result = de.to_python_value(FlyteContext.current_context(), mock_literal, MyDataClassMashumaroORJSON) + assert isinstance(result, MyDataClassMashumaroORJSON) + assert result.x == 5 + def test_DataclassTransformer_guess_python_type(): + @dataclass + class DatumMashumaroORJSON(DataClassORJSONMixin): + x: int + y: Color + z: datetime.datetime + @dataclass class DatumMashumaro(DataClassJSONMixin): x: int @@ -2464,6 +2497,16 @@ class Datum(DataClassJSONMixin): assert datum_mashumaro.x == pv.x assert datum_mashumaro.y.value == pv.y + lt = TypeEngine.to_literal_type(DatumMashumaroORJSON) + now = datetime.datetime.now() + datum_mashumaro_orjson = DatumMashumaroORJSON(5, Color.RED, now) + lv = transformer.to_literal(ctx, datum_mashumaro_orjson, DatumMashumaroORJSON, lt) + gt = transformer.guess_python_type(lt) + pv = transformer.to_python_value(ctx, lv, expected_python_type=gt) + assert datum_mashumaro_orjson.x == pv.x + assert datum_mashumaro_orjson.y.value == pv.y + assert 
datum_mashumaro_orjson.z.isoformat() == pv.z + def test_ListTransformer_get_sub_type(): assert ListTransformer.get_sub_type_or_none(typing.List[str]) is str From 3966d1a0a1e33137a4bc41d9860d4ed5e264cbdf Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 24 Jan 2024 16:52:45 -0500 Subject: [PATCH 50/63] Extends ImageSpec to accept image names from plugin and have priority for plugins (#2119) * Extends image spec to have priority and accept image names from plugin Signed-off-by: Thomas J. Fan * TST Simplify implemenation of registry Signed-off-by: Thomas J. Fan * FIX Be more specific about max key Signed-off-by: Thomas J. Fan * FIX Fixes failing test Signed-off-by: Thomas J. Fan * DOC Adds comment about _IMAGE_NAME_TO_REAL_NAME Signed-off-by: Thomas J. Fan --------- Signed-off-by: Thomas J. Fan --- flytekit/image_spec/image_spec.py | 48 +++++++++++++------ .../flytekit-envd/tests/test_image_spec.py | 16 ++++++- .../unit/core/image_spec/test_image_spec.py | 22 +++++++++ 3 files changed, 71 insertions(+), 15 deletions(-) diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index 35001eb31c..7a8ef547da 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -8,7 +8,7 @@ from dataclasses import asdict, dataclass from functools import lru_cache from importlib import metadata -from typing import List, Optional, Union +from typing import Dict, List, Optional, Tuple, Union import click import requests @@ -48,7 +48,7 @@ class ImageSpec: name: str = "flytekit" python_version: str = None # Use default python in the base image if None. - builder: str = "envd" + builder: Optional[str] = None source_root: Optional[str] = None env: Optional[typing.Dict[str, str]] = None registry: Optional[str] = None @@ -71,9 +71,15 @@ def __post_init__(self): self.registry = self.registry.lower() def image_name(self) -> str: - """ - return full image name with tag. 
- """ + """Full image name with tag.""" + image_name = self._image_name() + try: + return ImageBuildEngine._IMAGE_NAME_TO_REAL_NAME[image_name] + except KeyError: + return image_name + + def _image_name(self) -> str: + """Construct full image name with tag.""" tag = calculate_hash_from_image_spec(self) container_image = f"{self.name}:{tag}" if self.registry: @@ -178,12 +184,15 @@ def with_apt_packages(self, apt_packages: Union[str, List[str]]) -> "ImageSpec": class ImageSpecBuilder: @abstractmethod - def build_image(self, image_spec: ImageSpec): + def build_image(self, image_spec: ImageSpec) -> Optional[str]: """ Build the docker image and push it to the registry. Args: image_spec: image spec of the task. + + Returns: + fully_qualified_image_name: Fully qualified image name. If None, then `image_spec.image_name()` is used. """ raise NotImplementedError("This method is not implemented in the base class.") @@ -193,24 +202,33 @@ class ImageBuildEngine: ImageBuildEngine contains a list of builders that can be used to build an ImageSpec. """ - _REGISTRY: typing.Dict[str, ImageSpecBuilder] = {} + _REGISTRY: typing.Dict[str, Tuple[ImageSpecBuilder, int]] = {} _BUILT_IMAGES: typing.Set[str] = set() + # _IMAGE_NAME_TO_REAL_NAME is used to keep track of the fully qualified image name + # returned by the image builder. This allows ImageSpec to map from `image_spc.image_name()` + # to the real qualified name. 
+ _IMAGE_NAME_TO_REAL_NAME: Dict[str, str] = {} @classmethod - def register(cls, builder_type: str, image_spec_builder: ImageSpecBuilder): - cls._REGISTRY[builder_type] = image_spec_builder + def register(cls, builder_type: str, image_spec_builder: ImageSpecBuilder, priority: int = 5): + cls._REGISTRY[builder_type] = (image_spec_builder, priority) @classmethod @lru_cache - def build(cls, image_spec: ImageSpec): + def build(cls, image_spec: ImageSpec) -> str: + if image_spec.builder is None and cls._REGISTRY: + builder = max(cls._REGISTRY, key=lambda name: cls._REGISTRY[name][1]) + else: + builder = image_spec.builder + img_name = image_spec.image_name() if img_name in cls._BUILT_IMAGES or image_spec.exist(): click.secho(f"Image {img_name} found. Skip building.", fg="blue") else: click.secho(f"Image {img_name} not found. Building...", fg="blue") - if image_spec.builder not in cls._REGISTRY: - raise Exception(f"Builder {image_spec.builder} is not registered.") - if image_spec.builder == "envd": + if builder not in cls._REGISTRY: + raise Exception(f"Builder {builder} is not registered.") + if builder == "envd": envd_version = metadata.version("envd") # flytekit v1.10.2+ copies the workflow code to the WorkDir specified in the Dockerfile. However, envd<0.3.39 # overwrites the WorkDir when building the image, resulting in a permission issue when flytekit downloads the file. @@ -220,7 +238,9 @@ def build(cls, image_spec: ImageSpec): f" Please upgrade envd to v0.3.39+." 
) - cls._REGISTRY[image_spec.builder].build_image(image_spec) + fully_qualified_image_name = cls._REGISTRY[builder][0].build_image(image_spec) + if fully_qualified_image_name is not None: + cls._IMAGE_NAME_TO_REAL_NAME[img_name] = fully_qualified_image_name cls._BUILT_IMAGES.add(img_name) diff --git a/plugins/flytekit-envd/tests/test_image_spec.py b/plugins/flytekit-envd/tests/test_image_spec.py index 36adebd346..d77b2ca89b 100644 --- a/plugins/flytekit-envd/tests/test_image_spec.py +++ b/plugins/flytekit-envd/tests/test_image_spec.py @@ -1,9 +1,23 @@ from pathlib import Path from textwrap import dedent +import pytest from flytekitplugins.envd.image_builder import EnvdImageSpecBuilder, create_envd_config -from flytekit.image_spec.image_spec import ImageSpec +from flytekit.image_spec.image_spec import ImageBuildEngine, ImageSpec + + +@pytest.fixture(scope="module", autouse=True) +def register_envd_higher_priority(): + # Register a new envd platform with the highest priority so the test in this file uses envd + highest_priority_builder = max(ImageBuildEngine._REGISTRY, key=ImageBuildEngine._REGISTRY.get) + highest_priority = ImageBuildEngine._REGISTRY[highest_priority_builder][1] + yield ImageBuildEngine.register( + "envd_high_priority", + EnvdImageSpecBuilder(), + priority=highest_priority + 1, + ) + del ImageBuildEngine._REGISTRY["envd_high_priority"] def test_image_spec(): diff --git a/tests/flytekit/unit/core/image_spec/test_image_spec.py b/tests/flytekit/unit/core/image_spec/test_image_spec.py index 81151beddd..38727d02fd 100644 --- a/tests/flytekit/unit/core/image_spec/test_image_spec.py +++ b/tests/flytekit/unit/core/image_spec/test_image_spec.py @@ -1,4 +1,5 @@ import os +from unittest.mock import Mock import pytest @@ -75,3 +76,24 @@ def test_image_spec(mock_image_spec_builder): # ImageSpec should be immutable image_spec.with_commands("ls") assert image_spec.commands == ["echo hello"] + + +def test_image_spec_engine_priority(): + image_spec = 
ImageSpec(name="FLYTEKIT") + image_name = image_spec.image_name() + + new_image_name = f"fqn.xyz/{image_name}" + mock_image_builder_10 = Mock() + mock_image_builder_10.build_image.return_value = new_image_name + mock_image_builder_default = Mock() + mock_image_builder_default.build_image.side_effect = ValueError("should not be called") + + ImageBuildEngine.register("build_10", mock_image_builder_10, priority=10) + ImageBuildEngine.register("build_default", mock_image_builder_default) + + ImageBuildEngine.build(image_spec) + mock_image_builder_10.build_image.assert_called_once_with(image_spec) + + assert image_spec.image_name() == new_image_name + del ImageBuildEngine._REGISTRY["build_10"] + del ImageBuildEngine._REGISTRY["build_default"] From cead901d80003c71b56bb8ac2ede0487c9a9fd1a Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Thu, 25 Jan 2024 09:51:12 -0800 Subject: [PATCH 51/63] Use logger in data_persistence.py (#2129) Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- flytekit/core/data_persistence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/core/data_persistence.py b/flytekit/core/data_persistence.py index 1579ef3f6b..b597c75b56 100644 --- a/flytekit/core/data_persistence.py +++ b/flytekit/core/data_persistence.py @@ -247,7 +247,7 @@ def get(self, from_path: str, to_path: str, recursive: bool = False, **kwargs): return shutil.copytree( self.strip_file_header(from_path), self.strip_file_header(to_path), dirs_exist_ok=True ) - print(f"Getting {from_path} to {to_path}") + logger.info(f"Getting {from_path} to {to_path}") dst = file_system.get(from_path, to_path, recursive=recursive, **kwargs) if isinstance(dst, (str, pathlib.Path)): return dst From 956e0be545e3540141cccbfd54ae44f33094d615 Mon Sep 17 00:00:00 2001 From: Neil <150836163+neilisaur@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:01:09 -0700 Subject: [PATCH 52/63] 
=?UTF-8?q?Adjust=20tar=20method=20to=20iterate=20ov?= =?UTF-8?q?er=20files/dirs=20in=20dir=20rather=20than=20strip=E2=80=A6=20(?= =?UTF-8?q?#2131)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Neil Stout --- flytekit/tools/script_mode.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flytekit/tools/script_mode.py b/flytekit/tools/script_mode.py index 5c9990d819..fba454ce76 100644 --- a/flytekit/tools/script_mode.py +++ b/flytekit/tools/script_mode.py @@ -48,7 +48,10 @@ def compress_scripts(source_path: str, destination: str, module_name: str): copy_module_to_destination(source_path, destination_path, module_name, visited) tar_path = os.path.join(tmp_dir, "tmp.tar") with tarfile.open(tar_path, "w") as tar: - tar.add(os.path.join(tmp_dir, "code"), arcname="", filter=tar_strip_file_attributes) + tmp_path: str = os.path.join(tmp_dir, "code") + files: typing.List[str] = os.listdir(tmp_path) + for ws_file in files: + tar.add(os.path.join(tmp_path, ws_file), arcname=ws_file, filter=tar_strip_file_attributes) with gzip.GzipFile(filename=destination, mode="wb", mtime=0) as gzipped: with open(tar_path, "rb") as tar_file: gzipped.write(tar_file.read()) From de0644f91f9edd4b1be70ee2b1e3bdef8b8539e9 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Fri, 26 Jan 2024 01:05:57 +0000 Subject: [PATCH 53/63] Include exception type in error messages (#2130) * Add test Signed-off-by: Thomas Newton * Fix Signed-off-by: Thomas Newton * Better formatting Signed-off-by: Thomas Newton --------- Signed-off-by: Thomas Newton --- flytekit/exceptions/scopes.py | 2 +- tests/flytekit/unit/exceptions/test_scopes.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/flytekit/exceptions/scopes.py b/flytekit/exceptions/scopes.py index f0a0f02189..a9a33b748d 100644 --- a/flytekit/exceptions/scopes.py +++ b/flytekit/exceptions/scopes.py @@ -39,7 +39,7 @@ def verbose_message(self): traceback_str = 
"\n ".join([""] + lines) format_str = "Traceback (most recent call last):\n" "{traceback}\n" "\n" "Message:\n" "\n" " {message}" - return format_str.format(traceback=traceback_str, message=str(self.value)) + return format_str.format(traceback=traceback_str, message=f"{self.type.__name__}: {self.value}") def __str__(self): return str(self.value) diff --git a/tests/flytekit/unit/exceptions/test_scopes.py b/tests/flytekit/unit/exceptions/test_scopes.py index 75ef74383e..0b60d5af95 100644 --- a/tests/flytekit/unit/exceptions/test_scopes.py +++ b/tests/flytekit/unit/exceptions/test_scopes.py @@ -50,6 +50,7 @@ def test_intercepted_scope_non_flyte_exception(): assert e.value == value_error assert "Bad value" in e.verbose_message assert "User error." in e.verbose_message + assert "ValueError:" in e.verbose_message assert e.error_code == "USER:Unknown" assert e.kind == _error_models.ContainerError.Kind.NON_RECOVERABLE @@ -60,6 +61,7 @@ def test_intercepted_scope_non_flyte_exception(): assert e.value == value_error assert "Bad value" in e.verbose_message assert "SYSTEM ERROR!" in e.verbose_message + assert "ValueError:" in e.verbose_message assert e.error_code == "SYSTEM:Unknown" assert e.kind == _error_models.ContainerError.Kind.RECOVERABLE @@ -74,6 +76,7 @@ def test_intercepted_scope_flyte_user_exception(): assert e.value == assertion_error assert "Bad assert" in e.verbose_message assert "User error." in e.verbose_message + assert "FlyteAssertion:" in e.verbose_message assert e.error_code == "USER:AssertionError" assert e.kind == _error_models.ContainerError.Kind.NON_RECOVERABLE @@ -84,6 +87,7 @@ def test_intercepted_scope_flyte_user_exception(): assert e.value == assertion_error assert "Bad assert" in e.verbose_message assert "User error." 
in e.verbose_message + assert "FlyteAssertion:" in e.verbose_message assert e.error_code == "USER:AssertionError" assert e.kind == _error_models.ContainerError.Kind.NON_RECOVERABLE @@ -98,6 +102,7 @@ def test_intercepted_scope_flyte_system_exception(): assert e.value == assertion_error assert "Bad assert" in e.verbose_message assert "SYSTEM ERROR!" in e.verbose_message + assert "FlyteSystemAssertion:" in e.verbose_message assert e.kind == _error_models.ContainerError.Kind.RECOVERABLE assert e.error_code == "SYSTEM:AssertionError" @@ -108,5 +113,6 @@ def test_intercepted_scope_flyte_system_exception(): assert e.value == assertion_error assert "Bad assert" in e.verbose_message assert "SYSTEM ERROR!" in e.verbose_message + assert "FlyteSystemAssertion:" in e.verbose_message assert e.error_code == "SYSTEM:AssertionError" assert e.kind == _error_models.ContainerError.Kind.RECOVERABLE From 859b0eb6d4dcdbac57ef62eb3095d0e8c4039b0c Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 29 Jan 2024 16:17:23 -0500 Subject: [PATCH 54/63] Adds get_default_image into configuration plugin (#2133) Signed-off-by: Thomas J. 
Fan --- flytekit/configuration/default_images.py | 8 ++++++++ flytekit/configuration/plugin.py | 9 +++++++++ .../unit/configuration/test_image_config.py | 13 +++++++++++++ 3 files changed, 30 insertions(+) diff --git a/flytekit/configuration/default_images.py b/flytekit/configuration/default_images.py index ea9d162a8d..380f428154 100644 --- a/flytekit/configuration/default_images.py +++ b/flytekit/configuration/default_images.py @@ -1,6 +1,7 @@ import enum import sys import typing +from contextlib import suppress class PythonVersion(enum.Enum): @@ -26,6 +27,13 @@ class DefaultImages(object): @classmethod def default_image(cls) -> str: + from flytekit.configuration.plugin import get_plugin + + with suppress(AttributeError): + default_image = get_plugin().get_default_image() + if default_image is not None: + return default_image + return cls.find_image_for() @classmethod diff --git a/flytekit/configuration/plugin.py b/flytekit/configuration/plugin.py index 051d421d04..e29b57f727 100644 --- a/flytekit/configuration/plugin.py +++ b/flytekit/configuration/plugin.py @@ -43,6 +43,10 @@ def configure_pyflyte_cli(main: Group) -> Group: def secret_requires_group() -> bool: """Return True if secrets require group entry.""" + @staticmethod + def get_default_image() -> Optional[str]: + """Get default image. Return None to use the images from flytekit.configuration.DefaultImages""" + class FlytekitPlugin: @staticmethod @@ -71,6 +75,11 @@ def secret_requires_group() -> bool: """Return True if secrets require group entry.""" return True + @staticmethod + def get_default_image() -> Optional[str]: + """Get default image. 
Return None to use the images from flytekit.configuration.DefaultImages""" + return None + def _get_plugin_from_entrypoint(): """Get plugin from entrypoint.""" diff --git a/tests/flytekit/unit/configuration/test_image_config.py b/tests/flytekit/unit/configuration/test_image_config.py index 7e3c8ad300..82aea000b0 100644 --- a/tests/flytekit/unit/configuration/test_image_config.py +++ b/tests/flytekit/unit/configuration/test_image_config.py @@ -1,9 +1,11 @@ import os import sys +from unittest.mock import Mock import mock import pytest +import flytekit from flytekit.configuration import ImageConfig from flytekit.configuration.default_images import DefaultImages, PythonVersion @@ -63,3 +65,14 @@ def test_image_create(): def test_get_version_suffix(): assert DefaultImages.get_version_suffix() == "latest" + + +def test_default_image_plugin(monkeypatch): + new_default_image = "registry/flytekit:py3.9-latest" + + plugin_mock = Mock() + plugin_mock.get_default_image.return_value = new_default_image + mock_global_plugin = {"plugin": plugin_mock} + monkeypatch.setattr(flytekit.configuration.plugin, "_GLOBAL_CONFIG", mock_global_plugin) + + assert DefaultImages.default_image() == new_default_image From a4975b69598c342c2e606bed5bb6e71e7f5176be Mon Sep 17 00:00:00 2001 From: Honnix Date: Mon, 29 Jan 2024 23:30:00 +0100 Subject: [PATCH 55/63] Add 3.12 as classifier (#2135) Signed-off-by: Hongxin Liang --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6c5fd120e8..ce911e63ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Software Development", From f0586633bdd078a69bac646e3ada8ef1939b64ee Mon Sep 17 00:00:00 2001 From: 
Neil <150836163+neilisaur@users.noreply.github.com> Date: Mon, 29 Jan 2024 15:31:24 -0700 Subject: [PATCH 56/63] Fixing copy-all version of tar file creation as well (#2134) Signed-off-by: Neil Stout --- flytekit/tools/fast_registration.py | 9 ++++++++- flytekit/tools/repo.py | 4 +++- tests/flytekit/unit/tools/test_fast_registration.py | 6 ++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/flytekit/tools/fast_registration.py b/flytekit/tools/fast_registration.py index 0664ebbd8d..4b018ce94b 100644 --- a/flytekit/tools/fast_registration.py +++ b/flytekit/tools/fast_registration.py @@ -7,6 +7,7 @@ import subprocess as _subprocess import tarfile import tempfile +import typing from typing import Optional import click @@ -42,7 +43,13 @@ def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: b with tempfile.TemporaryDirectory() as tmp_dir: tar_path = os.path.join(tmp_dir, "tmp.tar") with tarfile.open(tar_path, "w", dereference=deref_symlinks) as tar: - tar.add(source, arcname="", filter=lambda x: ignore.tar_filter(tar_strip_file_attributes(x))) + files: typing.List[str] = os.listdir(source) + for ws_file in files: + tar.add( + os.path.join(source, ws_file), + arcname=ws_file, + filter=lambda x: ignore.tar_filter(tar_strip_file_attributes(x)), + ) with gzip.GzipFile(filename=archive_fname, mode="wb", mtime=0) as gzipped: with open(tar_path, "rb") as tar_file: gzipped.write(tar_file.read()) diff --git a/flytekit/tools/repo.py b/flytekit/tools/repo.py index c0055eb87d..c3a456c20b 100644 --- a/flytekit/tools/repo.py +++ b/flytekit/tools/repo.py @@ -97,7 +97,9 @@ def package( click.secho(f"Fast mode enabled: compressed archive {archive_fname}", dim=True) with tarfile.open(output, "w:gz") as tar: - tar.add(output_tmpdir, arcname="") + files: typing.List[str] = os.listdir(output_tmpdir) + for ws_file in files: + tar.add(os.path.join(output_tmpdir, ws_file), arcname=ws_file) click.secho(f"Successfully packaged 
{len(serializable_entities)} flyte objects into {output}", fg="green") diff --git a/tests/flytekit/unit/tools/test_fast_registration.py b/tests/flytekit/unit/tools/test_fast_registration.py index aae3995bcb..8d1c565a9d 100644 --- a/tests/flytekit/unit/tools/test_fast_registration.py +++ b/tests/flytekit/unit/tools/test_fast_registration.py @@ -46,8 +46,7 @@ def flyte_project(tmp_path): def test_package(flyte_project, tmp_path): archive_fname = fast_package(source=flyte_project, output_dir=tmp_path) with tarfile.open(archive_fname) as tar: - assert tar.getnames() == [ - "", # tar root, output removes leading '/' + assert sorted(tar.getnames()) == [ ".dockerignore", ".gitignore", "keep.foo", @@ -67,8 +66,7 @@ def test_package(flyte_project, tmp_path): def test_package_with_symlink(flyte_project, tmp_path): archive_fname = fast_package(source=flyte_project / "src", output_dir=tmp_path, deref_symlinks=True) with tarfile.open(archive_fname, dereference=True) as tar: - assert tar.getnames() == [ - "", # tar root, output removes leading '/' + assert sorted(tar.getnames()) == [ "util", "workflows", "workflows/hello_world.py", From f9eaf553b42a83e313e8123f6d1fb32a8553e644 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:16:11 -0800 Subject: [PATCH 57/63] Bump pillow from 10.1.0 to 10.2.0 in /plugins/flytekit-onnx-pytorch (#2127) Bumps [pillow](https://github.com/python-pillow/Pillow) from 10.1.0 to 10.2.0. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/10.1.0...10.2.0) --- updated-dependencies: - dependency-name: pillow dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- plugins/flytekit-onnx-pytorch/dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-onnx-pytorch/dev-requirements.txt b/plugins/flytekit-onnx-pytorch/dev-requirements.txt index 44660a3ba9..35a9b49a8f 100644 --- a/plugins/flytekit-onnx-pytorch/dev-requirements.txt +++ b/plugins/flytekit-onnx-pytorch/dev-requirements.txt @@ -69,7 +69,7 @@ onnxruntime==1.16.1 # via -r dev-requirements.in packaging==23.2 # via onnxruntime -pillow==10.1.0 +pillow==10.2.0 # via # -r dev-requirements.in # torchvision From d7dfbaf5743593cb4af4670dbce7b25038339faa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 00:41:59 -0800 Subject: [PATCH 58/63] Bump aiohttp from 3.8.6 to 3.9.2 (#2137) Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.8.6 to 3.9.2. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.8.6...v3.9.2) --- updated-dependencies: - dependency-name: aiohttp dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- dev-requirements.txt | 8 ++++---- doc-requirements.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index c7de966245..dc929286e7 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -14,7 +14,7 @@ adlfs==2023.9.0 # via flytekit aiobotocore==2.5.4 # via s3fs -aiohttp==3.8.6 +aiohttp==3.9.2 # via # adlfs # aiobotocore @@ -69,9 +69,7 @@ cfgv==3.4.0 chardet==5.2.0 # via binaryornot charset-normalizer==3.3.2 - # via - # aiohttp - # requests + # via requests click==8.1.7 # via # cookiecutter @@ -316,6 +314,8 @@ oauthlib==3.2.2 # requests-oauthlib opt-einsum==3.3.0 # via tensorflow +orjson==3.9.12 + # via -r dev-requirements.in packaging==23.2 # via # docker diff --git a/doc-requirements.txt b/doc-requirements.txt index 52cc2a5d29..5c5c660b38 100644 --- a/doc-requirements.txt +++ b/doc-requirements.txt @@ -10,7 +10,7 @@ adlfs==2023.9.0 # via flytekit aiobotocore==2.5.4 # via s3fs -aiohttp==3.9.1 +aiohttp==3.9.2 # via # adlfs # aiobotocore From b76375873e84545d05b8d580c5ec361bdf6b84d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 00:44:15 -0800 Subject: [PATCH 59/63] Bump aiohttp from 3.9.1 to 3.9.2 in /plugins/flytekit-spark (#2140) Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.9.1 to 3.9.2. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.1...v3.9.2) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- plugins/flytekit-spark/dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-spark/dev-requirements.txt b/plugins/flytekit-spark/dev-requirements.txt index 3335091569..5f5f8e283a 100644 --- a/plugins/flytekit-spark/dev-requirements.txt +++ b/plugins/flytekit-spark/dev-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile dev-requirements.in # -aiohttp==3.9.1 +aiohttp==3.9.2 # via aioresponses aioresponses==0.7.6 # via -r dev-requirements.in From d10bda1c86ed131e776de1e1965a7d81158b1c2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 00:44:25 -0800 Subject: [PATCH 60/63] Bump aiohttp from 3.9.0 to 3.9.2 in /plugins/flytekit-airflow (#2139) Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.9.0 to 3.9.2. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.0...v3.9.2) --- updated-dependencies: - dependency-name: aiohttp dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- plugins/flytekit-airflow/dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-airflow/dev-requirements.txt b/plugins/flytekit-airflow/dev-requirements.txt index b7b8178724..114279f520 100644 --- a/plugins/flytekit-airflow/dev-requirements.txt +++ b/plugins/flytekit-airflow/dev-requirements.txt @@ -6,7 +6,7 @@ # aiofiles==23.2.1 # via gcloud-aio-storage -aiohttp==3.9.0 +aiohttp==3.9.2 # via # apache-airflow-providers-http # gcloud-aio-auth From 1b8ad7e1aa870f2614782cb5e8ad68309aa03489 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 01:23:06 -0800 Subject: [PATCH 61/63] Bump aiohttp (#2138) Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.9.1 to 3.9.2. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.1...v3.9.2) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .../remote/mock_flyte_repo/workflows/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt b/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt index 5f4ef273a3..571db39e05 100644 --- a/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt +++ b/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt @@ -8,7 +8,7 @@ adlfs==2023.4.0 # via flytekit aiobotocore==2.5.2 # via s3fs -aiohttp==3.9.1 +aiohttp==3.9.2 # via # adlfs # aiobotocore From 1789db9928e60fc9e3be48bb5150a5fa12ad5763 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jan 2024 02:44:54 -0800 Subject: [PATCH 62/63] Bump pillow from 10.1.0 to 10.2.0 in /plugins/flytekit-onnx-tensorflow (#2126) Bumps [pillow](https://github.com/python-pillow/Pillow) from 10.1.0 to 10.2.0. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/10.1.0...10.2.0) --- updated-dependencies: - dependency-name: pillow dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- plugins/flytekit-onnx-tensorflow/dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/flytekit-onnx-tensorflow/dev-requirements.txt b/plugins/flytekit-onnx-tensorflow/dev-requirements.txt index 689a985a98..38a63b116c 100644 --- a/plugins/flytekit-onnx-tensorflow/dev-requirements.txt +++ b/plugins/flytekit-onnx-tensorflow/dev-requirements.txt @@ -18,7 +18,7 @@ onnxruntime==1.16.1 # via -r dev-requirements.in packaging==23.2 # via onnxruntime -pillow==10.1.0 +pillow==10.2.0 # via -r dev-requirements.in protobuf==4.25.0 # via onnxruntime From 8861d0af04b6101e79e4c77906de63c348c2b8c3 Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Tue, 30 Jan 2024 14:20:14 -0800 Subject: [PATCH 63/63] Envvars local execution (#2132) * Pass environment variables to local executions Signed-off-by: Eduardo Apolinario * Pass environment variables to local executions Signed-off-by: Eduardo Apolinario * Fix test_get_entities_in_file Signed-off-by: Eduardo Apolinario * Lint Signed-off-by: Eduardo Apolinario --------- Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- flytekit/clis/sdk_in_container/run.py | 3 ++ tests/flytekit/unit/cli/pyflyte/test_run.py | 44 ++++++++++++++++++++- tests/flytekit/unit/cli/pyflyte/workflow.py | 14 +++++++ 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/flytekit/clis/sdk_in_container/run.py b/flytekit/clis/sdk_in_container/run.py index d5a96e069a..ea0367cc83 100644 --- a/flytekit/clis/sdk_in_container/run.py +++ b/flytekit/clis/sdk_in_container/run.py @@ -513,6 +513,9 @@ def _run(*args, **kwargs): if not run_level_params.is_remote: with FlyteContextManager.with_context(_update_flyte_context(run_level_params)): + if run_level_params.envvars: + for env_var, value in run_level_params.envvars.items(): + 
os.environ[env_var] = value output = entity(**inputs) if inspect.iscoroutine(output): # TODO: make eager mode workflows run with local-mode diff --git a/tests/flytekit/unit/cli/pyflyte/test_run.py b/tests/flytekit/unit/cli/pyflyte/test_run.py index bab8b95b15..b42be184f8 100644 --- a/tests/flytekit/unit/cli/pyflyte/test_run.py +++ b/tests/flytekit/unit/cli/pyflyte/test_run.py @@ -198,14 +198,24 @@ def test_union_type_with_invalid_input(): def test_get_entities_in_file(): e = get_entities_in_file(WORKFLOW_FILE, False) - assert e.workflows == ["my_wf", "wf_with_none"] - assert e.tasks == ["get_subset_df", "print_all", "show_sd", "task_with_optional", "test_union1", "test_union2"] + assert e.workflows == ["my_wf", "wf_with_env_vars", "wf_with_none"] + assert e.tasks == [ + "get_subset_df", + "print_all", + "show_sd", + "task_with_env_vars", + "task_with_optional", + "test_union1", + "test_union2", + ] assert e.all() == [ "my_wf", + "wf_with_env_vars", "wf_with_none", "get_subset_df", "print_all", "show_sd", + "task_with_env_vars", "task_with_optional", "test_union1", "test_union2", @@ -390,3 +400,33 @@ def test_pyflyte_run_with_none(a_val): else: assert output == a_val assert result.exit_code == 0 + + +@pytest.mark.parametrize( + "envs, envs_argument, expected_output", + [ + (["--env", "MY_ENV_VAR=hello"], '["MY_ENV_VAR"]', "hello"), + (["--env", "MY_ENV_VAR=hello", "--env", "ABC=42"], '["MY_ENV_VAR","ABC"]', "hello,42"), + ], +) +def test_envvar_local_execution(envs, envs_argument, expected_output): + runner = CliRunner() + args = ( + [ + "run", + ] + + envs + + [ + WORKFLOW_FILE, + "wf_with_env_vars", + "--env_vars", + ] + + [envs_argument] + ) + result = runner.invoke( + pyflyte.main, + args, + catch_exceptions=False, + ) + output = result.stdout.strip().split("\n")[-1].strip() + assert output == expected_output diff --git a/tests/flytekit/unit/cli/pyflyte/workflow.py b/tests/flytekit/unit/cli/pyflyte/workflow.py index 59b0e1b4b2..95535d2fc0 100644 --- 
a/tests/flytekit/unit/cli/pyflyte/workflow.py +++ b/tests/flytekit/unit/cli/pyflyte/workflow.py @@ -1,5 +1,6 @@ import datetime import enum +import os import typing from dataclasses import dataclass @@ -111,3 +112,16 @@ def task_with_optional(a: typing.Optional[str]) -> str: @workflow def wf_with_none(a: typing.Optional[str] = None) -> str: return task_with_optional(a=a) + + +@task +def task_with_env_vars(env_vars: typing.List[str]) -> str: + collated_env_vars = [] + for env_var in env_vars: + collated_env_vars.append(os.environ[env_var]) + return ",".join(collated_env_vars) + + +@workflow +def wf_with_env_vars(env_vars: typing.List[str]) -> str: + return task_with_env_vars(env_vars=env_vars)