diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d96734a..f5f6db3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -117,23 +117,18 @@ jobs: timeout-minutes: 15 needs: lint runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - env-list: [doc-build, doc-build-multi] steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: - # need to pin to 3.10 cause sphinx-multiversion latest code have but gather equal than 3.11 - python-version: '3.10' + python-version: '3.13' - name: Install Dependences run: | python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run Build Docs Tests ${{ matrix.env-list }} + - name: Run Build Docs Tests run: | - python -m tox -vv -e ${{ matrix.env-list }} + python -m tox -vv -e doc-build local-ci: timeout-minutes: 15 needs: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1482c76..1e0c733 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,7 +46,7 @@ define by code, user usually do not care user, tenant, or queue exists or not. A a new workflow according the code definition. So we have some **models object** in `pydolphinscheduler/models` directory, their only check object exists or not, and create them if not exists. -More detail about core concept, please read [concept](https://dolphinscheduler.apache.org/python/main/concept.html) +For more detail about the core concepts, please read [concept](https://dolphinscheduler.apache.org/python/concept.html) in our documentation. ## Syntax To Trigger Specific CI During Pull Request @@ -177,14 +177,10 @@ GitHub. You may locally ensure docs could be built successfully in case the fail ### Build Document Automatically with tox -We integrated document build process into tox, you can build the latest document and all document(including history documents) via -single command +We integrated the document build process into tox, so you can build the latest documentation with a single command.
```shell -# Build the latest document in dev branch tox -e doc-build -# Build all documents, which including the latest and all history documents -tox -e doc-build-multi ``` ### Build Document Manually @@ -202,15 +198,6 @@ cd pydolphinscheduler/docs/ make clean && make html ``` -or if you want to build history documents, you should execute the command below - -```shell -# Fetch all history tags because we use tag to build history documents via [sphinx-multiversion](https://holzhaus.github.io/sphinx-multiversion/master/index.html) -git fetch --tags -cd pydolphinscheduler/docs/ -make clean && make multiversion -``` - ## Unit Test pydolphinscheduler using [pytest][pytest] to test our codebase. GitHub Action will run our test when you create diff --git a/README.md b/README.md index 34929c3..a0a90fb 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,6 @@ under the License. [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat-square&labelColor=ef8336)](https://pycqa.github.io/isort) [![CI](https://github.com/apache/dolphinscheduler-sdk-python/actions/workflows/ci.yaml/badge.svg)](https://github.com/apache/dolphinscheduler-sdk-python/actions/workflows/ci.yaml) [![Twitter Follow](https://img.shields.io/twitter/follow/dolphinschedule.svg?style=social&label=Follow)](https://twitter.com/dolphinschedule) -[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://s.apache.org/dolphinscheduler-slack) **PyDolphinScheduler** is python API for [Apache DolphinScheduler](https://dolphinscheduler.apache.org), which allow you definition your workflow by python code, aka workflow-as-codes. @@ -39,7 +38,7 @@ which allow you definition your workflow by python code, aka workflow-as-codes. 
### Version Compatibility At Nov 7, 2022 we seperated PyDolphinScheduler from DolphinScheduler, and the version of PyDolphinScheduler 4.0.0 -can match multiple versions of DolphinScheduler, for more details, please refer to [version](https://dolphinscheduler.apache.org/python/main/index.html#version) +can match multiple versions of DolphinScheduler. For more details, please refer to [version](https://dolphinscheduler.apache.org/python/index.html#version). ### Installation @@ -96,7 +95,7 @@ Project Management, please refer to [DolphinScheduler Workflow](https://dolphins ## Documentation -For full documentation visit [document](https://dolphinscheduler.apache.org/python/main/index.html). This +For full documentation visit [document](https://dolphinscheduler.apache.org/python/index.html). This documentation is generated from this repository so please raise issues or pull requests for any additions, corrections, or clarifications. ## Contributing diff --git a/RELEASE.md b/RELEASE.md index 120f984..98980b1 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -31,8 +31,6 @@ but we also have a [PyPi](#release-to-pypi) repository for Python package distri add check whether apache/dolphinscheduler commit is released, for some commit not release should revert the change * Run all test locally, `tox -e local-ci && tox -e local-integrate-test`, after you start dolphinscheduler to pass `local-integrate-test` -* Remove `sphinx-multiversion` dependency in `setup.cfg`, we still can not fix this issue - [Distribute tarball and wheel error with direct dependency](https://github.com/apache/dolphinscheduler/issues/12238) ## Build and Sign Package diff --git a/docs/Makefile b/docs/Makefile index ff2c4eb..985198a 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -24,7 +24,6 @@ # Add opts `turn warnings into errors` strict sphinx-build behavior SPHINXOPTS ?= -W SPHINXBUILD ?= sphinx-build -SPHINXMULTIVERSION ?= sphinx-multiversion SOURCEDIR = source BUILDDIR = build @@ -38,7 +37,3 @@ help: # "make 
mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -# Create multiple version of docs -multiversion: - @$(SPHINXMULTIVERSION) "$(SOURCEDIR)" "$(BUILDDIR)/html" diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst deleted file mode 100644 index f045c26..0000000 --- a/docs/source/changelog.rst +++ /dev/null @@ -1,7 +0,0 @@ -Changelog -========= - -.. changelog:: - :changelog-url: https://dolphinscheduler.apache.org/python/main#changelog - :github: https://github.com/apache/dolphinscheduler-sdk-python/releases/ - :pypi: https://pypi.org/project/apache-dolphinscheduler/ diff --git a/docs/source/conf.py b/docs/source/conf.py index 03f675d..fbf6925 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,17 +26,10 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -import os import sys from pathlib import Path -# For sphinx-multiversion, we need to build API docs of the corresponding package version, related issue: -# https://github.com/Holzhaus/sphinx-multiversion/issues/42 -pkg_src_dir = ( - Path(os.environ.get("SPHINX_MULTIVERSION_SOURCEDIR", default=".")) - .joinpath("../../src") - .resolve() -) +pkg_src_dir = Path(__file__).resolve().parents[2].joinpath("src") sys.path.insert(0, str(pkg_src_dir)) from pydolphinscheduler import __version__ # noqa @@ -76,26 +69,11 @@ # Add inline tabbed content "sphinx_inline_tabs", "sphinx_copybutton", - "sphinx_multiversion", - "sphinx_github_changelog", ] # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] -# sphinx_multiversion configuration -html_sidebars = { - "**": [ - "versioning.html", - ], -} -# Match all exists tag for pydolphinscheduler expect version 2.0.4(not release apache dolphinscheduler) -smv_tag_whitelist = r"^(?!2.0.4)\d+\.\d+\.\d+$" -smv_branch_whitelist = "main" -smv_remote_whitelist = r"^(origin|upstream)$" -smv_released_pattern = "^refs/tags/.*$" -smv_outputdir_format = "versions/{ref.name}" - # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. diff --git a/docs/source/index.rst b/docs/source/index.rst index a5921a8..9c3473d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -111,7 +111,6 @@ then go and see :doc:`tutorial` for more detail. config api resources_plugin/index - changelog Indices and tables ================== diff --git a/setup.cfg b/setup.cfg index a1dd7fe..eaa5d7d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,7 +18,7 @@ [metadata] name = apache-dolphinscheduler version = attr: pydolphinscheduler.__version__ -url = https://dolphinscheduler.apache.org/python/main/index.html +url = https://dolphinscheduler.apache.org/python/index.html description = pydolphinscheduler is Apache DolphinScheduler Python API. 
long_description = file: README.md long_description_content_type = text/markdown @@ -51,8 +51,8 @@ classifiers = Programming Language :: Python :: Implementation :: PyPy Topic :: Software Development :: User Interfaces project_urls = - Homepage = https://dolphinscheduler.apache.org/python/main/index.html - Documentation = https://dolphinscheduler.apache.org/python/main/index.html + Homepage = https://dolphinscheduler.apache.org/python/index.html + Documentation = https://dolphinscheduler.apache.org/python/index.html Source = https://github.com/apache/dolphinscheduler-sdk-python Issue Tracker = https://github.com/apache/dolphinscheduler-sdk-python/issues Twitter = https://twitter.com/dolphinschedule @@ -109,11 +109,6 @@ doc = sphinx-click>=3.0 sphinx-inline-tabs sphinx-copybutton>=0.4.0 - # Unreleased package have a feature we want(use correct version package for API ref), so we install from - # GitHub directly, see also: - # https://github.com/Holzhaus/sphinx-multiversion/issues/42#issuecomment-1210539786 - sphinx-multiversion @ git+https://github.com/Holzhaus/sphinx-multiversion#egg=sphinx-multiversion - sphinx-github-changelog dev = # build apache-dolphinscheduler[build] @@ -165,7 +160,6 @@ envlist = auto-lint lint doc-build - doc-build-multi code-test integrate-test local-integrate-test @@ -204,17 +198,6 @@ commands = commands_post = find {toxinidir}/docs -type f -name "environment.pickle" -exec rm -rf \{\} \; -[testenv:doc-build-multi] -extras = doc -commands = - # Get all tags for `multiversion` subcommand - git fetch --tags - make -C {toxinidir}/docs clean - make -C {toxinidir}/docs multiversion -# Remove sensitive information from the generated documentation -commands_post = - find {toxinidir}/docs -type f -name "environment.pickle" -exec rm -rf \{\} \; - [testenv:integrate-test] extras = test commands = @@ -227,7 +210,6 @@ setenv = commands = {[testenv:integrate-test]commands} -# local-ci do not build `doc-build-multi` [testenv:local-ci] extras = dev 
commands = diff --git a/src/pydolphinscheduler/java_gateway.py b/src/pydolphinscheduler/java_gateway.py index 5230066..08bcfea 100644 --- a/src/pydolphinscheduler/java_gateway.py +++ b/src/pydolphinscheduler/java_gateway.py @@ -65,28 +65,11 @@ def __init__( auth_token: str | None = None, ): self._gateway = None + self._is_gateway_version_checked = False self.address = address or configuration.JAVA_GATEWAY_ADDRESS self.port = port or configuration.JAVA_GATEWAY_PORT self.auto_convert = auto_convert or configuration.JAVA_GATEWAY_AUTO_CONVERT self.auth_token = auth_token or configuration.JAVA_GATEWAY_AUTH_TOKEN - gateway_version = "unknown" - with contextlib.suppress(Py4JError): - # 1. Java gateway version is too old: doesn't have method 'getGatewayVersion()' - # 2. Error connecting to Java gateway - gateway_version = self.get_gateway_version() - if ( - not __version__.endswith("dev") - and gateway_version - and not version_match(Version.DS, gateway_version) - ): - warnings.warn( - f"Using unmatched version of pydolphinscheduler (version {__version__}) " - f"and Java gateway (version {gateway_version}) may cause errors. " - "We strongly recommend you to find the matched version " - "(check: https://pypi.org/project/apache-dolphinscheduler)", - UserWarning, - stacklevel=2, - ) @property def gateway(self) -> JavaGateway: @@ -108,6 +91,35 @@ def gateway(self) -> JavaGateway: self._gateway = JavaGateway(gateway_parameters=gateway_parameters) return self._gateway + def _check_gateway_version(self): + """Warn once when Python SDK and Java gateway versions do not match.""" + if self._is_gateway_version_checked: + return + self._is_gateway_version_checked = True + if __version__.endswith("dev"): + return + + gateway_version = "unknown" + with contextlib.suppress(Py4JError): + # 1. Java gateway version is too old: doesn't have method 'getGatewayVersion()' + # 2. 
Error connecting to Java gateway + gateway_version = self.gateway.entry_point.getGatewayVersion() + if gateway_version and not version_match(Version.DS, gateway_version): + warnings.warn( + f"Using unmatched version of pydolphinscheduler (version {__version__}) " + f"and Java gateway (version {gateway_version}) may cause errors. " + "We strongly recommend you to find the matched version " + "(check: https://pypi.org/project/apache-dolphinscheduler)", + UserWarning, + stacklevel=4,  # report at the gateway method's caller + ) + + @property + def entry_point(self): + """Return Java gateway entry point with lazy version validation.""" + self._check_gateway_version() + return self.gateway.entry_point + def get_gateway_version(self): """Get the java gateway version, expected to be equal with pydolphinscheduler.""" return self.gateway.entry_point.getGatewayVersion() @@ -120,29 +132,29 @@ def get_datasource(self, name: str, type: str | None = None): :param name: datasource name of the datasource to be queried :param type: datasource type of the datasource, only used to filter the result. 
""" - return self.gateway.entry_point.getDatasource(name, type) + return self.entry_point.getDatasource(name, type) def get_resources_file_info(self, program_type: str, main_package: str): """Get resources file info through java gateway.""" - return self.gateway.entry_point.getResourcesFileInfo(program_type, main_package) + return self.entry_point.getResourcesFileInfo(program_type, main_package) def create_or_update_resource(self, user_name: str, name: str, content: str): """Create or update resource through java gateway.""" - return self.gateway.entry_point.createOrUpdateResource(user_name, name, content) + return self.entry_point.createOrUpdateResource(user_name, name, content) def query_resources_file_info(self, user_name: str, name: str): """Get resources file info through java gateway.""" - return self.gateway.entry_point.queryResourcesFileInfo(user_name, name) + return self.entry_point.queryResourcesFileInfo(user_name, name) def query_environment_info(self, name: str): """Get environment info through java gateway.""" - return self.gateway.entry_point.getEnvironmentInfo(name) + return self.entry_point.getEnvironmentInfo(name) def get_code_and_version( self, project_name: str, workflow_name: str, task_name: str ): """Get code and version through java gateway.""" - return self.gateway.entry_point.getCodeAndVersion( + return self.entry_point.getCodeAndVersion( project_name, workflow_name, task_name ) @@ -150,39 +162,37 @@ def create_or_grant_project( self, user: str, name: str, description: str | None = None ): """Create or grant project through java gateway.""" - return self.gateway.entry_point.createOrGrantProject(user, name, description) + return self.entry_point.createOrGrantProject(user, name, description) def query_project_by_name(self, user: str, name: str): """Query project through java gateway.""" - return self.gateway.entry_point.queryProjectByName(user, name) + return self.entry_point.queryProjectByName(user, name) def update_project( self, user: str, 
project_code: int, project_name: str, description: str ): """Update project through java gateway.""" - return self.gateway.entry_point.updateProject( + return self.entry_point.updateProject( user, project_code, project_name, description ) def delete_project(self, user: str, code: int): """Delete project through java gateway.""" - return self.gateway.entry_point.deleteProject(user, code) + return self.entry_point.deleteProject(user, code) def create_tenant( self, tenant_name: str, queue_name: str, description: str | None = None ): """Create tenant through java gateway.""" - return self.gateway.entry_point.createTenant( - tenant_name, description, queue_name - ) + return self.entry_point.createTenant(tenant_name, description, queue_name) def query_tenant(self, tenant_code: str): """Query tenant through java gateway.""" - return self.gateway.entry_point.queryTenantByCode(tenant_code) + return self.entry_point.queryTenantByCode(tenant_code) def grant_tenant_to_user(self, user_name: str, tenant_code: str): """Grant tenant to user through java gateway.""" - return self.gateway.entry_point.grantTenantToUser(user_name, tenant_code) + return self.entry_point.grantTenantToUser(user_name, tenant_code) def update_tenant( self, @@ -193,13 +203,13 @@ def update_tenant( description: str | None = None, ): """Update tenant through java gateway.""" - return self.gateway.entry_point.updateTenant( + return self.entry_point.updateTenant( user, tenant_id, code, queue_id, description ) def delete_tenant(self, user: str, tenant_id: int): """Delete tenant through java gateway.""" - return self.gateway.entry_point.deleteTenantById(user, tenant_id) + return self.entry_point.deleteTenantById(user, tenant_id) def create_user( self, @@ -212,7 +222,7 @@ def create_user( status: int, ): """Create user through java gateway.""" - return self.gateway.entry_point.createUser( + return self.entry_point.createUser( name, password, email, phone, tenant, queue, status ) @@ -231,13 +241,13 @@ def 
update_user( status: int, ): """Update user through java gateway.""" - return self.gateway.entry_point.updateUser( + return self.entry_point.updateUser( name, password, email, phone, tenant, queue, status ) def delete_user(self, name: str, user_id: int): """Delete user through java gateway.""" - return self.gateway.entry_point.deleteUser(name, user_id) + return self.entry_point.deleteUser(name, user_id) def get_dependent_info( self, @@ -246,15 +256,11 @@ def get_dependent_info( task_name: str | None = None, ): """Get dependent info through java gateway.""" - return self.gateway.entry_point.getDependentInfo( - project_name, workflow_name, task_name - ) + return self.entry_point.getDependentInfo(project_name, workflow_name, task_name) def get_workflow_info(self, user_name: str, project_name: str, workflow_name: str): """Get workflow info through java gateway.""" - return self.gateway.entry_point.getWorkflowInfo( - user_name, project_name, workflow_name - ) + return self.entry_point.getWorkflowInfo(user_name, project_name, workflow_name) def create_or_update_workflow( self, @@ -276,7 +282,7 @@ def create_or_update_workflow( other_params_json: str | None = None, ): """Create or update workflow through java gateway.""" - return self.gateway.entry_point.createOrUpdateWorkflow( + return self.entry_point.createOrUpdateWorkflow( user_name, project_name, name, @@ -305,7 +311,7 @@ def exec_workflow_instance( warning_group_id: int, ): """Exec workflow instance through java gateway.""" - return self.gateway.entry_point.execWorkflowInstance( + return self.entry_point.execWorkflowInstance( user_name, project_name, workflow_name, diff --git a/src/pydolphinscheduler/tasks/flink.py b/src/pydolphinscheduler/tasks/flink.py index fa6a15d..2032e91 100644 --- a/src/pydolphinscheduler/tasks/flink.py +++ b/src/pydolphinscheduler/tasks/flink.py @@ -19,6 +19,7 @@ from __future__ import annotations +import pydolphinscheduler from pydolphinscheduler.constants import TaskType from 
pydolphinscheduler.core.engine import Engine, ProgramType @@ -59,7 +60,9 @@ def __init__( main_class: str, main_package: str, program_type: ProgramType | None = ProgramType.SCALA, - deploy_mode: DeployMode | None = DeployMode.CLUSTER, + deploy_mode: ( + pydolphinscheduler.tasks.flink.DeployMode | None + ) = DeployMode.CLUSTER, flink_version: FlinkVersion | None = FlinkVersion.LOW_VERSION, app_name: str | None = None, job_manager_memory: str | None = "1G", diff --git a/src/pydolphinscheduler/tasks/spark.py b/src/pydolphinscheduler/tasks/spark.py index 93adb6b..f34e965 100644 --- a/src/pydolphinscheduler/tasks/spark.py +++ b/src/pydolphinscheduler/tasks/spark.py @@ -19,6 +19,7 @@ from __future__ import annotations +import pydolphinscheduler from pydolphinscheduler.constants import TaskType from pydolphinscheduler.core.engine import Engine, ProgramType @@ -52,7 +53,9 @@ def __init__( main_class: str, main_package: str, program_type: ProgramType | None = ProgramType.SCALA, - deploy_mode: DeployMode | None = DeployMode.CLUSTER, + deploy_mode: ( + pydolphinscheduler.tasks.spark.DeployMode | None + ) = DeployMode.CLUSTER, app_name: str | None = None, driver_cores: int | None = 1, driver_memory: str | None = "512M",