forcedotcom
diff --git a/‎src/datacustomcode/cli.py‎
Lines changed: 14 additions & 2 deletions b/‎src/datacustomcode/cli.py‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎src/datacustomcode/io/reader/query_api.py‎
Lines changed: 39 additions & 6 deletions b/‎src/datacustomcode/io/reader/query_api.py‎
Lines changed: 39 additions & 6 deletions
diff --git a/‎src/datacustomcode/io/reader/sf_cli.py‎
Lines changed: 258 additions & 0 deletions b/‎src/datacustomcode/io/reader/sf_cli.py‎
Lines changed: 258 additions & 0 deletions
@@ -16,7 +16,11 @@
 import json
 import os
 import sys
-from typing import List, Union
+from typing import (
+    List,
+    Optional,
+    Union,
+)
 
 import click
 from loguru import logger
@@ -294,12 +298,20 @@ def scan(filename: str, config: str, dry_run: bool, no_requirements: bool):
 @click.option("--config-file", default=None)
 @click.option("--dependencies", default=[], multiple=True)
 @click.option("--profile", default="default")
+@click.option(
+    "--sf-cli-org",
+    default=None,
+    help="SF CLI org alias or username. Fetches credentials via `sf org display`.",
+)
 def run(
     entrypoint: str,
     config_file: Union[str, None],
     dependencies: List[str],
     profile: str,
+    sf_cli_org: Optional[str],
 ):
     from datacustomcode.run import run_entrypoint
 
-    run_entrypoint(entrypoint, config_file, dependencies, profile)
+    run_entrypoint(
+        entrypoint, config_file, dependencies, profile, sf_cli_org=sf_cli_org
+    )
@@ -36,6 +36,7 @@
 
 from datacustomcode.credentials import AuthType, Credentials
 from datacustomcode.io.reader.base import BaseDataCloudReader
+from datacustomcode.io.reader.sf_cli import SFCLIDataCloudReader
 
 if TYPE_CHECKING:
     import pandas
@@ -136,6 +137,7 @@ class QueryAPIDataCloudReader(BaseDataCloudReader):
     Supports multiple authentication methods:
     - OAuth Tokens (default, needs client_id/secret with refresh_token)
     - Client Credentials (server-to-server, needs client_id/secret only)
+    - SF CLI (uses ``sf org display`` access token via the REST API directly)
 
     Supports dataspace configuration for querying data within specific dataspaces.
     When a dataspace is provided (and not "default"), queries are executed within
@@ -149,6 +151,7 @@ def __init__(
         spark: SparkSession,
         credentials_profile: str = "default",
         dataspace: Optional[str] = None,
+        sf_cli_org: Optional[str] = None,
     ) -> None:
         """Initialize QueryAPIDataCloudReader.
 
@@ -160,14 +163,30 @@ def __init__(
             dataspace: Optional dataspace identifier. If provided and not "default",
                 the connection will be configured for the specified dataspace.
                 When None or "default", uses the default dataspace.
+            sf_cli_org: Optional SF CLI org alias or username.  When set, the
+                reader delegates to :class:`SFCLIDataCloudReader` which calls
+                the Data Cloud REST API directly using the token obtained from
+                ``sf org display``, bypassing the CDP token-exchange flow.
         """
         self.spark = spark
-        credentials = Credentials.from_available(profile=credentials_profile)
-        logger.debug(
-            "Initializing QueryAPIDataCloudReader with "
-            f"auth_type={credentials.auth_type.value}"
-        )
-        self._conn = create_cdp_connection(credentials, dataspace)
+        if sf_cli_org:
+            logger.debug(
+                f"Initializing QueryAPIDataCloudReader with SF CLI org '{sf_cli_org}'"
+            )
+            self._sf_cli_reader: Optional[SFCLIDataCloudReader] = SFCLIDataCloudReader(
+                spark=spark,
+                sf_cli_org=sf_cli_org,
+                dataspace=dataspace,
+            )
+            self._conn = None
+        else:
+            self._sf_cli_reader = None
+            credentials = Credentials.from_available(profile=credentials_profile)
+            logger.debug(
+                "Initializing QueryAPIDataCloudReader with "
+                f"auth_type={credentials.auth_type.value}"
+            )
+            self._conn = create_cdp_connection(credentials, dataspace)
 
     def read_dlo(
         self,
@@ -186,8 +205,15 @@ def read_dlo(
         Returns:
             PySparkDataFrame: The PySpark DataFrame.
         """
+        sf_cli_reader: Optional[SFCLIDataCloudReader] = getattr(
+            self, "_sf_cli_reader", None
+        )
+        if sf_cli_reader is not None:
+            return sf_cli_reader.read_dlo(name, schema, row_limit)
+
         query = SQL_QUERY_TEMPLATE.format(name, row_limit)
 
+        assert self._conn is not None
         pandas_df = self._conn.get_pandas_dataframe(query)
 
         # Convert pandas DataFrame to Spark DataFrame
@@ -214,8 +240,15 @@ def read_dmo(
         Returns:
             PySparkDataFrame: The PySpark DataFrame.
         """
+        sf_cli_reader: Optional[SFCLIDataCloudReader] = getattr(
+            self, "_sf_cli_reader", None
+        )
+        if sf_cli_reader is not None:
+            return sf_cli_reader.read_dmo(name, schema, row_limit)
+
         query = SQL_QUERY_TEMPLATE.format(name, row_limit)
 
+        assert self._conn is not None
         pandas_df = self._conn.get_pandas_dataframe(query)
 
         # Convert pandas DataFrame to Spark DataFrame
 
@@ -0,0 +1,258 @@
+# Copyright (c) 2025, Salesforce, Inc.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import json
+import logging
+import subprocess
+from typing import (
+    TYPE_CHECKING,
+    Final,
+    Optional,
+    Union,
+)
+
+import pandas as pd
+import pandas.api.types as pd_types
+from pyspark.sql.types import (
+    BooleanType,
+    DoubleType,
+    LongType,
+    StringType,
+    StructField,
+    StructType,
+    TimestampType,
+)
+import requests
+
+from datacustomcode.io.reader.base import BaseDataCloudReader
+
+if TYPE_CHECKING:
+    from pyspark.sql import DataFrame as PySparkDataFrame, SparkSession
+    from pyspark.sql.types import AtomicType
+
+# Module-level logger (standard-library ``logging``), named after this module.
+logger = logging.getLogger(__name__)
+
+# REST API version segment interpolated into the ``ssot/query-sql`` URL.
+API_VERSION: Final = "v66.0"
+# Spark types keyed by the string form of a pandas dtype.  Datetime dtypes are
+# handled separately and any dtype not listed here falls back to StringType
+# in ``_pandas_to_spark_schema``.
+PANDAS_TYPE_MAPPING = {
+    "object": StringType(),
+    "int64": LongType(),
+    "float64": DoubleType(),
+    "bool": BooleanType(),
+}
+
+
+def _pandas_to_spark_schema(
+    pandas_df: pd.DataFrame, nullable: bool = True
+) -> StructType:
+    """Derive a Spark ``StructType`` from the column dtypes of *pandas_df*.
+
+    Args:
+        pandas_df: DataFrame whose columns define the schema fields.
+        nullable: Nullability flag applied to every generated field.
+
+    Returns:
+        A ``StructType`` with one ``StructField`` per column, in column order.
+    """
+
+    def _to_spark_type(dtype: object) -> AtomicType:
+        # Any datetime64 flavour maps to a timestamp; everything else is
+        # looked up by dtype name, defaulting to a string column.
+        if pd_types.is_datetime64_any_dtype(dtype):
+            return TimestampType()
+        return PANDAS_TYPE_MAPPING.get(str(dtype), StringType())
+
+    return StructType(
+        [
+            StructField(col_name, _to_spark_type(col_dtype), nullable)
+            for col_name, col_dtype in pandas_df.dtypes.items()
+        ]
+    )
+
+
+class SFCLIDataCloudReader(BaseDataCloudReader):
+    """DataCloud reader that authenticates via the Salesforce CLI.
+
+    Uses ``sf org display`` to obtain a fresh access token and queries
+    Data Cloud through the REST API directly
+    (``/services/data/{version}/ssot/query-sql``), bypassing the CDP
+    token-exchange flow that requires special OAuth scopes.
+
+    The token is fetched again for every query, so no credential is stored
+    on the instance.
+    """
+
+    CONFIG_NAME = "SFCLIDataCloudReader"
+
+    def __init__(
+        self,
+        spark: SparkSession,
+        sf_cli_org: str,
+        dataspace: Optional[str] = None,
+    ) -> None:
+        """Initialize SFCLIDataCloudReader.
+
+        Args:
+            spark: SparkSession instance for creating DataFrames.
+            sf_cli_org: Salesforce org alias or username as known to the SF CLI
+                (e.g. the alias given to ``sf org login web --alias dev1``).
+            dataspace: Optional dataspace identifier.  If ``None``, empty or
+                ``"default"`` the query runs against the default dataspace.
+        """
+        self.spark = spark
+        self.sf_cli_org = sf_cli_org
+        # Every falsy value collapses to "default"; "default" maps to itself.
+        self.dataspace = dataspace or "default"
+        logger.debug("Initialized SFCLIDataCloudReader for org '%s'", sf_cli_org)
+
+    @staticmethod
+    def _cli_json_message(stdout: Optional[str]) -> Optional[str]:
+        """Return the ``message`` field of a JSON document in *stdout*, if any."""
+        if not stdout:
+            return None
+        try:
+            payload = json.loads(stdout)
+        except ValueError:  # json.JSONDecodeError is a ValueError subclass
+            return None
+        if isinstance(payload, dict):
+            message = payload.get("message")
+            if message:
+                return str(message)
+        return None
+
+    @staticmethod
+    def _error_message(response: requests.Response) -> str:
+        """Return the most specific error text available in *response*.
+
+        Falls back to the raw response text whenever the body is not JSON or
+        does not carry a ``message`` entry.
+        """
+        try:
+            payload = response.json()
+        except ValueError:
+            # Covers json, simplejson and requests' own JSONDecodeError.
+            return response.text
+        # Accept both a list of error objects and a single error object.
+        candidates = payload if isinstance(payload, list) else [payload]
+        first = candidates[0] if candidates else None
+        if isinstance(first, dict) and first.get("message"):
+            return str(first["message"])
+        return response.text
+
+    def _get_token(self) -> tuple[str, str]:
+        """Fetch a fresh access token and instance URL from the SF CLI.
+
+        Returns:
+            ``(access_token, instance_url)``
+
+        Raises:
+            RuntimeError: If the ``sf`` command is not on PATH, times out,
+                returns an error, or prints output that cannot be interpreted.
+        """
+        try:
+            result = subprocess.run(
+                ["sf", "org", "display", "--target-org", self.sf_cli_org, "--json"],
+                capture_output=True,
+                text=True,
+                check=True,
+                timeout=30,
+            )
+        except FileNotFoundError as exc:
+            raise RuntimeError(
+                "The 'sf' command was not found.  "
+                "Please install Salesforce CLI: https://developer.salesforce.com/tools/salesforcecli"
+            ) from exc
+        except subprocess.TimeoutExpired as exc:
+            raise RuntimeError(
+                f"'sf org display' timed out for org '{self.sf_cli_org}'"
+            ) from exc
+        except subprocess.CalledProcessError as exc:
+            details = [
+                f"'sf org display' failed for org '{self.sf_cli_org}'.",
+                "Ensure the org is authenticated via 'sf org login web'.",
+                # stderr may be None (e.g. output not captured); never crash
+                # while building the error message.
+                f"stderr: {(exc.stderr or '').strip()}",
+            ]
+            # NOTE(review): with --json the CLI appears to report failures as
+            # a JSON document on stdout rather than stderr - surface its
+            # message when one is present.
+            cli_message = self._cli_json_message(exc.stdout)
+            if cli_message:
+                details.append(f"message: {cli_message}")
+            raise RuntimeError("\n".join(details)) from exc
+
+        try:
+            data = json.loads(result.stdout)
+        except json.JSONDecodeError as exc:
+            raise RuntimeError(
+                f"Failed to parse 'sf org display' output: {exc}"
+            ) from exc
+
+        if not isinstance(data, dict):
+            raise RuntimeError(
+                "Failed to parse 'sf org display' output: expected a JSON object"
+            )
+
+        if data.get("status") != 0:
+            raise RuntimeError(
+                f"SF CLI error for org '{self.sf_cli_org}': "
+                f"{data.get('message', 'unknown error')}"
+            )
+
+        # "result" may be absent, null or of an unexpected type; all of those
+        # end up in the "did not return" error below instead of crashing.
+        org_result = data.get("result")
+        if not isinstance(org_result, dict):
+            org_result = {}
+        access_token = org_result.get("accessToken")
+        instance_url = org_result.get("instanceUrl")
+
+        if not access_token or not instance_url:
+            raise RuntimeError(
+                f"'sf org display' did not return an access token or instance URL "
+                f"for org '{self.sf_cli_org}'"
+            )
+
+        logger.debug("Fetched token from SF CLI for org '%s'", self.sf_cli_org)
+        return access_token, instance_url
+
+    def _execute_query(self, sql: str, row_limit: int) -> pd.DataFrame:
+        """Execute *sql* against the Data Cloud REST endpoint.
+
+        Args:
+            sql: Base SQL query (no ``LIMIT`` clause).
+            row_limit: Maximum rows to return.
+
+        Returns:
+            Pandas DataFrame with query results.
+
+        Raises:
+            RuntimeError: On HTTP errors or unexpected response shapes.
+        """
+        access_token, instance_url = self._get_token()
+
+        endpoint = f"/services/data/{API_VERSION}/ssot/query-sql"
+        # Tolerate a trailing slash on the instance URL.
+        url = instance_url.rstrip("/") + endpoint
+        statement = f"{sql} LIMIT {row_limit}"
+
+        logger.debug("Executing Data Cloud query: %s", statement)
+
+        try:
+            response = requests.post(
+                url,
+                json={"sql": statement},
+                params={"dataspace": self.dataspace},
+                headers={"Authorization": f"Bearer {access_token}"},
+                timeout=120,
+            )
+        except requests.RequestException as exc:
+            raise RuntimeError(f"Data Cloud query request failed: {exc}") from exc
+
+        if response.status_code >= 300:
+            raise RuntimeError(
+                f"Data Cloud query failed (HTTP {response.status_code}): "
+                f"{self._error_message(response)}"
+            )
+
+        try:
+            result = response.json()
+        except ValueError as exc:
+            raise RuntimeError(
+                f"Data Cloud query returned a non-JSON response: {exc}"
+            ) from exc
+
+        metadata = result.get("metadata") or [] if isinstance(result, dict) else None
+        if not isinstance(metadata, list) or not all(
+            isinstance(col, dict) for col in metadata
+        ):
+            raise RuntimeError("Data Cloud query returned an unexpected response shape")
+
+        column_names = [col.get("name") for col in metadata]
+        rows = result.get("data") or []
+
+        if not rows:
+            # Keep the column layout even when the query matched nothing.
+            return pd.DataFrame(columns=column_names)
+        return pd.DataFrame(rows, columns=column_names)
+
+    def _read_object(
+        self,
+        name: str,
+        schema: Union[AtomicType, StructType, str, None],
+        row_limit: int,
+    ) -> PySparkDataFrame:
+        """Fetch up to *row_limit* rows of object *name* as a Spark DataFrame."""
+        # NOTE: ``name`` is interpolated into the SQL text as-is, so it must
+        # come from trusted code, never from end-user input.
+        pandas_df = self._execute_query(f"SELECT * FROM {name}", row_limit)
+        if not schema:
+            schema = _pandas_to_spark_schema(pandas_df)
+        return self.spark.createDataFrame(pandas_df, schema)
+
+    def read_dlo(
+        self,
+        name: str,
+        schema: Union[AtomicType, StructType, str, None] = None,
+        row_limit: int = 1000,
+    ) -> PySparkDataFrame:
+        """Read a Data Lake Object (DLO) from Data Cloud.
+
+        Args:
+            name: DLO name.
+            schema: Optional explicit schema; inferred from the result when
+                omitted.
+            row_limit: Maximum rows to fetch.
+
+        Returns:
+            PySpark DataFrame.
+        """
+        return self._read_object(name, schema, row_limit)
+
+    def read_dmo(
+        self,
+        name: str,
+        schema: Union[AtomicType, StructType, str, None] = None,
+        row_limit: int = 1000,
+    ) -> PySparkDataFrame:
+        """Read a Data Model Object (DMO) from Data Cloud.
+
+        Args:
+            name: DMO name.
+            schema: Optional explicit schema; inferred from the result when
+                omitted.
+            row_limit: Maximum rows to fetch.
+
+        Returns:
+            PySpark DataFrame.
+        """
+        return self._read_object(name, schema, row_limit)