
Commit 4ad70a2

add llm_gateway_generate_text
1 parent b3e84ee commit 4ad70a2

4 files changed

Lines changed: 73 additions & 3 deletions


CHANGELOG.md

Lines changed: 40 additions & 0 deletions
@@ -1,5 +1,45 @@
 # Changelog
 
+## 1.0.1
+
+### Added
+
+- **`llm_gateway_generate_text()` UDF wrapper for AI-powered DataFrame transformations.**
+
+New method on proxy providers to generate AI completions in DataFrame operations via the `llm_gateway_generate` UDF.
+
+```python
+from datacustomcode import Client
+from pyspark.sql.functions import col
+
+client = Client()
+
+# Generate summaries in a DataFrame column
+df = df.withColumn(
+    "summary",
+    client._proxy.llm_gateway_generate_text(
+        "Summarize {company}: revenue={revenue}, CEO={ceo}",
+        {
+            "company": col("company"),
+            "revenue": col("revenue"),
+            "ceo": col("ceo")
+        },
+        llmModelId="sfdc_ai__DefaultGPT4Omni",
+        maxTokens=200
+    )
+)
+```
+
+**Local Development:** Returns a placeholder string (doesn't execute)
+**BYOC Production:** Calls the real `llm_gateway_generate` UDF
+
+**Parameters:**
+- `template` (str): Prompt template with `{placeholder}` syntax
+- `values` (dict or Column): Dict mapping placeholders to Columns, or a pre-built `named_struct`
+- `llmModelId` (str): Model identifier (required, e.g., `"sfdc_ai__DefaultGPT4Omni"`)
+- `maxTokens` (int): Maximum response length (required, e.g., 200)
+
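The `{placeholder}` syntax in `template` mirrors Python's `str.format` convention; as a plain-Python illustration of how placeholders are filled (literal strings stand in here for the Spark Columns the wrapper actually accepts):

```python
# Plain-Python sketch of the {placeholder} template syntax;
# literal values stand in for the Spark Columns used above.
template = "Summarize {company}: revenue={revenue}, CEO={ceo}"
values = {"company": "Acme Corp", "revenue": "12M", "ceo": "J. Doe"}

prompt = template.format(**values)
print(prompt)  # Summarize Acme Corp: revenue=12M, CEO=J. Doe
```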
+
 ## 1.0.0
 
 ### Breaking Changes

README.md

Lines changed: 27 additions & 3 deletions
@@ -155,7 +155,7 @@ You should only need the following methods:
 * `write_to_dmo(name, spark_dataframe, write_mode)` – Write to a Data Model Object by name with a Spark dataframe
 
 For example:
-```
+```python
 from datacustomcode import Client
 
 client = Client()
@@ -166,10 +166,34 @@ sdf = client.read_dlo('my_DLO')
 client.write_to_dlo('output_DLO', sdf)
 ```
 
+### LLM Gateway
 
-> [!WARNING]
-> Currently we only support reading from DMOs and writing to DMOs or reading from DLOs and writing to DLOs, but they cannot mix.
+Generate AI completions in DataFrame transformations using the LLM gateway UDF.
 
+```python
+from datacustomcode import Client
+from pyspark.sql.functions import col
+
+client = Client()
+
+# Use template with placeholders
+df = df.withColumn(
+    "summary",
+    client._proxy.llm_gateway_generate_text(
+        "Summarize {company}: revenue={revenue}, CEO={ceo}",
+        {
+            "company": col("company"),
+            "revenue": col("revenue"),
+            "ceo": col("ceo")
+        },
+        llmModelId="sfdc_ai__DefaultGPT4Omni",
+        maxTokens=200
+    )
+)
+```
+
+> [!WARNING]
+> This method returns a placeholder string in local development and won't execute. It only works when deployed, where it calls the real LLM Gateway service via the `llm_gateway_generate` UDF.
 
 ## CLI
 
src/datacustomcode/proxy/client/LocalProxyClientProvider.py

Lines changed: 3 additions & 0 deletions
@@ -27,3 +27,6 @@ def __init__(self, **kwargs: object) -> None:
 
     def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str:
         return f"Hello, thanks for using {llmModelId}. So many tokens: {maxTokens}"
+
+    def llm_gateway_generate_text(self, template, values, llmModelId: str, maxTokens: int):
+        return f"Using Generate Text with {llmModelId} and maxTokens: {maxTokens}"
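For quick local checks without a Spark session, the stub above can be exercised directly; a minimal sketch, with the class reduced to the two methods shown in this diff (the real class, including its constructor, lives in `datacustomcode.proxy.client`):

```python
# Illustrative reconstruction of the local stub from the diff above;
# only the two gateway methods are reproduced.
class LocalProxyClientProvider:
    def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str:
        # Local development placeholder: no gateway call is made
        return f"Hello, thanks for using {llmModelId}. So many tokens: {maxTokens}"

    def llm_gateway_generate_text(self, template, values, llmModelId: str, maxTokens: int):
        # Local development placeholder: template and values are ignored
        return f"Using Generate Text with {llmModelId} and maxTokens: {maxTokens}"

provider = LocalProxyClientProvider()
result = provider.llm_gateway_generate_text(
    "Summarize {company}", {"company": None},
    llmModelId="sfdc_ai__DefaultGPT4Omni", maxTokens=200,
)
print(result)  # Using Generate Text with sfdc_ai__DefaultGPT4Omni and maxTokens: 200
```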

src/datacustomcode/proxy/client/base.py

Lines changed: 3 additions & 0 deletions
@@ -25,3 +25,6 @@ def __init__(self):
 
     @abstractmethod
     def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: ...
+
+    @abstractmethod
+    def llm_gateway_generate_text(self, template, values, llmModelId: str, maxTokens: int): ...
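Because both methods are declared `@abstractmethod`, every proxy provider must implement `llm_gateway_generate_text` before it can be instantiated; a minimal sketch of that enforcement (the base class name `ProxyProvider` is illustrative, not the library's actual name):

```python
from abc import ABC, abstractmethod

# Illustrative base class mirroring the abstract methods in the diff above.
class ProxyProvider(ABC):
    @abstractmethod
    def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: ...

    @abstractmethod
    def llm_gateway_generate_text(self, template, values, llmModelId: str, maxTokens: int): ...

# A subclass that skips llm_gateway_generate_text cannot be instantiated:
class Incomplete(ProxyProvider):
    def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str:
        return "stub"

try:
    Incomplete()
    instantiated = True
except TypeError:
    # abc raises TypeError for missing abstract methods
    instantiated = False
print(instantiated)  # False
```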
