forcedotcom
diff --git a/.github/workflows/sf_cli_integration.yml b/.github/workflows/sf_cli_integration.yml
Lines changed: 4 additions & 6 deletions
diff --git a/src/datacustomcode/function/feature_types/chunking.py b/src/datacustomcode/function/feature_types/chunking.py
Lines changed: 81 additions & 54 deletions
@@ -259,11 +259,10 @@ jobs:
       run: |
         sf data-code-extension function run \
           --entrypoint testFunction/payload/entrypoint.py \
-          --test-with testFunction/payload/tests/test.json \
-          -o dev1 || {
-          echo "::error::sf data-code-extension function run FAILED. Check mock server output above; the --entrypoint flag or SF CLI org auth contract may have changed."
-          exit 1
-        }
+          --test-with testFunction/payload/tests/test.json || {
+            echo "::error::sf data-code-extension function run FAILED. Check mock server output above; the --entrypoint flag or SF CLI org auth contract may have changed."
+            exit 1
+          }
 
       # ── Function: deploy ─────────────────────────────────────────────────────
 
@@ -275,7 +274,6 @@ jobs:
           --description "Test function deploy" \
           --package-dir testFunction/payload \
           --cpu-size CPU_2XL \
-          --function-invoke-opt UnstructuredChunking \
           -o dev1 || {
           echo "::error::sf data-code-extension function deploy FAILED. Check mock server output above for which endpoint failed. The deploy command flags or API contract may have changed."
           exit 1
 
@@ -16,154 +16,181 @@
 """
 Pydantic models for Search Index Chunking V1
 """
+from enum import Enum
 from typing import (
     Dict,
     List,
-    Union
+    Union,
 )
 
-from enum import Enum
-from pydantic import BaseModel, Field, ConfigDict
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+)
 
 
 class DocumentType(str, Enum):
     """Document type enumeration"""
+
     TEXT = "Text"
+    TITLE = "Title"
+    TABLE = "Table"
+    IMAGE = "Image"
+    LIST_ITEM = "ListItem"
+    CODE_SNIPPET = "CodeSnippet"
+    PAGE_METADATA = "PageMetadata"
+
+
+class ChunkType(str, Enum):
+    TEXT = "text"
 
 
 class SearchIndexChunkingV1PrependField(BaseModel):
     """Field to prepend to chunk content"""
+
     dmo_name: str = Field(
-        default="",
-        description="Data Model Object name",
-        examples=["udmo_1__dlm"]
+        default="", description="Data Model Object name", examples=["udmo_1__dlm"]
     )
     field_name: str = Field(
         default="",
         description="Field name to prepend",
-        examples=["ResolvedFilePath__c"]
+        examples=["ResolvedFilePath__c"],
     )
     value: str = Field(
         default="",
         description="Field value to prepend",
-        examples=["udlo_1__dll:quarterly_report.pdf"]
+        examples=["udlo_1__dll:quarterly_report.pdf"],
     )
-    model_config = ConfigDict(extra='ignore')
+    model_config = ConfigDict(extra="ignore")
 
 
-class SearchIndexChunkingV1Metadata(BaseModel):
-    """Metadata for input documents"""
-    type: DocumentType = Field(
-        default=DocumentType.TEXT,
-        description="Document type (Text)",
-        examples=["Text"]
-    )
-    page_number: int = Field(
-        default=0,
-        description="Page number in the source document (0-based)",
-        examples=[1]
-    )
+class SearchIndexChunkingV1TranscriptField(BaseModel):
+    """Speaker and timestamp fields for audio/video transcripts"""
+
     speaker: str = Field(
         default="",
         description="Speaker name for audio/video transcripts",
-        examples=["Narrator"]
+        examples=["Agent"],
     )
     start_timestamp: str = Field(
         default="",
         description="Start timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff",
-        examples=["2026-03-25T02:01:24.918000"]
+        examples=["2026-03-25T02:01:24.918000"],
     )
     end_timestamp: str = Field(
         default="",
         description="End timestamp in ISO8601 format: YYYY-MM-DDTHH:MM:SS.ffffff",
-        examples=["2026-03-25T02:01:30.500000"]
+        examples=["2026-03-25T02:01:30.500000"],
+    )
+    model_config = ConfigDict(extra="ignore")
+
+
+class SearchIndexChunkingV1Metadata(BaseModel):
+    """Metadata for input documents"""
+
+    type: DocumentType = Field(
+        default=DocumentType.TEXT, description="Document type (Text)", examples=["Text"]
+    )
+    transcript_fields: SearchIndexChunkingV1TranscriptField = Field(
+        default_factory=SearchIndexChunkingV1TranscriptField,
+        description=(
+            "Transcript information. Will only be there in case of audio-video files"
+        ),
+    )
+    page_number: int = Field(
+        default=0,
+        description="Page number in the source document (0-based)",
+        examples=[1],
     )
     text_as_html: str = Field(
         default="",
         description="HTML representation of the document text",
-        examples=["<p>Online Remittance Instructions</p>"]
+        examples=["<p>Online Remittance Instructions</p>"],
     )
     source_dmo_fields: Dict[str, Union[str, int]] = Field(
         default_factory=dict,
-        description="Source Data Model Object fields as key-value pairs (values can be string or int)",
+        description=(
+            "Source Data Model Object fields as key-value pairs "
+            "(values can be string or int)"
+        ),
         examples=[
             {
                 "FilePath__c": "quarterly_report.pdf",
                 "Size__c": 1377454,
                 "ContentType__c": "pdf",
-                "LastModified__c": "2026-03-25T02:01:24.918000"
+                "LastModified__c": "2026-03-25T02:01:24.918000",
             }
-        ]
+        ],
     )
     prepend: List[SearchIndexChunkingV1PrependField] = Field(
-        default_factory=list,
-        description="List of fields to prepend to each chunk"
+        default_factory=list, description="List of fields to prepend to each chunk"
     )
-    model_config = ConfigDict(extra='ignore')
+    model_config = ConfigDict(extra="ignore")
 
 
 class SearchIndexChunkingV1DocElement(BaseModel):
     """Document element to be chunked"""
+
     text: str = Field(
         default="",
         description="Text content to be chunked",
-        examples=["Online Remittance Instructions\n\nTransfer proceeds from the sale of your ESOP/RSUs easily."]
+        examples=[
+            (
+                "Online Remittance Instructions\n\n"
+                "Transfer proceeds from the sale of your ESOP/RSUs easily."
+            )
+        ],
     )
     metadata: SearchIndexChunkingV1Metadata = Field(
         default_factory=SearchIndexChunkingV1Metadata,
-        description="Source document metadata"
+        description="Source document metadata",
     )
-    model_config = ConfigDict(extra='ignore')
+    model_config = ConfigDict(extra="ignore")
 
 
 class SearchIndexChunkingV1Output(BaseModel):
     """Output chunk from the chunking process"""
+
     text: str = Field(
         default="",
         description="Chunk text content",
-        examples=["Online Remittance Instructions"]
+        examples=["Online Remittance Instructions"],
     )
     seq_no: int = Field(
-        default=0,
-        description="Sequential chunk number (1-based)",
-        ge=1,
-        examples=[1]
+        default=0, description="Sequential chunk number (1-based)", ge=1, examples=[1]
     )
     chunk_id: str = Field(
         default="",
         description="Unique identifier for this chunk (UUID format)",
-        examples=["550e8400-e29b-41d4-a716-446655440000"]
+        examples=["550e8400-e29b-41d4-a716-446655440000"],
     )
-    chunk_type: str = Field(
-        default="",
+    chunk_type: ChunkType = Field(
+        default=ChunkType.TEXT,
         description="Type of chunk (e.g., 'text')",
-        examples=["text"]
+        examples=["text"],
     )
     citations: Dict[str, str] = Field(
         default_factory=dict,
         description="Citation information as key-value pairs",
-        examples=[{"source": "quarterly_report.pdf"}]
+        examples=[{"source": "quarterly_report.pdf"}],
     )
-    metadata: str = Field(
-        default="",
-        description="JSON string containing metadata about the chunking output",
-        examples=['{"page": 1}']
-    )
-    model_config = ConfigDict(extra='ignore')
+    model_config = ConfigDict(extra="ignore")
 
 
 class SearchIndexChunkingV1Request(BaseModel):
     """Request for Search Index Chunking"""
+
     input: List[SearchIndexChunkingV1DocElement] = Field(
-        default_factory=list,
-        description="List of documents to be chunked"
+        default_factory=list, description="List of documents to be chunked"
     )
-    model_config = ConfigDict(extra='ignore')
+    model_config = ConfigDict(extra="ignore")
 
 
 class SearchIndexChunkingV1Response(BaseModel):
     """Batch response for UDS chunking"""
+
     output: List[SearchIndexChunkingV1Output] = Field(
         default_factory=list, description="Flat list of chunks from all docs"
     )
-    model_config = ConfigDict(extra='ignore')
+    model_config = ConfigDict(extra="ignore")