From 5583f94595a497137f55523af9a9cc3c0abc69f1 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 31 Mar 2026 15:58:10 -0700 Subject: [PATCH 1/6] feat(extend): add Extend AI document processing integration --- apps/docs/components/icons.tsx | 9 + apps/docs/components/ui/icon-mapping.ts | 2 + apps/docs/content/docs/en/tools/extend.mdx | 61 +++++ apps/docs/content/docs/en/tools/meta.json | 1 + .../integrations/data/icon-mapping.ts | 2 + .../integrations/data/integrations.json | 18 ++ apps/sim/app/api/tools/extend/parse/route.ts | 179 +++++++++++++ apps/sim/blocks/blocks/extend.ts | 199 ++++++++++++++ apps/sim/blocks/registry.ts | 5 +- apps/sim/components/icons.tsx | 13 + apps/sim/tools/extend/index.ts | 1 + apps/sim/tools/extend/parser.ts | 250 ++++++++++++++++++ apps/sim/tools/extend/types.ts | 89 +++++++ apps/sim/tools/registry.ts | 3 + 14 files changed, 831 insertions(+), 1 deletion(-) create mode 100644 apps/docs/content/docs/en/tools/extend.mdx create mode 100644 apps/sim/app/api/tools/extend/parse/route.ts create mode 100644 apps/sim/blocks/blocks/extend.ts create mode 100644 apps/sim/tools/extend/index.ts create mode 100644 apps/sim/tools/extend/parser.ts create mode 100644 apps/sim/tools/extend/types.ts diff --git a/apps/docs/components/icons.tsx b/apps/docs/components/icons.tsx index 86ccbef282..13e69a1691 100644 --- a/apps/docs/components/icons.tsx +++ b/apps/docs/components/icons.tsx @@ -2041,6 +2041,15 @@ export function Mem0Icon(props: SVGProps) { ) } +export function ExtendIcon(props: SVGProps) { + return ( + + + + + ) +} + export function EvernoteIcon(props: SVGProps) { return ( diff --git a/apps/docs/components/ui/icon-mapping.ts b/apps/docs/components/ui/icon-mapping.ts index 4e9791b02d..841eaf4ed7 100644 --- a/apps/docs/components/ui/icon-mapping.ts +++ b/apps/docs/components/ui/icon-mapping.ts @@ -45,6 +45,7 @@ import { EnrichSoIcon, EvernoteIcon, ExaAIIcon, + ExtendIcon, EyeIcon, FathomIcon, FirecrawlIcon, @@ -222,6 +223,7 @@ export const blockTypeToIconMap: Record = { enrich: EnrichSoIcon, evernote: EvernoteIcon, exa: ExaAIIcon, + extend_v2: ExtendIcon, fathom: FathomIcon, file_v3: DocumentIcon, firecrawl: FirecrawlIcon, diff --git a/apps/docs/content/docs/en/tools/extend.mdx b/apps/docs/content/docs/en/tools/extend.mdx new file mode 100644 index 0000000000..ac0aa55b3a --- /dev/null +++ b/apps/docs/content/docs/en/tools/extend.mdx @@ -0,0 +1,61 @@ +--- +title: Extend +description: Parse and extract content from documents using Extend AI +--- + +import { BlockInfoCard } from "@/components/ui/block-info-card" + + + +## Description + +The Extend block connects to [Extend AI](https://www.extend.ai/) to parse and extract structured content from documents. It supports a wide range of file formats including PDFs, images (JPEG, PNG, TIFF, GIF, BMP, WebP), and Office documents (Word, PowerPoint, Excel). + +Extend uses advanced document understanding to convert unstructured documents into clean, structured output — returning parsed chunks and block-level elements with content type classification and spatial metadata. + +### Key Capabilities + +- **Document Parsing**: Extract text, tables, figures, and structured content from uploaded documents or URLs. +- **Multiple Output Formats**: Choose between Markdown (default) for clean text output, or Spatial for layout-preserving extraction. +- **Chunking Strategies**: Split output by page, document, or section depending on your downstream use case. +- **Engine Selection**: Use the default `Performance` engine for best quality, or `Light` for faster processing on simpler documents. + +### Authentication + +An Extend API key is required. You can generate one from the [Extend Developer Dashboard](https://dashboard.extend.ai). + +### Supported File Types + +PDF, JPEG, PNG, TIFF, GIF, BMP, WebP, HEIC/HEIF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx), XML, HTML, CSV, TXT. + +## Tools + +### `extend_parser` + +#### Input + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `filePath` | string | No | URL to a document to be processed | +| `file` | file | No | Document file to be processed | +| `fileUpload` | object | No | File upload data from file-upload component | +| `outputFormat` | string | No | Target output format \(markdown or spatial\). Defaults to markdown. | +| `chunking` | string | No | Chunking strategy \(page, document, or section\). Defaults to page. | +| `engine` | string | No | Parsing engine \(parse_performance or parse_light\). Defaults to parse_performance. | +| `apiKey` | string | Yes | Extend API key | + +#### Output + +| Field | Type | Description | +| ----- | ---- | ----------- | +| `id` | string | Unique identifier for the parser run | +| `status` | string | Processing status | +| `chunks` | json | Parsed document content chunks | +| `blocks` | json | Block-level document elements with type and content | +| `pageCount` | number | Number of pages processed | +| `creditsUsed` | number | API credits consumed | + + diff --git a/apps/docs/content/docs/en/tools/meta.json b/apps/docs/content/docs/en/tools/meta.json index bd5b47ea46..280c7b18c4 100644 --- a/apps/docs/content/docs/en/tools/meta.json +++ b/apps/docs/content/docs/en/tools/meta.json @@ -39,6 +39,7 @@ "enrich", "evernote", "exa", + "extend", "fathom", "file", "firecrawl", diff --git a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts index 4e370b807c..c7e3f5780c 100644 --- a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts +++ b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts @@ -45,6 +45,7 @@ import { EnrichSoIcon, EvernoteIcon, ExaAIIcon, + ExtendIcon, EyeIcon, FathomIcon, FirecrawlIcon, @@ -222,6 +223,7 @@ export const blockTypeToIconMap: Record = { enrich: EnrichSoIcon, evernote: EvernoteIcon, exa: ExaAIIcon, + extend_v2: ExtendIcon, fathom: FathomIcon, file_v3: DocumentIcon, firecrawl: FirecrawlIcon, diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index 6c6120a545..26b55670d7 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -2978,6 +2978,24 @@ "integrationType": "search", "tags": ["web-scraping", "enrichment"] }, + { + "type": "extend_v2", + "slug": "extend", + "name": "Extend", + "description": "Parse and extract content from documents", + "longDescription": "Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.", + "bgColor": "#1A1A2E", + "iconName": "ExtendIcon", + "docsUrl": "https://docs.sim.ai/tools/extend", + "operations": [], + "operationCount": 0, + "triggers": [], + "triggerCount": 0, + "authType": "api-key", + "category": "tools", + "integrationType": "ai", + "tags": ["document-processing", "ocr"] + }, { "type": "fathom", "slug": "fathom", diff --git a/apps/sim/app/api/tools/extend/parse/route.ts b/apps/sim/app/api/tools/extend/parse/route.ts new file mode 100644 index 0000000000..67360859b4 --- /dev/null +++ b/apps/sim/app/api/tools/extend/parse/route.ts @@ -0,0 +1,179 @@ +import { createLogger } from '@sim/logger' +import { type NextRequest, NextResponse } from 'next/server' +import { z } from 'zod' +import { checkInternalAuth } from '@/lib/auth/hybrid' +import { + secureFetchWithPinnedIP, + validateUrlWithDNS, +} from '@/lib/core/security/input-validation.server' +import { generateRequestId } from '@/lib/core/utils/request' +import { RawFileInputSchema } from '@/lib/uploads/utils/file-schemas' +import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils' +import { resolveFileInputToUrl } from '@/lib/uploads/utils/file-utils.server' + +export const dynamic = 'force-dynamic' + +const logger = createLogger('ExtendParseAPI') + +const ExtendParseSchema = z.object({ + apiKey: z.string().min(1, 'API key is required'), + filePath: z.string().optional(), + file: RawFileInputSchema.optional(), + outputFormat: z.enum(['markdown', 'spatial']).optional(), + chunking: z.enum(['page', 'document', 'section']).optional(), + engine: z.enum(['parse_performance', 'parse_light']).optional(), +}) + +export async function POST(request: NextRequest) { + const requestId = generateRequestId() + + try { + const authResult = await checkInternalAuth(request, { requireWorkflowId: false }) + + if (!authResult.success || !authResult.userId) { + logger.warn(`[${requestId}] Unauthorized Extend parse attempt`, { + error: authResult.error || 'Missing userId', + }) + return NextResponse.json( + { + success: false, + error: authResult.error || 'Unauthorized', + }, + { status: 401 } + ) + } + + const userId = authResult.userId + const body = await request.json() + const validatedData = ExtendParseSchema.parse(body) + + logger.info(`[${requestId}] Extend parse request`, { + fileName: validatedData.file?.name, + filePath: validatedData.filePath, + isWorkspaceFile: validatedData.filePath ? isInternalFileUrl(validatedData.filePath) : false, + userId, + }) + + const resolution = await resolveFileInputToUrl({ + file: validatedData.file, + filePath: validatedData.filePath, + userId, + requestId, + logger, + }) + + if (resolution.error) { + return NextResponse.json( + { success: false, error: resolution.error.message }, + { status: resolution.error.status } + ) + } + + const fileUrl = resolution.fileUrl + if (!fileUrl) { + return NextResponse.json({ success: false, error: 'File input is required' }, { status: 400 }) + } + + const extendBody: Record = { + fileUrl, + } + + const config: Record = {} + + if (validatedData.outputFormat) { + config.targetFormat = validatedData.outputFormat + } + + if (validatedData.chunking) { + config.chunking = { strategy: validatedData.chunking } + } + + if (validatedData.engine) { + config.engine = validatedData.engine + } + + if (Object.keys(config).length > 0) { + extendBody.config = config + } + + const extendEndpoint = 'https://api.extend.ai/parse' + const extendValidation = await validateUrlWithDNS(extendEndpoint, 'Extend API URL') + if (!extendValidation.isValid) { + logger.error(`[${requestId}] Extend API URL validation failed`, { + error: extendValidation.error, + }) + return NextResponse.json( + { + success: false, + error: 'Failed to reach Extend API', + }, + { status: 502 } + ) + } + + const extendResponse = await secureFetchWithPinnedIP( + extendEndpoint, + extendValidation.resolvedIP!, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Accept: 'application/json', + Authorization: `Bearer ${validatedData.apiKey}`, + 'x-extend-api-version': '2025-04-21', + }, + body: JSON.stringify(extendBody), + } + ) + + if (!extendResponse.ok) { + const errorText = await extendResponse.text() + logger.error(`[${requestId}] Extend API error:`, errorText) + return NextResponse.json( + { + success: false, + error: `Extend API error: ${extendResponse.statusText}`, + }, + { status: extendResponse.status } + ) + } + + const extendData = await extendResponse.json() + + logger.info(`[${requestId}] Extend parse successful`) + + return NextResponse.json({ + success: true, + output: { + id: extendData.id ?? null, + status: extendData.status ?? 'PROCESSED', + chunks: extendData.chunks ?? [], + blocks: extendData.blocks ?? [], + pageCount: extendData.pageCount ?? extendData.page_count ?? null, + creditsUsed: extendData.creditsUsed ?? extendData.credits_used ?? null, + }, + }) + } catch (error) { + if (error instanceof z.ZodError) { + logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors }) + return NextResponse.json( + { + success: false, + error: 'Invalid request data', + details: error.errors, + }, + { status: 400 } + ) + } + + logger.error(`[${requestId}] Error in Extend parse:`, error) + + return NextResponse.json( + { + success: false, + error: error instanceof Error ? error.message : 'Internal server error', + }, + { status: 500 } + ) + } +} diff --git a/apps/sim/blocks/blocks/extend.ts b/apps/sim/blocks/blocks/extend.ts new file mode 100644 index 0000000000..d2bde68a83 --- /dev/null +++ b/apps/sim/blocks/blocks/extend.ts @@ -0,0 +1,199 @@ +import { ExtendIcon } from '@/components/icons' +import { AuthMode, type BlockConfig, IntegrationType, type SubBlockType } from '@/blocks/types' +import { createVersionedToolSelector, normalizeFileInput } from '@/blocks/utils' +import type { ExtendParserOutput } from '@/tools/extend/types' + +export const ExtendBlock: BlockConfig = { + type: 'extend', + name: 'Extend', + description: 'Parse and extract content from documents', + hideFromToolbar: true, + authMode: AuthMode.ApiKey, + longDescription: + 'Integrate Extend AI into the workflow. Parse and extract structured content from documents including PDFs, images, and Office files.', + docsLink: 'https://docs.sim.ai/tools/extend', + category: 'tools', + integrationType: IntegrationType.AI, + tags: ['document-processing', 'ocr'], + bgColor: '#000000', + icon: ExtendIcon, + subBlocks: [ + { + id: 'fileUpload', + title: 'Document', + type: 'file-upload' as SubBlockType, + canonicalParamId: 'document', + acceptedTypes: + 'application/pdf,image/jpeg,image/png,image/tiff,image/gif,image/bmp,image/webp,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.openxmlformats-officedocument.presentationml.presentation,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + placeholder: 'Upload a document', + mode: 'basic', + maxSize: 50, + required: true, + }, + { + id: 'filePath', + title: 'Document', + type: 'short-input' as SubBlockType, + canonicalParamId: 'document', + placeholder: 'Document URL', + mode: 'advanced', + required: true, + }, + { + id: 'outputFormat', + title: 'Output Format', + type: 'dropdown', + options: [ + { id: 'markdown', label: 'Markdown' }, + { id: 'spatial', label: 'Spatial' }, + ], + }, + { + id: 'chunking', + title: 'Chunking Strategy', + type: 'dropdown', + options: [ + { id: 'page', label: 'Page' }, + { id: 'document', label: 'Document' }, + { id: 'section', label: 'Section' }, + ], + }, + { + id: 'engine', + title: 'Engine', + type: 'dropdown', + mode: 'advanced', + options: [ + { id: 'parse_performance', label: 'Performance' }, + { id: 'parse_light', label: 'Light' }, + ], + }, + { + id: 'apiKey', + title: 'API Key', + type: 'short-input' as SubBlockType, + placeholder: 'Enter your Extend API key', + password: true, + required: true, + }, + ], + tools: { + access: ['extend_parser'], + config: { + tool: () => 'extend_parser', + params: (params) => { + const parameters: Record = { + apiKey: params.apiKey.trim(), + } + + const documentInput = params.document + + if (typeof documentInput === 'object') { + parameters.file = documentInput + } else if (typeof documentInput === 'string') { + parameters.filePath = documentInput.trim() + } + + if (params.outputFormat) { + parameters.outputFormat = params.outputFormat + } + + if (params.chunking) { + parameters.chunking = params.chunking + } + + if (params.engine) { + parameters.engine = params.engine + } + + return parameters + }, + }, + }, + inputs: { + document: { + type: 'json', + description: 'Document input (canonical param for file upload or URL)', + }, + apiKey: { type: 'string', description: 'Extend API key' }, + outputFormat: { type: 'string', description: 'Output format (markdown or spatial)' }, + chunking: { type: 'string', description: 'Chunking strategy' }, + engine: { type: 'string', description: 'Parsing engine' }, + }, + outputs: { + id: { type: 'string', description: 'Unique identifier for the parser run' }, + status: { type: 'string', description: 'Processing status' }, + chunks: { type: 'json', description: 'Parsed document content chunks' }, + blocks: { type: 'json', description: 'Block-level document elements' }, + pageCount: { type: 'number', description: 'Number of pages processed' }, + creditsUsed: { type: 'number', description: 'API credits consumed' }, + }, +} + +const extendV2Inputs = ExtendBlock.inputs +const extendV2SubBlocks = (ExtendBlock.subBlocks || []).flatMap((subBlock) => { + if (subBlock.id === 'filePath') { + return [] + } + if (subBlock.id === 'fileUpload') { + return [ + subBlock, + { + id: 'fileReference', + title: 'Document', + type: 'short-input' as SubBlockType, + canonicalParamId: 'document', + placeholder: 'File reference', + mode: 'advanced' as const, + required: true, + }, + ] + } + return [subBlock] +}) + +export const ExtendV2Block: BlockConfig = { + ...ExtendBlock, + type: 'extend_v2', + name: 'Extend', + hideFromToolbar: false, + longDescription: + 'Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.', + subBlocks: extendV2SubBlocks, + tools: { + access: ['extend_parser_v2'], + config: { + tool: createVersionedToolSelector({ + baseToolSelector: () => 'extend_parser', + suffix: '_v2', + fallbackToolId: 'extend_parser_v2', + }), + params: (params) => { + const parameters: Record = { + apiKey: params.apiKey.trim(), + } + + const documentInput = normalizeFileInput(params.document, { single: true }) + if (!documentInput) { + throw new Error('Document file is required') + } + parameters.file = documentInput + + if (params.outputFormat) { + parameters.outputFormat = params.outputFormat + } + + if (params.chunking) { + parameters.chunking = params.chunking + } + + if (params.engine) { + parameters.engine = params.engine + } + + return parameters + }, + }, + }, + inputs: extendV2Inputs, +} diff --git a/apps/sim/blocks/registry.ts b/apps/sim/blocks/registry.ts index 1f888b7a03..e11061c2fa 100644 --- a/apps/sim/blocks/registry.ts +++ b/apps/sim/blocks/registry.ts @@ -42,6 +42,7 @@ import { EnrichBlock } from '@/blocks/blocks/enrich' import { EvaluatorBlock } from '@/blocks/blocks/evaluator' import { EvernoteBlock } from '@/blocks/blocks/evernote' import { ExaBlock } from '@/blocks/blocks/exa' +import { ExtendBlock, ExtendV2Block } from '@/blocks/blocks/extend' import { FathomBlock } from '@/blocks/blocks/fathom' import { FileBlock, FileV2Block, FileV3Block } from '@/blocks/blocks/file' import { FirecrawlBlock } from '@/blocks/blocks/firecrawl' @@ -255,9 +256,11 @@ export const registry: Record = { elevenlabs: ElevenLabsBlock, fathom: FathomBlock, enrich: EnrichBlock, - evernote: EvernoteBlock, evaluator: EvaluatorBlock, + evernote: EvernoteBlock, exa: ExaBlock, + extend: ExtendBlock, + extend_v2: ExtendV2Block, file: FileBlock, file_v2: FileV2Block, file_v3: FileV3Block, diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index 86ccbef282..ee5f156970 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -2041,6 +2041,19 @@ export function Mem0Icon(props: SVGProps) { ) } +export function ExtendIcon(props: SVGProps) { + return ( + + + + ) +} + export function EvernoteIcon(props: SVGProps) { return ( diff --git a/apps/sim/tools/extend/index.ts b/apps/sim/tools/extend/index.ts new file mode 100644 index 0000000000..cf20cf8dae --- /dev/null +++ b/apps/sim/tools/extend/index.ts @@ -0,0 +1 @@ +export { extendParserTool, extendParserV2Tool } from '@/tools/extend/parser' diff --git a/apps/sim/tools/extend/parser.ts b/apps/sim/tools/extend/parser.ts new file mode 100644 index 0000000000..4e7dab956b --- /dev/null +++ b/apps/sim/tools/extend/parser.ts @@ -0,0 +1,250 @@ +import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils' +import type { + ExtendParserInput, + ExtendParserOutput, + ExtendParserV2Input, +} from '@/tools/extend/types' +import type { ToolConfig } from '@/tools/types' + +export const extendParserTool: ToolConfig = { + id: 'extend_parser', + name: 'Extend Document Parser', + description: 'Parse and extract content from documents using Extend AI', + version: '1.0.0', + + params: { + filePath: { + type: 'string', + required: false, + visibility: 'user-only', + description: 'URL to a document to be processed', + }, + file: { + type: 'file', + required: false, + visibility: 'user-only', + description: 'Document file to be processed', + }, + fileUpload: { + type: 'object', + required: false, + visibility: 'hidden', + description: 'File upload data from file-upload component', + }, + outputFormat: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Target output format (markdown or spatial). Defaults to markdown.', + }, + chunking: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Chunking strategy (page, document, or section). Defaults to page.', + }, + engine: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: + 'Parsing engine (parse_performance or parse_light). Defaults to parse_performance.', + }, + apiKey: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Extend API key', + }, + }, + + request: { + url: '/api/tools/extend/parse', + method: 'POST', + headers: (params) => ({ + 'Content-Type': 'application/json', + Accept: 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params) => { + if (!params || typeof params !== 'object') { + throw new Error('Invalid parameters: Parameters must be provided as an object') + } + + if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') { + throw new Error('Missing or invalid API key: A valid Extend API key is required') + } + + const requestBody: Record = { + apiKey: params.apiKey, + } + + const fileInput = + params.file && typeof params.file === 'object' ? params.file : params.fileUpload + const hasFileUpload = fileInput && typeof fileInput === 'object' + const hasFilePath = + typeof params.filePath === 'string' && + params.filePath !== 'null' && + params.filePath.trim() !== '' + + if (hasFilePath) { + const filePathToValidate = params.filePath!.trim() + + if (filePathToValidate.startsWith('/')) { + if (!isInternalFileUrl(filePathToValidate)) { + throw new Error( + 'Invalid file path. Only uploaded files are supported for internal paths.' + ) + } + requestBody.filePath = filePathToValidate + } else { + let url + try { + url = new URL(filePathToValidate) + + if (!['http:', 'https:'].includes(url.protocol)) { + throw new Error( + `Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol` + ) + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + throw new Error( + `Invalid URL format: ${errorMessage}. Please provide a valid HTTP or HTTPS URL to a document.` + ) + } + + requestBody.filePath = url.toString() + } + } else if (hasFileUpload) { + requestBody.file = fileInput + } else { + throw new Error('Missing file input: Please provide a document URL or upload a file') + } + + if (params.outputFormat && ['markdown', 'spatial'].includes(params.outputFormat)) { + requestBody.outputFormat = params.outputFormat + } + + if (params.chunking && ['page', 'document', 'section'].includes(params.chunking)) { + requestBody.chunking = params.chunking + } + + if (params.engine && ['parse_performance', 'parse_light'].includes(params.engine)) { + requestBody.engine = params.engine + } + + return requestBody + }, + }, + + transformResponse: async (response) => { + const data = await response.json() + + if (!data || typeof data !== 'object') { + throw new Error('Invalid response format from Extend API') + } + + const extendData = data.output ?? data + + return { + success: true, + output: { + id: extendData.id ?? null, + status: extendData.status ?? null, + chunks: extendData.chunks ?? [], + blocks: extendData.blocks ?? [], + pageCount: extendData.pageCount ?? extendData.page_count ?? null, + creditsUsed: extendData.creditsUsed ?? extendData.credits_used ?? null, + }, + } + }, + + outputs: { + id: { type: 'string', description: 'Unique identifier for the parser run' }, + status: { type: 'string', description: 'Processing status' }, + chunks: { + type: 'json', + description: 'Parsed document content chunks', + }, + blocks: { + type: 'json', + description: 'Block-level document elements with type and content', + }, + pageCount: { + type: 'number', + description: 'Number of pages processed', + optional: true, + }, + creditsUsed: { + type: 'number', + description: 'API credits consumed', + optional: true, + }, + }, +} + +export const extendParserV2Tool: ToolConfig = { + ...extendParserTool, + id: 'extend_parser_v2', + name: 'Extend Document Parser', + postProcess: undefined, + directExecution: undefined, + transformResponse: extendParserTool.transformResponse + ? (response: Response, params?: ExtendParserV2Input) => + extendParserTool.transformResponse!(response, params as unknown as ExtendParserInput) + : undefined, + params: { + file: { + type: 'file', + required: true, + visibility: 'user-only', + description: 'Document to be processed', + }, + outputFormat: extendParserTool.params.outputFormat, + chunking: extendParserTool.params.chunking, + engine: extendParserTool.params.engine, + apiKey: extendParserTool.params.apiKey, + }, + request: { + url: '/api/tools/extend/parse', + method: 'POST', + headers: (params) => ({ + 'Content-Type': 'application/json', + Accept: 'application/json', + Authorization: `Bearer ${params.apiKey}`, + }), + body: (params: ExtendParserV2Input) => { + if (!params || typeof params !== 'object') { + throw new Error('Invalid parameters: Parameters must be provided as an object') + } + + if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') { + throw new Error('Missing or invalid API key: A valid Extend API key is required') + } + + if (!params.file || typeof params.file !== 'object') { + throw new Error('Missing or invalid file: Please provide a file object') + } + + const requestBody: Record = { + apiKey: params.apiKey, + file: params.file, + } + + if (params.outputFormat && ['markdown', 'spatial'].includes(params.outputFormat)) { + requestBody.outputFormat = params.outputFormat + } + + if (params.chunking && ['page', 'document', 'section'].includes(params.chunking)) { + requestBody.chunking = params.chunking + } + + if (params.engine && ['parse_performance', 'parse_light'].includes(params.engine)) { + requestBody.engine = params.engine + } + + return requestBody + }, + }, +} diff --git a/apps/sim/tools/extend/types.ts b/apps/sim/tools/extend/types.ts new file mode 100644 index 0000000000..dd65c126e0 --- /dev/null +++ b/apps/sim/tools/extend/types.ts @@ -0,0 +1,89 @@ +import type { RawFileInput } from '@/lib/uploads/utils/file-utils' +import type { UserFile } from '@/executor/types' +import type { ToolResponse } from '@/tools/types' + +/** + * Input parameters for the Extend parser tool + */ +export interface ExtendParserInput { + /** URL to a document to be processed */ + filePath?: string + + file?: RawFileInput + + /** File upload data (from file-upload component) */ + fileUpload?: RawFileInput + + /** Extend API key for authentication */ + apiKey: string + + /** Target output format */ + outputFormat?: 'markdown' | 'spatial' + + /** Chunking strategy */ + chunking?: 'page' | 'document' | 'section' + + /** Parsing engine */ + engine?: 'parse_performance' | 'parse_light' +} + +export interface ExtendParserV2Input { + /** File to be processed */ + file: UserFile + + /** Extend API key for authentication */ + apiKey: string + + /** Target output format */ + outputFormat?: 'markdown' | 'spatial' + + /** Chunking strategy */ + chunking?: 'page' | 'document' | 'section' + + /** Parsing engine */ + engine?: 'parse_performance' | 'parse_light' +} + +/** + * Chunk from parsed document + */ +export interface ExtendParseChunk { + content: string + page?: number + metadata?: Record +} + +/** + * Block-level element from parsed document + */ +export interface ExtendParseBlock { + type: string + content: string + bbox?: { + left: number + top: number + width: number + height: number + page: number + } + metadata?: Record +} + +/** + * Native Extend API response structure for parsing + */ +export interface ExtendParserOutputData { + id: string + status: string + chunks: ExtendParseChunk[] + blocks: ExtendParseBlock[] + pageCount: number | null + creditsUsed: number | null +} + +/** + * Complete response from the Extend parser tool + */ +export interface ExtendParserOutput extends ToolResponse { + output: ExtendParserOutputData +} diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index 36bb3e2739..522980ae98 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -491,6 +491,7 @@ import { exaResearchTool, exaSearchTool, } from '@/tools/exa' +import { extendParserTool, extendParserV2Tool } from '@/tools/extend' import { fathomGetSummaryTool, fathomGetTranscriptTool, @@ -3491,6 +3492,8 @@ export const tools: Record = { enrich_search_posts: enrichSearchPostsTool, enrich_search_similar_companies: enrichSearchSimilarCompaniesTool, enrich_verify_email: enrichVerifyEmailTool, + extend_parser: extendParserTool, + extend_parser_v2: extendParserV2Tool, exa_search: exaSearchTool, exa_get_contents: exaGetContentsTool, exa_find_similar_links: exaFindSimilarLinksTool, From 2634fdb94e821c103fd3279337d86e74efa2a8cf Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 31 Mar 2026 16:02:43 -0700 Subject: [PATCH 2/6] fix(extend): cast json response to fix type error --- apps/sim/app/api/tools/extend/parse/route.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sim/app/api/tools/extend/parse/route.ts b/apps/sim/app/api/tools/extend/parse/route.ts index 67360859b4..3acd9f7f77 100644 --- a/apps/sim/app/api/tools/extend/parse/route.ts +++ b/apps/sim/app/api/tools/extend/parse/route.ts @@ -138,7 +138,7 @@ export async function POST(request: NextRequest) { ) } - const extendData = await extendResponse.json() + const extendData = (await extendResponse.json()) as Record logger.info(`[${requestId}] Extend parse successful`) From 073315b630248dc92621298b99ae743710b76347 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 31 Mar 2026 16:05:34 -0700 Subject: [PATCH 3/6] fix(extend): correct API request body structure per Extend docs --- apps/sim/app/api/tools/extend/parse/route.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/sim/app/api/tools/extend/parse/route.ts b/apps/sim/app/api/tools/extend/parse/route.ts index 3acd9f7f77..d65284f944 100644 --- a/apps/sim/app/api/tools/extend/parse/route.ts +++ b/apps/sim/app/api/tools/extend/parse/route.ts @@ -75,17 +75,17 @@ export async function POST(request: NextRequest) { } const extendBody: Record = { - fileUrl, + file: { fileUrl }, } const config: Record = {} if (validatedData.outputFormat) { - config.targetFormat = validatedData.outputFormat + config.target = validatedData.outputFormat } if (validatedData.chunking) { - config.chunking = { strategy: validatedData.chunking } + config.chunkingStrategy = { type: validatedData.chunking } } if (validatedData.engine) { From 4830a4c614cacb4ad7dd0c1599209b78c6fca5e2 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 31 Mar 2026 16:07:07 -0700 Subject: [PATCH 4/6] fix(extend): address PR review comments --- apps/docs/components/icons.tsx | 10 +++++++--- apps/sim/app/api/tools/extend/parse/route.ts | 11 ++++++++++- apps/sim/blocks/blocks/extend.ts | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/apps/docs/components/icons.tsx b/apps/docs/components/icons.tsx index 13e69a1691..ee5f156970 100644 --- a/apps/docs/components/icons.tsx +++ b/apps/docs/components/icons.tsx @@ -2043,9 +2043,13 @@ export function Mem0Icon(props: SVGProps) { export function ExtendIcon(props: SVGProps) { return ( - - - + + ) } diff --git a/apps/sim/app/api/tools/extend/parse/route.ts b/apps/sim/app/api/tools/extend/parse/route.ts index d65284f944..3f604c4810 100644 --- a/apps/sim/app/api/tools/extend/parse/route.ts +++ b/apps/sim/app/api/tools/extend/parse/route.ts @@ -129,10 +129,19 @@ export async function POST(request: NextRequest) { if (!extendResponse.ok) { const errorText = await extendResponse.text() logger.error(`[${requestId}] Extend API error:`, errorText) + let clientError = `Extend API error: ${extendResponse.statusText || extendResponse.status}` + try { + const parsedError = JSON.parse(errorText) + if (parsedError?.message || parsedError?.error) { + clientError = (parsedError.message ?? parsedError.error) as string + } + } catch { + // errorText is not JSON; keep generic message + } return NextResponse.json( { success: false, - error: `Extend API error: ${extendResponse.statusText}`, + error: clientError, }, { status: extendResponse.status } ) diff --git a/apps/sim/blocks/blocks/extend.ts b/apps/sim/blocks/blocks/extend.ts index d2bde68a83..3d1572eeb8 100644 --- a/apps/sim/blocks/blocks/extend.ts +++ b/apps/sim/blocks/blocks/extend.ts @@ -143,7 +143,7 @@ const extendV2SubBlocks = (ExtendBlock.subBlocks || []).flatMap((subBlock) => { title: 'Document', type: 'short-input' as SubBlockType, canonicalParamId: 'document', - placeholder: 'File reference', + placeholder: 'Connect a file output from another block', mode: 'advanced' as const, required: true, }, From 3b0d080133a6afd361251db6c9a6257cc28b5600 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 31 Mar 2026 16:13:17 -0700 Subject: [PATCH 5/6] fix(extend): sync integrations.json bgColor to #000000 --- apps/sim/app/(landing)/integrations/data/integrations.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index 26b55670d7..49fb113f9e 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -2984,7 +2984,7 @@ "name": "Extend", "description": "Parse and extract content from documents", "longDescription": "Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.", - "bgColor": "#1A1A2E", + "bgColor": "#000000", "iconName": "ExtendIcon", "docsUrl": "https://docs.sim.ai/tools/extend", "operations": [], From eb90d9640d3d1a9dfae6b0d66b23f08fbd8cc7b9 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Tue, 31 Mar 2026 16:14:42 -0700 Subject: [PATCH 6/6] lint --- apps/docs/content/docs/en/tools/extend.mdx | 32 ++++------------------ 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/apps/docs/content/docs/en/tools/extend.mdx b/apps/docs/content/docs/en/tools/extend.mdx index ac0aa55b3a..8cecfca278 100644 --- a/apps/docs/content/docs/en/tools/extend.mdx +++ b/apps/docs/content/docs/en/tools/extend.mdx @@ -1,35 +1,20 @@ --- title: Extend -description: Parse and extract content from documents using Extend AI +description: Parse and extract content from documents --- import { BlockInfoCard } from "@/components/ui/block-info-card" - -## Description +## Usage Instructions -The Extend block connects to [Extend AI](https://www.extend.ai/) to parse and extract structured content from documents. It supports a wide range of file formats including PDFs, images (JPEG, PNG, TIFF, GIF, BMP, WebP), and Office documents (Word, PowerPoint, Excel). +Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references. -Extend uses advanced document understanding to convert unstructured documents into clean, structured output — returning parsed chunks and block-level elements with content type classification and spatial metadata. -### Key Capabilities - -- **Document Parsing**: Extract text, tables, figures, and structured content from uploaded documents or URLs. -- **Multiple Output Formats**: Choose between Markdown (default) for clean text output, or Spatial for layout-preserving extraction. -- **Chunking Strategies**: Split output by page, document, or section depending on your downstream use case. -- **Engine Selection**: Use the default `Performance` engine for best quality, or `Light` for faster processing on simpler documents. - -### Authentication - -An Extend API key is required. You can generate one from the [Extend Developer Dashboard](https://dashboard.extend.ai). - -### Supported File Types - -PDF, JPEG, PNG, TIFF, GIF, BMP, WebP, HEIC/HEIF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx), XML, HTML, CSV, TXT. ## Tools @@ -49,13 +34,6 @@ PDF, JPEG, PNG, TIFF, GIF, BMP, WebP, HEIC/HEIF, Word (.docx), PowerPoint (.pptx #### Output -| Field | Type | Description | -| ----- | ---- | ----------- | -| `id` | string | Unique identifier for the parser run | -| `status` | string | Processing status | -| `chunks` | json | Parsed document content chunks | -| `blocks` | json | Block-level document elements with type and content | -| `pageCount` | number | Number of pages processed | -| `creditsUsed` | number | API credits consumed | +This tool does not produce any outputs.