Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions apps/sim/app/api/knowledge/[id]/documents/route.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -457,11 +457,8 @@ describe('Knowledge Base Documents API Route', () => {
},
],
processingOptions: {
chunkSize: 1024,
minCharactersPerChunk: 100,
recipe: 'default',
lang: 'en',
chunkOverlap: 200,
},
}

Expand Down Expand Up @@ -533,11 +530,8 @@ describe('Knowledge Base Documents API Route', () => {
},
],
processingOptions: {
chunkSize: 50, // Invalid: too small
minCharactersPerChunk: 0, // Invalid: too small
recipe: 'default',
lang: 'en',
chunkOverlap: 1000, // Invalid: too large
},
}

Expand Down
29 changes: 8 additions & 21 deletions apps/sim/app/api/knowledge/[id]/documents/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,26 +38,14 @@ const CreateDocumentSchema = z.object({
documentTagsData: z.string().optional(),
})

/**
* Schema for bulk document creation with processing options
*
* Processing options units:
* - chunkSize: tokens (1 token ≈ 4 characters)
* - minCharactersPerChunk: characters
* - chunkOverlap: characters
*/
const BulkCreateDocumentsSchema = z.object({
documents: z.array(CreateDocumentSchema),
processingOptions: z.object({
/** Maximum chunk size in tokens (1 token ≈ 4 characters) */
chunkSize: z.number().min(100).max(4000),
/** Minimum chunk size in characters */
minCharactersPerChunk: z.number().min(1).max(2000),
recipe: z.string(),
lang: z.string(),
/** Overlap between chunks in characters */
chunkOverlap: z.number().min(0).max(500),
}),
processingOptions: z
.object({
recipe: z.string().optional(),
lang: z.string().optional(),
})
.optional(),
bulk: z.literal(true),
})

Expand Down Expand Up @@ -246,8 +234,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
knowledgeBaseId,
documentsCount: createdDocuments.length,
uploadType: 'bulk',
chunkSize: validatedData.processingOptions.chunkSize,
recipe: validatedData.processingOptions.recipe,
recipe: validatedData.processingOptions?.recipe,
})
} catch (_e) {
// Silently fail
Expand All @@ -256,7 +243,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
processDocumentsWithQueue(
createdDocuments,
knowledgeBaseId,
validatedData.processingOptions,
validatedData.processingOptions ?? {},
requestId
).catch((error: unknown) => {
logger.error(`[${requestId}] Critical error in document processing pipeline:`, error)
Expand Down
18 changes: 8 additions & 10 deletions apps/sim/app/api/knowledge/[id]/documents/upsert/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,12 @@ const UpsertDocumentSchema = z.object({
fileSize: z.number().min(1, 'File size must be greater than 0'),
mimeType: z.string().min(1, 'MIME type is required'),
documentTagsData: z.string().optional(),
processingOptions: z.object({
chunkSize: z.number().min(100).max(4000),
minCharactersPerChunk: z.number().min(1).max(2000),
recipe: z.string(),
lang: z.string(),
chunkOverlap: z.number().min(0).max(500),
}),
processingOptions: z
.object({
recipe: z.string().optional(),
lang: z.string().optional(),
})
.optional(),
workflowId: z.string().optional(),
})

Expand Down Expand Up @@ -166,7 +165,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
processDocumentsWithQueue(
createdDocuments,
knowledgeBaseId,
validatedData.processingOptions,
validatedData.processingOptions ?? {},
requestId
).catch((error: unknown) => {
logger.error(`[${requestId}] Critical error in document processing pipeline:`, error)
Expand All @@ -178,8 +177,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
knowledgeBaseId,
documentsCount: 1,
uploadType: 'single',
chunkSize: validatedData.processingOptions.chunkSize,
recipe: validatedData.processingOptions.recipe,
recipe: validatedData.processingOptions?.recipe,
})
} catch (_e) {
// Silently fail
Expand Down
15 changes: 1 addition & 14 deletions apps/sim/app/api/v1/knowledge/[id]/documents/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@ export async function POST(request: NextRequest, { params }: DocumentsRouteParam
requestId
)

const chunkingConfig = result.kb.chunkingConfig ?? { maxSize: 1024, minSize: 100, overlap: 200 }

const documentData: DocumentData = {
documentId: newDocument.id,
filename: file.name,
Expand All @@ -197,18 +195,7 @@ export async function POST(request: NextRequest, { params }: DocumentsRouteParam
mimeType: contentType,
}

processDocumentsWithQueue(
[documentData],
knowledgeBaseId,
{
chunkSize: chunkingConfig.maxSize,
minCharactersPerChunk: chunkingConfig.minSize,
chunkOverlap: chunkingConfig.overlap,
recipe: 'default',
lang: 'en',
},
requestId
).catch(() => {
processDocumentsWithQueue([documentData], knowledgeBaseId, {}, requestId).catch(() => {
// Processing errors are logged internally
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,6 @@ export function AddDocumentsModal({

try {
await uploadFiles([fileToRetry], knowledgeBaseId, {
chunkSize: chunkingConfig?.maxSize || 1024,
minCharactersPerChunk: chunkingConfig?.minSize || 1,
chunkOverlap: chunkingConfig?.overlap || 200,
recipe: 'default',
})
removeFile(index)
Expand All @@ -217,9 +214,6 @@ export function AddDocumentsModal({

try {
await uploadFiles(files, knowledgeBaseId, {
chunkSize: chunkingConfig?.maxSize || 1024,
minCharactersPerChunk: chunkingConfig?.minSize || 1,
chunkOverlap: chunkingConfig?.overlap || 200,
recipe: 'default',
})
logger.info(`Successfully uploaded ${files.length} files`)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ interface BaseCardProps {
createdAt?: string
updatedAt?: string
connectorTypes?: string[]
chunkingConfig?: { maxSize: number; minSize: number; overlap: number }
onUpdate?: (id: string, name: string, description: string) => Promise<void>
onDelete?: (id: string) => Promise<void>
}
Expand Down Expand Up @@ -78,6 +79,7 @@ export function BaseCard({
description,
updatedAt,
connectorTypes = [],
chunkingConfig,
onUpdate,
onDelete,
}: BaseCardProps) {
Expand Down Expand Up @@ -256,6 +258,7 @@ export function BaseCard({
knowledgeBaseId={id}
initialName={title}
initialDescription={description === 'No description provided' ? '' : description}
chunkingConfig={chunkingConfig}
onSave={handleSave}
/>
)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,6 @@ export const CreateBaseModal = memo(function CreateBaseModal({
if (files.length > 0) {
try {
const uploadedFiles = await uploadFiles(files, newKnowledgeBase.id, {
chunkSize: data.maxChunkSize,
minCharactersPerChunk: data.minChunkSize,
chunkOverlap: data.overlapSize,
recipe: 'default',
})

Expand Down Expand Up @@ -358,25 +355,31 @@ export const CreateBaseModal = memo(function CreateBaseModal({
<Label htmlFor='minChunkSize'>Min Chunk Size (characters)</Label>
<Input
id='minChunkSize'
type='number'
min={1}
max={2000}
step={1}
placeholder='100'
{...register('minChunkSize', { valueAsNumber: true })}
className={cn(errors.minChunkSize && 'border-[var(--text-error)]')}
autoComplete='off'
data-form-type='other'
name='min-chunk-size'
/>
</div>

<div className='flex flex-col gap-2'>
<Label htmlFor='maxChunkSize'>Max Chunk Size (tokens)</Label>
<Input
id='maxChunkSize'
type='number'
min={100}
max={4000}
step={1}
placeholder='1024'
{...register('maxChunkSize', { valueAsNumber: true })}
className={cn(errors.maxChunkSize && 'border-[var(--text-error)]')}
autoComplete='off'
data-form-type='other'
name='max-chunk-size'
/>
</div>
</div>
Expand All @@ -385,12 +388,15 @@ export const CreateBaseModal = memo(function CreateBaseModal({
<Label htmlFor='overlapSize'>Overlap (tokens)</Label>
<Input
id='overlapSize'
type='number'
min={0}
max={500}
step={1}
placeholder='200'
{...register('overlapSize', { valueAsNumber: true })}
className={cn(errors.overlapSize && 'border-[var(--text-error)]')}
autoComplete='off'
data-form-type='other'
name='overlap-size'
/>
<p className='text-[var(--text-muted)] text-xs'>
1 token ≈ 4 characters. Max chunk size and overlap are in tokens.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
Textarea,
} from '@/components/emcn'
import { cn } from '@/lib/core/utils/cn'
import type { ChunkingConfig } from '@/lib/knowledge/types'

const logger = createLogger('EditKnowledgeBaseModal')

Expand All @@ -26,6 +27,7 @@ interface EditKnowledgeBaseModalProps {
knowledgeBaseId: string
initialName: string
initialDescription: string
chunkingConfig?: ChunkingConfig
onSave: (id: string, name: string, description: string) => Promise<void>
}

Expand All @@ -49,6 +51,7 @@ export const EditKnowledgeBaseModal = memo(function EditKnowledgeBaseModal({
knowledgeBaseId,
initialName,
initialDescription,
chunkingConfig,
onSave,
}: EditKnowledgeBaseModalProps) {
const [isSubmitting, setIsSubmitting] = useState(false)
Expand Down Expand Up @@ -137,6 +140,47 @@ export const EditKnowledgeBaseModal = memo(function EditKnowledgeBaseModal({
</p>
)}
</div>

{chunkingConfig && (
<div className='flex flex-col gap-2'>
<Label>Chunking Configuration</Label>
<div className='grid grid-cols-3 gap-2'>
<div className='rounded-sm border border-[var(--border-1)] bg-[var(--surface-2)] px-2.5 py-2'>
<p className='text-[var(--text-tertiary)] text-[11px] leading-tight'>
Max Size
</p>
<p className='font-medium text-[var(--text-primary)] text-sm'>
{chunkingConfig.maxSize.toLocaleString()}
<span className='ml-0.5 font-normal text-[var(--text-tertiary)] text-[11px]'>
tokens
</span>
</p>
</div>
<div className='rounded-sm border border-[var(--border-1)] bg-[var(--surface-2)] px-2.5 py-2'>
<p className='text-[var(--text-tertiary)] text-[11px] leading-tight'>
Min Size
</p>
<p className='font-medium text-[var(--text-primary)] text-sm'>
{chunkingConfig.minSize.toLocaleString()}
<span className='ml-0.5 font-normal text-[var(--text-tertiary)] text-[11px]'>
chars
</span>
</p>
</div>
<div className='rounded-sm border border-[var(--border-1)] bg-[var(--surface-2)] px-2.5 py-2'>
<p className='text-[var(--text-tertiary)] text-[11px] leading-tight'>
Overlap
</p>
<p className='font-medium text-[var(--text-primary)] text-sm'>
{chunkingConfig.overlap.toLocaleString()}
<span className='ml-0.5 font-normal text-[var(--text-tertiary)] text-[11px]'>
tokens
</span>
</p>
</div>
</div>
</div>
)}
</div>
</ModalBody>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,6 @@ export interface UploadError {
}

export interface ProcessingOptions {
chunkSize?: number
minCharactersPerChunk?: number
chunkOverlap?: number
recipe?: string
}

Expand Down Expand Up @@ -1011,10 +1008,7 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
...file,
})),
processingOptions: {
chunkSize: processingOptions.chunkSize || 1024,
minCharactersPerChunk: processingOptions.minCharactersPerChunk || 1,
chunkOverlap: processingOptions.chunkOverlap || 200,
recipe: processingOptions.recipe || 'default',
recipe: processingOptions.recipe ?? 'default',
lang: 'en',
},
bulk: true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ export function Knowledge() {
knowledgeBaseId={activeKnowledgeBase.id}
initialName={activeKnowledgeBase.name}
initialDescription={activeKnowledgeBase.description || ''}
chunkingConfig={activeKnowledgeBase.chunkingConfig}
onSave={handleUpdateKnowledgeBase}
/>
)}
Expand Down
3 changes: 0 additions & 3 deletions apps/sim/background/knowledge-processing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,8 @@ export type DocumentProcessingPayload = {
mimeType: string
}
processingOptions: {
chunkSize?: number
minCharactersPerChunk?: number
recipe?: string
lang?: string
chunkOverlap?: number
}
requestId: string
}
Expand Down
Loading
Loading