diff --git a/apps/sim/app/api/tools/image/generate/route.ts b/apps/sim/app/api/tools/image/generate/route.ts
new file mode 100644
index 0000000000..1e4e9468da
--- /dev/null
+++ b/apps/sim/app/api/tools/image/generate/route.ts
@@ -0,0 +1,262 @@
+import { createLogger } from '@sim/logger'
+import { type NextRequest, NextResponse } from 'next/server'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { getInternalApiBaseUrl } from '@/lib/core/utils/urls'
+import type { ImageGenerationRequestBody } from '@/tools/image/types'
+
+const logger = createLogger('ImageGenerateAPI')
+
+export const dynamic = 'force-dynamic'
+export const maxDuration = 300 // 5 minutes for image generation with polling
+
+const DEFAULT_MODEL = 'flux'
+const DEFAULT_DIMENSION = 1024
+const POLL_INTERVAL_MS = 5000
+const MAX_POLL_ATTEMPTS = 40 // 40 × 5s = 200s max, which stays below maxDuration
+
+/**
+ * Handles image generation requests for the ModelsLab provider.
+ *
+ * Authenticates the caller, validates the JSON body, generates the image and,
+ * on a best-effort basis, inlines it as base64 through the image proxy.
+ * Responds 401 on failed auth, 400 on invalid input and 500 on any other error.
+ */
+export async function POST(request: NextRequest) {
+  const requestId = crypto.randomUUID()
+  logger.info(`[${requestId}] Image generation request started`)
+
+  try {
+    const authResult = await checkInternalAuth(request, { requireWorkflowId: false })
+    if (!authResult.success) {
+      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
+    }
+
+    // A malformed or non-object body is a client error, not a server error.
+    const body: Partial<ImageGenerationRequestBody> | null = await request
+      .json()
+      .catch(() => null)
+    if (!body || typeof body !== 'object') {
+      return NextResponse.json({ error: 'Request body must be a JSON object' }, { status: 400 })
+    }
+
+    const { provider, apiKey, model, prompt, width, height, negativePrompt } = body
+
+    // The typeof checks keep non-string JSON values from passing validation.
+    if (
+      !provider ||
+      typeof apiKey !== 'string' ||
+      !apiKey ||
+      typeof prompt !== 'string' ||
+      !prompt
+    ) {
+      return NextResponse.json(
+        { error: 'Missing required fields: provider, apiKey, and prompt' },
+        { status: 400 }
+      )
+    }
+
+    if (provider !== 'modelslab') {
+      return NextResponse.json(
+        { error: `Unsupported provider: ${provider}. Currently supports: modelslab` },
+        { status: 400 }
+      )
+    }
+
+    if (prompt.length < 3 || prompt.length > 2000) {
+      return NextResponse.json(
+        { error: 'Prompt must be between 3 and 2000 characters' },
+        { status: 400 }
+      )
+    }
+
+    const resolvedModel = typeof model === 'string' && model ? model : DEFAULT_MODEL
+    const resolvedWidth = resolveDimension(width, DEFAULT_DIMENSION)
+    const resolvedHeight = resolveDimension(height, DEFAULT_DIMENSION)
+
+    logger.info(`[${requestId}] Generating image with ModelsLab, model: ${resolvedModel}`)
+
+    const result = await generateWithModelsLab(
+      apiKey,
+      prompt,
+      resolvedModel,
+      resolvedWidth,
+      resolvedHeight,
+      typeof negativePrompt === 'string' ? negativePrompt : undefined,
+      requestId
+    )
+
+    // Non-fatal: when the download fails the URL is still returned.
+    const imageFile = await fetchImageAsBase64(result.imageUrl, requestId)
+
+    logger.info(`[${requestId}] Image generation complete`)
+
+    return NextResponse.json({
+      imageUrl: result.imageUrl,
+      imageFile,
+      model: resolvedModel,
+      provider: 'modelslab',
+    })
+  } catch (error) {
+    const errorMessage = error instanceof Error ? error.message : String(error)
+    logger.error(`[${requestId}] Image generation error:`, { error: errorMessage })
+    return NextResponse.json({ error: errorMessage }, { status: 500 })
+  }
+}
+
+/**
+ * Normalizes a requested image dimension.
+ *
+ * @param value - Raw width or height from the request body (number or numeric string).
+ * @param fallback - Dimension used when `value` is missing or not a positive number.
+ * @returns A positive integer, or `fallback`.
+ */
+function resolveDimension(value: unknown, fallback: number): number {
+  const numeric = typeof value === 'string' ? Number(value) : value
+  if (typeof numeric !== 'number' || !Number.isFinite(numeric)) {
+    return fallback
+  }
+  const rounded = Math.round(numeric)
+  return rounded > 0 ? rounded : fallback
+}
+
+/** Returns the first image URL of a ModelsLab payload, or `undefined` when absent. */
+function firstOutputUrl(data: { output?: unknown }): string | undefined {
+  const first: unknown = Array.isArray(data.output) ? data.output[0] : undefined
+  return typeof first === 'string' && first.length > 0 ? first : undefined
+}
+
+/**
+ * Downloads the generated image through the internal image proxy and returns it
+ * base64-encoded. Best-effort: failures are logged and yield `undefined`.
+ */
+async function fetchImageAsBase64(
+  imageUrl: string,
+  requestId: string
+): Promise<string | undefined> {
+  try {
+    const proxyUrl = new URL('/api/tools/image', getInternalApiBaseUrl())
+    proxyUrl.searchParams.append('url', imageUrl)
+
+    const { generateInternalToken } = await import('@/lib/auth/internal')
+    const token = await generateInternalToken()
+
+    const imageResponse = await fetch(proxyUrl.toString(), {
+      headers: {
+        Accept: 'image/*, */*',
+        Authorization: `Bearer ${token}`,
+      },
+      cache: 'no-store',
+    })
+
+    if (!imageResponse.ok) {
+      logger.warn(`[${requestId}] Image proxy responded with status ${imageResponse.status}`)
+      return undefined
+    }
+
+    const arrayBuffer = await imageResponse.arrayBuffer()
+    return arrayBuffer.byteLength > 0 ? Buffer.from(arrayBuffer).toString('base64') : undefined
+  } catch (error) {
+    logger.warn(`[${requestId}] Failed to fetch image for base64 conversion:`, error)
+    return undefined
+  }
+}
+
+/**
+ * Generates one image with the ModelsLab text2img API, polling the fetch endpoint
+ * when the job is queued for asynchronous processing.
+ *
+ * @returns The URL of the generated image.
+ * @throws Error when ModelsLab responds with an HTTP or API error, an unexpected
+ *   payload, or when the job does not finish within the polling budget.
+ */
+async function generateWithModelsLab(
+  apiKey: string,
+  prompt: string,
+  model: string,
+  width: number,
+  height: number,
+  negativePrompt: string | undefined,
+  requestId: string
+): Promise<{ imageUrl: string }> {
+  logger.info(`[${requestId}] Calling ModelsLab text2img, model: ${model}`)
+
+  const requestBody: Record<string, unknown> = {
+    key: apiKey,
+    model_id: model,
+    prompt,
+    width,
+    height,
+    samples: 1,
+    safety_checker: false,
+    enhance_prompt: false,
+  }
+
+  if (negativePrompt) {
+    requestBody.negative_prompt = negativePrompt
+  }
+
+  const createResponse = await fetch('https://modelslab.com/api/v6/images/text2img', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(requestBody),
+  })
+
+  if (!createResponse.ok) {
+    const errText = await createResponse.text()
+    throw new Error(`ModelsLab API error: ${createResponse.status} - ${errText}`)
+  }
+
+  const createData = await createResponse.json()
+  logger.info(`[${requestId}] ModelsLab response status: ${createData.status}`)
+
+  // Immediate success
+  const immediateUrl = createData.status === 'success' ? firstOutputUrl(createData) : undefined
+  if (immediateUrl) {
+    return { imageUrl: immediateUrl }
+  }
+
+  // Async processing — poll fetch endpoint
+  if (createData.status === 'processing' && createData.id) {
+    const jobId = encodeURIComponent(String(createData.id))
+
+    for (let attempt = 1; attempt <= MAX_POLL_ATTEMPTS; attempt++) {
+      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
+
+      const fetchResponse = await fetch(`https://modelslab.com/api/v6/images/fetch/${jobId}`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ key: apiKey }),
+      })
+
+      if (!fetchResponse.ok) {
+        throw new Error(`ModelsLab fetch error: ${fetchResponse.status}`)
+      }
+
+      const fetchData = await fetchResponse.json()
+      logger.info(`[${requestId}] Poll ${attempt}: status=${fetchData.status}`)
+
+      const polledUrl = fetchData.status === 'success' ? firstOutputUrl(fetchData) : undefined
+      if (polledUrl) {
+        return { imageUrl: polledUrl }
+      }
+
+      if (fetchData.status === 'error' || fetchData.status === 'failed') {
+        throw new Error(
+          `ModelsLab image generation failed: ${fetchData.message || 'Unknown error'}`
+        )
+      }
+    }
+
+    const budgetSeconds = (MAX_POLL_ATTEMPTS * POLL_INTERVAL_MS) / 1000
+    throw new Error(`ModelsLab image generation timed out after ${budgetSeconds} seconds`)
+  }
+
+  // Error response
+  if (createData.status === 'error' || createData.error) {
+    throw new Error(
+      `ModelsLab API error: ${createData.message || createData.error || 'Unknown error'}`
+    )
+  }
+
+  throw new Error(`ModelsLab unexpected response: ${JSON.stringify(createData)}`)
+}
diff --git a/apps/sim/blocks/blocks/image_generator.ts b/apps/sim/blocks/blocks/image_generator.ts
index e2efad69d5..f94dea1e3d 100644
--- a/apps/sim/blocks/blocks/image_generator.ts
+++ b/apps/sim/blocks/blocks/image_generator.ts
@@ -8,12 +8,25 @@ export const ImageGeneratorBlock: BlockConfig = {
   description: 'Generate images',
   authMode: AuthMode.ApiKey,
   longDescription:
-    'Integrate Image Generator into the workflow. Can generate images using DALL-E 3 or GPT Image.',
+    'Integrate Image Generator into the workflow. Can generate images using DALL-E 3, GPT Image (OpenAI), or Flux and other community models via ModelsLab.',
   docsLink: 'https://docs.sim.ai/tools/image_generator',
   category: 'tools',
   bgColor: '#4D5FFF',
   icon: ImageIcon,
   subBlocks: [
+    // Provider selection
+    {
+      id: 'provider',
+      title: 'Provider',
+      type: 'dropdown',
+      options: [
+        { label: 'OpenAI', id: 'openai' },
+        { label: 'ModelsLab', id: 'modelslab' },
+      ],
+      value: () => 'openai',
+    },
+
+    // OpenAI model selection
     {
       id: 'model',
       title: 'Model',
@@ -23,7 +36,26 @@ export const ImageGeneratorBlock: BlockConfig = {
         { label: 'GPT Image', id: 'gpt-image-1' },
       ],
       value: () => 'dall-e-3',
+      condition: { field: 'provider', value: 'openai' },
+    },
+
+    // ModelsLab model selection
+    {
+      id: 'model',
+      title: 'Model',
+      type: 'dropdown',
+      options: [
+        { label: 'Flux (Schnell)', id: 'flux' },
+        { label: 'Juggernaut XL', id: 'juggernaut-xl-v10' },
+        { label: 'RealVisXL v5', id: 'realvisxlV50_v50Bakedvae' },
+        { label: 'DreamShaper XL', id: 'dreamshaperXL10_alpha2Xl10' },
+        { label: 'Stable Diffusion XL', id: 'sdxl' },
+      ],
+      value: () => 'flux',
+      condition: { field: 'provider', value: 'modelslab' },
     },
+
+    // Prompt — always shown
     {
       id: 'prompt',
       title: 'Prompt',
@@ -31,6 +63,10 @@ export const ImageGeneratorBlock: BlockConfig = {
       required: true,
       placeholder: 'Describe the image you want to generate...',
     },
+
+    // === OpenAI options ===
+
+    // Size for DALL-E 3
     {
       id: 'size',
       title: 'Size',
@@ -41,8 +77,10 @@ export const ImageGeneratorBlock: BlockConfig = {
         { label: '1792x1024', id: '1792x1024' },
       ],
       value: () => '1024x1024',
-      condition: { field: 'model', value: 'dall-e-3' },
+      condition: { field: 'provider', value: 'openai', and: { field: 'model', value: 'dall-e-3' } },
     },
+
+    // Size for GPT Image
     {
       id: 'size',
       title: 'Size',
@@ -54,8 +92,14 @@ export const ImageGeneratorBlock: BlockConfig = {
         { label: '1024x1536', id: '1024x1536' },
       ],
       value: () => 'auto',
-      condition: { field: 'model', value: 'gpt-image-1' },
+      condition: {
+        field: 'provider',
+        value: 'openai',
+        and: { field: 'model', value: 'gpt-image-1' },
+      },
     },
+
+    // Quality (DALL-E 3 only)
     {
       id: 'quality',
       title: 'Quality',
@@ -65,8 +109,10 @@ export const ImageGeneratorBlock: BlockConfig = {
         { label: 'HD', id: 'hd' },
       ],
       value: () => 'standard',
-      condition: { field: 'model', value: 'dall-e-3' },
+      condition: { field: 'provider', value: 'openai', and: { field: 'model', value: 'dall-e-3' } },
     },
+
+    // Style (DALL-E 3 only)
     {
       id: 'style',
       title: 'Style',
@@ -76,8 +122,10 @@ export const ImageGeneratorBlock: BlockConfig = {
         { label: 'Natural', id: 'natural' },
       ],
       value: () => 'vivid',
-      condition: { field: 'model', value: 'dall-e-3' },
+      condition: { field: 'provider', value: 'openai', and: { field: 'model', value: 'dall-e-3' } },
     },
+
+    // Background (GPT Image only)
     {
       id: 'background',
       title: 'Background',
@@ -88,22 +136,56 @@ export const ImageGeneratorBlock: BlockConfig = {
         { label: 'Opaque', id: 'opaque' },
       ],
       value: () => 'auto',
-      condition: { field: 'model', value: 'gpt-image-1' },
+      condition: {
+        field: 'provider',
+        value: 'openai',
+        and: { field: 'model', value: 'gpt-image-1' },
+      },
+    },
+
+    // === ModelsLab options ===
+
+    // Size for ModelsLab
+    {
+      id: 'size',
+      title: 'Size',
+      type: 'dropdown',
+      options: [
+        { label: '512×512', id: '512x512' },
+        { label: '768×768', id: '768x768' },
+        { label: '1024×1024', id: '1024x1024' },
+        { label: '1024×768 (landscape)', id: '1024x768' },
+        { label: '768×1024 (portrait)', id: '768x1024' },
+        { label: '1344×768 (wide)', id: '1344x768' },
+      ],
+      value: () => '1024x1024',
+      condition: { field: 'provider', value: 'modelslab' },
     },
+
+    // Negative prompt for ModelsLab
+    {
+      id: 'negativePrompt',
+      title: 'Negative Prompt',
+      type: 'long-input',
+      placeholder: 'What to exclude from the image (e.g. blurry, low quality)...',
+      condition: { field: 'provider', value: 'modelslab' },
+    },
+
+    // API Key — always shown
     {
       id: 'apiKey',
       title: 'API Key',
       type: 'short-input',
       required: true,
-      placeholder: 'Enter your OpenAI API key',
+      placeholder: 'Enter your API key',
       password: true,
       connectionDroppable: false,
     },
   ],
   tools: {
-    access: ['openai_image'],
+    access: ['openai_image', 'image_modelslab'],
     config: {
-      tool: () => 'openai_image',
+      tool: (params) => (params.provider === 'modelslab' ? 'image_modelslab' : 'openai_image'),
       params: (params) => {
         if (!params.apiKey) {
           throw new Error('API key is required')
@@ -112,7 +194,27 @@ export const ImageGeneratorBlock: BlockConfig = {
           throw new Error('Prompt is required')
         }
 
-        // Base parameters for all models
+        const provider = params.provider || 'openai'
+
+        if (provider === 'modelslab') {
+          // Parse size string (e.g. "1024x768") into width/height
+          const sizeStr = String(params.size || '1024x1024')
+          const [widthStr, heightStr] = sizeStr.split('x')
+          const width = parseInt(widthStr, 10) || 1024
+          const height = parseInt(heightStr, 10) || 1024
+
+          return {
+            provider: 'modelslab',
+            apiKey: params.apiKey,
+            model: params.model || 'flux',
+            prompt: params.prompt,
+            width,
+            height,
+            negativePrompt: params.negativePrompt,
+          }
+        }
+
+        // OpenAI (default)
         const baseParams = {
           prompt: params.prompt,
           model: params.model || 'dall-e-3',
@@ -140,12 +242,14 @@ export const ImageGeneratorBlock: BlockConfig = {
   },
   inputs: {
     prompt: { type: 'string', description: 'Image description prompt' },
+    provider: { type: 'string', description: 'Image generation provider (openai or modelslab)' },
     model: { type: 'string', description: 'Image generation model' },
     size: { type: 'string', description: 'Image dimensions' },
-    quality: { type: 'string', description: 'Image quality level' },
-    style: { type: 'string', description: 'Image style' },
-    background: { type: 'string', description: 'Background type' },
-    apiKey: { type: 'string', description: 'OpenAI API key' },
+    quality: { type: 'string', description: 'Image quality level (OpenAI DALL-E 3)' },
+    style: { type: 'string', description: 'Image style (OpenAI DALL-E 3)' },
+    background: { type: 'string', description: 'Background type (OpenAI GPT Image)' },
+    negativePrompt: { type: 'string', description: 'Negative prompt (ModelsLab)' },
+    apiKey: { type: 'string', description: 'API key for the selected provider' },
   },
   outputs: {
     content: { type: 'string', description: 'Generation response' },
diff --git a/apps/sim/tools/image/index.ts b/apps/sim/tools/image/index.ts
new file mode 100644
index 0000000000..c4b6434c34
--- /dev/null
+++ b/apps/sim/tools/image/index.ts
@@ -0,0 +1,2 @@
+export { modelsLabImageTool } from './modelslab'
+export * from './types'
diff --git a/apps/sim/tools/image/modelslab.ts b/apps/sim/tools/image/modelslab.ts
new file mode 100644
index 0000000000..79b9266a74
--- /dev/null
+++ b/apps/sim/tools/image/modelslab.ts
@@ -0,0 +1,127 @@
+import type { ToolConfig } from '@/tools/types'
+import type { ImageGenerationParams, ImageGenerationRequestBody } from '@/tools/image/types'
+
+/** Output payload of the ModelsLab image generation tool. */
+export interface ModelsLabImageResponse {
+  /** Mirrors `imageUrl` so the Image Generator block's `content` output is populated. */
+  content?: string
+  imageUrl?: string
+  imageFile?: string
+  model?: string
+  provider?: string
+}
+
+/**
+ * Tool that generates an image with ModelsLab by calling the internal
+ * `/api/tools/image/generate` route.
+ */
+export const modelsLabImageTool: ToolConfig = {
+  id: 'image_modelslab',
+  name: 'ModelsLab Image Generation',
+  description:
+    'Generate images using ModelsLab with access to Flux, Juggernaut XL, RealVisXL, DreamShaper, and hundreds of community models',
+  version: '1.0.0',
+
+  params: {
+    provider: {
+      type: 'string',
+      // Not required: the request body below always sends `modelslab`.
+      required: false,
+      visibility: 'user-only',
+      description: 'Image provider (modelslab)',
+    },
+    apiKey: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'ModelsLab API key',
+    },
+    model: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description:
+        'Model ID: flux (Flux Schnell), juggernaut-xl-v10 (Juggernaut XL), realvisxlV50_v50Bakedvae (RealVisXL v5), dreamshaperXL10_alpha2Xl10 (DreamShaper XL), sdxl (Stable Diffusion XL)',
+    },
+    prompt: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Text description of the image to generate',
+    },
+    width: {
+      type: 'number',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'Image width in pixels (default: 1024)',
+    },
+    height: {
+      type: 'number',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'Image height in pixels (default: 1024)',
+    },
+    negativePrompt: {
+      type: 'string',
+      required: false,
+      visibility: 'user-or-llm',
+      description: 'What to exclude from the image',
+    },
+  },
+
+  request: {
+    url: '/api/tools/image/generate',
+    method: 'POST',
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    body: (
+      params: ImageGenerationParams & {
+        _context?: { workspaceId?: string; workflowId?: string; executionId?: string }
+      }
+    ): ImageGenerationRequestBody => ({
+      provider: 'modelslab',
+      apiKey: params.apiKey,
+      model: params.model,
+      prompt: params.prompt,
+      width: params.width,
+      height: params.height,
+      negativePrompt: params.negativePrompt,
+      workspaceId: params._context?.workspaceId,
+      workflowId: params._context?.workflowId,
+      executionId: params._context?.executionId,
+    }),
+  },
+
+  transformResponse: async (response: Response) => {
+    // Tolerate non-JSON bodies (e.g. a gateway error page) instead of throwing.
+    const data = await response.json().catch(() => null)
+
+    if (!response.ok || !data || data.error) {
+      return {
+        success: false,
+        error: data?.error || 'Image generation failed',
+        output: {},
+      }
+    }
+
+    return {
+      success: true,
+      output: {
+        content: data.imageUrl,
+        imageUrl: data.imageUrl,
+        imageFile: data.imageFile,
+        model: data.model,
+        provider: 'modelslab',
+      },
+    }
+  },
+
+  outputs: {
+    content: { type: 'string', description: 'Generated image URL' },
+    imageUrl: { type: 'string', description: 'Generated image URL' },
+    imageFile: { type: 'file', description: 'Base64-encoded image data' },
+    model: { type: 'string', description: 'Model used for generation' },
+    provider: { type: 'string', description: 'Provider used (modelslab)' },
+  },
+}
diff --git a/apps/sim/tools/image/types.ts b/apps/sim/tools/image/types.ts
new file mode 100644
index 0000000000..89003ea1ec
--- /dev/null
+++ b/apps/sim/tools/image/types.ts
@@ -0,0 +1,17 @@
+/** Parameters accepted by image generation tools. */
+export interface ImageGenerationParams {
+  provider: string
+  model: string
+  prompt: string
+  width?: number
+  height?: number
+  negativePrompt?: string
+  apiKey: string
+}
+
+/** Body sent to `/api/tools/image/generate`, including optional execution context. */
+export interface ImageGenerationRequestBody extends ImageGenerationParams {
+  workspaceId?: string
+  workflowId?: string
+  executionId?: string
+}
diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts
index c206509aca..6925b66eea 100644
--- a/apps/sim/tools/registry.ts
+++ b/apps/sim/tools/registry.ts
@@ -843,6 +843,7 @@ import {
   intercomUpdateContactV2Tool,
   intercomUpdateTicketV2Tool,
 } from '@/tools/intercom'
+import { modelsLabImageTool } from '@/tools/image'
 import { jinaReadUrlTool, jinaSearchTool } from '@/tools/jina'
 import {
   jiraAddAttachmentTool,
@@ -3163,6 +3164,7 @@ export const tools: Record = {
   datadog_list_downtimes: datadogListDowntimesTool,
   datadog_cancel_downtime: datadogCancelDowntimeTool,
   openai_image: openAIImageTool,
+  image_modelslab: modelsLabImageTool,
   microsoft_teams_read_chat: microsoftTeamsReadChatTool,
   microsoft_teams_write_chat: microsoftTeamsWriteChatTool,
   microsoft_teams_read_channel: microsoftTeamsReadChannelTool,