import type { AIMessage, CompletionChunk, CompletionParams, CompletionResponse } from '../types/index.js';
import { AIProviderError, AIErrorType } from '../types/index.js';
import { DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE } from '../constants.js';
import type { OpenWebUIHttpClient } from './openwebui-http.js';
import type {
  OllamaGenerateResponse,
  OpenWebUIChatResponse,
  OpenWebUIModelsResponse,
  OpenWebUIStreamChunk
} from './openwebui-types.js';

/**
 * Strategy interface for OpenWebUI's two backend modes.
 * Selected at construction based on `useOllamaProxy`.
 *
 * NOTE(review): the generic arguments on the return types below were
 * reconstructed; confirm that `CompletionResponse<string>` matches the
 * declaration in `../types/index.js`.
 */
export interface OpenWebUIStrategy {
  /** Resolves when the backend answers the probe request; rejects otherwise. */
  validateConnection(): Promise<void>;
  /** Performs a single non-streaming completion. */
  complete(params: CompletionParams, defaultModel: string): Promise<CompletionResponse<string>>;
  /** Performs a streaming completion, yielding chunks as they are parsed. */
  stream(params: CompletionParams, defaultModel: string): AsyncIterable<CompletionChunk>;
}

// ============================================================================
// Chat strategy — OpenAI-compatible /api/chat/completions endpoint
// ============================================================================

export class OpenWebUIChatStrategy implements OpenWebUIStrategy {
  constructor(private readonly http: OpenWebUIHttpClient) {}

  /**
   * Probes `/api/models` and checks that the payload carries a `data` array.
   *
   * @throws Error when the HTTP status is not OK or the payload has no `data` array.
   */
  async validateConnection(): Promise<void> {
    const response = await this.http.request('/api/models', 'GET');
    assertOk(response);

    const data = await response.json() as OpenWebUIModelsResponse;
    if (!data.data || !Array.isArray(data.data)) {
      throw new Error('Invalid models response format');
    }
  }

  /**
   * Sends a non-streaming request to `/api/chat/completions`.
   *
   * @param params - Completion parameters; `model`, `maxTokens` and `temperature` fall back to defaults.
   * @param defaultModel - Model used when `params.model` is empty.
   * @returns The first choice of the response, normalised by `formatChatResponse`.
   * @throws Error when the HTTP status is not OK.
   * @throws AIProviderError when the response contains no content.
   */
  async complete(params: CompletionParams, defaultModel: string): Promise<CompletionResponse<string>> {
    const response = await this.http.request('/api/chat/completions', 'POST', {
      model: params.model || defaultModel,
      messages: convertMessages(params.messages),
      max_tokens: params.maxTokens || DEFAULT_MAX_TOKENS,
      temperature: params.temperature ?? DEFAULT_TEMPERATURE,
      top_p: params.topP,
      stop: params.stopSequences,
      stream: false
    });
    // Fail with the HTTP status instead of trying to interpret an error body as a completion.
    assertOk(response);

    const data = await response.json() as OpenWebUIChatResponse;
    return formatChatResponse(data);
  }

  /**
   * Sends a streaming request to `/api/chat/completions` and parses the
   * `data: `-prefixed lines of the response body.
   *
   * Yields one chunk per non-empty content delta, then a final chunk with
   * `isComplete: true` when a choice reports a `finish_reason`. Token usage on
   * the final chunk is reported as zeros. Iteration stops at `[DONE]`.
   *
   * @throws Error when the HTTP status is not OK or the response has no body.
   */
  async *stream(params: CompletionParams, defaultModel: string): AsyncIterable<CompletionChunk> {
    const response = await this.http.request('/api/chat/completions', 'POST', {
      model: params.model || defaultModel,
      messages: convertMessages(params.messages),
      max_tokens: params.maxTokens || DEFAULT_MAX_TOKENS,
      temperature: params.temperature ?? DEFAULT_TEMPERATURE,
      top_p: params.topP,
      stop: params.stopSequences,
      stream: true
    });
    assertOk(response);

    if (!response.body) {
      throw new Error('No response body for streaming');
    }

    // The first id seen is reused for every chunk of this stream.
    let messageId = '';

    for await (const line of readLines(response.body)) {
      if (!line.startsWith('data: ')) continue;

      const data = line.slice(6);
      if (data === '[DONE]') return;

      let chunk: OpenWebUIStreamChunk;
      try {
        chunk = JSON.parse(data) as OpenWebUIStreamChunk;
      } catch (parseError) {
        // A malformed line is skipped rather than aborting the whole stream.
        console.warn('Failed to parse streaming chunk:', parseError);
        continue;
      }

      if (chunk.id && !messageId) {
        messageId = chunk.id;
      }

      // `choices` may be missing or empty on non-content chunks; never index it blindly.
      const choice = chunk.choices?.[0];

      if (choice?.delta?.content) {
        yield {
          content: choice.delta.content,
          isComplete: false,
          id: messageId || chunk.id
        };
      }

      if (choice?.finish_reason) {
        yield {
          content: '',
          isComplete: true,
          id: messageId || chunk.id,
          usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
        };
        return;
      }
    }
  }
}

// ============================================================================
// Ollama strategy — direct /ollama/api/generate endpoint
// ============================================================================

export class OpenWebUIOllamaStrategy implements OpenWebUIStrategy {
  constructor(private readonly http: OpenWebUIHttpClient) {}

  /**
   * Probes `/ollama/api/tags`.
   *
   * @throws Error when the HTTP status is not OK.
   */
  async validateConnection(): Promise<void> {
    const response = await this.http.request('/ollama/api/tags', 'GET');
    assertOk(response);
  }

  /**
   * Sends a non-streaming request to `/ollama/api/generate`, flattening the
   * message list into a single prompt string.
   *
   * @param params - Completion parameters; `model`, `maxTokens` and `temperature` fall back to defaults.
   * @param defaultModel - Model used when `params.model` is empty.
   * @throws Error when the HTTP status is not OK.
   */
  async complete(params: CompletionParams, defaultModel: string): Promise<CompletionResponse<string>> {
    const response = await this.http.request('/ollama/api/generate', 'POST', {
      model: params.model || defaultModel,
      prompt: convertMessagesToPrompt(params.messages),
      stream: false,
      options: {
        temperature: params.temperature ?? DEFAULT_TEMPERATURE,
        top_p: params.topP,
        num_predict: params.maxTokens || DEFAULT_MAX_TOKENS,
        stop: params.stopSequences
      }
    });
    // Fail with the HTTP status instead of trying to interpret an error body as a completion.
    assertOk(response);

    const data = await response.json() as OllamaGenerateResponse;
    return formatOllamaResponse(data);
  }

  /**
   * Sends a streaming request to `/ollama/api/generate` and parses each
   * non-empty line of the body as one JSON object.
   *
   * Yields one chunk per non-empty `response` field, then a final chunk with
   * `isComplete: true` and token usage when a line reports `done`. All chunks
   * share an id of the form `ollama-<timestamp>` generated when streaming starts.
   *
   * @throws Error when the HTTP status is not OK or the response has no body.
   */
  async *stream(params: CompletionParams, defaultModel: string): AsyncIterable<CompletionChunk> {
    const response = await this.http.request('/ollama/api/generate', 'POST', {
      model: params.model || defaultModel,
      prompt: convertMessagesToPrompt(params.messages),
      stream: true,
      options: {
        temperature: params.temperature ?? DEFAULT_TEMPERATURE,
        top_p: params.topP,
        num_predict: params.maxTokens || DEFAULT_MAX_TOKENS,
        stop: params.stopSequences
      }
    });
    assertOk(response);

    if (!response.body) {
      throw new Error('No response body for streaming');
    }

    const messageId = `ollama-${Date.now()}`;

    for await (const line of readLines(response.body)) {
      let chunk: OllamaGenerateResponse;
      try {
        chunk = JSON.parse(line) as OllamaGenerateResponse;
      } catch (parseError) {
        // A malformed line is skipped rather than aborting the whole stream.
        console.warn('Failed to parse Ollama streaming chunk:', parseError);
        continue;
      }

      if (chunk.response) {
        yield { content: chunk.response, isComplete: false, id: messageId };
      }

      if (chunk.done) {
        const promptTokens = chunk.prompt_eval_count || 0;
        const completionTokens = chunk.eval_count || 0;
        yield {
          content: '',
          isComplete: true,
          id: messageId,
          usage: {
            promptTokens,
            completionTokens,
            totalTokens: promptTokens + completionTokens
          }
        };
        return;
      }
    }
  }
}

// ============================================================================
// Shared transport helpers
// ============================================================================

/**
 * Throws when a response reports a non-OK HTTP status.
 *
 * @throws Error with the message `HTTP <status>: <statusText>`.
 */
function assertOk(response: { ok: boolean; status: number; statusText: string }): void {
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
  }
}

/**
 * Decodes a byte stream as text and yields its non-empty lines, trimmed.
 *
 * Text after the last newline is held back until more data arrives. When the
 * stream ends, the decoder is flushed and any remaining text is yielded as a
 * final line, so a body without a trailing newline does not lose its last
 * record. The reader lock is released when iteration finishes, fails, or is
 * abandoned by the consumer.
 */
async function* readLines(body: ReadableStream<Uint8Array>): AsyncGenerator<string> {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line (or '' if the data ended on a newline).
      buffer = lines.pop() ?? '';

      for (const line of lines) {
        const trimmed = line.trim();
        if (trimmed) yield trimmed;
      }
    }

    // Flush bytes still held by the decoder, then emit the unterminated tail.
    buffer += decoder.decode();
    const tail = buffer.trim();
    if (tail) yield tail;
  } finally {
    reader.releaseLock();
  }
}

// ============================================================================
// Shared message helpers
// ============================================================================

/** Reduces each message to the `{ role, content }` pair sent to the chat endpoint. */
function convertMessages(messages: AIMessage[]): Array<{ role: string; content: string }> {
  return messages.map(message => ({ role: message.role, content: message.content }));
}

/**
 * Flattens a conversation into a single prompt string.
 *
 * Each `system`, `user` and `assistant` message becomes a `System: `,
 * `Human: ` or `Assistant: ` paragraph; messages with any other role are
 * omitted. The prompt always ends with `Assistant: `.
 */
function convertMessagesToPrompt(messages: AIMessage[]): string {
  let prompt = '';

  for (const message of messages) {
    switch (message.role) {
      case 'system':
        prompt += `System: ${message.content}\n\n`;
        break;
      case 'user':
        prompt += `Human: ${message.content}\n\n`;
        break;
      case 'assistant':
        prompt += `Assistant: ${message.content}\n\n`;
        break;
    }
  }

  return prompt + 'Assistant: ';
}

/**
 * Normalises a chat-completions payload into a `CompletionResponse`.
 *
 * Missing usage counters are reported as 0.
 *
 * @throws AIProviderError when the payload has no first choice or that choice has no content.
 */
function formatChatResponse(response: OpenWebUIChatResponse): CompletionResponse<string> {
  // Optional chaining so a payload without `choices` or `message` raises the
  // provider error below rather than a raw TypeError.
  const choice = response.choices?.[0];
  const content = choice?.message?.content;

  if (!choice || !content) {
    throw new AIProviderError('No content found in OpenWebUI response', AIErrorType.UNKNOWN);
  }

  return {
    content,
    model: response.model,
    usage: {
      promptTokens: response.usage?.prompt_tokens || 0,
      completionTokens: response.usage?.completion_tokens || 0,
      totalTokens: response.usage?.total_tokens || 0
    },
    id: response.id,
    metadata: {
      finishReason: choice.finish_reason,
      created: response.created
    }
  };
}

/**
 * Normalises an Ollama generate payload into a `CompletionResponse`.
 *
 * Missing token counters are reported as 0, the total is their sum, and the id
 * is generated locally as `ollama-<timestamp>`.
 */
function formatOllamaResponse(response: OllamaGenerateResponse): CompletionResponse<string> {
  const promptTokens = response.prompt_eval_count || 0;
  const completionTokens = response.eval_count || 0;

  return {
    content: response.response,
    model: response.model,
    usage: {
      promptTokens,
      completionTokens,
      totalTokens: promptTokens + completionTokens
    },
    id: `ollama-${Date.now()}`,
    metadata: {
      created: new Date(response.created_at).getTime(),
      totalDuration: response.total_duration,
      loadDuration: response.load_duration,
      promptEvalDuration: response.prompt_eval_duration,
      evalDuration: response.eval_duration
    }
  };
}