diff --git a/api/app/clients/AnthropicClient.js b/api/app/clients/AnthropicClient.js
index a2ab752bc22..19f4a3930a2 100644
--- a/api/app/clients/AnthropicClient.js
+++ b/api/app/clients/AnthropicClient.js
@@ -746,15 +746,6 @@ class AnthropicClient extends BaseClient {
       metadata,
     };
 
-    if (!/claude-3[-.]7/.test(model)) {
-      if (top_p !== undefined) {
-        requestOptions.top_p = top_p;
-      }
-      if (top_k !== undefined) {
-        requestOptions.top_k = top_k;
-      }
-    }
-
     if (this.useMessages) {
       requestOptions.messages = payload;
       requestOptions.max_tokens =
@@ -769,6 +760,14 @@ class AnthropicClient extends BaseClient {
       thinkingBudget: this.options.thinkingBudget,
     });
 
+    if (!/claude-3[-.]7/.test(model)) {
+      requestOptions.top_p = top_p;
+      requestOptions.top_k = top_k;
+    } else if (requestOptions.thinking == null) {
+      requestOptions.topP = top_p;
+      requestOptions.topK = top_k;
+    }
+
     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [
         {
diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js
index 8d0bce25d2a..4bc2d66ca0f 100644
--- a/api/app/clients/OpenAIClient.js
+++ b/api/app/clients/OpenAIClient.js
@@ -1309,6 +1309,12 @@ ${convo}
       modelOptions.include_reasoning = true;
       reasoningKey = 'reasoning';
     }
+    if (this.useOpenRouter && modelOptions.reasoning_effort != null) {
+      modelOptions.reasoning = {
+        effort: modelOptions.reasoning_effort,
+      };
+      delete modelOptions.reasoning_effort;
+    }
 
     this.streamHandler = new SplitStreamHandler({
       reasoningKey,
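Note: this OpenRouter hunk (and the matching one in api/server/services/Endpoints/openAI/llm.js further down) normalizes the flat OpenAI-style reasoning_effort field into the nested reasoning object that OpenRouter accepts. A minimal, self-contained sketch of that transformation, using an illustrative options object (the model name is made up):

// Sketch: map reasoning_effort onto OpenRouter's nested `reasoning` shape.
// The options object is example data, not taken from the codebase.
const modelOptions = { model: 'openai/o1', reasoning_effort: 'high' };

if (modelOptions.reasoning_effort != null) {
  modelOptions.reasoning = { effort: modelOptions.reasoning_effort };
  delete modelOptions.reasoning_effort;
}

console.log(modelOptions); // { model: 'openai/o1', reasoning: { effort: 'high' } }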
"1.7.8", "bcryptjs": "^2.4.3", diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js index bff7dc65eb1..99d64bb9a61 100644 --- a/api/server/controllers/agents/client.js +++ b/api/server/controllers/agents/client.js @@ -27,10 +27,10 @@ const { formatContentStrings, createContextHandlers, } = require('~/app/clients/prompts'); -const { encodeAndFormat } = require('~/server/services/Files/images/encode'); +const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens'); const { getBufferString, HumanMessage } = require('@langchain/core/messages'); +const { encodeAndFormat } = require('~/server/services/Files/images/encode'); const Tokenizer = require('~/server/services/Tokenizer'); -const { spendTokens } = require('~/models/spendTokens'); const BaseClient = require('~/app/clients/BaseClient'); const { createRun } = require('./run'); const { logger } = require('~/config'); @@ -380,15 +380,34 @@ class AgentClient extends BaseClient { if (!collectedUsage || !collectedUsage.length) { return; } - const input_tokens = collectedUsage[0]?.input_tokens || 0; + const input_tokens = + (collectedUsage[0]?.input_tokens || 0) + + (Number(collectedUsage[0]?.input_token_details?.cache_creation) || 0) + + (Number(collectedUsage[0]?.input_token_details?.cache_read) || 0); let output_tokens = 0; let previousTokens = input_tokens; // Start with original input for (let i = 0; i < collectedUsage.length; i++) { const usage = collectedUsage[i]; + if (!usage) { + continue; + } + + const cache_creation = Number(usage.input_token_details?.cache_creation) || 0; + const cache_read = Number(usage.input_token_details?.cache_read) || 0; + + const txMetadata = { + context, + conversationId: this.conversationId, + user: this.user ?? this.options.req.user?.id, + endpointTokenConfig: this.options.endpointTokenConfig, + model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model, + }; + if (i > 0) { // Count new tokens generated (input_tokens minus previous accumulated tokens) - output_tokens += (Number(usage.input_tokens) || 0) - previousTokens; + output_tokens += + (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens; } // Add this message's output tokens @@ -396,16 +415,26 @@ class AgentClient extends BaseClient { // Update previousTokens to include this message's output previousTokens += Number(usage.output_tokens) || 0; - spendTokens( - { - context, - conversationId: this.conversationId, - user: this.user ?? this.options.req.user?.id, - endpointTokenConfig: this.options.endpointTokenConfig, - model: usage.model ?? model ?? this.model ?? 
diff --git a/api/server/services/Endpoints/agents/title.js b/api/server/services/Endpoints/agents/title.js
index 56fd28668df..f25746582e7 100644
--- a/api/server/services/Endpoints/agents/title.js
+++ b/api/server/services/Endpoints/agents/title.js
@@ -20,10 +20,19 @@ const addTitle = async (req, { text, response, client }) => {
   const titleCache = getLogStores(CacheKeys.GEN_TITLE);
   const key = `${req.user.id}-${response.conversationId}`;
 
+  const responseText =
+    response?.content && Array.isArray(response?.content)
+      ? response.content.reduce((acc, block) => {
+          if (block?.type === 'text') {
+            return acc + block.text;
+          }
+          return acc;
+        }, '')
+      : (response?.content ?? response?.text ?? '');
   const title = await client.titleConvo({
     text,
-    responseText: response?.text ?? '',
+    responseText,
     conversationId: response.conversationId,
   });
   await titleCache.set(key, title, 120000);
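Note: agent responses can carry their text as an array of typed content blocks rather than a plain string, so addTitle now concatenates only the text blocks before requesting a title. A small self-contained sketch of the same reduction (the response object and the non-text block type are illustrative):

// Sketch: flatten an agents-style content array into plain text for titling.
// Only blocks with type 'text' contribute; other block types are skipped.
const response = {
  content: [
    { type: 'tool_call', tool_call: { name: 'search' } },
    { type: 'text', text: 'Hello' },
    { type: 'text', text: ', world!' },
  ],
};

const responseText = Array.isArray(response?.content)
  ? response.content.reduce((acc, block) => (block?.type === 'text' ? acc + block.text : acc), '')
  : (response?.content ?? response?.text ?? '');

console.log(responseText); // 'Hello, world!'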
diff --git a/api/server/services/Endpoints/anthropic/llm.js b/api/server/services/Endpoints/anthropic/llm.js
index 186444cec8c..9f20b8e61df 100644
--- a/api/server/services/Endpoints/anthropic/llm.js
+++ b/api/server/services/Endpoints/anthropic/llm.js
@@ -1,6 +1,6 @@
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
-const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');
+const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');
 
 /**
  * Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -49,13 +49,14 @@ function getLLMConfig(apiKey, options = {}) {
     clientOptions: {},
   };
 
+  requestOptions = configureReasoning(requestOptions, systemOptions);
+
   if (!/claude-3[-.]7/.test(mergedOptions.model)) {
-    if (mergedOptions.topP !== undefined) {
-      requestOptions.topP = mergedOptions.topP;
-    }
-    if (mergedOptions.topK !== undefined) {
-      requestOptions.topK = mergedOptions.topK;
-    }
+    requestOptions.topP = mergedOptions.topP;
+    requestOptions.topK = mergedOptions.topK;
+  } else if (requestOptions.thinking == null) {
+    requestOptions.topP = mergedOptions.topP;
+    requestOptions.topK = mergedOptions.topK;
   }
 
   const supportsCacheControl =
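Note: after configureReasoning runs, the branch above forwards topP/topK for every non-3.7 Claude model, and for Claude 3.7 only when no thinking configuration was produced, presumably because extended thinking rejects those sampling overrides. Both branches assign the same values, so the rule can be read as a single condition; a condensed, self-contained sketch with illustrative inputs:

// Sketch: one-condition restatement of the topP/topK rule above (example values).
const mergedOptions = { model: 'claude-3-7-sonnet', topP: 0.9, topK: 10 };
const requestOptions = { model: mergedOptions.model }; // assume no `thinking` key was added

const isClaude37 = /claude-3[-.]7/.test(mergedOptions.model);
if (!isClaude37 || requestOptions.thinking == null) {
  requestOptions.topP = mergedOptions.topP;
  requestOptions.topK = mergedOptions.topK;
}

console.log(requestOptions); // { model: 'claude-3-7-sonnet', topP: 0.9, topK: 10 }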
diff --git a/api/server/services/Endpoints/anthropic/llm.spec.js b/api/server/services/Endpoints/anthropic/llm.spec.js
index a1dc6a44b6a..9c453efb924 100644
--- a/api/server/services/Endpoints/anthropic/llm.spec.js
+++ b/api/server/services/Endpoints/anthropic/llm.spec.js
@@ -109,4 +109,45 @@ describe('getLLMConfig', () => {
     // Just verifying that the promptCache setting is processed
     expect(result.llmConfig).toBeDefined();
   });
+
+  it('should include topK and topP for Claude-3.7 models when thinking is not enabled', () => {
+    // Test with thinking explicitly set to null/undefined
+    const result = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result.llmConfig).toHaveProperty('topK', 10);
+    expect(result.llmConfig).toHaveProperty('topP', 0.9);
+
+    // Test with thinking explicitly set to false
+    const result2 = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result2.llmConfig).toHaveProperty('topK', 10);
+    expect(result2.llmConfig).toHaveProperty('topP', 0.9);
+
+    // Test with decimal notation as well
+    const result3 = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3.7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result3.llmConfig).toHaveProperty('topK', 10);
+    expect(result3.llmConfig).toHaveProperty('topP', 0.9);
+  });
 });
diff --git a/api/server/services/Endpoints/openAI/llm.js b/api/server/services/Endpoints/openAI/llm.js
index c12f835f2fc..0fa899b4a3c 100644
--- a/api/server/services/Endpoints/openAI/llm.js
+++ b/api/server/services/Endpoints/openAI/llm.js
@@ -29,7 +29,6 @@ function getLLMConfig(apiKey, options = {}) {
   const {
     modelOptions = {},
     reverseProxyUrl,
-    useOpenRouter,
     defaultQuery,
     headers,
     proxy,
@@ -56,9 +55,11 @@ function getLLMConfig(apiKey, options = {}) {
     });
   }
 
+  let useOpenRouter;
   /** @type {OpenAIClientOptions['configuration']} */
   const configOptions = {};
-  if (useOpenRouter || (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter))) {
+  if (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) {
+    useOpenRouter = true;
     llmConfig.include_reasoning = true;
     configOptions.baseURL = reverseProxyUrl;
     configOptions.defaultHeaders = Object.assign(
@@ -118,6 +119,13 @@ function getLLMConfig(apiKey, options = {}) {
     llmConfig.organization = process.env.OPENAI_ORGANIZATION;
   }
 
+  if (useOpenRouter && llmConfig.reasoning_effort != null) {
+    llmConfig.reasoning = {
+      effort: llmConfig.reasoning_effort,
+    };
+    delete llmConfig.reasoning_effort;
+  }
+
   return {
     /** @type {OpenAIClientOptions} */
     llmConfig,
diff --git a/client/src/components/Chat/Messages/Content/ContentParts.tsx b/client/src/components/Chat/Messages/Content/ContentParts.tsx
index b997060c61f..ddf08976eb8 100644
--- a/client/src/components/Chat/Messages/Content/ContentParts.tsx
+++ b/client/src/components/Chat/Messages/Content/ContentParts.tsx
@@ -109,7 +109,9 @@ const ContentParts = memo(
               return val;
             })
           }
-          label={isSubmitting ? localize('com_ui_thinking') : localize('com_ui_thoughts')}
+          label={
+            isSubmitting && isLast ? localize('com_ui_thinking') : localize('com_ui_thoughts')
+          }
         />
       )}
diff --git a/package-lock.json b/package-lock.json
index 01cdaa54358..15aabc1a77e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -64,7 +64,7 @@
         "@langchain/google-genai": "^0.1.9",
         "@langchain/google-vertexai": "^0.2.0",
         "@langchain/textsplitters": "^0.1.0",
-        "@librechat/agents": "^2.1.3",
+        "@librechat/agents": "^2.1.7",
         "@waylaidwanderer/fetch-event-source": "^3.0.1",
         "axios": "1.7.8",
         "bcryptjs": "^2.4.3",
@@ -15987,9 +15987,9 @@
       }
     },
     "node_modules/@librechat/agents": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.1.3.tgz",
-      "integrity": "sha512-4pPkLpjhA3DDiZQOULcrpbdQaOBC4JuUMdcVTUyYBHcA63SJT3olstmRQkGKNvoXLFLeQyJ0jkOqkEpzLJzk/g==",
+      "version": "2.1.7",
+      "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.1.7.tgz",
+      "integrity": "sha512-/+AvxH75K0dSSUeHqT8jPZCcqcQUWdB56g9ls7ho0Nw9vdxfezBhF/hXnOk5oORHeEXlGEKNE6YPyjAhCmNIOg==",
       "dependencies": {
         "@aws-crypto/sha256-js": "^5.2.0",
         "@aws-sdk/credential-provider-node": "^3.613.0",