From fe19133dd40ebddd4ebd6b4ef1071a6a7329708d Mon Sep 17 00:00:00 2001
From: Sam
Date: Tue, 25 Feb 2025 17:32:12 +1100
Subject: [PATCH] FEATURE: full support for Sonnet 3.7 (#1151)

* FEATURE: full support for Sonnet 3.7

- Adds support for Sonnet 3.7 with reasoning on bedrock and anthropic
- Fixes regression where provider params were not populated

Note: reasoning tokens are hardcoded to a minimum of 100 and a maximum of 65,536.

* FIX: OpenAI non-reasoning models need to use the deprecated max_tokens
---
 app/models/llm_model.rb                       |  4 ++
 .../components/ai-llm-editor-form.gjs         |  9 ++-
 config/locales/client.en.yml                  |  4 +-
 lib/completions/endpoints/anthropic.rb        |  9 +++
 lib/completions/endpoints/aws_bedrock.rb      | 15 ++++-
 lib/completions/endpoints/open_ai.rb          | 10 ++-
 lib/completions/llm.rb                        |  4 +-
 .../completions/endpoints/anthropic_spec.rb   | 62 +++++++++++++++++++
 .../completions/endpoints/aws_bedrock_spec.rb | 51 +++++++++++++++
 .../lib/completions/endpoints/open_ai_spec.rb | 21 ++++++-
 spec/system/llms/ai_llm_spec.rb               | 10 +--
 11 files changed, 184 insertions(+), 15 deletions(-)

diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
index a5f5e510..084091e8 100644
--- a/app/models/llm_model.rb
+++ b/app/models/llm_model.rb
@@ -26,9 +26,13 @@ class LlmModel < ActiveRecord::Base
         access_key_id: :text,
         region: :text,
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       anthropic: {
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       open_ai: {
         organization: :text,
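Note: the two new provider params above are read back by the endpoint classes through lookup_custom_param and clamped before use. A minimal sketch of the clamp semantics in plain Ruby (values are illustrative; Integer#clamp is core Ruby):

    # reasoning_tokens arrives as a string provider param; to_i then clamp
    # keeps the thinking budget inside the hardcoded 100..65_536 window
    nil.to_i.clamp(100, 65_536)       # => 100    (param unset)
    "10000".to_i.clamp(100, 65_536)   # => 10000  (typical value)
    "999999".to_i.clamp(100, 65_536)  # => 65536  (oversized value)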
diff --git a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
index 6ba6cd26..d3c02df3 100644
--- a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
+++ b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
@@ -61,7 +61,10 @@ export default class AiLlmEditorForm extends Component {
       provider: model.provider,
       enabled_chat_bot: model.enabled_chat_bot,
       vision_enabled: model.vision_enabled,
-      provider_params: this.computeProviderParams(model.provider),
+      provider_params: this.computeProviderParams(
+        model.provider,
+        model.provider_params
+      ),
       llm_quotas: model.llm_quotas,
     };
   }
@@ -128,12 +131,12 @@ export default class AiLlmEditorForm extends Component {
     return !this.args.model.isNew;
  }

-  computeProviderParams(provider) {
+  computeProviderParams(provider, currentParams = {}) {
     const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
     return Object.fromEntries(
       Object.entries(params).map(([k, v]) => [
         k,
-        v?.type === "enum" ? v.default : null,
+        currentParams[k] ?? (v?.type === "enum" ? v.default : null),
       ])
     );
   }
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index eb467523..20806679 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -390,7 +390,7 @@ en:
         model_description:
           none: "General settings that work for most language models"
-          anthropic-claude-3-5-sonnet: "Anthropic's most intelligent model"
+          anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
           anthropic-claude-3-5-haiku: "Fast and cost-effective"
           anthropic-claude-3-opus: "Excels at writing and complex tasks"
           google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
@@ -459,6 +459,8 @@ en:
           provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
           disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
           reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
+          enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
+          reasoning_tokens: "Number of tokens used for reasoning"

       related_topics:
         title: "Related topics"
diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
index ffbdb024..ed950d31 100644
--- a/lib/completions/endpoints/anthropic.rb
+++ b/lib/completions/endpoints/anthropic.rb
@@ -38,6 +38,15 @@ module DiscourseAi
           options = { model: mapped_model, max_tokens: max_tokens }

+          if llm_model.lookup_custom_param("enable_reasoning")
+            reasoning_tokens =
+              llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+
+            # this allows for ample tokens beyond reasoning
+            options[:max_tokens] = reasoning_tokens + 30_000
+            options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+          end
+
           options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
             dialect.prompt.has_tools?
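Note: a short sketch of the request options the reasoning branch above produces with a 10_000-token budget (the model string is illustrative; mapped_model is resolved from the configured model name, and the 40_000 total matches the spec expectations further down):

    reasoning_tokens = 10_000
    options = { model: "claude-3-7-sonnet", max_tokens: reasoning_tokens + 30_000 }
    options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
    options
    # => { model: "claude-3-7-sonnet", max_tokens: 40_000,
    #      thinking: { type: "enabled", budget_tokens: 10_000 } }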
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
index be1abb2c..75ed12cf 100644
--- a/lib/completions/endpoints/aws_bedrock.rb
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -26,7 +26,18 @@ module DiscourseAi
           max_tokens = 4096
           max_tokens = 8192 if bedrock_model_id.match?(/3.5/)

-          { max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
+          result = { anthropic_version: "bedrock-2023-05-31" }
+          if llm_model.lookup_custom_param("enable_reasoning")
+            reasoning_tokens =
+              llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+
+            # this allows for ample tokens beyond reasoning
+            max_tokens = reasoning_tokens + 30_000
+            result[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+          end
+          result[:max_tokens] = max_tokens
+
+          result
         else
           {}
         end
@@ -66,6 +77,8 @@ module DiscourseAi
             "anthropic.claude-3-5-sonnet-20241022-v2:0"
           when "claude-3-5-haiku"
             "anthropic.claude-3-5-haiku-20241022-v1:0"
+          when "claude-3-7-sonnet"
+            "anthropic.claude-3-7-sonnet-20250219-v1:0"
           else
             llm_model.name
           end
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
index 17bb260f..f2bbd797 100644
--- a/lib/completions/endpoints/open_ai.rb
+++ b/lib/completions/endpoints/open_ai.rb
@@ -11,9 +11,13 @@ module DiscourseAi
        def normalize_model_params(model_params)
          model_params = model_params.dup

-          # max_tokens is deprecated and is not functional on reasoning models
-          max_tokens = model_params.delete(:max_tokens)
-          model_params[:max_completion_tokens] = max_tokens if max_tokens
+          # max_tokens is deprecated, but we still need to support it on older
+          # OpenAI models and older Azure models, so we only normalize it when
+          # the model name starts with "o" (covering all the reasoning models)
+          if llm_model.name.starts_with?("o")
+            max_tokens = model_params.delete(:max_tokens)
+            model_params[:max_completion_tokens] = max_tokens if max_tokens
+          end

          # temperature is already supported

          if model_params[:stop_sequences]
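Note: a self-contained sketch of the remapping rule above, assuming plain Ruby (start_with? rather than the Rails alias starts_with?; the helper name and model names are illustrative):

    def normalize_max_tokens(model_name, params)
      params = params.dup
      # only reasoning models (o1, o3-mini, ...) get the renamed parameter
      if model_name.start_with?("o")
        max_tokens = params.delete(:max_tokens)
        params[:max_completion_tokens] = max_tokens if max_tokens
      end
      params
    end

    normalize_max_tokens("o3-mini", { max_tokens: 1000 }) # => { max_completion_tokens: 1000 }
    normalize_max_tokens("gpt-4", { max_tokens: 1000 })   # => { max_tokens: 1000 }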
"Anthropic-Version" => "2023-06-01", + }, + ).to_return(status: 200, body: body) + + model.provider_params["enable_reasoning"] = true + model.provider_params["reasoning_tokens"] = 10_000 + model.save! + + proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + result = proxy.generate(prompt, user: Discourse.system_user) + expect(result).to eq("Hello!") + + expected_body = { + model: "claude-3-opus-20240229", + max_tokens: 40_000, + thinking: { + type: "enabled", + budget_tokens: 10_000, + }, + messages: [{ role: "user", content: "user1: hello" }], + system: "You are hello bot", + } + expect(parsed_body).to eq(expected_body) + + log = AiApiAuditLog.order(:id).last + expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic) + expect(log.request_tokens).to eq(10) + expect(log.response_tokens).to eq(25) + end + it "can operate in regular mode" do body = <<~STRING { diff --git a/spec/lib/completions/endpoints/aws_bedrock_spec.rb b/spec/lib/completions/endpoints/aws_bedrock_spec.rb index f5329d3d..ebe8094b 100644 --- a/spec/lib/completions/endpoints/aws_bedrock_spec.rb +++ b/spec/lib/completions/endpoints/aws_bedrock_spec.rb @@ -335,6 +335,57 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do expect(log.response_tokens).to eq(20) end + it "supports thinking" do + model.provider_params["enable_reasoning"] = true + model.provider_params["reasoning_tokens"] = 10_000 + model.save! + + proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + + request = nil + + content = { + content: [text: "hello sam"], + usage: { + input_tokens: 10, + output_tokens: 20, + }, + }.to_json + + stub_request( + :post, + "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + ) + .with do |inner_request| + request = inner_request + true + end + .to_return(status: 200, body: content) + + response = proxy.generate("hello world", user: user) + + expect(request.headers["Authorization"]).to be_present + expect(request.headers["X-Amz-Content-Sha256"]).to be_present + + expected = { + "max_tokens" => 40_000, + "thinking" => { + "type" => "enabled", + "budget_tokens" => 10_000, + }, + "anthropic_version" => "bedrock-2023-05-31", + "messages" => [{ "role" => "user", "content" => "hello world" }], + "system" => "You are a helpful bot", + } + expect(JSON.parse(request.body)).to eq(expected) + + expect(response).to eq("hello sam") + + log = AiApiAuditLog.order(:id).last + expect(log.request_tokens).to eq(10) + expect(log.response_tokens).to eq(20) + end + it "supports claude 3 streaming" do proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb index fe8b9fe4..67aabd55 100644 --- a/spec/lib/completions/endpoints/open_ai_spec.rb +++ b/spec/lib/completions/endpoints/open_ai_spec.rb @@ -285,6 +285,23 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do end end + describe "max tokens remapping" do + it "remaps max_tokens to max_completion_tokens for reasoning models" do + model.update!(name: "o3-mini") + llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + + body_parsed = nil + stub_request(:post, "https://api.openai.com/v1/chat/completions").with( + body: ->(body) { body_parsed = JSON.parse(body) }, + ).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json) + + llm.generate("test", user: user, max_tokens: 1000) + + expect(body_parsed["max_completion_tokens"]).to 
diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb
index fe8b9fe4..67aabd55 100644
--- a/spec/lib/completions/endpoints/open_ai_spec.rb
+++ b/spec/lib/completions/endpoints/open_ai_spec.rb
@@ -285,6 +285,23 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
     end
   end

+  describe "max tokens remapping" do
+    it "remaps max_tokens to max_completion_tokens for reasoning models" do
+      model.update!(name: "o3-mini")
+      llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+      body_parsed = nil
+      stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
+        body: ->(body) { body_parsed = JSON.parse(body) },
+      ).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json)
+
+      llm.generate("test", user: user, max_tokens: 1000)
+
+      expect(body_parsed["max_completion_tokens"]).to eq(1000)
+      expect(body_parsed["max_tokens"]).to be_nil
+    end
+  end
+
   describe "forced tool use" do
     it "can properly force tool use" do
       llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
@@ -346,9 +363,11 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
           body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
         ).to_return(body: response)

-      result = llm.generate(prompt, user: user)
+      result = llm.generate(prompt, user: user, max_tokens: 1000)
       expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })

+      # we expect this not to be remapped on older non-reasoning models
+      expect(body_json[:max_tokens]).to eq(1000)
       log = AiApiAuditLog.order(:id).last
       expect(log.request_tokens).to eq(55)
diff --git a/spec/system/llms/ai_llm_spec.rb b/spec/system/llms/ai_llm_spec.rb
index 7431821f..f6a8982d 100644
--- a/spec/system/llms/ai_llm_spec.rb
+++ b/spec/system/llms/ai_llm_spec.rb
@@ -73,13 +73,15 @@ RSpec.describe "Managing LLM configurations", type: :system, js: true do
   context "when changing the provider" do
     it "has the correct provider params when visiting the edit page" do
-      llm = Fabricate(:llm_model, provider: "open_ai", provider_params: {})
+      llm =
+        Fabricate(:llm_model, provider: "anthropic", provider_params: { enable_reasoning: true })
       visit "/admin/plugins/discourse-ai/ai-llms/#{llm.id}/edit"

-      expect(form).to have_field_with_name("provider_params.organization")
       expect(form).to have_field_with_name("provider_params.disable_native_tools")
-      expect(form).to have_field_with_name("provider_params.disable_streaming")
-      expect(form).to have_field_with_name("provider_params.reasoning_effort")
+      expect(form).to have_field_with_name("provider_params.reasoning_tokens")
+
+      reasoning = form.field("provider_params.enable_reasoning")
+      expect(reasoning).to be_checked
     end

     it "correctly changes the provider params" do
       visit "/admin/plugins/discourse-ai/ai-llms"
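Note: the provider-params regression fix in ai-llm-editor-form.gjs keeps any previously saved value and only falls back to the enum default (or null). The same merge rule expressed as a Ruby sketch (field specs are illustrative; the JS version uses ?? so an explicit null also falls through to the default):

    def compute_provider_params(param_specs, current_params = {})
      param_specs.to_h do |key, spec|
        default = spec[:type] == "enum" ? spec[:default] : nil
        [key, current_params[key].nil? ? default : current_params[key]]
      end
    end

    specs = { enable_reasoning: { type: "checkbox" }, reasoning_tokens: { type: "number" } }
    compute_provider_params(specs, { enable_reasoning: true })
    # => { enable_reasoning: true, reasoning_tokens: nil }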