diff --git a/lib/ai_bot/bot.rb b/lib/ai_bot/bot.rb index 2688769a..bb4bb07e 100644 --- a/lib/ai_bot/bot.rb +++ b/lib/ai_bot/bot.rb @@ -6,8 +6,10 @@ module DiscourseAi attr_reader :model BOT_NOT_FOUND = Class.new(StandardError) + # the future is agentic, allow for more turns MAX_COMPLETIONS = 8 + # limit is arbitrary, but 5 which was used in the past was too low MAX_TOOLS = 20 @@ -71,6 +73,8 @@ module DiscourseAi end def force_tool_if_needed(prompt, context) + return if prompt.tool_choice == :none + context[:chosen_tools] ||= [] forced_tools = persona.force_tool_use.map { |tool| tool.name } force_tool = forced_tools.find { |name| !context[:chosen_tools].include?(name) } @@ -105,7 +109,7 @@ module DiscourseAi needs_newlines = false tools_ran = 0 - while total_completions <= MAX_COMPLETIONS && ongoing_chain + while total_completions < MAX_COMPLETIONS && ongoing_chain tool_found = false force_tool_if_needed(prompt, context) @@ -202,8 +206,8 @@ module DiscourseAi total_completions += 1 - # do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS) - prompt.tools = [] if total_completions == MAX_COMPLETIONS + # do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS - 1) + prompt.tool_choice = :none if total_completions == MAX_COMPLETIONS - 1 end embed_thinking(raw_context) diff --git a/lib/completions/dialects/dialect.rb b/lib/completions/dialects/dialect.rb index 041a5f1e..2a335a12 100644 --- a/lib/completions/dialects/dialect.rb +++ b/lib/completions/dialects/dialect.rb @@ -46,10 +46,6 @@ module DiscourseAi VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/ - def can_end_with_assistant_msg? - false - end - def native_tool_support? 
false end @@ -66,16 +62,58 @@ module DiscourseAi prompt.tool_choice end - def translate - messages = prompt.messages + def self.no_more_tool_calls_text + # note, Anthropic must never prefill with an ending whitespace + "I WILL NOT USE TOOLS IN THIS REPLY, user expressed they wanted to stop using tool calls.\nHere is the best, complete, answer I can come up with given the information I have." + end - # Some models use an assistant msg to improve long-context responses. - if messages.last[:type] == :model && can_end_with_assistant_msg? - messages = messages.dup - messages.pop + def self.no_more_tool_calls_text_user + "DO NOT USE TOOLS IN YOUR REPLY. Return the best answer you can given the information I supplied you." + end + + def no_more_tool_calls_text + self.class.no_more_tool_calls_text + end + + def no_more_tool_calls_text_user + self.class.no_more_tool_calls_text_user + end + + def translate + messages = trim_messages(prompt.messages) + last_message = messages.last + inject_done_on_last_tool_call = false + + if !native_tool_support? 
&& last_message && last_message[:type].to_sym == :tool && + prompt.tool_choice == :none + inject_done_on_last_tool_call = true end - trim_messages(messages).map { |msg| send("#{msg[:type]}_msg", msg) }.compact + translated = + messages + .map do |msg| + case msg[:type].to_sym + when :system + system_msg(msg) + when :user + user_msg(msg) + when :model + model_msg(msg) + when :tool + if inject_done_on_last_tool_call && msg == last_message + tools_dialect.inject_done { tool_msg(msg) } + else + tool_msg(msg) + end + when :tool_call + tool_call_msg(msg) + else + raise ArgumentError, "Unknown message type: #{msg[:type]}" + end + end + .compact + + translated end def conversation_context diff --git a/lib/completions/dialects/xml_tools.rb b/lib/completions/dialects/xml_tools.rb index 2ca5c073..781c33bb 100644 --- a/lib/completions/dialects/xml_tools.rb +++ b/lib/completions/dialects/xml_tools.rb @@ -54,8 +54,11 @@ module DiscourseAi end end + DONE_MESSAGE = + "Regardless of what you think, REPLY IMMEDIATELY, WITHOUT MAKING ANY FURTHER TOOL CALLS, YOU ARE OUT OF TOOL CALL QUOTA!" + def from_raw_tool(raw_message) - (<<~TEXT).strip + result = (<<~TEXT).strip #{raw_message[:name] || raw_message[:id]} @@ -65,6 +68,12 @@ module DiscourseAi TEXT + + if @injecting_done + "#{result}\n\n#{DONE_MESSAGE}" + else + result + end end def from_raw_tool_call(raw_message) @@ -86,6 +95,13 @@ module DiscourseAi TEXT end + def inject_done(&blk) + @injecting_done = true + blk.call + ensure + @injecting_done = false + end + private attr_reader :raw_tools diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb index 75d1e460..dd44a82b 100644 --- a/lib/completions/endpoints/anthropic.rb +++ b/lib/completions/endpoints/anthropic.rb @@ -95,7 +95,18 @@ module DiscourseAi if prompt.has_tools? payload[:tools] = prompt.tools if dialect.tool_choice.present? 
- payload[:tool_choice] = { type: "tool", name: dialect.tool_choice } + if dialect.tool_choice == :none + payload[:tool_choice] = { type: "none" } + + # prefill prompt to nudge LLM to generate a response that is useful. + # without this LLM (even 3.7) can get confused and start text preambles for tool calls. + payload[:messages] << { + role: "assistant", + content: dialect.no_more_tool_calls_text, + } + else + payload[:tool_choice] = { type: "tool", name: dialect.tool_choice } + end end end diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb index d5b56b70..915e9d3b 100644 --- a/lib/completions/endpoints/aws_bedrock.rb +++ b/lib/completions/endpoints/aws_bedrock.rb @@ -122,7 +122,19 @@ module DiscourseAi if prompt.has_tools? payload[:tools] = prompt.tools if dialect.tool_choice.present? - payload[:tool_choice] = { type: "tool", name: dialect.tool_choice } + if dialect.tool_choice == :none + # not supported on bedrock as of 2025-03-24 + # retest in 6 months + # payload[:tool_choice] = { type: "none" } + + # prefill prompt to nudge LLM to generate a response that is useful, instead of trying to call a tool + payload[:messages] << { + role: "assistant", + content: dialect.no_more_tool_calls_text, + } + else + payload[:tool_choice] = { type: "tool", name: dialect.tool_choice } + end end end elsif dialect.is_a?(DiscourseAi::Completions::Dialects::Nova) diff --git a/lib/completions/endpoints/gemini.rb b/lib/completions/endpoints/gemini.rb index d054813e..a99bb80b 100644 --- a/lib/completions/endpoints/gemini.rb +++ b/lib/completions/endpoints/gemini.rb @@ -72,10 +72,14 @@ module DiscourseAi function_calling_config = { mode: "AUTO" } if dialect.tool_choice.present? 
- function_calling_config = { - mode: "ANY", - allowed_function_names: [dialect.tool_choice], - } + if dialect.tool_choice == :none + function_calling_config = { mode: "NONE" } + else + function_calling_config = { + mode: "ANY", + allowed_function_names: [dialect.tool_choice], + } + end end payload[:tool_config] = { function_calling_config: function_calling_config } diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb index 24cd1285..f97a337d 100644 --- a/lib/completions/endpoints/open_ai.rb +++ b/lib/completions/endpoints/open_ai.rb @@ -92,12 +92,16 @@ module DiscourseAi if dialect.tools.present? payload[:tools] = dialect.tools if dialect.tool_choice.present? - payload[:tool_choice] = { - type: "function", - function: { - name: dialect.tool_choice, - }, - } + if dialect.tool_choice == :none + payload[:tool_choice] = "none" + else + payload[:tool_choice] = { + type: "function", + function: { + name: dialect.tool_choice, + }, + } + end end end end diff --git a/spec/lib/completions/dialects/dialect_spec.rb b/spec/lib/completions/dialects/dialect_spec.rb index 73a157a1..f210e0c4 100644 --- a/spec/lib/completions/dialects/dialect_spec.rb +++ b/spec/lib/completions/dialects/dialect_spec.rb @@ -7,6 +7,18 @@ class TestDialect < DiscourseAi::Completions::Dialects::Dialect trim_messages(messages) end + def system_msg(msg) + msg + end + + def user_msg(msg) + msg + end + + def model_msg(msg) + msg + end + def tokenizer DiscourseAi::Tokenizer::OpenAiTokenizer end @@ -15,6 +27,57 @@ end RSpec.describe DiscourseAi::Completions::Dialects::Dialect do fab!(:llm_model) + describe "#translate" do + let(:five_token_msg) { "This represents five tokens." 
} + let(:tools) do + [ + { + name: "echo", + description: "echo a string", + parameters: [ + { name: "text", type: "string", description: "string to echo", required: true }, + ], + }, + ] + end + + it "injects done message when tool_choice is :none and last message follows tool pattern" do + tool_call_prompt = { name: "echo", arguments: { text: "test message" } } + + prompt = DiscourseAi::Completions::Prompt.new("System instructions", tools: tools) + prompt.push(type: :user, content: "echo test message") + prompt.push(type: :tool_call, content: tool_call_prompt.to_json, id: "123", name: "echo") + prompt.push(type: :tool, content: "test message".to_json, name: "echo", id: "123") + prompt.tool_choice = :none + + dialect = TestDialect.new(prompt, llm_model) + dialect.max_prompt_tokens = 100 # Set high enough to avoid trimming + + translated = dialect.translate + + expect(translated).to eq( + [ + { type: :system, content: "System instructions" }, + { type: :user, content: "echo test message" }, + { + type: :tool_call, + content: + "\n\necho\n\ntest message\n\n\n", + id: "123", + name: "echo", + }, + { + type: :tool, + id: "123", + name: "echo", + content: + "\n\necho\n\n\"test message\"\n\n\n\n\n#{::DiscourseAi::Completions::Dialects::XmlTools::DONE_MESSAGE}", + }, + ], + ) + end + end + describe "#trim_messages" do let(:five_token_msg) { "This represents five tokens." 
} diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb index 625d2184..f2f79c5f 100644 --- a/spec/lib/completions/endpoints/anthropic_spec.rb +++ b/spec/lib/completions/endpoints/anthropic_spec.rb @@ -714,4 +714,59 @@ data: {"type":"content_block_start","index":0,"content_block":{"type":"redacted_ expect(parsed_body[:max_tokens]).to eq(500) end end + + describe "disabled tool use" do + it "can properly disable tool use with :none" do + prompt = + DiscourseAi::Completions::Prompt.new( + "You are a bot", + messages: [type: :user, id: "user1", content: "don't use any tools please"], + tools: [echo_tool], + tool_choice: :none, + ) + + response_body = { + id: "msg_01RdJkxCbsEj9VFyFYAkfy2S", + type: "message", + role: "assistant", + model: "claude-3-haiku-20240307", + content: [ + { type: "text", text: "I won't use any tools. Here's a direct response instead." }, + ], + stop_reason: "end_turn", + stop_sequence: nil, + usage: { + input_tokens: 345, + output_tokens: 65, + }, + }.to_json + + parsed_body = nil + stub_request(:post, url).with( + body: + proc do |req_body| + parsed_body = JSON.parse(req_body, symbolize_names: true) + true + end, + ).to_return(status: 200, body: response_body) + + result = llm.generate(prompt, user: Discourse.system_user) + + # Verify that tool_choice is set to { type: "none" } + expect(parsed_body[:tool_choice]).to eq({ type: "none" }) + + # Verify that an assistant message with no_more_tool_calls_text was added + messages = parsed_body[:messages] + expect(messages.length).to eq(2) # user message + added assistant message + + last_message = messages.last + expect(last_message[:role]).to eq("assistant") + + expect(last_message[:content]).to eq( + DiscourseAi::Completions::Dialects::Dialect.no_more_tool_calls_text, + ) + + expect(result).to eq("I won't use any tools. 
Here's a direct response instead.") + end + end end diff --git a/spec/lib/completions/endpoints/aws_bedrock_spec.rb b/spec/lib/completions/endpoints/aws_bedrock_spec.rb index 373ba4c9..3a424451 100644 --- a/spec/lib/completions/endpoints/aws_bedrock_spec.rb +++ b/spec/lib/completions/endpoints/aws_bedrock_spec.rb @@ -484,4 +484,66 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do expect(request_body["max_tokens"]).to eq(500) end end + + describe "disabled tool use" do + it "handles tool_choice: :none by adding a prefill message instead of using tool_choice param" do + proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + request = nil + + # Create a prompt with tool_choice: :none + prompt = + DiscourseAi::Completions::Prompt.new( + "You are a helpful assistant", + messages: [{ type: :user, content: "don't use any tools please" }], + tools: [ + { + name: "echo", + description: "echo something", + parameters: [ + { name: "text", type: "string", description: "text to echo", required: true }, + ], + }, + ], + tool_choice: :none, + ) + + # Mock response from Bedrock + content = { + content: [text: "I won't use any tools. 
Here's a direct response instead."], + usage: { + input_tokens: 25, + output_tokens: 15, + }, + }.to_json + + stub_request( + :post, + "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke", + ) + .with do |inner_request| + request = inner_request + true + end + .to_return(status: 200, body: content) + + proxy.generate(prompt, user: user) + + # Parse the request body + request_body = JSON.parse(request.body) + + # Verify that tool_choice is NOT present (not supported in Bedrock) + expect(request_body).not_to have_key("tool_choice") + + # Verify that an assistant message was added with no_more_tool_calls_text + messages = request_body["messages"] + expect(messages.length).to eq(2) # user message + added assistant message + + last_message = messages.last + expect(last_message["role"]).to eq("assistant") + + expect(last_message["content"]).to eq( + DiscourseAi::Completions::Dialects::Dialect.no_more_tool_calls_text, + ) + end + end end diff --git a/spec/lib/completions/endpoints/gemini_spec.rb b/spec/lib/completions/endpoints/gemini_spec.rb index 0c7b9208..fe7f4eb6 100644 --- a/spec/lib/completions/endpoints/gemini_spec.rb +++ b/spec/lib/completions/endpoints/gemini_spec.rb @@ -377,4 +377,60 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do expect(output.join).to eq("Hello World Sam") end + + it "can properly disable tool use with :none" do + prompt = DiscourseAi::Completions::Prompt.new("Hello", tools: [echo_tool], tool_choice: :none) + + response = gemini_mock.response("I won't use any tools").to_json + + req_body = nil + + llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + url = "#{model.url}:generateContent?key=123" + + stub_request(:post, url).with( + body: + proc do |_req_body| + req_body = _req_body + true + end, + ).to_return(status: 200, body: response) + + response = llm.generate(prompt, user: user) + + expect(response).to eq("I won't use any tools") + + parsed = 
JSON.parse(req_body, symbolize_names: true) + + # Verify that function_calling_config mode is set to "NONE" + expect(parsed[:tool_config]).to eq({ function_calling_config: { mode: "NONE" } }) + end + + it "can properly force specific tool use" do + prompt = DiscourseAi::Completions::Prompt.new("Hello", tools: [echo_tool], tool_choice: "echo") + + response = gemini_mock.response("World").to_json + + req_body = nil + + llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + url = "#{model.url}:generateContent?key=123" + + stub_request(:post, url).with( + body: + proc do |_req_body| + req_body = _req_body + true + end, + ).to_return(status: 200, body: response) + + response = llm.generate(prompt, user: user) + + parsed = JSON.parse(req_body, symbolize_names: true) + + # Verify that function_calling_config is correctly set to ANY mode with the specified tool + expect(parsed[:tool_config]).to eq( + { function_calling_config: { mode: "ANY", allowed_function_names: ["echo"] } }, + ) + end end diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb index fb9f07f3..d48bffb5 100644 --- a/spec/lib/completions/endpoints/open_ai_spec.rb +++ b/spec/lib/completions/endpoints/open_ai_spec.rb @@ -395,6 +395,65 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do end end + describe "disabled tool use" do + it "can properly disable tool use with :none" do + llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + + tools = [ + { + name: "echo", + description: "echo something", + parameters: [ + { name: "text", type: "string", description: "text to echo", required: true }, + ], + }, + ] + + prompt = + DiscourseAi::Completions::Prompt.new( + "You are a bot", + messages: [type: :user, id: "user1", content: "don't use any tools please"], + tools: tools, + tool_choice: :none, + ) + + response = { + id: "chatcmpl-9JxkAzzaeO4DSV3omWvok9TKhCjBH", + object: "chat.completion", + created: 1_714_544_914, + model: 
"gpt-4-turbo-2024-04-09", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "I won't use any tools. Here's a direct response instead.", + }, + logprobs: nil, + finish_reason: "stop", + }, + ], + usage: { + prompt_tokens: 55, + completion_tokens: 13, + total_tokens: 68, + }, + system_fingerprint: "fp_ea6eb70039", + }.to_json + + body_json = nil + stub_request(:post, "https://api.openai.com/v1/chat/completions").with( + body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) }, + ).to_return(body: response) + + result = llm.generate(prompt, user: user) + + # Verify that tool_choice is set to "none" in the request + expect(body_json[:tool_choice]).to eq("none") + expect(result).to eq("I won't use any tools. Here's a direct response instead.") + end + end + describe "parameter disabling" do it "excludes disabled parameters from the request" do model.update!(provider_params: { disable_top_p: true, disable_temperature: true })