From 6059b6e111e4779ba91cb88cb7ba3c18d2fbe3e6 Mon Sep 17 00:00:00 2001
From: Roman Rizzi
Date: Mon, 21 Jul 2025 15:36:39 -0300
Subject: [PATCH] FIX: Customizable max_output_tokens for AI triage. (#1510)

We enforced a hard limit of 700 tokens in this script, which is not
enough for thinking models, as they can quickly use all of them.

A temporary solution could be bumping the limit, but there is no
guarantee we won't hit it again, and it's hard to find one value that
fits all scenarios.

Another alternative could be removing it and relying on the LLM
config's `max_output_tokens`, but if you want different limits for
different rules, you are forced to duplicate the LLM config each time.

Considering all this, we are adding a dedicated `max_output_tokens`
field to the triage script, giving you an easy way to tweak the limit
to your needs. If left empty, no limit is applied.
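
For illustration, a minimal sketch of how the new field value is
coerced, mirroring the handler change below (the `fields` hash here is
a stand-in example input, not the real automation serializer):

    # Blank or non-positive values disable the cap entirely.
    fields = { "max_output_tokens" => { "value" => "" } } # example input

    max_output_tokens = fields.dig("max_output_tokens", "value").to_i
    max_output_tokens = nil if max_output_tokens <= 0

    max_output_tokens # => nil, so no max_tokens cap reaches llm.generate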
---
 config/locales/client.en.yml                     | 3 +++
 discourse_automation/llm_triage.rb               | 5 +++++
 lib/automation/llm_triage.rb                     | 3 ++-
 spec/lib/discourse_automation/llm_triage_spec.rb | 2 ++
 4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index 61fb4ff3..5a577657 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -180,6 +180,9 @@ en:
             temperature:
               label: "Temperature"
               description: "Temperature to use for the LLM. Increase to increase randomness (leave empty to use model default)"
+            max_output_tokens:
+              label: "Max output tokens"
+              description: "When specified, sets an upper bound on the number of tokens the model can generate. Respects the LLM's max output tokens limit"
 
     discourse_ai:
       title: "AI"
diff --git a/discourse_automation/llm_triage.rb b/discourse_automation/llm_triage.rb
index 0bfc46e6..cda48c84 100644
--- a/discourse_automation/llm_triage.rb
+++ b/discourse_automation/llm_triage.rb
@@ -24,6 +24,7 @@ if defined?(DiscourseAutomation)
     field :max_post_tokens, component: :text
     field :stop_sequences, component: :text_list, required: false
     field :temperature, component: :text
+    field :max_output_tokens, component: :text
 
     # Actions
     field :category, component: :category
@@ -85,6 +86,9 @@ if defined?(DiscourseAutomation)
         temperature = temperature.to_f
       end
 
+      max_output_tokens = fields.dig("max_output_tokens", "value").to_i
+      max_output_tokens = nil if max_output_tokens <= 0
+
       max_post_tokens = nil if max_post_tokens <= 0
 
       stop_sequences = fields.dig("stop_sequences", "value")
@@ -122,6 +126,7 @@ if defined?(DiscourseAutomation)
           stop_sequences: stop_sequences,
           automation: self.automation,
           temperature: temperature,
+          max_output_tokens: max_output_tokens,
           action: context["action"],
         )
       rescue => e
diff --git a/lib/automation/llm_triage.rb b/lib/automation/llm_triage.rb
index 9180b8c9..93f06712 100644
--- a/lib/automation/llm_triage.rb
+++ b/lib/automation/llm_triage.rb
@@ -21,6 +21,7 @@ module DiscourseAi
         temperature: nil,
         whisper: nil,
         reply_persona_id: nil,
+        max_output_tokens: nil,
         action: nil
       )
         if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
@@ -59,8 +60,8 @@ module DiscourseAi
       result =
         llm.generate(
           prompt,
+          max_tokens: max_output_tokens,
           temperature: temperature,
-          max_tokens: 700, # ~500 words
           user: Discourse.system_user,
           stop_sequences: stop_sequences,
           feature_name: "llm_triage",
diff --git a/spec/lib/discourse_automation/llm_triage_spec.rb b/spec/lib/discourse_automation/llm_triage_spec.rb
index 1b3ca690..b09ceda6 100644
--- a/spec/lib/discourse_automation/llm_triage_spec.rb
+++ b/spec/lib/discourse_automation/llm_triage_spec.rb
@@ -95,6 +95,7 @@ describe DiscourseAi::Automation::LlmTriage do
       reply_user.update!(admin: true)
       add_automation_field("include_personal_messages", true, type: :boolean)
       add_automation_field("temperature", "0.2")
+      add_automation_field("max_output_tokens", "700")
       post = Fabricate(:post, topic: personal_message)
 
       prompt_options = nil
@@ -107,6 +108,7 @@ describe DiscourseAi::Automation::LlmTriage do
       end
 
       expect(prompt_options[:temperature]).to eq(0.2)
+      expect(prompt_options[:max_tokens]).to eq(700)
 
       last_post = post.topic.reload.posts.order(:post_number).last
       expect(last_post.raw).to eq(canned_reply_text)