diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index 61fb4ff3..5a577657 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -180,6 +180,9 @@ en:
             temperature:
               label: "Temperature"
               description: "Temperature to use for the LLM. Increase to increase randomness (leave empty to use model default)"
+            max_output_tokens:
+              label: "Max output tokens"
+              description: "When specified, sets an upper bound to the maximum number of tokens the model can generate. Respects LLM's max output tokens limit"
 
     discourse_ai:
       title: "AI"
diff --git a/discourse_automation/llm_triage.rb b/discourse_automation/llm_triage.rb
index 0bfc46e6..cda48c84 100644
--- a/discourse_automation/llm_triage.rb
+++ b/discourse_automation/llm_triage.rb
@@ -24,6 +24,7 @@ if defined?(DiscourseAutomation)
     field :max_post_tokens, component: :text
     field :stop_sequences, component: :text_list, required: false
     field :temperature, component: :text
+    field :max_output_tokens, component: :text
 
     # Actions
     field :category, component: :category
@@ -85,6 +86,9 @@ if defined?(DiscourseAutomation)
         temperature = temperature.to_f
       end
 
+      max_output_tokens = fields.dig("max_output_tokens", "value").to_i
+      max_output_tokens = nil if max_output_tokens <= 0
+
      max_post_tokens = nil if max_post_tokens <= 0
 
      stop_sequences = fields.dig("stop_sequences", "value")
@@ -122,6 +126,7 @@ if defined?(DiscourseAutomation)
          stop_sequences: stop_sequences,
          automation: self.automation,
          temperature: temperature,
+          max_output_tokens: max_output_tokens,
          action: context["action"],
        )
      rescue => e
diff --git a/lib/automation/llm_triage.rb b/lib/automation/llm_triage.rb
index 9180b8c9..93f06712 100644
--- a/lib/automation/llm_triage.rb
+++ b/lib/automation/llm_triage.rb
@@ -21,6 +21,7 @@ module DiscourseAi
         temperature: nil,
         whisper: nil,
         reply_persona_id: nil,
+        max_output_tokens: nil,
         action: nil
       )
         if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
@@ -59,8 +60,8 @@ module DiscourseAi
         result =
           llm.generate(
             prompt,
+            max_tokens: max_output_tokens,
             temperature: temperature,
-            max_tokens: 700, # ~500 words
             user: Discourse.system_user,
             stop_sequences: stop_sequences,
             feature_name: "llm_triage",
diff --git a/spec/lib/discourse_automation/llm_triage_spec.rb b/spec/lib/discourse_automation/llm_triage_spec.rb
index 1b3ca690..b09ceda6 100644
--- a/spec/lib/discourse_automation/llm_triage_spec.rb
+++ b/spec/lib/discourse_automation/llm_triage_spec.rb
@@ -95,6 +95,7 @@ describe DiscourseAi::Automation::LlmTriage do
       reply_user.update!(admin: true)
       add_automation_field("include_personal_messages", true, type: :boolean)
       add_automation_field("temperature", "0.2")
+      add_automation_field("max_output_tokens", "700")
       post = Fabricate(:post, topic: personal_message)
 
       prompt_options = nil
@@ -107,6 +108,7 @@ describe DiscourseAi::Automation::LlmTriage do
       end
 
       expect(prompt_options[:temperature]).to eq(0.2)
+      expect(prompt_options[:max_tokens]).to eq(700)
 
       last_post = post.topic.reload.posts.order(:post_number).last
       expect(last_post.raw).to eq(canned_reply_text)
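
A note on the field normalization in discourse_automation/llm_triage.rb above: Ruby's #to_i returns 0 for nil, blank, and non-numeric strings, so the to_i / `nil if <= 0` pair collapses an unset or invalid max_output_tokens field to nil, and llm.generate is then called with `max_tokens: nil` rather than a bogus bound (leaving the cap to the model's own limit, per the new field description). A minimal standalone sketch of that behavior, in plain Ruby and not part of the patch:

# Standalone illustration (not from the patch): how the normalization in
# discourse_automation/llm_triage.rb treats various raw field values.
# Blank, missing, or non-numeric input coerces to 0 via #to_i, and anything
# <= 0 collapses to nil so that llm.generate receives max_tokens: nil.
[nil, "", "abc", "0", "-5", "700"].each do |raw|
  max_output_tokens = raw.to_i
  max_output_tokens = nil if max_output_tokens <= 0
  puts "#{raw.inspect} => #{max_output_tokens.inspect}"
end
# Prints: nil => nil, "" => nil, "abc" => nil, "0" => nil, "-5" => nil, "700" => 700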