DEV: Use structured responses for summaries (#1252)

* DEV: Use structured responses for summaries
* Fix system specs
* Make response_format a first class citizen and update endpoints to support it
* Response format can be specified in the persona
* lint
* switch to jsonb and make column nullable
* Reify structured output chunks. Move JSON parsing to the depths of Completion
* Switch to JsonStreamingTracker for partial JSON parsing

This commit is contained in:
parent c6a307b473
commit c0a2d4c935
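The commit threads a persona-defined `response_format` JSON schema from the admin UI down to every completion endpoint, and streams the reply back as a `StructuredOutput` object instead of raw text. A minimal usage sketch, pieced together from the specs in this diff (the schema literal and the `model`/`user` records are illustrative, not part of the commit):

    schema = {
      type: "json_schema",
      json_schema: {
        name: "reply",
        schema: {
          type: "object",
          properties: { summary: { type: "string" } },
          required: ["summary"],
          additionalProperties: false,
        },
        strict: true,
      },
    }

    llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

    llm.generate("Summarize this topic", response_format: schema, user: user) do |partial|
      # Partials arrive as DiscourseAi::Completions::StructuredOutput;
      # read_latest_buffered_chunk returns the unread tail of each string property.
      partial.read_latest_buffered_chunk # => e.g. { summary: "so far..." }
    end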
@@ -221,6 +221,10 @@ module DiscourseAi
         permitted[:tools] = permit_tools(tools)
       end
 
+      if response_format = params.dig(:ai_persona, :response_format)
+        permitted[:response_format] = permit_response_format(response_format)
+      end
+
       permitted
     end
 
@@ -235,6 +239,18 @@ module DiscourseAi
         [tool, options, !!force_tool]
       end
     end
 
+    def permit_response_format(response_format)
+      return [] if !response_format.is_a?(Array)
+
+      response_format.map do |element|
+        if element && element.is_a?(ActionController::Parameters)
+          element.permit!
+        else
+          false
+        end
+      end
+    end
   end
 end
 
@@ -325,9 +325,11 @@ class AiPersona < ActiveRecord::Base
   end
 
   def system_persona_unchangeable
+    error_msg = I18n.t("discourse_ai.ai_bot.personas.cannot_edit_system_persona")
+
     if top_p_changed? || temperature_changed? || system_prompt_changed? || name_changed? ||
          description_changed?
-      errors.add(:base, I18n.t("discourse_ai.ai_bot.personas.cannot_edit_system_persona"))
+      errors.add(:base, error_msg)
     elsif tools_changed?
       old_tools = tools_change[0]
       new_tools = tools_change[1]
@@ -335,9 +337,12 @@ class AiPersona < ActiveRecord::Base
       old_tool_names = old_tools.map { |t| t.is_a?(Array) ? t[0] : t }.to_set
       new_tool_names = new_tools.map { |t| t.is_a?(Array) ? t[0] : t }.to_set
 
-      if old_tool_names != new_tool_names
-        errors.add(:base, I18n.t("discourse_ai.ai_bot.personas.cannot_edit_system_persona"))
-      end
+      errors.add(:base, error_msg) if old_tool_names != new_tool_names
+    elsif response_format_changed?
+      old_format = response_format_change[0].map { |f| f["key"] }.to_set
+      new_format = response_format_change[1].map { |f| f["key"] }.to_set
+
+      errors.add(:base, error_msg) if old_format != new_format
     end
   end
 
@@ -395,6 +400,7 @@ end
 # rag_llm_model_id               :bigint
 # default_llm_id                 :bigint
 # question_consolidator_llm_id   :bigint
+# response_format                :jsonb
 #
 # Indexes
 #
@@ -30,7 +30,8 @@ class LocalizedAiPersonaSerializer < ApplicationSerializer
     :allow_chat_direct_messages,
     :allow_topic_mentions,
     :allow_personal_messages,
-    :force_default_llm
+    :force_default_llm,
+    :response_format
 
   has_one :user, serializer: BasicUserSerializer, embed: :object
   has_many :rag_uploads, serializer: UploadSerializer, embed: :object
@@ -33,6 +33,7 @@ const CREATE_ATTRIBUTES = [
   "allow_topic_mentions",
   "allow_chat_channel_mentions",
   "allow_chat_direct_messages",
+  "response_format",
 ];
 
 const SYSTEM_ATTRIBUTES = [
@@ -60,6 +61,7 @@ const SYSTEM_ATTRIBUTES = [
   "allow_topic_mentions",
   "allow_chat_channel_mentions",
   "allow_chat_direct_messages",
+  "response_format",
 ];
 
 export default class AiPersona extends RestModel {
@@ -151,6 +153,7 @@ export default class AiPersona extends RestModel {
     const attrs = this.getProperties(CREATE_ATTRIBUTES);
     this.populateTools(attrs);
    attrs.forced_tool_count = this.forced_tool_count || -1;
+    attrs.response_format = attrs.response_format || [];
 
     return attrs;
   }
@@ -15,6 +15,7 @@ import Group from "discourse/models/group";
 import { i18n } from "discourse-i18n";
 import AdminUser from "admin/models/admin-user";
 import GroupChooser from "select-kit/components/group-chooser";
+import AiPersonaResponseFormatEditor from "../components/modal/ai-persona-response-format-editor";
 import AiLlmSelector from "./ai-llm-selector";
 import AiPersonaToolOptions from "./ai-persona-tool-options";
 import AiToolSelector from "./ai-tool-selector";
@@ -325,6 +326,8 @@ export default class PersonaEditor extends Component {
           <field.Textarea />
         </form.Field>
 
+        <AiPersonaResponseFormatEditor @form={{form}} @data={{data}} />
+
         <form.Field
           @name="default_llm_id"
           @title={{i18n "discourse_ai.ai_persona.default_llm"}}
@@ -0,0 +1,99 @@
+import Component from "@glimmer/component";
+import { tracked } from "@glimmer/tracking";
+import { fn, hash } from "@ember/helper";
+import { action } from "@ember/object";
+import { gt } from "truth-helpers";
+import ModalJsonSchemaEditor from "discourse/components/modal/json-schema-editor";
+import { prettyJSON } from "discourse/lib/formatter";
+import { i18n } from "discourse-i18n";
+
+export default class AiPersonaResponseFormatEditor extends Component {
+  @tracked showJsonEditorModal = false;
+
+  jsonSchema = {
+    type: "array",
+    uniqueItems: true,
+    title: i18n("discourse_ai.ai_persona.response_format.modal.root_title"),
+    items: {
+      type: "object",
+      title: i18n("discourse_ai.ai_persona.response_format.modal.key_title"),
+      properties: {
+        key: {
+          type: "string",
+        },
+        type: {
+          type: "string",
+          enum: ["string", "integer", "boolean"],
+        },
+      },
+    },
+  };
+
+  get editorTitle() {
+    return i18n("discourse_ai.ai_persona.response_format.title");
+  }
+
+  get responseFormatAsJSON() {
+    return JSON.stringify(this.args.data.response_format);
+  }
+
+  get displayJSON() {
+    const toDisplay = {};
+
+    this.args.data.response_format.forEach((keyDesc) => {
+      toDisplay[keyDesc.key] = keyDesc.type;
+    });
+
+    return prettyJSON(toDisplay);
+  }
+
+  @action
+  openModal() {
+    this.showJsonEditorModal = true;
+  }
+
+  @action
+  closeModal() {
+    this.showJsonEditorModal = false;
+  }
+
+  @action
+  updateResponseFormat(form, value) {
+    form.set("response_format", JSON.parse(value));
+  }
+
+  <template>
+    <@form.Container @title={{this.editorTitle}} @format="large">
+      <div class="ai-persona-editor__response-format">
+        {{#if (gt @data.response_format.length 0)}}
+          <pre class="ai-persona-editor__response-format-pre"><code
+            >{{this.displayJSON}}</code></pre>
+        {{else}}
+          <div class="ai-persona-editor__response-format-none">
+            {{i18n "discourse_ai.ai_persona.response_format.no_format"}}
+          </div>
+        {{/if}}
+
+        <@form.Button
+          @action={{this.openModal}}
+          @label="discourse_ai.ai_persona.response_format.open_modal"
+          @disabled={{@data.system}}
+        />
+      </div>
+    </@form.Container>
+
+    {{#if this.showJsonEditorModal}}
+      <ModalJsonSchemaEditor
+        @model={{hash
+          value=this.responseFormatAsJSON
+          updateValue=(fn this.updateResponseFormat @form)
+          settingName=this.editorTitle
+          jsonSchema=this.jsonSchema
+        }}
+        @closeModal={{this.closeModal}}
+      />
+    {{/if}}
+  </template>
+}
@@ -52,6 +52,21 @@
     margin-bottom: 10px;
     font-size: var(--font-down-1);
   }
 
+  &__response-format {
+    width: 100%;
+    display: block;
+  }
+
+  &__response-format-pre {
+    margin-bottom: 0;
+    white-space: pre-line;
+  }
+
+  &__response-format-none {
+    margin-bottom: 1em;
+    margin-top: 0.5em;
+  }
 }
 
 .rag-options {
@@ -323,6 +323,13 @@ en:
       rag_conversation_chunks: "Search conversation chunks"
       rag_conversation_chunks_help: "The number of chunks to use for the RAG model searches. Increase to increase the amount of context the AI can use."
       persona_description: "Personas are a powerful feature that allows you to customize the behavior of the AI engine in your Discourse forum. They act as a 'system message' that guides the AI's responses and interactions, helping to create a more personalized and engaging user experience."
+      response_format:
+        title: "JSON response format"
+        no_format: "No JSON format specified"
+        open_modal: "Edit"
+        modal:
+          root_title: "Response structure"
+          key_title: "Key"
     list:
       enabled: "AI Bot?"
@@ -72,6 +72,8 @@ DiscourseAi::Personas::Persona.system_personas.each do |persona_class, id|
 
       persona.tools = tools.map { |name, value| [name, value] }
 
+      persona.response_format = instance.response_format
+
       persona.system_prompt = instance.system_prompt
       persona.top_p = instance.top_p
       persona.temperature = instance.temperature
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+class AddResponseFormatJsonToPersonass < ActiveRecord::Migration[7.2]
+  def change
+    add_column :ai_personas, :response_format, :jsonb
+  end
+end
@@ -10,7 +10,7 @@ class DiscourseAi::Completions::AnthropicMessageProcessor
       @raw_json = +""
       @tool_call = DiscourseAi::Completions::ToolCall.new(id: id, name: name, parameters: {})
       @streaming_parser =
-        DiscourseAi::Completions::ToolCallProgressTracker.new(self) if partial_tool_calls
+        DiscourseAi::Completions::JsonStreamingTracker.new(self) if partial_tool_calls
     end
 
     def append(json)
@@ -42,6 +42,7 @@ module DiscourseAi
         result = { system: system, messages: messages }
         result[:inferenceConfig] = inference_config if inference_config.present?
         result[:toolConfig] = tool_config if tool_config.present?
+        result[:response_format] = { type: "json_object" } if options[:response_format].present?
 
         result
       end
@@ -88,10 +88,16 @@ module DiscourseAi
        def prepare_payload(prompt, model_params, dialect)
          @native_tool_support = dialect.native_tool_support?
 
-          payload = default_options(dialect).merge(model_params).merge(messages: prompt.messages)
+          payload =
+            default_options(dialect).merge(model_params.except(:response_format)).merge(
+              messages: prompt.messages,
+            )
 
          payload[:system] = prompt.system_prompt if prompt.system_prompt.present?
          payload[:stream] = true if @streaming_mode
 
+          prefilled_message = +""
+
          if prompt.has_tools?
            payload[:tools] = prompt.tools
            if dialect.tool_choice.present?
@@ -100,16 +106,24 @@ module DiscourseAi
 
              # prefill prompt to nudge LLM to generate a response that is useful.
              # without this LLM (even 3.7) can get confused and start text preambles for a tool calls.
-              payload[:messages] << {
-                role: "assistant",
-                content: dialect.no_more_tool_calls_text,
-              }
+              prefilled_message << dialect.no_more_tool_calls_text
            else
              payload[:tool_choice] = { type: "tool", name: prompt.tool_choice }
            end
          end
 
+          # Prefill prompt to force JSON output.
+          if model_params[:response_format].present?
+            prefilled_message << " " if !prefilled_message.empty?
+            prefilled_message << "{"
+            @forced_json_through_prefill = true
+          end
+
+          if !prefilled_message.empty?
+            payload[:messages] << { role: "assistant", content: prefilled_message }
+          end
+
          payload
        end
 
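Note on the prefill mechanism above: Anthropic has no native JSON mode, so when `response_format` is present the endpoint appends an assistant turn containing `{` to force the model to continue a JSON object; `@forced_json_through_prefill` lets the base endpoint re-attach that opening brace to the streamed output. The resulting request fragment, mirroring the Anthropic spec later in this diff, looks like:

    messages: [
      { role: "user", content: "user1: hello" },
      { role: "assistant", content: "{" }, # prefill that forces JSON output
    ]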
@@ -116,9 +116,14 @@ module DiscourseAi
          payload = nil
 
          if dialect.is_a?(DiscourseAi::Completions::Dialects::Claude)
-            payload = default_options(dialect).merge(model_params).merge(messages: prompt.messages)
+            payload =
+              default_options(dialect).merge(model_params.except(:response_format)).merge(
+                messages: prompt.messages,
+              )
            payload[:system] = prompt.system_prompt if prompt.system_prompt.present?
 
+            prefilled_message = +""
+
            if prompt.has_tools?
              payload[:tools] = prompt.tools
              if dialect.tool_choice.present?
@@ -128,15 +133,23 @@ module DiscourseAi
                # payload[:tool_choice] = { type: "none" }
 
                # prefill prompt to nudge LLM to generate a response that is useful, instead of trying to call a tool
-                payload[:messages] << {
-                  role: "assistant",
-                  content: dialect.no_more_tool_calls_text,
-                }
+                prefilled_message << dialect.no_more_tool_calls_text
              else
                payload[:tool_choice] = { type: "tool", name: prompt.tool_choice }
              end
            end
 
+            # Prefill prompt to force JSON output.
+            if model_params[:response_format].present?
+              prefilled_message << " " if !prefilled_message.empty?
+              prefilled_message << "{"
+              @forced_json_through_prefill = true
+            end
+
+            if !prefilled_message.empty?
+              payload[:messages] << { role: "assistant", content: prefilled_message }
+            end
          elsif dialect.is_a?(DiscourseAi::Completions::Dialects::Nova)
            payload = prompt.to_payload(default_options(dialect).merge(model_params))
          else
@@ -73,6 +73,7 @@ module DiscourseAi
        LlmQuota.check_quotas!(@llm_model, user)
        start_time = Time.now
 
+        @forced_json_through_prefill = false
        @partial_tool_calls = partial_tool_calls
        @output_thinking = output_thinking
 
@@ -106,6 +107,18 @@ module DiscourseAi
 
        prompt = dialect.translate
 
+        structured_output = nil
+
+        if model_params[:response_format].present?
+          schema_properties =
+            model_params[:response_format].dig(:json_schema, :schema, :properties)
+
+          if schema_properties.present?
+            structured_output =
+              DiscourseAi::Completions::StructuredOutput.new(schema_properties)
+          end
+        end
+
        FinalDestination::HTTP.start(
          model_uri.host,
          model_uri.port,
@@ -123,6 +136,10 @@ module DiscourseAi
 
          request = prepare_request(request_body)
 
+          # Some providers rely on prefill to return structured outputs, so the start
+          # of the JSON won't be included in the response. Supply it to keep JSON valid.
+          structured_output << +"{" if structured_output && @forced_json_through_prefill
+
          http.request(request) do |response|
            if response.code.to_i != 200
              Rails.logger.error(
@@ -140,10 +157,17 @@ module DiscourseAi
            xml_stripper =
              DiscourseAi::Completions::XmlTagStripper.new(to_strip) if to_strip.present?
 
-            if @streaming_mode && xml_stripper
+            if @streaming_mode
              blk =
                lambda do |partial, cancel|
-                  partial = xml_stripper << partial if partial.is_a?(String)
+                  if partial.is_a?(String)
+                    partial = xml_stripper << partial if xml_stripper
+
+                    if structured_output.present?
+                      structured_output << partial
+                      partial = structured_output
+                    end
+                  end
                  orig_blk.call(partial, cancel) if partial
                end
            end
@@ -167,6 +191,7 @@ module DiscourseAi
              xml_stripper: xml_stripper,
              partials_raw: partials_raw,
              response_raw: response_raw,
+              structured_output: structured_output,
            )
            return response_data
          end
@@ -373,7 +398,8 @@ module DiscourseAi
        xml_tool_processor:,
        xml_stripper:,
        partials_raw:,
-        response_raw:
+        response_raw:,
+        structured_output:
      )
        response_raw << response.read_body
        response_data = decode(response_raw)
@@ -403,6 +429,26 @@ module DiscourseAi
 
        response_data.reject!(&:blank?)
 
+        if structured_output.present?
+          has_string_response = false
+
+          response_data =
+            response_data.reduce([]) do |memo, data|
+              if data.is_a?(String)
+                structured_output << data
+                has_string_response = true
+                next(memo)
+              else
+                memo << data
+              end
+
+              memo
+            end
+
+          # We only include the structured output if there was actually a structured response
+          response_data << structured_output if has_string_response
+        end
+
        # this is to keep stuff backwards compatible
        response_data = response_data.first if response_data.length == 1
 
@@ -40,6 +40,8 @@ module DiscourseAi
            "The number of completions you requested exceed the number of canned responses"
        end
 
+        response = as_structured_output(response) if model_params[:response_format].present?
+
        raise response if response.is_a?(StandardError)
 
        @completions += 1
@@ -54,6 +56,8 @@ module DiscourseAi
          yield(response, cancel_fn)
        elsif is_thinking?(response)
          yield(response, cancel_fn)
+        elsif is_structured_output?(response)
+          yield(response, cancel_fn)
        else
          response.each_char do |char|
            break if cancelled
@@ -80,6 +84,20 @@ module DiscourseAi
      def is_tool?(response)
        response.is_a?(DiscourseAi::Completions::ToolCall)
      end
+
+      def is_structured_output?(response)
+        response.is_a?(DiscourseAi::Completions::StructuredOutput)
+      end
+
+      def as_structured_output(response)
+        schema_properties = model_params[:response_format].dig(:json_schema, :schema, :properties)
+        return response if schema_properties.blank?
+
+        output = DiscourseAi::Completions::StructuredOutput.new(schema_properties)
+        output << { schema_properties.keys.first => response }.to_json
+
+        output
+      end
    end
  end
 end
@@ -84,7 +84,16 @@ module DiscourseAi
 
            payload[:tool_config] = { function_calling_config: function_calling_config }
          end
-          payload[:generationConfig].merge!(model_params) if model_params.present?
+          if model_params.present?
+            payload[:generationConfig].merge!(model_params.except(:response_format))
+
+            if model_params[:response_format].present?
+              # https://ai.google.dev/api/generate-content#generationconfig
+              payload[:generationConfig][:responseSchema] = model_params[:response_format]
+              payload[:generationConfig][:responseMimeType] = "application/json"
+            end
+          end
+
          payload
        end
 
@@ -34,7 +34,12 @@ module DiscourseAi
        end
 
        def prepare_payload(prompt, model_params, dialect)
-          payload = default_options.merge(model_params).merge(messages: prompt)
+          payload =
+            default_options.merge(model_params.except(:response_format)).merge(messages: prompt)
+
+          if model_params[:response_format].present?
+            payload[:response_format] = { type: "json_object" }
+          end
 
          payload[:stream] = true if @streaming_mode
 
@@ -2,11 +2,11 @@
 
 module DiscourseAi
  module Completions
-    class ToolCallProgressTracker
-      attr_reader :current_key, :current_value, :tool_call
+    class JsonStreamingTracker
+      attr_reader :current_key, :current_value, :stream_consumer
 
-      def initialize(tool_call)
-        @tool_call = tool_call
+      def initialize(stream_consumer)
+        @stream_consumer = stream_consumer
        @current_key = nil
        @current_value = nil
        @parser = DiscourseAi::Completions::JsonStreamingParser.new
@@ -18,7 +18,7 @@ module DiscourseAi
 
        @parser.value do |v|
          if @current_key
-            tool_call.notify_progress(@current_key, v)
+            stream_consumer.notify_progress(@current_key, v)
            @current_key = nil
          end
        end
@@ -39,7 +39,7 @@ module DiscourseAi
 
        if @parser.state == :start_string && @current_key
          # this is is worth notifying
-          tool_call.notify_progress(@current_key, @parser.buf)
+          stream_consumer.notify_progress(@current_key, @parser.buf)
        end
 
        @current_key = nil if @parser.state == :end_value
@@ -304,6 +304,8 @@ module DiscourseAi
    # @param feature_context { Hash - Optional } - The feature context to use for the completion.
    # @param partial_tool_calls { Boolean - Optional } - If true, the completion will return partial tool calls.
    # @param output_thinking { Boolean - Optional } - If true, the completion will return the thinking output for thinking models.
+    # @param response_format { Hash - Optional } - JSON schema passed to the API as the desired structured output.
+    # @param [Experimental] extra_model_params { Hash - Optional } - Other params that are not available accross models. e.g. response_format JSON schema.
    #
    # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
    #
@@ -321,6 +323,7 @@ module DiscourseAi
      feature_context: nil,
      partial_tool_calls: false,
      output_thinking: false,
+      response_format: nil,
      extra_model_params: nil,
      &partial_read_blk
    )
@@ -336,6 +339,7 @@ module DiscourseAi
          feature_context: feature_context,
          partial_tool_calls: partial_tool_calls,
          output_thinking: output_thinking,
+          response_format: response_format,
          extra_model_params: extra_model_params,
        },
      )
@@ -344,6 +348,7 @@ module DiscourseAi
 
      model_params[:temperature] = temperature if temperature
      model_params[:top_p] = top_p if top_p
+      model_params[:response_format] = response_format if response_format
      model_params.merge!(extra_model_params) if extra_model_params
 
      if prompt.is_a?(String)
@@ -10,7 +10,7 @@ class DiscourseAi::Completions::NovaMessageProcessor
      @raw_json = +""
      @tool_call = DiscourseAi::Completions::ToolCall.new(id: id, name: name, parameters: {})
      @streaming_parser =
-        DiscourseAi::Completions::ToolCallProgressTracker.new(self) if partial_tool_calls
+        DiscourseAi::Completions::JsonStreamingTracker.new(self) if partial_tool_calls
    end
 
    def append(json)
@@ -59,7 +59,7 @@ module DiscourseAi::Completions
      if id.present? && name.present?
        @tool_arguments = +""
        @tool = ToolCall.new(id: id, name: name)
-        @streaming_parser = ToolCallProgressTracker.new(self) if @partial_tool_calls
+        @streaming_parser = JsonStreamingTracker.new(self) if @partial_tool_calls
      end
 
      @tool_arguments << arguments.to_s
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    class StructuredOutput
+      def initialize(json_schema_properties)
+        @property_names = json_schema_properties.keys.map(&:to_sym)
+        @property_cursors =
+          json_schema_properties.reduce({}) do |m, (k, prop)|
+            m[k.to_sym] = 0 if prop[:type] == "string"
+            m
+          end
+
+        @tracked = {}
+
+        @partial_json_tracker = JsonStreamingTracker.new(self)
+      end
+
+      attr_reader :last_chunk_buffer
+
+      def <<(raw)
+        @partial_json_tracker << raw
+      end
+
+      def read_latest_buffered_chunk
+        @property_names.reduce({}) do |memo, pn|
+          if @tracked[pn].present?
+            # This means this property is a string and we want to return unread chunks.
+            if @property_cursors[pn].present?
+              unread = @tracked[pn][@property_cursors[pn]..]
+
+              memo[pn] = unread if unread.present?
+              @property_cursors[pn] = @tracked[pn].length
+            else
+              # Ints and bools are always returned as is.
+              memo[pn] = @tracked[pn]
+            end
+          end
+
+          memo
+        end
+      end
+
+      def notify_progress(key, value)
+        key_sym = key.to_sym
+        return if !@property_names.include?(key_sym)
+
+        @tracked[key_sym] = value
+      end
+    end
+  end
+end
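`StructuredOutput` wraps `JsonStreamingTracker` and keeps a per-key cursor for string properties, so repeated `read_latest_buffered_chunk` calls yield only the not-yet-read tail. A hypothetical feed (chunk boundaries invented for illustration; the spec at the end of this diff exercises the same behavior):

    so = DiscourseAi::Completions::StructuredOutput.new({ summary: { type: "string" } })
    so << "{\"summary\": \"Hel"
    so.read_latest_buffered_chunk # => { summary: "Hel" }
    so << "lo\"}"
    so.read_latest_buffered_chunk # => { summary: "lo" }, only the unread part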
@@ -64,10 +64,13 @@ module DiscourseAi
 
      user = context.user
 
-      llm_kwargs = { user: user }
+      llm_kwargs = llm_args.dup
+      llm_kwargs[:user] = user
      llm_kwargs[:temperature] = persona.temperature if persona.temperature
      llm_kwargs[:top_p] = persona.top_p if persona.top_p
-      llm_kwargs[:max_tokens] = llm_args[:max_tokens] if llm_args[:max_tokens].present?
+      llm_kwargs[:response_format] = build_json_schema(
+        persona.response_format,
+      ) if persona.response_format.present?
 
      needs_newlines = false
      tools_ran = 0
@@ -148,6 +151,8 @@ module DiscourseAi
              raw_context << partial
              current_thinking << partial
            end
+          elsif partial.is_a?(DiscourseAi::Completions::StructuredOutput)
+            update_blk.call(partial, cancel, nil, :structured_output)
          else
            update_blk.call(partial, cancel)
          end
@@ -176,6 +181,10 @@ module DiscourseAi
      embed_thinking(raw_context)
    end
 
+    def returns_json?
+      persona.response_format.present?
+    end
+
    private
 
    def embed_thinking(raw_context)
@@ -301,6 +310,30 @@ module DiscourseAi
 
      placeholder
    end
+
+    def build_json_schema(response_format)
+      properties =
+        response_format
+          .to_a
+          .reduce({}) do |memo, format|
+            memo[format[:key].to_sym] = { type: format[:type] }
+            memo
+          end
+
+      {
+        type: "json_schema",
+        json_schema: {
+          name: "reply",
+          schema: {
+            type: "object",
+            properties: properties,
+            required: properties.keys.map(&:to_s),
+            additionalProperties: false,
+          },
+          strict: true,
+        },
+      }
+    end
  end
 end
 end
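For reference, `build_json_schema` above expands a persona's `[{ key:, type: }]` rows into the provider-facing schema; with the summarization personas' `response_format` of `[{ key: "summary", type: "string" }]` it yields:

    {
      type: "json_schema",
      json_schema: {
        name: "reply",
        schema: {
          type: "object",
          properties: { summary: { type: "string" } },
          required: ["summary"],
          additionalProperties: false,
        },
        strict: true,
      },
    }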
@@ -49,6 +49,8 @@ module DiscourseAi
      @site_title = site_title
      @site_description = site_description
      @time = time
+      @resource_url = resource_url
+
      @feature_name = feature_name
      @resource_url = resource_url
 
@@ -160,6 +160,10 @@ module DiscourseAi
        {}
      end
 
+      def response_format
+        nil
+      end
+
      def available_tools
        self
          .class
@@ -18,9 +18,19 @@ module DiscourseAi
            - Limit the summary to a maximum of 40 words.
            - Do *NOT* repeat the discussion title in the summary.
 
-            Return the summary inside <ai></ai> tags.
+            Format your response as a JSON object with a single key named "summary", which has the summary as the value.
+            Your output should be in the following format:
+              <output>
+                {"summary": "xx"}
+              </output>
+
+            Where "xx" is replaced by the summary.
          PROMPT
        end
+
+        def response_format
+          [{ key: "summary", type: "string" }]
+        end
      end
    end
  end
 end
@@ -18,8 +18,20 @@ module DiscourseAi
            - Example: link to the 6th post by jane: [agreed with]({resource_url}/6)
            - Example: link to the 13th post by joe: [joe]({resource_url}/13)
            - When formatting usernames either use @USERNAME OR [USERNAME]({resource_url}/POST_NUMBER)
+
+            Format your response as a JSON object with a single key named "summary", which has the summary as the value.
+            Your output should be in the following format:
+              <output>
+                {"summary": "xx"}
+              </output>
+
+            Where "xx" is replaced by the summary.
          PROMPT
        end
+
+        def response_format
+          [{ key: "summary", type: "string" }]
+        end
      end
    end
  end
 end
@@ -29,18 +29,10 @@ module DiscourseAi
 
      summary = fold(truncated_content, user, &on_partial_blk)
 
-      clean_summary = Nokogiri::HTML5.fragment(summary).css("ai")&.first&.text || summary
-
      if persist_summaries
-        AiSummary.store!(
-          strategy,
-          llm_model,
-          clean_summary,
-          truncated_content,
-          human: user&.human?,
-        )
+        AiSummary.store!(strategy, llm_model, summary, truncated_content, human: user&.human?)
      else
-        AiSummary.new(summarized_text: clean_summary)
+        AiSummary.new(summarized_text: summary)
      end
    end
 
@@ -118,9 +110,20 @@ module DiscourseAi
      )
 
      summary = +""
 
      buffer_blk =
-        Proc.new do |partial, cancel, placeholder, type|
-          if type.blank?
+        Proc.new do |partial, cancel, _, type|
+          if type == :structured_output
+            json_summary_schema_key = bot.persona.response_format&.first.to_h
+            partial_summary =
+              partial.read_latest_buffered_chunk[json_summary_schema_key[:key].to_sym]
+
+            if partial_summary.present?
+              summary << partial_summary
+              on_partial_blk.call(partial_summary, cancel) if on_partial_blk
+            end
+          elsif type.blank?
+            # Assume response is a regular completion.
            summary << partial
            on_partial_blk.call(partial, cancel) if on_partial_blk
          end
@@ -154,12 +157,6 @@ module DiscourseAi
 
        item
      end
-
-      def text_only_update(&on_partial_blk)
-        Proc.new do |partial, cancel, placeholder, type|
-          on_partial_blk.call(partial, cancel) if type.blank?
-        end
-      end
    end
  end
 end
@@ -21,6 +21,7 @@ RSpec.describe Jobs::FastTrackTopicGist do
          created_at: 10.minutes.ago,
        )
    end
+
    let(:updated_gist) { "They updated me :(" }
 
    context "when it's up to date" do
@@ -51,7 +51,9 @@ RSpec.describe Jobs::StreamTopicAiSummary do
    end
 
    it "publishes updates with a partial summary" do
-      with_responses(["dummy"]) do
+      summary = "dummy"
+
+      with_responses([summary]) do
        messages =
          MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
            job.execute(topic_id: topic.id, user_id: user.id)
@@ -59,12 +61,16 @@ RSpec.describe Jobs::StreamTopicAiSummary do
 
        partial_summary_update = messages.first.data
        expect(partial_summary_update[:done]).to eq(false)
-        expect(partial_summary_update.dig(:ai_topic_summary, :summarized_text)).to eq("dummy")
+        expect(partial_summary_update.dig(:ai_topic_summary, :summarized_text).chomp("\"}")).to eq(
+          summary,
+        )
      end
    end
 
    it "publishes a final update to signal we're done and provide metadata" do
-      with_responses(["dummy"]) do
+      summary = "dummy"
+
+      with_responses([summary]) do
        messages =
          MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
            job.execute(topic_id: topic.id, user_id: user.id)
@@ -73,6 +79,7 @@ RSpec.describe Jobs::StreamTopicAiSummary do
 
        final_update = messages.last.data
        expect(final_update[:done]).to eq(true)
 
+        expect(final_update.dig(:ai_topic_summary, :summarized_text)).to eq(summary)
        expect(final_update.dig(:ai_topic_summary, :algorithm)).to eq("fake")
        expect(final_update.dig(:ai_topic_summary, :outdated)).to eq(false)
        expect(final_update.dig(:ai_topic_summary, :can_regenerate)).to eq(true)
@@ -590,7 +590,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
    data: {"type": "content_block_stop", "index": 0}
 
    event: content_block_start
    data: {"type":"content_block_start","index":0,"content_block":{"type":"redacted_thinking","data":"AAA=="} }
 
    event: ping
    data: {"type": "ping"}
@@ -769,4 +769,92 @@ data: {"type":"content_block_start","index":0,"content_block":{"type":"redacted_
      expect(result).to eq("I won't use any tools. Here's a direct response instead.")
    end
  end
+
+  describe "structured output via prefilling" do
+    it "forces the response to be a JSON and using the given JSON schema" do
+      schema = {
+        type: "json_schema",
+        json_schema: {
+          name: "reply",
+          schema: {
+            type: "object",
+            properties: {
+              key: {
+                type: "string",
+              },
+            },
+            required: ["key"],
+            additionalProperties: false,
+          },
+          strict: true,
+        },
+      }
+
+      body = (<<~STRING).strip
+        event: message_start
+        data: {"type": "message_start", "message": {"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY", "type": "message", "role": "assistant", "content": [], "model": "claude-3-opus-20240229", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 25, "output_tokens": 1}}}

+        event: content_block_start
+        data: {"type": "content_block_start", "index":0, "content_block": {"type": "text", "text": ""}}

+        event: content_block_delta
+        data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "\\""}}

+        event: content_block_delta
+        data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "key"}}

+        event: content_block_delta
+        data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "\\":\\""}}

+        event: content_block_delta
+        data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "Hello!"}}

+        event: content_block_delta
+        data: {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "\\"}"}}

+        event: content_block_stop
+        data: {"type": "content_block_stop", "index": 0}

+        event: message_delta
+        data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence":null, "usage":{"output_tokens": 15}}}

+        event: message_stop
+        data: {"type": "message_stop"}
+      STRING
+
+      parsed_body = nil
+
+      stub_request(:post, url).with(
+        body:
+          proc do |req_body|
+            parsed_body = JSON.parse(req_body, symbolize_names: true)
+            true
+          end,
+        headers: {
+          "Content-Type" => "application/json",
+          "X-Api-Key" => "123",
+          "Anthropic-Version" => "2023-06-01",
+        },
+      ).to_return(status: 200, body: body)
+
+      structured_output = nil
+      llm.generate(
+        prompt,
+        user: Discourse.system_user,
+        feature_name: "testing",
+        response_format: schema,
+      ) { |partial, cancel| structured_output = partial }
+
+      expect(structured_output.read_latest_buffered_chunk).to eq({ key: "Hello!" })
+
+      expected_body = {
+        model: "claude-3-opus-20240229",
+        max_tokens: 4096,
+        messages: [{ role: "user", content: "user1: hello" }, { role: "assistant", content: "{" }],
+        system: "You are hello bot",
+        stream: true,
+      }
+      expect(parsed_body).to eq(expected_body)
+    end
+  end
 end
@@ -546,4 +546,69 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
        )
      end
    end
+
+    describe "structured output via prefilling" do
+      it "forces the response to be a JSON and using the given JSON schema" do
+        schema = {
+          type: "json_schema",
+          json_schema: {
+            name: "reply",
+            schema: {
+              type: "object",
+              properties: {
+                key: {
+                  type: "string",
+                },
+              },
+              required: ["key"],
+              additionalProperties: false,
+            },
+            strict: true,
+          },
+        }
+
+        messages =
+          [
+            { type: "message_start", message: { usage: { input_tokens: 9 } } },
+            { type: "content_block_delta", delta: { text: "\"" } },
+            { type: "content_block_delta", delta: { text: "key" } },
+            { type: "content_block_delta", delta: { text: "\":\"" } },
+            { type: "content_block_delta", delta: { text: "Hello!" } },
+            { type: "content_block_delta", delta: { text: "\"}" } },
+            { type: "message_delta", delta: { usage: { output_tokens: 25 } } },
+          ].map { |message| encode_message(message) }
+
+        proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+        request = nil
+        bedrock_mock.with_chunk_array_support do
+          stub_request(
+            :post,
+            "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream",
+          )
+            .with do |inner_request|
+              request = inner_request
+              true
+            end
+            .to_return(status: 200, body: messages)
+
+          structured_output = nil
+          proxy.generate("hello world", response_format: schema, user: user) do |partial|
+            structured_output = partial
+          end
+
+          expected = {
+            "max_tokens" => 4096,
+            "anthropic_version" => "bedrock-2023-05-31",
+            "messages" => [
+              { "role" => "user", "content" => "hello world" },
+              { "role" => "assistant", "content" => "{" },
+            ],
+            "system" => "You are a helpful bot",
+          }
+          expect(JSON.parse(request.body)).to eq(expected)
+
+          expect(structured_output.read_latest_buffered_chunk).to eq({ key: "Hello!" })
+        end
+      end
+    end
 end
@@ -308,4 +308,64 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Cohere do
    expect(audit.request_tokens).to eq(14)
    expect(audit.response_tokens).to eq(11)
  end
+
+  it "is able to return structured outputs" do
+    schema = {
+      type: "json_schema",
+      json_schema: {
+        name: "reply",
+        schema: {
+          type: "object",
+          properties: {
+            key: {
+              type: "string",
+            },
+          },
+          required: ["key"],
+          additionalProperties: false,
+        },
+        strict: true,
+      },
+    }
+
+    body = <<~TEXT
+      {"is_finished":false,"event_type":"stream-start","generation_id":"eb889b0f-c27d-45ea-98cf-567bdb7fc8bf"}
+      {"is_finished":false,"event_type":"text-generation","text":"{\\""}
+      {"is_finished":false,"event_type":"text-generation","text":"key"}
+      {"is_finished":false,"event_type":"text-generation","text":"\\":\\""}
+      {"is_finished":false,"event_type":"text-generation","text":"Hello!"}
+      {"is_finished":false,"event_type":"text-generation","text":"\\"}"}|
+      {"is_finished":true,"event_type":"stream-end","response":{"response_id":"d235db17-8555-493b-8d91-e601f76de3f9","text":"{\\"key\\":\\"Hello!\\"}","generation_id":"eb889b0f-c27d-45ea-98cf-567bdb7fc8bf","chat_history":[{"role":"USER","message":"user1: hello"},{"role":"CHATBOT","message":"hi user"},{"role":"USER","message":"user1: thanks"},{"role":"CHATBOT","message":"You're welcome! Is there anything else I can help you with?"}],"token_count":{"prompt_tokens":29,"response_tokens":14,"total_tokens":43,"billed_tokens":28},"meta":{"api_version":{"version":"1"},"billed_units":{"input_tokens":14,"output_tokens":14}}},"finish_reason":"COMPLETE"}
+    TEXT
+
+    parsed_body = nil
+    structured_output = nil
+
+    EndpointMock.with_chunk_array_support do
+      stub_request(:post, "https://api.cohere.ai/v1/chat").with(
+        body:
+          proc do |req_body|
+            parsed_body = JSON.parse(req_body, symbolize_names: true)
+            true
+          end,
+        headers: {
+          "Content-Type" => "application/json",
+          "Authorization" => "Bearer ABC",
+        },
+      ).to_return(status: 200, body: body.split("|"))
+
+      result =
+        llm.generate(prompt, response_format: schema, user: user) do |partial, cancel|
+          structured_output = partial
+        end
+    end
+
+    expect(parsed_body[:preamble]).to eq("You are hello bot")
+    expect(parsed_body[:chat_history]).to eq(
+      [{ role: "USER", message: "user1: hello" }, { role: "CHATBOT", message: "hi user" }],
+    )
+    expect(parsed_body[:message]).to eq("user1: thanks")
+
+    expect(structured_output.read_latest_buffered_chunk).to eq({ key: "Hello!" })
+  end
 end
@ -420,7 +420,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
|
||||||
body:
|
body:
|
||||||
proc do |_req_body|
|
proc do |_req_body|
|
||||||
req_body = _req_body
|
req_body = _req_body
|
||||||
true
|
_req_body
|
||||||
end,
|
end,
|
||||||
).to_return(status: 200, body: response)
|
).to_return(status: 200, body: response)
|
||||||
|
|
||||||
@ -433,4 +433,67 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
        { function_calling_config: { mode: "ANY", allowed_function_names: ["echo"] } },
      )
    end
+
+  describe "structured output via JSON Schema" do
+    it "forces the response to be a JSON" do
+      schema = {
+        type: "json_schema",
+        json_schema: {
+          name: "reply",
+          schema: {
+            type: "object",
+            properties: {
+              key: {
+                type: "string",
+              },
+            },
+            required: ["key"],
+            additionalProperties: false,
+          },
+          strict: true,
+        },
+      }
+
+      response = <<~TEXT.strip
+        data: {"candidates": [{"content": {"parts": [{"text": "{\\""}],"role": "model"}}],"usageMetadata": {"promptTokenCount": 399,"totalTokenCount": 399},"modelVersion": "gemini-1.5-pro-002"}
+
+        data: {"candidates": [{"content": {"parts": [{"text": "key"}],"role": "model"},"safetyRatings": [{"category": "HARM_CATEGORY_HATE_SPEECH","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_DANGEROUS_CONTENT","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_HARASSMENT","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT","probability": "NEGLIGIBLE"}]}],"usageMetadata": {"promptTokenCount": 399,"totalTokenCount": 399},"modelVersion": "gemini-1.5-pro-002"}
+
+        data: {"candidates": [{"content": {"parts": [{"text": "\\":\\""}],"role": "model"},"safetyRatings": [{"category": "HARM_CATEGORY_HATE_SPEECH","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_DANGEROUS_CONTENT","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_HARASSMENT","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT","probability": "NEGLIGIBLE"}]}],"usageMetadata": {"promptTokenCount": 399,"totalTokenCount": 399},"modelVersion": "gemini-1.5-pro-002"}
+
+        data: {"candidates": [{"content": {"parts": [{"text": "Hello!"}],"role": "model"},"finishReason": "STOP"}],"usageMetadata": {"promptTokenCount": 399,"candidatesTokenCount": 191,"totalTokenCount": 590},"modelVersion": "gemini-1.5-pro-002"}
+
+        data: {"candidates": [{"content": {"parts": [{"text": "\\"}"}],"role": "model"},"finishReason": "STOP"}],"usageMetadata": {"promptTokenCount": 399,"candidatesTokenCount": 191,"totalTokenCount": 590},"modelVersion": "gemini-1.5-pro-002"}
+
+        data: {"candidates": [{"finishReason": "MALFORMED_FUNCTION_CALL"}],"usageMetadata": {"promptTokenCount": 399,"candidatesTokenCount": 191,"totalTokenCount": 590},"modelVersion": "gemini-1.5-pro-002"}
+
+      TEXT
+
+      req_body = nil
+
+      llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+      url = "#{model.url}:streamGenerateContent?alt=sse&key=123"
+
+      stub_request(:post, url).with(
+        body:
+          proc do |_req_body|
+            req_body = _req_body
+            true
+          end,
+      ).to_return(status: 200, body: response)
+
+      structured_response = nil
+      llm.generate("Hello", response_format: schema, user: user) do |partial|
+        structured_response = partial
+      end
+
+      expect(structured_response.read_latest_buffered_chunk).to eq({ key: "Hello!" })
+
+      parsed = JSON.parse(req_body, symbolize_names: true)
+
+      # Verify that schema is passed following Gemini API specs.
+      expect(parsed.dig(:generationConfig, :responseSchema)).to eq(schema)
+      expect(parsed.dig(:generationConfig, :responseMimeType)).to eq("application/json")
+    end
+  end
 end
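What the final expectations pin down: the Gemini endpoint forwards the caller's `response_format` hash untouched as `generationConfig.responseSchema` and forces `responseMimeType` to `application/json`. A sketch of that mapping, inferred from the assertions above rather than taken from the endpoint's actual code:

    # Hypothetical helper mirroring the payload shape the spec asserts.
    def gemini_generation_config(response_format)
      return {} if response_format.nil?

      {
        generationConfig: {
          responseSchema: response_format, # forwarded as-is
          responseMimeType: "application/json", # forces JSON output
        },
      }
    end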
@ -0,0 +1,69 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::StructuredOutput do
+  subject(:structured_output) do
+    described_class.new(
+      {
+        message: {
+          type: "string",
+        },
+        bool: {
+          type: "boolean",
+        },
+        number: {
+          type: "integer",
+        },
+        status: {
+          type: "string",
+        },
+      },
+    )
+  end
+
+  describe "Parsing structured output on the fly" do
+    it "acts as a buffer for a streamed JSON" do
+      chunks = [
+        +"{\"message\": \"Line 1\\n",
+        +"Line 2\\n",
+        +"Line 3\", ",
+        +"\"bool\": true,",
+        +"\"number\": 4",
+        +"2,",
+        +"\"status\": \"o",
+        +"\\\"k\\\"\"}",
+      ]
+
+      structured_output << chunks[0]
+      expect(structured_output.read_latest_buffered_chunk).to eq({ message: "Line 1\n" })
+
+      structured_output << chunks[1]
+      expect(structured_output.read_latest_buffered_chunk).to eq({ message: "Line 2\n" })
+
+      structured_output << chunks[2]
+      expect(structured_output.read_latest_buffered_chunk).to eq({ message: "Line 3" })
+
+      structured_output << chunks[3]
+      expect(structured_output.read_latest_buffered_chunk).to eq({ bool: true })
+
+      # Waiting for number to be fully buffered.
+      structured_output << chunks[4]
+      expect(structured_output.read_latest_buffered_chunk).to eq({ bool: true })
+
+      structured_output << chunks[5]
+      expect(structured_output.read_latest_buffered_chunk).to eq({ bool: true, number: 42 })
+
+      structured_output << chunks[6]
+      expect(structured_output.read_latest_buffered_chunk).to eq(
+        { bool: true, number: 42, status: "o" },
+      )
+
+      structured_output << chunks[7]
+      expect(structured_output.read_latest_buffered_chunk).to eq(
+        { bool: true, number: 42, status: "\"k\"" },
+      )
+
+      # No partial string left to read.
+      expect(structured_output.read_latest_buffered_chunk).to eq({ bool: true, number: 42 })
+    end
+  end
+end
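The contract this new spec establishes: `<<` feeds raw chunks into a streaming JSON parser, and `read_latest_buffered_chunk` surfaces string values incrementally (returning only the unread tail of a string) while holding scalars back until they are fully parsed. Condensed from the expectations above:

    so = DiscourseAi::Completions::StructuredOutput.new({ message: { type: "string" } })

    so << "{\"message\": \"Hel"
    so.read_latest_buffered_chunk # => { message: "Hel" } (partial strings stream)

    so << "lo\"}"
    so.read_latest_buffered_chunk # => { message: "lo" } (only the unread tail)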
@ -23,17 +23,17 @@ RSpec.describe DiscourseAi::Summarization::FoldContent do
     llm_model.update!(max_prompt_tokens: model_tokens)
   end
 
-  let(:single_summary) { "this is a summary" }
+  let(:summary) { "this is a summary" }
 
   fab!(:user)
 
   it "summarizes the content" do
     result =
-      DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy|
+      DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do |spy|
         summarizer.summarize(user).tap { expect(spy.completions).to eq(1) }
       end
 
-    expect(result.summarized_text).to eq(single_summary)
+    expect(result.summarized_text).to eq(summary)
   end
 end
@ -267,6 +267,7 @@ RSpec.describe AiPersona do
       description: "system persona",
       system_prompt: "system persona",
       tools: %w[Search Time],
+      response_format: [{ key: "summary", type: "string" }],
       system: true,
     )
   end

@ -285,6 +286,22 @@ RSpec.describe AiPersona do
         I18n.t("discourse_ai.ai_bot.personas.cannot_edit_system_persona"),
       )
     end
+
+    it "doesn't accept response format changes" do
+      new_format = [{ key: "summary2", type: "string" }]
+
+      expect { system_persona.update!(response_format: new_format) }.to raise_error(
+        ActiveRecord::RecordInvalid,
+      )
+    end
+
+    it "doesn't accept additional format changes" do
+      new_format = [{ key: "summary", type: "string" }, { key: "summary2", type: "string" }]
+
+      expect { system_persona.update!(response_format: new_format) }.to raise_error(
+        ActiveRecord::RecordInvalid,
+      )
+    end
    end
  end
end
@ -185,6 +185,7 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
         default_llm_id: llm_model.id,
         question_consolidator_llm_id: llm_model.id,
         forced_tool_count: 2,
+        response_format: [{ key: "summary", type: "string" }],
       }
     end
 
@ -209,6 +210,9 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
       expect(persona_json["allow_chat_channel_mentions"]).to eq(true)
       expect(persona_json["allow_chat_direct_messages"]).to eq(true)
       expect(persona_json["question_consolidator_llm_id"]).to eq(llm_model.id)
+      expect(persona_json["response_format"].map { |rf| rf["key"] }).to contain_exactly(
+        "summary",
+      )
 
       persona = AiPersona.find(persona_json["id"])
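From the admin API's perspective, `response_format` rides along with the rest of the persona params. A hypothetical create request using the same param shape the spec posts (the exact route is an assumption; the nesting under `ai_persona` matches this controller spec):

    # Hypothetical request; key/type pairs describe the JSON keys the LLM must emit.
    post "/admin/plugins/discourse-ai/ai-personas.json",
         params: {
           ai_persona: {
             name: "summarizer",
             description: "Summarizes topics",
             system_prompt: "You summarize topics.",
             response_format: [{ key: "summary", type: "string" }],
           },
         }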
@ -74,6 +74,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
 
       it "returns a summary" do
         summary_text = "This is a summary"
 
         DiscourseAi::Completions::Llm.with_prepared_responses([summary_text]) do
           get "/discourse-ai/summarization/t/#{topic.id}.json"
@ -13,6 +13,8 @@ describe DiscourseAi::TopicSummarization do
 
   let(:strategy) { DiscourseAi::Summarization.topic_summary(topic) }
 
+  let(:summary) { "This is the final summary" }
+
   describe "#summarize" do
     subject(:summarization) { described_class.new(strategy, user) }

@ -27,8 +29,6 @@ describe DiscourseAi::TopicSummarization do
     end
 
     context "when the content was summarized in a single chunk" do
-      let(:summary) { "This is the final summary" }
-
       it "caches the summary" do
         DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
           section = summarization.summarize

@ -54,7 +54,6 @@ describe DiscourseAi::TopicSummarization do
 
     describe "invalidating cached summaries" do
       let(:cached_text) { "This is a cached summary" }
-      let(:updated_summary) { "This is the final summary" }
 
       def cached_summary
         AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete])

@ -86,10 +85,10 @@ describe DiscourseAi::TopicSummarization do
       before { cached_summary.update!(original_content_sha: "outdated_sha") }
 
       it "returns a new summary" do
-        DiscourseAi::Completions::Llm.with_prepared_responses([updated_summary]) do
+        DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
           section = summarization.summarize
 
-          expect(section.summarized_text).to eq(updated_summary)
+          expect(section.summarized_text).to eq(summary)
         end
       end

@ -106,10 +105,10 @@ describe DiscourseAi::TopicSummarization do
       end
 
       it "returns a new summary if the skip_age_check flag is passed" do
-        DiscourseAi::Completions::Llm.with_prepared_responses([updated_summary]) do
+        DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
          section = summarization.summarize(skip_age_check: true)
 
-          expect(section.summarized_text).to eq(updated_summary)
+          expect(section.summarized_text).to eq(summary)
        end
      end
    end

@ -118,8 +117,6 @@ describe DiscourseAi::TopicSummarization do
     end
 
     describe "stream partial updates" do
-      let(:summary) { "This is the final summary" }
-
       it "receives a blk that is passed to the underlying strategy and called with partial summaries" do
         partial_result = +""

@ -127,7 +124,8 @@ describe DiscourseAi::TopicSummarization do
          summarization.summarize { |partial_summary| partial_result << partial_summary }
        end
 
-        expect(partial_result).to eq(summary)
+        # In a real world example, this is removed in the returned AiSummary obj.
+        expect(partial_result.chomp("\"}")).to eq(summary)
      end
    end
  end
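Per the comment introduced in that last hunk: with structured responses, the raw buffer accumulated from partial chunks still carries the closing `"}` of the JSON envelope, which the real flow strips before the text lands on the returned AiSummary; the assertion simply chomps it off. In plain Ruby:

    buffered = +"This is the final summary\"}"
    buffered.chomp("\"}") # => "This is the final summary"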
@ -12,7 +12,9 @@ RSpec.describe "Summarize a topic ", type: :system do
         "I like to eat pie. It is a very good dessert. Some people are wasteful by throwing pie at others but I do not do that. I always eat the pie.",
       )
   end
 
   let(:summarization_result) { "This is a summary" }
 
   let(:topic_page) { PageObjects::Pages::Topic.new }
   let(:summary_box) { PageObjects::Components::AiSummaryTrigger.new }