From 04eae76f68a2d94f086c9561e88cf1f94204a41a Mon Sep 17 00:00:00 2001
From: Roman Rizzi
Date: Fri, 12 Jan 2024 14:36:44 -0300
Subject: [PATCH] REFACTOR: Represent generic prompts with an Object. (#416)

* REFACTOR: Represent generic prompts with an Object.

* Adds a bit more validation for clarity

* Rewrite bot title prompt and fix quirk handling

---------

Co-authored-by: Sam Saffron
---
 app/models/completion_prompt.rb               |  17 +-
 lib/ai_bot/bot.rb                             |  48 ++--
 lib/ai_bot/personas/persona.rb                |  19 +-
 lib/ai_bot/playground.rb                      |  33 +--
 lib/ai_helper/assistant.rb                    |   4 +-
 lib/ai_helper/painter.rb                      |   6 +-
 lib/automation/report_runner.rb               |  15 +-
 lib/completions/dialects/chat_gpt.rb          |  76 ++----
 lib/completions/dialects/claude.rb            |  97 +++----
 lib/completions/dialects/dialect.rb           | 135 ++++------
 lib/completions/dialects/gemini.rb            | 132 ++++-----
 lib/completions/dialects/llama2_classic.rb    |  62 ++---
 lib/completions/dialects/mixtral.rb           |  58 ++--
 lib/completions/dialects/orca_style.rb        |  58 ++--
 lib/completions/llm.rb                        |  43 +--
 lib/completions/prompt.rb                     |  75 ++++++
 lib/embeddings/semantic_search.rb             |  10 +-
 lib/summarization/strategies/fold_content.rb  |  51 ++--
 .../lib/completions/dialects/chat_gpt_spec.rb | 122 ++-------
 spec/lib/completions/dialects/claude_spec.rb  | 199 ++------------
 .../completions/dialects/dialect_context.rb   | 101 +++++++
 spec/lib/completions/dialects/gemini_spec.rb  | 252 +++---------------
 .../dialects/llama2_classic_spec.rb           | 173 ++----------
 spec/lib/completions/dialects/mixtral_spec.rb | 175 ++---------
 .../completions/dialects/orca_style_spec.rb   | 180 +++---------
 .../completions/endpoints/anthropic_spec.rb   |   1 -
 .../completions/endpoints/aws_bedrock_spec.rb |   1 -
 .../endpoints/endpoint_examples.rb            |  29 +-
 spec/lib/completions/endpoints/gemini_spec.rb |   9 +-
 .../endpoints/hugging_face_spec.rb            |   1 -
 .../lib/completions/endpoints/open_ai_spec.rb |  15 +-
 spec/lib/completions/endpoints/vllm_spec.rb   |   1 -
 spec/lib/completions/llm_spec.rb              |  43 +--
 spec/lib/completions/prompt_spec.rb           |  66 +++++
 spec/lib/modules/ai_bot/bot_spec.rb           |  14 +-
 .../modules/ai_bot/personas/persona_spec.rb   |  15 +-
 spec/lib/modules/ai_bot/playground_spec.rb    |  56 ++--
 spec/models/completion_prompt_spec.rb         |  19 +-
 .../ai_helper/assistant_controller_spec.rb    |   8 +-
 39 files changed, 880 insertions(+), 1539 deletions(-)
 create mode 100644 lib/completions/prompt.rb
 create mode 100644 spec/lib/completions/dialects/dialect_context.rb
 create mode 100644 spec/lib/completions/prompt_spec.rb

diff --git a/app/models/completion_prompt.rb b/app/models/completion_prompt.rb
index 183b7adb..52650d9a 100644
--- a/app/models/completion_prompt.rb
+++ b/app/models/completion_prompt.rb
@@ -33,11 +33,18 @@ class CompletionPrompt < ActiveRecord::Base
         input
       end

-    messages_hash.merge(input: <<~TEXT)
-      <input>
-      #{user_input}
-      </input>
-    TEXT
+    instructions = [messages_hash[:insts], messages_hash[:post_insts].to_s].join("\n")
+
+    prompt = DiscourseAi::Completions::Prompt.new(instructions)
+
+    messages_hash[:examples].to_a.each do |example_pair|
+      prompt.push(type: :user, content: example_pair.first)
+      prompt.push(type: :model, content: example_pair.second)
+    end
+
+    prompt.push(type: :user, content: "<input>#{user_input}</input>")
+
+    prompt
   end

   private
diff --git a/lib/ai_bot/bot.rb b/lib/ai_bot/bot.rb
index 30cdbf7a..b6d41c7a 100644
--- a/lib/ai_bot/bot.rb
+++ b/lib/ai_bot/bot.rb
@@ -18,14 +18,19 @@ module DiscourseAi
       attr_reader :bot_user

       def get_updated_title(conversation_context, post_user)
-        title_prompt = { insts: <<~TEXT, conversation_context: conversation_context }
-          You are titlebot. Given a topic, you will figure out a title.
-          You will never respond with anything but 7 word topic title.
+        system_insts = <<~TEXT.strip
+          You are titlebot. Given a topic, you will figure out a title.
+          You will never respond with anything but a 7-word topic title.
         TEXT

-        title_prompt[
-          :input
-        ] = "Based on our previous conversation, suggest a 7 word title without quoting any of it."
+        title_prompt =
+          DiscourseAi::Completions::Prompt.new(system_insts, messages: conversation_context)
+
+        title_prompt.push(
+          type: :user,
+          content:
+            "Based on our previous conversation, suggest a 7-word title without quoting any of it.",
+        )

         DiscourseAi::Completions::Llm
           .proxy(model)
@@ -57,27 +62,30 @@ module DiscourseAi
             tool_call_id = tool.tool_call_id
             invocation_result_json = invoke_tool(tool, llm, cancel, &update_blk).to_json

-            invocation_context = {
-              type: "tool",
-              name: tool_call_id,
-              content: invocation_result_json,
-            }
-            tool_context = {
-              type: "tool_call",
-              name: tool_call_id,
+            tool_call_message = {
+              type: :tool_call,
+              id: tool_call_id,
               content: { name: tool.name, arguments: tool.parameters }.to_json,
             }

-            prompt[:conversation_context] ||= []
+            tool_message = { type: :tool, id: tool_call_id, content: invocation_result_json }

             if tool.standalone?
-              prompt[:conversation_context] = [invocation_context, tool_context]
+              standalone_context =
+                context.dup.merge(
+                  conversation_context: [
+                    context[:conversation_context].last,
+                    tool_call_message,
+                    tool_message,
+                  ],
+                )
+              prompt = persona.craft_prompt(standalone_context)
             else
-              prompt[:conversation_context] = [invocation_context, tool_context] +
-                prompt[:conversation_context]
+              prompt.push(**tool_call_message)
+              prompt.push(**tool_message)
             end

-            raw_context << [tool_context[:content], tool_call_id, "tool_call"]
+            raw_context << [tool_call_message[:content], tool_call_id, "tool_call"]
             raw_context << [invocation_result_json, tool_call_id, "tool"]
           else
             update_blk.call(partial, cancel, nil)
@@ -91,7 +99,7 @@ module DiscourseAi
             total_completions += 1

             # do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS)
-            prompt.delete(:tools) if total_completions == MAX_COMPLETIONS
+            prompt.tools = [] if total_completions == MAX_COMPLETIONS
           end

           raw_context
diff --git a/lib/ai_bot/personas/persona.rb b/lib/ai_bot/personas/persona.rb
index a28411fe..b5f9d662 100644
--- a/lib/ai_bot/personas/persona.rb
+++ b/lib/ai_bot/personas/persona.rb
@@ -100,17 +100,18 @@ module DiscourseAi
          found.nil? ?
match : found.to_s end - insts = <<~TEXT - #{system_insts} - #{available_tools.map(&:custom_system_message).compact_blank.join("\n")} + prompt = + DiscourseAi::Completions::Prompt.new( + <<~TEXT.strip, + #{system_insts} + #{available_tools.map(&:custom_system_message).compact_blank.join("\n")} TEXT + messages: context[:conversation_context].to_a, + ) - { insts: insts }.tap do |prompt| - prompt[:tools] = available_tools.map(&:signature) if available_tools - prompt[:conversation_context] = context[:conversation_context] if context[ - :conversation_context - ] - end + prompt.tools = available_tools.map(&:signature) if available_tools + + prompt end def find_tool(partial) diff --git a/lib/ai_bot/playground.rb b/lib/ai_bot/playground.rb index 2bb6940a..d29cbe30 100644 --- a/lib/ai_bot/playground.rb +++ b/lib/ai_bot/playground.rb @@ -36,8 +36,9 @@ module DiscourseAi .pluck(:raw, :username, "post_custom_prompts.custom_prompt") result = [] + first = true - context.each do |raw, username, custom_prompt| + context.reverse_each do |raw, username, custom_prompt| custom_prompt_translation = Proc.new do |message| # We can't keep backwards-compatibility for stored functions. @@ -45,27 +46,29 @@ module DiscourseAi if message[2] != "function" custom_context = { content: message[0], - type: message[2].present? ? message[2] : "assistant", + type: message[2].present? ? message[2].to_sym : :model, } - custom_context[:name] = message[1] if custom_context[:type] != "assistant" + custom_context[:id] = message[1] if custom_context[:type] != :model - custom_context + result << custom_context end end if custom_prompt.present? - result << { - type: "multi_turn", - content: custom_prompt.reverse_each.map(&custom_prompt_translation).compact, - } + if first + custom_prompt.each(&custom_prompt_translation) + first = false + else + custom_prompt.first(2).each(&custom_prompt_translation) + end else context = { content: raw, - type: (available_bot_usernames.include?(username) ? "assistant" : "user"), + type: (available_bot_usernames.include?(username) ? 
:model : :user), } - context[:name] = clean_username(username) if context[:type] == "user" + context[:id] = username if context[:type] == :user result << context end @@ -208,16 +211,6 @@ module DiscourseAi def available_bot_usernames @bot_usernames ||= DiscourseAi::AiBot::EntryPoint::BOTS.map(&:second) end - - def clean_username(username) - if username.match?(/\0[a-zA-Z0-9_-]{1,64}\z/) - username - else - # not the best in the world, but this is what we have to work with - # if sites enable unicode usernames this can get messy - username.gsub(/[^a-zA-Z0-9_-]/, "_")[0..63] - end - end end end end diff --git a/lib/ai_helper/assistant.rb b/lib/ai_helper/assistant.rb index 12a70268..0c4078b5 100644 --- a/lib/ai_helper/assistant.rb +++ b/lib/ai_helper/assistant.rb @@ -36,10 +36,10 @@ module DiscourseAi def generate_prompt(completion_prompt, input, user, &block) llm = DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model) - generic_prompt = completion_prompt.messages_with_input(input) + prompt = completion_prompt.messages_with_input(input) llm.generate( - generic_prompt, + prompt, user: user, temperature: completion_prompt.temperature, stop_sequences: completion_prompt.stop_sequences, diff --git a/lib/ai_helper/painter.rb b/lib/ai_helper/painter.rb index 56e51d14..110a38ac 100644 --- a/lib/ai_helper/painter.rb +++ b/lib/ai_helper/painter.rb @@ -57,10 +57,14 @@ module DiscourseAi end def difussion_prompt(text, user) - prompt = { insts: <<~TEXT, input: text } + prompt = + DiscourseAi::Completions::Prompt.new( + <<~TEXT.strip, Provide me a StableDiffusion prompt to generate an image that illustrates the following post in 40 words or less, be creative. You'll find the post between XML tags. TEXT + messages: [{ type: :user, content: text, id: user.username }], + ) DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model).generate( prompt, diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb index c942c3d5..beb9c09b 100644 --- a/lib/automation/report_runner.rb +++ b/lib/automation/report_runner.rb @@ -101,7 +101,7 @@ module DiscourseAi tokens_per_post: @tokens_per_post, tokenizer: @llm.tokenizer, ) - input = <<~INPUT + input = <<~INPUT.strip #{@instructions} @@ -111,11 +111,14 @@ module DiscourseAi #{@instructions} INPUT - prompt = { - insts: "You are a helpful bot specializing in summarizing activity on Discourse sites", - input: input, - final_insts: "Here is the report I generated for you", - } + prompt = + DiscourseAi::Completions::Prompt.new( + "You are a helpful bot specializing in summarizing activity on Discourse sites", + messages: [ + { type: :user, content: input }, + { type: :model, content: "Here is the report I generated for you" }, + ], + ) result = +"" diff --git a/lib/completions/dialects/chat_gpt.rb b/lib/completions/dialects/chat_gpt.rb index 73d00690..afea753f 100644 --- a/lib/completions/dialects/chat_gpt.rb +++ b/lib/completions/dialects/chat_gpt.rb @@ -22,28 +22,39 @@ module DiscourseAi end def translate - open_ai_prompt = [ - { role: "system", content: [prompt[:insts], prompt[:post_insts].to_s].join("\n") }, - ] + messages = prompt.messages - if prompt[:examples] - prompt[:examples].each do |example_pair| - open_ai_prompt << { role: "user", content: example_pair.first } - open_ai_prompt << { role: "assistant", content: example_pair.second } + # ChatGPT doesn't use an assistant msg to improve long-context responses. 
+        messages.pop if messages.last[:type] == :model
+
+        trimmed_messages = trim_messages(messages)
+
+        trimmed_messages.map do |msg|
+          if msg[:type] == :system
+            { role: "system", content: msg[:content] }
+          elsif msg[:type] == :model
+            { role: "assistant", content: msg[:content] }
+          elsif msg[:type] == :tool_call
+            call_details = JSON.parse(msg[:content], symbolize_names: true)
+            call_details[:arguments] = call_details[:arguments].to_json
+
+            {
+              role: "assistant",
+              content: nil,
+              tool_calls: [{ type: "function", function: call_details, id: msg[:id] }],
+            }
+          elsif msg[:type] == :tool
+            { role: "tool", tool_call_id: msg[:id], content: msg[:content] }
+          else
+            { role: "user", content: msg[:content] }.tap do |user_msg|
+              user_msg[:name] = msg[:id] if msg[:id]
+            end
           end
         end
-
-        open_ai_prompt.concat(conversation_context) if prompt[:conversation_context]
-
-        open_ai_prompt << { role: "user", content: prompt[:input] } if prompt[:input]
-
-        open_ai_prompt
       end

       def tools
-        return if prompt[:tools].blank?
-
-        prompt[:tools].map do |t|
+        prompt.tools.map do |t|
           tool = t.dup

           tool[:parameters] = t[:parameters]
@@ -62,39 +73,6 @@ module DiscourseAi
         end
       end

-      def conversation_context
-        return [] if prompt[:conversation_context].blank?
-
-        flattened_context = flatten_context(prompt[:conversation_context])
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context.reverse.map do |context|
-          if context[:type] == "tool_call"
-            function = JSON.parse(context[:content], symbolize_names: true)
-            function[:arguments] = function[:arguments].to_json
-
-            {
-              role: "assistant",
-              content: nil,
-              tool_calls: [{ type: "function", function: function, id: context[:name] }],
-            }
-          else
-            translated = context.slice(:content)
-            translated[:role] = context[:type]
-
-            if context[:name]
-              if translated[:role] == "tool"
-                translated[:tool_call_id] = context[:name]
-              else
-                translated[:name] = context[:name]
-              end
-            end
-
-            translated
-          end
-        end
-      end
-
       def max_prompt_tokens
         # provide a buffer of 120 tokens - our function counting is not
         # 100% accurate and getting numbers to align exactly is very hard
diff --git a/lib/completions/dialects/claude.rb b/lib/completions/dialects/claude.rb
index 09b8ba85..acb0f538 100644
--- a/lib/completions/dialects/claude.rb
+++ b/lib/completions/dialects/claude.rb
@@ -14,39 +14,50 @@ module DiscourseAi
         end
       end

-      def pad_newlines!(prompt)
-        if prompt[-1..-1] != "\n"
-          prompt << "\n\n"
-        elsif prompt[-2..-1] != "\n\n"
-          prompt << "\n"
-        end
-      end
-
       def translate
-        claude_prompt = uses_system_message? ? +"" : +"Human: "
-        claude_prompt << prompt[:insts] << "\n"
+        messages = prompt.messages

-        claude_prompt << build_tools_prompt if prompt[:tools]
+        trimmed_messages = trim_messages(messages)

-        claude_prompt << build_examples(prompt[:examples]) if prompt[:examples]
+        # The last model message is popped here and appended to the final Assistant: turn below.
+        last_message = trimmed_messages.last[:type] == :model ? trimmed_messages.pop : nil

-        pad_newlines!(claude_prompt)
+        claude_prompt =
+          trimmed_messages.reduce(+"") do |memo, msg|
+            next(memo) if msg[:type] == :tool_call

-        claude_prompt << conversation_context if prompt[:conversation_context]
+            if msg[:type] == :system
+              memo << "Human: " unless uses_system_message?
+              memo << msg[:content]
+              if prompt.tools
+                memo << "\n"
+                memo << build_tools_prompt
+              end
+            elsif msg[:type] == :model
+              memo << "\n\nAssistant: #{msg[:content]}"
+            elsif msg[:type] == :tool
+              memo << "\n\nAssistant:\n"

-        pad_newlines!(claude_prompt)
+              memo << (<<~TEXT).strip
+                <function_results>
+                <result>
+                <tool_name>#{msg[:id]}</tool_name>
+                <json>
+                #{msg[:content]}
+                </json>
+                </result>
+                </function_results>
+              TEXT
+            else
+              memo << "\n\nHuman: #{msg[:content]}"
+            end

-        if uses_system_message? && (prompt[:input] || prompt[:post_insts])
-          claude_prompt << "Human: "
-        end
-        claude_prompt << "#{prompt[:input]}\n" if prompt[:input]
+            memo
+          end

-        claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]
+        claude_prompt << "\n\nAssistant:"
+        claude_prompt << " #{last_message[:content]}:" if last_message

-        pad_newlines!(claude_prompt)
-
-        claude_prompt << "Assistant: "
-        claude_prompt << " #{prompt[:final_insts]}:" if prompt[:final_insts]
         claude_prompt
       end

@@ -54,49 +65,11 @@ module DiscourseAi
         100_000 # Claude-2.1 has a 200k context window.
       end

-      def conversation_context
-        return "" if prompt[:conversation_context].blank?
-
-        clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
-        flattened_context = flatten_context(clean_context)
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context
-          .reverse
-          .map do |context|
-            row = context[:type] == "user" ? +"Human:" : +"Assistant:"
-
-            if context[:type] == "tool"
-              row << "\n"
-              row << (<<~TEXT).strip
-                <function_results>
-                <result>
-                <tool_name>#{context[:name]}</tool_name>
-                <json>
-                #{context[:content]}
-                </json>
-                </result>
-                </function_results>
-              TEXT
-            else
-              row << " "
-              row << context[:content]
-            end
-          end
-          .join("\n\n")
-      end
-
       private

       def uses_system_message?
         model_name == "claude-2"
       end
-
-      def build_examples(examples_arr)
-        examples_arr.reduce("") do |memo, example|
-          memo += "\nH: #{example[0]}\nA: #{example[1]}\n\n"
-        end
-      end
     end
   end
 end
diff --git a/lib/completions/dialects/dialect.rb b/lib/completions/dialects/dialect.rb
index 89efb118..10b68fbc 100644
--- a/lib/completions/dialects/dialect.rb
+++ b/lib/completions/dialects/dialect.rb
@@ -31,6 +31,27 @@ module DiscourseAi
         def tokenizer
           raise NotImplemented
         end
+
+        def tool_preamble
+          <<~TEXT
+            In this environment you have access to a set of tools you can use to answer the user's question.
+            You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
+
+            <function_calls>
+            <invoke>
+            <tool_name>$TOOL_NAME</tool_name>
+            <parameters>
+            <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
+            ...
+            </parameters>
+            </invoke>
+            </function_calls>
+
+            if a parameter type is an array, return a JSON array of values. For example:
+            [1,"two",3.0]
+
+            Here are the tools available:
+          TEXT
+        end
       end

       def initialize(generic_prompt, model_name, opts: {})
@@ -46,7 +67,7 @@ module DiscourseAi

       def tools
         tools = +""

-        prompt[:tools].each do |function|
+        prompt.tools.each do |function|
           parameters = +""
           if function[:parameters].present?
             function[:parameters].each do |parameter|
@@ -89,114 +110,60 @@ module DiscourseAi

       attr_reader :prompt, :model_name, :opts

-      def trim_context(conversation_context)
+      def trim_messages(messages)
         prompt_limit = max_prompt_tokens
-        current_token_count = calculate_token_count_without_context
+        current_token_count = 0
         message_step_size = (max_prompt_tokens / 25).to_i * -1

-        conversation_context.reduce([]) do |memo, context|
-          break(memo) if current_token_count >= prompt_limit
+        reversed_trimmed_msgs =
+          messages
+            .reverse
+            .reduce([]) do |acc, msg|
+              message_tokens = calculate_message_token(msg)

-          dupped_context = context.dup
+              dupped_msg = msg.dup

-          message_tokens = calculate_message_token(dupped_context)
+              # Don't trim tool call metadata.
+              if msg[:type] == :tool_call
+                current_token_count += message_tokens + per_message_overhead
+                acc << dupped_msg
+                next(acc)
+              end

-          # Don't trim tool call metadata.
-          if context[:type] == "tool_call"
-            current_token_count += calculate_message_token(context) + per_message_overhead
-            memo << context
-            next(memo)
-          end
+              # Trimming content to make sure we respect token limit.
+              while dupped_msg[:content].present? &&
+                      message_tokens + current_token_count + per_message_overhead > prompt_limit
+                dupped_msg[:content] = dupped_msg[:content][0..message_step_size] || ""
+                message_tokens = calculate_message_token(dupped_msg)
+              end

-          # Trimming content to make sure we respect token limit.
-          while dupped_context[:content].present? &&
-                  message_tokens + current_token_count + per_message_overhead > prompt_limit
-            dupped_context[:content] = dupped_context[:content][0..message_step_size] || ""
-            message_tokens = calculate_message_token(dupped_context)
-          end
+              next(acc) if dupped_msg[:content].blank?

-          next(memo) if dupped_context[:content].blank?
+              current_token_count += message_tokens + per_message_overhead

-          current_token_count += message_tokens + per_message_overhead
+              acc << dupped_msg
+            end

-          memo << dupped_context
-        end
-      end
-
-      def calculate_token_count_without_context
-        tokenizer = self.class.tokenizer
-
-        examples_count =
-          prompt[:examples].to_a.sum do |pair|
-            tokenizer.size(pair.join) + (per_message_overhead * 2)
-          end
-        input_count = tokenizer.size(prompt[:input].to_s) + per_message_overhead
-
-        examples_count + input_count +
-          prompt
-            .except(:conversation_context, :tools, :examples, :input)
-            .sum { |_, v| tokenizer.size(v) + per_message_overhead }
+        reversed_trimmed_msgs.reverse
       end

       def per_message_overhead
         0
       end

-      def calculate_message_token(context)
-        self.class.tokenizer.size(context[:content].to_s)
-      end
-
-      def self.tool_preamble
-        <<~TEXT
-          In this environment you have access to a set of tools you can use to answer the user's question.
-          You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
-
-          <function_calls>
-          <invoke>
-          <tool_name>$TOOL_NAME</tool_name>
-          <parameters>
-          <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
-          ...
-          </parameters>
-          </invoke>
-          </function_calls>
-
-          if a parameter type is an array, return a JSON array of values. For example:
-          [1,"two",3.0]
-
-          Here are the tools available:
-        TEXT
+      def calculate_message_token(msg)
+        self.class.tokenizer.size(msg[:content].to_s)
       end

       def build_tools_prompt
-        return "" if prompt[:tools].blank?
+        return "" if prompt.tools.blank?

-        <<~TEXT
+        (<<~TEXT).strip
           #{self.class.tool_preamble}
           <tools>
           #{tools}</tools>
         TEXT
       end
-
-      def flatten_context(context)
-        found_first_multi_turn = false
-
-        context
-          .map do |a_context|
-            if a_context[:type] == "multi_turn"
-              if found_first_multi_turn
-                # Only take tool and tool_call_id from subsequent multi-turn interactions.
-                # Drop assistant responses
-                a_context[:content].last(2)
-              else
-                found_first_multi_turn = true
-                a_context[:content]
-              end
-            else
-              a_context
-            end
-          end
-          .flatten
-      end
     end
   end
 end
diff --git a/lib/completions/dialects/gemini.rb b/lib/completions/dialects/gemini.rb
index 6fd98287..cfbae79e 100644
--- a/lib/completions/dialects/gemini.rb
+++ b/lib/completions/dialects/gemini.rb
@@ -18,39 +18,60 @@ module DiscourseAi
         # Gemini complains if we don't alternate model/user roles.
         noop_model_response = { role: "model", parts: { text: "Ok." } }

-        gemini_prompt = [
-          {
-            role: "user",
-            parts: {
-              text: [prompt[:insts], prompt[:post_insts].to_s].join("\n"),
-            },
-          },
-          noop_model_response,
-        ]
+        messages = prompt.messages

-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            gemini_prompt << { role: "user", parts: { text: example_pair.first } }
-            gemini_prompt << { role: "model", parts: { text: example_pair.second } }
+        # Gemini doesn't use an assistant msg to improve long-context responses.
+        messages.pop if messages.last[:type] == :model
+
+        trim_messages(messages).reduce([]) do |memo, msg|
+          if msg[:type] == :system
+            memo << { role: "user", parts: { text: msg[:content] } }
+            memo << noop_model_response.dup
+          elsif msg[:type] == :model
+            memo << { role: "model", parts: { text: msg[:content] } }
+          elsif msg[:type] == :tool_call
+            call_details = JSON.parse(msg[:content], symbolize_names: true)
+
+            memo << {
+              role: "model",
+              parts: {
+                functionCall: {
+                  name: call_details[:name],
+                  args: call_details[:arguments],
+                },
+              },
+            }
+          elsif msg[:type] == :tool
+            memo << {
+              role: "function",
+              parts: {
+                functionResponse: {
+                  name: msg[:id],
+                  response: {
+                    content: msg[:content],
+                  },
+                },
+              },
+            }
+          else
+            # Gemini quirk. Doesn't accept tool -> user or user -> user msgs.
+            previous_msg_role = memo.last&.dig(:role)
+            if previous_msg_role == "user" || previous_msg_role == "tool"
+              memo << noop_model_response.dup
+            end
+
+            memo << { role: "user", parts: { text: msg[:content] } }
           end
+
+          memo
         end
-
-        gemini_prompt.concat(conversation_context) if prompt[:conversation_context]
-
-        if prompt[:input]
-          gemini_prompt << noop_model_response.dup if gemini_prompt.last[:role] == "user"
-
-          gemini_prompt << { role: "user", parts: { text: prompt[:input] } }
-        end
-
-        gemini_prompt
       end

       def tools
-        return if prompt[:tools].blank?
+        return if prompt.tools.blank?

         translated_tools =
-          prompt[:tools].map do |t|
+          prompt.tools.map do |t|
             tool = t.slice(:name, :description)

             if t[:parameters]
@@ -73,48 +94,6 @@ module DiscourseAi
         [{ function_declarations: translated_tools }]
       end

-      def conversation_context
-        return [] if prompt[:conversation_context].blank?
-
-        flattened_context = flatten_context(prompt[:conversation_context])
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context.reverse.map do |context|
-          if context[:type] == "tool_call"
-            function = JSON.parse(context[:content], symbolize_names: true)
-
-            {
-              role: "model",
-              parts: {
-                functionCall: {
-                  name: function[:name],
-                  args: function[:arguments],
-                },
-              },
-            }
-          elsif context[:type] == "tool"
-            {
-              role: "function",
-              parts: {
-                functionResponse: {
-                  name: context[:name],
-                  response: {
-                    content: context[:content],
-                  },
-                },
-              },
-            }
-          else
-            {
-              role: context[:type] == "assistant" ? "model" : "user",
-              parts: {
-                text: context[:content],
-              },
-            }
-          end
-        end
-      end
-
       def max_prompt_tokens
         16_384 # 50% of model tokens
       end
@@ -124,25 +103,6 @@ module DiscourseAi
       def calculate_message_token(context)
         self.class.tokenizer.size(context[:content].to_s + context[:name].to_s)
       end
-
-      private
-
-      def flatten_context(context)
-        flattened = []
-        context.each do |c|
-          if c[:type] == "multi_turn"
-            # gemini quirk
-            if c[:content].first[:type] == "tool"
-              flattend << { type: "assistant", content: "ok." }
-            end
-
-            flattened.concat(c[:content])
-          else
-            flattened << c
-          end
-        end
-        flattened
-      end
     end
   end
 end
diff --git a/lib/completions/dialects/llama2_classic.rb b/lib/completions/dialects/llama2_classic.rb
index 63284c59..d5510719 100644
--- a/lib/completions/dialects/llama2_classic.rb
+++ b/lib/completions/dialects/llama2_classic.rb
@@ -15,58 +15,48 @@ module DiscourseAi
       end

       def translate
-        llama2_prompt = +<<~TEXT
-          [INST]
-          <<SYS>>
-          #{prompt[:insts]}
-          #{build_tools_prompt}#{prompt[:post_insts]}
-          <</SYS>>
-          [/INST]
-        TEXT
+        messages = prompt.messages

-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            llama2_prompt << "[INST]#{example_pair.first}[/INST]\n"
-            llama2_prompt << "#{example_pair.second}\n"
-          end
-        end
+        llama2_prompt =
+          trim_messages(messages).reduce(+"") do |memo, msg|
+            next(memo) if msg[:type] == :tool_call

-        llama2_prompt << conversation_context if prompt[:conversation_context].present?
-
-        llama2_prompt << "[INST]#{prompt[:input]}[/INST]\n"
-      end
-
-      def conversation_context
-        return "" if prompt[:conversation_context].blank?
-
-        clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
-        flattened_context = flatten_context(clean_context)
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context
-          .reverse
-          .reduce(+"") do |memo, context|
-            if context[:type] == "tool"
-              memo << <<~TEXT
+            if msg[:type] == :system
+              memo << (<<~TEXT).strip
                 [INST]
+                <<SYS>>
+                #{msg[:content]}
+                #{build_tools_prompt}
+                <</SYS>>
+                [/INST]
+              TEXT
+            elsif msg[:type] == :model
+              memo << "\n#{msg[:content]}"
+            elsif msg[:type] == :tool
+              tool = JSON.parse(msg[:content], symbolize_names: true)
+              memo << "\n[INST]\n"
+
+              memo << (<<~TEXT).strip
                 <function_results>
                 <result>
-                <tool_name>#{context[:name]}</tool_name>
+                <tool_name>#{msg[:id]}</tool_name>
                 <json>
-                #{context[:content]}
+                #{msg[:content]}
                 </json>
                 </result>
                 </function_results>
                 [/INST]
               TEXT
-            elsif context[:type] == "assistant"
-              memo << "[INST]" << context[:content] << "[/INST]\n"
             else
-              memo << context[:content] << "\n"
+              memo << "\n[INST]#{msg[:content]}[/INST]"
             end

             memo
           end
+
+        llama2_prompt << "\n" if llama2_prompt.ends_with?("[/INST]")
+
+        llama2_prompt
       end

       def max_prompt_tokens
diff --git a/lib/completions/dialects/mixtral.rb b/lib/completions/dialects/mixtral.rb
index 4ac60d19..fd529b24 100644
--- a/lib/completions/dialects/mixtral.rb
+++ b/lib/completions/dialects/mixtral.rb
@@ -17,56 +17,44 @@ module DiscourseAi
       end

       def translate
-        mixtral_prompt = +<<~TEXT
-          [INST]
-          #{prompt[:insts]}
-          #{build_tools_prompt}#{prompt[:post_insts]}
-          [/INST] Ok
-        TEXT
+        messages = prompt.messages

-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            mixtral_prompt << "[INST] #{example_pair.first} [/INST]\n"
-            mixtral_prompt << "#{example_pair.second}\n"
-          end
-        end
+        mixtral_prompt =
+          trim_messages(messages).reduce(+"") do |memo, msg|
+            next(memo) if msg[:type] == :tool_call

-        mixtral_prompt << conversation_context if prompt[:conversation_context].present?
-
-        mixtral_prompt << "[INST] #{prompt[:input]} [/INST]\n"
-      end
-
-      def conversation_context
-        return "" if prompt[:conversation_context].blank?
-
-        clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
-        flattened_context = flatten_context(clean_context)
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context
-          .reverse
-          .reduce(+"") do |memo, context|
-            memo << "[INST] " if context[:type] == "user"
-
-            if context[:type] == "tool"
-              memo << <<~TEXT
+            if msg[:type] == :system
+              memo << (<<~TEXT).strip
+                [INST]
+                #{msg[:content]}
+                #{build_tools_prompt}
+                [/INST] Ok
+              TEXT
+            elsif msg[:type] == :model
+              memo << "\n#{msg[:content]}"
+            elsif msg[:type] == :tool
+              memo << "\n"
+              memo << (<<~TEXT).strip
                 <function_results>
                 <result>
-                <tool_name>#{context[:name]}</tool_name>
+                <tool_name>#{msg[:id]}</tool_name>
                 <json>
-                #{context[:content]}
+                #{msg[:content]}
                 </json>
                 </result>
                 </function_results>
               TEXT
             else
-              memo << context[:content] << "\n"
-              memo << "[/INST]" if context[:type] == "user"
+              memo << "\n[INST]#{msg[:content]}[/INST]"
             end

             memo
           end
+
+        mixtral_prompt << "\n" if mixtral_prompt.ends_with?("[/INST]")
+
+        mixtral_prompt
       end

       def max_prompt_tokens
diff --git a/lib/completions/dialects/orca_style.rb b/lib/completions/dialects/orca_style.rb
index fa742402..a8c9e939 100644
--- a/lib/completions/dialects/orca_style.rb
+++ b/lib/completions/dialects/orca_style.rb
@@ -15,54 +15,48 @@ module DiscourseAi
       end

       def translate
-        orca_style_prompt = +<<~TEXT
-          ### System:
-          #{prompt[:insts]}
-          #{build_tools_prompt}#{prompt[:post_insts]}
-        TEXT
+        messages = prompt.messages
+        trimmed_messages = trim_messages(messages)

-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            orca_style_prompt << "### User:\n#{example_pair.first}\n"
-            orca_style_prompt << "### Assistant:\n#{example_pair.second}\n"
-          end
-        end
+        # The last model message is popped here and appended to the final Assistant: turn below.
+        last_message = trimmed_messages.last[:type] == :model ? trimmed_messages.pop : nil

-        orca_style_prompt << "### User:\n#{prompt[:input]}\n"
+        llama2_prompt =
+          trimmed_messages.reduce(+"") do |memo, msg|
+            next(memo) if msg[:type] == :tool_call

-        orca_style_prompt << "### Assistant:\n"
-      end
-
-      def conversation_context
-        return "" if prompt[:conversation_context].blank?
-
-        clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
-        flattened_context = flatten_context(clean_context)
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context
-          .reverse
-          .reduce(+"") do |memo, context|
-            memo << (context[:type] == "user" ? "### User:" : "### Assistant:")
-
-            if context[:type] == "tool"
-              memo << <<~TEXT
+            if msg[:type] == :system
+              memo << (<<~TEXT).strip
+                ### System:
+                #{msg[:content]}
+                #{build_tools_prompt}
+              TEXT
+            elsif msg[:type] == :model
+              memo << "\n### Assistant:\n#{msg[:content]}"
+            elsif msg[:type] == :tool
+              memo << "\n### Assistant:\n"
+              memo << (<<~TEXT).strip
                 <function_results>
                 <result>
-                <tool_name>#{context[:name]}</tool_name>
+                <tool_name>#{msg[:id]}</tool_name>
                 <json>
-                #{context[:content]}
+                #{msg[:content]}
                 </json>
                 </result>
                 </function_results>
               TEXT
             else
-              memo << " " << context[:content] << "\n"
+              memo << "\n### User:\n#{msg[:content]}"
             end

             memo
           end
+
+        llama2_prompt << "\n### Assistant:\n"
+        llama2_prompt << "#{last_message[:content]}:" if last_message
+
+        llama2_prompt
       end

       def max_prompt_tokens
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
index e8cb14f3..a9b17f31 100644
--- a/lib/completions/llm.rb
+++ b/lib/completions/llm.rb
@@ -48,41 +48,7 @@ module DiscourseAi

       delegate :tokenizer, to: :dialect_klass

-      # @param generic_prompt { Hash } - Prompt using our generic format.
-      # We use the following keys from the hash:
-      #   - insts: String with instructions for the LLM.
-      #   - input: String containing user input
-      #   - examples (optional): Array of arrays with examples of input and responses. Each array is a input/response pair like [[example1, response1], [example2, response2]].
-      #   - post_insts (optional): Additional instructions for the LLM. Some dialects like Claude add these at the end of the prompt.
-      #   - conversation_context (optional): Array of hashes to provide context about an ongoing conversation with the model.
-      #     We translate the array in reverse order, meaning the first element would be the most recent message in the conversation.
-      #     Example:
-      #
-      #     [
-      #       { type: "user", name: "user1", content: "This is a new message by a user" },
-      #       { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
-      #       { type: "tool", name: "tool_id", content: "I'm a tool result" },
-      #       { type: "tool_call_id", name: "tool_id", content: { name: "tool", args: { ...tool_args } } },
-      #       { type: "multi_turn", content: [assistant_reply_from_a_tool, tool_call, tool_call_id] }
-      #     ]
-      #
-      #   - tools (optional - only functions supported): Array of functions a model can call. Each function is defined as a hash. Example:
-      #
-      #   {
-      #     name: "get_weather",
-      #     description: "Get the weather in a city",
-      #     parameters: [
-      #       { name: "location", type: "string", description: "the city name", required: true },
-      #       {
-      #         name: "unit",
-      #         type: "string",
-      #         description: "the unit of measurement celcius c or fahrenheit f",
-      #         enum: %w[c f],
-      #         required: true,
-      #       },
-      #     ],
-      #   }
-      #
+      # @param prompt { DiscourseAi::Completions::Prompt } - Our generic prompt object
       # @param user { User } - User requesting the summary.
       #
       # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
       #
       #
       def generate(
-        generic_prompt,
+        prompt,
         temperature: nil,
         max_tokens: nil,
         stop_sequences: nil,
@@ -117,15 +83,14 @@ module DiscourseAi
           stop_sequences: stop_sequences,
         }

-        model_params.merge!(generic_prompt.dig(:params, model_name) || {})
         model_params.keys.each { |key| model_params.delete(key) if model_params[key].nil? }

-        dialect = dialect_klass.new(generic_prompt, model_name, opts: model_params)
+        dialect = dialect_klass.new(prompt, model_name, opts: model_params)
         gateway.perform_completion!(dialect, user, model_params, &partial_read_blk)
       end

       def max_prompt_tokens
-        dialect_klass.new({}, model_name).max_prompt_tokens
+        dialect_klass.new(DiscourseAi::Completions::Prompt.new(""), model_name).max_prompt_tokens
       end

       attr_reader :model_name
diff --git a/lib/completions/prompt.rb b/lib/completions/prompt.rb
new file mode 100644
index 00000000..7e852400
--- /dev/null
+++ b/lib/completions/prompt.rb
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    class Prompt
+      INVALID_TURN = Class.new(StandardError)
+
+      attr_reader :system_message, :messages
+      attr_accessor :tools
+
+      def initialize(system_msg, messages: [], tools: [])
+        raise ArgumentError, "messages must be an array" if !messages.is_a?(Array)
+        raise ArgumentError, "tools must be an array" if !tools.is_a?(Array)
+
+        system_message = { type: :system, content: system_msg }
+
+        @messages = [system_message].concat(messages)
+        @messages.each { |message| validate_message(message) }
+        @messages.each_cons(2) { |last_turn, new_turn| validate_turn(last_turn, new_turn) }
+
+        @tools = tools
+      end
+
+      def push(type:, content:, id: nil)
+        return if type == :system
+        new_message = { type: type, content: content }
+
+        new_message[:id] = type == :user ? clean_username(id) : id if id && type != :model
+
+        validate_message(new_message)
+        validate_turn(messages.last, new_message)
+
+        messages << new_message
+      end
+
+      private
+
+      def clean_username(username)
+        if username.match?(/\A[a-zA-Z0-9_-]{1,64}\z/)
+          username
+        else
+          # not the best in the world, but this is what we have to work with
+          # if sites enable unicode usernames this can get messy
+          username.gsub(/[^a-zA-Z0-9_-]/, "_")[0..63]
+        end
+      end
+
+      def validate_message(message)
+        valid_types = %i[system user model tool tool_call]
+        if !valid_types.include?(message[:type])
+          raise ArgumentError, "message type must be one of #{valid_types}"
+        end
+
+        valid_keys = %i[type content id]
+        if (invalid_keys = message.keys - valid_keys).any?
+          raise ArgumentError, "message contains invalid keys: #{invalid_keys}"
+        end
+
+        raise ArgumentError, "message content must be a string" if !message[:content].is_a?(String)
+      end
+
+      def validate_turn(last_turn, new_turn)
+        valid_types = %i[tool tool_call model user]
+        raise INVALID_TURN if !valid_types.include?(new_turn[:type])
+
+        if last_turn[:type] == :system && %i[tool tool_call model].include?(new_turn[:type])
+          raise INVALID_TURN
+        end
+
+        raise INVALID_TURN if new_turn[:type] == :tool && last_turn[:type] != :tool_call
+        raise INVALID_TURN if new_turn[:type] == :model && last_turn[:type] == :model
+      end
+    end
+  end
+end
diff --git a/lib/embeddings/semantic_search.rb b/lib/embeddings/semantic_search.rb
index 9db8b825..0e6131df 100644
--- a/lib/embeddings/semantic_search.rb
+++ b/lib/embeddings/semantic_search.rb
@@ -95,19 +95,19 @@ module DiscourseAi
       end

       def hypothetical_post_from(search_term)
-        prompt = {
-          insts: <<~TEXT,
+        prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
           You are a content creator for a forum. The forum description is as follows:
           #{SiteSetting.title}
           #{SiteSetting.site_description}
+
+          Put the forum post between <ai></ai> tags.
         TEXT
-          input: <<~TEXT,
+
+        prompt.push(type: :user, content: <<~TEXT.strip)
           Using this description, write a forum post about the subject inside the <input></input> XML tags:

           <input>#{search_term}</input>
         TEXT
-          post_insts: "Put the forum post between <ai></ai> tags.",
-        }

         llm_response =
           DiscourseAi::Completions::Llm.proxy(
diff --git a/lib/summarization/strategies/fold_content.rb b/lib/summarization/strategies/fold_content.rb
index ce10afb0..384355d3 100644
--- a/lib/summarization/strategies/fold_content.rb
+++ b/lib/summarization/strategies/fold_content.rb
@@ -105,16 +105,14 @@ module DiscourseAi
         def summarize_in_chunks(llm, chunks, user, opts)
           chunks.map do |chunk|
             prompt = summarization_prompt(chunk[:summary], opts)
-            prompt[:post_insts] = "Don't use more than 400 words for the summary."

-            chunk[:summary] = llm.generate(prompt, user: user)
+            chunk[:summary] = llm.generate(prompt, user: user, max_tokens: 300)
             chunk
           end
         end

         def concatenate_summaries(llm, summaries, user, &on_partial_blk)
-          prompt = {}
-          prompt[:insts] = <<~TEXT
+          prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
             You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
             The narrative you create is in the form of one or multiple paragraphs.
             Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
@@ -123,7 +121,7 @@ module DiscourseAi
             You format the response, including links, using Markdown.
          TEXT

-          prompt[:input] = <<~TEXT
+          prompt.push(type: :user, content: <<~TEXT.strip)
            THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:

            <input>
            #{summaries.join("\n")}
            </input>
          TEXT
@@ -151,28 +149,39 @@ module DiscourseAi
            For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
          TEXT

-          prompt = { insts: insts, input: <<~TEXT }
-            #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
-            Here are the posts, inside <input></input> XML tags:
-
-            <input>
-            #{input}
-            </input>
-          TEXT
+          prompt = DiscourseAi::Completions::Prompt.new(insts.strip)

           if opts[:resource_path]
-            prompt[:examples] = [
-              [
-                "1) user1 said: I love Mondays 2) user2 said: I hate Mondays",
+            prompt.push(
+              type: :user,
+              content: "1) user1 said: I love Mondays 2) user2 said: I hate Mondays",
+            )
+            prompt.push(
+              type: :model,
+              content:
                 "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) loves them, while [user2](#{opts[:resource_path]}/2) hates them.",
-              ],
-              [
-                "3) usuario1: Amo los lunes 6) usuario2: Odio los lunes",
+            )
+
+            prompt.push(
+              type: :user,
+              content: "3) usuario1: Amo los lunes 6) usuario2: Odio los lunes",
+            )
+            prompt.push(
+              type: :model,
+              content:
                 "Dos usuarios charlan sobre los lunes. [usuario1](#{opts[:resource_path]}/3) dice que los ama, mientras que [usuario2](#{opts[:resource_path]}/6) los odia.",
-              ],
-            ]
           end

+          prompt.push(type: :user, content: <<~TEXT.strip)
+            #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
+            Here are the posts, inside <input></input> XML tags:
+
+            <input>
+            #{input}
+            </input>
+          TEXT
+
           prompt
         end
       end
diff --git a/spec/lib/completions/dialects/chat_gpt_spec.rb b/spec/lib/completions/dialects/chat_gpt_spec.rb
index 9fe23942..07acc23a 100644
--- a/spec/lib/completions/dialects/chat_gpt_spec.rb
+++ b/spec/lib/completions/dialects/chat_gpt_spec.rb
@@ -1,105 +1,30 @@
 # frozen_string_literal: true

+require_relative "dialect_context"
+
 RSpec.describe DiscourseAi::Completions::Dialects::ChatGpt do
-  subject(:dialect) { described_class.new(prompt, "gpt-4") }
-
-  let(:tool) do
-    {
-      name: "get_weather",
-      description: "Get the weather in a city",
-      parameters: [
-        { name: "location", type: "string", description: "the city name", required: true },
-        {
-          name: "unit",
-          type: "string",
-          description: "the unit of measurement celcius c or fahrenheit f",
-          enum: %w[c f],
-          required: true,
-        },
-      ],
-    }
-  end
-
-  let(:prompt) do
-    {
-      insts: <<~TEXT,
-        I want you to act as a title generator for written pieces. I will provide you with a text,
-        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
-        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
-      TEXT
-      input: <<~TEXT,
-        Here is the text, inside <input></input> XML tags:
-        <input>
-        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
-        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
-        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
-
-        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
-        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
-        slow surprise (these words must be heard, not read): 'But, my friend!'
He is killed and does not know that he - dies so that a scene may be repeated. - - TEXT - post_insts: - "Please put the translation between tags and separate each title with a comma.", - tools: [tool], - } - end + let(:model_name) { "gpt-4" } + let(:context) { DialectContext.new(described_class, model_name) } describe "#translate" do it "translates a prompt written in our generic format to the ChatGPT format" do open_ai_version = [ - { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") }, - { role: "user", content: prompt[:input] }, + { role: "system", content: context.system_insts }, + { role: "user", content: context.simple_user_input }, ] - translated = dialect.translate + translated = context.system_user_scenario expect(translated).to contain_exactly(*open_ai_version) end - it "include examples in the ChatGPT version" do - prompt[:examples] = [ - [ - "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.", - "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.", - ], - ] - - open_ai_version = [ - { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") }, - { role: "user", content: prompt[:examples][0][0] }, - { role: "assistant", content: prompt[:examples][0][1] }, - { role: "user", content: prompt[:input] }, - ] - - translated = dialect.translate - - expect(translated).to contain_exactly(*open_ai_version) - end - end - - describe "#conversation_context" do - let(:context) do - [ - { type: "user", name: "user1", content: "This is a new message by a user" }, - { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" }, - { type: "tool", name: "tool_id", content: "I'm a tool result" }, - { - type: "tool_call", - name: "tool_id", - content: { name: "get_weather", arguments: { location: "Sydney", unit: "c" } }.to_json, - }, - ] - end - - it "adds conversation in reverse order (first == newer)" do - prompt[:conversation_context] = context - - translated_context = dialect.conversation_context - - expect(translated_context).to eq( + it "translates tool_call and tool messages" do + expect(context.multi_turn_scenario).to eq( [ + { role: "system", content: context.system_insts }, + { role: "user", content: "This is a message by a user", name: "user1" }, + { role: "assistant", content: "I'm a previous bot reply, that's why there's no user" }, + { role: "user", name: "user1", content: "This is a new message by a user" }, { role: "assistant", content: nil, @@ -114,21 +39,16 @@ RSpec.describe DiscourseAi::Completions::Dialects::ChatGpt do }, ], }, - { role: "tool", content: context.third[:content], tool_call_id: context.third[:name] }, - { role: "assistant", content: context.second[:content] }, - { role: "user", content: context.first[:content], name: context.first[:name] }, + { role: "tool", content: "I'm a tool result".to_json, tool_call_id: "tool_id" }, ], ) end it "trims content if it's getting too long" do - context.third[:content] = context.third[:content] * 1000 + translated = context.long_user_input_scenario - prompt[:conversation_context] = context - - translated_context = dialect.conversation_context - - expect(translated_context.third[:content].length).to be < context.third[:content].length + expect(translated.last[:role]).to eq("user") + expect(translated.last[:content].length).to be < context.long_message_text.length end end @@ -136,11 +56,11 @@ RSpec.describe 
DiscourseAi::Completions::Dialects::ChatGpt do it "returns a list of available tools" do open_ai_tool_f = { function: { - description: tool[:description], - name: tool[:name], + description: context.tools.first[:description], + name: context.tools.first[:name], parameters: { properties: - tool[:parameters].reduce({}) do |memo, p| + context.tools.first[:parameters].reduce({}) do |memo, p| memo[p[:name]] = { description: p[:description], type: p[:type] } memo[p[:name]][:enum] = p[:enum] if p[:enum] @@ -154,7 +74,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::ChatGpt do type: "function", } - expect(subject.tools).to contain_exactly(open_ai_tool_f) + expect(context.dialect_tools).to contain_exactly(open_ai_tool_f) end end end diff --git a/spec/lib/completions/dialects/claude_spec.rb b/spec/lib/completions/dialects/claude_spec.rb index 0e52409d..4e8b877a 100644 --- a/spec/lib/completions/dialects/claude_spec.rb +++ b/spec/lib/completions/dialects/claude_spec.rb @@ -1,209 +1,64 @@ # frozen_string_literal: true +require_relative "dialect_context" + RSpec.describe DiscourseAi::Completions::Dialects::Claude do - subject(:dialect) { described_class.new(prompt, "claude-2") } - - let(:tool) do - { - name: "get_weather", - description: "Get the weather in a city", - parameters: [ - { name: "location", type: "string", description: "the city name", required: true }, - { - name: "unit", - type: "string", - description: "the unit of measurement celcius c or fahrenheit f", - enum: %w[c f], - required: true, - }, - ], - } - end - - let(:prompt) do - { - insts: <<~TEXT, - I want you to act as a title generator for written pieces. I will provide you with a text, - and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, - and ensure that the meaning is maintained. Replies will utilize the language type of the topic. - TEXT - input: <<~TEXT, - Here is the text, inside XML tags: - - To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, - discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer - defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. - - Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires, - a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and - slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he - dies so that a scene may be repeated. 
- - TEXT - post_insts: - "Please put the translation between tags and separate each title with a comma.", - } - end + let(:model_name) { "claude-2" } + let(:context) { DialectContext.new(described_class, model_name) } describe "#translate" do it "translates a prompt written in our generic format to Claude's format" do - anthropic_version = (<<~TEXT).strip + " " - #{prompt[:insts]} - Human: #{prompt[:input]} - #{prompt[:post_insts]} - - Assistant: - TEXT - - translated = dialect.translate - - expect(translated).to eq(anthropic_version) - end - - it "knows how to translate examples to Claude's format" do - prompt[:examples] = [ - [ - "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.", - "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.", - ], - ] - anthropic_version = (<<~TEXT).strip + " " - #{prompt[:insts]} - - H: #{prompt[:examples][0][0]} - A: #{prompt[:examples][0][1]} - - - Human: #{prompt[:input]} - #{prompt[:post_insts]} - - Assistant: - TEXT - - translated = dialect.translate - - expect(translated).to eq(anthropic_version) - end - - it "include tools inside the prompt" do - prompt[:tools] = [tool] - - anthropic_version = (<<~TEXT).strip + " " - #{prompt[:insts]} - #{DiscourseAi::Completions::Dialects::Claude.tool_preamble} + anthropic_version = (<<~TEXT).strip + #{context.system_insts} + #{described_class.tool_preamble} - #{dialect.tools} - - Human: #{prompt[:input]} - #{prompt[:post_insts]} + #{context.dialect_tools} + Human: #{context.simple_user_input} + Assistant: TEXT - translated = dialect.translate + translated = context.system_user_scenario expect(translated).to eq(anthropic_version) end - it "includes all the right newlines" do - prompt.clear - prompt.merge!( - { - insts: "You are an artist", - conversation_context: [ - { content: "draw another funny cat", type: "user", name: "sam" }, - { content: "ok", type: "assistant" }, - { content: "draw a funny cat", type: "user", name: "sam" }, - ], - }, - ) + it "translates tool messages" do + expected = +(<<~TEXT).strip + #{context.system_insts} + #{described_class.tool_preamble} + + #{context.dialect_tools} - expected = (<<~TEXT).strip + " " - You are an artist + Human: This is a message by a user - Human: draw a funny cat + Assistant: I'm a previous bot reply, that's why there's no user - Assistant: ok + Human: This is a new message by a user - Human: draw another funny cat - - Assistant: - TEXT - - expect(dialect.translate).to eq(expected) - end - end - - describe "#conversation_context" do - let(:context) do - [ - { type: "user", name: "user1", content: "This is a new message by a user" }, - { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" }, - { type: "tool", name: "tool_id", content: "I'm a tool result" }, - ] - end - - it "adds conversation in reverse order (first == newer)" do - prompt[:conversation_context] = context - - expected = (<<~TEXT).strip Assistant: tool_id - #{context.last[:content]} + "I'm a tool result" - - Assistant: #{context.second[:content]} - - Human: #{context.first[:content]} + + Assistant: TEXT - translated_context = dialect.conversation_context - - expect(translated_context).to eq(expected) + expect(context.multi_turn_scenario).to eq(expected) end it "trims content if it's getting too long" do - context.last[:content] = context.last[:content] * 20_000 - prompt[:conversation_context] = context + length = 19_000 - translated_context = 
dialect.conversation_context + translated = context.long_user_input_scenario(length: length) - expect(translated_context.length).to be < context.last[:content].length - end - end - - describe "#tools" do - it "translates tools to the tool syntax" do - prompt[:tools] = [tool] - - translated_tool = <<~TEXT - - get_weather - Get the weather in a city - - - location - string - the city name - true - - - unit - string - the unit of measurement celcius c or fahrenheit f - true - c,f - - - - TEXT - - expect(dialect.tools).to eq(translated_tool) + expect(translated.length).to be < context.long_message_text(length: length).length end end end diff --git a/spec/lib/completions/dialects/dialect_context.rb b/spec/lib/completions/dialects/dialect_context.rb new file mode 100644 index 00000000..ddbb6172 --- /dev/null +++ b/spec/lib/completions/dialects/dialect_context.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +class DialectContext + def initialize(dialect_klass, model_name) + @dialect_klass = dialect_klass + @model_name = model_name + end + + def dialect(prompt) + @dialect_klass.new(prompt, @model_name) + end + + def prompt + DiscourseAi::Completions::Prompt.new(system_insts, tools: tools) + end + + def dialect_tools + dialect(prompt).tools + end + + def system_user_scenario + a_prompt = prompt + a_prompt.push(type: :user, content: simple_user_input) + + dialect(a_prompt).translate + end + + def multi_turn_scenario + context_and_multi_turn = [ + { type: :user, id: "user1", content: "This is a message by a user" }, + { type: :model, content: "I'm a previous bot reply, that's why there's no user" }, + { type: :user, id: "user1", content: "This is a new message by a user" }, + { + type: :tool_call, + id: "tool_id", + content: { name: "get_weather", arguments: { location: "Sydney", unit: "c" } }.to_json, + }, + { type: :tool, id: "tool_id", content: "I'm a tool result".to_json }, + ] + + a_prompt = prompt + context_and_multi_turn.each { |msg| a_prompt.push(**msg) } + + dialect(a_prompt).translate + end + + def long_user_input_scenario(length: 1_000) + long_message = long_message_text(length: length) + a_prompt = prompt + a_prompt.push(type: :user, content: long_message, id: "user1") + + dialect(a_prompt).translate + end + + def long_message_text(length: 1_000) + "This a message by a user" * length + end + + def simple_user_input + <<~TEXT + Here is the text, inside XML tags: + + To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, + discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer + defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. + + Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires, + a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and + slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he + dies so that a scene may be repeated. + + TEXT + end + + def system_insts + <<~TEXT + I want you to act as a title generator for written pieces. I will provide you with a text, + and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, + and ensure that the meaning is maintained. Replies will utilize the language type of the topic. 
+ TEXT + end + + def tools + [ + { + name: "get_weather", + description: "Get the weather in a city", + parameters: [ + { name: "location", type: "string", description: "the city name", required: true }, + { + name: "unit", + type: "string", + description: "the unit of measurement celcius c or fahrenheit f", + enum: %w[c f], + required: true, + }, + ], + }, + ] + end +end diff --git a/spec/lib/completions/dialects/gemini_spec.rb b/spec/lib/completions/dialects/gemini_spec.rb index ab9c8642..248f51cb 100644 --- a/spec/lib/completions/dialects/gemini_spec.rb +++ b/spec/lib/completions/dialects/gemini_spec.rb @@ -1,240 +1,70 @@ # frozen_string_literal: true +require_relative "dialect_context" + RSpec.describe DiscourseAi::Completions::Dialects::Gemini do - subject(:dialect) { described_class.new(prompt, "gemini-pro") } - - let(:tool) do - { - name: "get_weather", - description: "Get the weather in a city", - parameters: [ - { name: "location", type: "string", description: "the city name", required: true }, - { - name: "unit", - type: "string", - description: "the unit of measurement celcius c or fahrenheit f", - enum: %w[c f], - required: true, - }, - ], - } - end - - let(:prompt) do - { - insts: <<~TEXT, - I want you to act as a title generator for written pieces. I will provide you with a text, - and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, - and ensure that the meaning is maintained. Replies will utilize the language type of the topic. - TEXT - input: <<~TEXT, - Here is the text, inside XML tags: - - To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, - discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer - defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. - - Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires, - a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and - slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he - dies so that a scene may be repeated. - - TEXT - post_insts: - "Please put the translation between tags and separate each title with a comma.", - tools: [tool], - } - end + let(:model_name) { "gemini-pro" } + let(:context) { DialectContext.new(described_class, model_name) } describe "#translate" do it "translates a prompt written in our generic format to the Gemini format" do gemini_version = [ - { role: "user", parts: { text: [prompt[:insts], prompt[:post_insts]].join("\n") } }, + { role: "user", parts: { text: context.system_insts } }, { role: "model", parts: { text: "Ok." 
} }, - { role: "user", parts: { text: prompt[:input] } }, + { role: "user", parts: { text: context.simple_user_input } }, ] - translated = dialect.translate + translated = context.system_user_scenario expect(translated).to eq(gemini_version) end - it "include examples in the Gemini version" do - prompt[:examples] = [ - [ - "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.", - "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.", - ], - ] - - gemini_version = [ - { role: "user", parts: { text: [prompt[:insts], prompt[:post_insts]].join("\n") } }, - { role: "model", parts: { text: "Ok." } }, - { role: "user", parts: { text: prompt[:examples][0][0] } }, - { role: "model", parts: { text: prompt[:examples][0][1] } }, - { role: "user", parts: { text: prompt[:input] } }, - ] - - translated = dialect.translate - - expect(translated).to contain_exactly(*gemini_version) - end - end - - describe "#conversation_context" do - let(:context) do - [ - { type: "user", name: "user1", content: "This is a new message by a user" }, - { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" }, - { type: "tool", name: "tool_id", content: "I'm a tool result" }, - ] - end - - it "adds conversation in reverse order (first == newer)" do - prompt[:conversation_context] = context - - translated_context = dialect.conversation_context - - expect(translated_context).to eq( + it "translates tool_call and tool messages" do + expect(context.multi_turn_scenario).to eq( [ + { role: "user", parts: { text: context.system_insts } }, + { role: "model", parts: { text: "Ok." } }, + { role: "user", parts: { text: "This is a message by a user" } }, { - role: "function", + role: "model", parts: { - functionResponse: { - name: context.last[:name], - response: { - content: context.last[:content], + text: "I'm a previous bot reply, that's why there's no user", + }, + }, + { role: "user", parts: { text: "This is a new message by a user" } }, + { + role: "model", + parts: { + functionCall: { + name: "get_weather", + args: { + location: "Sydney", + unit: "c", + }, + }, + }, + }, + { + role: "function", + parts: { + functionResponse: { + name: "tool_id", + response: { + content: "I'm a tool result".to_json, }, }, }, }, - { role: "model", parts: { text: context.second[:content] } }, - { role: "user", parts: { text: context.first[:content] } }, ], ) end it "trims content if it's getting too long" do - context.last[:content] = context.last[:content] * 1000 + translated = context.long_user_input_scenario(length: 5_000) - prompt[:conversation_context] = context - - translated_context = dialect.conversation_context - - expect(translated_context.last.dig(:parts, :text).length).to be < - context.last[:content].length - end - - context "when working with multi-turn contexts" do - context "when the multi-turn is for turn that doesn't chain" do - it "uses the tool_call context" do - prompt[:conversation_context] = [ - { - type: "multi_turn", - content: [ - { - type: "tool_call", - name: "get_weather", - content: { - name: "get_weather", - arguments: { - location: "Sydney", - unit: "c", - }, - }.to_json, - }, - { type: "tool", name: "get_weather", content: "I'm a tool result" }, - ], - }, - ] - - translated_context = dialect.conversation_context - - expected = [ - { - role: "function", - parts: { - functionResponse: { - name: "get_weather", - response: { - content: "I'm a tool result", - }, - }, - 
}, - }, - { - role: "model", - parts: { - functionCall: { - name: "get_weather", - args: { - location: "Sydney", - unit: "c", - }, - }, - }, - }, - ] - - expect(translated_context).to eq(expected) - end - end - - context "when the multi-turn is from a chainable tool" do - it "uses the assistant context" do - prompt[:conversation_context] = [ - { - type: "multi_turn", - content: [ - { - type: "tool_call", - name: "get_weather", - content: { - name: "get_weather", - arguments: { - location: "Sydney", - unit: "c", - }, - }.to_json, - }, - { type: "tool", name: "get_weather", content: "I'm a tool result" }, - { type: "assistant", content: "I'm a bot reply!" }, - ], - }, - ] - - translated_context = dialect.conversation_context - - expected = [ - { role: "model", parts: { text: "I'm a bot reply!" } }, - { - role: "function", - parts: { - functionResponse: { - name: "get_weather", - response: { - content: "I'm a tool result", - }, - }, - }, - }, - { - role: "model", - parts: { - functionCall: { - name: "get_weather", - args: { - location: "Sydney", - unit: "c", - }, - }, - }, - }, - ] - - expect(translated_context).to eq(expected) - end - end + expect(translated.last[:role]).to eq("user") + expect(translated.last.dig(:parts, :text).length).to be < + context.long_message_text(length: 5_000).length end end @@ -264,7 +94,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Gemini do ], } - expect(subject.tools).to contain_exactly(gemini_tools) + expect(context.dialect_tools).to contain_exactly(gemini_tools) end end end diff --git a/spec/lib/completions/dialects/llama2_classic_spec.rb b/spec/lib/completions/dialects/llama2_classic_spec.rb index 4c60e2ee..30a40bbf 100644 --- a/spec/lib/completions/dialects/llama2_classic_spec.rb +++ b/spec/lib/completions/dialects/llama2_classic_spec.rb @@ -1,183 +1,62 @@ # frozen_string_literal: true +require_relative "dialect_context" + RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do - subject(:dialect) { described_class.new(prompt, "Llama2-chat-hf") } - - let(:tool) do - { - name: "get_weather", - description: "Get the weather in a city", - parameters: [ - { name: "location", type: "string", description: "the city name", required: true }, - { - name: "unit", - type: "string", - description: "the unit of measurement celcius c or fahrenheit f", - enum: %w[c f], - required: true, - }, - ], - } - end - - let(:prompt) do - { - insts: <<~TEXT, - I want you to act as a title generator for written pieces. I will provide you with a text, - and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, - and ensure that the meaning is maintained. Replies will utilize the language type of the topic. - TEXT - input: <<~TEXT, - Here is the text, inside XML tags: - - To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, - discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer - defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. - - Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires, - a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and - slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he - dies so that a scene may be repeated. 
-
-
-      TEXT
-      post_insts:
-        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
-    }
-  end
+  let(:model_name) { "Llama2-chat-hf" }
+  let(:context) { DialectContext.new(described_class, model_name) }
 
   describe "#translate" do
     it "translates a prompt written in our generic format to the Llama2 format" do
       llama2_classic_version = <<~TEXT
         [INST]
         <<SYS>>
-        #{prompt[:insts]}
-        #{prompt[:post_insts]}
-        <</SYS>>
-        [/INST]
-        [INST]#{prompt[:input]}[/INST]
-      TEXT
-
-      translated = dialect.translate
-
-      expect(translated).to eq(llama2_classic_version)
-    end
-
-    it "includes examples in the translation" do
-      prompt[:examples] = [
-        [
-          "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
-          "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
-        ],
-      ]
-
-      llama2_classic_version = <<~TEXT
-        [INST]
-        <<SYS>>
-        #{prompt[:insts]}
-        #{prompt[:post_insts]}
-        <</SYS>>
-        [/INST]
-        [INST]#{prompt[:examples][0][0]}[/INST]
-        #{prompt[:examples][0][1]}
-        [INST]#{prompt[:input]}[/INST]
-      TEXT
-
-      translated = dialect.translate
-
-      expect(translated).to eq(llama2_classic_version)
-    end
-
-    it "include tools inside the prompt" do
-      prompt[:tools] = [tool]
-
-      llama2_classic_version = <<~TEXT
-        [INST]
-        <<SYS>>
-        #{prompt[:insts]}
-        #{DiscourseAi::Completions::Dialects::Llama2Classic.tool_preamble}
+        #{context.system_insts}
+        #{described_class.tool_preamble}
         <tools>
-        #{dialect.tools}
-        #{prompt[:post_insts]}
+        #{context.dialect_tools}</tools>
         <</SYS>>
         [/INST]
-        [INST]#{prompt[:input]}[/INST]
+        [INST]#{context.simple_user_input}[/INST]
       TEXT
 
-      translated = dialect.translate
+      translated = context.system_user_scenario
 
       expect(translated).to eq(llama2_classic_version)
     end
-  end
 
-  describe "#conversation_context" do
-    let(:context) do
-      [
-        { type: "user", name: "user1", content: "This is a new message by a user" },
-        { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
-        { type: "tool", name: "tool_id", content: "I'm a tool result" },
-      ]
-    end
-
-    it "adds conversation in reverse order (first == newer)" do
-      prompt[:conversation_context] = context
-
-      expected = <<~TEXT
+    it "translates tool messages" do
+      expected = +(<<~TEXT)
+        [INST]
+        <<SYS>>
+        #{context.system_insts}
+        #{described_class.tool_preamble}
+        <tools>
+        #{context.dialect_tools}</tools>
+        <</SYS>>
+        [/INST]
+        [INST]This is a message by a user[/INST]
+        I'm a previous bot reply, that's why there's no user
+        [INST]This is a new message by a user[/INST]
         [INST]
         <function_results>
         <result>
         <tool_name>tool_id</tool_name>
         <json>
-        #{context.last[:content]}
+        "I'm a tool result"
        </json>
        </result>
        </function_results>
         [/INST]
-        [INST]#{context.second[:content]}[/INST]
-        #{context.first[:content]}
       TEXT
 
-      translated_context = dialect.conversation_context
-
-      expect(translated_context).to eq(expected)
+      expect(context.multi_turn_scenario).to eq(expected)
     end
 
     it "trims content if it's getting too long" do
-      context.last[:content] = context.last[:content] * 1_000
-      prompt[:conversation_context] = context
+      translated = context.long_user_input_scenario
 
-      translated_context = dialect.conversation_context
-
-      expect(translated_context.length).to be < context.last[:content].length
-    end
-  end
-
-  describe "#tools" do
-    it "translates functions to the tool syntax" do
-      prompt[:tools] = [tool]
-
-      translated_tool = <<~TEXT
-        <tool_description>
-        <tool_name>get_weather</tool_name>
-        <description>Get the weather in a city</description>
-        <parameters>
-        <parameter>
-        <name>location</name>
-        <type>string</type>
-        <description>the city name</description>
-        <required>true</required>
-        </parameter>
-        <parameter>
-        <name>unit</name>
-        <type>string</type>
-        <description>the unit of measurement celcius c or fahrenheit f</description>
-        <required>true</required>
-        <options>c,f</options>
-        </parameter>
-        </parameters>
-        </tool_description>
-      TEXT
-
-      expect(dialect.tools).to eq(translated_tool)
+      expect(translated.length).to be < context.long_message_text.length
     end
   end
 end
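For context on the multi-turn expectations above and in the dialect specs that follow,
this is roughly the turn sequence DialectContext pushes onto a Prompt; the weather
question and the tool result are illustrative stand-ins for the fixture values:

    prompt = DiscourseAi::Completions::Prompt.new("You are a helpful bot.")
    prompt.push(type: :user, content: "What's the weather in Sydney?", id: "user1")

    # A model turn that calls a tool, followed by that tool's result. The Prompt
    # object validates this ordering: a :tool message must follow a :tool_call.
    prompt.push(
      type: :tool_call,
      id: "tool_id",
      content: { name: "get_weather", arguments: { location: "Sydney", unit: "c" } }.to_json,
    )
    prompt.push(type: :tool, id: "tool_id", content: "10 degrees".to_json)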
diff --git a/spec/lib/completions/dialects/mixtral_spec.rb b/spec/lib/completions/dialects/mixtral_spec.rb
index bbcb95d3..528b2ac9 100644
--- a/spec/lib/completions/dialects/mixtral_spec.rb
+++ b/spec/lib/completions/dialects/mixtral_spec.rb
@@ -1,176 +1,57 @@
 # frozen_string_literal: true
 
+require_relative "dialect_context"
+
 RSpec.describe DiscourseAi::Completions::Dialects::Mixtral do
-  subject(:dialect) { described_class.new(prompt, "mistralai/Mixtral-8x7B-Instruct-v0.1") }
-
-  let(:tool) do
-    {
-      name: "get_weather",
-      description: "Get the weather in a city",
-      parameters: [
-        { name: "location", type: "string", description: "the city name", required: true },
-        {
-          name: "unit",
-          type: "string",
-          description: "the unit of measurement celcius c or fahrenheit f",
-          enum: %w[c f],
-          required: true,
-        },
-      ],
-    }
-  end
-
-  let(:prompt) do
-    {
-      insts: <<~TEXT,
-        I want you to act as a title generator for written pieces. I will provide you with a text,
-        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
-        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
-      TEXT
-      input: <<~TEXT,
-        Here is the text, inside XML tags:
-
-        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
-        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
-        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
-
-        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
-        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
-        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
-        dies so that a scene may be repeated.
- - TEXT - post_insts: - "Please put the translation between tags and separate each title with a comma.", - } - end + let(:model_name) { "mistralai/Mixtral-8x7B-Instruct-v0.1" } + let(:context) { DialectContext.new(described_class, model_name) } describe "#translate" do - it "translates a prompt written in our generic format to the Open AI format" do - orca_style_version = <<~TEXT + it "translates a prompt written in our generic format to the Llama2 format" do + llama2_classic_version = <<~TEXT [INST] - #{prompt[:insts]} - #{prompt[:post_insts]} - [/INST] Ok - [INST] #{prompt[:input]} [/INST] - TEXT - - translated = dialect.translate - - expect(translated).to eq(orca_style_version) - end - - it "include examples in the translated prompt" do - prompt[:examples] = [ - [ - "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.", - "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.", - ], - ] - - orca_style_version = <<~TEXT - [INST] - #{prompt[:insts]} - #{prompt[:post_insts]} - [/INST] Ok - [INST] #{prompt[:examples][0][0]} [/INST] - #{prompt[:examples][0][1]} - [INST] #{prompt[:input]} [/INST] - TEXT - - translated = dialect.translate - - expect(translated).to eq(orca_style_version) - end - - it "include tools inside the prompt" do - prompt[:tools] = [tool] - - orca_style_version = <<~TEXT - [INST] - #{prompt[:insts]} - #{DiscourseAi::Completions::Dialects::Mixtral.tool_preamble} + #{context.system_insts} + #{described_class.tool_preamble} - #{dialect.tools} - #{prompt[:post_insts]} + #{context.dialect_tools} [/INST] Ok - [INST] #{prompt[:input]} [/INST] + [INST]#{context.simple_user_input}[/INST] TEXT - translated = dialect.translate + translated = context.system_user_scenario - expect(translated).to eq(orca_style_version) - end - end - - describe "#conversation_context" do - let(:context) do - [ - { type: "user", name: "user1", content: "This is a new message by a user" }, - { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" }, - { type: "tool", name: "tool_id", content: "I'm a tool result" }, - ] + expect(translated).to eq(llama2_classic_version) end - it "adds conversation in reverse order (first == newer)" do - prompt[:conversation_context] = context - - expected = <<~TEXT + it "translates tool messages" do + expected = +(<<~TEXT).strip + [INST] + #{context.system_insts} + #{described_class.tool_preamble} + + #{context.dialect_tools} + [/INST] Ok + [INST]This is a message by a user[/INST] + I'm a previous bot reply, that's why there's no user + [INST]This is a new message by a user[/INST] tool_id - #{context.last[:content]} + "I'm a tool result" - #{context.second[:content]} - [INST] #{context.first[:content]} - [/INST] TEXT - translated_context = dialect.conversation_context - - expect(translated_context.strip).to eq(expected.strip) + expect(context.multi_turn_scenario).to eq(expected) end it "trims content if it's getting too long" do - context.last[:content] = context.last[:content] * 6_000 - prompt[:conversation_context] = context + length = 6_000 + translated = context.long_user_input_scenario(length: length) - translated_context = dialect.conversation_context - - expect(translated_context.length).to be < context.last[:content].length - end - end - - describe "#tools" do - it "translates tools to the tool syntax" do - prompt[:tools] = [tool] - - translated_tool = <<~TEXT - - get_weather - Get the weather in a city - - - 
location - string - the city name - true - - - unit - string - the unit of measurement celcius c or fahrenheit f - true - c,f - - - - TEXT - - expect(dialect.tools).to eq(translated_tool) + expect(translated.length).to be < context.long_message_text(length: length).length end end end diff --git a/spec/lib/completions/dialects/orca_style_spec.rb b/spec/lib/completions/dialects/orca_style_spec.rb index 32e32f15..6c683505 100644 --- a/spec/lib/completions/dialects/orca_style_spec.rb +++ b/spec/lib/completions/dialects/orca_style_spec.rb @@ -1,181 +1,61 @@ # frozen_string_literal: true +require_relative "dialect_context" + RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do - subject(:dialect) { described_class.new(prompt, "StableBeluga2") } - - let(:tool) do - { - name: "get_weather", - description: "Get the weather in a city", - parameters: [ - { name: "location", type: "string", description: "the city name", required: true }, - { - name: "unit", - type: "string", - description: "the unit of measurement celcius c or fahrenheit f", - enum: %w[c f], - required: true, - }, - ], - } - end - - let(:prompt) do - { - insts: <<~TEXT, - I want you to act as a title generator for written pieces. I will provide you with a text, - and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, - and ensure that the meaning is maintained. Replies will utilize the language type of the topic. - TEXT - input: <<~TEXT, - Here is the text, inside XML tags: - - To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, - discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer - defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. - - Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires, - a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and - slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he - dies so that a scene may be repeated. 
- - TEXT - post_insts: - "Please put the translation between tags and separate each title with a comma.", - } - end + let(:model_name) { "StableBeluga2" } + let(:context) { DialectContext.new(described_class, model_name) } describe "#translate" do - it "translates a prompt written in our generic format to the Open AI format" do - orca_style_version = <<~TEXT + it "translates a prompt written in our generic format to the Llama2 format" do + llama2_classic_version = <<~TEXT ### System: - #{prompt[:insts]} - #{prompt[:post_insts]} - ### User: - #{prompt[:input]} - ### Assistant: - TEXT - - translated = dialect.translate - - expect(translated).to eq(orca_style_version) - end - - it "include examples in the translated prompt" do - prompt[:examples] = [ - [ - "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.", - "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.", - ], - ] - - orca_style_version = <<~TEXT - ### System: - #{prompt[:insts]} - #{prompt[:post_insts]} - ### User: - #{prompt[:examples][0][0]} - ### Assistant: - #{prompt[:examples][0][1]} - ### User: - #{prompt[:input]} - ### Assistant: - TEXT - - translated = dialect.translate - - expect(translated).to eq(orca_style_version) - end - - it "include tools inside the prompt" do - prompt[:tools] = [tool] - - orca_style_version = <<~TEXT - ### System: - #{prompt[:insts]} - #{DiscourseAi::Completions::Dialects::OrcaStyle.tool_preamble} + #{context.system_insts} + #{described_class.tool_preamble} - #{dialect.tools} - #{prompt[:post_insts]} + #{context.dialect_tools} ### User: - #{prompt[:input]} + #{context.simple_user_input} ### Assistant: TEXT - translated = dialect.translate + translated = context.system_user_scenario - expect(translated).to eq(orca_style_version) - end - end - - describe "#conversation_context" do - let(:context) do - [ - { type: "user", name: "user1", content: "This is a new message by a user" }, - { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" }, - { type: "tool", name: "tool_id", content: "I'm a tool result" }, - ] + expect(translated).to eq(llama2_classic_version) end - it "adds conversation in reverse order (first == newer)" do - prompt[:conversation_context] = context - - expected = <<~TEXT + it "translates tool messages" do + expected = +(<<~TEXT) + ### System: + #{context.system_insts} + #{described_class.tool_preamble} + + #{context.dialect_tools} + ### User: + This is a message by a user + ### Assistant: + I'm a previous bot reply, that's why there's no user + ### User: + This is a new message by a user ### Assistant: tool_id - #{context.last[:content]} + "I'm a tool result" - ### Assistant: #{context.second[:content]} - ### User: #{context.first[:content]} + ### Assistant: TEXT - translated_context = dialect.conversation_context - - expect(translated_context).to eq(expected) + expect(context.multi_turn_scenario).to eq(expected) end it "trims content if it's getting too long" do - context.last[:content] = context.last[:content] * 1_000 - prompt[:conversation_context] = context + translated = context.long_user_input_scenario - translated_context = dialect.conversation_context - - expect(translated_context.length).to be < context.last[:content].length - end - end - - describe "#tools" do - it "translates tools to the tool syntax" do - prompt[:tools] = [tool] - - translated_tool = <<~TEXT - - get_weather - Get the weather in a city - - - location - 
string - the city name - true - - - unit - string - the unit of measurement celcius c or fahrenheit f - true - c,f - - - - TEXT - - expect(dialect.tools).to eq(translated_tool) + expect(translated.length).to be < context.long_message_text.length end end end diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb index 9f74d377..61b23e69 100644 --- a/spec/lib/completions/endpoints/anthropic_spec.rb +++ b/spec/lib/completions/endpoints/anthropic_spec.rb @@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) } let(:model_name) { "claude-2" } - let(:generic_prompt) { { insts: "write 3 words" } } let(:dialect) { DiscourseAi::Completions::Dialects::Claude.new(generic_prompt, model_name) } let(:prompt) { dialect.translate } diff --git a/spec/lib/completions/endpoints/aws_bedrock_spec.rb b/spec/lib/completions/endpoints/aws_bedrock_spec.rb index 123ab4ae..33c0d272 100644 --- a/spec/lib/completions/endpoints/aws_bedrock_spec.rb +++ b/spec/lib/completions/endpoints/aws_bedrock_spec.rb @@ -9,7 +9,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do let(:model_name) { "claude-2" } let(:bedrock_name) { "claude-v2:1" } - let(:generic_prompt) { { insts: "write 3 words" } } let(:dialect) { DiscourseAi::Completions::Dialects::Claude.new(generic_prompt, model_name) } let(:prompt) { dialect.translate } diff --git a/spec/lib/completions/endpoints/endpoint_examples.rb b/spec/lib/completions/endpoints/endpoint_examples.rb index 0448fcbf..60eeac83 100644 --- a/spec/lib/completions/endpoints/endpoint_examples.rb +++ b/spec/lib/completions/endpoints/endpoint_examples.rb @@ -34,6 +34,13 @@ RSpec.shared_examples "an endpoint that can communicate with a completion servic Net.send(:const_set, :HTTP, @original_net_http) end + let(:generic_prompt) do + DiscourseAi::Completions::Prompt.new( + "You write words", + messages: [{ type: :user, content: "write 3 words" }], + ) + end + describe "#perform_completion!" 
do fab!(:user) { Fabricate(:user) } @@ -97,16 +104,11 @@ RSpec.shared_examples "an endpoint that can communicate with a completion servic end context "with functions" do - let(:generic_prompt) do - { - insts: "You can tell me the weather", - input: "Return the weather in Sydney", - tools: [tool], - } + before do + generic_prompt.tools = [tool] + stub_response(prompt, tool_call, tool_call: true) end - before { stub_response(prompt, tool_call, tool_call: true) } - it "returns a function invocation" do completion_response = model.perform_completion!(dialect, user) @@ -153,16 +155,11 @@ RSpec.shared_examples "an endpoint that can communicate with a completion servic end context "with functions" do - let(:generic_prompt) do - { - insts: "You can tell me the weather", - input: "Return the weather in Sydney", - tools: [tool], - } + before do + generic_prompt.tools = [tool] + stub_streamed_response(prompt, tool_deltas, tool_call: true) end - before { stub_streamed_response(prompt, tool_deltas, tool_call: true) } - it "waits for the invocation to finish before calling the partial" do buffered_partial = "" diff --git a/spec/lib/completions/endpoints/gemini_spec.rb b/spec/lib/completions/endpoints/gemini_spec.rb index 4e1d4b59..037b8c28 100644 --- a/spec/lib/completions/endpoints/gemini_spec.rb +++ b/spec/lib/completions/endpoints/gemini_spec.rb @@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) } let(:model_name) { "gemini-pro" } - let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } } let(:dialect) { DiscourseAi::Completions::Dialects::Gemini.new(generic_prompt, model_name) } let(:prompt) { dialect.translate } @@ -38,14 +37,18 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do model .default_options .merge(contents: prompt) - .tap { |b| b[:tools] = [{ function_declarations: [tool_payload] }] if generic_prompt[:tools] } + .tap do |b| + b[:tools] = [{ function_declarations: [tool_payload] }] if generic_prompt.tools.present? + end .to_json end let(:stream_request_body) do model .default_options .merge(contents: prompt) - .tap { |b| b[:tools] = [{ function_declarations: [tool_payload] }] if generic_prompt[:tools] } + .tap do |b| + b[:tools] = [{ function_declarations: [tool_payload] }] if generic_prompt.tools.present? 
+ end .to_json end diff --git a/spec/lib/completions/endpoints/hugging_face_spec.rb b/spec/lib/completions/endpoints/hugging_face_spec.rb index de69f8ed..0520b661 100644 --- a/spec/lib/completions/endpoints/hugging_face_spec.rb +++ b/spec/lib/completions/endpoints/hugging_face_spec.rb @@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::HuggingFace do subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::Llama2Tokenizer) } let(:model_name) { "Llama2-*-chat-hf" } - let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } } let(:dialect) do DiscourseAi::Completions::Dialects::Llama2Classic.new(generic_prompt, model_name) end diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb index 00695830..3723d9b9 100644 --- a/spec/lib/completions/endpoints/open_ai_spec.rb +++ b/spec/lib/completions/endpoints/open_ai_spec.rb @@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) } let(:model_name) { "gpt-3.5-turbo" } - let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } } let(:dialect) { DiscourseAi::Completions::Dialects::ChatGpt.new(generic_prompt, model_name) } let(:prompt) { dialect.translate } @@ -37,7 +36,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do model .default_options .merge(messages: prompt) - .tap { |b| b[:tools] = dialect.tools if generic_prompt[:tools] } + .tap { |b| b[:tools] = dialect.tools if generic_prompt.tools.present? } .to_json end @@ -45,7 +44,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do model .default_options .merge(messages: prompt, stream: true) - .tap { |b| b[:tools] = dialect.tools if generic_prompt[:tools] } + .tap { |b| b[:tools] = dialect.tools if generic_prompt.tools.present? 
} .to_json end @@ -183,7 +182,10 @@ data: [D|ONE] partials = [] llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo") - llm.generate({ insts: "test" }, user: Discourse.system_user) { |partial| partials << partial } + llm.generate( + DiscourseAi::Completions::Prompt.new("test"), + user: Discourse.system_user, + ) { |partial| partials << partial } expect(partials.join).to eq("test,test2,test3,test4") end @@ -212,7 +214,10 @@ data: [D|ONE] partials = [] llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo") - llm.generate({ insts: "test" }, user: Discourse.system_user) { |partial| partials << partial } + llm.generate( + DiscourseAi::Completions::Prompt.new("test"), + user: Discourse.system_user, + ) { |partial| partials << partial } expect(partials.join).to eq("test,test1,test2,test3,test4") end diff --git a/spec/lib/completions/endpoints/vllm_spec.rb b/spec/lib/completions/endpoints/vllm_spec.rb index 54d9955d..143e40b9 100644 --- a/spec/lib/completions/endpoints/vllm_spec.rb +++ b/spec/lib/completions/endpoints/vllm_spec.rb @@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Vllm do subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::MixtralTokenizer) } let(:model_name) { "mistralai/Mixtral-8x7B-Instruct-v0.1" } - let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } } let(:dialect) { DiscourseAi::Completions::Dialects::Mixtral.new(generic_prompt, model_name) } let(:prompt) { dialect.translate } diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb index 7c196eea..b76c6e6f 100644 --- a/spec/lib/completions/llm_spec.rb +++ b/spec/lib/completions/llm_spec.rb @@ -29,15 +29,21 @@ RSpec.describe DiscourseAi::Completions::Llm do let(:llm) { described_class.proxy("fake") } + let(:prompt) do + DiscourseAi::Completions::Prompt.new( + "You are fake", + messages: [{ type: :user, content: "fake orders" }], + ) + end + it "can generate a response" do - response = llm.generate({ input: "fake prompt" }, user: user) + response = llm.generate(prompt, user: user) expect(response).to be_present end it "can generate content via a block" do partials = [] - response = - llm.generate({ input: "fake prompt" }, user: user) { |partial| partials << partial } + response = llm.generate(prompt, user: user) { |partial| partials << partial } expect(partials.length).to eq(10) expect(response).to eq(DiscourseAi::Completions::Endpoints::Fake.fake_content) @@ -48,23 +54,22 @@ RSpec.describe DiscourseAi::Completions::Llm do describe "#generate" do let(:prompt) do - { - insts: <<~TEXT, - I want you to act as a title generator for written pieces. I will provide you with a text, - and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, - and ensure that the meaning is maintained. Replies will utilize the language type of the topic. + system_insts = (<<~TEXT).strip + I want you to act as a title generator for written pieces. I will provide you with a text, + and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, + and ensure that the meaning is maintained. Replies will utilize the language type of the topic. 
TEXT - input: <<~TEXT, - Here is the text, inside XML tags: - - To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, - discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer - defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. - - TEXT - post_insts: - "Please put the translation between tags and separate each title with a comma.", - } + + DiscourseAi::Completions::Prompt + .new(system_insts) + .tap { |a_prompt| a_prompt.push(type: :user, content: (<<~TEXT).strip) } + Here is the text, inside XML tags: + + To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, + discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer + defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. + + TEXT end let(:canned_response) do diff --git a/spec/lib/completions/prompt_spec.rb b/spec/lib/completions/prompt_spec.rb new file mode 100644 index 00000000..2fa150e5 --- /dev/null +++ b/spec/lib/completions/prompt_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Completions::Prompt do + subject(:prompt) { described_class.new(system_insts) } + + let(:system_insts) { "These are the system instructions." } + let(:user_msg) { "Write something nice" } + let(:username) { "username1" } + + describe ".new" do + it "raises for invalid attributes" do + expect { described_class.new("a bot", messages: {}) }.to raise_error(ArgumentError) + expect { described_class.new("a bot", tools: {}) }.to raise_error(ArgumentError) + + bad_messages = [{ type: :user, content: "a system message", unknown_attribute: :random }] + expect { described_class.new("a bot", messages: bad_messages) }.to raise_error(ArgumentError) + + bad_messages2 = [{ type: :user }] + expect { described_class.new("a bot", messages: bad_messages2) }.to raise_error(ArgumentError) + + bad_messages3 = [{ content: "some content associated to no one" }] + expect { described_class.new("a bot", messages: bad_messages3) }.to raise_error(ArgumentError) + end + end + + describe "#push" do + describe "turn validations" do + it "validates that tool messages have a previous tool_call message" do + prompt.push(type: :user, content: user_msg, id: username) + prompt.push(type: :model, content: "I'm a model msg") + + expect { prompt.push(type: :tool, content: "I'm the tool call results") }.to raise_error( + DiscourseAi::Completions::Prompt::INVALID_TURN, + ) + end + + it "validates that model messages have either a previous tool or user messages" do + prompt.push(type: :user, content: user_msg, id: username) + prompt.push(type: :model, content: "I'm a model msg") + + expect { prompt.push(type: :model, content: "I'm a second model msg") }.to raise_error( + DiscourseAi::Completions::Prompt::INVALID_TURN, + ) + end + end + + it "system message is always first" do + prompt.push(type: :user, content: user_msg, id: username) + + system_message = prompt.messages.first + + expect(system_message[:type]).to eq(:system) + expect(system_message[:content]).to eq(system_insts) + end + + it "includes the pushed message" do + prompt.push(type: :user, content: user_msg, id: username) + + system_message = prompt.messages.last + + expect(system_message[:type]).to eq(:user) + expect(system_message[:content]).to eq(user_msg) + 
expect(system_message[:id]).to eq(username) + end + end +end diff --git a/spec/lib/modules/ai_bot/bot_spec.rb b/spec/lib/modules/ai_bot/bot_spec.rb index b3ab4506..d5faf055 100644 --- a/spec/lib/modules/ai_bot/bot_spec.rb +++ b/spec/lib/modules/ai_bot/bot_spec.rb @@ -11,18 +11,6 @@ RSpec.describe DiscourseAi::AiBot::Bot do let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT4_ID) } let!(:user) { Fabricate(:user) } - let!(:pm) do - Fabricate( - :private_message_topic, - title: "This is my special PM", - user: user, - topic_allowed_users: [ - Fabricate.build(:topic_allowed_user, user: user), - Fabricate.build(:topic_allowed_user, user: bot_user), - ], - ) - end - let!(:pm_post) { Fabricate(:post, topic: pm, user: user, raw: "Does my site has tags?") } let(:function_call) { <<~TEXT } Let me try using a function to get more info: @@ -49,7 +37,7 @@ RSpec.describe DiscourseAi::AiBot::Bot do HTML - context = {} + context = { conversation_context: [{ type: :user, content: "Does my site has tags?" }] } DiscourseAi::Completions::Llm.with_prepared_responses(llm_responses) do bot.reply(context) do |_bot_reply_post, cancel, placeholder| diff --git a/spec/lib/modules/ai_bot/personas/persona_spec.rb b/spec/lib/modules/ai_bot/personas/persona_spec.rb index 97db50cf..dd19a8d4 100644 --- a/spec/lib/modules/ai_bot/personas/persona_spec.rb +++ b/spec/lib/modules/ai_bot/personas/persona_spec.rb @@ -52,14 +52,15 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do freeze_time rendered = persona.craft_prompt(context) + system_message = rendered.messages.first[:content] - expect(rendered[:insts]).to include(Discourse.base_url) - expect(rendered[:insts]).to include("test site title") - expect(rendered[:insts]).to include("test site description") - expect(rendered[:insts]).to include("joe, jane") - expect(rendered[:insts]).to include(Time.zone.now.to_s) + expect(system_message).to include(Discourse.base_url) + expect(system_message).to include("test site title") + expect(system_message).to include("test site description") + expect(system_message).to include("joe, jane") + expect(system_message).to include(Time.zone.now.to_s) - tools = rendered[:tools] + tools = rendered.tools expect(tools.find { |t| t[:name] == "search" }).to be_present expect(tools.find { |t| t[:name] == "tags" }).to be_present @@ -107,7 +108,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do instance = custom_persona.new expect(instance.tools).to eq([DiscourseAi::AiBot::Tools::Image]) - expect(instance.craft_prompt(context).dig(:insts)).to eq("you are pun bot\n\n") + expect(instance.craft_prompt(context).messages.first[:content]).to eq("you are pun bot") # should update persona.update!(name: "zzzpun_bot2") diff --git a/spec/lib/modules/ai_bot/playground_spec.rb b/spec/lib/modules/ai_bot/playground_spec.rb index a938db1b..27468801 100644 --- a/spec/lib/modules/ai_bot/playground_spec.rb +++ b/spec/lib/modules/ai_bot/playground_spec.rb @@ -155,9 +155,9 @@ RSpec.describe DiscourseAi::AiBot::Playground do expect(context).to contain_exactly( *[ - { type: "user", name: user.username, content: third_post.raw }, - { type: "assistant", content: second_post.raw }, - { type: "user", name: user.username, content: first_post.raw }, + { type: :user, id: user.username, content: third_post.raw }, + { type: :model, content: second_post.raw }, + { type: :user, id: user.username, content: first_post.raw }, ], ) end @@ -169,8 +169,8 @@ RSpec.describe DiscourseAi::AiBot::Playground do expect(context).to contain_exactly( *[ - { type: "user", 
name: user.username, content: third_post.raw },
-          { type: "assistant", content: second_post.raw },
+          { type: :user, id: user.username, content: third_post.raw },
+          { type: :model, content: second_post.raw },
         ],
       )
     end
@@ -197,16 +197,42 @@
       expect(context).to contain_exactly(
         *[
-          { type: "user", name: user.username, content: third_post.raw },
-          {
-            type: "multi_turn",
-            content: [
-              { type: "assistant", content: custom_prompt.third.first },
-              { type: "tool_call", content: custom_prompt.second.first, name: "time" },
-              { type: "tool", name: "time", content: custom_prompt.first.first },
-            ],
-          },
-          { type: "user", name: user.username, content: first_post.raw },
+          { type: :user, id: user.username, content: third_post.raw },
+          { type: :model, content: custom_prompt.third.first },
+          { type: :tool_call, content: custom_prompt.second.first, id: "time" },
+          { type: :tool, id: "time", content: custom_prompt.first.first },
+          { type: :user, id: user.username, content: first_post.raw },
+        ],
+      )
+    end
+
+    it "includes replies generated from tools only once" do
+      custom_prompt = [
+        [
+          { args: { timezone: "Buenos Aires" }, time: "2023-12-14 17:24:00 -0300" }.to_json,
+          "time",
+          "tool",
+        ],
+        [
+          { name: "time", arguments: { name: "time", timezone: "Buenos Aires" } }.to_json,
+          "time",
+          "tool_call",
+        ],
+        ["I replied this thanks to the time command", bot_user.username],
+      ]
+      PostCustomPrompt.create!(post: second_post, custom_prompt: custom_prompt)
+      PostCustomPrompt.create!(post: first_post, custom_prompt: custom_prompt)
+
+      context = playground.conversation_context(third_post)
+
+      expect(context).to contain_exactly(
+        *[
+          { type: :user, id: user.username, content: third_post.raw },
+          { type: :model, content: custom_prompt.third.first },
+          { type: :tool_call, content: custom_prompt.second.first, id: "time" },
+          { type: :tool, id: "time", content: custom_prompt.first.first },
+          { type: :tool_call, content: custom_prompt.second.first, id: "time" },
+          { type: :tool, id: "time", content: custom_prompt.first.first },
         ],
       )
     end
diff --git a/spec/models/completion_prompt_spec.rb b/spec/models/completion_prompt_spec.rb
index 6f159f9e..7ed4e2d7 100644
--- a/spec/models/completion_prompt_spec.rb
+++ b/spec/models/completion_prompt_spec.rb
@@ -26,18 +26,16 @@ RSpec.describe CompletionPrompt do
     let(:custom_prompt) { described_class.find(described_class::CUSTOM_PROMPT) }
 
     it "wraps the user input with XML tags and adds a custom instruction if given" do
-      expected = <<~TEXT
-        <input>
-        Translate to Turkish:
-        #{user_input}
-        </input>
+      expected = <<~TEXT.strip
+        <input>Translate to Turkish:
+        #{user_input}</input>
       TEXT
 
       custom_prompt.custom_instruction = "Translate to Turkish"
 
       prompt = custom_prompt.messages_with_input(user_input)
 
-      expect(prompt[:input]).to eq(expected)
+      expect(prompt.messages.last[:content]).to eq(expected)
     end
   end
 
@@ -45,16 +43,13 @@
     let(:title_prompt) { described_class.find(described_class::GENERATE_TITLES) }
 
     it "wraps user input with XML tags" do
-      expected = <<~TEXT
-        <input>
-        #{user_input}
-        </input>
-      TEXT
+      expected = "<input>#{user_input}</input>"
+
       title_prompt.custom_instruction = "Translate to Turkish"
 
       prompt = title_prompt.messages_with_input(user_input)
 
-      expect(prompt[:input]).to eq(expected)
+      expect(prompt.messages.last[:content]).to eq(expected)
     end
   end
 end
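The model-level behavior these specs cover, sketched as a console session; the
instruction is the fixture value reused above, and the sample input is illustrative:

    record = CompletionPrompt.find(CompletionPrompt::CUSTOM_PROMPT)
    record.custom_instruction = "Translate to Turkish"

    # messages_with_input now returns the new Prompt object instead of a hash.
    prompt = record.messages_with_input("Hello there")
    prompt.class                   # => DiscourseAi::Completions::Prompt
    prompt.messages.last[:content] # => "<input>Translate to Turkish:\nHello there</input>"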
diff --git a/spec/requests/ai_helper/assistant_controller_spec.rb b/spec/requests/ai_helper/assistant_controller_spec.rb
index e7238399..fd20b6ba 100644
--- a/spec/requests/ai_helper/assistant_controller_spec.rb
+++ b/spec/requests/ai_helper/assistant_controller_spec.rb
@@ -85,11 +85,9 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
     expected_diff =
       "<div class=\"inline-diff\"><p><del>Un usuario escribio esto</del><ins>A user wrote this</ins></p></div>"
 
-    expected_input = <<~TEXT
-      <input>
-      Translate to Spanish:
-      A user wrote this
-      </input>
+    expected_input = <<~TEXT.strip
+      <input>Translate to Spanish:
+      A user wrote this</input>
     TEXT
 
     DiscourseAi::Completions::Llm.with_prepared_responses([translated_text]) do |spy|