From 450ec915d896e43e070cd8abd1869030c793d2b0 Mon Sep 17 00:00:00 2001
From: Roman Rizzi <roman@discourse.org>
Date: Wed, 6 Dec 2023 19:00:24 -0300
Subject: [PATCH] FIX: Make FoldContent strategy more resilient when using
 models with low token count. (#341)

We'll recursively summarize the content in smaller chunks until we are sure the resulting
summaries can be concatenated without going over the token limit.
---
 lib/summarization/strategies/fold_content.rb  | 88 +++++++++++++------
 .../strategies/fold_content_spec.rb           | 19 ++++
 2 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/lib/summarization/strategies/fold_content.rb b/lib/summarization/strategies/fold_content.rb
index 731e3931..16a15c05 100644
--- a/lib/summarization/strategies/fold_content.rb
+++ b/lib/summarization/strategies/fold_content.rb
@@ -21,52 +21,71 @@ module DiscourseAi
 
           llm = DiscourseAi::Completions::Llm.proxy(completion_model.model)
 
-          chunks = split_into_chunks(llm.tokenizer, content[:contents])
+          initial_chunks =
+            rebalance_chunks(
+              llm.tokenizer,
+              content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } },
+            )
 
-          if chunks.length == 1
+          # Special case where we can do all the summarization in one pass.
+          if initial_chunks.length == 1
             {
-              summary: summarize_single(llm, chunks.first[:summary], user, opts, &on_partial_blk),
+              summary:
+                summarize_single(llm, initial_chunks.first[:summary], user, opts, &on_partial_blk),
               chunks: [],
             }
           else
-            summaries = summarize_in_chunks(llm, chunks, user, opts)
-
-            {
-              summary:
-                concatenate_summaries(
-                  llm,
-                  summaries.map { |s| s[:summary] },
-                  user,
-                  &on_partial_blk
-                ),
-              chunks: summaries,
-            }
+            summarize_chunks(llm, initial_chunks, user, opts, &on_partial_blk)
           end
         end
 
         private
 
+        def summarize_chunks(llm, chunks, user, opts, &on_partial_blk)
+          # Safely assume we always have more than one chunk.
+          summarized_chunks = summarize_in_chunks(llm, chunks, user, opts)
+          total_summaries_size =
+            llm.tokenizer.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
+
+          if total_summaries_size < completion_model.available_tokens
+            # Chunks are small enough, we can concatenate them.
+            {
+              summary:
+                concatenate_summaries(
+                  llm,
+                  summarized_chunks.map { |s| s[:summary] },
+                  user,
+                  &on_partial_blk
+                ),
+              chunks: summarized_chunks,
+            }
+          else
+            # We have summarized chunks but we can't concatenate them yet. Regroup them into sections that fit the token limit and summarize again.
+            rebalanced_chunks = rebalance_chunks(llm.tokenizer, summarized_chunks)
+
+            summarize_chunks(llm, rebalanced_chunks, user, opts, &on_partial_blk)
+          end
+        end
+
         def format_content_item(item)
           "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
         end
 
-        def split_into_chunks(tokenizer, contents)
+        def rebalance_chunks(tokenizer, chunks)
           section = { ids: [], summary: "" }
 
           chunks =
-            contents.reduce([]) do |sections, item|
-              new_content = format_content_item(item)
-
+            chunks.reduce([]) do |sections, chunk|
               if tokenizer.can_expand_tokens?(
                    section[:summary],
-                   new_content,
+                   chunk[:summary],
                    completion_model.available_tokens,
                  )
-                section[:summary] += new_content
-                section[:ids] << item[:id]
+                section[:summary] += chunk[:summary]
+                section[:ids] = section[:ids].concat(chunk[:ids])
               else
                 sections << section
-                section = { ids: [item[:id]], summary: new_content }
+                section = chunk
               end
 
               sections
@@ -94,10 +113,22 @@ module DiscourseAi
         end
 
         def concatenate_summaries(llm, summaries, user, &on_partial_blk)
-          prompt = summarization_prompt(summaries.join("\n"), {})
+          prompt = {}
           prompt[:insts] = <<~TEXT
-            You are a bot that can concatenate disjoint summaries, creating a cohesive narrative.
-            Keep the resulting summary in the same language used in the text below.
+            You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
+            The narrative you create is in the form of one or multiple paragraphs.
+            Your reply MUST BE a single concatenated summary using the summaries I'll provide to you. 
+            I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
+            You understand and generate Discourse forum Markdown.
+            You format the response, including links, using Markdown.
+          TEXT
+
+          prompt[:input] = <<~TEXT
+            THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
+
+            <input>
+              #{summaries.join("\n")}
+            </input>
           TEXT
 
           llm.completion!(prompt, user, &on_partial_blk)
@@ -106,7 +137,8 @@ module DiscourseAi
         def summarization_prompt(input, opts)
           insts = <<~TEXT
             You are a summarization bot that effectively summarize any text
-            Your replies contain ONLY a summarized version of the text I provided and you, using the same language.
+            Your reply MUST BE a summarized version of the posts I provided, using the first language you detect.
+            I'm NOT interested in anything other than the summary, don't include additional text or comments.
             You understand and generate Discourse forum Markdown.
             You format the response, including links, using Markdown.
             Your summaries are always a cohesive narrative in the form of one or multiple paragraphs.
@@ -122,7 +154,7 @@ module DiscourseAi
           insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title]
 
           prompt = { insts: insts, input: <<~TEXT }
-              Here is the a list of posts, inside <input></input> XML tags:
+              Here are the posts, inside <input></input> XML tags:
 
               <input>
                 #{input}
diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/strategies/fold_content_spec.rb
index 3598e883..eaff533e 100644
--- a/spec/lib/modules/summarization/strategies/fold_content_spec.rb
+++ b/spec/lib/modules/summarization/strategies/fold_content_spec.rb
@@ -44,6 +44,25 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
 
         expect(result[:summary]).to eq(concatenated_summary)
       end
+
+      it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
+        content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
+        max_length_response = "(1 asd said: This is a text "
+        chunk_of_chunks = "I'm smol"
+
+        result =
+          DiscourseAi::Completions::Llm.with_prepared_responses(
+            [
+              max_length_response,
+              max_length_response,
+              chunk_of_chunks,
+              chunk_of_chunks,
+              concatenated_summary,
+            ],
+          ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } }
+
+        expect(result[:summary]).to eq(concatenated_summary)
+      end
     end
   end
 end