From 450ec915d896e43e070cd8abd1869030c793d2b0 Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Wed, 6 Dec 2023 19:00:24 -0300 Subject: [PATCH] FIX: Make FoldContent strategy more resilient when using models with low token count. (#341) We'll recursively summarize the content into smaller chunks until we are sure we can concatenate them without going over the token limit. --- lib/summarization/strategies/fold_content.rb | 88 +++++++++++++------ .../strategies/fold_content_spec.rb | 19 ++++ 2 files changed, 79 insertions(+), 28 deletions(-) diff --git a/lib/summarization/strategies/fold_content.rb b/lib/summarization/strategies/fold_content.rb index 731e3931..16a15c05 100644 --- a/lib/summarization/strategies/fold_content.rb +++ b/lib/summarization/strategies/fold_content.rb @@ -21,52 +21,71 @@ module DiscourseAi llm = DiscourseAi::Completions::Llm.proxy(completion_model.model) - chunks = split_into_chunks(llm.tokenizer, content[:contents]) + initial_chunks = + rebalance_chunks( + llm.tokenizer, + content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } }, + ) - if chunks.length == 1 + # Special case where we can do all the summarization in one pass. + if initial_chunks.length == 1 { - summary: summarize_single(llm, chunks.first[:summary], user, opts, &on_partial_blk), + summary: + summarize_single(llm, initial_chunks.first[:summary], user, opts, &on_partial_blk), chunks: [], } else - summaries = summarize_in_chunks(llm, chunks, user, opts) - - { - summary: - concatenate_summaries( - llm, - summaries.map { |s| s[:summary] }, - user, - &on_partial_blk - ), - chunks: summaries, - } + summarize_chunks(llm, initial_chunks, user, opts, &on_partial_blk) end end private + def summarize_chunks(llm, chunks, user, opts, &on_partial_blk) + # Safely assume we always have more than one chunk. + summarized_chunks = summarize_in_chunks(llm, chunks, user, opts) + total_summaries_size = + llm.tokenizer.size(summarized_chunks.map { |s| s[:summary].to_s }.join) + + if total_summaries_size < completion_model.available_tokens + # Chunks are small enough, we can concatenate them. + { + summary: + concatenate_summaries( + llm, + summarized_chunks.map { |s| s[:summary] }, + user, + &on_partial_blk + ), + chunks: summarized_chunks, + } + else + # We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again. + rebalanced_chunks = rebalance_chunks(llm.tokenizer, summarized_chunks) + + summarize_chunks(llm, rebalanced_chunks, user, opts, &on_partial_blk) + end + end + def format_content_item(item) "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " end - def split_into_chunks(tokenizer, contents) + def rebalance_chunks(tokenizer, chunks) section = { ids: [], summary: "" } chunks = - contents.reduce([]) do |sections, item| - new_content = format_content_item(item) - + chunks.reduce([]) do |sections, chunk| if tokenizer.can_expand_tokens?( section[:summary], - new_content, + chunk[:summary], completion_model.available_tokens, ) - section[:summary] += new_content - section[:ids] << item[:id] + section[:summary] += chunk[:summary] + section[:ids] = section[:ids].concat(chunk[:ids]) else sections << section - section = { ids: [item[:id]], summary: new_content } + section = chunk end sections @@ -94,10 +113,22 @@ module DiscourseAi end def concatenate_summaries(llm, summaries, user, &on_partial_blk) - prompt = summarization_prompt(summaries.join("\n"), {}) + prompt = {} prompt[:insts] = <<~TEXT - You are a bot that can concatenate disjoint summaries, creating a cohesive narrative. - Keep the resulting summary in the same language used in the text below. + You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative. + The narrative you create is in the form of one or multiple paragraphs. + Your reply MUST BE a single concatenated summary using the summaries I'll provide to you. + I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments. + You understand and generate Discourse forum Markdown. + You format the response, including links, using Markdown. + TEXT + + prompt[:input] = <<~TEXT + THESE are the summaries, each one separated by a newline, all of them inside XML tags: + + + #{summaries.join("\n")} + TEXT llm.completion!(prompt, user, &on_partial_blk) @@ -106,7 +137,8 @@ module DiscourseAi def summarization_prompt(input, opts) insts = <<~TEXT You are a summarization bot that effectively summarize any text - Your replies contain ONLY a summarized version of the text I provided and you, using the same language. + Your reply MUST BE a summarized version of the posts I provided, using the first language you detect. + I'm NOT interested in anything other than the summary, don't include additional text or comments. You understand and generate Discourse forum Markdown. You format the response, including links, using Markdown. Your summaries are always a cohesive narrative in the form of one or multiple paragraphs. @@ -122,7 +154,7 @@ module DiscourseAi insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title] prompt = { insts: insts, input: <<~TEXT } - Here is the a list of posts, inside XML tags: + Here are the posts, inside XML tags: #{input} diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/strategies/fold_content_spec.rb index 3598e883..eaff533e 100644 --- a/spec/lib/modules/summarization/strategies/fold_content_spec.rb +++ b/spec/lib/modules/summarization/strategies/fold_content_spec.rb @@ -44,6 +44,25 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do expect(result[:summary]).to eq(concatenated_summary) end + + it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do + content[:contents] << { poster: "asd2", id: 2, text: summarize_text } + max_length_response = "(1 asd said: This is a text " + chunk_of_chunks = "I'm smol" + + result = + DiscourseAi::Completions::Llm.with_prepared_responses( + [ + max_length_response, + max_length_response, + chunk_of_chunks, + chunk_of_chunks, + concatenated_summary, + ], + ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } } + + expect(result[:summary]).to eq(concatenated_summary) + end end end end