REFACTOR: Support different summarization targets/prompts (#835)

* DEV: Add summary types

* Refactor for different summary types

* Use enum for summary types

* Update lib/summarization/strategies/topic_summary.rb

Co-authored-by: Penar Musaraj <pmusaraj@gmail.com>

* Update lib/summarization/strategies/topic_gist.rb

Co-authored-by: Penar Musaraj <pmusaraj@gmail.com>

* Update lib/summarization/strategies/chat_messages.rb

Co-authored-by: Penar Musaraj <pmusaraj@gmail.com>

* Fix chat_messages single prompt

* Small tweak to the chat summarization prompt

---------

Co-authored-by: Penar Musaraj <pmusaraj@gmail.com>
Roman Rizzi 2024-10-15 13:53:26 -03:00 committed by GitHub
parent 791fad1e6a
commit c7acb4a6a0
22 changed files with 826 additions and 448 deletions
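
At a glance, the refactor replaces the single default_strategy entry point with per-target helpers that return a ready-to-use summarizer. A rough before/after sketch, using only names that appear in the diff below:

```ruby
# Before: one strategy; callers assembled the content hash and unpacked a plain hash result.
strategy = DiscourseAi::Summarization.default_strategy
strategy.summarize(content, current_user).dig(:summary)

# After: callers pick an entry point for their target and read the resulting AiSummary record.
summarizer = DiscourseAi::Summarization.topic_summary(topic)
summarizer.summarize(current_user)&.summarized_text
```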

View File

@@ -15,29 +15,21 @@ module DiscourseAi
       channel = ::Chat::Channel.find(params[:channel_id])
       guardian.ensure_can_join_chat_channel!(channel)
-      strategy = DiscourseAi::Summarization.default_strategy
-      raise Discourse::NotFound.new unless strategy
+      summarizer = DiscourseAi::Summarization.chat_channel_summary(channel, since)
+      raise Discourse::NotFound.new unless summarizer
       guardian.ensure_can_request_summary!
       RateLimiter.new(current_user, "channel_summary", 6, 5.minutes).performed!
       hijack do
-        content = { content_title: channel.name }
-        content[:contents] = channel
-          .chat_messages
-          .where("chat_messages.created_at > ?", since.hours.ago)
-          .includes(:user)
-          .order(created_at: :asc)
-          .pluck(:id, :username_lower, :message)
-          .map { { id: _1, poster: _2, text: _3 } }
+        strategy = DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, since)
         summarized_text =
-          if content[:contents].empty?
+          if strategy.targets_data[:contents].empty?
             I18n.t("discourse_ai.summarization.chat.no_targets")
           else
-            strategy.summarize(content, current_user).dig(:summary)
+            summarizer.summarize(current_user)&.summarized_text
           end
         render json: { summary: summarized_text }

View File

@@ -9,15 +9,19 @@ module DiscourseAi
       topic = Topic.find(params[:topic_id])
       guardian.ensure_can_see!(topic)
-      raise Discourse::NotFound if !guardian.can_see_summary?(topic)
+      if !guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])
+        raise Discourse::NotFound
+      end
       RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user
       opts = params.permit(:skip_age_check)
       skip_age_check = opts[:skip_age_check] == "true"
+      summarization_service = DiscourseAi::TopicSummarization.for(topic, current_user)
       if params[:stream] && current_user
-        cached_summary = DiscourseAi::TopicSummarization.cached_summary(topic, current_user)
+        cached_summary = summarization_service.cached_summary
         if cached_summary && !skip_age_check
           render_serialized(cached_summary, AiTopicSummarySerializer)
@@ -34,12 +38,7 @@ module DiscourseAi
         render json: success_json
       else
         hijack do
-          summary =
-            DiscourseAi::TopicSummarization.summarize(
-              topic,
-              current_user,
-              skip_age_check: skip_age_check,
-            )
+          summary = summarization_service.summarize(skip_age_check: skip_age_check)
           render_serialized(summary, AiTopicSummarySerializer)
         end
       end

View File

@@ -8,8 +8,11 @@ module Jobs
       return unless topic = Topic.find_by(id: args[:topic_id])
       return unless user = User.find_by(id: args[:user_id])
-      strategy = DiscourseAi::Summarization.default_strategy
-      return if strategy.nil? || !Guardian.new(user).can_see_summary?(topic)
+      strategy = DiscourseAi::Summarization.topic_summary(topic)
+      if strategy.nil? ||
+           !Guardian.new(user).can_see_summary?(topic, AiSummary.summary_types[:complete])
+        return
+      end
       guardian = Guardian.new(user)
       return unless guardian.can_see?(topic)
@@ -21,7 +24,7 @@ module Jobs
       summary =
         DiscourseAi::TopicSummarization
-          .new(strategy, topic, user)
+          .new(strategy, user)
           .summarize(skip_age_check: skip_age_check) do |partial_summary|
             streamed_summary << partial_summary

View File

@@ -3,6 +3,23 @@
 class AiSummary < ActiveRecord::Base
   belongs_to :target, polymorphic: true
+  enum :summary_type, { complete: 0, gist: 1 }
+
+  def self.store!(target, summary_type, model, summary, content_ids)
+    AiSummary.create!(
+      target: target,
+      algorithm: model,
+      content_range: (content_ids.first..content_ids.last),
+      summarized_text: summary,
+      original_content_sha: build_sha(content_ids.join),
+      summary_type: summary_type,
+    )
+  end
+
+  def self.build_sha(joined_ids)
+    Digest::SHA256.hexdigest(joined_ids)
+  end
+
   def mark_as_outdated
     @outdated = true
   end
@@ -25,6 +42,7 @@ end
 # algorithm            :string   not null
 # created_at           :datetime not null
 # updated_at           :datetime not null
+# summary_type         :string   default("complete"), not null
 #
 # Indexes
 #
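
For reference, store! is what FoldContent (added later in this commit) calls to persist a generated summary. A minimal sketch of a call, with placeholder values for the model name and content ids:

```ruby
# Placeholder arguments for illustration; FoldContent passes strategy.target, strategy.type,
# llm_model.name, the generated text, and the ids of the summarized content.
AiSummary.store!(
  topic,                              # polymorphic target
  AiSummary.summary_types[:complete], # summary_type enum value
  "my-llm-model",                     # stored in the algorithm column (placeholder name)
  "A short summary of the topic.",    # summarized_text
  [1, 2, 3],                          # ids used for content_range and original_content_sha
)
```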

View File

@@ -1,145 +1,44 @@
 # frozen_string_literal: true
 module DiscourseAi
-  # A cache layer on top of our topic summarization engine. Also handle permissions.
   class TopicSummarization
-    def self.summarize(topic, user, skip_age_check: false, &on_partial_blk)
-      new(DiscourseAi::Summarization.default_strategy, topic, user).summarize(
-        skip_age_check: skip_age_check,
-        &on_partial_blk
-      )
+    def self.for(topic, user)
+      new(DiscourseAi::Summarization.topic_summary(topic), user)
     end
-    def self.cached_summary(topic, user)
-      new(DiscourseAi::Summarization.default_strategy, topic, user).cached_summary
-    end
-    def initialize(strategy, topic, user)
-      @strategy = strategy
-      @topic = topic
+    def initialize(summarizer, user)
+      @summarizer = summarizer
       @user = user
     end
-    attr_reader :strategy, :topic, :user
     def cached_summary
-      existing_summary
+      summarizer.existing_summary
     end
     def summarize(skip_age_check: false, &on_partial_blk)
       # Existing summary shouldn't be nil in this scenario because the controller checks its existence.
-      return if !user && !existing_summary
-      return existing_summary if use_cached?(skip_age_check)
-      delete_cached_summaries! if existing_summary
-      content = {
-        resource_path: "#{Discourse.base_path}/t/-/#{topic.id}",
-        content_title: topic.title,
-        contents: [],
-      }
-      summary_targets_data.map do |(pn, raw, username)|
-        raw_text = raw
-        if pn == 1 && topic.topic_embed&.embed_content_cache.present?
-          raw_text = topic.topic_embed&.embed_content_cache
-        end
-        content[:contents] << { poster: username, id: pn, text: raw_text }
-      end
-      summarization_result = strategy.summarize(content, user, &on_partial_blk)
-      cache_summary(summarization_result)
-    end
-    def summary_targets
-      topic.has_summary? ? best_replies : pick_selection
+      return if !user && !cached_summary
+      return cached_summary if use_cached?(skip_age_check)
+      summarizer.delete_cached_summaries! if cached_summary
+      summarizer.summarize(user, &on_partial_blk)
     end
     private
-    def summary_sha
-      @summary_sha ||= build_sha(summary_targets_data.map(&:first))
-    end
-    def summary_targets_data
-      @summary_targets_data ||= summary_targets.pluck(:post_number, :raw, :username)
-    end
-    def existing_summary
-      if !defined?(@existing_summary)
-        @existing_summary = AiSummary.find_by(target: topic)
-        if @existing_summary && existing_summary.original_content_sha != summary_sha
-          @existing_summary.mark_as_outdated
-        end
-      end
-      @existing_summary
-    end
-    def best_replies
-      Post
-        .summary(topic.id)
-        .where("post_type = ?", Post.types[:regular])
-        .where("NOT hidden")
-        .joins(:user)
-        .order(:post_number)
-    end
-    def pick_selection
-      posts =
-        Post
-          .where(topic_id: topic.id)
-          .where("post_type = ?", Post.types[:regular])
-          .where("NOT hidden")
-          .order(:post_number)
-      post_numbers = posts.limit(5).pluck(:post_number)
-      post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
-      post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
-      Post
-        .where(topic_id: topic.id)
-        .joins(:user)
-        .where("post_number in (?)", post_numbers)
-        .order(:post_number)
-    end
-    def delete_cached_summaries!
-      AiSummary.where(target: topic).destroy_all
-    end
+    attr_reader :summarizer, :user
     def use_cached?(skip_age_check)
       can_summarize = Guardian.new(user).can_request_summary?
-      existing_summary &&
+      cached_summary &&
         !(
-          can_summarize && new_targets? &&
-            (skip_age_check || existing_summary.created_at < 1.hour.ago)
+          can_summarize && cached_summary.outdated &&
+            (skip_age_check || cached_summary.created_at < 1.hour.ago)
         )
     end
-    def new_targets?
-      existing_summary&.original_content_sha != summary_sha
-    end
-    def cache_summary(result)
-      post_numbers = summary_targets_data.map(&:first)
-      cached_summary =
-        AiSummary.create!(
-          target: topic,
-          algorithm: strategy.display_name,
-          content_range: (post_numbers.first..post_numbers.last),
-          summarized_text: result[:summary],
-          original_content_sha: summary_sha,
-        )
-      cached_summary
-    end
-    def build_sha(ids)
-      Digest::SHA256.hexdigest(ids.join)
-    end
   end
 end
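
The controller changes above show the intended call pattern for the slimmed-down service. Roughly:

```ruby
# Sketch based on the controller diff in this commit.
service = DiscourseAi::TopicSummarization.for(topic, current_user)

service.cached_summary                   # existing AiSummary, if any (marked outdated when stale)
service.summarize(skip_age_check: true)  # returns the cached record or generates a fresh one
```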

View File

@@ -0,0 +1,6 @@
# frozen_string_literal: true
class AddAiSummaryTypeColumn < ActiveRecord::Migration[7.1]
def change
add_column :ai_summaries, :summary_type, :integer, default: 0, null: false
end
end

View File

@@ -2,7 +2,7 @@
 module DiscourseAi
   module GuardianExtensions
-    def can_see_summary?(target)
+    def can_see_summary?(target, summary_type)
       return false if !SiteSetting.ai_summarization_enabled
       if target.class == Topic && target.private_message?
@@ -14,7 +14,7 @@ module DiscourseAi
         return false if !allowed
       end
-      has_cached_summary = AiSummary.exists?(target: target)
+      has_cached_summary = AiSummary.exists?(target: target, summary_type: summary_type)
       return has_cached_summary if user.nil?
       has_cached_summary || can_request_summary?

View File

@@ -1,9 +1,36 @@
 # frozen_string_literal: true
 module DiscourseAi
   module Summarization
-    def self.default_strategy
+    def self.topic_summary(topic)
       if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
-        DiscourseAi::Summarization::Strategies::FoldContent.new(SiteSetting.ai_summarization_model)
+        DiscourseAi::Summarization::FoldContent.new(
+          DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
+          DiscourseAi::Summarization::Strategies::TopicSummary.new(topic),
+        )
+      else
+        nil
+      end
+    end
+
+    def self.topic_gist(topic)
+      if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
+        DiscourseAi::Summarization::FoldContent.new(
+          DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
+          DiscourseAi::Summarization::Strategies::TopicGist.new(topic),
+        )
+      else
+        nil
+      end
+    end
+
+    def self.chat_channel_summary(channel, time_window_in_hours)
+      if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
+        DiscourseAi::Summarization::FoldContent.new(
+          DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
+          DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, time_window_in_hours),
+          persist_summaries: false,
+        )
       else
         nil
       end
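
Each helper returns a FoldContent summarizer wired to the right strategy, or nil when summarization is disabled. A rough usage sketch (the 24-hour chat window is just an example value):

```ruby
# Assumes ai_summarization_enabled is on and ai_summarization_model is configured.
DiscourseAi::Summarization.topic_summary(topic)&.summarize(user)
DiscourseAi::Summarization.topic_gist(topic)&.summarize(user)

# Chat channel summaries are built with persist_summaries: false, so nothing is cached.
DiscourseAi::Summarization.chat_channel_summary(channel, 24)&.summarize(user)
```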

View File

@@ -2,14 +2,6 @@
 module DiscourseAi
   module Summarization
-    def self.default_strategy
-      if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
-        DiscourseAi::Summarization::Strategies::FoldContent.new(SiteSetting.ai_summarization_model)
-      else
-        nil
-      end
-    end
     class EntryPoint
       def inject_into(plugin)
         plugin.add_to_serializer(:current_user, :can_summarize) do
@@ -18,11 +10,11 @@ module DiscourseAi
         end
         plugin.add_to_serializer(:topic_view, :summarizable) do
-          scope.can_see_summary?(object.topic)
+          scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete])
         end
         plugin.add_to_serializer(:web_hook_topic_view, :summarizable) do
-          scope.can_see_summary?(object.topic)
+          scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete])
         end
       end
     end

View File

@@ -0,0 +1,189 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
# This class offers a generic way of summarizing content from multiple sources using different prompts.
#
# It summarizes large amounts of content by recursively summarizing it in smaller chunks that
# fit the given model context window, finally concatenating the disjoint summaries
# into a final version.
#
class FoldContent
def initialize(llm, strategy, persist_summaries: true)
@llm = llm
@strategy = strategy
@persist_summaries = persist_summaries
end
attr_reader :llm, :strategy
# @param user { User } - User object used for auditing usage.
#
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
# Note: The block is only called with results of the final summary, not intermediate summaries.
#
# @returns { AiSummary } - Resulting summary.
def summarize(user, &on_partial_blk)
opts = content_to_summarize.except(:contents)
initial_chunks =
rebalance_chunks(
content_to_summarize[:contents].map do |c|
{ ids: [c[:id]], summary: format_content_item(c) }
end,
)
# Special case where we can do all the summarization in one pass.
result =
if initial_chunks.length == 1
{
summary:
summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
summarize_chunks(initial_chunks, user, opts, &on_partial_blk)
end
if persist_summaries
AiSummary.store!(
strategy.target,
strategy.type,
llm_model.name,
result[:summary],
content_to_summarize[:contents].map { |c| c[:id] },
)
else
AiSummary.new(summarized_text: result[:summary])
end
end
# @returns { AiSummary } - Resulting summary.
#
# Finds a summary matching the target and strategy. Marks it as outdated if the strategy found newer content.
def existing_summary
if !defined?(@existing_summary)
summary = AiSummary.find_by(target: strategy.target, summary_type: strategy.type)
if summary
@existing_summary = summary
if existing_summary.original_content_sha != latest_sha
@existing_summary.mark_as_outdated
end
end
end
@existing_summary
end
def delete_cached_summaries!
AiSummary.where(target: strategy.target, summary_type: strategy.type).destroy_all
end
private
attr_reader :persist_summaries
def llm_model
llm.llm_model
end
def content_to_summarize
@targets_data ||= strategy.targets_data
end
def latest_sha
@latest_sha ||= AiSummary.build_sha(content_to_summarize[:contents].map { |c| c[:id] }.join)
end
def summarize_chunks(chunks, user, opts, &on_partial_blk)
# Safely assume we always have more than one chunk.
summarized_chunks = summarize_in_chunks(chunks, user, opts)
total_summaries_size =
llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
if total_summaries_size < available_tokens
# Chunks are small enough, we can concatenate them.
{
summary:
concatenate_summaries(
summarized_chunks.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summarized_chunks,
}
else
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
rebalanced_chunks = rebalance_chunks(summarized_chunks)
summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk)
end
end
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def rebalance_chunks(chunks)
section = { ids: [], summary: "" }
chunks =
chunks.reduce([]) do |sections, chunk|
if llm_model.tokenizer_class.can_expand_tokens?(
section[:summary],
chunk[:summary],
available_tokens,
)
section[:summary] += chunk[:summary]
section[:ids] = section[:ids].concat(chunk[:ids])
else
sections << section
section = chunk
end
sections
end
chunks << section if section[:summary].present?
chunks
end
def summarize_single(text, user, opts, &on_partial_blk)
prompt = strategy.summarize_single_prompt(text, opts)
llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
end
def summarize_in_chunks(chunks, user, opts)
chunks.map do |chunk|
prompt = strategy.summarize_single_prompt(chunk[:summary], opts)
chunk[:summary] = llm.generate(
prompt,
user: user,
max_tokens: 300,
feature_name: "summarize",
)
chunk
end
end
def concatenate_summaries(texts_to_summarize, user, &on_partial_blk)
prompt = strategy.concatenation_prompt(texts_to_summarize)
llm.generate(prompt, user: user, &on_partial_blk)
end
def available_tokens
# Reserve tokens for the response and the base prompt
# ~500 words
reserved_tokens = 700
llm_model.max_prompt_tokens - reserved_tokens
end
end
end
end
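
For orientation, the module-level helpers shown earlier wire this class up; building one by hand looks roughly like this (a sketch, not additional code from the commit):

```ruby
llm = DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model)
strategy = DiscourseAi::Summarization::Strategies::TopicGist.new(topic)

summarizer = DiscourseAi::Summarization::FoldContent.new(llm, strategy, persist_summaries: false)

# The block only receives partials of the final pass, not of intermediate chunk summaries.
summary = summarizer.summarize(user) { |partial| print partial }
summary.summarized_text
```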

View File

@@ -0,0 +1,59 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
# Objects inheriting from this class will get passed as a dependency to `DiscourseAi::Summarization::FoldContent`.
# This collaborator knows how to source the content to summarize and the prompts used in the process,
# one for summarizing a chunk and another for concatenating them if necessary.
class Base
def initialize(target)
@target = target
end
attr_reader :target
# The summary type differentiates instances of `AiSummary` pointing to a single target.
# See the `summary_type` enum for available options.
def type
raise NotImplementedError
end
# @returns { Hash } - Content to summarize.
#
# This method returns a hash with the content to summarize and additional information.
# The only mandatory key is `contents`, which must be an array of hashes with
# the following structure:
#
# {
# poster: A way to tell who wrote the content,
# id: A number to signal order,
# text: Text to summarize
# }
#
# Additionally, you could add more context, which will be available in the prompt. e.g.:
#
# {
# resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
# content_title: target.title,
# contents: [...]
# }
#
def targets_data
raise NotImplementedError
end
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when concatenating multiple chunks.
def concatenation_prompt(_texts_to_summarize)
raise NotImplementedError
end
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM on each chunk,
# and when the whole content fits in one call.
def summarize_single_prompt(_input, _opts)
raise NotImplementedError
end
end
end
end
end
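
Supporting a new kind of target means subclassing Base and implementing the four methods above. A hypothetical minimal strategy (class name, content, and prompt wording invented for illustration):

```ruby
# Hypothetical example, not part of this commit.
class MyTargetSummary < DiscourseAi::Summarization::Strategies::Base
  def type
    AiSummary.summary_types[:complete]
  end

  def targets_data
    # `target` is whatever object this strategy was initialized with.
    { content_title: "My target", contents: [{ id: 1, poster: "system", text: "Some text." }] }
  end

  def concatenation_prompt(texts_to_summarize)
    prompt = DiscourseAi::Completions::Prompt.new("Merge these partial summaries into one paragraph.")
    prompt.push(type: :user, content: texts_to_summarize.join("\n"))
    prompt
  end

  def summarize_single_prompt(input, opts)
    prompt = DiscourseAi::Completions::Prompt.new("You are a summarization bot.")
    prompt.push(type: :user, content: "Summarize the following:\n#{input}")
    prompt
  end
end
```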

View File

@@ -0,0 +1,85 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class ChatMessages < Base
def type
AiSummary.summary_types[:complete]
end
def initialize(target, since)
super(target)
@since = since
end
def targets_data
content = { content_title: target.name }
content[:contents] = target
.chat_messages
.where("chat_messages.created_at > ?", since.hours.ago)
.includes(:user)
.order(created_at: :asc)
.pluck(:id, :username_lower, :message)
.map { { id: _1, poster: _2, text: _3 } }
content
end
def concatenation_prompt(texts_to_summarize)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot tasked with creating a cohesive narrative by intelligently merging multiple disjointed summaries.
Your response should consist of well-structured paragraphs that combine these summaries into a clear and comprehensive overview.
Avoid adding any additional text or commentary. Format your output using Discourse forum Markdown.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
<input>
#{texts_to_summarize.join("\n")}
</input>
TEXT
prompt
end
def summarize_single_prompt(input, opts)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot designed to generate clear and insightful paragraphs that convey the main topics
and developments from a series of chat messages within a user-selected time window.
Analyze the messages to extract key themes, participants' intentions, and any significant conclusions or decisions.
Your summary should be concise yet comprehensive, providing an overview that is accessible to someone with no prior context of the conversation.
- Only include the summary, WITHOUT additional commentary.
- Don't mention the channel title. Avoid including extraneous details or subjective opinions.
- Maintain the original language of the text being summarized.
- The same user could write multiple messages in a row, don't treat them as different persons.
- Aim for summaries to be 400 words or less.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
#{opts[:content_title].present? ? "The name of the channel is: " + opts[:content_title] + ".\n" : ""}
Here are the messages, inside <input></input> XML tags:
<input>
#{input}
</input>
Generate a summary of the given chat messages.
TEXT
prompt
end
private
attr_reader :since
end
end
end
end

View File

@@ -1,198 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class FoldContent
def initialize(completion_model)
@llm = DiscourseAi::Completions::Llm.proxy(completion_model)
raise "Invalid model provided for summarization strategy" if @llm.llm_model.nil?
end
attr_reader :llm
def summarize(content, user, &on_partial_blk)
opts = content.except(:contents)
initial_chunks =
rebalance_chunks(
content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } },
)
# Special case where we can do all the summarization in one pass.
if initial_chunks.length == 1
{
summary:
summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
summarize_chunks(initial_chunks, user, opts, &on_partial_blk)
end
end
def display_name
llm_model&.name || "unknown model"
end
private
def llm_model
llm.llm_model
end
def summarize_chunks(chunks, user, opts, &on_partial_blk)
# Safely assume we always have more than one chunk.
summarized_chunks = summarize_in_chunks(chunks, user, opts)
total_summaries_size =
llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
if total_summaries_size < available_tokens
# Chunks are small enough, we can concatenate them.
{
summary:
concatenate_summaries(
summarized_chunks.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summarized_chunks,
}
else
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
rebalanced_chunks = rebalance_chunks(summarized_chunks)
summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk)
end
end
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def rebalance_chunks(chunks)
section = { ids: [], summary: "" }
chunks =
chunks.reduce([]) do |sections, chunk|
if llm_model.tokenizer_class.can_expand_tokens?(
section[:summary],
chunk[:summary],
available_tokens,
)
section[:summary] += chunk[:summary]
section[:ids] = section[:ids].concat(chunk[:ids])
else
sections << section
section = chunk
end
sections
end
chunks << section if section[:summary].present?
chunks
end
def summarize_single(text, user, opts, &on_partial_blk)
prompt = summarization_prompt(text, opts)
llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
end
def summarize_in_chunks(chunks, user, opts)
chunks.map do |chunk|
prompt = summarization_prompt(chunk[:summary], opts)
chunk[:summary] = llm.generate(
prompt,
user: user,
max_tokens: 300,
feature_name: "summarize",
)
chunk
end
end
def concatenate_summaries(summaries, user, &on_partial_blk)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
The narrative you create is in the form of one or multiple paragraphs.
Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
You understand and generate Discourse forum Markdown.
You format the response, including links, using Markdown.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
<input>
#{summaries.join("\n")}
</input>
TEXT
llm.generate(prompt, user: user, &on_partial_blk)
end
def summarization_prompt(input, opts)
insts = +<<~TEXT
You are an advanced summarization bot that generates concise, coherent summaries of provided text.
- Only include the summary, without any additional commentary.
- You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
- Maintain the original language of the text being summarized.
- Aim for summaries to be 400 words or less.
TEXT
insts << <<~TEXT if opts[:resource_path]
- Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
- Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER)
- Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3)
- Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6)
- Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13)
- When formatting usernames either use @USERNMAE OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER)
TEXT
prompt = DiscourseAi::Completions::Prompt.new(insts.strip)
if opts[:resource_path]
prompt.push(
type: :user,
content:
"Here are the posts inside <input></input> XML tags:\n\n<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
)
prompt.push(
type: :model,
content:
"Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
)
end
prompt.push(type: :user, content: <<~TEXT.strip)
#{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
Here are the posts, inside <input></input> XML tags:
<input>
#{input}
</input>
Generate a concise, coherent summary of the text above maintaining the original language.
TEXT
prompt
end
def available_tokens
# Reserve tokens for the response and the base prompt
# ~500 words
reserved_tokens = 700
llm_model.max_prompt_tokens - reserved_tokens
end
end
end
end
end

View File

@@ -0,0 +1,90 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class TopicGist < Base
def type
AiSummary.summary_types[:gist]
end
def targets_data
content = { content_title: target.title, contents: [] }
op_post_number = 1
last_twenty_posts =
Post
.where(topic_id: target.id)
.where("post_type = ?", Post.types[:regular])
.where("NOT hidden")
.order("post_number DESC")
.limit(20)
.pluck(:post_number)
posts_data =
Post
.where(topic_id: target.id)
.joins(:user)
.where("post_number IN (?)", last_twenty_posts << op_post_number)
.order(:post_number)
.pluck(:post_number, :raw, :username)
posts_data.each do |(pn, raw, username)|
raw_text = raw
if pn == 1 && target.topic_embed&.embed_content_cache.present?
raw_text = target.topic_embed&.embed_content_cache
end
content[:contents] << { poster: username, id: pn, text: raw_text }
end
content
end
def concatenation_prompt(texts_to_summarize)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot tasked with creating a single, concise sentence by merging disjointed summaries into a cohesive statement.
Your response should strictly be this single, comprehensive sentence, without any additional text or comments.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
<input>
#{texts_to_summarize.join("\n")}
</input>
TEXT
prompt
end
def summarize_single_prompt(input, opts)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are an advanced summarization bot. Your task is to analyze a given conversation and generate a single,
concise sentence that clearly conveys the main topic and purpose of the discussion to someone with no prior context.
- Focus on the central theme or issue being addressed, while maintaining an objective and neutral tone.
- Avoid including extraneous details or subjective opinions.
- Maintain the original language of the text being summarized.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
#{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
Here are the posts, inside <input></input> XML tags:
<input>
#{input}
</input>
Generate a single sentence of the text above maintaining the original language.
TEXT
prompt
end
end
end
end
end

View File

@@ -0,0 +1,142 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class TopicSummary < Base
def type
AiSummary.summary_types[:complete]
end
def targets_data
content = {
resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
content_title: target.title,
contents: [],
}
posts_data =
(target.has_summary? ? best_replies : pick_selection).pluck(
:post_number,
:raw,
:username,
)
posts_data.each do |(pn, raw, username)|
raw_text = raw
if pn == 1 && target.topic_embed&.embed_content_cache.present?
raw_text = target.topic_embed&.embed_content_cache
end
content[:contents] << { poster: username, id: pn, text: raw_text }
end
content
end
def concatenation_prompt(texts_to_summarize)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot that effectively concatenates disjointed summaries, creating a cohesive narrative.
The narrative you create is in the form of one or multiple paragraphs.
Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
You understand and generate Discourse forum Markdown.
You format the response, including links, using Markdown.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
<input>
#{texts_to_summarize.join("\n")}
</input>
TEXT
prompt
end
def summarize_single_prompt(input, opts)
insts = +<<~TEXT
You are an advanced summarization bot that generates concise, coherent summaries of provided text.
- Only include the summary, without any additional commentary.
- You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
- Maintain the original language of the text being summarized.
- Aim for summaries to be 400 words or less.
TEXT
insts << <<~TEXT if opts[:resource_path]
- Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
- Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER)
- Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3)
- Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6)
- Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13)
- When formatting usernames either use @USERNAME OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER)
TEXT
prompt = DiscourseAi::Completions::Prompt.new(insts.strip)
if opts[:resource_path]
prompt.push(
type: :user,
content:
"Here are the posts inside <input></input> XML tags:\n\n<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
)
prompt.push(
type: :model,
content:
"Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
)
end
prompt.push(type: :user, content: <<~TEXT.strip)
#{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
Here are the posts, inside <input></input> XML tags:
<input>
#{input}
</input>
Generate a concise, coherent summary of the text above maintaining the original language.
TEXT
prompt
end
private
attr_reader :topic
def best_replies
Post
.summary(target.id)
.where("post_type = ?", Post.types[:regular])
.where("NOT hidden")
.joins(:user)
.order(:post_number)
end
def pick_selection
posts =
Post
.where(topic_id: target.id)
.where("post_type = ?", Post.types[:regular])
.where("NOT hidden")
.order(:post_number)
post_numbers = posts.limit(5).pluck(:post_number)
post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
Post
.where(topic_id: target.id)
.joins(:user)
.where("post_number in (?)", post_numbers)
.order(:post_number)
end
end
end
end
end

View File

@@ -20,7 +20,7 @@ describe DiscourseAi::GuardianExtensions do
     it "returns false" do
       SiteSetting.ai_custom_summarization_allowed_groups = ""
-      expect(guardian.can_see_summary?(topic)).to eq(false)
+      expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(false)
     end
     it "returns true if there is a cached summary" do
@@ -29,9 +29,10 @@ describe DiscourseAi::GuardianExtensions do
         summarized_text: "test",
         original_content_sha: "123",
         algorithm: "test",
+        summary_type: AiSummary.summary_types[:complete],
       )
-      expect(guardian.can_see_summary?(topic)).to eq(true)
+      expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
     end
   end
@@ -39,7 +40,7 @@ describe DiscourseAi::GuardianExtensions do
     before { SiteSetting.ai_custom_summarization_allowed_groups = group.id }
     it "returns true if the user group is present in the ai_custom_summarization_allowed_groups_map setting" do
-      expect(guardian.can_see_summary?(topic)).to eq(true)
+      expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
     end
   end
@@ -48,12 +49,12 @@ describe DiscourseAi::GuardianExtensions do
     let(:pm) { Fabricate(:private_message_topic) }
     it "returns false" do
-      expect(guardian.can_see_summary?(pm)).to eq(false)
+      expect(guardian.can_see_summary?(pm, AiSummary.summary_types[:complete])).to eq(false)
     end
     it "returns true if user is in a group that is allowed summaries" do
       SiteSetting.ai_pm_summarization_allowed_groups = group.id
-      expect(guardian.can_see_summary?(pm)).to eq(true)
+      expect(guardian.can_see_summary?(pm, AiSummary.summary_types[:complete])).to eq(true)
     end
   end
@@ -61,7 +62,7 @@ describe DiscourseAi::GuardianExtensions do
     let(:guardian) { Guardian.new }
     it "returns false for anons" do
-      expect(guardian.can_see_summary?(topic)).to eq(false)
+      expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(false)
     end
     it "returns true for anons when there is a cached summary" do
@@ -70,9 +71,10 @@ describe DiscourseAi::GuardianExtensions do
         summarized_text: "test",
         original_content_sha: "123",
         algorithm: "test",
+        summary_type: AiSummary.summary_types[:complete],
       )
-      expect(guardian.can_see_summary?(topic)).to eq(true)
+      expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
     end
   end
 end

View File

@@ -1,9 +1,14 @@
 # frozen_string_literal: true
-RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
+RSpec.describe DiscourseAi::Summarization::FoldContent do
+  subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }
   describe "#summarize" do
     let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
+    fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
+    fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
     before do
       SiteSetting.ai_summarization_enabled = true
@@ -15,10 +20,6 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
       llm_model.update!(max_prompt_tokens: model_tokens)
     end
-    let(:strategy) { DiscourseAi::Summarization.default_strategy }
-    let(:summarize_text) { "This is a text" }
-    let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
     let(:single_summary) { "this is a single summary" }
     let(:concatenated_summary) { "this is a concatenated summary" }
@@ -28,27 +29,26 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
     it "does one call to summarize content" do
       result =
         DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy|
-          strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) }
+          summarizer.summarize(user).tap { expect(spy.completions).to eq(1) }
         end
-      expect(result[:summary]).to eq(single_summary)
+      expect(result.summarized_text).to eq(single_summary)
     end
   end
   context "when the content to summarize doesn't fit in a single call" do
-    it "summarizes each chunk and then concatenates them" do
-      content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
+    fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }
+    it "summarizes each chunk and then concatenates them" do
       result =
         DiscourseAi::Completions::Llm.with_prepared_responses(
           [single_summary, single_summary, concatenated_summary],
-        ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }
-      expect(result[:summary]).to eq(concatenated_summary)
+        ) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(3) } }
+      expect(result.summarized_text).to eq(concatenated_summary)
     end
     it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
-      content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
       max_length_response = "(1 asd said: This is a text "
       chunk_of_chunks = "I'm smol"
@@ -61,9 +61,9 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
           chunk_of_chunks,
           concatenated_summary,
         ],
-      ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } }
-      expect(result[:summary]).to eq(concatenated_summary)
+      ) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(5) } }
+      expect(result.summarized_text).to eq(concatenated_summary)
     end
   end
 end

View File

@@ -0,0 +1,70 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Strategies::TopicGist do
subject(:gist) { described_class.new(topic) }
fab!(:topic) { Fabricate(:topic, highest_post_number: 25) }
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }
describe "#targets_data" do
context "when the topic has more than 20 posts" do
before do
offset = 3 # Already created posts 1 and 2
(topic.highest_post_number - 2).times do |i|
Fabricate(:post, topic: topic, post_number: i + offset)
end
end
it "includes the OP and the last 20 posts" do
content = gist.targets_data
post_numbers = content[:contents].map { |c| c[:id] }
expected = (6..25).to_a << 1
expect(post_numbers).to contain_exactly(*expected)
end
end
it "only includes visible posts" do
post_2.update!(hidden: true)
post_numbers = gist.targets_data[:contents].map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1)
end
it "doesn't include posts without users" do
post_2.update!(user_id: nil)
post_numbers = gist.targets_data[:contents].map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1)
end
it "doesn't include whispers" do
post_2.update!(post_type: Post.types[:whisper])
post_numbers = gist.targets_data[:contents].map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1)
end
context "when the topic has embed content cached" do
it "embed content is used instead of the raw text" do
topic_embed =
Fabricate(
:topic_embed,
topic: topic,
embed_content_cache: "<p>hello world new post :D</p>",
)
content = gist.targets_data
op_content = content[:contents].first[:text]
expect(op_content).to include(topic_embed.embed_content_cache)
end
end
end
end

View File

@@ -0,0 +1,66 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Strategies::TopicSummary do
subject(:topic_summary) { described_class.new(topic) }
fab!(:topic) { Fabricate(:topic, highest_post_number: 25) }
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }
describe "#targets_data" do
shared_examples "includes only public-visible topics" do
it "only includes visible posts" do
post_2.update!(hidden: true)
post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1)
end
it "doesn't include posts without users" do
post_2.update!(user_id: nil)
post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1)
end
it "doesn't include whispers" do
post_2.update!(post_type: Post.types[:whisper])
post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1)
end
end
context "when the topic has a best replies summary" do
before { topic.update(has_summary: true) }
it_behaves_like "includes only public-visible topics"
end
context "when the topic doesn't have a best replies summary" do
before { topic.update(has_summary: false) }
it_behaves_like "includes only public-visible topics"
end
context "when the topic has embed content cached" do
it "embed content is used instead of the raw text" do
topic_embed =
Fabricate(
:topic_embed,
topic: topic,
embed_content_cache: "<p>hello world new post :D</p>",
)
content = topic_summary.targets_data
op_content = content[:contents].first[:text]
expect(op_content).to include(topic_embed.embed_content_cache)
end
end
end
end

View File

@@ -19,6 +19,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
       summarized_text: "test",
       algorithm: "test",
       original_content_sha: "test",
+      summary_type: AiSummary.summary_types[:complete],
     )
     sign_in(Fabricate(:admin))
@@ -47,6 +48,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
       summarized_text: "test",
       algorithm: "test",
       original_content_sha: "test",
+      summary_type: AiSummary.summary_types[:complete],
     )
     get "/discourse-ai/summarization/t/#{topic.id}.json"
@@ -133,6 +135,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
       summarized_text: "test",
       algorithm: "test",
       original_content_sha: "test",
+      summary_type: AiSummary.summary_types[:complete],
     )
     get "/discourse-ai/summarization/t/#{topic.id}.json"

View File

@@ -11,55 +11,14 @@ describe DiscourseAi::TopicSummarization do
     SiteSetting.ai_summarization_enabled = true
   end
-  let(:strategy) { DiscourseAi::Summarization.default_strategy }
+  let(:strategy) { DiscourseAi::Summarization.topic_summary(topic) }
-  shared_examples "includes only public-visible topics" do
-    subject { DiscourseAi::TopicSummarization.new(strategy, topic, user) }
-    it "only includes visible posts" do
-      topic.first_post.update!(hidden: true)
-      posts = subject.summary_targets
-      expect(posts.none?(&:hidden?)).to eq(true)
-    end
-    it "doesn't include posts without users" do
-      topic.first_post.user.destroy!
-      posts = subject.summary_targets
-      expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
-    end
-    it "doesn't include deleted posts" do
-      topic.first_post.update!(user_id: nil)
-      posts = subject.summary_targets
-      expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
-    end
-  end
-  describe "#summary_targets" do
-    context "when the topic has a best replies summary" do
-      before { topic.has_summary = true }
-      it_behaves_like "includes only public-visible topics"
-    end
-    context "when the topic doesn't have a best replies summary" do
-      before { topic.has_summary = false }
-      it_behaves_like "includes only public-visible topics"
-    end
-  end
   describe "#summarize" do
-    subject(:summarization) { described_class.new(strategy, topic, user) }
+    subject(:summarization) { described_class.new(strategy, user) }
     def assert_summary_is_cached(topic, summary_response)
-      cached_summary = AiSummary.find_by(target: topic)
+      cached_summary =
+        AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete])
       expect(cached_summary.content_range).to cover(*topic.posts.map(&:post_number))
       expect(cached_summary.summarized_text).to eq(summary)
@@ -82,41 +41,15 @@ describe DiscourseAi::TopicSummarization do
       summarization.summarize
       cached_summary_text = "This is a cached summary"
-      AiSummary.find_by(target: topic).update!(
+      AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete]).update!(
         summarized_text: cached_summary_text,
         updated_at: 24.hours.ago,
       )
-      summarization = described_class.new(strategy, topic, user)
+      summarization = described_class.new(strategy, user)
       section = summarization.summarize
       expect(section.summarized_text).to eq(cached_summary_text)
     end
-    context "when the topic has embed content cached" do
-      it "embed content is used instead of the raw text" do
-        topic_embed =
-          Fabricate(
-            :topic_embed,
-            topic: topic,
-            embed_content_cache: "<p>hello world new post :D</p>",
-          )
-        DiscourseAi::Completions::Llm.with_prepared_responses(["A summary"]) do |spy|
-          summarization.summarize
-          prompt_raw =
-            spy
-              .prompt_messages
-              .reduce(+"") do |memo, m|
-                memo << m[:content] << "\n"
-                memo
-              end
-          expect(prompt_raw).to include(topic_embed.embed_content_cache)
-        end
-      end
-    end
   end
   describe "invalidating cached summaries" do
@@ -124,7 +57,7 @@ describe DiscourseAi::TopicSummarization do
     let(:updated_summary) { "This is the final summary" }
     def cached_summary
-      AiSummary.find_by(target: topic)
+      AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete])
     end
     before do
@@ -133,8 +66,8 @@ describe DiscourseAi::TopicSummarization do
       # since it is glued to the old llm instance
       # so we create the cached summary totally independantly
       DiscourseAi::Completions::Llm.with_prepared_responses([cached_text]) do
-        strategy = DiscourseAi::Summarization.default_strategy
-        described_class.new(strategy, topic, user).summarize
+        strategy = DiscourseAi::Summarization.topic_summary(topic)
+        described_class.new(strategy, user).summarize
       end
       cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago)

View File

@@ -32,6 +32,7 @@ RSpec.describe "Summarize a topic ", type: :system do
       summarized_text: summarization_result,
       algorithm: "test",
       original_content_sha: "test",
+      summary_type: AiSummary.summary_types[:complete],
     )
   end