diff --git a/app/controllers/discourse_ai/summarization/chat_summary_controller.rb b/app/controllers/discourse_ai/summarization/chat_summary_controller.rb
index 96d95d8c..5354c539 100644
--- a/app/controllers/discourse_ai/summarization/chat_summary_controller.rb
+++ b/app/controllers/discourse_ai/summarization/chat_summary_controller.rb
@@ -15,29 +15,21 @@ module DiscourseAi
channel = ::Chat::Channel.find(params[:channel_id])
guardian.ensure_can_join_chat_channel!(channel)
- strategy = DiscourseAi::Summarization.default_strategy
- raise Discourse::NotFound.new unless strategy
+ summarizer = DiscourseAi::Summarization.chat_channel_summary(channel, since)
+ raise Discourse::NotFound.new unless summarizer
guardian.ensure_can_request_summary!
RateLimiter.new(current_user, "channel_summary", 6, 5.minutes).performed!
hijack do
- content = { content_title: channel.name }
-
- content[:contents] = channel
- .chat_messages
- .where("chat_messages.created_at > ?", since.hours.ago)
- .includes(:user)
- .order(created_at: :asc)
- .pluck(:id, :username_lower, :message)
- .map { { id: _1, poster: _2, text: _3 } }
+ strategy = DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, since)
summarized_text =
- if content[:contents].empty?
+ if strategy.targets_data[:contents].empty?
I18n.t("discourse_ai.summarization.chat.no_targets")
else
- strategy.summarize(content, current_user).dig(:summary)
+ summarizer.summarize(current_user)&.summarized_text
end
render json: { summary: summarized_text }
diff --git a/app/controllers/discourse_ai/summarization/summary_controller.rb b/app/controllers/discourse_ai/summarization/summary_controller.rb
index 549bbb44..a7900549 100644
--- a/app/controllers/discourse_ai/summarization/summary_controller.rb
+++ b/app/controllers/discourse_ai/summarization/summary_controller.rb
@@ -9,15 +9,19 @@ module DiscourseAi
topic = Topic.find(params[:topic_id])
guardian.ensure_can_see!(topic)
- raise Discourse::NotFound if !guardian.can_see_summary?(topic)
+ if !guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])
+ raise Discourse::NotFound
+ end
RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user
opts = params.permit(:skip_age_check)
skip_age_check = opts[:skip_age_check] == "true"
+ summarization_service = DiscourseAi::TopicSummarization.for(topic, current_user)
+
if params[:stream] && current_user
- cached_summary = DiscourseAi::TopicSummarization.cached_summary(topic, current_user)
+ cached_summary = summarization_service.cached_summary
if cached_summary && !skip_age_check
render_serialized(cached_summary, AiTopicSummarySerializer)
@@ -34,12 +38,7 @@ module DiscourseAi
render json: success_json
else
hijack do
- summary =
- DiscourseAi::TopicSummarization.summarize(
- topic,
- current_user,
- skip_age_check: skip_age_check,
- )
+ summary = summarization_service.summarize(skip_age_check: skip_age_check)
render_serialized(summary, AiTopicSummarySerializer)
end
end
diff --git a/app/jobs/regular/stream_topic_ai_summary.rb b/app/jobs/regular/stream_topic_ai_summary.rb
index d3212f58..d22a7b81 100644
--- a/app/jobs/regular/stream_topic_ai_summary.rb
+++ b/app/jobs/regular/stream_topic_ai_summary.rb
@@ -8,8 +8,11 @@ module Jobs
return unless topic = Topic.find_by(id: args[:topic_id])
return unless user = User.find_by(id: args[:user_id])
- strategy = DiscourseAi::Summarization.default_strategy
- return if strategy.nil? || !Guardian.new(user).can_see_summary?(topic)
+ strategy = DiscourseAi::Summarization.topic_summary(topic)
+ if strategy.nil? ||
+ !Guardian.new(user).can_see_summary?(topic, AiSummary.summary_types[:complete])
+ return
+ end
guardian = Guardian.new(user)
return unless guardian.can_see?(topic)
@@ -21,7 +24,7 @@ module Jobs
summary =
DiscourseAi::TopicSummarization
- .new(strategy, topic, user)
+ .new(strategy, user)
.summarize(skip_age_check: skip_age_check) do |partial_summary|
streamed_summary << partial_summary
diff --git a/app/models/ai_summary.rb b/app/models/ai_summary.rb
index f947dc0d..3267efbd 100644
--- a/app/models/ai_summary.rb
+++ b/app/models/ai_summary.rb
@@ -3,6 +3,23 @@
class AiSummary < ActiveRecord::Base
belongs_to :target, polymorphic: true
+ enum :summary_type, { complete: 0, gist: 1 }
+
+ def self.store!(target, summary_type, model, summary, content_ids) # Creates and saves a summary row via create!; returns the new AiSummary.
+ AiSummary.create!(
+ target: target,
+ algorithm: model, # stored as-is; FoldContent passes the LLM model's name here
+ content_range: (content_ids.first..content_ids.last), # first..last id as given — assumes content_ids is already ordered, TODO confirm
+ summarized_text: summary,
+ original_content_sha: build_sha(content_ids.join), # fingerprint of the summarized ids
+ summary_type: summary_type,
+ )
+ end
+
+ def self.build_sha(joined_ids) # Returns the SHA-256 hex digest of an already-joined string of ids.
+ Digest::SHA256.hexdigest(joined_ids)
+ end
+
def mark_as_outdated
@outdated = true
end
@@ -25,6 +42,7 @@ end
# algorithm :string not null
# created_at :datetime not null
# updated_at :datetime not null
+# summary_type :integer default("complete"), not null
#
# Indexes
#
diff --git a/app/services/discourse_ai/topic_summarization.rb b/app/services/discourse_ai/topic_summarization.rb
index 15a94193..3d33e21d 100644
--- a/app/services/discourse_ai/topic_summarization.rb
+++ b/app/services/discourse_ai/topic_summarization.rb
@@ -1,145 +1,44 @@
# frozen_string_literal: true
module DiscourseAi
+ # A cache layer on top of our topic summarization engine. Also handles permissions.
class TopicSummarization
- def self.summarize(topic, user, skip_age_check: false, &on_partial_blk)
- new(DiscourseAi::Summarization.default_strategy, topic, user).summarize(
- skip_age_check: skip_age_check,
- &on_partial_blk
- )
+ def self.for(topic, user)
+ new(DiscourseAi::Summarization.topic_summary(topic), user)
end
- def self.cached_summary(topic, user)
- new(DiscourseAi::Summarization.default_strategy, topic, user).cached_summary
- end
-
- def initialize(strategy, topic, user)
- @strategy = strategy
- @topic = topic
+ def initialize(summarizer, user)
+ @summarizer = summarizer
@user = user
end
- attr_reader :strategy, :topic, :user
-
def cached_summary
- existing_summary
+ summarizer.existing_summary
end
def summarize(skip_age_check: false, &on_partial_blk)
# Existing summary shouldn't be nil in this scenario because the controller checks its existence.
- return if !user && !existing_summary
+ return if !user && !cached_summary
- return existing_summary if use_cached?(skip_age_check)
+ return cached_summary if use_cached?(skip_age_check)
- delete_cached_summaries! if existing_summary
+ summarizer.delete_cached_summaries! if cached_summary
- content = {
- resource_path: "#{Discourse.base_path}/t/-/#{topic.id}",
- content_title: topic.title,
- contents: [],
- }
-
- summary_targets_data.map do |(pn, raw, username)|
- raw_text = raw
-
- if pn == 1 && topic.topic_embed&.embed_content_cache.present?
- raw_text = topic.topic_embed&.embed_content_cache
- end
-
- content[:contents] << { poster: username, id: pn, text: raw_text }
- end
-
- summarization_result = strategy.summarize(content, user, &on_partial_blk)
- cache_summary(summarization_result)
- end
-
- def summary_targets
- topic.has_summary? ? best_replies : pick_selection
+ summarizer.summarize(user, &on_partial_blk)
end
private
- def summary_sha
- @summary_sha ||= build_sha(summary_targets_data.map(&:first))
- end
-
- def summary_targets_data
- @summary_targets_data ||= summary_targets.pluck(:post_number, :raw, :username)
- end
-
- def existing_summary
- if !defined?(@existing_summary)
- @existing_summary = AiSummary.find_by(target: topic)
- if @existing_summary && existing_summary.original_content_sha != summary_sha
- @existing_summary.mark_as_outdated
- end
- end
- @existing_summary
- end
-
- def best_replies
- Post
- .summary(topic.id)
- .where("post_type = ?", Post.types[:regular])
- .where("NOT hidden")
- .joins(:user)
- .order(:post_number)
- end
-
- def pick_selection
- posts =
- Post
- .where(topic_id: topic.id)
- .where("post_type = ?", Post.types[:regular])
- .where("NOT hidden")
- .order(:post_number)
-
- post_numbers = posts.limit(5).pluck(:post_number)
- post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
- post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
-
- Post
- .where(topic_id: topic.id)
- .joins(:user)
- .where("post_number in (?)", post_numbers)
- .order(:post_number)
- end
-
- def delete_cached_summaries!
- AiSummary.where(target: topic).destroy_all
- end
+ attr_reader :summarizer, :user
def use_cached?(skip_age_check)
can_summarize = Guardian.new(user).can_request_summary?
- existing_summary &&
+ cached_summary &&
!(
- can_summarize && new_targets? &&
- (skip_age_check || existing_summary.created_at < 1.hour.ago)
+ can_summarize && cached_summary.outdated &&
+ (skip_age_check || cached_summary.created_at < 1.hour.ago)
)
end
-
- def new_targets?
- existing_summary&.original_content_sha != summary_sha
- end
-
- def cache_summary(result)
- post_numbers = summary_targets_data.map(&:first)
-
- cached_summary =
- AiSummary.create!(
- target: topic,
- algorithm: strategy.display_name,
- content_range: (post_numbers.first..post_numbers.last),
- summarized_text: result[:summary],
- original_content_sha: summary_sha,
- )
-
- cached_summary
- end
-
- def build_sha(ids)
- Digest::SHA256.hexdigest(ids.join)
- end
end
end
diff --git a/db/migrate/20240909180908_add_ai_summary_type_column.rb b/db/migrate/20240909180908_add_ai_summary_type_column.rb
new file mode 100644
index 00000000..82284106
--- /dev/null
+++ b/db/migrate/20240909180908_add_ai_summary_type_column.rb
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+class AddAiSummaryTypeColumn < ActiveRecord::Migration[7.1] # Adds the summary_type column to ai_summaries.
+ def change
+ add_column :ai_summaries, :summary_type, :integer, default: 0, null: false # 0 is `complete` in the AiSummary summary_type enum
+ end
+end
diff --git a/lib/guardian_extensions.rb b/lib/guardian_extensions.rb
index d4ee455e..075ae96c 100644
--- a/lib/guardian_extensions.rb
+++ b/lib/guardian_extensions.rb
@@ -2,7 +2,7 @@
module DiscourseAi
module GuardianExtensions
- def can_see_summary?(target)
+ def can_see_summary?(target, summary_type)
return false if !SiteSetting.ai_summarization_enabled
if target.class == Topic && target.private_message?
@@ -14,7 +14,7 @@ module DiscourseAi
return false if !allowed
end
- has_cached_summary = AiSummary.exists?(target: target)
+ has_cached_summary = AiSummary.exists?(target: target, summary_type: summary_type)
return has_cached_summary if user.nil?
has_cached_summary || can_request_summary?
diff --git a/lib/summarization.rb b/lib/summarization.rb
index e8b037df..e338794f 100644
--- a/lib/summarization.rb
+++ b/lib/summarization.rb
@@ -1,9 +1,36 @@
# frozen_string_literal: true
+
module DiscourseAi
module Summarization
- def self.default_strategy
+ def self.topic_summary(topic) # Builds a FoldContent summarizer using the TopicSummary strategy; nil unless a model is set and summarization is enabled.
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
- DiscourseAi::Summarization::Strategies::FoldContent.new(SiteSetting.ai_summarization_model)
+ DiscourseAi::Summarization::FoldContent.new(
+ DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
+ DiscourseAi::Summarization::Strategies::TopicSummary.new(topic),
+ )
+ else
+ nil
+ end
+ end
+
+ def self.topic_gist(topic) # Builds a FoldContent summarizer using the TopicGist strategy; nil unless a model is set and summarization is enabled.
+ if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
+ DiscourseAi::Summarization::FoldContent.new(
+ DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
+ DiscourseAi::Summarization::Strategies::TopicGist.new(topic),
+ )
+ else
+ nil
+ end
+ end
+
+ def self.chat_channel_summary(channel, time_window_in_hours)
+ if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
+ DiscourseAi::Summarization::FoldContent.new(
+ DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
+ DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, time_window_in_hours),
+ persist_summaries: false,
+ )
else
nil
end
diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb
index 3066be47..052926df 100644
--- a/lib/summarization/entry_point.rb
+++ b/lib/summarization/entry_point.rb
@@ -2,14 +2,6 @@
module DiscourseAi
module Summarization
- def self.default_strategy
- if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
- DiscourseAi::Summarization::Strategies::FoldContent.new(SiteSetting.ai_summarization_model)
- else
- nil
- end
- end
-
class EntryPoint
def inject_into(plugin)
plugin.add_to_serializer(:current_user, :can_summarize) do
@@ -18,11 +10,11 @@ module DiscourseAi
end
plugin.add_to_serializer(:topic_view, :summarizable) do
- scope.can_see_summary?(object.topic)
+ scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete])
end
plugin.add_to_serializer(:web_hook_topic_view, :summarizable) do
- scope.can_see_summary?(object.topic)
+ scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete])
end
end
end
diff --git a/lib/summarization/fold_content.rb b/lib/summarization/fold_content.rb
new file mode 100644
index 00000000..00b2c80b
--- /dev/null
+++ b/lib/summarization/fold_content.rb
@@ -0,0 +1,189 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Summarization
+ # This class offers a generic way of summarizing content from multiple sources using different prompts.
+ #
+ # It summarizes large amounts of content by recursively summarizing it in smaller chunks that
+ # fit the given model context window, finally concatenating the disjoint summaries
+ # into a final version.
+ #
+ class FoldContent
+ def initialize(llm, strategy, persist_summaries: true) # persist_summaries: false makes #summarize return an unsaved AiSummary instead of storing it
+ @llm = llm
+ @strategy = strategy
+ @persist_summaries = persist_summaries
+ end
+
+ attr_reader :llm, :strategy
+
+ # @param user { User } - User object used for auditing usage.
+ #
+ # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
+ # Note: The block is only called with results of the final summary, not intermediate summaries.
+ #
+ # @returns { AiSummary } - Resulting summary; stored via AiSummary.store! when persist_summaries is set, otherwise an unsaved instance.
+ def summarize(user, &on_partial_blk)
+ opts = content_to_summarize.except(:contents) # every key but the items themselves is handed to the prompt builders
+
+ initial_chunks =
+ rebalance_chunks(
+ content_to_summarize[:contents].map do |c|
+ { ids: [c[:id]], summary: format_content_item(c) }
+ end,
+ )
+
+ # Special case where we can do all the summarization in one pass.
+ result =
+ if initial_chunks.length == 1
+ {
+ summary:
+ summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk),
+ chunks: [],
+ }
+ else
+ summarize_chunks(initial_chunks, user, opts, &on_partial_blk) # NOTE(review): also taken when there are zero chunks (empty contents) — confirm callers guard against that
+ end
+
+ if persist_summaries
+ AiSummary.store!(
+ strategy.target,
+ strategy.type,
+ llm_model.name,
+ result[:summary],
+ content_to_summarize[:contents].map { |c| c[:id] },
+ )
+ else
+ AiSummary.new(summarized_text: result[:summary]) # not saved
+ end
+ end
+
+ # @returns { AiSummary, nil } - Stored summary matching the strategy's target and type, if any.
+ #
+ # Marks it as outdated when the strategy's current content ids no longer match the stored SHA.
+ def existing_summary
+ if !defined?(@existing_summary)
+ # Assign unconditionally so that a miss (nil) is memoized as well;
+ # otherwise every call without a stored summary queries again.
+ @existing_summary =
+ AiSummary.find_by(target: strategy.target, summary_type: strategy.type)
+
+ if @existing_summary && @existing_summary.original_content_sha != latest_sha
+ @existing_summary.mark_as_outdated
+ end
+ end
+
+ @existing_summary
+ end
+
+ def delete_cached_summaries! # Destroys every AiSummary row for the strategy's target and summary type.
+ AiSummary.where(target: strategy.target, summary_type: strategy.type).destroy_all
+ end
+
+ private
+
+ attr_reader :persist_summaries
+
+ def llm_model # Model record behind the LLM; used here for its name, tokenizer_class and max_prompt_tokens.
+ llm.llm_model
+ end
+
+ def content_to_summarize # Memoized strategy.targets_data: a hash with :contents plus optional context keys.
+ @targets_data ||= strategy.targets_data
+ end
+
+ def latest_sha # Memoized SHA of the current content ids, built the same way AiSummary.store! builds original_content_sha.
+ @latest_sha ||= AiSummary.build_sha(content_to_summarize[:contents].map { |c| c[:id] }.join)
+ end
+
+ def summarize_chunks(chunks, user, opts, &on_partial_blk) # Summarizes each chunk, then merges them; recurses until the partial summaries fit the token budget. Returns { summary:, chunks: }.
+ # Safely assume we always have more than one chunk.
+ summarized_chunks = summarize_in_chunks(chunks, user, opts)
+ total_summaries_size =
+ llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
+
+ if total_summaries_size < available_tokens
+ # Chunks are small enough, we can concatenate them.
+ {
+ summary:
+ concatenate_summaries(
+ summarized_chunks.map { |s| s[:summary] },
+ user,
+ &on_partial_blk
+ ),
+ chunks: summarized_chunks,
+ }
+ else
+ # We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
+ rebalanced_chunks = rebalance_chunks(summarized_chunks)
+
+ summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk)
+ end
+ end
+
+ def format_content_item(item) # Renders one content hash (:id, :poster, :text) as the text line fed to the prompt.
+ "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+ end
+
+ def rebalance_chunks(chunks) # Greedily merges consecutive chunks into sections while can_expand_tokens? allows it under available_tokens.
+ section = { ids: [], summary: "" }
+
+ chunks =
+ chunks.reduce([]) do |sections, chunk|
+ if llm_model.tokenizer_class.can_expand_tokens?(
+ section[:summary],
+ chunk[:summary],
+ available_tokens,
+ )
+ section[:summary] += chunk[:summary]
+ section[:ids] = section[:ids].concat(chunk[:ids])
+ else
+ sections << section # NOTE(review): if the very first chunk does not fit, this pushes the empty seed section — confirm intended
+ section = chunk
+ end
+
+ sections
+ end
+
+ chunks << section if section[:summary].present? # flush the section still being built
+
+ chunks
+ end
+
+ def summarize_single(text, user, opts, &on_partial_blk) # One-pass summary: builds the strategy's single prompt and forwards the partial-result block to the LLM.
+ prompt = strategy.summarize_single_prompt(text, opts)
+
+ llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
+ end
+
+ def summarize_in_chunks(chunks, user, opts) # Replaces each chunk's :summary in place with its LLM summary; no block is passed, so nothing is streamed.
+ chunks.map do |chunk|
+ prompt = strategy.summarize_single_prompt(chunk[:summary], opts)
+
+ chunk[:summary] = llm.generate(
+ prompt,
+ user: user,
+ max_tokens: 300,
+ feature_name: "summarize",
+ )
+
+ chunk
+ end
+ end
+
+ def concatenate_summaries(texts_to_summarize, user, &on_partial_blk) # Merges partial summaries via the strategy's concatenation prompt; forwards the partial-result block.
+ prompt = strategy.concatenation_prompt(texts_to_summarize)
+
+ llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk) # same feature_name as the other generate calls in this class
+ end
+
+ def available_tokens # Token budget for a chunk: the model's max_prompt_tokens minus a fixed reserve.
+ # Reserve tokens for the response and the base prompt
+ # ~500 words
+ reserved_tokens = 700
+
+ llm_model.max_prompt_tokens - reserved_tokens
+ end
+ end
+ end
+end
diff --git a/lib/summarization/strategies/base.rb b/lib/summarization/strategies/base.rb
new file mode 100644
index 00000000..2ca76383
--- /dev/null
+++ b/lib/summarization/strategies/base.rb
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Summarization
+ module Strategies
+ # Objects inheriting from this class will get passed as a dependency to `DiscourseAi::Summarization::FoldContent`.
+ # This collaborator knows how to source the content to summarize and the prompts used in the process,
+ # one for summarizing a chunk and another for concatenating them if necessary.
+ class Base
+ def initialize(target) # target: the object whose content is summarized (subclasses here pass a topic or a chat channel)
+ @target = target
+ end
+
+ attr_reader :target
+
+ # The summary type differentiates instances of `AiSummary` pointing to a single target.
+ # See the `summary_type` enum in `AiSummary` for available options. Subclasses must override this.
+ def type
+ raise NotImplementedError
+ end
+
+ # @returns { Hash } - Content to summarize.
+ #
+ # This method returns a hash with the content to summarize and additional information.
+ # The only mandatory key is `contents`, which must be an array of hashes with
+ # the following structure:
+ #
+ # {
+ # poster: A way to tell who wrote the content,
+ # id: A number to signal order,
+ # text: Text to summarize
+ # }
+ #
+ # Additionally, you could add more context, which will be available in the prompt. e.g.:
+ #
+ # {
+ # resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
+ # content_title: target.title,
+ # contents: [...]
+ # }
+ #
+ def targets_data
+ raise NotImplementedError
+ end
+
+ # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when concatenating multiple chunks.
+ def concatenation_prompt(_texts_to_summarize) # spelled as FoldContent calls it (strategy.concatenation_prompt); subclasses must override
+ raise NotImplementedError
+ end
+
+ # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM on each chunk,
+ # and when the whole content fits in one call. Subclasses must override this.
+ def summarize_single_prompt(_input, _opts)
+ raise NotImplementedError
+ end
+ end
+ end
+ end
+end
diff --git a/lib/summarization/strategies/chat_messages.rb b/lib/summarization/strategies/chat_messages.rb
new file mode 100644
index 00000000..3af267ff
--- /dev/null
+++ b/lib/summarization/strategies/chat_messages.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Summarization
+ module Strategies
+ class ChatMessages < Base
+ def type # Chat summaries use the `complete` summary type.
+ AiSummary.summary_types[:complete]
+ end
+
+ def initialize(target, since) # since: size of the time window, in hours (used as since.hours.ago)
+ super(target)
+ @since = since
+ end
+
+ def targets_data # Returns { content_title:, contents: } for the channel's messages created within the last `since` hours, oldest first.
+ content = { content_title: target.name }
+
+ content[:contents] = target
+ .chat_messages
+ .where("chat_messages.created_at > ?", since.hours.ago)
+ .includes(:user)
+ .order(created_at: :asc)
+ .pluck(:id, :username_lower, :message)
+ .map { { id: _1, poster: _2, text: _3 } }
+
+ content
+ end
+
+ def concatenation_prompt(texts_to_summarize) # Prompt that merges per-chunk summaries; the name must match FoldContent's strategy.concatenation_prompt call
+ prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
+ You are a summarization bot tasked with creating a cohesive narrative by intelligently merging multiple disjointed summaries.
+ Your response should consist of well-structured paragraphs that combines these summaries into a clear and comprehensive overview.
+ Avoid adding any additional text or commentary. Format your output using Discourse forum Markdown.
+ TEXT
+
+ prompt.push(type: :user, content: <<~TEXT.strip)
+ THESE are the summaries, each one separated by a newline, all of them inside XML tags:
+
+
+ #{texts_to_summarize.join("\n")}
+
+ TEXT
+
+ prompt
+ end
+
+ def summarize_single_prompt(input, opts) # Builds the chat prompt: system instructions plus a user message with the optional channel name (opts[:content_title]) and the messages.
+ prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
+ You are a summarization bot designed to generate clear and insightful paragraphs that conveys the main topics
+ and developments from a series of chat messages within a user-selected time window.
+
+ Analyze the messages to extract key themes, participants' intentions, and any significant conclusions or decisions.
+ Your summary should be concise yet comprehensive, providing an overview that is accessible to someone with no prior context of the conversation.
+
+ - Only include the summary, WITHOUT additional commentary.
+ - Don't mention the channel title. Avoid including extraneous details or subjective opinions.
+ - Maintain the original language of the text being summarized.
+ - The same user could write multiple messages in a row, don't treat them as different persons.
+ - Aim for summaries to be 400 words or less.
+
+ TEXT
+
+ prompt.push(type: :user, content: <<~TEXT.strip)
+ #{opts[:content_title].present? ? "The name of the channel is: " + opts[:content_title] + ".\n" : ""}
+
+ Here are the messages, inside XML tags:
+
+
+ #{input}
+
+
+ Generate a summary of the given chat messages.
+ TEXT
+
+ prompt
+ end
+
+ private
+
+ attr_reader :since
+ end
+ end
+ end
+end
diff --git a/lib/summarization/strategies/fold_content.rb b/lib/summarization/strategies/fold_content.rb
deleted file mode 100644
index 788e09f6..00000000
--- a/lib/summarization/strategies/fold_content.rb
+++ /dev/null
@@ -1,198 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
- module Summarization
- module Strategies
- class FoldContent
- def initialize(completion_model)
- @llm = DiscourseAi::Completions::Llm.proxy(completion_model)
- raise "Invalid model provided for summarization strategy" if @llm.llm_model.nil?
- end
-
- attr_reader :llm
-
- def summarize(content, user, &on_partial_blk)
- opts = content.except(:contents)
-
- initial_chunks =
- rebalance_chunks(
- content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } },
- )
-
- # Special case where we can do all the summarization in one pass.
- if initial_chunks.length == 1
- {
- summary:
- summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk),
- chunks: [],
- }
- else
- summarize_chunks(initial_chunks, user, opts, &on_partial_blk)
- end
- end
-
- def display_name
- llm_model&.name || "unknown model"
- end
-
- private
-
- def llm_model
- llm.llm_model
- end
-
- def summarize_chunks(chunks, user, opts, &on_partial_blk)
- # Safely assume we always have more than one chunk.
- summarized_chunks = summarize_in_chunks(chunks, user, opts)
- total_summaries_size =
- llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
-
- if total_summaries_size < available_tokens
- # Chunks are small enough, we can concatenate them.
- {
- summary:
- concatenate_summaries(
- summarized_chunks.map { |s| s[:summary] },
- user,
- &on_partial_blk
- ),
- chunks: summarized_chunks,
- }
- else
- # We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
- rebalanced_chunks = rebalance_chunks(summarized_chunks)
-
- summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk)
- end
- end
-
- def format_content_item(item)
- "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
- end
-
- def rebalance_chunks(chunks)
- section = { ids: [], summary: "" }
-
- chunks =
- chunks.reduce([]) do |sections, chunk|
- if llm_model.tokenizer_class.can_expand_tokens?(
- section[:summary],
- chunk[:summary],
- available_tokens,
- )
- section[:summary] += chunk[:summary]
- section[:ids] = section[:ids].concat(chunk[:ids])
- else
- sections << section
- section = chunk
- end
-
- sections
- end
-
- chunks << section if section[:summary].present?
-
- chunks
- end
-
- def summarize_single(text, user, opts, &on_partial_blk)
- prompt = summarization_prompt(text, opts)
-
- llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
- end
-
- def summarize_in_chunks(chunks, user, opts)
- chunks.map do |chunk|
- prompt = summarization_prompt(chunk[:summary], opts)
-
- chunk[:summary] = llm.generate(
- prompt,
- user: user,
- max_tokens: 300,
- feature_name: "summarize",
- )
- chunk
- end
- end
-
- def concatenate_summaries(summaries, user, &on_partial_blk)
- prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
- You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
- The narrative you create is in the form of one or multiple paragraphs.
- Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
- I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
- You understand and generate Discourse forum Markdown.
- You format the response, including links, using Markdown.
- TEXT
-
- prompt.push(type: :user, content: <<~TEXT.strip)
- THESE are the summaries, each one separated by a newline, all of them inside XML tags:
-
-
- #{summaries.join("\n")}
-
- TEXT
-
- llm.generate(prompt, user: user, &on_partial_blk)
- end
-
- def summarization_prompt(input, opts)
- insts = +<<~TEXT
- You are an advanced summarization bot that generates concise, coherent summaries of provided text.
-
- - Only include the summary, without any additional commentary.
- - You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
- - Maintain the original language of the text being summarized.
- - Aim for summaries to be 400 words or less.
-
- TEXT
-
- insts << <<~TEXT if opts[:resource_path]
- - Each post is formatted as ") "
- - Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER)
- - Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3)
- - Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6)
- - Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13)
- - When formatting usernames either use @USERNMAE OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER)
- TEXT
-
- prompt = DiscourseAi::Completions::Prompt.new(insts.strip)
-
- if opts[:resource_path]
- prompt.push(
- type: :user,
- content:
- "Here are the posts inside XML tags:\n\n1) user1 said: I love Mondays 2) user2 said: I hate Mondays\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
- )
- prompt.push(
- type: :model,
- content:
- "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
- )
- end
-
- prompt.push(type: :user, content: <<~TEXT.strip)
- #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
- Here are the posts, inside XML tags:
-
-
- #{input}
-
-
- Generate a concise, coherent summary of the text above maintaining the original language.
- TEXT
-
- prompt
- end
-
- def available_tokens
- # Reserve tokens for the response and the base prompt
- # ~500 words
- reserved_tokens = 700
-
- llm_model.max_prompt_tokens - reserved_tokens
- end
- end
- end
- end
-end
diff --git a/lib/summarization/strategies/topic_gist.rb b/lib/summarization/strategies/topic_gist.rb
new file mode 100644
index 00000000..f52fbc39
--- /dev/null
+++ b/lib/summarization/strategies/topic_gist.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Summarization
+ module Strategies
+ class TopicGist < Base
+ def type # Gists are stored under the `gist` summary type, separate from `complete` summaries of the same topic.
+ AiSummary.summary_types[:gist]
+ end
+
+ def targets_data # Returns { content_title:, contents: } built from post #1 plus the 20 most recent visible regular posts.
+ content = { content_title: target.title, contents: [] }
+
+ op_post_number = 1
+
+ last_twenty_posts =
+ Post
+ .where(topic_id: target.id)
+ .where("post_type = ?", Post.types[:regular])
+ .where("NOT hidden")
+ .order("post_number DESC")
+ .limit(20)
+ .pluck(:post_number)
+
+ posts_data =
+ Post
+ .where(topic_id: target.id)
+ .joins(:user)
+ .where("post_number IN (?)", last_twenty_posts << op_post_number) # NOTE(review): post #1 is added without the regular/hidden filters above — confirm intended
+ .order(:post_number)
+ .pluck(:post_number, :raw, :username)
+
+ posts_data.each do |(pn, raw, username)|
+ raw_text = raw
+
+ if pn == 1 && target.topic_embed&.embed_content_cache.present? # for embedded topics, use the cached embed content as the first post's text
+ raw_text = target.topic_embed&.embed_content_cache
+ end
+
+ content[:contents] << { poster: username, id: pn, text: raw_text }
+ end
+
+ content
+ end
+
+ def concatenation_prompt(texts_to_summarize) # Prompt asking the LLM to merge the partial summaries into one single sentence.
+ prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
+ You are a summarization bot tasked with creating a single, concise sentence by merging disjointed summaries into a cohesive statement.
+ Your response should strictly be this single, comprehensive sentence, without any additional text or comments.
+ TEXT
+
+ prompt.push(type: :user, content: <<~TEXT.strip)
+ THESE are the summaries, each one separated by a newline, all of them inside XML tags:
+
+
+ #{texts_to_summarize.join("\n")}
+
+ TEXT
+
+ prompt
+ end
+
+ def summarize_single_prompt(input, opts) # Prompt asking for a one-sentence gist; includes opts[:content_title] when present.
+ prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
+ You are an advanced summarization bot. Your task is to analyze a given conversation and generate a single,
+ concise sentence that clearly conveys the main topic and purpose of the discussion to someone with no prior context.
+
+ - Focus on the central theme or issue being addressed, while maintaining an objective and neutral tone.
+ - Avoid including extraneous details or subjective opinions.
+ - Maintain the original language of the text being summarized.
+ TEXT
+
+ prompt.push(type: :user, content: <<~TEXT.strip)
+ #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
+
+ Here are the posts, inside XML tags:
+
+
+ #{input}
+
+
+ Generate a single sentence of the text above maintaining the original language.
+ TEXT
+
+ prompt
+ end
+ end
+ end
+ end
+end
diff --git a/lib/summarization/strategies/topic_summary.rb b/lib/summarization/strategies/topic_summary.rb
new file mode 100644
index 00000000..945caace
--- /dev/null
+++ b/lib/summarization/strategies/topic_summary.rb
@@ -0,0 +1,142 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Summarization
+ module Strategies
+ class TopicSummary < Base
+ def type
+ AiSummary.summary_types[:complete]
+ end
+
+ def targets_data
+ content = {
+ resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
+ content_title: target.title,
+ contents: [],
+ }
+
+ posts_data =
+ (target.has_summary? ? best_replies : pick_selection).pluck(
+ :post_number,
+ :raw,
+ :username,
+ )
+
+ posts_data.each do |(pn, raw, username)|
+ raw_text = raw
+
+ if pn == 1 && target.topic_embed&.embed_content_cache.present?
+ raw_text = target.topic_embed&.embed_content_cache
+ end
+
+ content[:contents] << { poster: username, id: pn, text: raw_text }
+ end
+
+ content
+ end
+
+ def concatenation_prompt(texts_to_summarize)
+ prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
+ You are a summarization bot that effectively concatenates disjointed summaries, creating a cohesive narrative.
+ The narrative you create is in the form of one or multiple paragraphs.
+ Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
+ I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
+ You understand and generate Discourse forum Markdown.
+ You format the response, including links, using Markdown.
+ TEXT
+
+ prompt.push(type: :user, content: <<~TEXT.strip)
+ THESE are the summaries, each one separated by a newline, all of them inside XML tags:
+
+
+ #{texts_to_summarize.join("\n")}
+
+ TEXT
+
+ prompt
+ end
+
+ def summarize_single_prompt(input, opts)
+ insts = +<<~TEXT
+ You are an advanced summarization bot that generates concise, coherent summaries of provided text.
+
+ - Only include the summary, without any additional commentary.
+ - You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
+ - Maintain the original language of the text being summarized.
+ - Aim for summaries to be 400 words or less.
+
+ TEXT
+
+ insts << <<~TEXT if opts[:resource_path]
+ - Each post is formatted as ") "
+ - Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER)
+ - Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3)
+ - Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6)
+ - Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13)
+ - When formatting usernames either use @USERNMAE OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER)
+ TEXT
+
+ prompt = DiscourseAi::Completions::Prompt.new(insts.strip)
+
+ if opts[:resource_path]
+ prompt.push(
+ type: :user,
+ content:
+ "Here are the posts inside XML tags:\n\n1) user1 said: I love Mondays 2) user2 said: I hate Mondays\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
+ )
+ prompt.push(
+ type: :model,
+ content:
+ "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
+ )
+ end
+
+ prompt.push(type: :user, content: <<~TEXT.strip)
+ #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
+ Here are the posts, inside XML tags:
+
+
+ #{input}
+
+
+ Generate a concise, coherent summary of the text above maintaining the original language.
+ TEXT
+
+ prompt
+ end
+
+ private
+
+ attr_reader :topic
+
+ def best_replies
+ Post
+ .summary(target.id)
+ .where("post_type = ?", Post.types[:regular])
+ .where("NOT hidden")
+ .joins(:user)
+ .order(:post_number)
+ end
+
+ def pick_selection
+ posts =
+ Post
+ .where(topic_id: target.id)
+ .where("post_type = ?", Post.types[:regular])
+ .where("NOT hidden")
+ .order(:post_number)
+
+ post_numbers = posts.limit(5).pluck(:post_number)
+ post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
+ post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
+
+ Post
+ .where(topic_id: target.id)
+ .joins(:user)
+ .where("post_number in (?)", post_numbers)
+ .order(:post_number)
+ end
+ end
+ end
+ end
+end
diff --git a/spec/lib/guardian_extensions_spec.rb b/spec/lib/guardian_extensions_spec.rb
index 5787516c..38268ce1 100644
--- a/spec/lib/guardian_extensions_spec.rb
+++ b/spec/lib/guardian_extensions_spec.rb
@@ -20,7 +20,7 @@ describe DiscourseAi::GuardianExtensions do
it "returns false" do
SiteSetting.ai_custom_summarization_allowed_groups = ""
- expect(guardian.can_see_summary?(topic)).to eq(false)
+ expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(false)
end
it "returns true if there is a cached summary" do
@@ -29,9 +29,10 @@ describe DiscourseAi::GuardianExtensions do
summarized_text: "test",
original_content_sha: "123",
algorithm: "test",
+ summary_type: AiSummary.summary_types[:complete],
)
- expect(guardian.can_see_summary?(topic)).to eq(true)
+ expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
end
end
@@ -39,7 +40,7 @@ describe DiscourseAi::GuardianExtensions do
before { SiteSetting.ai_custom_summarization_allowed_groups = group.id }
it "returns true if the user group is present in the ai_custom_summarization_allowed_groups_map setting" do
- expect(guardian.can_see_summary?(topic)).to eq(true)
+ expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
end
end
@@ -48,12 +49,12 @@ describe DiscourseAi::GuardianExtensions do
let(:pm) { Fabricate(:private_message_topic) }
it "returns false" do
- expect(guardian.can_see_summary?(pm)).to eq(false)
+ expect(guardian.can_see_summary?(pm, AiSummary.summary_types[:complete])).to eq(false)
end
it "returns true if user is in a group that is allowed summaries" do
SiteSetting.ai_pm_summarization_allowed_groups = group.id
- expect(guardian.can_see_summary?(pm)).to eq(true)
+ expect(guardian.can_see_summary?(pm, AiSummary.summary_types[:complete])).to eq(true)
end
end
@@ -61,7 +62,7 @@ describe DiscourseAi::GuardianExtensions do
let(:guardian) { Guardian.new }
it "returns false for anons" do
- expect(guardian.can_see_summary?(topic)).to eq(false)
+ expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(false)
end
it "returns true for anons when there is a cached summary" do
@@ -70,9 +71,10 @@ describe DiscourseAi::GuardianExtensions do
summarized_text: "test",
original_content_sha: "123",
algorithm: "test",
+ summary_type: AiSummary.summary_types[:complete],
)
- expect(guardian.can_see_summary?(topic)).to eq(true)
+ expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
end
end
end
diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/fold_content_spec.rb
similarity index 66%
rename from spec/lib/modules/summarization/strategies/fold_content_spec.rb
rename to spec/lib/modules/summarization/fold_content_spec.rb
index 16ebc892..b1f84519 100644
--- a/spec/lib/modules/summarization/strategies/fold_content_spec.rb
+++ b/spec/lib/modules/summarization/fold_content_spec.rb
@@ -1,9 +1,14 @@
# frozen_string_literal: true
-RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
+RSpec.describe DiscourseAi::Summarization::FoldContent do
+ subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }
+
describe "#summarize" do
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
+ fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
+ fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
+
before do
SiteSetting.ai_summarization_enabled = true
@@ -15,10 +20,6 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
llm_model.update!(max_prompt_tokens: model_tokens)
end
- let(:strategy) { DiscourseAi::Summarization.default_strategy }
- let(:summarize_text) { "This is a text" }
- let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
-
let(:single_summary) { "this is a single summary" }
let(:concatenated_summary) { "this is a concatenated summary" }
@@ -28,27 +29,26 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
it "does one call to summarize content" do
result =
DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy|
- strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) }
+ summarizer.summarize(user).tap { expect(spy.completions).to eq(1) }
end
- expect(result[:summary]).to eq(single_summary)
+ expect(result.summarized_text).to eq(single_summary)
end
end
context "when the content to summarize doesn't fit in a single call" do
- it "summarizes each chunk and then concatenates them" do
- content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
+ fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }
+ it "summarizes each chunk and then concatenates them" do
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
[single_summary, single_summary, concatenated_summary],
- ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }
+ ) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(3) } }
- expect(result[:summary]).to eq(concatenated_summary)
+ expect(result.summarized_text).to eq(concatenated_summary)
end
it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
- content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
max_length_response = "(1 asd said: This is a text "
chunk_of_chunks = "I'm smol"
@@ -61,9 +61,9 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
chunk_of_chunks,
concatenated_summary,
],
- ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } }
+ ) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(5) } }
- expect(result[:summary]).to eq(concatenated_summary)
+ expect(result.summarized_text).to eq(concatenated_summary)
end
end
end
diff --git a/spec/lib/modules/summarization/strategies/topic_gist_spec.rb b/spec/lib/modules/summarization/strategies/topic_gist_spec.rb
new file mode 100644
index 00000000..ecea03ec
--- /dev/null
+++ b/spec/lib/modules/summarization/strategies/topic_gist_spec.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Summarization::Strategies::TopicGist do
+ subject(:gist) { described_class.new(topic) }
+
+ fab!(:topic) { Fabricate(:topic, highest_post_number: 25) }
+ fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
+ fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }
+
+ describe "#targets_data" do
+ context "when the topic has more than 20 posts" do
+ before do
+ offset = 3 # Already created posts 1 and 2
+ (topic.highest_post_number - 2).times do |i|
+ Fabricate(:post, topic: topic, post_number: i + offset)
+ end
+ end
+
+ it "includes the OP and the last 20 posts" do
+ content = gist.targets_data
+ post_numbers = content[:contents].map { |c| c[:id] }
+
+ expected = (6..25).to_a << 1
+
+ expect(post_numbers).to contain_exactly(*expected)
+ end
+ end
+
+ it "only includes visible posts" do
+ post_2.update!(hidden: true)
+
+ post_numbers = gist.targets_data[:contents].map { |c| c[:id] }
+
+ expect(post_numbers).to contain_exactly(1)
+ end
+
+ it "doesn't include posts without users" do
+ post_2.update!(user_id: nil)
+
+ post_numbers = gist.targets_data[:contents].map { |c| c[:id] }
+
+ expect(post_numbers).to contain_exactly(1)
+ end
+
+ it "doesn't include whispers" do
+ post_2.update!(post_type: Post.types[:whisper])
+
+ post_numbers = gist.targets_data[:contents].map { |c| c[:id] }
+
+ expect(post_numbers).to contain_exactly(1)
+ end
+
+ context "when the topic has embed content cached" do
+ it "embed content is used instead of the raw text" do
+ topic_embed =
+ Fabricate(
+ :topic_embed,
+ topic: topic,
+ embed_content_cache: "
hello world new post :D
",
+ )
+
+ content = gist.targets_data
+
+ op_content = content[:contents].first[:text]
+
+ expect(op_content).to include(topic_embed.embed_content_cache)
+ end
+ end
+ end
+end
diff --git a/spec/lib/modules/summarization/strategies/topic_summary_spec.rb b/spec/lib/modules/summarization/strategies/topic_summary_spec.rb
new file mode 100644
index 00000000..329404a5
--- /dev/null
+++ b/spec/lib/modules/summarization/strategies/topic_summary_spec.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Summarization::Strategies::TopicSummary do
+ subject(:topic_summary) { described_class.new(topic) }
+
+ fab!(:topic) { Fabricate(:topic, highest_post_number: 25) }
+ fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
+ fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }
+
+ describe "#targets_data" do
+ shared_examples "includes only public-visible topics" do
+ it "only includes visible posts" do
+ post_2.update!(hidden: true)
+
+ post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] }
+
+ expect(post_numbers).to contain_exactly(1)
+ end
+
+ it "doesn't include posts without users" do
+ post_2.update!(user_id: nil)
+
+ post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] }
+
+ expect(post_numbers).to contain_exactly(1)
+ end
+
+ it "doesn't include whispers" do
+ post_2.update!(post_type: Post.types[:whisper])
+
+ post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] }
+
+ expect(post_numbers).to contain_exactly(1)
+ end
+ end
+
+ context "when the topic has a best replies summary" do
+ before { topic.update(has_summary: true) }
+
+ it_behaves_like "includes only public-visible topics"
+ end
+
+ context "when the topic doesn't have a best replies summary" do
+ before { topic.update(has_summary: false) }
+
+ it_behaves_like "includes only public-visible topics"
+ end
+
+ context "when the topic has embed content cached" do
+ it "embed content is used instead of the raw text" do
+ topic_embed =
+ Fabricate(
+ :topic_embed,
+ topic: topic,
+ embed_content_cache: "
hello world new post :D
",
+ )
+
+ content = topic_summary.targets_data
+
+ op_content = content[:contents].first[:text]
+
+ expect(op_content).to include(topic_embed.embed_content_cache)
+ end
+ end
+ end
+end
diff --git a/spec/requests/summarization/summary_controller_spec.rb b/spec/requests/summarization/summary_controller_spec.rb
index 9206c3d9..493d7c85 100644
--- a/spec/requests/summarization/summary_controller_spec.rb
+++ b/spec/requests/summarization/summary_controller_spec.rb
@@ -19,6 +19,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
summarized_text: "test",
algorithm: "test",
original_content_sha: "test",
+ summary_type: AiSummary.summary_types[:complete],
)
sign_in(Fabricate(:admin))
@@ -47,6 +48,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
summarized_text: "test",
algorithm: "test",
original_content_sha: "test",
+ summary_type: AiSummary.summary_types[:complete],
)
get "/discourse-ai/summarization/t/#{topic.id}.json"
@@ -133,6 +135,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
summarized_text: "test",
algorithm: "test",
original_content_sha: "test",
+ summary_type: AiSummary.summary_types[:complete],
)
get "/discourse-ai/summarization/t/#{topic.id}.json"
diff --git a/spec/services/discourse_ai/topic_summarization_spec.rb b/spec/services/discourse_ai/topic_summarization_spec.rb
index ba505bc1..eca5a13e 100644
--- a/spec/services/discourse_ai/topic_summarization_spec.rb
+++ b/spec/services/discourse_ai/topic_summarization_spec.rb
@@ -11,55 +11,14 @@ describe DiscourseAi::TopicSummarization do
SiteSetting.ai_summarization_enabled = true
end
- let(:strategy) { DiscourseAi::Summarization.default_strategy }
-
- shared_examples "includes only public-visible topics" do
- subject { DiscourseAi::TopicSummarization.new(strategy, topic, user) }
-
- it "only includes visible posts" do
- topic.first_post.update!(hidden: true)
-
- posts = subject.summary_targets
-
- expect(posts.none?(&:hidden?)).to eq(true)
- end
-
- it "doesn't include posts without users" do
- topic.first_post.user.destroy!
-
- posts = subject.summary_targets
-
- expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
- end
-
- it "doesn't include deleted posts" do
- topic.first_post.update!(user_id: nil)
-
- posts = subject.summary_targets
-
- expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
- end
- end
-
- describe "#summary_targets" do
- context "when the topic has a best replies summary" do
- before { topic.has_summary = true }
-
- it_behaves_like "includes only public-visible topics"
- end
-
- context "when the topic doesn't have a best replies summary" do
- before { topic.has_summary = false }
-
- it_behaves_like "includes only public-visible topics"
- end
- end
+ let(:strategy) { DiscourseAi::Summarization.topic_summary(topic) }
describe "#summarize" do
- subject(:summarization) { described_class.new(strategy, topic, user) }
+ subject(:summarization) { described_class.new(strategy, user) }
def assert_summary_is_cached(topic, summary_response)
- cached_summary = AiSummary.find_by(target: topic)
+ cached_summary =
+ AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete])
expect(cached_summary.content_range).to cover(*topic.posts.map(&:post_number))
expect(cached_summary.summarized_text).to eq(summary)
@@ -82,41 +41,15 @@ describe DiscourseAi::TopicSummarization do
summarization.summarize
cached_summary_text = "This is a cached summary"
- AiSummary.find_by(target: topic).update!(
+ AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete]).update!(
summarized_text: cached_summary_text,
updated_at: 24.hours.ago,
)
- summarization = described_class.new(strategy, topic, user)
+ summarization = described_class.new(strategy, user)
section = summarization.summarize
expect(section.summarized_text).to eq(cached_summary_text)
end
-
- context "when the topic has embed content cached" do
- it "embed content is used instead of the raw text" do
- topic_embed =
- Fabricate(
- :topic_embed,
- topic: topic,
-            embed_content_cache: "<p>hello world new post :D</p>",
- )
-
- DiscourseAi::Completions::Llm.with_prepared_responses(["A summary"]) do |spy|
- summarization.summarize
-
- prompt_raw =
- spy
- .prompt_messages
- .reduce(+"") do |memo, m|
- memo << m[:content] << "\n"
-
- memo
- end
-
- expect(prompt_raw).to include(topic_embed.embed_content_cache)
- end
- end
- end
end
describe "invalidating cached summaries" do
@@ -124,7 +57,7 @@ describe DiscourseAi::TopicSummarization do
let(:updated_summary) { "This is the final summary" }
def cached_summary
- AiSummary.find_by(target: topic)
+ AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete])
end
before do
@@ -133,8 +66,8 @@ describe DiscourseAi::TopicSummarization do
# since it is glued to the old llm instance
# so we create the cached summary totally independantly
DiscourseAi::Completions::Llm.with_prepared_responses([cached_text]) do
- strategy = DiscourseAi::Summarization.default_strategy
- described_class.new(strategy, topic, user).summarize
+ strategy = DiscourseAi::Summarization.topic_summary(topic)
+ described_class.new(strategy, user).summarize
end
cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago)
diff --git a/spec/system/summarization/topic_summarization_spec.rb b/spec/system/summarization/topic_summarization_spec.rb
index a5aae5ab..fabe2dfa 100644
--- a/spec/system/summarization/topic_summarization_spec.rb
+++ b/spec/system/summarization/topic_summarization_spec.rb
@@ -32,6 +32,7 @@ RSpec.describe "Summarize a topic ", type: :system do
summarized_text: summarization_result,
algorithm: "test",
original_content_sha: "test",
+ summary_type: AiSummary.summary_types[:complete],
)
end