REFACTOR: Support of different summarization targets/prompts. (#835)
* DEV: Add summary types * Refactor for different summary types * Use enum for summary types * Update lib/summarization/strategies/topic_summary.rb Co-authored-by: Penar Musaraj <pmusaraj@gmail.com> * Update lib/summarization/strategies/topic_gist.rb Co-authored-by: Penar Musaraj <pmusaraj@gmail.com> * Update lib/summarization/strategies/chat_messages.rb Co-authored-by: Penar Musaraj <pmusaraj@gmail.com> * Fix chat_messages single prompt * Small tweak to the chat summarization prompt --------- Co-authored-by: Penar Musaraj <pmusaraj@gmail.com>
This commit is contained in:
parent
791fad1e6a
commit
c7acb4a6a0
|
@ -15,29 +15,21 @@ module DiscourseAi
|
||||||
channel = ::Chat::Channel.find(params[:channel_id])
|
channel = ::Chat::Channel.find(params[:channel_id])
|
||||||
guardian.ensure_can_join_chat_channel!(channel)
|
guardian.ensure_can_join_chat_channel!(channel)
|
||||||
|
|
||||||
strategy = DiscourseAi::Summarization.default_strategy
|
summarizer = DiscourseAi::Summarization.chat_channel_summary(channel, since)
|
||||||
raise Discourse::NotFound.new unless strategy
|
raise Discourse::NotFound.new unless summarizer
|
||||||
|
|
||||||
guardian.ensure_can_request_summary!
|
guardian.ensure_can_request_summary!
|
||||||
|
|
||||||
RateLimiter.new(current_user, "channel_summary", 6, 5.minutes).performed!
|
RateLimiter.new(current_user, "channel_summary", 6, 5.minutes).performed!
|
||||||
|
|
||||||
hijack do
|
hijack do
|
||||||
content = { content_title: channel.name }
|
strategy = DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, since)
|
||||||
|
|
||||||
content[:contents] = channel
|
|
||||||
.chat_messages
|
|
||||||
.where("chat_messages.created_at > ?", since.hours.ago)
|
|
||||||
.includes(:user)
|
|
||||||
.order(created_at: :asc)
|
|
||||||
.pluck(:id, :username_lower, :message)
|
|
||||||
.map { { id: _1, poster: _2, text: _3 } }
|
|
||||||
|
|
||||||
summarized_text =
|
summarized_text =
|
||||||
if content[:contents].empty?
|
if strategy.targets_data[:contents].empty?
|
||||||
I18n.t("discourse_ai.summarization.chat.no_targets")
|
I18n.t("discourse_ai.summarization.chat.no_targets")
|
||||||
else
|
else
|
||||||
strategy.summarize(content, current_user).dig(:summary)
|
summarizer.summarize(current_user)&.summarized_text
|
||||||
end
|
end
|
||||||
|
|
||||||
render json: { summary: summarized_text }
|
render json: { summary: summarized_text }
|
||||||
|
|
|
@ -9,15 +9,19 @@ module DiscourseAi
|
||||||
topic = Topic.find(params[:topic_id])
|
topic = Topic.find(params[:topic_id])
|
||||||
guardian.ensure_can_see!(topic)
|
guardian.ensure_can_see!(topic)
|
||||||
|
|
||||||
raise Discourse::NotFound if !guardian.can_see_summary?(topic)
|
if !guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])
|
||||||
|
raise Discourse::NotFound
|
||||||
|
end
|
||||||
|
|
||||||
RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user
|
RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user
|
||||||
|
|
||||||
opts = params.permit(:skip_age_check)
|
opts = params.permit(:skip_age_check)
|
||||||
skip_age_check = opts[:skip_age_check] == "true"
|
skip_age_check = opts[:skip_age_check] == "true"
|
||||||
|
|
||||||
|
summarization_service = DiscourseAi::TopicSummarization.for(topic, current_user)
|
||||||
|
|
||||||
if params[:stream] && current_user
|
if params[:stream] && current_user
|
||||||
cached_summary = DiscourseAi::TopicSummarization.cached_summary(topic, current_user)
|
cached_summary = summarization_service.cached_summary
|
||||||
|
|
||||||
if cached_summary && !skip_age_check
|
if cached_summary && !skip_age_check
|
||||||
render_serialized(cached_summary, AiTopicSummarySerializer)
|
render_serialized(cached_summary, AiTopicSummarySerializer)
|
||||||
|
@ -34,12 +38,7 @@ module DiscourseAi
|
||||||
render json: success_json
|
render json: success_json
|
||||||
else
|
else
|
||||||
hijack do
|
hijack do
|
||||||
summary =
|
summary = summarization_service.summarize(skip_age_check: skip_age_check)
|
||||||
DiscourseAi::TopicSummarization.summarize(
|
|
||||||
topic,
|
|
||||||
current_user,
|
|
||||||
skip_age_check: skip_age_check,
|
|
||||||
)
|
|
||||||
render_serialized(summary, AiTopicSummarySerializer)
|
render_serialized(summary, AiTopicSummarySerializer)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -8,8 +8,11 @@ module Jobs
|
||||||
return unless topic = Topic.find_by(id: args[:topic_id])
|
return unless topic = Topic.find_by(id: args[:topic_id])
|
||||||
return unless user = User.find_by(id: args[:user_id])
|
return unless user = User.find_by(id: args[:user_id])
|
||||||
|
|
||||||
strategy = DiscourseAi::Summarization.default_strategy
|
strategy = DiscourseAi::Summarization.topic_summary(topic)
|
||||||
return if strategy.nil? || !Guardian.new(user).can_see_summary?(topic)
|
if strategy.nil? ||
|
||||||
|
!Guardian.new(user).can_see_summary?(topic, AiSummary.summary_types[:complete])
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
guardian = Guardian.new(user)
|
guardian = Guardian.new(user)
|
||||||
return unless guardian.can_see?(topic)
|
return unless guardian.can_see?(topic)
|
||||||
|
@ -21,7 +24,7 @@ module Jobs
|
||||||
|
|
||||||
summary =
|
summary =
|
||||||
DiscourseAi::TopicSummarization
|
DiscourseAi::TopicSummarization
|
||||||
.new(strategy, topic, user)
|
.new(strategy, user)
|
||||||
.summarize(skip_age_check: skip_age_check) do |partial_summary|
|
.summarize(skip_age_check: skip_age_check) do |partial_summary|
|
||||||
streamed_summary << partial_summary
|
streamed_summary << partial_summary
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,23 @@
|
||||||
class AiSummary < ActiveRecord::Base
|
class AiSummary < ActiveRecord::Base
|
||||||
belongs_to :target, polymorphic: true
|
belongs_to :target, polymorphic: true
|
||||||
|
|
||||||
|
enum :summary_type, { complete: 0, gist: 1 }
|
||||||
|
|
||||||
|
def self.store!(target, summary_type, model, summary, content_ids)
|
||||||
|
AiSummary.create!(
|
||||||
|
target: target,
|
||||||
|
algorithm: model,
|
||||||
|
content_range: (content_ids.first..content_ids.last),
|
||||||
|
summarized_text: summary,
|
||||||
|
original_content_sha: build_sha(content_ids.join),
|
||||||
|
summary_type: summary_type,
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.build_sha(joined_ids)
|
||||||
|
Digest::SHA256.hexdigest(joined_ids)
|
||||||
|
end
|
||||||
|
|
||||||
def mark_as_outdated
|
def mark_as_outdated
|
||||||
@outdated = true
|
@outdated = true
|
||||||
end
|
end
|
||||||
|
@ -25,6 +42,7 @@ end
|
||||||
# algorithm :string not null
|
# algorithm :string not null
|
||||||
# created_at :datetime not null
|
# created_at :datetime not null
|
||||||
# updated_at :datetime not null
|
# updated_at :datetime not null
|
||||||
|
# summary_type :integer default("complete"), not null
|
||||||
#
|
#
|
||||||
# Indexes
|
# Indexes
|
||||||
#
|
#
|
||||||
|
|
|
@ -1,145 +1,44 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
module DiscourseAi
|
module DiscourseAi
|
||||||
|
# A cache layer on top of our topic summarization engine. Also handle permissions.
|
||||||
class TopicSummarization
|
class TopicSummarization
|
||||||
def self.summarize(topic, user, skip_age_check: false, &on_partial_blk)
|
def self.for(topic, user)
|
||||||
new(DiscourseAi::Summarization.default_strategy, topic, user).summarize(
|
new(DiscourseAi::Summarization.topic_summary(topic), user)
|
||||||
skip_age_check: skip_age_check,
|
|
||||||
&on_partial_blk
|
|
||||||
)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.cached_summary(topic, user)
|
def initialize(summarizer, user)
|
||||||
new(DiscourseAi::Summarization.default_strategy, topic, user).cached_summary
|
@summarizer = summarizer
|
||||||
end
|
|
||||||
|
|
||||||
def initialize(strategy, topic, user)
|
|
||||||
@strategy = strategy
|
|
||||||
@topic = topic
|
|
||||||
@user = user
|
@user = user
|
||||||
end
|
end
|
||||||
|
|
||||||
attr_reader :strategy, :topic, :user
|
|
||||||
|
|
||||||
def cached_summary
|
def cached_summary
|
||||||
existing_summary
|
summarizer.existing_summary
|
||||||
end
|
end
|
||||||
|
|
||||||
def summarize(skip_age_check: false, &on_partial_blk)
|
def summarize(skip_age_check: false, &on_partial_blk)
|
||||||
# Existing summary shouldn't be nil in this scenario because the controller checks its existence.
|
# Existing summary shouldn't be nil in this scenario because the controller checks its existence.
|
||||||
return if !user && !existing_summary
|
return if !user && !cached_summary
|
||||||
|
|
||||||
return existing_summary if use_cached?(skip_age_check)
|
return cached_summary if use_cached?(skip_age_check)
|
||||||
|
|
||||||
delete_cached_summaries! if existing_summary
|
summarizer.delete_cached_summaries! if cached_summary
|
||||||
|
|
||||||
content = {
|
summarizer.summarize(user, &on_partial_blk)
|
||||||
resource_path: "#{Discourse.base_path}/t/-/#{topic.id}",
|
|
||||||
content_title: topic.title,
|
|
||||||
contents: [],
|
|
||||||
}
|
|
||||||
|
|
||||||
summary_targets_data.map do |(pn, raw, username)|
|
|
||||||
raw_text = raw
|
|
||||||
|
|
||||||
if pn == 1 && topic.topic_embed&.embed_content_cache.present?
|
|
||||||
raw_text = topic.topic_embed&.embed_content_cache
|
|
||||||
end
|
|
||||||
|
|
||||||
content[:contents] << { poster: username, id: pn, text: raw_text }
|
|
||||||
end
|
|
||||||
|
|
||||||
summarization_result = strategy.summarize(content, user, &on_partial_blk)
|
|
||||||
cache_summary(summarization_result)
|
|
||||||
end
|
|
||||||
|
|
||||||
def summary_targets
|
|
||||||
topic.has_summary? ? best_replies : pick_selection
|
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def summary_sha
|
attr_reader :summarizer, :user
|
||||||
@summary_sha ||= build_sha(summary_targets_data.map(&:first))
|
|
||||||
end
|
|
||||||
|
|
||||||
def summary_targets_data
|
|
||||||
@summary_targets_data ||= summary_targets.pluck(:post_number, :raw, :username)
|
|
||||||
end
|
|
||||||
|
|
||||||
def existing_summary
|
|
||||||
if !defined?(@existing_summary)
|
|
||||||
@existing_summary = AiSummary.find_by(target: topic)
|
|
||||||
if @existing_summary && existing_summary.original_content_sha != summary_sha
|
|
||||||
@existing_summary.mark_as_outdated
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@existing_summary
|
|
||||||
end
|
|
||||||
|
|
||||||
def best_replies
|
|
||||||
Post
|
|
||||||
.summary(topic.id)
|
|
||||||
.where("post_type = ?", Post.types[:regular])
|
|
||||||
.where("NOT hidden")
|
|
||||||
.joins(:user)
|
|
||||||
.order(:post_number)
|
|
||||||
end
|
|
||||||
|
|
||||||
def pick_selection
|
|
||||||
posts =
|
|
||||||
Post
|
|
||||||
.where(topic_id: topic.id)
|
|
||||||
.where("post_type = ?", Post.types[:regular])
|
|
||||||
.where("NOT hidden")
|
|
||||||
.order(:post_number)
|
|
||||||
|
|
||||||
post_numbers = posts.limit(5).pluck(:post_number)
|
|
||||||
post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
|
|
||||||
post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
|
|
||||||
|
|
||||||
Post
|
|
||||||
.where(topic_id: topic.id)
|
|
||||||
.joins(:user)
|
|
||||||
.where("post_number in (?)", post_numbers)
|
|
||||||
.order(:post_number)
|
|
||||||
end
|
|
||||||
|
|
||||||
def delete_cached_summaries!
|
|
||||||
AiSummary.where(target: topic).destroy_all
|
|
||||||
end
|
|
||||||
|
|
||||||
def use_cached?(skip_age_check)
|
def use_cached?(skip_age_check)
|
||||||
can_summarize = Guardian.new(user).can_request_summary?
|
can_summarize = Guardian.new(user).can_request_summary?
|
||||||
|
|
||||||
existing_summary &&
|
cached_summary &&
|
||||||
!(
|
!(
|
||||||
can_summarize && new_targets? &&
|
can_summarize && cached_summary.outdated &&
|
||||||
(skip_age_check || existing_summary.created_at < 1.hour.ago)
|
(skip_age_check || cached_summary.created_at < 1.hour.ago)
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
def new_targets?
|
|
||||||
existing_summary&.original_content_sha != summary_sha
|
|
||||||
end
|
|
||||||
|
|
||||||
def cache_summary(result)
|
|
||||||
post_numbers = summary_targets_data.map(&:first)
|
|
||||||
|
|
||||||
cached_summary =
|
|
||||||
AiSummary.create!(
|
|
||||||
target: topic,
|
|
||||||
algorithm: strategy.display_name,
|
|
||||||
content_range: (post_numbers.first..post_numbers.last),
|
|
||||||
summarized_text: result[:summary],
|
|
||||||
original_content_sha: summary_sha,
|
|
||||||
)
|
|
||||||
|
|
||||||
cached_summary
|
|
||||||
end
|
|
||||||
|
|
||||||
def build_sha(ids)
|
|
||||||
Digest::SHA256.hexdigest(ids.join)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
class AddAiSummaryTypeColumn < ActiveRecord::Migration[7.1]
|
||||||
|
def change
|
||||||
|
add_column :ai_summaries, :summary_type, :integer, default: 0, null: false
|
||||||
|
end
|
||||||
|
end
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
module DiscourseAi
|
module DiscourseAi
|
||||||
module GuardianExtensions
|
module GuardianExtensions
|
||||||
def can_see_summary?(target)
|
def can_see_summary?(target, summary_type)
|
||||||
return false if !SiteSetting.ai_summarization_enabled
|
return false if !SiteSetting.ai_summarization_enabled
|
||||||
|
|
||||||
if target.class == Topic && target.private_message?
|
if target.class == Topic && target.private_message?
|
||||||
|
@ -14,7 +14,7 @@ module DiscourseAi
|
||||||
return false if !allowed
|
return false if !allowed
|
||||||
end
|
end
|
||||||
|
|
||||||
has_cached_summary = AiSummary.exists?(target: target)
|
has_cached_summary = AiSummary.exists?(target: target, summary_type: summary_type)
|
||||||
return has_cached_summary if user.nil?
|
return has_cached_summary if user.nil?
|
||||||
|
|
||||||
has_cached_summary || can_request_summary?
|
has_cached_summary || can_request_summary?
|
||||||
|
|
|
@ -1,9 +1,36 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
module DiscourseAi
|
module DiscourseAi
|
||||||
module Summarization
|
module Summarization
|
||||||
def self.default_strategy
|
def self.topic_summary(topic)
|
||||||
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
|
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
|
||||||
DiscourseAi::Summarization::Strategies::FoldContent.new(SiteSetting.ai_summarization_model)
|
DiscourseAi::Summarization::FoldContent.new(
|
||||||
|
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
|
||||||
|
DiscourseAi::Summarization::Strategies::TopicSummary.new(topic),
|
||||||
|
)
|
||||||
|
else
|
||||||
|
nil
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.topic_gist(topic)
|
||||||
|
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
|
||||||
|
DiscourseAi::Summarization::FoldContent.new(
|
||||||
|
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
|
||||||
|
DiscourseAi::Summarization::Strategies::TopicGist.new(topic),
|
||||||
|
)
|
||||||
|
else
|
||||||
|
nil
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.chat_channel_summary(channel, time_window_in_hours)
|
||||||
|
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
|
||||||
|
DiscourseAi::Summarization::FoldContent.new(
|
||||||
|
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
|
||||||
|
DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, time_window_in_hours),
|
||||||
|
persist_summaries: false,
|
||||||
|
)
|
||||||
else
|
else
|
||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
|
@ -2,14 +2,6 @@
|
||||||
|
|
||||||
module DiscourseAi
|
module DiscourseAi
|
||||||
module Summarization
|
module Summarization
|
||||||
def self.default_strategy
|
|
||||||
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
|
|
||||||
DiscourseAi::Summarization::Strategies::FoldContent.new(SiteSetting.ai_summarization_model)
|
|
||||||
else
|
|
||||||
nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class EntryPoint
|
class EntryPoint
|
||||||
def inject_into(plugin)
|
def inject_into(plugin)
|
||||||
plugin.add_to_serializer(:current_user, :can_summarize) do
|
plugin.add_to_serializer(:current_user, :can_summarize) do
|
||||||
|
@ -18,11 +10,11 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
plugin.add_to_serializer(:topic_view, :summarizable) do
|
plugin.add_to_serializer(:topic_view, :summarizable) do
|
||||||
scope.can_see_summary?(object.topic)
|
scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete])
|
||||||
end
|
end
|
||||||
|
|
||||||
plugin.add_to_serializer(:web_hook_topic_view, :summarizable) do
|
plugin.add_to_serializer(:web_hook_topic_view, :summarizable) do
|
||||||
scope.can_see_summary?(object.topic)
|
scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,189 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module DiscourseAi
|
||||||
|
module Summarization
|
||||||
|
# This class offers a generic way of summarizing content from multiple sources using different prompts.
|
||||||
|
#
|
||||||
|
# It summarizes large amounts of content by recursively summarizing it in smaller chunks that
|
||||||
|
# fit the given model context window, finally concatenating the disjoint summaries
|
||||||
|
# into a final version.
|
||||||
|
#
|
||||||
|
class FoldContent
|
||||||
|
def initialize(llm, strategy, persist_summaries: true)
|
||||||
|
@llm = llm
|
||||||
|
@strategy = strategy
|
||||||
|
@persist_summaries = persist_summaries
|
||||||
|
end
|
||||||
|
|
||||||
|
attr_reader :llm, :strategy
|
||||||
|
|
||||||
|
# @param user { User } - User object used for auditing usage.
|
||||||
|
#
|
||||||
|
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
|
||||||
|
# Note: The block is only called with results of the final summary, not intermediate summaries.
|
||||||
|
#
|
||||||
|
# @returns { AiSummary } - Resulting summary.
|
||||||
|
def summarize(user, &on_partial_blk)
|
||||||
|
opts = content_to_summarize.except(:contents)
|
||||||
|
|
||||||
|
initial_chunks =
|
||||||
|
rebalance_chunks(
|
||||||
|
content_to_summarize[:contents].map do |c|
|
||||||
|
{ ids: [c[:id]], summary: format_content_item(c) }
|
||||||
|
end,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Special case where we can do all the summarization in one pass.
|
||||||
|
result =
|
||||||
|
if initial_chunks.length == 1
|
||||||
|
{
|
||||||
|
summary:
|
||||||
|
summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk),
|
||||||
|
chunks: [],
|
||||||
|
}
|
||||||
|
else
|
||||||
|
summarize_chunks(initial_chunks, user, opts, &on_partial_blk)
|
||||||
|
end
|
||||||
|
|
||||||
|
if persist_summaries
|
||||||
|
AiSummary.store!(
|
||||||
|
strategy.target,
|
||||||
|
strategy.type,
|
||||||
|
llm_model.name,
|
||||||
|
result[:summary],
|
||||||
|
content_to_summarize[:contents].map { |c| c[:id] },
|
||||||
|
)
|
||||||
|
else
|
||||||
|
AiSummary.new(summarized_text: result[:summary])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# @returns { AiSummary } - Resulting summary.
|
||||||
|
#
|
||||||
|
# Finds a summary matching the target and strategy. Marks it as outdated if the strategy found newer content.
|
||||||
|
def existing_summary
|
||||||
|
if !defined?(@existing_summary)
|
||||||
|
summary = AiSummary.find_by(target: strategy.target, summary_type: strategy.type)
|
||||||
|
|
||||||
|
if summary
|
||||||
|
@existing_summary = summary
|
||||||
|
|
||||||
|
if existing_summary.original_content_sha != latest_sha
|
||||||
|
@existing_summary.mark_as_outdated
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
@existing_summary
|
||||||
|
end
|
||||||
|
|
||||||
|
def delete_cached_summaries!
|
||||||
|
AiSummary.where(target: strategy.target, summary_type: strategy.type).destroy_all
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
attr_reader :persist_summaries
|
||||||
|
|
||||||
|
def llm_model
|
||||||
|
llm.llm_model
|
||||||
|
end
|
||||||
|
|
||||||
|
def content_to_summarize
|
||||||
|
@targets_data ||= strategy.targets_data
|
||||||
|
end
|
||||||
|
|
||||||
|
def latest_sha
|
||||||
|
@latest_sha ||= AiSummary.build_sha(content_to_summarize[:contents].map { |c| c[:id] }.join)
|
||||||
|
end
|
||||||
|
|
||||||
|
def summarize_chunks(chunks, user, opts, &on_partial_blk)
|
||||||
|
# Safely assume we always have more than one chunk.
|
||||||
|
summarized_chunks = summarize_in_chunks(chunks, user, opts)
|
||||||
|
total_summaries_size =
|
||||||
|
llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
|
||||||
|
|
||||||
|
if total_summaries_size < available_tokens
|
||||||
|
# Chunks are small enough, we can concatenate them.
|
||||||
|
{
|
||||||
|
summary:
|
||||||
|
concatenate_summaries(
|
||||||
|
summarized_chunks.map { |s| s[:summary] },
|
||||||
|
user,
|
||||||
|
&on_partial_blk
|
||||||
|
),
|
||||||
|
chunks: summarized_chunks,
|
||||||
|
}
|
||||||
|
else
|
||||||
|
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
|
||||||
|
rebalanced_chunks = rebalance_chunks(summarized_chunks)
|
||||||
|
|
||||||
|
summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def format_content_item(item)
|
||||||
|
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
|
||||||
|
end
|
||||||
|
|
||||||
|
def rebalance_chunks(chunks)
|
||||||
|
section = { ids: [], summary: "" }
|
||||||
|
|
||||||
|
chunks =
|
||||||
|
chunks.reduce([]) do |sections, chunk|
|
||||||
|
if llm_model.tokenizer_class.can_expand_tokens?(
|
||||||
|
section[:summary],
|
||||||
|
chunk[:summary],
|
||||||
|
available_tokens,
|
||||||
|
)
|
||||||
|
section[:summary] += chunk[:summary]
|
||||||
|
section[:ids] = section[:ids].concat(chunk[:ids])
|
||||||
|
else
|
||||||
|
sections << section
|
||||||
|
section = chunk
|
||||||
|
end
|
||||||
|
|
||||||
|
sections
|
||||||
|
end
|
||||||
|
|
||||||
|
chunks << section if section[:summary].present?
|
||||||
|
|
||||||
|
chunks
|
||||||
|
end
|
||||||
|
|
||||||
|
def summarize_single(text, user, opts, &on_partial_blk)
|
||||||
|
prompt = strategy.summarize_single_prompt(text, opts)
|
||||||
|
|
||||||
|
llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
|
||||||
|
end
|
||||||
|
|
||||||
|
def summarize_in_chunks(chunks, user, opts)
|
||||||
|
chunks.map do |chunk|
|
||||||
|
prompt = strategy.summarize_single_prompt(chunk[:summary], opts)
|
||||||
|
|
||||||
|
chunk[:summary] = llm.generate(
|
||||||
|
prompt,
|
||||||
|
user: user,
|
||||||
|
max_tokens: 300,
|
||||||
|
feature_name: "summarize",
|
||||||
|
)
|
||||||
|
|
||||||
|
chunk
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def concatenate_summaries(texts_to_summarize, user, &on_partial_blk)
|
||||||
|
prompt = strategy.concatenation_prompt(texts_to_summarize)
|
||||||
|
|
||||||
|
llm.generate(prompt, user: user, &on_partial_blk)
|
||||||
|
end
|
||||||
|
|
||||||
|
def available_tokens
|
||||||
|
# Reserve tokens for the response and the base prompt
|
||||||
|
# ~500 words
|
||||||
|
reserved_tokens = 700
|
||||||
|
|
||||||
|
llm_model.max_prompt_tokens - reserved_tokens
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,59 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module DiscourseAi
|
||||||
|
module Summarization
|
||||||
|
module Strategies
|
||||||
|
# Objects inheriting from this class will get passed as a dependency to `DiscourseAi::Summarization::FoldContent`.
|
||||||
|
# This collaborator knows how to source the content to summarize and the prompts used in the process,
|
||||||
|
# one for summarizing a chunk and another for concatenating them if necessary.
|
||||||
|
class Base
|
||||||
|
def initialize(target)
|
||||||
|
@target = target
|
||||||
|
end
|
||||||
|
|
||||||
|
attr_reader :target
|
||||||
|
|
||||||
|
# The summary type differentiates instances of `AiSummary` pointing to a single target.
|
||||||
|
# See the `summary_type` enum for available options.
|
||||||
|
def type
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
|
||||||
|
# @returns { Hash } - Content to summarize.
|
||||||
|
#
|
||||||
|
# This method returns a hash with the content to summarize and additional information.
|
||||||
|
# The only mandatory key is `contents`, which must be an array of hashes with
|
||||||
|
# the following structure:
|
||||||
|
#
|
||||||
|
# {
|
||||||
|
# poster: A way to tell who wrote the content,
|
||||||
|
# id: A number to signal order,
|
||||||
|
# text: Text to summarize
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# Additionally, you could add more context, which will be available in the prompt. e.g.:
|
||||||
|
#
|
||||||
|
# {
|
||||||
|
# resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
|
||||||
|
# content_title: target.title,
|
||||||
|
# contents: [...]
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
def targets_data
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
|
||||||
|
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when concatenating multiple chunks.
|
||||||
|
def contatenation_prompt(_texts_to_summarize)
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
|
||||||
|
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM on each chunk,
|
||||||
|
# and when the whole content fits in one call.
|
||||||
|
def summarize_single_prompt(_input, _opts)
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,85 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module DiscourseAi
|
||||||
|
module Summarization
|
||||||
|
module Strategies
|
||||||
|
class ChatMessages < Base
|
||||||
|
def type
|
||||||
|
AiSummary.summary_types[:complete]
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(target, since)
|
||||||
|
super(target)
|
||||||
|
@since = since
|
||||||
|
end
|
||||||
|
|
||||||
|
def targets_data
|
||||||
|
content = { content_title: target.name }
|
||||||
|
|
||||||
|
content[:contents] = target
|
||||||
|
.chat_messages
|
||||||
|
.where("chat_messages.created_at > ?", since.hours.ago)
|
||||||
|
.includes(:user)
|
||||||
|
.order(created_at: :asc)
|
||||||
|
.pluck(:id, :username_lower, :message)
|
||||||
|
.map { { id: _1, poster: _2, text: _3 } }
|
||||||
|
|
||||||
|
content
|
||||||
|
end
|
||||||
|
|
||||||
|
def contatenation_prompt(texts_to_summarize)
|
||||||
|
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
|
||||||
|
You are a summarization bot tasked with creating a cohesive narrative by intelligently merging multiple disjointed summaries.
|
||||||
|
Your response should consist of well-structured paragraphs that combines these summaries into a clear and comprehensive overview.
|
||||||
|
Avoid adding any additional text or commentary. Format your output using Discourse forum Markdown.
|
||||||
|
TEXT
|
||||||
|
|
||||||
|
prompt.push(type: :user, content: <<~TEXT.strip)
|
||||||
|
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
|
||||||
|
|
||||||
|
<input>
|
||||||
|
#{texts_to_summarize.join("\n")}
|
||||||
|
</input>
|
||||||
|
TEXT
|
||||||
|
|
||||||
|
prompt
|
||||||
|
end
|
||||||
|
|
||||||
|
def summarize_single_prompt(input, opts)
|
||||||
|
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
|
||||||
|
You are a summarization bot designed to generate clear and insightful paragraphs that conveys the main topics
|
||||||
|
and developments from a series of chat messages within a user-selected time window.
|
||||||
|
|
||||||
|
Analyze the messages to extract key themes, participants' intentions, and any significant conclusions or decisions.
|
||||||
|
Your summary should be concise yet comprehensive, providing an overview that is accessible to someone with no prior context of the conversation.
|
||||||
|
|
||||||
|
- Only include the summary, WITHOUT additional commentary.
|
||||||
|
- Don't mention the channel title. Avoid including extraneous details or subjective opinions.
|
||||||
|
- Maintain the original language of the text being summarized.
|
||||||
|
- The same user could write multiple messages in a row, don't treat them as different persons.
|
||||||
|
- Aim for summaries to be 400 words or less.
|
||||||
|
|
||||||
|
TEXT
|
||||||
|
|
||||||
|
prompt.push(type: :user, content: <<~TEXT.strip)
|
||||||
|
#{opts[:content_title].present? ? "The name of the channel is: " + opts[:content_title] + ".\n" : ""}
|
||||||
|
|
||||||
|
Here are the messages, inside <input></input> XML tags:
|
||||||
|
|
||||||
|
<input>
|
||||||
|
#{input}
|
||||||
|
</input>
|
||||||
|
|
||||||
|
Generate a summary of the given chat messages.
|
||||||
|
TEXT
|
||||||
|
|
||||||
|
prompt
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
attr_reader :since
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -1,198 +0,0 @@
|
||||||
# frozen_string_literal: true
|
|
||||||
|
|
||||||
module DiscourseAi
|
|
||||||
module Summarization
|
|
||||||
module Strategies
|
|
||||||
class FoldContent
|
|
||||||
def initialize(completion_model)
|
|
||||||
@llm = DiscourseAi::Completions::Llm.proxy(completion_model)
|
|
||||||
raise "Invalid model provided for summarization strategy" if @llm.llm_model.nil?
|
|
||||||
end
|
|
||||||
|
|
||||||
attr_reader :llm
|
|
||||||
|
|
||||||
def summarize(content, user, &on_partial_blk)
|
|
||||||
opts = content.except(:contents)
|
|
||||||
|
|
||||||
initial_chunks =
|
|
||||||
rebalance_chunks(
|
|
||||||
content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } },
|
|
||||||
)
|
|
||||||
|
|
||||||
# Special case where we can do all the summarization in one pass.
|
|
||||||
if initial_chunks.length == 1
|
|
||||||
{
|
|
||||||
summary:
|
|
||||||
summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk),
|
|
||||||
chunks: [],
|
|
||||||
}
|
|
||||||
else
|
|
||||||
summarize_chunks(initial_chunks, user, opts, &on_partial_blk)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def display_name
|
|
||||||
llm_model&.name || "unknown model"
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def llm_model
|
|
||||||
llm.llm_model
|
|
||||||
end
|
|
||||||
|
|
||||||
def summarize_chunks(chunks, user, opts, &on_partial_blk)
|
|
||||||
# Safely assume we always have more than one chunk.
|
|
||||||
summarized_chunks = summarize_in_chunks(chunks, user, opts)
|
|
||||||
total_summaries_size =
|
|
||||||
llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
|
|
||||||
|
|
||||||
if total_summaries_size < available_tokens
|
|
||||||
# Chunks are small enough, we can concatenate them.
|
|
||||||
{
|
|
||||||
summary:
|
|
||||||
concatenate_summaries(
|
|
||||||
summarized_chunks.map { |s| s[:summary] },
|
|
||||||
user,
|
|
||||||
&on_partial_blk
|
|
||||||
),
|
|
||||||
chunks: summarized_chunks,
|
|
||||||
}
|
|
||||||
else
|
|
||||||
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
|
|
||||||
rebalanced_chunks = rebalance_chunks(summarized_chunks)
|
|
||||||
|
|
||||||
summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Renders one content item (hash with :id, :poster and :text) as the text
# fragment fed to the model. The trailing space separates consecutive items
# when fragments are concatenated.
def format_content_item(item)
  format("(%s %s said: %s ", item[:id], item[:poster], item[:text])
end
|
|
||||||
|
|
||||||
# Greedily packs +chunks+ (hashes with :ids and :summary) into as few
# sections as possible, where each section's text still passes the
# tokenizer's can_expand_tokens? check against available_tokens.
#
# Returns a new array of section hashes ({ ids: [...], summary: "..." }).
# Sections with a blank summary are never emitted, so a first chunk that is
# too large on its own no longer produces a leading empty section. The
# input hashes and their :ids arrays are left untouched.
def rebalance_chunks(chunks)
  sections = []
  current = { ids: [], summary: "" }

  chunks.each do |chunk|
    if llm_model.tokenizer_class.can_expand_tokens?(
         current[:summary],
         chunk[:summary],
         available_tokens,
       )
      # `+=` builds a new string, so the chunk's own text is never mutated.
      current[:summary] += chunk[:summary]
      current[:ids].concat(chunk[:ids])
    else
      sections << current if current[:summary].present?
      # Start the next section from a copy so later merges don't leak into
      # the caller's chunk.
      current = { ids: chunk[:ids].dup, summary: chunk[:summary] }
    end
  end

  sections << current if current[:summary].present?

  sections
end
|
|
||||||
|
|
||||||
# Summarizes +text+ with a single LLM call and returns whatever
# llm.generate returns. +on_partial_blk+ is forwarded to that call.
def summarize_single(text, user, opts, &on_partial_blk)
  single_prompt = summarization_prompt(text, opts)
  llm.generate(single_prompt, user: user, feature_name: "summarize", &on_partial_blk)
end
|
|
||||||
|
|
||||||
# Replaces each chunk's :summary with an LLM-generated summary of it
# (capped at 300 response tokens) and returns the chunks in a new array.
# The chunk hashes themselves are updated in place.
def summarize_in_chunks(chunks, user, opts)
  chunks.map do |chunk|
    chunk.tap do |entry|
      chunk_prompt = summarization_prompt(entry[:summary], opts)

      entry[:summary] = llm.generate(
        chunk_prompt,
        user: user,
        max_tokens: 300,
        feature_name: "summarize",
      )
    end
  end
end
|
|
||||||
|
|
||||||
# Asks the LLM to merge the partial +summaries+ (one per line inside
# <input> tags) into a single narrative and returns the result of
# llm.generate. +on_partial_blk+ is forwarded to that call.
def concatenate_summaries(summaries, user, &on_partial_blk)
  system_message = <<~TEXT.strip
    You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
    The narrative you create is in the form of one or multiple paragraphs.
    Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
    I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
    You understand and generate Discourse forum Markdown.
    You format the response, including links, using Markdown.
  TEXT
  prompt = DiscourseAi::Completions::Prompt.new(system_message)

  user_message = <<~TEXT.strip
    THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:

    <input>
    #{summaries.join("\n")}
    </input>
  TEXT
  prompt.push(type: :user, content: user_message)

  llm.generate(prompt, user: user, &on_partial_blk)
end
|
|
||||||
|
|
||||||
# Builds the prompt used to summarize +input+.
#
# +opts+ may carry:
#   :resource_path - when present, linking instructions and a one-shot
#                    example using that path are added to the prompt.
#   :content_title - when present, announced to the model before the posts.
#
# Returns a DiscourseAi::Completions::Prompt.
def summarization_prompt(input, opts)
  insts = +<<~TEXT
    You are an advanced summarization bot that generates concise, coherent summaries of provided text.

    - Only include the summary, without any additional commentary.
    - You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
    - Maintain the original language of the text being summarized.
    - Aim for summaries to be 400 words or less.

  TEXT

  insts << <<~TEXT if opts[:resource_path]
    - Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
    - Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER)
    - Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3)
    - Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6)
    - Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13)
    - When formatting usernames either use @USERNAME OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER)
  TEXT

  prompt = DiscourseAi::Completions::Prompt.new(insts.strip)

  if opts[:resource_path]
    # One-shot example showing the expected linking style. The answer must
    # agree with the example input (user1 loves Mondays, user2 hates them).
    prompt.push(
      type: :user,
      content:
        "Here are the posts inside <input></input> XML tags:\n\n<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
    )
    prompt.push(
      type: :model,
      content:
        "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) loves them, while [user2](#{opts[:resource_path]}/2) hates them.",
    )
  end

  prompt.push(type: :user, content: <<~TEXT.strip)
    #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
    Here are the posts, inside <input></input> XML tags:

    <input>
    #{input}
    </input>

    Generate a concise, coherent summary of the text above maintaining the original language.
  TEXT

  prompt
end
|
|
||||||
|
|
||||||
# Number of prompt tokens left for content once room has been set aside
# for the response and the base prompt.
def available_tokens
  # 700 tokens is roughly 500 words.
  llm_model.max_prompt_tokens - 700
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -0,0 +1,90 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module DiscourseAi
  module Summarization
    module Strategies
      # Strategy producing a one-sentence "gist" of a topic, built from the
      # first post plus the twenty most recent regular, visible posts.
      # +target+ is presumably the topic handed to Base's constructor
      # (not visible in this file).
      class TopicGist < Base
        def type
          AiSummary.summary_types[:gist]
        end

        # Collects the posts to summarize.
        #
        # Returns { content_title:, contents: [{ poster:, id:, text: }, ...] }
        # ordered by post number. The first post's text is replaced by the
        # cached embed content when the topic has one.
        def targets_data
          op_post_number = 1

          recent_post_numbers =
            Post
              .where(topic_id: target.id)
              .where("post_type = ?", Post.types[:regular])
              .where("NOT hidden")
              .order("post_number DESC")
              .limit(20)
              .pluck(:post_number)

          wanted_post_numbers = recent_post_numbers.push(op_post_number)

          rows =
            Post
              .where(topic_id: target.id)
              .joins(:user)
              .where("post_number IN (?)", wanted_post_numbers)
              .order(:post_number)
              .pluck(:post_number, :raw, :username)

          summary_content = { content_title: target.title, contents: [] }

          rows.each do |post_number, raw, username|
            text = raw

            if post_number == 1
              embed_cache = target.topic_embed&.embed_content_cache
              text = embed_cache if embed_cache.present?
            end

            summary_content[:contents] << { poster: username, id: post_number, text: text }
          end

          summary_content
        end

        # Prompt asking the model to merge partial summaries into a single
        # sentence.
        def concatenation_prompt(texts_to_summarize)
          system_message = <<~TEXT.strip
            You are a summarization bot tasked with creating a single, concise sentence by merging disjointed summaries into a cohesive statement.
            Your response should strictly be this single, comprehensive sentence, without any additional text or comments.
          TEXT
          prompt = DiscourseAi::Completions::Prompt.new(system_message)

          user_message = <<~TEXT.strip
            THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:

            <input>
            #{texts_to_summarize.join("\n")}
            </input>
          TEXT
          prompt.push(type: :user, content: user_message)

          prompt
        end

        # Prompt asking the model for a one-sentence summary of +input+.
        # +opts[:content_title]+, when present, is announced before the posts.
        def summarize_single_prompt(input, opts)
          system_message = <<~TEXT.strip
            You are an advanced summarization bot. Your task is to analyze a given conversation and generate a single,
            concise sentence that clearly conveys the main topic and purpose of the discussion to someone with no prior context.

            - Focus on the central theme or issue being addressed, while maintaining an objective and neutral tone.
            - Avoid including extraneous details or subjective opinions.
            - Maintain the original language of the text being summarized.
          TEXT
          prompt = DiscourseAi::Completions::Prompt.new(system_message)

          user_message = <<~TEXT.strip
            #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}

            Here are the posts, inside <input></input> XML tags:

            <input>
            #{input}
            </input>

            Generate a single sentence of the text above maintaining the original language.
          TEXT
          prompt.push(type: :user, content: user_message)

          prompt
        end
      end
    end
  end
end
|
|
@ -0,0 +1,142 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module DiscourseAi
  module Summarization
    module Strategies
      # Strategy producing a complete, multi-paragraph summary of a topic.
      # +target+ is presumably the topic handed to Base's constructor
      # (not visible in this file).
      class TopicSummary < Base
        def type
          AiSummary.summary_types[:complete]
        end

        # Collects the posts to summarize.
        #
        # Returns { resource_path:, content_title:, contents: [...] } where
        # each entry of :contents is { poster:, id:, text: }. Posts come from
        # best_replies when the topic has a summary, otherwise from
        # pick_selection. The first post's text is replaced by the cached
        # embed content when the topic has one.
        def targets_data
          content = {
            resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
            content_title: target.title,
            contents: [],
          }

          posts_data =
            (target.has_summary? ? best_replies : pick_selection).pluck(
              :post_number,
              :raw,
              :username,
            )

          posts_data.each do |(pn, raw, username)|
            raw_text = raw

            if pn == 1 && target.topic_embed&.embed_content_cache.present?
              raw_text = target.topic_embed&.embed_content_cache
            end

            content[:contents] << { poster: username, id: pn, text: raw_text }
          end

          content
        end

        # Prompt asking the model to merge partial summaries into one
        # cohesive, Markdown-formatted narrative.
        def concatenation_prompt(texts_to_summarize)
          prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
            You are a summarization bot that effectively concatenates disjointed summaries, creating a cohesive narrative.
            The narrative you create is in the form of one or multiple paragraphs.
            Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
            I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
            You understand and generate Discourse forum Markdown.
            You format the response, including links, using Markdown.
          TEXT

          prompt.push(type: :user, content: <<~TEXT.strip)
            THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:

            <input>
            #{texts_to_summarize.join("\n")}
            </input>
          TEXT

          prompt
        end

        # Prompt asking the model to summarize +input+.
        #
        # +opts+ may carry:
        #   :resource_path - when present, linking instructions and a
        #                    one-shot example using that path are added.
        #   :content_title - when present, announced before the posts.
        def summarize_single_prompt(input, opts)
          insts = +<<~TEXT
            You are an advanced summarization bot that generates concise, coherent summaries of provided text.

            - Only include the summary, without any additional commentary.
            - You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
            - Maintain the original language of the text being summarized.
            - Aim for summaries to be 400 words or less.

          TEXT

          insts << <<~TEXT if opts[:resource_path]
            - Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
            - Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER)
            - Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3)
            - Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6)
            - Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13)
            - When formatting usernames either use @USERNAME OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER)
          TEXT

          prompt = DiscourseAi::Completions::Prompt.new(insts.strip)

          if opts[:resource_path]
            # One-shot example showing the expected linking style. The answer
            # must agree with the example input (user1 loves Mondays, user2
            # hates them).
            prompt.push(
              type: :user,
              content:
                "Here are the posts inside <input></input> XML tags:\n\n<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
            )
            prompt.push(
              type: :model,
              content:
                "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) loves them, while [user2](#{opts[:resource_path]}/2) hates them.",
            )
          end

          prompt.push(type: :user, content: <<~TEXT.strip)
            #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
            Here are the posts, inside <input></input> XML tags:

            <input>
            #{input}
            </input>

            Generate a concise, coherent summary of the text above maintaining the original language.
          TEXT

          prompt
        end

        private

        attr_reader :topic

        # Regular, visible posts from Post.summary for the topic, joined with
        # their users and ordered by post number.
        def best_replies
          Post
            .summary(target.id)
            .where("post_type = ?", Post.types[:regular])
            .where("NOT hidden")
            .joins(:user)
            .order(:post_number)
        end

        # Selection used when the topic has no summary: the first 5 posts,
        # the 50 highest-scored posts and the last 5 posts (regular and
        # visible only), joined with their users and ordered by post number.
        def pick_selection
          posts =
            Post
              .where(topic_id: target.id)
              .where("post_type = ?", Post.types[:regular])
              .where("NOT hidden")
              .order(:post_number)

          post_numbers = posts.limit(5).pluck(:post_number)
          post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
          post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)

          Post
            .where(topic_id: target.id)
            .joins(:user)
            .where("post_number in (?)", post_numbers)
            .order(:post_number)
        end
      end
    end
  end
end
|
|
@ -20,7 +20,7 @@ describe DiscourseAi::GuardianExtensions do
|
||||||
it "returns false" do
|
it "returns false" do
|
||||||
SiteSetting.ai_custom_summarization_allowed_groups = ""
|
SiteSetting.ai_custom_summarization_allowed_groups = ""
|
||||||
|
|
||||||
expect(guardian.can_see_summary?(topic)).to eq(false)
|
expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(false)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "returns true if there is a cached summary" do
|
it "returns true if there is a cached summary" do
|
||||||
|
@ -29,9 +29,10 @@ describe DiscourseAi::GuardianExtensions do
|
||||||
summarized_text: "test",
|
summarized_text: "test",
|
||||||
original_content_sha: "123",
|
original_content_sha: "123",
|
||||||
algorithm: "test",
|
algorithm: "test",
|
||||||
|
summary_type: AiSummary.summary_types[:complete],
|
||||||
)
|
)
|
||||||
|
|
||||||
expect(guardian.can_see_summary?(topic)).to eq(true)
|
expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -39,7 +40,7 @@ describe DiscourseAi::GuardianExtensions do
|
||||||
before { SiteSetting.ai_custom_summarization_allowed_groups = group.id }
|
before { SiteSetting.ai_custom_summarization_allowed_groups = group.id }
|
||||||
|
|
||||||
it "returns true if the user group is present in the ai_custom_summarization_allowed_groups_map setting" do
|
it "returns true if the user group is present in the ai_custom_summarization_allowed_groups_map setting" do
|
||||||
expect(guardian.can_see_summary?(topic)).to eq(true)
|
expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -48,12 +49,12 @@ describe DiscourseAi::GuardianExtensions do
|
||||||
let(:pm) { Fabricate(:private_message_topic) }
|
let(:pm) { Fabricate(:private_message_topic) }
|
||||||
|
|
||||||
it "returns false" do
|
it "returns false" do
|
||||||
expect(guardian.can_see_summary?(pm)).to eq(false)
|
expect(guardian.can_see_summary?(pm, AiSummary.summary_types[:complete])).to eq(false)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "returns true if user is in a group that is allowed summaries" do
|
it "returns true if user is in a group that is allowed summaries" do
|
||||||
SiteSetting.ai_pm_summarization_allowed_groups = group.id
|
SiteSetting.ai_pm_summarization_allowed_groups = group.id
|
||||||
expect(guardian.can_see_summary?(pm)).to eq(true)
|
expect(guardian.can_see_summary?(pm, AiSummary.summary_types[:complete])).to eq(true)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -61,7 +62,7 @@ describe DiscourseAi::GuardianExtensions do
|
||||||
let(:guardian) { Guardian.new }
|
let(:guardian) { Guardian.new }
|
||||||
|
|
||||||
it "returns false for anons" do
|
it "returns false for anons" do
|
||||||
expect(guardian.can_see_summary?(topic)).to eq(false)
|
expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(false)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "returns true for anons when there is a cached summary" do
|
it "returns true for anons when there is a cached summary" do
|
||||||
|
@ -70,9 +71,10 @@ describe DiscourseAi::GuardianExtensions do
|
||||||
summarized_text: "test",
|
summarized_text: "test",
|
||||||
original_content_sha: "123",
|
original_content_sha: "123",
|
||||||
algorithm: "test",
|
algorithm: "test",
|
||||||
|
summary_type: AiSummary.summary_types[:complete],
|
||||||
)
|
)
|
||||||
|
|
||||||
expect(guardian.can_see_summary?(topic)).to eq(true)
|
expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,9 +1,14 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
|
RSpec.describe DiscourseAi::Summarization::FoldContent do
|
||||||
|
subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }
|
||||||
|
|
||||||
describe "#summarize" do
|
describe "#summarize" do
|
||||||
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
|
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
|
||||||
|
|
||||||
|
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
|
||||||
|
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
|
||||||
|
|
||||||
before do
|
before do
|
||||||
SiteSetting.ai_summarization_enabled = true
|
SiteSetting.ai_summarization_enabled = true
|
||||||
|
|
||||||
|
@ -15,10 +20,6 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
|
||||||
llm_model.update!(max_prompt_tokens: model_tokens)
|
llm_model.update!(max_prompt_tokens: model_tokens)
|
||||||
end
|
end
|
||||||
|
|
||||||
let(:strategy) { DiscourseAi::Summarization.default_strategy }
|
|
||||||
let(:summarize_text) { "This is a text" }
|
|
||||||
let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
|
|
||||||
|
|
||||||
let(:single_summary) { "this is a single summary" }
|
let(:single_summary) { "this is a single summary" }
|
||||||
let(:concatenated_summary) { "this is a concatenated summary" }
|
let(:concatenated_summary) { "this is a concatenated summary" }
|
||||||
|
|
||||||
|
@ -28,27 +29,26 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
|
||||||
it "does one call to summarize content" do
|
it "does one call to summarize content" do
|
||||||
result =
|
result =
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy|
|
DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy|
|
||||||
strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) }
|
summarizer.summarize(user).tap { expect(spy.completions).to eq(1) }
|
||||||
end
|
end
|
||||||
|
|
||||||
expect(result[:summary]).to eq(single_summary)
|
expect(result.summarized_text).to eq(single_summary)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context "when the content to summarize doesn't fit in a single call" do
|
context "when the content to summarize doesn't fit in a single call" do
|
||||||
it "summarizes each chunk and then concatenates them" do
|
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }
|
||||||
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
|
|
||||||
|
|
||||||
|
it "summarizes each chunk and then concatenates them" do
|
||||||
result =
|
result =
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses(
|
DiscourseAi::Completions::Llm.with_prepared_responses(
|
||||||
[single_summary, single_summary, concatenated_summary],
|
[single_summary, single_summary, concatenated_summary],
|
||||||
) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }
|
) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(3) } }
|
||||||
|
|
||||||
expect(result[:summary]).to eq(concatenated_summary)
|
expect(result.summarized_text).to eq(concatenated_summary)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
|
it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
|
||||||
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
|
|
||||||
max_length_response = "(1 asd said: This is a text "
|
max_length_response = "(1 asd said: This is a text "
|
||||||
chunk_of_chunks = "I'm smol"
|
chunk_of_chunks = "I'm smol"
|
||||||
|
|
||||||
|
@ -61,9 +61,9 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
|
||||||
chunk_of_chunks,
|
chunk_of_chunks,
|
||||||
concatenated_summary,
|
concatenated_summary,
|
||||||
],
|
],
|
||||||
) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } }
|
) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(5) } }
|
||||||
|
|
||||||
expect(result[:summary]).to eq(concatenated_summary)
|
expect(result.summarized_text).to eq(concatenated_summary)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -0,0 +1,70 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
RSpec.describe DiscourseAi::Summarization::Strategies::TopicGist do
  subject(:gist) { described_class.new(topic) }

  fab!(:topic) { Fabricate(:topic, highest_post_number: 25) }
  fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
  fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }

  describe "#targets_data" do
    # Post numbers selected by the strategy, in the order it returns them.
    def selected_post_numbers
      gist.targets_data[:contents].map { |entry| entry[:id] }
    end

    context "when the topic has more than 20 posts" do
      before do
        # Posts 1 and 2 already exist; fill in the rest up to the highest post number.
        (3..topic.highest_post_number).each do |number|
          Fabricate(:post, topic: topic, post_number: number)
        end
      end

      it "includes the OP and the last 20 posts" do
        expect(selected_post_numbers).to contain_exactly(1, *(6..25))
      end
    end

    it "only includes visible posts" do
      post_2.update!(hidden: true)

      expect(selected_post_numbers).to contain_exactly(1)
    end

    it "doesn't include posts without users" do
      post_2.update!(user_id: nil)

      expect(selected_post_numbers).to contain_exactly(1)
    end

    it "doesn't include whispers" do
      post_2.update!(post_type: Post.types[:whisper])

      expect(selected_post_numbers).to contain_exactly(1)
    end

    context "when the topic has embed content cached" do
      it "embed content is used instead of the raw text" do
        topic_embed =
          Fabricate(
            :topic_embed,
            topic: topic,
            embed_content_cache: "<p>hello world new post :D</p>",
          )

        op_content = gist.targets_data[:contents].first[:text]

        expect(op_content).to include(topic_embed.embed_content_cache)
      end
    end
  end
end
|
|
@ -0,0 +1,66 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
RSpec.describe DiscourseAi::Summarization::Strategies::TopicSummary do
  subject(:topic_summary) { described_class.new(topic) }

  fab!(:topic) { Fabricate(:topic, highest_post_number: 25) }
  fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
  fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }

  describe "#targets_data" do
    # Post numbers selected by the strategy, in the order it returns them.
    def selected_post_numbers
      topic_summary.targets_data[:contents].map { |entry| entry[:id] }
    end

    shared_examples "includes only public-visible topics" do
      it "only includes visible posts" do
        post_2.update!(hidden: true)

        expect(selected_post_numbers).to contain_exactly(1)
      end

      it "doesn't include posts without users" do
        post_2.update!(user_id: nil)

        expect(selected_post_numbers).to contain_exactly(1)
      end

      it "doesn't include whispers" do
        post_2.update!(post_type: Post.types[:whisper])

        expect(selected_post_numbers).to contain_exactly(1)
      end
    end

    context "when the topic has a best replies summary" do
      before { topic.update(has_summary: true) }

      it_behaves_like "includes only public-visible topics"
    end

    context "when the topic doesn't have a best replies summary" do
      before { topic.update(has_summary: false) }

      it_behaves_like "includes only public-visible topics"
    end

    context "when the topic has embed content cached" do
      it "embed content is used instead of the raw text" do
        topic_embed =
          Fabricate(
            :topic_embed,
            topic: topic,
            embed_content_cache: "<p>hello world new post :D</p>",
          )

        op_content = topic_summary.targets_data[:contents].first[:text]

        expect(op_content).to include(topic_embed.embed_content_cache)
      end
    end
  end
end
|
|
@ -19,6 +19,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
|
||||||
summarized_text: "test",
|
summarized_text: "test",
|
||||||
algorithm: "test",
|
algorithm: "test",
|
||||||
original_content_sha: "test",
|
original_content_sha: "test",
|
||||||
|
summary_type: AiSummary.summary_types[:complete],
|
||||||
)
|
)
|
||||||
|
|
||||||
sign_in(Fabricate(:admin))
|
sign_in(Fabricate(:admin))
|
||||||
|
@ -47,6 +48,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
|
||||||
summarized_text: "test",
|
summarized_text: "test",
|
||||||
algorithm: "test",
|
algorithm: "test",
|
||||||
original_content_sha: "test",
|
original_content_sha: "test",
|
||||||
|
summary_type: AiSummary.summary_types[:complete],
|
||||||
)
|
)
|
||||||
|
|
||||||
get "/discourse-ai/summarization/t/#{topic.id}.json"
|
get "/discourse-ai/summarization/t/#{topic.id}.json"
|
||||||
|
@ -133,6 +135,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
|
||||||
summarized_text: "test",
|
summarized_text: "test",
|
||||||
algorithm: "test",
|
algorithm: "test",
|
||||||
original_content_sha: "test",
|
original_content_sha: "test",
|
||||||
|
summary_type: AiSummary.summary_types[:complete],
|
||||||
)
|
)
|
||||||
|
|
||||||
get "/discourse-ai/summarization/t/#{topic.id}.json"
|
get "/discourse-ai/summarization/t/#{topic.id}.json"
|
||||||
|
|
|
@ -11,55 +11,14 @@ describe DiscourseAi::TopicSummarization do
|
||||||
SiteSetting.ai_summarization_enabled = true
|
SiteSetting.ai_summarization_enabled = true
|
||||||
end
|
end
|
||||||
|
|
||||||
let(:strategy) { DiscourseAi::Summarization.default_strategy }
|
let(:strategy) { DiscourseAi::Summarization.topic_summary(topic) }
|
||||||
|
|
||||||
shared_examples "includes only public-visible topics" do
|
|
||||||
subject { DiscourseAi::TopicSummarization.new(strategy, topic, user) }
|
|
||||||
|
|
||||||
it "only includes visible posts" do
|
|
||||||
topic.first_post.update!(hidden: true)
|
|
||||||
|
|
||||||
posts = subject.summary_targets
|
|
||||||
|
|
||||||
expect(posts.none?(&:hidden?)).to eq(true)
|
|
||||||
end
|
|
||||||
|
|
||||||
it "doesn't include posts without users" do
|
|
||||||
topic.first_post.user.destroy!
|
|
||||||
|
|
||||||
posts = subject.summary_targets
|
|
||||||
|
|
||||||
expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
|
|
||||||
end
|
|
||||||
|
|
||||||
it "doesn't include deleted posts" do
|
|
||||||
topic.first_post.update!(user_id: nil)
|
|
||||||
|
|
||||||
posts = subject.summary_targets
|
|
||||||
|
|
||||||
expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
describe "#summary_targets" do
|
|
||||||
context "when the topic has a best replies summary" do
|
|
||||||
before { topic.has_summary = true }
|
|
||||||
|
|
||||||
it_behaves_like "includes only public-visible topics"
|
|
||||||
end
|
|
||||||
|
|
||||||
context "when the topic doesn't have a best replies summary" do
|
|
||||||
before { topic.has_summary = false }
|
|
||||||
|
|
||||||
it_behaves_like "includes only public-visible topics"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
describe "#summarize" do
|
describe "#summarize" do
|
||||||
subject(:summarization) { described_class.new(strategy, topic, user) }
|
subject(:summarization) { described_class.new(strategy, user) }
|
||||||
|
|
||||||
def assert_summary_is_cached(topic, summary_response)
|
def assert_summary_is_cached(topic, summary_response)
|
||||||
cached_summary = AiSummary.find_by(target: topic)
|
cached_summary =
|
||||||
|
AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete])
|
||||||
|
|
||||||
expect(cached_summary.content_range).to cover(*topic.posts.map(&:post_number))
|
expect(cached_summary.content_range).to cover(*topic.posts.map(&:post_number))
|
||||||
expect(cached_summary.summarized_text).to eq(summary)
|
expect(cached_summary.summarized_text).to eq(summary)
|
||||||
|
@ -82,41 +41,15 @@ describe DiscourseAi::TopicSummarization do
|
||||||
summarization.summarize
|
summarization.summarize
|
||||||
|
|
||||||
cached_summary_text = "This is a cached summary"
|
cached_summary_text = "This is a cached summary"
|
||||||
AiSummary.find_by(target: topic).update!(
|
AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete]).update!(
|
||||||
summarized_text: cached_summary_text,
|
summarized_text: cached_summary_text,
|
||||||
updated_at: 24.hours.ago,
|
updated_at: 24.hours.ago,
|
||||||
)
|
)
|
||||||
|
|
||||||
summarization = described_class.new(strategy, topic, user)
|
summarization = described_class.new(strategy, user)
|
||||||
section = summarization.summarize
|
section = summarization.summarize
|
||||||
expect(section.summarized_text).to eq(cached_summary_text)
|
expect(section.summarized_text).to eq(cached_summary_text)
|
||||||
end
|
end
|
||||||
|
|
||||||
context "when the topic has embed content cached" do
|
|
||||||
it "embed content is used instead of the raw text" do
|
|
||||||
topic_embed =
|
|
||||||
Fabricate(
|
|
||||||
:topic_embed,
|
|
||||||
topic: topic,
|
|
||||||
embed_content_cache: "<p>hello world new post :D</p>",
|
|
||||||
)
|
|
||||||
|
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses(["A summary"]) do |spy|
|
|
||||||
summarization.summarize
|
|
||||||
|
|
||||||
prompt_raw =
|
|
||||||
spy
|
|
||||||
.prompt_messages
|
|
||||||
.reduce(+"") do |memo, m|
|
|
||||||
memo << m[:content] << "\n"
|
|
||||||
|
|
||||||
memo
|
|
||||||
end
|
|
||||||
|
|
||||||
expect(prompt_raw).to include(topic_embed.embed_content_cache)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "invalidating cached summaries" do
|
describe "invalidating cached summaries" do
|
||||||
|
@ -124,7 +57,7 @@ describe DiscourseAi::TopicSummarization do
|
||||||
let(:updated_summary) { "This is the final summary" }
|
let(:updated_summary) { "This is the final summary" }
|
||||||
|
|
||||||
def cached_summary
|
def cached_summary
|
||||||
AiSummary.find_by(target: topic)
|
AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete])
|
||||||
end
|
end
|
||||||
|
|
||||||
before do
|
before do
|
||||||
|
@ -133,8 +66,8 @@ describe DiscourseAi::TopicSummarization do
|
||||||
# since it is glued to the old llm instance
|
# since it is glued to the old llm instance
|
||||||
# so we create the cached summary totally independantly
|
# so we create the cached summary totally independantly
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses([cached_text]) do
|
DiscourseAi::Completions::Llm.with_prepared_responses([cached_text]) do
|
||||||
strategy = DiscourseAi::Summarization.default_strategy
|
strategy = DiscourseAi::Summarization.topic_summary(topic)
|
||||||
described_class.new(strategy, topic, user).summarize
|
described_class.new(strategy, user).summarize
|
||||||
end
|
end
|
||||||
|
|
||||||
cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago)
|
cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago)
|
||||||
|
|
|
@ -32,6 +32,7 @@ RSpec.describe "Summarize a topic ", type: :system do
|
||||||
summarized_text: summarization_result,
|
summarized_text: summarization_result,
|
||||||
algorithm: "test",
|
algorithm: "test",
|
||||||
original_content_sha: "test",
|
original_content_sha: "test",
|
||||||
|
summary_type: AiSummary.summary_types[:complete],
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue