diff --git a/app/controllers/discourse_ai/summarization/chat_summary_controller.rb b/app/controllers/discourse_ai/summarization/chat_summary_controller.rb index 96d95d8c..5354c539 100644 --- a/app/controllers/discourse_ai/summarization/chat_summary_controller.rb +++ b/app/controllers/discourse_ai/summarization/chat_summary_controller.rb @@ -15,29 +15,21 @@ module DiscourseAi channel = ::Chat::Channel.find(params[:channel_id]) guardian.ensure_can_join_chat_channel!(channel) - strategy = DiscourseAi::Summarization.default_strategy - raise Discourse::NotFound.new unless strategy + summarizer = DiscourseAi::Summarization.chat_channel_summary(channel, since) + raise Discourse::NotFound.new unless summarizer guardian.ensure_can_request_summary! RateLimiter.new(current_user, "channel_summary", 6, 5.minutes).performed! hijack do - content = { content_title: channel.name } - - content[:contents] = channel - .chat_messages - .where("chat_messages.created_at > ?", since.hours.ago) - .includes(:user) - .order(created_at: :asc) - .pluck(:id, :username_lower, :message) - .map { { id: _1, poster: _2, text: _3 } } + strategy = DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, since) summarized_text = - if content[:contents].empty? + if strategy.targets_data[:contents].empty? 
I18n.t("discourse_ai.summarization.chat.no_targets") else - strategy.summarize(content, current_user).dig(:summary) + summarizer.summarize(current_user)&.summarized_text end render json: { summary: summarized_text } diff --git a/app/controllers/discourse_ai/summarization/summary_controller.rb b/app/controllers/discourse_ai/summarization/summary_controller.rb index 549bbb44..a7900549 100644 --- a/app/controllers/discourse_ai/summarization/summary_controller.rb +++ b/app/controllers/discourse_ai/summarization/summary_controller.rb @@ -9,15 +9,19 @@ module DiscourseAi topic = Topic.find(params[:topic_id]) guardian.ensure_can_see!(topic) - raise Discourse::NotFound if !guardian.can_see_summary?(topic) + if !guardian.can_see_summary?(topic, AiSummary.summary_types[:complete]) + raise Discourse::NotFound + end RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user opts = params.permit(:skip_age_check) skip_age_check = opts[:skip_age_check] == "true" + summarization_service = DiscourseAi::TopicSummarization.for(topic, current_user) + if params[:stream] && current_user - cached_summary = DiscourseAi::TopicSummarization.cached_summary(topic, current_user) + cached_summary = summarization_service.cached_summary if cached_summary && !skip_age_check render_serialized(cached_summary, AiTopicSummarySerializer) @@ -34,12 +38,7 @@ module DiscourseAi render json: success_json else hijack do - summary = - DiscourseAi::TopicSummarization.summarize( - topic, - current_user, - skip_age_check: skip_age_check, - ) + summary = summarization_service.summarize(skip_age_check: skip_age_check) render_serialized(summary, AiTopicSummarySerializer) end end diff --git a/app/jobs/regular/stream_topic_ai_summary.rb b/app/jobs/regular/stream_topic_ai_summary.rb index d3212f58..d22a7b81 100644 --- a/app/jobs/regular/stream_topic_ai_summary.rb +++ b/app/jobs/regular/stream_topic_ai_summary.rb @@ -8,8 +8,11 @@ module Jobs return unless topic = Topic.find_by(id: 
args[:topic_id]) return unless user = User.find_by(id: args[:user_id]) - strategy = DiscourseAi::Summarization.default_strategy - return if strategy.nil? || !Guardian.new(user).can_see_summary?(topic) + strategy = DiscourseAi::Summarization.topic_summary(topic) + if strategy.nil? || + !Guardian.new(user).can_see_summary?(topic, AiSummary.summary_types[:complete]) + return + end guardian = Guardian.new(user) return unless guardian.can_see?(topic) @@ -21,7 +24,7 @@ module Jobs summary = DiscourseAi::TopicSummarization - .new(strategy, topic, user) + .new(strategy, user) .summarize(skip_age_check: skip_age_check) do |partial_summary| streamed_summary << partial_summary diff --git a/app/models/ai_summary.rb b/app/models/ai_summary.rb index f947dc0d..3267efbd 100644 --- a/app/models/ai_summary.rb +++ b/app/models/ai_summary.rb @@ -3,6 +3,23 @@ class AiSummary < ActiveRecord::Base belongs_to :target, polymorphic: true + enum :summary_type, { complete: 0, gist: 1 } + + def self.store!(target, summary_type, model, summary, content_ids) + AiSummary.create!( + target: target, + algorithm: model, + content_range: (content_ids.first..content_ids.last), + summarized_text: summary, + original_content_sha: build_sha(content_ids.join), + summary_type: summary_type, + ) + end + + def self.build_sha(joined_ids) + Digest::SHA256.hexdigest(joined_ids) + end + def mark_as_outdated @outdated = true end @@ -25,6 +42,7 @@ end # algorithm :string not null # created_at :datetime not null # updated_at :datetime not null +# summary_type :integer default("complete"), not null # # Indexes # diff --git a/app/services/discourse_ai/topic_summarization.rb b/app/services/discourse_ai/topic_summarization.rb index 15a94193..3d33e21d 100644 --- a/app/services/discourse_ai/topic_summarization.rb +++ b/app/services/discourse_ai/topic_summarization.rb @@ -1,145 +1,44 @@ # frozen_string_literal: true module DiscourseAi + # A cache layer on top of our topic summarization engine. Also handles permissions. 
class TopicSummarization - def self.summarize(topic, user, skip_age_check: false, &on_partial_blk) - new(DiscourseAi::Summarization.default_strategy, topic, user).summarize( - skip_age_check: skip_age_check, - &on_partial_blk - ) + def self.for(topic, user) + new(DiscourseAi::Summarization.topic_summary(topic), user) end - def self.cached_summary(topic, user) - new(DiscourseAi::Summarization.default_strategy, topic, user).cached_summary - end - - def initialize(strategy, topic, user) - @strategy = strategy - @topic = topic + def initialize(summarizer, user) + @summarizer = summarizer @user = user end - attr_reader :strategy, :topic, :user - def cached_summary - existing_summary + summarizer.existing_summary end def summarize(skip_age_check: false, &on_partial_blk) # Existing summary shouldn't be nil in this scenario because the controller checks its existence. - return if !user && !existing_summary + return if !user && !cached_summary - return existing_summary if use_cached?(skip_age_check) + return cached_summary if use_cached?(skip_age_check) - delete_cached_summaries! if existing_summary + summarizer.delete_cached_summaries! if cached_summary - content = { - resource_path: "#{Discourse.base_path}/t/-/#{topic.id}", - content_title: topic.title, - contents: [], - } - - summary_targets_data.map do |(pn, raw, username)| - raw_text = raw - - if pn == 1 && topic.topic_embed&.embed_content_cache.present? - raw_text = topic.topic_embed&.embed_content_cache - end - - content[:contents] << { poster: username, id: pn, text: raw_text } - end - - summarization_result = strategy.summarize(content, user, &on_partial_blk) - cache_summary(summarization_result) - end - - def summary_targets - topic.has_summary? ? 
best_replies : pick_selection + summarizer.summarize(user, &on_partial_blk) end private - def summary_sha - @summary_sha ||= build_sha(summary_targets_data.map(&:first)) - end - - def summary_targets_data - @summary_targets_data ||= summary_targets.pluck(:post_number, :raw, :username) - end - - def existing_summary - if !defined?(@existing_summary) - @existing_summary = AiSummary.find_by(target: topic) - if @existing_summary && existing_summary.original_content_sha != summary_sha - @existing_summary.mark_as_outdated - end - end - @existing_summary - end - - def best_replies - Post - .summary(topic.id) - .where("post_type = ?", Post.types[:regular]) - .where("NOT hidden") - .joins(:user) - .order(:post_number) - end - - def pick_selection - posts = - Post - .where(topic_id: topic.id) - .where("post_type = ?", Post.types[:regular]) - .where("NOT hidden") - .order(:post_number) - - post_numbers = posts.limit(5).pluck(:post_number) - post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number) - post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number) - - Post - .where(topic_id: topic.id) - .joins(:user) - .where("post_number in (?)", post_numbers) - .order(:post_number) - end - - def delete_cached_summaries! - AiSummary.where(target: topic).destroy_all - end + attr_reader :summarizer, :user def use_cached?(skip_age_check) can_summarize = Guardian.new(user).can_request_summary? - existing_summary && + cached_summary && !( - can_summarize && new_targets? && - (skip_age_check || existing_summary.created_at < 1.hour.ago) + can_summarize && cached_summary.outdated && + (skip_age_check || cached_summary.created_at < 1.hour.ago) ) end - - def new_targets? 
- existing_summary&.original_content_sha != summary_sha - end - - def cache_summary(result) - post_numbers = summary_targets_data.map(&:first) - - cached_summary = - AiSummary.create!( - target: topic, - algorithm: strategy.display_name, - content_range: (post_numbers.first..post_numbers.last), - summarized_text: result[:summary], - original_content_sha: summary_sha, - ) - - cached_summary - end - - def build_sha(ids) - Digest::SHA256.hexdigest(ids.join) - end end end diff --git a/db/migrate/20240909180908_add_ai_summary_type_column.rb b/db/migrate/20240909180908_add_ai_summary_type_column.rb new file mode 100644 index 00000000..82284106 --- /dev/null +++ b/db/migrate/20240909180908_add_ai_summary_type_column.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true +class AddAiSummaryTypeColumn < ActiveRecord::Migration[7.1] + def change + add_column :ai_summaries, :summary_type, :integer, default: 0, null: false + end +end diff --git a/lib/guardian_extensions.rb b/lib/guardian_extensions.rb index d4ee455e..075ae96c 100644 --- a/lib/guardian_extensions.rb +++ b/lib/guardian_extensions.rb @@ -2,7 +2,7 @@ module DiscourseAi module GuardianExtensions - def can_see_summary?(target) + def can_see_summary?(target, summary_type) return false if !SiteSetting.ai_summarization_enabled if target.class == Topic && target.private_message? @@ -14,7 +14,7 @@ module DiscourseAi return false if !allowed end - has_cached_summary = AiSummary.exists?(target: target) + has_cached_summary = AiSummary.exists?(target: target, summary_type: summary_type) return has_cached_summary if user.nil? has_cached_summary || can_request_summary? diff --git a/lib/summarization.rb b/lib/summarization.rb index e8b037df..e338794f 100644 --- a/lib/summarization.rb +++ b/lib/summarization.rb @@ -1,9 +1,36 @@ # frozen_string_literal: true + module DiscourseAi module Summarization - def self.default_strategy + def self.topic_summary(topic) if SiteSetting.ai_summarization_model.present? 
&& SiteSetting.ai_summarization_enabled - DiscourseAi::Summarization::Strategies::FoldContent.new(SiteSetting.ai_summarization_model) + DiscourseAi::Summarization::FoldContent.new( + DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model), + DiscourseAi::Summarization::Strategies::TopicSummary.new(topic), + ) + else + nil + end + end + + def self.topic_gist(topic) + if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled + DiscourseAi::Summarization::FoldContent.new( + DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model), + DiscourseAi::Summarization::Strategies::TopicGist.new(topic), + ) + else + nil + end + end + + def self.chat_channel_summary(channel, time_window_in_hours) + if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled + DiscourseAi::Summarization::FoldContent.new( + DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model), + DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, time_window_in_hours), + persist_summaries: false, + ) else nil end diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index 3066be47..052926df 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -2,14 +2,6 @@ module DiscourseAi module Summarization - def self.default_strategy - if SiteSetting.ai_summarization_model.present? 
&& SiteSetting.ai_summarization_enabled - DiscourseAi::Summarization::Strategies::FoldContent.new(SiteSetting.ai_summarization_model) - else - nil - end - end - class EntryPoint def inject_into(plugin) plugin.add_to_serializer(:current_user, :can_summarize) do @@ -18,11 +10,11 @@ module DiscourseAi end plugin.add_to_serializer(:topic_view, :summarizable) do - scope.can_see_summary?(object.topic) + scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete]) end plugin.add_to_serializer(:web_hook_topic_view, :summarizable) do - scope.can_see_summary?(object.topic) + scope.can_see_summary?(object.topic, AiSummary.summary_types[:complete]) end end end diff --git a/lib/summarization/fold_content.rb b/lib/summarization/fold_content.rb new file mode 100644 index 00000000..00b2c80b --- /dev/null +++ b/lib/summarization/fold_content.rb @@ -0,0 +1,189 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + # This class offers a generic way of summarizing content from multiple sources using different prompts. + # + # It summarizes large amounts of content by recursively summarizing it in smaller chunks that + # fit the given model context window, finally concatenating the disjoint summaries + # into a final version. + # + class FoldContent + def initialize(llm, strategy, persist_summaries: true) + @llm = llm + @strategy = strategy + @persist_summaries = persist_summaries + end + + attr_reader :llm, :strategy + + # @param user { User } - User object used for auditing usage. + # + # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function. + # Note: The block is only called with results of the final summary, not intermediate summaries. + # + # @returns { AiSummary } - Resulting summary. 
+ def summarize(user, &on_partial_blk) + opts = content_to_summarize.except(:contents) + + initial_chunks = + rebalance_chunks( + content_to_summarize[:contents].map do |c| + { ids: [c[:id]], summary: format_content_item(c) } + end, + ) + + # Special case where we can do all the summarization in one pass. + result = + if initial_chunks.length == 1 + { + summary: + summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk), + chunks: [], + } + else + summarize_chunks(initial_chunks, user, opts, &on_partial_blk) + end + + if persist_summaries + AiSummary.store!( + strategy.target, + strategy.type, + llm_model.name, + result[:summary], + content_to_summarize[:contents].map { |c| c[:id] }, + ) + else + AiSummary.new(summarized_text: result[:summary]) + end + end + + # @returns { AiSummary } - Resulting summary. + # + # Finds a summary matching the target and strategy. Marks it as outdated if the strategy found newer content + def existing_summary + if !defined?(@existing_summary) + summary = AiSummary.find_by(target: strategy.target, summary_type: strategy.type) + + if summary + @existing_summary = summary + + if existing_summary.original_content_sha != latest_sha + @existing_summary.mark_as_outdated + end + end + end + @existing_summary + end + + def delete_cached_summaries! + AiSummary.where(target: strategy.target, summary_type: strategy.type).destroy_all + end + + private + + attr_reader :persist_summaries + + def llm_model + llm.llm_model + end + + def content_to_summarize + @targets_data ||= strategy.targets_data + end + + def latest_sha + @latest_sha ||= AiSummary.build_sha(content_to_summarize[:contents].map { |c| c[:id] }.join) + end + + def summarize_chunks(chunks, user, opts, &on_partial_blk) + # Safely assume we always have more than one chunk. 
+ summarized_chunks = summarize_in_chunks(chunks, user, opts) + total_summaries_size = + llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join) + + if total_summaries_size < available_tokens + # Chunks are small enough, we can concatenate them. + { + summary: + concatenate_summaries( + summarized_chunks.map { |s| s[:summary] }, + user, + &on_partial_blk + ), + chunks: summarized_chunks, + } + else + # We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again. + rebalanced_chunks = rebalance_chunks(summarized_chunks) + + summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk) + end + end + + def format_content_item(item) + "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + + def rebalance_chunks(chunks) + section = { ids: [], summary: "" } + + chunks = + chunks.reduce([]) do |sections, chunk| + if llm_model.tokenizer_class.can_expand_tokens?( + section[:summary], + chunk[:summary], + available_tokens, + ) + section[:summary] += chunk[:summary] + section[:ids] = section[:ids].concat(chunk[:ids]) + else + sections << section + section = chunk + end + + sections + end + + chunks << section if section[:summary].present? 
+ + chunks + end + + def summarize_single(text, user, opts, &on_partial_blk) + prompt = strategy.summarize_single_prompt(text, opts) + + llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk) + end + + def summarize_in_chunks(chunks, user, opts) + chunks.map do |chunk| + prompt = strategy.summarize_single_prompt(chunk[:summary], opts) + + chunk[:summary] = llm.generate( + prompt, + user: user, + max_tokens: 300, + feature_name: "summarize", + ) + + chunk + end + end + + def concatenate_summaries(texts_to_summarize, user, &on_partial_blk) + prompt = strategy.concatenation_prompt(texts_to_summarize) + + llm.generate(prompt, user: user, &on_partial_blk) + end + + def available_tokens + # Reserve tokens for the response and the base prompt + # ~500 words + reserved_tokens = 700 + + llm_model.max_prompt_tokens - reserved_tokens + end + end + end +end diff --git a/lib/summarization/strategies/base.rb b/lib/summarization/strategies/base.rb new file mode 100644 index 00000000..2ca76383 --- /dev/null +++ b/lib/summarization/strategies/base.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Strategies + # Objects inheriting from this class will get passed as a dependency to `DiscourseAi::Summarization::FoldContent`. + # This collaborator knows how to source the content to summarize and the prompts used in the process, + # one for summarizing a chunk and another for concatenating them if necessary. + class Base + def initialize(target) + @target = target + end + + attr_reader :target + + # The summary type differentiates instances of `AiSummary` pointing to a single target. + # See the `summary_type` enum for available options. + def type + raise NotImplementedError + end + + # @returns { Hash } - Content to summarize. + # + # This method returns a hash with the content to summarize and additional information. 
+ # The only mandatory key is `contents`, which must be an array of hashes with + # the following structure: + # + # { + # poster: A way to tell who wrote the content, + # id: A number to signal order, + # text: Text to summarize + # } + # + # Additionally, you could add more context, which will be available in the prompt. e.g.: + # + # { + # resource_path: "#{Discourse.base_path}/t/-/#{target.id}", + # content_title: target.title, + # contents: [...] + # } + # + def targets_data + raise NotImplementedError + end + + # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when concatenating multiple chunks. + def contatenation_prompt(_texts_to_summarize) + raise NotImplementedError + end + + # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM on each chunk, + # and when the whole content fits in one call. + def summarize_single_prompt(_input, _opts) + raise NotImplementedError + end + end + end + end +end diff --git a/lib/summarization/strategies/chat_messages.rb b/lib/summarization/strategies/chat_messages.rb new file mode 100644 index 00000000..3af267ff --- /dev/null +++ b/lib/summarization/strategies/chat_messages.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Strategies + class ChatMessages < Base + def type + AiSummary.summary_types[:complete] + end + + def initialize(target, since) + super(target) + @since = since + end + + def targets_data + content = { content_title: target.name } + + content[:contents] = target + .chat_messages + .where("chat_messages.created_at > ?", since.hours.ago) + .includes(:user) + .order(created_at: :asc) + .pluck(:id, :username_lower, :message) + .map { { id: _1, poster: _2, text: _3 } } + + content + end + + def contatenation_prompt(texts_to_summarize) + prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) + You are a summarization bot tasked with creating a cohesive narrative by intelligently merging multiple disjointed 
summaries. + Your response should consist of well-structured paragraphs that combines these summaries into a clear and comprehensive overview. + Avoid adding any additional text or commentary. Format your output using Discourse forum Markdown. + TEXT + + prompt.push(type: :user, content: <<~TEXT.strip) + THESE are the summaries, each one separated by a newline, all of them inside XML tags: + + + #{texts_to_summarize.join("\n")} + + TEXT + + prompt + end + + def summarize_single_prompt(input, opts) + prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) + You are a summarization bot designed to generate clear and insightful paragraphs that conveys the main topics + and developments from a series of chat messages within a user-selected time window. + + Analyze the messages to extract key themes, participants' intentions, and any significant conclusions or decisions. + Your summary should be concise yet comprehensive, providing an overview that is accessible to someone with no prior context of the conversation. + + - Only include the summary, WITHOUT additional commentary. + - Don't mention the channel title. Avoid including extraneous details or subjective opinions. + - Maintain the original language of the text being summarized. + - The same user could write multiple messages in a row, don't treat them as different persons. + - Aim for summaries to be 400 words or less. + + TEXT + + prompt.push(type: :user, content: <<~TEXT.strip) + #{opts[:content_title].present? ? "The name of the channel is: " + opts[:content_title] + ".\n" : ""} + + Here are the messages, inside XML tags: + + + #{input} + + + Generate a summary of the given chat messages. 
+ TEXT + + prompt + end + + private + + attr_reader :since + end + end + end +end diff --git a/lib/summarization/strategies/fold_content.rb b/lib/summarization/strategies/fold_content.rb deleted file mode 100644 index 788e09f6..00000000 --- a/lib/summarization/strategies/fold_content.rb +++ /dev/null @@ -1,198 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Summarization - module Strategies - class FoldContent - def initialize(completion_model) - @llm = DiscourseAi::Completions::Llm.proxy(completion_model) - raise "Invalid model provided for summarization strategy" if @llm.llm_model.nil? - end - - attr_reader :llm - - def summarize(content, user, &on_partial_blk) - opts = content.except(:contents) - - initial_chunks = - rebalance_chunks( - content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } }, - ) - - # Special case where we can do all the summarization in one pass. - if initial_chunks.length == 1 - { - summary: - summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk), - chunks: [], - } - else - summarize_chunks(initial_chunks, user, opts, &on_partial_blk) - end - end - - def display_name - llm_model&.name || "unknown model" - end - - private - - def llm_model - llm.llm_model - end - - def summarize_chunks(chunks, user, opts, &on_partial_blk) - # Safely assume we always have more than one chunk. - summarized_chunks = summarize_in_chunks(chunks, user, opts) - total_summaries_size = - llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join) - - if total_summaries_size < available_tokens - # Chunks are small enough, we can concatenate them. - { - summary: - concatenate_summaries( - summarized_chunks.map { |s| s[:summary] }, - user, - &on_partial_blk - ), - chunks: summarized_chunks, - } - else - # We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again. 
- rebalanced_chunks = rebalance_chunks(summarized_chunks) - - summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk) - end - end - - def format_content_item(item) - "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " - end - - def rebalance_chunks(chunks) - section = { ids: [], summary: "" } - - chunks = - chunks.reduce([]) do |sections, chunk| - if llm_model.tokenizer_class.can_expand_tokens?( - section[:summary], - chunk[:summary], - available_tokens, - ) - section[:summary] += chunk[:summary] - section[:ids] = section[:ids].concat(chunk[:ids]) - else - sections << section - section = chunk - end - - sections - end - - chunks << section if section[:summary].present? - - chunks - end - - def summarize_single(text, user, opts, &on_partial_blk) - prompt = summarization_prompt(text, opts) - - llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk) - end - - def summarize_in_chunks(chunks, user, opts) - chunks.map do |chunk| - prompt = summarization_prompt(chunk[:summary], opts) - - chunk[:summary] = llm.generate( - prompt, - user: user, - max_tokens: 300, - feature_name: "summarize", - ) - chunk - end - end - - def concatenate_summaries(summaries, user, &on_partial_blk) - prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) - You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative. - The narrative you create is in the form of one or multiple paragraphs. - Your reply MUST BE a single concatenated summary using the summaries I'll provide to you. - I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments. - You understand and generate Discourse forum Markdown. - You format the response, including links, using Markdown. 
- TEXT - - prompt.push(type: :user, content: <<~TEXT.strip) - THESE are the summaries, each one separated by a newline, all of them inside XML tags: - - - #{summaries.join("\n")} - - TEXT - - llm.generate(prompt, user: user, &on_partial_blk) - end - - def summarization_prompt(input, opts) - insts = +<<~TEXT - You are an advanced summarization bot that generates concise, coherent summaries of provided text. - - - Only include the summary, without any additional commentary. - - You understand and generate Discourse forum Markdown; including links, _italics_, **bold**. - - Maintain the original language of the text being summarized. - - Aim for summaries to be 400 words or less. - - TEXT - - insts << <<~TEXT if opts[:resource_path] - - Each post is formatted as ") " - - Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER) - - Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3) - - Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6) - - Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13) - - When formatting usernames either use @USERNMAE OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER) - TEXT - - prompt = DiscourseAi::Completions::Prompt.new(insts.strip) - - if opts[:resource_path] - prompt.push( - type: :user, - content: - "Here are the posts inside XML tags:\n\n1) user1 said: I love Mondays 2) user2 said: I hate Mondays\n\nGenerate a concise, coherent summary of the text above maintaining the original language.", - ) - prompt.push( - type: :model, - content: - "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.", - ) - end - - prompt.push(type: :user, content: <<~TEXT.strip) - #{opts[:content_title].present? ? 
"The discussion title is: " + opts[:content_title] + ".\n" : ""} - Here are the posts, inside XML tags: - - - #{input} - - - Generate a concise, coherent summary of the text above maintaining the original language. - TEXT - - prompt - end - - def available_tokens - # Reserve tokens for the response and the base prompt - # ~500 words - reserved_tokens = 700 - - llm_model.max_prompt_tokens - reserved_tokens - end - end - end - end -end diff --git a/lib/summarization/strategies/topic_gist.rb b/lib/summarization/strategies/topic_gist.rb new file mode 100644 index 00000000..f52fbc39 --- /dev/null +++ b/lib/summarization/strategies/topic_gist.rb @@ -0,0 +1,90 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Strategies + class TopicGist < Base + def type + AiSummary.summary_types[:gist] + end + + def targets_data + content = { content_title: target.title, contents: [] } + + op_post_number = 1 + + last_twenty_posts = + Post + .where(topic_id: target.id) + .where("post_type = ?", Post.types[:regular]) + .where("NOT hidden") + .order("post_number DESC") + .limit(20) + .pluck(:post_number) + + posts_data = + Post + .where(topic_id: target.id) + .joins(:user) + .where("post_number IN (?)", last_twenty_posts << op_post_number) + .order(:post_number) + .pluck(:post_number, :raw, :username) + + posts_data.each do |(pn, raw, username)| + raw_text = raw + + if pn == 1 && target.topic_embed&.embed_content_cache.present? + raw_text = target.topic_embed&.embed_content_cache + end + + content[:contents] << { poster: username, id: pn, text: raw_text } + end + + content + end + + def concatenation_prompt(texts_to_summarize) + prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) + You are a summarization bot tasked with creating a single, concise sentence by merging disjointed summaries into a cohesive statement. + Your response should strictly be this single, comprehensive sentence, without any additional text or comments. 
+ TEXT + + prompt.push(type: :user, content: <<~TEXT.strip) + THESE are the summaries, each one separated by a newline, all of them inside XML tags: + + + #{texts_to_summarize.join("\n")} + + TEXT + + prompt + end + + def summarize_single_prompt(input, opts) + prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) + You are an advanced summarization bot. Your task is to analyze a given conversation and generate a single, + concise sentence that clearly conveys the main topic and purpose of the discussion to someone with no prior context. + + - Focus on the central theme or issue being addressed, while maintaining an objective and neutral tone. + - Avoid including extraneous details or subjective opinions. + - Maintain the original language of the text being summarized. + TEXT + + prompt.push(type: :user, content: <<~TEXT.strip) + #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""} + + Here are the posts, inside XML tags: + + + #{input} + + + Generate a single sentence of the text above maintaining the original language. + TEXT + + prompt + end + end + end + end +end diff --git a/lib/summarization/strategies/topic_summary.rb b/lib/summarization/strategies/topic_summary.rb new file mode 100644 index 00000000..945caace --- /dev/null +++ b/lib/summarization/strategies/topic_summary.rb @@ -0,0 +1,142 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Strategies + class TopicSummary < Base + def type + AiSummary.summary_types[:complete] + end + + def targets_data + content = { + resource_path: "#{Discourse.base_path}/t/-/#{target.id}", + content_title: target.title, + contents: [], + } + + posts_data = + (target.has_summary? ? best_replies : pick_selection).pluck( + :post_number, + :raw, + :username, + ) + + posts_data.each do |(pn, raw, username)| + raw_text = raw + + if pn == 1 && target.topic_embed&.embed_content_cache.present? 
+ raw_text = target.topic_embed&.embed_content_cache + end + + content[:contents] << { poster: username, id: pn, text: raw_text } + end + + content + end + + def concatenation_prompt(texts_to_summarize) + prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) + You are a summarization bot that effectively concatenates disjointed summaries, creating a cohesive narrative. + The narrative you create is in the form of one or multiple paragraphs. + Your reply MUST BE a single concatenated summary using the summaries I'll provide to you. + I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments. + You understand and generate Discourse forum Markdown. + You format the response, including links, using Markdown. + TEXT + + prompt.push(type: :user, content: <<~TEXT.strip) + THESE are the summaries, each one separated by a newline, all of them inside XML tags: + + + #{texts_to_summarize.join("\n")} + + TEXT + + prompt + end + + def summarize_single_prompt(input, opts) + insts = +<<~TEXT + You are an advanced summarization bot that generates concise, coherent summaries of provided text. + + - Only include the summary, without any additional commentary. + - You understand and generate Discourse forum Markdown; including links, _italics_, **bold**. + - Maintain the original language of the text being summarized. + - Aim for summaries to be 400 words or less. 
+ + TEXT + + insts << <<~TEXT if opts[:resource_path] + - Each post is formatted as ") " + - Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER) + - Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3) + - Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6) + - Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13) + - When formatting usernames either use @USERNAME OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER) + TEXT + + prompt = DiscourseAi::Completions::Prompt.new(insts.strip) + + if opts[:resource_path] + prompt.push( + type: :user, + content: + "Here are the posts inside XML tags:\n\n1) user1 said: I love Mondays 2) user2 said: I hate Mondays\n\nGenerate a concise, coherent summary of the text above maintaining the original language.", + ) + prompt.push( + type: :model, + content: + "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) loves them, while [user2](#{opts[:resource_path]}/2) hates them.", + ) + end + + prompt.push(type: :user, content: <<~TEXT.strip) + #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""} + Here are the posts, inside XML tags: + + + #{input} + + + Generate a concise, coherent summary of the text above maintaining the original language.
+ TEXT + + prompt + end + + private + + attr_reader :topic + + def best_replies + Post + .summary(target.id) + .where("post_type = ?", Post.types[:regular]) + .where("NOT hidden") + .joins(:user) + .order(:post_number) + end + + def pick_selection + posts = + Post + .where(topic_id: target.id) + .where("post_type = ?", Post.types[:regular]) + .where("NOT hidden") + .order(:post_number) + + post_numbers = posts.limit(5).pluck(:post_number) + post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number) + post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number) + + Post + .where(topic_id: target.id) + .joins(:user) + .where("post_number in (?)", post_numbers) + .order(:post_number) + end + end + end + end +end diff --git a/spec/lib/guardian_extensions_spec.rb b/spec/lib/guardian_extensions_spec.rb index 5787516c..38268ce1 100644 --- a/spec/lib/guardian_extensions_spec.rb +++ b/spec/lib/guardian_extensions_spec.rb @@ -20,7 +20,7 @@ describe DiscourseAi::GuardianExtensions do it "returns false" do SiteSetting.ai_custom_summarization_allowed_groups = "" - expect(guardian.can_see_summary?(topic)).to eq(false) + expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(false) end it "returns true if there is a cached summary" do @@ -29,9 +29,10 @@ describe DiscourseAi::GuardianExtensions do summarized_text: "test", original_content_sha: "123", algorithm: "test", + summary_type: AiSummary.summary_types[:complete], ) - expect(guardian.can_see_summary?(topic)).to eq(true) + expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true) end end @@ -39,7 +40,7 @@ describe DiscourseAi::GuardianExtensions do before { SiteSetting.ai_custom_summarization_allowed_groups = group.id } it "returns true if the user group is present in the ai_custom_summarization_allowed_groups_map setting" do - expect(guardian.can_see_summary?(topic)).to eq(true) + expect(guardian.can_see_summary?(topic, 
AiSummary.summary_types[:complete])).to eq(true) end end @@ -48,12 +49,12 @@ describe DiscourseAi::GuardianExtensions do let(:pm) { Fabricate(:private_message_topic) } it "returns false" do - expect(guardian.can_see_summary?(pm)).to eq(false) + expect(guardian.can_see_summary?(pm, AiSummary.summary_types[:complete])).to eq(false) end it "returns true if user is in a group that is allowed summaries" do SiteSetting.ai_pm_summarization_allowed_groups = group.id - expect(guardian.can_see_summary?(pm)).to eq(true) + expect(guardian.can_see_summary?(pm, AiSummary.summary_types[:complete])).to eq(true) end end @@ -61,7 +62,7 @@ describe DiscourseAi::GuardianExtensions do let(:guardian) { Guardian.new } it "returns false for anons" do - expect(guardian.can_see_summary?(topic)).to eq(false) + expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(false) end it "returns true for anons when there is a cached summary" do @@ -70,9 +71,10 @@ describe DiscourseAi::GuardianExtensions do summarized_text: "test", original_content_sha: "123", algorithm: "test", + summary_type: AiSummary.summary_types[:complete], ) - expect(guardian.can_see_summary?(topic)).to eq(true) + expect(guardian.can_see_summary?(topic, AiSummary.summary_types[:complete])).to eq(true) end end end diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/fold_content_spec.rb similarity index 66% rename from spec/lib/modules/summarization/strategies/fold_content_spec.rb rename to spec/lib/modules/summarization/fold_content_spec.rb index 16ebc892..b1f84519 100644 --- a/spec/lib/modules/summarization/strategies/fold_content_spec.rb +++ b/spec/lib/modules/summarization/fold_content_spec.rb @@ -1,9 +1,14 @@ # frozen_string_literal: true -RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do +RSpec.describe DiscourseAi::Summarization::FoldContent do + subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) } + 
describe "#summarize" do let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) } + fab!(:topic) { Fabricate(:topic, highest_post_number: 2) } + fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") } + before do SiteSetting.ai_summarization_enabled = true @@ -15,10 +20,6 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do llm_model.update!(max_prompt_tokens: model_tokens) end - let(:strategy) { DiscourseAi::Summarization.default_strategy } - let(:summarize_text) { "This is a text" } - let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } } - let(:single_summary) { "this is a single summary" } let(:concatenated_summary) { "this is a concatenated summary" } @@ -28,27 +29,26 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do it "does one call to summarize content" do result = DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy| - strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) } + summarizer.summarize(user).tap { expect(spy.completions).to eq(1) } end - expect(result[:summary]).to eq(single_summary) + expect(result.summarized_text).to eq(single_summary) end end context "when the content to summarize doesn't fit in a single call" do - it "summarizes each chunk and then concatenates them" do - content[:contents] << { poster: "asd2", id: 2, text: summarize_text } + fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") } + it "summarizes each chunk and then concatenates them" do result = DiscourseAi::Completions::Llm.with_prepared_responses( [single_summary, single_summary, concatenated_summary], - ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } } + ) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(3) } } - expect(result[:summary]).to eq(concatenated_summary) + expect(result.summarized_text).to 
eq(concatenated_summary) end it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do - content[:contents] << { poster: "asd2", id: 2, text: summarize_text } max_length_response = "(1 asd said: This is a text " chunk_of_chunks = "I'm smol" @@ -61,9 +61,9 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do chunk_of_chunks, concatenated_summary, ], - ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } } + ) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(5) } } - expect(result[:summary]).to eq(concatenated_summary) + expect(result.summarized_text).to eq(concatenated_summary) end end end diff --git a/spec/lib/modules/summarization/strategies/topic_gist_spec.rb b/spec/lib/modules/summarization/strategies/topic_gist_spec.rb new file mode 100644 index 00000000..ecea03ec --- /dev/null +++ b/spec/lib/modules/summarization/strategies/topic_gist_spec.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Summarization::Strategies::TopicGist do + subject(:gist) { described_class.new(topic) } + + fab!(:topic) { Fabricate(:topic, highest_post_number: 25) } + fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) } + fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) } + + describe "#targets_data" do + context "when the topic has more than 20 posts" do + before do + offset = 3 # Already created posts 1 and 2 + (topic.highest_post_number - 2).times do |i| + Fabricate(:post, topic: topic, post_number: i + offset) + end + end + + it "includes the OP and the last 20 posts" do + content = gist.targets_data + post_numbers = content[:contents].map { |c| c[:id] } + + expected = (6..25).to_a << 1 + + expect(post_numbers).to contain_exactly(*expected) + end + end + + it "only includes visible posts" do + post_2.update!(hidden: true) + + post_numbers = gist.targets_data[:contents].map { |c| c[:id] } + + 
expect(post_numbers).to contain_exactly(1) + end + + it "doesn't include posts without users" do + post_2.update!(user_id: nil) + + post_numbers = gist.targets_data[:contents].map { |c| c[:id] } + + expect(post_numbers).to contain_exactly(1) + end + + it "doesn't include whispers" do + post_2.update!(post_type: Post.types[:whisper]) + + post_numbers = gist.targets_data[:contents].map { |c| c[:id] } + + expect(post_numbers).to contain_exactly(1) + end + + context "when the topic has embed content cached" do + it "embed content is used instead of the raw text" do + topic_embed = + Fabricate( + :topic_embed, + topic: topic, + embed_content_cache: "

hello world new post :D

", + ) + + content = gist.targets_data + + op_content = content[:contents].first[:text] + + expect(op_content).to include(topic_embed.embed_content_cache) + end + end + end +end diff --git a/spec/lib/modules/summarization/strategies/topic_summary_spec.rb b/spec/lib/modules/summarization/strategies/topic_summary_spec.rb new file mode 100644 index 00000000..329404a5 --- /dev/null +++ b/spec/lib/modules/summarization/strategies/topic_summary_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Summarization::Strategies::TopicSummary do + subject(:topic_summary) { described_class.new(topic) } + + fab!(:topic) { Fabricate(:topic, highest_post_number: 25) } + fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) } + fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) } + + describe "#targets_data" do + shared_examples "includes only public-visible topics" do + it "only includes visible posts" do + post_2.update!(hidden: true) + + post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] } + + expect(post_numbers).to contain_exactly(1) + end + + it "doesn't include posts without users" do + post_2.update!(user_id: nil) + + post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] } + + expect(post_numbers).to contain_exactly(1) + end + + it "doesn't include whispers" do + post_2.update!(post_type: Post.types[:whisper]) + + post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] } + + expect(post_numbers).to contain_exactly(1) + end + end + + context "when the topic has a best replies summary" do + before { topic.update(has_summary: true) } + + it_behaves_like "includes only public-visible topics" + end + + context "when the topic doesn't have a best replies summary" do + before { topic.update(has_summary: false) } + + it_behaves_like "includes only public-visible topics" + end + + context "when the topic has embed content cached" do + it "embed content is used instead of the raw 
text" do + topic_embed = + Fabricate( + :topic_embed, + topic: topic, + embed_content_cache: "

hello world new post :D

", + ) + + content = topic_summary.targets_data + + op_content = content[:contents].first[:text] + + expect(op_content).to include(topic_embed.embed_content_cache) + end + end + end +end diff --git a/spec/requests/summarization/summary_controller_spec.rb b/spec/requests/summarization/summary_controller_spec.rb index 9206c3d9..493d7c85 100644 --- a/spec/requests/summarization/summary_controller_spec.rb +++ b/spec/requests/summarization/summary_controller_spec.rb @@ -19,6 +19,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do summarized_text: "test", algorithm: "test", original_content_sha: "test", + summary_type: AiSummary.summary_types[:complete], ) sign_in(Fabricate(:admin)) @@ -47,6 +48,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do summarized_text: "test", algorithm: "test", original_content_sha: "test", + summary_type: AiSummary.summary_types[:complete], ) get "/discourse-ai/summarization/t/#{topic.id}.json" @@ -133,6 +135,7 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do summarized_text: "test", algorithm: "test", original_content_sha: "test", + summary_type: AiSummary.summary_types[:complete], ) get "/discourse-ai/summarization/t/#{topic.id}.json" diff --git a/spec/services/discourse_ai/topic_summarization_spec.rb b/spec/services/discourse_ai/topic_summarization_spec.rb index ba505bc1..eca5a13e 100644 --- a/spec/services/discourse_ai/topic_summarization_spec.rb +++ b/spec/services/discourse_ai/topic_summarization_spec.rb @@ -11,55 +11,14 @@ describe DiscourseAi::TopicSummarization do SiteSetting.ai_summarization_enabled = true end - let(:strategy) { DiscourseAi::Summarization.default_strategy } - - shared_examples "includes only public-visible topics" do - subject { DiscourseAi::TopicSummarization.new(strategy, topic, user) } - - it "only includes visible posts" do - topic.first_post.update!(hidden: true) - - posts = subject.summary_targets - - expect(posts.none?(&:hidden?)).to eq(true) - end - - it 
"doesn't include posts without users" do - topic.first_post.user.destroy! - - posts = subject.summary_targets - - expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil - end - - it "doesn't include deleted posts" do - topic.first_post.update!(user_id: nil) - - posts = subject.summary_targets - - expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil - end - end - - describe "#summary_targets" do - context "when the topic has a best replies summary" do - before { topic.has_summary = true } - - it_behaves_like "includes only public-visible topics" - end - - context "when the topic doesn't have a best replies summary" do - before { topic.has_summary = false } - - it_behaves_like "includes only public-visible topics" - end - end + let(:strategy) { DiscourseAi::Summarization.topic_summary(topic) } describe "#summarize" do - subject(:summarization) { described_class.new(strategy, topic, user) } + subject(:summarization) { described_class.new(strategy, user) } def assert_summary_is_cached(topic, summary_response) - cached_summary = AiSummary.find_by(target: topic) + cached_summary = + AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete]) expect(cached_summary.content_range).to cover(*topic.posts.map(&:post_number)) expect(cached_summary.summarized_text).to eq(summary) @@ -82,41 +41,15 @@ describe DiscourseAi::TopicSummarization do summarization.summarize cached_summary_text = "This is a cached summary" - AiSummary.find_by(target: topic).update!( + AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete]).update!( summarized_text: cached_summary_text, updated_at: 24.hours.ago, ) - summarization = described_class.new(strategy, topic, user) + summarization = described_class.new(strategy, user) section = summarization.summarize expect(section.summarized_text).to eq(cached_summary_text) end - - context "when the topic has embed content cached" do - it "embed content is used instead of the raw text" 
do - topic_embed = - Fabricate( - :topic_embed, - topic: topic, - embed_content_cache: "

hello world new post :D

", - ) - - DiscourseAi::Completions::Llm.with_prepared_responses(["A summary"]) do |spy| - summarization.summarize - - prompt_raw = - spy - .prompt_messages - .reduce(+"") do |memo, m| - memo << m[:content] << "\n" - - memo - end - - expect(prompt_raw).to include(topic_embed.embed_content_cache) - end - end - end end describe "invalidating cached summaries" do @@ -124,7 +57,7 @@ describe DiscourseAi::TopicSummarization do let(:updated_summary) { "This is the final summary" } def cached_summary - AiSummary.find_by(target: topic) + AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete]) end before do @@ -133,8 +66,8 @@ describe DiscourseAi::TopicSummarization do # since it is glued to the old llm instance # so we create the cached summary totally independantly DiscourseAi::Completions::Llm.with_prepared_responses([cached_text]) do - strategy = DiscourseAi::Summarization.default_strategy - described_class.new(strategy, topic, user).summarize + strategy = DiscourseAi::Summarization.topic_summary(topic) + described_class.new(strategy, user).summarize end cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago) diff --git a/spec/system/summarization/topic_summarization_spec.rb b/spec/system/summarization/topic_summarization_spec.rb index a5aae5ab..fabe2dfa 100644 --- a/spec/system/summarization/topic_summarization_spec.rb +++ b/spec/system/summarization/topic_summarization_spec.rb @@ -32,6 +32,7 @@ RSpec.describe "Summarize a topic ", type: :system do summarized_text: summarization_result, algorithm: "test", original_content_sha: "test", + summary_type: AiSummary.summary_types[:complete], ) end