FEATURE: Personas powered summaries. (#1232)

* REFACTOR: Move personas into its own module.

* WIP: Use personas for summarization

* Prioritize persona default LLM or fallback to newest one

* Simplify summarization strategy

* Keep ai_summarization_model as a fallback
This commit is contained in:
Roman Rizzi 2025-04-02 12:54:47 -03:00 committed by GitHub
parent 32da999144
commit 0d60aca6ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
31 changed files with 333 additions and 353 deletions

View File

@ -47,9 +47,9 @@ module DiscourseAi
def discover
ai_persona =
AiPersona.all_personas.find do |persona|
persona.id == SiteSetting.ai_bot_discover_persona.to_i
end
AiPersona
.all_personas(enabled_only: false)
.find { |persona| persona.id == SiteSetting.ai_bot_discover_persona.to_i }
if ai_persona.nil? || !current_user.in_any_groups?(ai_persona.allowed_group_ids.to_a)
raise Discourse::InvalidAccess.new

View File

@ -9,9 +9,9 @@ module Jobs
return if (query = args[:query]).blank?
ai_persona_klass =
AiPersona.all_personas.find do |persona|
persona.id == SiteSetting.ai_bot_discover_persona.to_i
end
AiPersona
.all_personas(enabled_only: false)
.find { |persona| persona.id == SiteSetting.ai_bot_discover_persona.to_i }
if ai_persona_klass.nil? || !user.in_any_groups?(ai_persona_klass.allowed_group_ids.to_a)
return

View File

@ -46,13 +46,18 @@ class AiPersona < ActiveRecord::Base
scope :ordered, -> { order("priority DESC, lower(name) ASC") }
def self.all_personas
def self.all_personas(enabled_only: true)
persona_cache[:value] ||= AiPersona
.ordered
.where(enabled: true)
.all
.limit(MAX_PERSONAS_PER_SITE)
.map(&:class_instance)
if enabled_only
persona_cache[:value].select { |p| p.enabled }
else
persona_cache[:value]
end
end
def self.persona_users(user: nil)
@ -176,6 +181,7 @@ class AiPersona < ActiveRecord::Base
description
allowed_group_ids
tool_details
enabled
]
instance_attributes = {}

View File

@ -303,6 +303,12 @@ en:
web_artifact_creator:
name: "Web Artifact Creator"
description: "AI Bot specialized in creating interactive web artifacts"
summarizer:
name: "Summarizer"
description: "Default persona used to power AI summaries"
short_summarizer:
name: "Summarizer (short form)"
description: "Default persona used to power AI short summaries for topic lists' items"
topic_not_found: "Summary unavailable, topic not found!"
summarizing: "Summarizing topic"
searching: "Searching for: '%{query}'"
@ -452,6 +458,7 @@ en:
llm:
configuration:
create_llm: "You need to setup an LLM before enabling this feature"
disable_module_first: "You have to disable %{setting} first."
set_llm_first: "Set %{setting} first"
model_unreachable: "We couldn't get a response from this model. Check your settings first."

View File

@ -240,18 +240,30 @@ discourse_ai:
type: enum
enum: "DiscourseAi::Configuration::LlmEnumerator"
validator: "DiscourseAi::Configuration::LlmValidator"
hidden: true
ai_summarization_persona:
default: "-11"
type: enum
enum: "DiscourseAi::Configuration::PersonaEnumerator"
ai_pm_summarization_allowed_groups:
type: group_list
list_type: compact
default: ""
ai_custom_summarization_allowed_groups:
ai_custom_summarization_allowed_groups: # Deprecated. TODO(roman): Remove 2025-09-01
type: group_list
list_type: compact
default: "3|13" # 3: @staff, 13: @trust_level_3
hidden: true
ai_summary_gists_enabled:
default: false
hidden: true
ai_summary_gists_allowed_groups:
ai_summary_gists_persona:
default: "-12"
type: enum
enum: "DiscourseAi::Configuration::PersonaEnumerator"
hidden: true
ai_summary_gists_allowed_groups: # Deprecated. TODO(roman): Remove 2025-09-01
type: group_list
list_type: compact
default: "0" #everyone

View File

@ -1,17 +1,39 @@
# frozen_string_literal: true
summarization_personas = [DiscourseAi::Personas::Summarizer, DiscourseAi::Personas::ShortSummarizer]
def from_setting(setting_name)
DB.query_single(
"SELECT value FROM site_settings WHERE name = :setting_name",
setting_name: setting_name,
)
end
DiscourseAi::Personas::Persona.system_personas.each do |persona_class, id|
persona = AiPersona.find_by(id: id)
if !persona
persona = AiPersona.new
persona.id = id
if persona_class == DiscourseAi::Personas::WebArtifactCreator
# this is somewhat sensitive, so we default it to staff
persona.allowed_group_ids = [Group::AUTO_GROUPS[:staff]]
elsif summarization_personas.include?(persona_class)
# Copy group permissions from site settings.
default_groups = [Group::AUTO_GROUPS[:staff], Group::AUTO_GROUPS[:trust_level_3]]
setting_name = "ai_custom_summarization_allowed_groups"
if persona_class == DiscourseAi::Personas::ShortSummarizer
setting_name = "ai_summary_gists_allowed_groups"
default_groups = [] # Blank == everyone
end
persona.allowed_group_ids = from_setting(setting_name).first&.split("|") || default_groups
else
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
end
persona.enabled = true
persona.enabled = !summarization_personas.include?(persona_class)
persona.priority = true if persona_class == DiscourseAi::Personas::General
end
@ -22,16 +44,16 @@ DiscourseAi::Personas::Persona.system_personas.each do |persona_class, id|
persona_class.name + SecureRandom.hex,
]
persona.name = DB.query_single(<<~SQL, names, id).first
SELECT guess_name
FROM (
SELECT unnest(Array[?]) AS guess_name
FROM (SELECT 1) as t
) x
LEFT JOIN ai_personas ON ai_personas.name = x.guess_name AND ai_personas.id <> ?
WHERE ai_personas.id IS NULL
ORDER BY x.guess_name ASC
LIMIT 1
SQL
SELECT guess_name
FROM (
SELECT unnest(Array[?]) AS guess_name
FROM (SELECT 1) as t
) x
LEFT JOIN ai_personas ON ai_personas.name = x.guess_name AND ai_personas.id <> ?
WHERE ai_personas.id IS NULL
ORDER BY x.guess_name ASC
LIMIT 1
SQL
persona.description = persona_class.description

View File

@ -10,17 +10,27 @@ module DiscourseAi
def valid_value?(val)
return true if val == "f"
@llm_dependency_setting_name =
DiscourseAi::Configuration::LlmValidator.new.choose_llm_setting_for(@opts[:name])
if @opts[:name] == :ai_summarization_enabled
has_llms = LlmModel.count > 0
@no_llms_configured = !has_llms
has_llms
else
@llm_dependency_setting_name =
DiscourseAi::Configuration::LlmValidator.new.choose_llm_setting_for(@opts[:name])
SiteSetting.public_send(@llm_dependency_setting_name).present?
SiteSetting.public_send(@llm_dependency_setting_name).present?
end
end
def error_message
I18n.t(
"discourse_ai.llm.configuration.set_llm_first",
setting: @llm_dependency_setting_name,
)
if @llm_dependency_setting_name
I18n.t(
"discourse_ai.llm.configuration.set_llm_first",
setting: @llm_dependency_setting_name,
)
elsif @no_llms_configured
I18n.t("discourse_ai.llm.configuration.create_llm")
end
end
end
end

View File

@ -26,7 +26,9 @@ module DiscourseAi
end
if SiteSetting.ai_summarization_enabled
model_id = SiteSetting.ai_summarization_model.split(":").last.to_i
summarization_persona = AiPersona.find_by(id: SiteSetting.ai_summarization_persona)
model_id = summarization_persona.default_llm_id || LlmModel.last&.id
rval[model_id] << { type: :ai_summarization }
end

View File

@ -10,7 +10,9 @@ module DiscourseAi
end
def self.values
AiPersona.all_personas.map { |persona| { name: persona.name, value: persona.id } }
AiPersona
.all_personas(enabled_only: false)
.map { |persona| { name: persona.name, value: persona.id } }
end
end
end

View File

@ -6,7 +6,7 @@ module DiscourseAi
def initialize(body)
@persona =
AiPersona
.all_personas
.all_personas(enabled_only: false)
.find { |persona| persona.id == SiteSetting.ai_discord_search_persona.to_i }
.new
@bot =

View File

@ -24,24 +24,26 @@ module DiscourseAi
def can_see_gists?
return false if !SiteSetting.ai_summarization_enabled
return false if !SiteSetting.ai_summary_gists_enabled
if SiteSetting.ai_summary_gists_allowed_groups.to_s == Group::AUTO_GROUPS[:everyone].to_s
return true
end
return false if anonymous?
return false if SiteSetting.ai_summary_gists_allowed_groups_map.empty?
SiteSetting.ai_summary_gists_allowed_groups_map.any? do |group_id|
user.group_ids.include?(group_id)
if (ai_persona = AiPersona.find_by(id: SiteSetting.ai_summary_gists_persona)).blank?
return false
end
persona_groups = ai_persona.allowed_group_ids.to_a
return true if persona_groups.empty?
return false if anonymous?
ai_persona.allowed_group_ids.to_a.any? { |group_id| user.group_ids.include?(group_id) }
end
def can_request_summary?
return false if anonymous?
user_group_ids = user.group_ids
SiteSetting.ai_custom_summarization_allowed_groups_map.any? do |group_id|
user_group_ids.include?(group_id)
if (ai_persona = AiPersona.find_by(id: SiteSetting.ai_summarization_persona)).blank?
return false
end
ai_persona.allowed_group_ids.to_a.any? { |group_id| user.group_ids.include?(group_id) }
end
def can_debug_ai_bot_conversation?(target)

View File

@ -28,7 +28,7 @@ module DiscourseAi
attr_accessor :persona
def llm
@llm ||= DiscourseAi::Completions::Llm.proxy(model)
DiscourseAi::Completions::Llm.proxy(model)
end
def force_tool_if_needed(prompt, context)
@ -51,12 +51,12 @@ module DiscourseAi
end
end
def reply(context, &update_blk)
def reply(context, llm_args: {}, &update_blk)
unless context.is_a?(BotContext)
raise ArgumentError, "context must be an instance of BotContext"
end
llm = DiscourseAi::Completions::Llm.proxy(model)
prompt = persona.craft_prompt(context, llm: llm)
current_llm = llm
prompt = persona.craft_prompt(context, llm: current_llm)
total_completions = 0
ongoing_chain = true
@ -67,6 +67,7 @@ module DiscourseAi
llm_kwargs = { user: user }
llm_kwargs[:temperature] = persona.temperature if persona.temperature
llm_kwargs[:top_p] = persona.top_p if persona.top_p
llm_kwargs[:max_tokens] = llm_args[:max_tokens] if llm_args[:max_tokens].present?
needs_newlines = false
tools_ran = 0
@ -82,9 +83,9 @@ module DiscourseAi
current_thinking = []
result =
llm.generate(
current_llm.generate(
prompt,
feature_name: "bot",
feature_name: context.feature_name,
partial_tool_calls: allow_partial_tool_calls,
output_thinking: true,
**llm_kwargs,
@ -93,7 +94,7 @@ module DiscourseAi
persona.find_tool(
partial,
bot_user: user,
llm: llm,
llm: current_llm,
context: context,
existing_tools: existing_tools,
)
@ -120,7 +121,7 @@ module DiscourseAi
process_tool(
tool: tool,
raw_context: raw_context,
llm: llm,
current_llm: current_llm,
cancel: cancel,
update_blk: update_blk,
prompt: prompt,
@ -204,7 +205,7 @@ module DiscourseAi
def process_tool(
tool:,
raw_context:,
llm:,
current_llm:,
cancel:,
update_blk:,
prompt:,
@ -212,7 +213,7 @@ module DiscourseAi
current_thinking:
)
tool_call_id = tool.tool_call_id
invocation_result_json = invoke_tool(tool, llm, cancel, context, &update_blk).to_json
invocation_result_json = invoke_tool(tool, cancel, context, &update_blk).to_json
tool_call_message = {
type: :tool_call,
@ -246,7 +247,7 @@ module DiscourseAi
raw_context << [invocation_result_json, tool_call_id, "tool", tool.name]
end
def invoke_tool(tool, llm, cancel, context, &update_blk)
def invoke_tool(tool, cancel, context, &update_blk)
show_placeholder = !context.skip_tool_details && !tool.class.allow_partial_tool_calls?
update_blk.call("", cancel, build_placeholder(tool.summary, "")) if show_placeholder

View File

@ -14,7 +14,9 @@ module DiscourseAi
:chosen_tools,
:message_id,
:channel_id,
:context_post_ids
:context_post_ids,
:feature_name,
:resource_url
def initialize(
post: nil,
@ -29,7 +31,9 @@ module DiscourseAi
time: nil,
message_id: nil,
channel_id: nil,
context_post_ids: nil
context_post_ids: nil,
feature_name: "bot",
resource_url: nil
)
@participants = participants
@user = user
@ -45,6 +49,7 @@ module DiscourseAi
@site_title = site_title
@site_description = site_description
@time = time
@feature_name = feature_name
if post
@post_id = post.id
@ -56,7 +61,7 @@ module DiscourseAi
end
# these are strings that can be safely interpolated into templates
TEMPLATE_PARAMS = %w[time site_url site_title site_description participants]
TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url]
def lookup_template_param(key)
public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key)
@ -100,6 +105,8 @@ module DiscourseAi
site_title: @site_title,
site_description: @site_description,
skip_tool_details: @skip_tool_details,
feature_name: @feature_name,
resource_url: @resource_url,
}
end
end

View File

@ -44,6 +44,8 @@ module DiscourseAi
DiscourseHelper => -8,
GithubHelper => -9,
WebArtifactCreator => -10,
Summarizer => -11,
ShortSummarizer => -12,
}
end

View File

@ -0,0 +1,26 @@
# frozen_string_literal: true
module DiscourseAi
module Personas
# System persona (seeded with id -12, see Persona.system_personas) that powers
# AI "gist" summaries: single-sentence, <= 40-word digests shown for topic-list items.
# Selected via the `ai_summary_gists_persona` site setting.
class ShortSummarizer < Persona
# Returns the fixed system prompt used for short-form summarization.
# The model is instructed to wrap its answer in <ai></ai> tags; the caller
# (Summarization::FoldContent#summarize) strips that wrapper via Nokogiri.
def system_prompt
<<~PROMPT.strip
You are an advanced summarization bot. Analyze a given conversation and produce a concise,
single-sentence summary that conveys the main topic and current developments to someone with no prior context.
### Guidelines:
- Emphasize the most recent updates while considering their significance within the original post.
- Focus on the central theme or issue being addressed, maintaining an objective and neutral tone.
- Exclude extraneous details or subjective opinions.
- Use the original language of the text.
- Begin directly with the main topic or issue, avoiding introductory phrases.
- Limit the summary to a maximum of 40 words.
- Do *NOT* repeat the discussion title in the summary.
Return the summary inside <ai></ai> tags.
PROMPT
end
end
end
end

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
module DiscourseAi
module Personas
# System persona (seeded with id -11, see Persona.system_personas) that powers
# full AI topic summaries. Selected via the `ai_summarization_persona` site setting.
class Summarizer < Persona
# Returns the fixed system prompt used for long-form summarization.
# `{resource_url}` is a template param interpolated by BotContext
# (see BotContext::TEMPLATE_PARAMS) with the topic's relative URL, so the
# model can emit post-level citation links.
def system_prompt
<<~PROMPT.strip
You are an advanced summarization bot that generates concise, coherent summaries of provided text.
You are also capable of enhancing an existing summary by incorporating additional posts if asked to.
- Only include the summary, without any additional commentary.
- You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
- Maintain the original language of the text being summarized.
- Aim for summaries to be 400 words or less.
- Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
- Cite specific noteworthy posts using the format [DESCRIPTION]({resource_url}/POST_NUMBER)
- Example: links to the 3rd and 6th posts by sam: sam ([#3]({resource_url}/3), [#6]({resource_url}/6))
- Example: link to the 6th post by jane: [agreed with]({resource_url}/6)
- Example: link to the 13th post by joe: [joe]({resource_url}/13)
- When formatting usernames either use @USERNAME OR [USERNAME]({resource_url}/POST_NUMBER)
PROMPT
end
end
end
end

View File

@ -2,37 +2,78 @@
module DiscourseAi
module Summarization
def self.topic_summary(topic)
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
class << self
def topic_summary(topic)
return nil if !SiteSetting.ai_summarization_enabled
if (ai_persona = AiPersona.find_by(id: SiteSetting.ai_summarization_persona)).blank?
return nil
end
persona_klass = ai_persona.class_instance
llm_model = find_summarization_model(persona_klass)
return nil if llm_model.blank?
DiscourseAi::Summarization::FoldContent.new(
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
build_bot(persona_klass, llm_model),
DiscourseAi::Summarization::Strategies::TopicSummary.new(topic),
)
else
nil
end
end
def self.topic_gist(topic)
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
def topic_gist(topic)
return nil if !SiteSetting.ai_summarization_enabled
if (ai_persona = AiPersona.find_by(id: SiteSetting.ai_summary_gists_persona)).blank?
return nil
end
persona_klass = ai_persona.class_instance
llm_model = find_summarization_model(persona_klass)
return nil if llm_model.blank?
DiscourseAi::Summarization::FoldContent.new(
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
build_bot(persona_klass, llm_model),
DiscourseAi::Summarization::Strategies::HotTopicGists.new(topic),
)
else
nil
end
end
def self.chat_channel_summary(channel, time_window_in_hours)
if SiteSetting.ai_summarization_model.present? && SiteSetting.ai_summarization_enabled
def chat_channel_summary(channel, time_window_in_hours)
return nil if !SiteSetting.ai_summarization_enabled
if (ai_persona = AiPersona.find_by(id: SiteSetting.ai_summarization_persona)).blank?
return nil
end
persona_klass = ai_persona.class_instance
llm_model = find_summarization_model(persona_klass)
return nil if llm_model.blank?
DiscourseAi::Summarization::FoldContent.new(
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_summarization_model),
build_bot(persona_klass, llm_model),
DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, time_window_in_hours),
persist_summaries: false,
)
else
nil
end
# Priorities are:
# 1. Persona's default LLM
# 2. Hidden `ai_summarization_model` setting
# 3. Newest LLM config
def find_summarization_model(persona_klass)
model_id =
persona_klass.default_llm_id || SiteSetting.ai_summarization_model&.split(":")&.last # Remove legacy custom provider.
if model_id.present?
LlmModel.find_by(id: model_id)
else
LlmModel.last
end
end
### Private
def build_bot(persona_klass, llm_model)
persona = persona_klass.new
user = User.find_by(id: persona_klass.user_id) || Discourse.system_user
bot = DiscourseAi::Personas::Bot.as(user, persona: persona, model: llm_model)
end
end
end

View File

@ -6,7 +6,11 @@ module DiscourseAi
def inject_into(plugin)
plugin.add_to_serializer(:current_user, :can_summarize) do
return false if !SiteSetting.ai_summarization_enabled
scope.user.in_any_groups?(SiteSetting.ai_custom_summarization_allowed_groups_map)
if (ai_persona = AiPersona.find_by(id: SiteSetting.ai_summarization_persona)).blank?
return false
end
scope.user.in_any_groups?(ai_persona.allowed_group_ids.to_a)
end
plugin.add_to_serializer(:topic_view, :summarizable) do

View File

@ -9,13 +9,13 @@ module DiscourseAi
# into a final version.
#
class FoldContent
def initialize(llm, strategy, persist_summaries: true)
@llm = llm
def initialize(bot, strategy, persist_summaries: true)
@bot = bot
@strategy = strategy
@persist_summaries = persist_summaries
end
attr_reader :llm, :strategy
attr_reader :bot, :strategy
# @param user { User } - User object used for auditing usage.
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
@ -25,15 +25,11 @@ module DiscourseAi
#
# @returns { AiSummary } - Resulting summary.
def summarize(user, &on_partial_blk)
base_summary = ""
initial_pos = 0
truncated_content = content_to_summarize.map { |cts| truncate(cts) }
folded_summary = fold(truncated_content, base_summary, initial_pos, user, &on_partial_blk)
summary = fold(truncated_content, user, &on_partial_blk)
clean_summary =
Nokogiri::HTML5.fragment(folded_summary).css("ai")&.first&.text || folded_summary
clean_summary = Nokogiri::HTML5.fragment(summary).css("ai")&.first&.text || summary
if persist_summaries
AiSummary.store!(
@ -76,7 +72,7 @@ module DiscourseAi
attr_reader :persist_summaries
def llm_model
llm.llm_model
bot.llm.llm_model
end
def content_to_summarize
@ -88,52 +84,51 @@ module DiscourseAi
end
# @param items { Array<Hash> } - Content to summarize. Structure will be: { poster: who wrote the content, id: a way to order content, text: content }
# @param summary { String } - Intermediate summaries that we'll keep extending as part of our "folding" algorithm.
# @param cursor { Integer } - Idx to know how much we already summarized.
# @param user { User } - User object used for auditing usage.
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
# Note: The block is only called with results of the final summary, not intermediate summaries.
#
# The summarization algorithm.
# The idea is to build an initial summary packing as much content as we can. Once we have the initial summary, we'll keep extending using the leftover
# content until there is nothing left.
# It will summarize as much content as possible given the model's context window. It will prioritize newer content in case it doesn't fit.
#
# @returns { String } - Resulting summary.
def fold(items, summary, cursor, user, &on_partial_blk)
def fold(items, user, &on_partial_blk)
tokenizer = llm_model.tokenizer_class
tokens_left = available_tokens - tokenizer.size(summary)
iteration_content = []
tokens_left = available_tokens
content_in_window = []
items.each_with_index do |item, idx|
next if idx < cursor
as_text = "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
if tokenizer.below_limit?(as_text, tokens_left)
iteration_content << item
content_in_window << item
tokens_left -= tokenizer.size(as_text)
cursor += 1
else
break
end
end
prompt =
(
if summary.blank?
strategy.first_summary_prompt(iteration_content)
else
strategy.summary_extension_prompt(summary, iteration_content)
end
context =
DiscourseAi::Personas::BotContext.new(
user: user,
skip_tool_details: true,
feature_name: strategy.feature,
resource_url: "#{Discourse.base_path}/t/-/#{strategy.target.id}",
messages: strategy.as_llm_messages(content_in_window),
)
if cursor == items.length
llm.generate(prompt, user: user, feature_name: strategy.feature, &on_partial_blk)
else
latest_summary =
llm.generate(prompt, user: user, max_tokens: 600, feature_name: strategy.feature)
fold(items, latest_summary, cursor, user, &on_partial_blk)
end
summary = +""
buffer_blk =
Proc.new do |partial, cancel, placeholder, type|
if type.blank?
summary << partial
on_partial_blk.call(partial, cancel) if on_partial_blk
end
end
bot.reply(context, &buffer_blk)
summary
end
def available_tokens
@ -159,6 +154,12 @@ module DiscourseAi
item
end
def text_only_update(&on_partial_blk)
Proc.new do |partial, cancel, placeholder, type|
on_partial_blk.call(partial, cancel) if type.blank?
end
end
end
end
end

View File

@ -33,13 +33,8 @@ module DiscourseAi
raise NotImplementedError
end
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when extending an existing summary.
def summary_extension_prompt(_summary, _texts_to_summarize)
raise NotImplementedError
end
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM for summarizing a single chunk of content.
def first_summary_prompt(_input)
# @returns { Array } - Prompt messages to send to the LLM for summarizing content.
def as_llm_messages(_input)
raise NotImplementedError
end

View File

@ -27,70 +27,15 @@ module DiscourseAi
.map { { id: _1, poster: _2, text: _3, last_version_at: _4 } }
end
def summary_extension_prompt(summary, contents)
input =
contents
.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
.join("\n")
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot tasked with expanding on an existing summary by incorporating new chat messages.
Your goal is to seamlessly integrate the additional information into the existing summary, preserving the clarity and insights of the original while reflecting any new developments, themes, or conclusions.
Analyze the new messages to identify key themes, participants' intentions, and any significant decisions or resolutions.
Update the summary to include these aspects in a way that remains concise, comprehensive, and accessible to someone with no prior context of the conversation.
### Guidelines:
- Merge the new information naturally with the existing summary without redundancy.
- Only include the updated summary, WITHOUT additional commentary.
- Don't mention the channel title. Avoid extraneous details or subjective opinions.
- Maintain the original language of the text being summarized.
- The same user could write multiple messages in a row, don't treat them as different persons.
- Aim for summaries to be extended by a reasonable amount, but strive to maintain a total length of 400 words or less, unless absolutely necessary for comprehensiveness.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
### Context:
This is the existing summary:
#{summary}
These are the new chat messages:
#{input}
Intengrate the new messages into the existing summary.
TEXT
prompt
end
def first_summary_prompt(contents)
def as_llm_messages(contents)
content_title = target.name
input =
contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot designed to generate clear and insightful paragraphs that conveys the main topics
and developments from a series of chat messages within a user-selected time window.
[{ type: :user, content: <<~TEXT.strip }]
#{content_title.present? ? "These texts come from a chat channel called " + content_title + ".\n" : ""}
Analyze the messages to extract key themes, participants' intentions, and any significant conclusions or decisions.
Your summary should be concise yet comprehensive, providing an overview that is accessible to someone with no prior context of the conversation.
- Only include the summary, WITHOUT additional commentary.
- Don't mention the channel title. Avoid including extraneous details or subjective opinions.
- Maintain the original language of the text being summarized.
- The same user could write multiple messages in a row, don't treat them as different persons.
- Aim for summaries to be 400 words or less.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
#{content_title.present? ? "The name of the channel is: " + content_title + ".\n" : ""}
Here are the messages, inside <input></input> XML tags:
Here are the texts, inside <input></input> XML tags:
<input>
#{input}
@ -98,8 +43,6 @@ module DiscourseAi
Generate a summary of the given chat messages.
TEXT
prompt
end
private

View File

@ -62,69 +62,11 @@ module DiscourseAi
end
end
def summary_extension_prompt(summary, contents)
statements =
contents
.to_a
.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
.join("\n")
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip, topic_id: target.id)
You are an advanced summarization bot. Your task is to update an existing single-sentence summary by integrating new developments from a conversation.
Analyze the most recent messages to identify key updates or shifts in the main topic and reflect these in the updated summary.
Emphasize new significant information or developments within the context of the initial conversation theme.
### Guidelines:
- Ensure the revised summary remains concise and objective, maintaining a focus on the central theme or issue.
- Omit extraneous details or subjective opinions.
- Use the original language of the text.
- Begin directly with the main topic or issue, avoiding introductory phrases.
- Limit the updated summary to a maximum of 40 words.
- Return the 40-word summary inside <ai></ai> tags.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
### Context:
This is the existing single-sentence summary:
#{summary}
And these are the new developments in the conversation:
#{statements}
Your task is to update an existing single-sentence summary by integrating new developments from a conversation.
Return the 40-word summary inside <ai></ai> tags.
TEXT
prompt
end
def first_summary_prompt(contents)
def as_llm_messages(contents)
content_title = target.title
statements =
contents.to_a.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip, topic_id: target.id)
You are an advanced summarization bot. Analyze a given conversation and produce a concise,
single-sentence summary that conveys the main topic and current developments to someone with no prior context.
### Guidelines:
- Emphasize the most recent updates while considering their significance within the original post.
- Focus on the central theme or issue being addressed, maintaining an objective and neutral tone.
- Exclude extraneous details or subjective opinions.
- Use the original language of the text.
- Begin directly with the main topic or issue, avoiding introductory phrases.
- Limit the summary to a maximum of 40 words.
- Do *NOT* repeat the discussion title in the summary.
Return the summary inside <ai></ai> tags.\n
TEXT
context = +<<~TEXT
### Context:
@ -147,11 +89,9 @@ module DiscourseAi
context << "Your task is to capture the meaning of the initial statement."
end
prompt.push(type: :user, content: <<~TEXT.strip)
[{ type: :user, content: <<~TEXT.strip }]
#{context} Return the 40-word summary inside <ai></ai> tags.
TEXT
prompt
end
end
end

View File

@ -38,82 +38,26 @@ module DiscourseAi
end
end
def summary_extension_prompt(summary, contents)
resource_path = "#{Discourse.base_path}/t/-/#{target.id}"
content_title = target.title
input =
contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]})" }.join
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT, topic_id: target.id)
You are an advanced summarization bot tasked with enhancing an existing summary by incorporating additional posts.
### Guidelines:
- Only include the enhanced summary, without any additional commentary.
- Understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
- Maintain the original language of the text being summarized.
- Aim for summaries to be 400 words or less.
- Each new post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
- Cite specific noteworthy posts using the format [DESCRIPTION](#{resource_path}/POST_NUMBER)
- Example: links to the 3rd and 6th posts by sam: sam ([#3](#{resource_path}/3), [#6](#{resource_path}/6))
- Example: link to the 6th post by jane: [agreed with](#{resource_path}/6)
- Example: link to the 13th post by joe: [joe](#{resource_path}/13)
- When formatting usernames either use @USERNAME or [USERNAME](#{resource_path}/POST_NUMBER)
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
### Context:
#{content_title.present? ? "The discussion title is: " + content_title + ".\n" : ""}
Here is the existing summary:
#{summary}
Here are the new posts, inside <input></input> XML tags:
<input>
#{input}
</input>
Integrate the new information to generate an enhanced concise and coherent summary.
TEXT
prompt
end
def first_summary_prompt(contents)
def as_llm_messages(contents)
resource_path = "#{Discourse.base_path}/t/-/#{target.id}"
content_title = target.title
input =
contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip, topic_id: target.id)
You are an advanced summarization bot that generates concise, coherent summaries of provided text.
- Only include the summary, without any additional commentary.
- You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
- Maintain the original language of the text being summarized.
- Aim for summaries to be 400 words or less.
- Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
- Cite specific noteworthy posts using the format [DESCRIPTION](#{resource_path}/POST_NUMBER)
- Example: links to the 3rd and 6th posts by sam: sam ([#3](#{resource_path}/3), [#6](#{resource_path}/6))
- Example: link to the 6th post by jane: [agreed with](#{resource_path}/6)
- Example: link to the 13th post by joe: [joe](#{resource_path}/13)
- When formatting usernames either use @USERNMAE OR [USERNAME](#{resource_path}/POST_NUMBER)
TEXT
prompt.push(
messages = []
messages << {
type: :user,
content:
"Here are the posts inside <input></input> XML tags:\n\n<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
)
prompt.push(
}
messages << {
type: :model,
content:
"Two users are sharing their feelings toward Mondays. [user1](#{resource_path}/1) hates them, while [user2](#{resource_path}/2) loves them.",
)
}
prompt.push(type: :user, content: <<~TEXT.strip)
messages << { type: :user, content: <<~TEXT.strip }
#{content_title.present? ? "The discussion title is: " + content_title + ".\n" : ""}
Here are the posts, inside <input></input> XML tags:
@ -124,7 +68,7 @@ module DiscourseAi
Generate a concise, coherent summary of the text above maintaining the original language.
TEXT
prompt
messages
end
private

View File

@ -1,6 +1,6 @@
# frozen_string_literal: true
Fabricator(:ai_persona) do
name "test_bot"
name { sequence(:name) { |i| "persona_#{i}" } }
description "I am a test bot"
system_prompt "You are a test bot"
end

View File

@ -17,11 +17,9 @@ describe DiscourseAi::GuardianExtensions do
describe "#can_see_summary?" do
context "when the user cannot generate a summary" do
before { SiteSetting.ai_custom_summarization_allowed_groups = "" }
before { assign_persona_to(:ai_summarization_persona, []) }
it "returns false" do
SiteSetting.ai_custom_summarization_allowed_groups = ""
expect(guardian.can_see_summary?(topic)).to eq(false)
end
@ -33,7 +31,7 @@ describe DiscourseAi::GuardianExtensions do
end
context "when the user can generate a summary" do
before { SiteSetting.ai_custom_summarization_allowed_groups = group.id }
before { assign_persona_to(:ai_summarization_persona, [group.id]) }
it "returns true if the user group is present in the ai_custom_summarization_allowed_groups_map setting" do
expect(guardian.can_see_summary?(topic)).to eq(true)
@ -41,7 +39,7 @@ describe DiscourseAi::GuardianExtensions do
end
context "when the topic is a PM" do
before { SiteSetting.ai_custom_summarization_allowed_groups = group.id }
before { assign_persona_to(:ai_summarization_persona, [group.id]) }
let(:pm) { Fabricate(:private_message_topic) }
it "returns false" do
@ -68,34 +66,34 @@ describe DiscourseAi::GuardianExtensions do
end
describe "#can_see_gists?" do
before { SiteSetting.ai_summary_gists_allowed_groups = group.id }
before { assign_persona_to(:ai_summary_gists_persona, [group.id]) }
let(:guardian) { Guardian.new(user) }
context "when there is no user" do
context "when access is restricted to the user's group" do
it "returns false when there is a user who is a member of an allowed group" do
expect(guardian.can_see_gists?).to eq(true)
end
it "returns false for anons" do
expect(anon_guardian.can_see_gists?).to eq(false)
end
it "returns false for non-group members" do
other_user_guardian = Guardian.new(Fabricate(:user))
expect(other_user_guardian.can_see_gists?).to eq(false)
end
end
context "when setting is set to everyone" do
before { SiteSetting.ai_summary_gists_allowed_groups = Group::AUTO_GROUPS[:everyone] }
context "when access is set to everyone" do
before { assign_persona_to(:ai_summary_gists_persona, []) }
it "returns true" do
expect(guardian.can_see_gists?).to eq(true)
end
end
context "when there is a user but it's not a member of the allowed groups" do
before { SiteSetting.ai_summary_gists_allowed_groups = "" }
it "returns false" do
expect(guardian.can_see_gists?).to eq(false)
end
end
context "when there is a user who is a member of an allowed group" do
it "returns false" do
expect(guardian.can_see_gists?).to eq(true)
it "returns true for anons" do
expect(anon_guardian.can_see_gists?).to eq(true)
end
end
end

View File

@ -63,7 +63,7 @@ RSpec.describe DiscourseAi::Summarization::EntryPoint do
before do
group.add(user)
SiteSetting.ai_summary_gists_allowed_groups = group.id
assign_persona_to(:ai_summary_gists_persona, [group.id])
SiteSetting.ai_summary_gists_enabled = true
end
@ -82,14 +82,14 @@ RSpec.describe DiscourseAi::Summarization::EntryPoint do
end
it "doesn't include the summary when the user is not a member of the opt-in group" do
SiteSetting.ai_summary_gists_allowed_groups = ""
non_member_user = Fabricate(:user)
gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id }
serialized =
TopicListItemSerializer.new(
gist_topic,
scope: Guardian.new(user),
scope: Guardian.new(non_member_user),
root: false,
filter: :hot,
).as_json

View File

@ -23,33 +23,17 @@ RSpec.describe DiscourseAi::Summarization::FoldContent do
llm_model.update!(max_prompt_tokens: model_tokens)
end
let(:single_summary) { "single" }
let(:concatenated_summary) { "this is a concatenated summary" }
let(:single_summary) { "this is a summary" }
fab!(:user)
context "when the content to summarize fits in a single call" do
it "does one call to summarize content" do
result =
DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy|
summarizer.summarize(user).tap { expect(spy.completions).to eq(1) }
end
it "summarizes the content" do
result =
DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy|
summarizer.summarize(user).tap { expect(spy.completions).to eq(1) }
end
expect(result.summarized_text).to eq(single_summary)
end
end
context "when the content to summarize doesn't fit in a single call" do
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }
it "keeps extending the summary until there is nothing else to process" do
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
[single_summary, concatenated_summary],
) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(2) } }
expect(result.summarized_text).to eq(concatenated_summary)
end
expect(result.summarized_text).to eq(single_summary)
end
end

View File

@ -1,7 +1,7 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Personas::Bot do
subject(:bot) { described_class.as(bot_user) }
subject(:bot) { described_class.as(bot_user, persona: DiscourseAi::Personas::General.new) }
fab!(:admin)
fab!(:gpt_4) { Fabricate(:llm_model, name: "gpt-4") }

View File

@ -15,6 +15,12 @@ module DiscourseAi::ChatBotHelper
SiteSetting.public_send("#{setting_name}=", "custom:#{fake_llm.id}")
end
end
def assign_persona_to(setting_name, allowed_group_ids)
Fabricate(:ai_persona, allowed_group_ids: allowed_group_ids).tap do |p|
SiteSetting.public_send("#{setting_name}=", p.id)
end
end
end
RSpec.configure { |config| config.include DiscourseAi::ChatBotHelper }

View File

@ -12,8 +12,8 @@ RSpec.describe "Summarize a channel since your last visit", type: :system do
group.add(current_user)
assign_fake_provider_to(:ai_summarization_model)
assign_persona_to(:ai_summarization_persona, [group.id])
SiteSetting.ai_summarization_enabled = true
SiteSetting.ai_custom_summarization_allowed_groups = group.id.to_s
SiteSetting.chat_enabled = true
SiteSetting.chat_allowed_groups = group.id.to_s

View File

@ -22,8 +22,8 @@ RSpec.describe "Summarize a topic ", type: :system do
group.add(current_user)
assign_fake_provider_to(:ai_summarization_model)
assign_persona_to(:ai_summarization_persona, [group.id])
SiteSetting.ai_summarization_enabled = true
SiteSetting.ai_custom_summarization_allowed_groups = group.id.to_s
sign_in(current_user)
end