208 lines
7.1 KiB
Ruby
208 lines
7.1 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module Agents
|
|
module Tools
|
|
class Researcher < Tool
|
|
attr_reader :filter, :result_count, :goals, :dry_run
|
|
|
|
class << self
|
|
# Builds the tool definition hash for this tool.
#
# @return [Hash] with +:name+ (the value of +name+), a +:description+
#   string and +:parameters+, an array describing the three arguments the
#   tool reads: +filter+ (string), +goals+ (string) and +dry_run+ (boolean).
def signature
  {
    name: name,
    description:
      "Analyze and extract information from content across the forum based on specified filters",
    parameters: [
      # The filter syntax itself is documented by filter_description.
      { name: "filter", description: filter_description, type: "string" },
      {
        name: "goals",
        description:
          "The specific information you want to extract or analyze from the filtered content, you may specify multiple goals",
        type: "string",
      },
      {
        name: "dry_run",
        description: "When true, only count matching posts without processing data",
        type: "boolean",
      },
    ],
  }
end
|
|
|
|
# Human/LLM readable description of the filter mini-language. It is used as
# the description of the +filter+ parameter in +signature+ and is appended
# to the error returned by +invoke+ when a filter fragment is invalid.
#
# @return [String] multi-line help text, terminated by a newline
def filter_description
  <<~TEXT
    Filter string to target specific content.
    - Supports user (@username)
    - date ranges (after:YYYY-MM-DD, before:YYYY-MM-DD for posts; topic_after:YYYY-MM-DD, topic_before:YYYY-MM-DD for topics)
    - categories (category:category1,category2)
    - tags (tag:tag1,tag2)
    - groups (group:group1,group2).
    - status (status:open, status:closed, status:archived, status:noreplies, status:single_user)
    - keywords (keywords:keyword1,keyword2) - specific words to search for in posts
    - max_results (max_results:10) the maximum number of results to return (optional)
    - order (order:latest, order:oldest, order:latest_topic, order:oldest_topic) - the order of the results (optional)
    - topic (topic:topic_id1,topic_id2) - add specific topics to the filter, topics will unconditionally be included

    If multiple tags or categories are specified, they are treated as OR conditions.

    Multiple filters can be combined with spaces. Example: '@sam after:2023-01-01 tag:feature'
  TEXT
end
|
|
|
|
# Name of this tool; +signature+ publishes it as the +:name+ entry.
#
# @return [String] always "researcher"
def name
  "researcher"
end
|
|
|
|
# Declares the options this tool reads via +options[...]+:
# +max_results+ and +include_private+ are read in +invoke+,
# +max_tokens_per_post+ in +process_filter+.
#
# @return [Array] one entry per option, each built by the +option+ helper
#   (defined outside this file's visible code)
def accepted_options
  [
    option(:max_results, type: :integer),
    option(:include_private, type: :boolean),
    option(:max_tokens_per_post, type: :integer),
  ]
end
|
|
end
|
|
|
|
# Entry point of the tool. Reads the +filter+, +goals+ and +dry_run+
# parameters, validates them, builds a research filter capped at the
# +max_results+ option (1000 when the option is unset), stores the number
# of matches in @result_count and then either reports that count (dry run)
# or delegates to +process_filter+.
#
# The block is optional; when given it receives the progress text produced
# by +details+ once the match count is known.
#
# @return [Hash] +{ error: String }+ when goals or filter are blank or the
#   filter contains invalid fragments; a dry-run summary hash when
#   +dry_run+ is truthy; otherwise whatever +process_filter+ returns.
def invoke(&blk)
  max_results = options[:max_results] || 1000

  # The instance variables back the attr_readers and +description_args+.
  @filter = parameters[:filter] || ""
  @goals = parameters[:goals] || ""
  @dry_run = parameters[:dry_run] || false

  return { error: "No goals provided" } if @goals.blank?
  return { error: "No filter provided" } if @filter.blank?

  # A guardian is only built when the include_private option is set;
  # otherwise the filter receives guardian: nil.
  guardian = Guardian.new(context.user) if options[:include_private]

  research_filter =
    DiscourseAi::Utils::Research::Filter.new(@filter, limit: max_results, guardian: guardian)

  invalid_fragments = research_filter.invalid_filters
  if invalid_fragments.present?
    # Echo the filter documentation so the caller can correct the filter.
    return {
      error:
        "Invalid filter fragment: #{invalid_fragments.join(" ")}\n\n#{self.class.filter_description}",
    }
  end

  @result_count = research_filter.search.count

  blk&.call(details)

  if @dry_run
    { dry_run: true, goals: @goals, filter: @filter, number_of_posts: @result_count }
  else
    # The post is only needed for the LLM calls, so it is looked up here
    # rather than before validation.
    post = Post.find_by(id: context.post_id)
    process_filter(research_filter, @goals, post, &blk)
  end
end
|
|
|
|
# Localized progress text for the current run. +invoke+ and
# +run_inference+ pass it to the progress block.
#
# Uses the dry-run translation key when @dry_run is truthy and the regular
# one otherwise; both receive +description_args+ for interpolation.
#
# @return [Object] whatever +I18n.t+ returns for the selected key
def details
  if @dry_run
    I18n.t("discourse_ai.ai_bot.tool_description.researcher_dry_run", description_args)
  else
    I18n.t("discourse_ai.ai_bot.tool_description.researcher", description_args)
  end
end
|
|
|
|
# Localized one-line summary of the tool run: the dry-run translation when
# @dry_run is truthy, the regular one otherwise. No interpolation
# arguments are passed.
#
# @return [Object] whatever +I18n.t+ returns for the selected key
def summary
  if @dry_run
    I18n.t("discourse_ai.ai_bot.tool_summary.researcher_dry_run")
  else
    I18n.t("discourse_ai.ai_bot.tool_summary.researcher")
  end
end
|
|
|
|
# Interpolation arguments that +details+ hands to I18n.
#
# Falls back to 0 / "" while the corresponding instance variables are
# still unset (they are assigned in +invoke+).
#
# @return [Hash] with the keys +:count+, +:filter+ and +:goals+
def description_args
  { count: @result_count || 0, filter: @filter || "", goals: @goals || "" }
end
|
|
|
|
protected
|
|
|
|
# Smallest LLM prompt budget (in tokens) that process_filter accepts.
MIN_TOKENS_FOR_RESEARCH = 8000

# Splits the filtered content into chunks and runs one LLM inference per
# chunk via +run_inference+.
#
# @param filter the research filter built in +invoke+
# @param goals [String] what the LLM should extract
# @param post the post passed through to +run_inference+
# @param blk progress callback, forwarded to +run_inference+
# @return [Hash] +{ dry_run: false, goals:, filter:, results: }+ where
#   +results+ holds one +run_inference+ return value per chunk, in order
# @raise [ArgumentError] when the LLM's max_prompt_tokens is below
#   MIN_TOKENS_FOR_RESEARCH
def process_filter(filter, goals, post, &blk)
  prompt_budget = llm.max_prompt_tokens

  if prompt_budget < MIN_TOKENS_FOR_RESEARCH
    raise ArgumentError,
          "LLM max tokens too low for research. Minimum is #{MIN_TOKENS_FOR_RESEARCH}."
  end

  formatter =
    DiscourseAi::Utils::Research::LlmFormatter.new(
      filter,
      # 2000 tokens are held back from each batch — presumably headroom for
      # the system prompt and goals; TODO confirm.
      max_tokens_per_batch: prompt_budget - 2000,
      tokenizer: llm.tokenizer,
      max_tokens_per_post: options[:max_tokens_per_post] || 2000,
    )

  results = []
  formatter.each_chunk { |chunk| results << run_inference(chunk[:text], goals, post, &blk) }

  { dry_run: false, goals: goals, filter: @filter, results: results }
end
|
|
|
|
# Runs a single LLM completion over one chunk of content and reports
# progress.
#
# @param chunk_text [String] the content to analyze
# @param goals [String] what the LLM should extract
# @param post the post this run is attributed to; may be nil when the
#   lookup in +invoke+ found nothing
# @param blk optional progress callback; receives +details+ followed by a
#   blank line and one dot per chunk processed so far
# @return [String] all partials yielded by the LLM, joined together
def run_inference(chunk_text, goals, post, &blk)
  prompt =
    DiscourseAi::Completions::Prompt.new(
      goal_system_prompt(goals),
      messages: [{ type: :user, content: goal_user_prompt(goals, chunk_text) }],
      # Safe navigation: a missing post must not abort the run.
      # NOTE(review): assumes Prompt accepts nil post_id/topic_id — confirm.
      post_id: post&.id,
      topic_id: post&.topic_id,
    )

  partials = []
  llm.generate(
    prompt,
    # Fall back to the context's user when there is no post author.
    user: post&.user || context.user,
    feature_name: context.feature_name,
    cancel_manager: context.cancel_manager,
  ) { |partial| partials << partial }

  # One more dot per finished chunk, so the progress text grows over time.
  @progress_dots = (@progress_dots || 0) + 1
  blk&.call("#{details}\n\n#{"." * @progress_dots}")

  partials.join
end
|
|
|
|
# Builds the system prompt for one research inference. It embeds the site
# base URL, the current time in the configured zone and the goals, and
# tells the LLM which citation formats, quotes and dates to include.
#
# @param goals [String] interpolated into the final "Your goal is:" line
# @return [String] multi-line prompt text, terminated by a newline
def goal_system_prompt(goals)
  <<~TEXT
    You are a researcher tool designed to analyze and extract information from forum content on #{Discourse.base_url}.
    The current date is #{::Time.zone.now.strftime("%a, %d %b %Y %H:%M %Z")}.
    Your task is to process the provided content and extract relevant information based on the specified goal.
    When extracting content ALWAYS include the following:
    - Multiple citations using Markdown
    - Topic citations: Interesting fact [ref](/t/-/TOPIC_ID)
    - Post citations: Interesting fact [ref](/t/-/TOPIC_ID/POST_NUMBER)
    - Relevant quotes from the direct source content
    - Relevant dates and times from the content

    Your goal is: #{goals}
  TEXT
end
|
|
|
|
# Builds the user message for one research inference: the chunk of content
# wrapped in triple braces, followed by the goals.
#
# @param goals [String] interpolated into the final "Your goal is:" line
# @param chunk_text [String] the content to analyze, inserted verbatim
#   between the +{{{+ and +}}}+ lines
# @return [String] multi-line prompt text, terminated by a newline
def goal_user_prompt(goals, chunk_text)
  <<~TEXT
    Here is the content to analyze:

    {{{
    #{chunk_text}
    }}}

    Your goal is: #{goals}
  TEXT
end
|
|
end
|
|
end
|
|
end
|
|
end
|