FEATURE: Extend inferred concepts to include posts

* Adds support for concepts to be inferred from and applied to posts
* Replaces daily task with one that handles both topics and posts
* Adds database migration for posts_inferred_concepts join table
* Updates PersonaContext to include inferred concepts
This commit is contained in:
Rafael Silva 2025-05-09 15:03:45 -03:00
parent fb0d364687
commit 5f0d682e69
18 changed files with 486 additions and 179 deletions

View File

@ -0,0 +1,8 @@
{
"permissions": {
"allow": [
"Bash(bundle exec rails g migration:*)"
],
"deny": []
}
}

View File

@ -1,47 +0,0 @@
# frozen_string_literal: true
module Jobs
  # Low-priority background job that matches a list of topics against the
  # concepts that already exist, one topic at a time.
  class ApplyInferredConcepts < ::Jobs::Base
    sidekiq_options queue: 'low'

    # @param args [Hash] job arguments
    # @option args [Array<Integer>] :topic_ids topic ids to process; the job does nothing when blank
    # @option args [Integer] :batch_size (100) how many topics are loaded per slice
    def execute(args = {})
      ids = args[:topic_ids]
      return if ids.blank?

      slice_size = args[:batch_size] || 100
      ids.each_slice(slice_size) { |slice| process_batch(slice) }
    end

    private

    # Loads one slice of topics and processes each; a failure on one topic is
    # logged and does not stop the remaining topics in the slice.
    def process_batch(topic_ids)
      Topic.where(id: topic_ids).each do |topic|
        begin
          process_topic(topic)
        rescue => e
          Rails.logger.error("Error applying concepts to topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}")
        end
      end
    end

    # Delegates the actual matching to the inferred-concepts Manager.
    def process_topic(topic)
      DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(topic)
    end
  end
end

View File

@ -4,44 +4,64 @@ module Jobs
class GenerateInferredConcepts < ::Jobs::Base
sidekiq_options queue: 'low'
# Process a batch of topics to generate new concepts (without applying them to topics)
# Process items to generate new concepts
#
# @param args [Hash] Contains job arguments
# @option args [Array<Integer>] :topic_ids Required - List of topic IDs to process
# @option args [Integer] :batch_size (100) Number of topics to process in each batch
# @option args [String] :item_type Required - Type of items to process ('topics' or 'posts')
# @option args [Array<Integer>] :item_ids Required - List of item IDs to process
# @option args [Integer] :batch_size (100) Number of items to process in each batch
# @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones
def execute(args = {})
return if args[:topic_ids].blank?
return if args[:item_ids].blank? || args[:item_type].blank?
# Process topics in smaller batches to avoid memory issues
unless ['topics', 'posts'].include?(args[:item_type])
Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}")
return
end
# Process items in smaller batches to avoid memory issues
batch_size = args[:batch_size] || 100
# Get the list of topic IDs
topic_ids = args[:topic_ids]
# Get the list of item IDs
item_ids = args[:item_ids]
match_only = args[:match_only] || false
# Process topics in batches
topic_ids.each_slice(batch_size) do |batch_topic_ids|
process_batch(batch_topic_ids)
# Process items in batches
item_ids.each_slice(batch_size) do |batch_item_ids|
process_batch(batch_item_ids, args[:item_type], match_only)
end
end
private
def process_batch(topic_ids)
topics = Topic.where(id: topic_ids)
def process_batch(item_ids, item_type, match_only)
klass = item_type.singularize.classify.constantize
items = klass.where(id: item_ids)
topics.each do |topic|
items.each do |item|
begin
process_topic(topic)
process_item(item, item_type, match_only)
rescue => e
Rails.logger.error("Error generating concepts from topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}")
Rails.logger.error("Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}")
end
end
end
def process_topic(topic)
def process_item(item, item_type, match_only)
# Use the Manager method that handles both identifying and creating concepts
# Pass the topic object directly
DiscourseAi::InferredConcepts::Manager.generate_concepts_from_topic(topic)
if match_only
if item_type == 'topics'
DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(item)
else # posts
DiscourseAi::InferredConcepts::Manager.match_post_to_concepts(item)
end
else
if item_type == 'topics'
DiscourseAi::InferredConcepts::Manager.analyze_topic(item)
else # posts
DiscourseAi::InferredConcepts::Manager.analyze_post(item)
end
end
end
end
end

View File

@ -0,0 +1,81 @@
# frozen_string_literal: true
module Jobs
  # Scheduled once a day. Picks popular topics and posts that have no concepts yet
  # and enqueues :generate_inferred_concepts for them: once immediately to generate
  # concepts and once an hour later in match-only mode.
  class GenerateConceptsFromPopularItems < ::Jobs::Scheduled
    every 1.day

    # No-op unless the inferred_concepts_enabled site setting is on.
    def execute(args = {})
      return unless SiteSetting.inferred_concepts_enabled

      process_popular_topics
      process_popular_posts
    end

    private

    # Selects candidate topics using the engagement thresholds from site settings.
    def process_popular_topics
      popular_topics = DiscourseAi::InferredConcepts::Manager.find_candidate_topics(
        limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
        min_posts: SiteSetting.inferred_concepts_min_posts || 5,
        min_likes: SiteSetting.inferred_concepts_min_likes || 10,
        min_views: SiteSetting.inferred_concepts_min_views || 100,
        created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
      )

      enqueue_generate_then_match('topics', popular_topics)
    end

    # Selects candidate posts (first posts excluded) using the like threshold from site settings.
    def process_popular_posts
      popular_posts = DiscourseAi::InferredConcepts::Manager.find_candidate_posts(
        limit: SiteSetting.inferred_concepts_daily_posts_limit || 30,
        min_likes: SiteSetting.inferred_concepts_post_min_likes || 5,
        exclude_first_posts: true,
        created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
      )

      enqueue_generate_then_match('posts', popular_posts)
    end

    # Enqueues the generation job now and the match-only follow-up one hour later.
    # Does nothing when there are no candidates.
    def enqueue_generate_then_match(item_type, candidates)
      return if candidates.blank?

      ids = candidates.map(&:id)

      Jobs.enqueue(
        :generate_inferred_concepts,
        item_type: item_type,
        item_ids: ids,
        batch_size: 10
      )

      Jobs.enqueue_in(
        1.hour,
        :generate_inferred_concepts,
        item_type: item_type,
        item_ids: ids,
        batch_size: 10,
        match_only: true
      )
    end
  end
end

View File

@ -1,38 +0,0 @@
# frozen_string_literal: true
module Jobs
  # Scheduled once a day. Picks popular topics that have no concepts yet, enqueues
  # concept generation for them, and enqueues a delayed job that applies concepts.
  class GenerateConceptsFromPopularTopics < ::Jobs::Scheduled
    every 1.day

    def execute(args = {})
      popular_topics = DiscourseAi::InferredConcepts::Manager.find_candidate_topics(
        limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
        min_posts: SiteSetting.inferred_concepts_min_posts || 5,
        min_likes: SiteSetting.inferred_concepts_min_likes || 10,
        min_views: SiteSetting.inferred_concepts_min_views || 100,
        created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
      )
      return if popular_topics.blank?

      ids = popular_topics.map(&:id)

      # Generation runs first ...
      Jobs.enqueue(:generate_inferred_concepts, topic_ids: ids, batch_size: 10)

      # ... and the apply step is delayed by an hour so the generated concepts exist by then.
      Jobs.enqueue_in(1.hour, :apply_inferred_concepts, topic_ids: ids, batch_size: 10)
    end
  end
end

View File

@ -2,6 +2,7 @@
# A named concept inferred from forum content. Names are required and unique.
# Concepts are linked many-to-many to topics and to posts.
class InferredConcept < ActiveRecord::Base
  # The join tables are created and written to as "topics_inferred_concepts" and
  # "posts_inferred_concepts". Rails' default HABTM naming sorts the two table names
  # and would look for "inferred_concepts_topics" / "inferred_concepts_posts"
  # instead, so the table names are spelled out explicitly.
  # NOTE(review): the inverse associations on Topic/Post need the same join_table — confirm.
  has_and_belongs_to_many :topics, join_table: :topics_inferred_concepts
  has_and_belongs_to_many :posts, join_table: :posts_inferred_concepts

  validates :name, presence: true, uniqueness: true
end

View File

@ -0,0 +1,34 @@
# frozen_string_literal: true
# Serializes a post together with the inferred concepts attached to it.
# NOTE(review): topic_title, username and uploaded_avatar_id are read straight from
# the serialized object and are not defined here — confirm the object provides them.
class AiInferredConceptPostSerializer < ApplicationSerializer
  attributes :id,
             :post_number,
             :topic_id,
             :topic_title,
             :username,
             :avatar_template,
             :created_at,
             :updated_at,
             :excerpt,
             :truncated,
             :inferred_concepts

  # Avatar URL template built from the object's username and uploaded avatar id.
  def avatar_template
    User.avatar_template(object.username, object.uploaded_avatar_id)
  end

  # Excerpt derived from the cooked post body.
  def excerpt
    cooked_body = object.cooked
    Post.excerpt(cooked_body)
  end

  # True when the cooked body is longer than the post_excerpt_maxlength site setting.
  def truncated
    SiteSetting.post_excerpt_maxlength < object.cooked.length
  end

  # The post's concepts, each rendered with InferredConceptSerializer.
  def inferred_concepts
    concepts = object.inferred_concepts
    ActiveModel::ArraySerializer.new(concepts, each_serializer: InferredConceptSerializer)
  end
end

View File

@ -326,6 +326,12 @@ en:
short_summarizer:
name: "Summarizer (short form)"
description: "Default persona used to power AI short summaries for topic lists' items"
concept_finder:
name: "Concept Finder"
description: "AI Bot specialized in identifying concepts and themes in content"
concept_matcher:
name: "Concept Matcher"
description: "AI Bot specialized in matching content against existing concepts"
topic_not_found: "Summary unavailable, topic not found!"
summarizing: "Summarizing topic"
searching: "Searching for: '%{query}'"

View File

@ -426,3 +426,11 @@ discourse_ai:
default: 30
client: false
description: "Only consider topics and posts created within this many days for concept generation"
inferred_concepts_daily_posts_limit:
default: 30
client: false
description: "Maximum number of posts to process each day for concept generation"
inferred_concepts_post_min_likes:
default: 5
client: false
description: "Minimum number of likes a post must have to be considered for concept generation"

View File

@ -0,0 +1,15 @@
# frozen_string_literal: true
# Creates the posts_inferred_concepts join table that links posts to inferred concepts.
class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0]
def change
create_table :posts_inferred_concepts do |t|
t.integer :post_id, null: false
t.integer :inferred_concept_id, null: false
t.timestamps
end
# One row per (post, concept) pair; the raw INSERT ... ON CONFLICT (post_id, inferred_concept_id)
# used to link concepts to posts depends on this unique index.
add_index :posts_inferred_concepts, [:post_id, :inferred_concept_id], unique: true, name: 'idx_unique_post_inferred_concept'
# NOTE(review): post_id is the leading column of the unique index above, so this
# single-column index looks redundant — confirm before keeping it.
add_index :posts_inferred_concepts, :post_id
# Supports lookups by concept (inferred_concept_id is not the leading column above).
add_index :posts_inferred_concepts, :inferred_concept_id
end
end

View File

@ -8,7 +8,7 @@ module DiscourseAi
# concepts: an array of InferredConcept instances
def self.apply_to_topic(topic, concepts)
return if topic.blank? || concepts.blank?
concepts.each do |concept|
# Use the join table to associate the concept with the topic
# Avoid duplicates by using find_or_create_by
@ -19,94 +19,163 @@ module DiscourseAi
SQL
end
end
# Associates the provided concepts with a post.
# post: a Post instance
# concepts: a collection of InferredConcept instances (anything responding to #each)
# Does nothing when either argument is blank.
def self.apply_to_post(post, concepts)
return if post.blank? || concepts.blank?
concepts.each do |concept|
# Insert straight into the join table, one statement per concept.
# Duplicates are avoided by the ON CONFLICT ... DO NOTHING clause (not by
# find_or_create_by): re-applying an already linked concept is a no-op.
ActiveRecord::Base.connection.execute(<<~SQL)
INSERT INTO posts_inferred_concepts (post_id, inferred_concept_id, created_at, updated_at)
VALUES (#{post.id}, #{concept.id}, NOW(), NOW())
ON CONFLICT (post_id, inferred_concept_id) DO NOTHING
SQL
end
end
# Extracts content from a topic for concept analysis
# Returns a string with the topic title and first few posts
def self.topic_content_for_analysis(topic)
return "" if topic.blank?
# Combine title and first few posts for analysis
posts = Post.where(topic_id: topic.id).order(:post_number).limit(10)
content = "Title: #{topic.title}\n\n"
content += posts.map do |p|
"#{p.post_number}) #{p.user.username}: #{p.raw}"
end.join("\n\n")
content += posts.map { |p| "#{p.post_number}) #{p.user.username}: #{p.raw}" }.join("\n\n")
content
end
# Extracts content from a post for concept analysis.
#
# @param post [Post] the post to describe
# @return [String] the topic title, the author's username and the raw post body;
#   an empty string when post is blank
def self.post_content_for_analysis(post)
  return "" if post.blank?

  # The topic title is only context, so a missing topic degrades to an empty title.
  topic_title = post.topic&.title || ""

  # Guard the author the same way: a post without a user record must not abort
  # the analysis with NoMethodError.
  author = post.user&.username

  "Topic: #{topic_title}\n\nPost by #{author}:\n#{post.raw}"
end
# Full pipeline for a topic: build the analysis text, identify concept names,
# create or find the matching concept records, and link them to the topic.
# Returns the concepts, or nil when topic is blank.
def self.analyze_and_apply(topic)
  return if topic.blank?

  names = Finder.identify_concepts(topic_content_for_analysis(topic))

  Finder.create_or_find_concepts(names).tap do |found|
    apply_to_topic(topic, found)
  end
end
# Full pipeline for a post: build the analysis text, identify concept names,
# create or find the matching concept records, and link them to the post.
# Returns the concepts, or nil when post is blank.
def self.analyze_and_apply_post(post)
  return if post.blank?

  names = Finder.identify_concepts(post_content_for_analysis(post))

  Finder.create_or_find_concepts(names).tap do |found|
    apply_to_post(post, found)
  end
end
# Match a topic with existing concepts
def self.match_existing_concepts(topic)
return [] if topic.blank?
# Get content to analyze
content = topic_content_for_analysis(topic)
# Get all existing concepts
existing_concepts = InferredConcept.all.pluck(:name)
existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts
return [] if existing_concepts.empty?
# Use the ConceptMatcher persona to match concepts
matched_concept_names = match_concepts_to_content(content, existing_concepts)
# Find concepts in the database
matched_concepts = InferredConcept.where(name: matched_concept_names)
# Apply concepts to the topic
apply_to_topic(topic, matched_concepts)
matched_concepts
end
# Matches a post against the concepts that already exist and links the matches
# to the post. Returns the matched concepts; returns [] when the post is blank
# or no concepts exist yet.
def self.match_existing_concepts_for_post(post)
  return [] if post.blank?

  text = post_content_for_analysis(post)

  known_names = DiscourseAi::InferredConcepts::Manager.list_concepts
  return [] if known_names.empty?

  # The matcher returns names; resolve them to concept records before linking.
  hits = InferredConcept.where(name: match_concepts_to_content(text, known_names))
  apply_to_post(post, hits)
  hits
end
# Use ConceptMatcher persona to match content against provided concepts
def self.match_concepts_to_content(content, concept_list)
return [] if content.blank? || concept_list.blank?
# Prepare user message with content and concept list
# Prepare user message with only the content
user_message = <<~MESSAGE
Content to analyze:
#{content}
Available concepts to match:
#{concept_list.join(", ")}
MESSAGE
# Use the ConceptMatcher persona to match concepts
llm = DiscourseAi::Completions::Llm.default_llm
persona = DiscourseAi::Personas::ConceptMatcher.new
context = DiscourseAi::Personas::BotContext.new(
messages: [{ type: :user, content: user_message }],
user: Discourse.system_user
)
persona = DiscourseAi::Personas::ConceptMatcher.new(concept_list: concept_list)
context =
DiscourseAi::Personas::BotContext.new(
messages: [{ type: :user, content: user_message }],
user: Discourse.system_user,
)
prompt = persona.craft_prompt(context)
response = llm.completion(prompt, extract_json: true)
return [] unless response.success?
matching_concepts = response.parsed_output["matching_concepts"]
matching_concepts || []
end
end
end
end
end

View File

@ -36,7 +36,7 @@ module DiscourseAi
end
# Finds candidate topics to use for concept generation
#
#
# @param limit [Integer] Maximum number of topics to return
# @param min_posts [Integer] Minimum number of posts in topic
# @param min_likes [Integer] Minimum number of likes across all posts
@ -46,46 +46,92 @@ module DiscourseAi
# @param created_after [DateTime] Only include topics created after this time (optional)
# @return [Array<Topic>] Array of Topic objects that are good candidates
def self.find_candidate_topics(
limit: 100,
min_posts: 5,
min_likes: 10,
min_views: 100,
limit: 100,
min_posts: 5,
min_likes: 10,
min_views: 100,
exclude_topic_ids: [],
category_ids: nil,
created_after: 30.days.ago
)
query = Topic.where(
"topics.posts_count >= ? AND topics.views >= ? AND topics.like_count >= ?",
min_posts,
min_views,
min_posts,
min_views,
min_likes
)
# Apply additional filters
query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present?
query = query.where("topics.category_id IN (?)", category_ids) if category_ids.present?
query = query.where("topics.created_at >= ?", created_after) if created_after.present?
# Exclude PM topics (if they exist in Discourse)
query = query.where(archetype: Topic.public_archetype)
# Exclude topics that already have concepts
topics_with_concepts = <<~SQL
SELECT DISTINCT topic_id
SELECT DISTINCT topic_id
FROM topics_inferred_concepts
SQL
query = query.where("topics.id NOT IN (#{topics_with_concepts})")
# Score and order topics by engagement (combination of views, likes, and posts)
query = query.select(
"topics.*,
"topics.*,
(topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score"
).order("engagement_score DESC")
# Return limited number of topics
query.limit(limit)
end
# Find candidate posts that are good for concept generation.
# Candidates are posts with at least min_likes likes that have no row in
# posts_inferred_concepts yet, most-liked first.
#
# NOTE(review): unlike find_candidate_topics, which restricts to
# Topic.public_archetype, nothing here filters on the topic's archetype, so posts
# from private-message topics are presumably eligible — confirm this is intended.
#
# @param limit [Integer] Maximum number of posts to return
# @param min_likes [Integer] Minimum number of likes
# @param exclude_first_posts [Boolean] Exclude first posts in topics (post_number 1)
# @param exclude_post_ids [Array<Integer>] Post IDs to exclude
# @param category_ids [Array<Integer>] Only include posts from topics in these categories
# @param created_after [DateTime] Only include posts created after this time
# @return the limited Post query (presumably an ActiveRecord relation rather than
#   a materialized Array — callers iterate it with map/blank?)
def self.find_candidate_posts(
limit: 100,
min_likes: 5,
exclude_first_posts: true,
exclude_post_ids: [],
category_ids: nil,
created_after: 30.days.ago
)
query = Post.where("posts.like_count >= ?", min_likes)
# Exclude first posts if specified
query = query.where("posts.post_number > 1") if exclude_first_posts
# Apply additional filters (each is skipped when its argument is blank)
query = query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present?
query = query.where("posts.created_at >= ?", created_after) if created_after.present?
# Filter by category if specified; the topics join is only added in this case
if category_ids.present?
query = query.joins(:topic).where("topics.category_id IN (?)", category_ids)
end
# Exclude posts that already have concepts
posts_with_concepts = <<~SQL
SELECT DISTINCT post_id
FROM posts_inferred_concepts
SQL
query = query.where("posts.id NOT IN (#{posts_with_concepts})")
# Order by engagement (likes)
query = query.order(like_count: :desc)
# Return limited number of posts
query.limit(limit)
end
end
end
end

View File

@ -3,59 +3,102 @@
module DiscourseAi
module InferredConcepts
class Manager
# Get the names of existing concepts, sorted by name ascending.
# @param limit [Integer, nil] Optional maximum number of names to return
# @return [Array<String>] Concept names (plucked from the name column, not InferredConcept objects)
def self.list_concepts(limit: nil)
query = InferredConcept.all.order("name ASC")
# Apply limit if provided
query = query.limit(limit) if limit.present?
query.pluck(:name)
end
# Generate new concepts for a topic and apply them.
# @param topic [Topic] A Topic instance
# @return [Array] The result of Applier.analyze_and_apply, or [] when topic is blank
def self.analyze_topic(topic)
  topic.blank? ? [] : Applier.analyze_and_apply(topic)
end
# Generate new concepts for a post and apply them.
# @param post [Post] A Post instance
# @return [Array] The result of Applier.analyze_and_apply_post, or [] when post is blank
def self.analyze_post(post)
  post.blank? ? [] : Applier.analyze_and_apply_post(post)
end
# Extract concepts from arbitrary content.
# Thin pass-through to Finder.identify_concepts; nothing is persisted or applied here.
# @param content [String] The content to analyze
# @return [Array<String>] The identified concept names (as returned by the Finder)
def self.identify_concepts(content)
Finder.identify_concepts(content)
end
# Identify concepts in the content and create or find their records, without
# attaching them to any topic or post.
# @param content [String] The content to analyze
# @return [Array] The created or found concepts; [] when the content is blank
#   or no concept names were identified
def self.generate_concepts_from_content(content)
  return [] if content.blank?

  names = Finder.identify_concepts(content)

  if names.blank?
    []
  else
    Finder.create_or_find_concepts(names)
  end
end
# Generate concepts from a topic's content without applying them to the topic.
# @param topic [Topic] A Topic instance
# @return [Array] The created or found concepts; [] when the topic or its
#   analysis text is blank
def self.generate_concepts_from_topic(topic)
  return [] if topic.blank?

  text = Applier.topic_content_for_analysis(topic)
  text.blank? ? [] : generate_concepts_from_content(text)
end
# Generate concepts from a post's content without applying them to the post.
# @param post [Post] A Post instance
# @return [Array] The created or found concepts; [] when the post or its
#   analysis text is blank
def self.generate_concepts_from_post(post)
  return [] if post.blank?

  text = Applier.post_content_for_analysis(post)
  text.blank? ? [] : generate_concepts_from_content(text)
end
# Match a topic against existing concepts.
# @param topic [Topic] A Topic instance
# @return [Array] The result of Applier.match_existing_concepts, or [] when topic is blank
def self.match_topic_to_concepts(topic)
  if topic.blank?
    []
  else
    Applier.match_existing_concepts(topic)
  end
end
# Match a post against existing concepts.
# @param post [Post] A Post instance
# @return [Array] The result of Applier.match_existing_concepts_for_post, or [] when post is blank
def self.match_post_to_concepts(post)
  if post.blank?
    []
  else
    Applier.match_existing_concepts_for_post(post)
  end
end
# Find topics that have a specific concept
# @param concept_name [String] The name of the concept to search for
# @return [Array<Topic>] Topics that have the specified concept
@ -64,19 +107,28 @@ module DiscourseAi
return [] unless concept
concept.topics
end
# Find posts that have a specific concept.
# @param concept_name [String] The name of the concept to search for
# @return The concept's posts association, or [] when no concept has that name
def self.search_posts_by_concept(concept_name)
  found = ::InferredConcept.find_by(name: concept_name)
  found ? found.posts : []
end
# Match arbitrary content against existing concepts.
# @param content [String] The content to analyze
# @return [Array<String>] Names of matching concepts; [] when no concepts exist yet
def self.match_content_to_concepts(content)
  # Use list_concepts, as the Applier matchers do, so every matcher is given the
  # same name-ordered list instead of a second ad-hoc pluck.
  existing_concepts = list_concepts
  return [] if existing_concepts.empty?

  Applier.match_concepts_to_content(content, existing_concepts)
end
# Find candidate topics that are good for concept generation
#
#
# @param opts [Hash] Options to pass to the finder
# @option opts [Integer] :limit (100) Maximum number of topics to return
# @option opts [Integer] :min_posts (5) Minimum number of posts in topic
@ -89,6 +141,20 @@ module DiscourseAi
def self.find_candidate_topics(opts = {})
  # The Finder method takes keyword arguments only. On Ruby 3 a positional Hash is
  # no longer converted to keywords, so the options must be splatted explicitly.
  Finder.find_candidate_topics(**opts)
end
# Find candidate posts that are good for concept generation.
# @param opts [Hash] Options to pass to the finder. Recognised keys: :limit,
#   :min_likes, :exclude_first_posts, :exclude_post_ids, :category_ids,
#   :created_after. Other keys are ignored.
# @return The Finder's result (candidate posts)
def self.find_candidate_posts(opts = {})
  # Forward only the keys the caller actually supplied. Passing opts[:key] for
  # every option would send explicit nils and override the Finder's defaults
  # (limit: 100, min_likes: 5, exclude_first_posts: true, ...).
  supplied =
    opts.slice(
      :limit,
      :min_likes,
      :exclude_first_posts,
      :exclude_post_ids,
      :category_ids,
      :created_after,
    )

  Finder.find_candidate_posts(**supplied)
end
end
end
end
end

View File

@ -17,7 +17,11 @@ module DiscourseAi
:context_post_ids,
:feature_name,
:resource_url,
<<<<<<< HEAD
:cancel_manager
=======
:inferred_concepts
>>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts)
def initialize(
post: nil,
@ -35,7 +39,11 @@ module DiscourseAi
context_post_ids: nil,
feature_name: "bot",
resource_url: nil,
<<<<<<< HEAD
cancel_manager: nil
=======
inferred_concepts: []
>>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts)
)
@participants = participants
@user = user
@ -54,7 +62,7 @@ module DiscourseAi
@resource_url = resource_url
@feature_name = feature_name
@resource_url = resource_url
@inferred_concepts = inferred_concepts
@cancel_manager = cancel_manager
@ -68,7 +76,15 @@ module DiscourseAi
end
# these are strings that can be safely interpolated into templates
TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url]
TEMPLATE_PARAMS = %w[
time
site_url
site_title
site_description
participants
resource_url
inferred_concepts
]
def lookup_template_param(key)
public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key)
@ -114,6 +130,7 @@ module DiscourseAi
skip_tool_details: @skip_tool_details,
feature_name: @feature_name,
resource_url: @resource_url,
inferred_concepts: @inferred_concepts,
}
end
end

View File

@ -4,6 +4,16 @@ module DiscourseAi
module Personas
class ConceptFinder < Persona
def system_prompt
existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts(limit: 100)
existing_concepts_text = ""
existing_concepts_text = <<~CONCEPTS if existing_concepts.present?
The following concepts already exist in the system:
#{existing_concepts.join(", ")}
You can reuse these existing concepts if they apply to the content, or suggest new concepts.
CONCEPTS
<<~PROMPT.strip
You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text.
Your job is to extract meaningful labels that can be used to categorize content.
@ -16,7 +26,7 @@ module DiscourseAi
- Ensure concepts are relevant to the core content
- Do not include proper nouns unless they represent key technologies or methodologies
- Maintain the original language of the text being analyzed
#{existing_concepts_text}
Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value.
Your output should be in the following format:
<o>

View File

@ -7,7 +7,8 @@ module DiscourseAi
<<~PROMPT.strip
You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content.
Your job is to analyze the content and determine which concepts from the list apply to it.
#{concepts_text}
Guidelines for matching concepts:
- Only select concepts that are clearly relevant to the content
- The content must substantially discuss or relate to the concept
@ -17,13 +18,17 @@ module DiscourseAi
- Maintain the original language of the text being analyzed
- IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts
- If no concepts from the list match the content, return an empty array
The list of available concepts is:
{inferred_concepts}
Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list.
Your output should be in the following format:
<o>
{"matching_concepts": ["concept1", "concept3", "concept5"]}
</o>
Only include concepts from the provided list that match the content. If no concepts match, return an empty array.
PROMPT
end
@ -33,4 +38,4 @@ module DiscourseAi
end
end
end
end
end

View File

@ -11,6 +11,8 @@ module DiscourseAi
-> { where(classification_type: "sentiment") },
class_name: "ClassificationResult",
as: :target
has_and_belongs_to_many :inferred_concepts
end
end
end

View File

@ -17,6 +17,7 @@ class TestPersona < DiscourseAi::Personas::Persona
{participants}
{time}
{resource_url}
{inferred_concepts}
PROMPT
end
end
@ -38,6 +39,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
end
let(:resource_url) { "https://path-to-resource" }
let(:inferred_concepts) { %w[bulbassaur charmander squirtle] }
let(:context) do
DiscourseAi::Personas::BotContext.new(
@ -47,6 +49,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
time: Time.zone.now,
participants: topic_with_users.allowed_users.map(&:username).join(", "),
resource_url: resource_url,
inferred_concepts: inferred_concepts,
)
end
@ -66,6 +69,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
expect(system_message).to include("joe, jane")
expect(system_message).to include(Time.zone.now.to_s)
expect(system_message).to include(resource_url)
expect(system_message).to include(inferred_concepts)
tools = rendered.tools