FEATURE: Extend inferred concepts to include posts
* Adds support for concepts to be inferred from and applied to posts * Replaces daily task with one that handles both topics and posts * Adds database migration for posts_inferred_concepts join table * Updates PersonaContext to include inferred concepts
This commit is contained in:
parent
fb0d364687
commit
5f0d682e69
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(bundle exec rails g migration:*)"
|
||||
],
|
||||
"deny": []
|
||||
}
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Jobs
|
||||
class ApplyInferredConcepts < ::Jobs::Base
|
||||
sidekiq_options queue: 'low'
|
||||
|
||||
# Process a batch of topics to apply existing concepts to them
|
||||
#
|
||||
# @param args [Hash] Contains job arguments
|
||||
# @option args [Array<Integer>] :topic_ids Required - List of topic IDs to process
|
||||
# @option args [Integer] :batch_size (100) Number of topics to process in each batch
|
||||
def execute(args = {})
|
||||
return if args[:topic_ids].blank?
|
||||
|
||||
# Process topics in smaller batches to avoid memory issues
|
||||
batch_size = args[:batch_size] || 100
|
||||
|
||||
# Get the list of topic IDs
|
||||
topic_ids = args[:topic_ids]
|
||||
|
||||
# Process topics in batches
|
||||
topic_ids.each_slice(batch_size) do |batch_topic_ids|
|
||||
process_batch(batch_topic_ids)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def process_batch(topic_ids)
|
||||
topics = Topic.where(id: topic_ids)
|
||||
|
||||
topics.each do |topic|
|
||||
begin
|
||||
process_topic(topic)
|
||||
rescue => e
|
||||
Rails.logger.error("Error applying concepts to topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def process_topic(topic)
|
||||
# Match topic against existing concepts and apply them
|
||||
# Pass the topic object directly
|
||||
DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(topic)
|
||||
end
|
||||
end
|
||||
end
|
|
@ -4,44 +4,64 @@ module Jobs
|
|||
class GenerateInferredConcepts < ::Jobs::Base
|
||||
sidekiq_options queue: 'low'
|
||||
|
||||
# Process a batch of topics to generate new concepts (without applying them to topics)
|
||||
# Process items to generate new concepts
|
||||
#
|
||||
# @param args [Hash] Contains job arguments
|
||||
# @option args [Array<Integer>] :topic_ids Required - List of topic IDs to process
|
||||
# @option args [Integer] :batch_size (100) Number of topics to process in each batch
|
||||
# @option args [String] :item_type Required - Type of items to process ('topics' or 'posts')
|
||||
# @option args [Array<Integer>] :item_ids Required - List of item IDs to process
|
||||
# @option args [Integer] :batch_size (100) Number of items to process in each batch
|
||||
# @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones
|
||||
def execute(args = {})
|
||||
return if args[:topic_ids].blank?
|
||||
return if args[:item_ids].blank? || args[:item_type].blank?
|
||||
|
||||
# Process topics in smaller batches to avoid memory issues
|
||||
unless ['topics', 'posts'].include?(args[:item_type])
|
||||
Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}")
|
||||
return
|
||||
end
|
||||
|
||||
# Process items in smaller batches to avoid memory issues
|
||||
batch_size = args[:batch_size] || 100
|
||||
|
||||
# Get the list of topic IDs
|
||||
topic_ids = args[:topic_ids]
|
||||
# Get the list of item IDs
|
||||
item_ids = args[:item_ids]
|
||||
match_only = args[:match_only] || false
|
||||
|
||||
# Process topics in batches
|
||||
topic_ids.each_slice(batch_size) do |batch_topic_ids|
|
||||
process_batch(batch_topic_ids)
|
||||
# Process items in batches
|
||||
item_ids.each_slice(batch_size) do |batch_item_ids|
|
||||
process_batch(batch_item_ids, args[:item_type], match_only)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def process_batch(topic_ids)
|
||||
topics = Topic.where(id: topic_ids)
|
||||
def process_batch(item_ids, item_type, match_only)
|
||||
klass = item_type.singularize.classify.constantize
|
||||
items = klass.where(id: item_ids)
|
||||
|
||||
topics.each do |topic|
|
||||
items.each do |item|
|
||||
begin
|
||||
process_topic(topic)
|
||||
process_item(item, item_type, match_only)
|
||||
rescue => e
|
||||
Rails.logger.error("Error generating concepts from topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}")
|
||||
Rails.logger.error("Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def process_topic(topic)
|
||||
def process_item(item, item_type, match_only)
|
||||
# Use the Manager method that handles both identifying and creating concepts
|
||||
# Pass the topic object directly
|
||||
DiscourseAi::InferredConcepts::Manager.generate_concepts_from_topic(topic)
|
||||
if match_only
|
||||
if item_type == 'topics'
|
||||
DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(item)
|
||||
else # posts
|
||||
DiscourseAi::InferredConcepts::Manager.match_post_to_concepts(item)
|
||||
end
|
||||
else
|
||||
if item_type == 'topics'
|
||||
DiscourseAi::InferredConcepts::Manager.analyze_topic(item)
|
||||
else # posts
|
||||
DiscourseAi::InferredConcepts::Manager.analyze_post(item)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,81 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Jobs
|
||||
class GenerateConceptsFromPopularItems < ::Jobs::Scheduled
|
||||
every 1.day
|
||||
|
||||
# This job runs daily and generates new concepts from popular topics and posts
|
||||
# It selects items based on engagement metrics and generates concepts from their content
|
||||
def execute(args = {})
|
||||
return unless SiteSetting.inferred_concepts_enabled
|
||||
|
||||
process_popular_topics
|
||||
process_popular_posts
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def process_popular_topics
|
||||
|
||||
# Find candidate topics that are popular and don't have concepts yet
|
||||
candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics(
|
||||
limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
|
||||
min_posts: SiteSetting.inferred_concepts_min_posts || 5,
|
||||
min_likes: SiteSetting.inferred_concepts_min_likes || 10,
|
||||
min_views: SiteSetting.inferred_concepts_min_views || 100,
|
||||
created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
|
||||
)
|
||||
|
||||
return if candidates.blank?
|
||||
|
||||
# Process candidate topics - first generate concepts, then match
|
||||
Jobs.enqueue(
|
||||
:generate_inferred_concepts,
|
||||
item_type: 'topics',
|
||||
item_ids: candidates.map(&:id),
|
||||
batch_size: 10
|
||||
)
|
||||
|
||||
# Schedule a follow-up job to match existing concepts
|
||||
Jobs.enqueue_in(
|
||||
1.hour,
|
||||
:generate_inferred_concepts,
|
||||
item_type: 'topics',
|
||||
item_ids: candidates.map(&:id),
|
||||
batch_size: 10,
|
||||
match_only: true
|
||||
)
|
||||
end
|
||||
|
||||
def process_popular_posts
|
||||
|
||||
# Find candidate posts that are popular and don't have concepts yet
|
||||
candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_posts(
|
||||
limit: SiteSetting.inferred_concepts_daily_posts_limit || 30,
|
||||
min_likes: SiteSetting.inferred_concepts_post_min_likes || 5,
|
||||
exclude_first_posts: true,
|
||||
created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
|
||||
)
|
||||
|
||||
return if candidates.blank?
|
||||
|
||||
# Process candidate posts - first generate concepts, then match
|
||||
Jobs.enqueue(
|
||||
:generate_inferred_concepts,
|
||||
item_type: 'posts',
|
||||
item_ids: candidates.map(&:id),
|
||||
batch_size: 10
|
||||
)
|
||||
|
||||
# Schedule a follow-up job to match against existing concepts
|
||||
Jobs.enqueue_in(
|
||||
1.hour,
|
||||
:generate_inferred_concepts,
|
||||
item_type: 'posts',
|
||||
item_ids: candidates.map(&:id),
|
||||
batch_size: 10,
|
||||
match_only: true
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,38 +0,0 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Jobs
|
||||
class GenerateConceptsFromPopularTopics < ::Jobs::Scheduled
|
||||
every 1.day
|
||||
|
||||
# This job runs daily and generates new concepts from popular topics
|
||||
# It selects topics based on engagement metrics and generates concepts from their content
|
||||
def execute(args = {})
|
||||
# Find candidate topics that are popular and don't have concepts yet
|
||||
candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics(
|
||||
limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
|
||||
min_posts: SiteSetting.inferred_concepts_min_posts || 5,
|
||||
min_likes: SiteSetting.inferred_concepts_min_likes || 10,
|
||||
min_views: SiteSetting.inferred_concepts_min_views || 100,
|
||||
created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
|
||||
)
|
||||
|
||||
return if candidates.blank?
|
||||
|
||||
# Process the candidate topics in batches using the regular job
|
||||
Jobs.enqueue(
|
||||
:generate_inferred_concepts,
|
||||
topic_ids: candidates.map(&:id),
|
||||
batch_size: 10
|
||||
)
|
||||
|
||||
# Schedule a follow-up job to apply the concepts to topics
|
||||
# This runs after a delay to ensure concepts have been generated
|
||||
Jobs.enqueue_in(
|
||||
1.hour,
|
||||
:apply_inferred_concepts,
|
||||
topic_ids: candidates.map(&:id),
|
||||
batch_size: 10
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
class InferredConcept < ActiveRecord::Base
|
||||
has_and_belongs_to_many :topics
|
||||
has_and_belongs_to_many :posts
|
||||
|
||||
validates :name, presence: true, uniqueness: true
|
||||
end
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class AiInferredConceptPostSerializer < ApplicationSerializer
|
||||
attributes :id,
|
||||
:post_number,
|
||||
:topic_id,
|
||||
:topic_title,
|
||||
:username,
|
||||
:avatar_template,
|
||||
:created_at,
|
||||
:updated_at,
|
||||
:excerpt,
|
||||
:truncated,
|
||||
:inferred_concepts
|
||||
|
||||
def avatar_template
|
||||
User.avatar_template(object.username, object.uploaded_avatar_id)
|
||||
end
|
||||
|
||||
def excerpt
|
||||
Post.excerpt(object.cooked)
|
||||
end
|
||||
|
||||
def truncated
|
||||
object.cooked.length > SiteSetting.post_excerpt_maxlength
|
||||
end
|
||||
|
||||
def inferred_concepts
|
||||
ActiveModel::ArraySerializer.new(
|
||||
object.inferred_concepts,
|
||||
each_serializer: InferredConceptSerializer
|
||||
)
|
||||
end
|
||||
end
|
|
@ -326,6 +326,12 @@ en:
|
|||
short_summarizer:
|
||||
name: "Summarizer (short form)"
|
||||
description: "Default persona used to power AI short summaries for topic lists' items"
|
||||
concept_finder:
|
||||
name: "Concept Finder"
|
||||
description: "AI Bot specialized in identifying concepts and themes in content"
|
||||
concept_matcher:
|
||||
name: "Concept Matcher"
|
||||
description: "AI Bot specialized in matching content against existing concepts"
|
||||
topic_not_found: "Summary unavailable, topic not found!"
|
||||
summarizing: "Summarizing topic"
|
||||
searching: "Searching for: '%{query}'"
|
||||
|
|
|
@ -426,3 +426,11 @@ discourse_ai:
|
|||
default: 30
|
||||
client: false
|
||||
description: "Only consider topics created within this many days for concept generation"
|
||||
inferred_concepts_daily_posts_limit:
|
||||
default: 30
|
||||
client: false
|
||||
description: "Maximum number of posts to process each day for concept generation"
|
||||
inferred_concepts_post_min_likes:
|
||||
default: 5
|
||||
client: false
|
||||
description: "Minimum number of likes a post must have to be considered for concept generation"
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
create_table :posts_inferred_concepts do |t|
|
||||
t.integer :post_id, null: false
|
||||
t.integer :inferred_concept_id, null: false
|
||||
t.timestamps
|
||||
end
|
||||
|
||||
add_index :posts_inferred_concepts, [:post_id, :inferred_concept_id], unique: true, name: 'idx_unique_post_inferred_concept'
|
||||
add_index :posts_inferred_concepts, :post_id
|
||||
add_index :posts_inferred_concepts, :inferred_concept_id
|
||||
end
|
||||
end
|
|
@ -20,6 +20,23 @@ module DiscourseAi
|
|||
end
|
||||
end
|
||||
|
||||
# Associates the provided concepts with a post
|
||||
# post: a Post instance
|
||||
# concepts: an array of InferredConcept instances
|
||||
def self.apply_to_post(post, concepts)
|
||||
return if post.blank? || concepts.blank?
|
||||
|
||||
concepts.each do |concept|
|
||||
# Use the join table to associate the concept with the post
|
||||
# Avoid duplicates via ON CONFLICT DO NOTHING on the (post_id, inferred_concept_id) pair
|
||||
ActiveRecord::Base.connection.execute(<<~SQL)
|
||||
INSERT INTO posts_inferred_concepts (post_id, inferred_concept_id, created_at, updated_at)
|
||||
VALUES (#{post.id}, #{concept.id}, NOW(), NOW())
|
||||
ON CONFLICT (post_id, inferred_concept_id) DO NOTHING
|
||||
SQL
|
||||
end
|
||||
end
|
||||
|
||||
# Extracts content from a topic for concept analysis
|
||||
# Returns a string with the topic title and first few posts
|
||||
def self.topic_content_for_analysis(topic)
|
||||
|
@ -29,9 +46,21 @@ module DiscourseAi
|
|||
posts = Post.where(topic_id: topic.id).order(:post_number).limit(10)
|
||||
|
||||
content = "Title: #{topic.title}\n\n"
|
||||
content += posts.map do |p|
|
||||
"#{p.post_number}) #{p.user.username}: #{p.raw}"
|
||||
end.join("\n\n")
|
||||
content += posts.map { |p| "#{p.post_number}) #{p.user.username}: #{p.raw}" }.join("\n\n")
|
||||
|
||||
content
|
||||
end
|
||||
|
||||
# Extracts content from a post for concept analysis
|
||||
# Returns a string with the post content
|
||||
def self.post_content_for_analysis(post)
|
||||
return "" if post.blank?
|
||||
|
||||
# Get the topic title for context
|
||||
topic_title = post.topic&.title || ""
|
||||
|
||||
content = "Topic: #{topic_title}\n\n"
|
||||
content += "Post by #{post.user.username}:\n#{post.raw}"
|
||||
|
||||
content
|
||||
end
|
||||
|
@ -55,6 +84,25 @@ module DiscourseAi
|
|||
concepts
|
||||
end
|
||||
|
||||
# Comprehensive method to analyze a post and apply concepts
|
||||
def self.analyze_and_apply_post(post)
|
||||
return if post.blank?
|
||||
|
||||
# Get content to analyze
|
||||
content = post_content_for_analysis(post)
|
||||
|
||||
# Identify concepts
|
||||
concept_names = Finder.identify_concepts(content)
|
||||
|
||||
# Create or find concepts in the database
|
||||
concepts = Finder.create_or_find_concepts(concept_names)
|
||||
|
||||
# Apply concepts to the post
|
||||
apply_to_post(post, concepts)
|
||||
|
||||
concepts
|
||||
end
|
||||
|
||||
# Match a topic with existing concepts
|
||||
def self.match_existing_concepts(topic)
|
||||
return [] if topic.blank?
|
||||
|
@ -63,7 +111,7 @@ module DiscourseAi
|
|||
content = topic_content_for_analysis(topic)
|
||||
|
||||
# Get all existing concepts
|
||||
existing_concepts = InferredConcept.all.pluck(:name)
|
||||
existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts
|
||||
return [] if existing_concepts.empty?
|
||||
|
||||
# Use the ConceptMatcher persona to match concepts
|
||||
|
@ -78,25 +126,46 @@ module DiscourseAi
|
|||
matched_concepts
|
||||
end
|
||||
|
||||
# Match a post with existing concepts
|
||||
def self.match_existing_concepts_for_post(post)
|
||||
return [] if post.blank?
|
||||
|
||||
# Get content to analyze
|
||||
content = post_content_for_analysis(post)
|
||||
|
||||
# Get all existing concepts
|
||||
existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts
|
||||
return [] if existing_concepts.empty?
|
||||
|
||||
# Use the ConceptMatcher persona to match concepts
|
||||
matched_concept_names = match_concepts_to_content(content, existing_concepts)
|
||||
|
||||
# Find concepts in the database
|
||||
matched_concepts = InferredConcept.where(name: matched_concept_names)
|
||||
|
||||
# Apply concepts to the post
|
||||
apply_to_post(post, matched_concepts)
|
||||
|
||||
matched_concepts
|
||||
end
|
||||
|
||||
# Use ConceptMatcher persona to match content against provided concepts
|
||||
def self.match_concepts_to_content(content, concept_list)
|
||||
return [] if content.blank? || concept_list.blank?
|
||||
|
||||
# Prepare user message with content and concept list
|
||||
# Prepare user message with only the content
|
||||
user_message = <<~MESSAGE
|
||||
Content to analyze:
|
||||
#{content}
|
||||
|
||||
Available concepts to match:
|
||||
#{concept_list.join(", ")}
|
||||
MESSAGE
|
||||
|
||||
# Use the ConceptMatcher persona to match concepts
|
||||
llm = DiscourseAi::Completions::Llm.default_llm
|
||||
persona = DiscourseAi::Personas::ConceptMatcher.new
|
||||
context = DiscourseAi::Personas::BotContext.new(
|
||||
persona = DiscourseAi::Personas::ConceptMatcher.new(concept_list: concept_list)
|
||||
context =
|
||||
DiscourseAi::Personas::BotContext.new(
|
||||
messages: [{ type: :user, content: user_message }],
|
||||
user: Discourse.system_user
|
||||
user: Discourse.system_user,
|
||||
)
|
||||
|
||||
prompt = persona.craft_prompt(context)
|
||||
|
|
|
@ -86,6 +86,52 @@ module DiscourseAi
|
|||
# Return limited number of topics
|
||||
query.limit(limit)
|
||||
end
|
||||
|
||||
# Find candidate posts that are good for concept generation
|
||||
#
|
||||
# @param limit [Integer] Maximum number of posts to return
|
||||
# @param min_likes [Integer] Minimum number of likes
|
||||
# @param exclude_first_posts [Boolean] Exclude first posts in topics
|
||||
# @param exclude_post_ids [Array<Integer>] Post IDs to exclude
|
||||
# @param category_ids [Array<Integer>] Only include posts from topics in these categories
|
||||
# @param created_after [DateTime] Only include posts created after this time
|
||||
# @return [Array<Post>] Array of Post objects that are good candidates
|
||||
def self.find_candidate_posts(
|
||||
limit: 100,
|
||||
min_likes: 5,
|
||||
exclude_first_posts: true,
|
||||
exclude_post_ids: [],
|
||||
category_ids: nil,
|
||||
created_after: 30.days.ago
|
||||
)
|
||||
query = Post.where("posts.like_count >= ?", min_likes)
|
||||
|
||||
# Exclude first posts if specified
|
||||
query = query.where("posts.post_number > 1") if exclude_first_posts
|
||||
|
||||
# Apply additional filters
|
||||
query = query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present?
|
||||
query = query.where("posts.created_at >= ?", created_after) if created_after.present?
|
||||
|
||||
# Filter by category if specified
|
||||
if category_ids.present?
|
||||
query = query.joins(:topic).where("topics.category_id IN (?)", category_ids)
|
||||
end
|
||||
|
||||
# Exclude posts that already have concepts
|
||||
posts_with_concepts = <<~SQL
|
||||
SELECT DISTINCT post_id
|
||||
FROM posts_inferred_concepts
|
||||
SQL
|
||||
|
||||
query = query.where("posts.id NOT IN (#{posts_with_concepts})")
|
||||
|
||||
# Order by engagement (likes)
|
||||
query = query.order(like_count: :desc)
|
||||
|
||||
# Return limited number of posts
|
||||
query.limit(limit)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -3,6 +3,17 @@
|
|||
module DiscourseAi
|
||||
module InferredConcepts
|
||||
class Manager
|
||||
# Get a list of existing concepts
|
||||
# @param limit [Integer, nil] Optional maximum number of concepts to return
|
||||
# @return [Array<String>] Names of the existing concepts, ordered by name ascending
|
||||
def self.list_concepts(limit: nil)
|
||||
query = InferredConcept.all.order("name ASC")
|
||||
|
||||
# Apply limit if provided
|
||||
query = query.limit(limit) if limit.present?
|
||||
|
||||
query.pluck(:name)
|
||||
end
|
||||
# Generate new concepts for a topic and apply them
|
||||
# @param topic [Topic] A Topic instance
|
||||
# @return [Array<InferredConcept>] The concepts that were applied
|
||||
|
@ -12,6 +23,15 @@ module DiscourseAi
|
|||
Applier.analyze_and_apply(topic)
|
||||
end
|
||||
|
||||
# Generate new concepts for a post and apply them
|
||||
# @param post [Post] A Post instance
|
||||
# @return [Array<InferredConcept>] The concepts that were applied
|
||||
def self.analyze_post(post)
|
||||
return [] if post.blank?
|
||||
|
||||
Applier.analyze_and_apply_post(post)
|
||||
end
|
||||
|
||||
# Extract new concepts from arbitrary content
|
||||
# @param content [String] The content to analyze
|
||||
# @return [Array<String>] The identified concept names
|
||||
|
@ -47,6 +67,20 @@ module DiscourseAi
|
|||
generate_concepts_from_content(content)
|
||||
end
|
||||
|
||||
# Generate concepts from a post's content without applying them to the post
|
||||
# @param post [Post] A Post instance
|
||||
# @return [Array<InferredConcept>] The created or found concepts
|
||||
def self.generate_concepts_from_post(post)
|
||||
return [] if post.blank?
|
||||
|
||||
# Get content to analyze
|
||||
content = Applier.post_content_for_analysis(post)
|
||||
return [] if content.blank?
|
||||
|
||||
# Generate concepts from the content
|
||||
generate_concepts_from_content(content)
|
||||
end
|
||||
|
||||
# Match a topic against existing concepts
|
||||
# @param topic [Topic] A Topic instance
|
||||
# @return [Array<InferredConcept>] The concepts that were applied
|
||||
|
@ -56,6 +90,15 @@ module DiscourseAi
|
|||
Applier.match_existing_concepts(topic)
|
||||
end
|
||||
|
||||
# Match a post against existing concepts
|
||||
# @param post [Post] A Post instance
|
||||
# @return [Array<InferredConcept>] The concepts that were applied
|
||||
def self.match_post_to_concepts(post)
|
||||
return [] if post.blank?
|
||||
|
||||
Applier.match_existing_concepts_for_post(post)
|
||||
end
|
||||
|
||||
# Find topics that have a specific concept
|
||||
# @param concept_name [String] The name of the concept to search for
|
||||
# @return [Array<Topic>] Topics that have the specified concept
|
||||
|
@ -65,6 +108,15 @@ module DiscourseAi
|
|||
concept.topics
|
||||
end
|
||||
|
||||
# Find posts that have a specific concept
|
||||
# @param concept_name [String] The name of the concept to search for
|
||||
# @return [Array<Post>] Posts that have the specified concept
|
||||
def self.search_posts_by_concept(concept_name)
|
||||
concept = ::InferredConcept.find_by(name: concept_name)
|
||||
return [] unless concept
|
||||
concept.posts
|
||||
end
|
||||
|
||||
# Match arbitrary content against existing concepts
|
||||
# @param content [String] The content to analyze
|
||||
# @return [Array<String>] Names of matching concepts
|
||||
|
@ -89,6 +141,20 @@ module DiscourseAi
|
|||
def self.find_candidate_topics(opts = {})
|
||||
Finder.find_candidate_topics(opts)
|
||||
end
|
||||
|
||||
# Find candidate posts that are good for concept generation
|
||||
# @param opts [Hash] Options to pass to the finder
|
||||
# @return [Array<Post>] Array of Post objects that are good candidates
|
||||
def self.find_candidate_posts(opts = {})
|
||||
Finder.find_candidate_posts(
|
||||
limit: opts[:limit],
|
||||
min_likes: opts[:min_likes],
|
||||
exclude_first_posts: opts[:exclude_first_posts],
|
||||
exclude_post_ids: opts[:exclude_post_ids],
|
||||
category_ids: opts[:category_ids],
|
||||
created_after: opts[:created_after],
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -17,7 +17,11 @@ module DiscourseAi
|
|||
:context_post_ids,
|
||||
:feature_name,
|
||||
:resource_url,
|
||||
<<<<<<< HEAD
|
||||
:cancel_manager
|
||||
=======
|
||||
:inferred_concepts
|
||||
>>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts)
|
||||
|
||||
def initialize(
|
||||
post: nil,
|
||||
|
@ -35,7 +39,11 @@ module DiscourseAi
|
|||
context_post_ids: nil,
|
||||
feature_name: "bot",
|
||||
resource_url: nil,
|
||||
<<<<<<< HEAD
|
||||
cancel_manager: nil
|
||||
=======
|
||||
inferred_concepts: []
|
||||
>>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts)
|
||||
)
|
||||
@participants = participants
|
||||
@user = user
|
||||
|
@ -54,7 +62,7 @@ module DiscourseAi
|
|||
@resource_url = resource_url
|
||||
|
||||
@feature_name = feature_name
|
||||
@resource_url = resource_url
|
||||
@inferred_concepts = inferred_concepts
|
||||
|
||||
@cancel_manager = cancel_manager
|
||||
|
||||
|
@ -68,7 +76,15 @@ module DiscourseAi
|
|||
end
|
||||
|
||||
# these are strings that can be safely interpolated into templates
|
||||
TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url]
|
||||
TEMPLATE_PARAMS = %w[
|
||||
time
|
||||
site_url
|
||||
site_title
|
||||
site_description
|
||||
participants
|
||||
resource_url
|
||||
inferred_concepts
|
||||
]
|
||||
|
||||
def lookup_template_param(key)
|
||||
public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key)
|
||||
|
@ -114,6 +130,7 @@ module DiscourseAi
|
|||
skip_tool_details: @skip_tool_details,
|
||||
feature_name: @feature_name,
|
||||
resource_url: @resource_url,
|
||||
inferred_concepts: @inferred_concepts,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
|
|
@ -4,6 +4,16 @@ module DiscourseAi
|
|||
module Personas
|
||||
class ConceptFinder < Persona
|
||||
def system_prompt
|
||||
existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts(limit: 100)
|
||||
existing_concepts_text = ""
|
||||
|
||||
existing_concepts_text = <<~CONCEPTS if existing_concepts.present?
|
||||
The following concepts already exist in the system:
|
||||
#{existing_concepts.join(", ")}
|
||||
|
||||
You can reuse these existing concepts if they apply to the content, or suggest new concepts.
|
||||
CONCEPTS
|
||||
|
||||
<<~PROMPT.strip
|
||||
You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text.
|
||||
Your job is to extract meaningful labels that can be used to categorize content.
|
||||
|
@ -16,7 +26,7 @@ module DiscourseAi
|
|||
- Ensure concepts are relevant to the core content
|
||||
- Do not include proper nouns unless they represent key technologies or methodologies
|
||||
- Maintain the original language of the text being analyzed
|
||||
|
||||
#{existing_concepts_text}
|
||||
Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value.
|
||||
Your output should be in the following format:
|
||||
<o>
|
||||
|
|
|
@ -8,6 +8,7 @@ module DiscourseAi
|
|||
You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content.
|
||||
Your job is to analyze the content and determine which concepts from the list apply to it.
|
||||
|
||||
#{concepts_text}
|
||||
Guidelines for matching concepts:
|
||||
- Only select concepts that are clearly relevant to the content
|
||||
- The content must substantially discuss or relate to the concept
|
||||
|
@ -18,6 +19,10 @@ module DiscourseAi
|
|||
- IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts
|
||||
- If no concepts from the list match the content, return an empty array
|
||||
|
||||
The list of available concepts is:
|
||||
|
||||
{inferred_concepts}
|
||||
|
||||
Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list.
|
||||
Your output should be in the following format:
|
||||
<o>
|
||||
|
|
|
@ -11,6 +11,8 @@ module DiscourseAi
|
|||
-> { where(classification_type: "sentiment") },
|
||||
class_name: "ClassificationResult",
|
||||
as: :target
|
||||
|
||||
has_and_belongs_to_many :inferred_concepts
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -17,6 +17,7 @@ class TestPersona < DiscourseAi::Personas::Persona
|
|||
{participants}
|
||||
{time}
|
||||
{resource_url}
|
||||
{inferred_concepts}
|
||||
PROMPT
|
||||
end
|
||||
end
|
||||
|
@ -38,6 +39,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
|||
end
|
||||
|
||||
let(:resource_url) { "https://path-to-resource" }
|
||||
let(:inferred_concepts) { %w[bulbassaur charmander squirtle] }
|
||||
|
||||
let(:context) do
|
||||
DiscourseAi::Personas::BotContext.new(
|
||||
|
@ -47,6 +49,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
|||
time: Time.zone.now,
|
||||
participants: topic_with_users.allowed_users.map(&:username).join(", "),
|
||||
resource_url: resource_url,
|
||||
inferred_concepts: inferred_concepts,
|
||||
)
|
||||
end
|
||||
|
||||
|
@ -66,6 +69,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
|||
expect(system_message).to include("joe, jane")
|
||||
expect(system_message).to include(Time.zone.now.to_s)
|
||||
expect(system_message).to include(resource_url)
|
||||
expect(system_message).to include(inferred_concepts)
|
||||
|
||||
tools = rendered.tools
|
||||
|
||||
|
|
Loading…
Reference in New Issue