FEATURE: Extend inferred concepts to include posts
* Adds support for concepts to be inferred from and applied to posts
* Replaces the daily task with one that handles both topics and posts
* Adds a database migration for the posts_inferred_concepts join table
* Updates PersonaContext to include inferred concepts
This commit is contained in:
parent
fb0d364687
commit
5f0d682e69
|
@ -0,0 +1,8 @@
|
||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(bundle exec rails g migration:*)"
|
||||||
|
],
|
||||||
|
"deny": []
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,47 +0,0 @@
|
||||||
# frozen_string_literal: true
|
|
||||||
|
|
||||||
module Jobs
|
|
||||||
class ApplyInferredConcepts < ::Jobs::Base
|
|
||||||
sidekiq_options queue: 'low'
|
|
||||||
|
|
||||||
# Process a batch of topics to apply existing concepts to them
|
|
||||||
#
|
|
||||||
# @param args [Hash] Contains job arguments
|
|
||||||
# @option args [Array<Integer>] :topic_ids Required - List of topic IDs to process
|
|
||||||
# @option args [Integer] :batch_size (100) Number of topics to process in each batch
|
|
||||||
def execute(args = {})
|
|
||||||
return if args[:topic_ids].blank?
|
|
||||||
|
|
||||||
# Process topics in smaller batches to avoid memory issues
|
|
||||||
batch_size = args[:batch_size] || 100
|
|
||||||
|
|
||||||
# Get the list of topic IDs
|
|
||||||
topic_ids = args[:topic_ids]
|
|
||||||
|
|
||||||
# Process topics in batches
|
|
||||||
topic_ids.each_slice(batch_size) do |batch_topic_ids|
|
|
||||||
process_batch(batch_topic_ids)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def process_batch(topic_ids)
|
|
||||||
topics = Topic.where(id: topic_ids)
|
|
||||||
|
|
||||||
topics.each do |topic|
|
|
||||||
begin
|
|
||||||
process_topic(topic)
|
|
||||||
rescue => e
|
|
||||||
Rails.logger.error("Error applying concepts to topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def process_topic(topic)
|
|
||||||
# Match topic against existing concepts and apply them
|
|
||||||
# Pass the topic object directly
|
|
||||||
DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(topic)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -4,44 +4,64 @@ module Jobs
|
||||||
class GenerateInferredConcepts < ::Jobs::Base
|
class GenerateInferredConcepts < ::Jobs::Base
|
||||||
sidekiq_options queue: 'low'
|
sidekiq_options queue: 'low'
|
||||||
|
|
||||||
# Process a batch of topics to generate new concepts (without applying them to topics)
|
# Process items to generate new concepts
|
||||||
#
|
#
|
||||||
# @param args [Hash] Contains job arguments
|
# @param args [Hash] Contains job arguments
|
||||||
# @option args [Array<Integer>] :topic_ids Required - List of topic IDs to process
|
# @option args [String] :item_type Required - Type of items to process ('topics' or 'posts')
|
||||||
# @option args [Integer] :batch_size (100) Number of topics to process in each batch
|
# @option args [Array<Integer>] :item_ids Required - List of item IDs to process
|
||||||
|
# @option args [Integer] :batch_size (100) Number of items to process in each batch
|
||||||
|
# @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones
|
||||||
def execute(args = {})
|
def execute(args = {})
|
||||||
return if args[:topic_ids].blank?
|
return if args[:item_ids].blank? || args[:item_type].blank?
|
||||||
|
|
||||||
# Process topics in smaller batches to avoid memory issues
|
unless ['topics', 'posts'].include?(args[:item_type])
|
||||||
|
Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}")
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
# Process items in smaller batches to avoid memory issues
|
||||||
batch_size = args[:batch_size] || 100
|
batch_size = args[:batch_size] || 100
|
||||||
|
|
||||||
# Get the list of topic IDs
|
# Get the list of item IDs
|
||||||
topic_ids = args[:topic_ids]
|
item_ids = args[:item_ids]
|
||||||
|
match_only = args[:match_only] || false
|
||||||
|
|
||||||
# Process topics in batches
|
# Process items in batches
|
||||||
topic_ids.each_slice(batch_size) do |batch_topic_ids|
|
item_ids.each_slice(batch_size) do |batch_item_ids|
|
||||||
process_batch(batch_topic_ids)
|
process_batch(batch_item_ids, args[:item_type], match_only)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def process_batch(topic_ids)
|
def process_batch(item_ids, item_type, match_only)
|
||||||
topics = Topic.where(id: topic_ids)
|
klass = item_type.singularize.classify.constantize
|
||||||
|
items = klass.where(id: item_ids)
|
||||||
|
|
||||||
topics.each do |topic|
|
items.each do |item|
|
||||||
begin
|
begin
|
||||||
process_topic(topic)
|
process_item(item, item_type, match_only)
|
||||||
rescue => e
|
rescue => e
|
||||||
Rails.logger.error("Error generating concepts from topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}")
|
Rails.logger.error("Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def process_topic(topic)
|
def process_item(item, item_type, match_only)
|
||||||
# Use the Manager method that handles both identifying and creating concepts
|
# Dispatch to the Manager: when match_only is set, match the item against existing concepts; otherwise analyze the item (topic or post) to generate concepts
|
||||||
# Pass the topic object directly
|
if match_only
|
||||||
DiscourseAi::InferredConcepts::Manager.generate_concepts_from_topic(topic)
|
if item_type == 'topics'
|
||||||
|
DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(item)
|
||||||
|
else # posts
|
||||||
|
DiscourseAi::InferredConcepts::Manager.match_post_to_concepts(item)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
if item_type == 'topics'
|
||||||
|
DiscourseAi::InferredConcepts::Manager.analyze_topic(item)
|
||||||
|
else # posts
|
||||||
|
DiscourseAi::InferredConcepts::Manager.analyze_post(item)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -0,0 +1,81 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module Jobs
|
||||||
|
class GenerateConceptsFromPopularItems < ::Jobs::Scheduled
|
||||||
|
every 1.day
|
||||||
|
|
||||||
|
# This job runs daily and generates new concepts from popular topics and posts
|
||||||
|
# It selects items based on engagement metrics and generates concepts from their content
|
||||||
|
def execute(args = {})
|
||||||
|
return unless SiteSetting.inferred_concepts_enabled
|
||||||
|
|
||||||
|
process_popular_topics
|
||||||
|
process_popular_posts
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def process_popular_topics
|
||||||
|
|
||||||
|
# Find candidate topics that are popular and don't have concepts yet
|
||||||
|
candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics(
|
||||||
|
limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
|
||||||
|
min_posts: SiteSetting.inferred_concepts_min_posts || 5,
|
||||||
|
min_likes: SiteSetting.inferred_concepts_min_likes || 10,
|
||||||
|
min_views: SiteSetting.inferred_concepts_min_views || 100,
|
||||||
|
created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
|
||||||
|
)
|
||||||
|
|
||||||
|
return if candidates.blank?
|
||||||
|
|
||||||
|
# Process candidate topics - first generate concepts, then match
|
||||||
|
Jobs.enqueue(
|
||||||
|
:generate_inferred_concepts,
|
||||||
|
item_type: 'topics',
|
||||||
|
item_ids: candidates.map(&:id),
|
||||||
|
batch_size: 10
|
||||||
|
)
|
||||||
|
|
||||||
|
# Schedule a follow-up job to match existing concepts
|
||||||
|
Jobs.enqueue_in(
|
||||||
|
1.hour,
|
||||||
|
:generate_inferred_concepts,
|
||||||
|
item_type: 'topics',
|
||||||
|
item_ids: candidates.map(&:id),
|
||||||
|
batch_size: 10,
|
||||||
|
match_only: true
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
def process_popular_posts
|
||||||
|
|
||||||
|
# Find candidate posts that are popular and don't have concepts yet
|
||||||
|
candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_posts(
|
||||||
|
limit: SiteSetting.inferred_concepts_daily_posts_limit || 30,
|
||||||
|
min_likes: SiteSetting.inferred_concepts_post_min_likes || 5,
|
||||||
|
exclude_first_posts: true,
|
||||||
|
created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
|
||||||
|
)
|
||||||
|
|
||||||
|
return if candidates.blank?
|
||||||
|
|
||||||
|
# Process candidate posts - first generate concepts, then match
|
||||||
|
Jobs.enqueue(
|
||||||
|
:generate_inferred_concepts,
|
||||||
|
item_type: 'posts',
|
||||||
|
item_ids: candidates.map(&:id),
|
||||||
|
batch_size: 10
|
||||||
|
)
|
||||||
|
|
||||||
|
# Schedule a follow-up job to match against existing concepts
|
||||||
|
Jobs.enqueue_in(
|
||||||
|
1.hour,
|
||||||
|
:generate_inferred_concepts,
|
||||||
|
item_type: 'posts',
|
||||||
|
item_ids: candidates.map(&:id),
|
||||||
|
batch_size: 10,
|
||||||
|
match_only: true
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -1,38 +0,0 @@
|
||||||
# frozen_string_literal: true
|
|
||||||
|
|
||||||
module Jobs
|
|
||||||
class GenerateConceptsFromPopularTopics < ::Jobs::Scheduled
|
|
||||||
every 1.day
|
|
||||||
|
|
||||||
# This job runs daily and generates new concepts from popular topics
|
|
||||||
# It selects topics based on engagement metrics and generates concepts from their content
|
|
||||||
def execute(args = {})
|
|
||||||
# Find candidate topics that are popular and don't have concepts yet
|
|
||||||
candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics(
|
|
||||||
limit: SiteSetting.inferred_concepts_daily_topics_limit || 20,
|
|
||||||
min_posts: SiteSetting.inferred_concepts_min_posts || 5,
|
|
||||||
min_likes: SiteSetting.inferred_concepts_min_likes || 10,
|
|
||||||
min_views: SiteSetting.inferred_concepts_min_views || 100,
|
|
||||||
created_after: SiteSetting.inferred_concepts_lookback_days.days.ago
|
|
||||||
)
|
|
||||||
|
|
||||||
return if candidates.blank?
|
|
||||||
|
|
||||||
# Process the candidate topics in batches using the regular job
|
|
||||||
Jobs.enqueue(
|
|
||||||
:generate_inferred_concepts,
|
|
||||||
topic_ids: candidates.map(&:id),
|
|
||||||
batch_size: 10
|
|
||||||
)
|
|
||||||
|
|
||||||
# Schedule a follow-up job to apply the concepts to topics
|
|
||||||
# This runs after a delay to ensure concepts have been generated
|
|
||||||
Jobs.enqueue_in(
|
|
||||||
1.hour,
|
|
||||||
:apply_inferred_concepts,
|
|
||||||
topic_ids: candidates.map(&:id),
|
|
||||||
batch_size: 10
|
|
||||||
)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
class InferredConcept < ActiveRecord::Base
|
class InferredConcept < ActiveRecord::Base
|
||||||
has_and_belongs_to_many :topics
|
has_and_belongs_to_many :topics
|
||||||
|
has_and_belongs_to_many :posts
|
||||||
|
|
||||||
validates :name, presence: true, uniqueness: true
|
validates :name, presence: true, uniqueness: true
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class AiInferredConceptPostSerializer < ApplicationSerializer
|
||||||
|
attributes :id,
|
||||||
|
:post_number,
|
||||||
|
:topic_id,
|
||||||
|
:topic_title,
|
||||||
|
:username,
|
||||||
|
:avatar_template,
|
||||||
|
:created_at,
|
||||||
|
:updated_at,
|
||||||
|
:excerpt,
|
||||||
|
:truncated,
|
||||||
|
:inferred_concepts
|
||||||
|
|
||||||
|
def avatar_template
|
||||||
|
User.avatar_template(object.username, object.uploaded_avatar_id)
|
||||||
|
end
|
||||||
|
|
||||||
|
def excerpt
|
||||||
|
Post.excerpt(object.cooked)
|
||||||
|
end
|
||||||
|
|
||||||
|
def truncated
|
||||||
|
object.cooked.length > SiteSetting.post_excerpt_maxlength
|
||||||
|
end
|
||||||
|
|
||||||
|
def inferred_concepts
|
||||||
|
ActiveModel::ArraySerializer.new(
|
||||||
|
object.inferred_concepts,
|
||||||
|
each_serializer: InferredConceptSerializer
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
|
@ -326,6 +326,12 @@ en:
|
||||||
short_summarizer:
|
short_summarizer:
|
||||||
name: "Summarizer (short form)"
|
name: "Summarizer (short form)"
|
||||||
description: "Default persona used to power AI short summaries for topic lists' items"
|
description: "Default persona used to power AI short summaries for topic lists' items"
|
||||||
|
concept_finder:
|
||||||
|
name: "Concept Finder"
|
||||||
|
description: "AI Bot specialized in identifying concepts and themes in content"
|
||||||
|
concept_matcher:
|
||||||
|
name: "Concept Matcher"
|
||||||
|
description: "AI Bot specialized in matching content against existing concepts"
|
||||||
topic_not_found: "Summary unavailable, topic not found!"
|
topic_not_found: "Summary unavailable, topic not found!"
|
||||||
summarizing: "Summarizing topic"
|
summarizing: "Summarizing topic"
|
||||||
searching: "Searching for: '%{query}'"
|
searching: "Searching for: '%{query}'"
|
||||||
|
|
|
@ -426,3 +426,11 @@ discourse_ai:
|
||||||
default: 30
|
default: 30
|
||||||
client: false
|
client: false
|
||||||
description: "Only consider topics created within this many days for concept generation"
|
description: "Only consider topics created within this many days for concept generation"
|
||||||
|
inferred_concepts_daily_posts_limit:
|
||||||
|
default: 30
|
||||||
|
client: false
|
||||||
|
description: "Maximum number of posts to process each day for concept generation"
|
||||||
|
inferred_concepts_post_min_likes:
|
||||||
|
default: 5
|
||||||
|
client: false
|
||||||
|
description: "Minimum number of likes a post must have to be considered for concept generation"
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0]
|
||||||
|
def change
|
||||||
|
create_table :posts_inferred_concepts do |t|
|
||||||
|
t.integer :post_id, null: false
|
||||||
|
t.integer :inferred_concept_id, null: false
|
||||||
|
t.timestamps
|
||||||
|
end
|
||||||
|
|
||||||
|
add_index :posts_inferred_concepts, [:post_id, :inferred_concept_id], unique: true, name: 'idx_unique_post_inferred_concept'
|
||||||
|
add_index :posts_inferred_concepts, :post_id
|
||||||
|
add_index :posts_inferred_concepts, :inferred_concept_id
|
||||||
|
end
|
||||||
|
end
|
|
@ -20,6 +20,23 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Associates the provided concepts with a post
|
||||||
|
# post: a Post instance
|
||||||
|
# concepts: an array of InferredConcept instances
|
||||||
|
def self.apply_to_post(post, concepts)
|
||||||
|
return if post.blank? || concepts.blank?
|
||||||
|
|
||||||
|
concepts.each do |concept|
|
||||||
|
# Use the join table to associate the concept with the post
|
||||||
|
# Avoid duplicates with INSERT ... ON CONFLICT (post_id, inferred_concept_id) DO NOTHING
|
||||||
|
ActiveRecord::Base.connection.execute(<<~SQL)
|
||||||
|
INSERT INTO posts_inferred_concepts (post_id, inferred_concept_id, created_at, updated_at)
|
||||||
|
VALUES (#{post.id}, #{concept.id}, NOW(), NOW())
|
||||||
|
ON CONFLICT (post_id, inferred_concept_id) DO NOTHING
|
||||||
|
SQL
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Extracts content from a topic for concept analysis
|
# Extracts content from a topic for concept analysis
|
||||||
# Returns a string with the topic title and first few posts
|
# Returns a string with the topic title and first few posts
|
||||||
def self.topic_content_for_analysis(topic)
|
def self.topic_content_for_analysis(topic)
|
||||||
|
@ -29,9 +46,21 @@ module DiscourseAi
|
||||||
posts = Post.where(topic_id: topic.id).order(:post_number).limit(10)
|
posts = Post.where(topic_id: topic.id).order(:post_number).limit(10)
|
||||||
|
|
||||||
content = "Title: #{topic.title}\n\n"
|
content = "Title: #{topic.title}\n\n"
|
||||||
content += posts.map do |p|
|
content += posts.map { |p| "#{p.post_number}) #{p.user.username}: #{p.raw}" }.join("\n\n")
|
||||||
"#{p.post_number}) #{p.user.username}: #{p.raw}"
|
|
||||||
end.join("\n\n")
|
content
|
||||||
|
end
|
||||||
|
|
||||||
|
# Extracts content from a post for concept analysis
|
||||||
|
# Returns a string with the post content
|
||||||
|
def self.post_content_for_analysis(post)
|
||||||
|
return "" if post.blank?
|
||||||
|
|
||||||
|
# Get the topic title for context
|
||||||
|
topic_title = post.topic&.title || ""
|
||||||
|
|
||||||
|
content = "Topic: #{topic_title}\n\n"
|
||||||
|
content += "Post by #{post.user.username}:\n#{post.raw}"
|
||||||
|
|
||||||
content
|
content
|
||||||
end
|
end
|
||||||
|
@ -55,6 +84,25 @@ module DiscourseAi
|
||||||
concepts
|
concepts
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Comprehensive method to analyze a post and apply concepts
|
||||||
|
def self.analyze_and_apply_post(post)
|
||||||
|
return if post.blank?
|
||||||
|
|
||||||
|
# Get content to analyze
|
||||||
|
content = post_content_for_analysis(post)
|
||||||
|
|
||||||
|
# Identify concepts
|
||||||
|
concept_names = Finder.identify_concepts(content)
|
||||||
|
|
||||||
|
# Create or find concepts in the database
|
||||||
|
concepts = Finder.create_or_find_concepts(concept_names)
|
||||||
|
|
||||||
|
# Apply concepts to the post
|
||||||
|
apply_to_post(post, concepts)
|
||||||
|
|
||||||
|
concepts
|
||||||
|
end
|
||||||
|
|
||||||
# Match a topic with existing concepts
|
# Match a topic with existing concepts
|
||||||
def self.match_existing_concepts(topic)
|
def self.match_existing_concepts(topic)
|
||||||
return [] if topic.blank?
|
return [] if topic.blank?
|
||||||
|
@ -63,7 +111,7 @@ module DiscourseAi
|
||||||
content = topic_content_for_analysis(topic)
|
content = topic_content_for_analysis(topic)
|
||||||
|
|
||||||
# Get all existing concepts
|
# Get all existing concepts
|
||||||
existing_concepts = InferredConcept.all.pluck(:name)
|
existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts
|
||||||
return [] if existing_concepts.empty?
|
return [] if existing_concepts.empty?
|
||||||
|
|
||||||
# Use the ConceptMatcher persona to match concepts
|
# Use the ConceptMatcher persona to match concepts
|
||||||
|
@ -78,26 +126,47 @@ module DiscourseAi
|
||||||
matched_concepts
|
matched_concepts
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Match a post with existing concepts
|
||||||
|
def self.match_existing_concepts_for_post(post)
|
||||||
|
return [] if post.blank?
|
||||||
|
|
||||||
|
# Get content to analyze
|
||||||
|
content = post_content_for_analysis(post)
|
||||||
|
|
||||||
|
# Get all existing concepts
|
||||||
|
existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts
|
||||||
|
return [] if existing_concepts.empty?
|
||||||
|
|
||||||
|
# Use the ConceptMatcher persona to match concepts
|
||||||
|
matched_concept_names = match_concepts_to_content(content, existing_concepts)
|
||||||
|
|
||||||
|
# Find concepts in the database
|
||||||
|
matched_concepts = InferredConcept.where(name: matched_concept_names)
|
||||||
|
|
||||||
|
# Apply concepts to the post
|
||||||
|
apply_to_post(post, matched_concepts)
|
||||||
|
|
||||||
|
matched_concepts
|
||||||
|
end
|
||||||
|
|
||||||
# Use ConceptMatcher persona to match content against provided concepts
|
# Use ConceptMatcher persona to match content against provided concepts
|
||||||
def self.match_concepts_to_content(content, concept_list)
|
def self.match_concepts_to_content(content, concept_list)
|
||||||
return [] if content.blank? || concept_list.blank?
|
return [] if content.blank? || concept_list.blank?
|
||||||
|
|
||||||
# Prepare user message with content and concept list
|
# Prepare user message with only the content
|
||||||
user_message = <<~MESSAGE
|
user_message = <<~MESSAGE
|
||||||
Content to analyze:
|
Content to analyze:
|
||||||
#{content}
|
#{content}
|
||||||
|
|
||||||
Available concepts to match:
|
|
||||||
#{concept_list.join(", ")}
|
|
||||||
MESSAGE
|
MESSAGE
|
||||||
|
|
||||||
# Use the ConceptMatcher persona to match concepts
|
# Use the ConceptMatcher persona to match concepts
|
||||||
llm = DiscourseAi::Completions::Llm.default_llm
|
llm = DiscourseAi::Completions::Llm.default_llm
|
||||||
persona = DiscourseAi::Personas::ConceptMatcher.new
|
persona = DiscourseAi::Personas::ConceptMatcher.new(concept_list: concept_list)
|
||||||
context = DiscourseAi::Personas::BotContext.new(
|
context =
|
||||||
messages: [{ type: :user, content: user_message }],
|
DiscourseAi::Personas::BotContext.new(
|
||||||
user: Discourse.system_user
|
messages: [{ type: :user, content: user_message }],
|
||||||
)
|
user: Discourse.system_user,
|
||||||
|
)
|
||||||
|
|
||||||
prompt = persona.craft_prompt(context)
|
prompt = persona.craft_prompt(context)
|
||||||
response = llm.completion(prompt, extract_json: true)
|
response = llm.completion(prompt, extract_json: true)
|
||||||
|
|
|
@ -86,6 +86,52 @@ module DiscourseAi
|
||||||
# Return limited number of topics
|
# Return limited number of topics
|
||||||
query.limit(limit)
|
query.limit(limit)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Find candidate posts that are good for concept generation
|
||||||
|
#
|
||||||
|
# @param limit [Integer] Maximum number of posts to return
|
||||||
|
# @param min_likes [Integer] Minimum number of likes
|
||||||
|
# @param exclude_first_posts [Boolean] Exclude first posts in topics
|
||||||
|
# @param exclude_post_ids [Array<Integer>] Post IDs to exclude
|
||||||
|
# @param category_ids [Array<Integer>] Only include posts from topics in these categories
|
||||||
|
# @param created_after [DateTime] Only include posts created after this time
|
||||||
|
# @return [Array<Post>] Array of Post objects that are good candidates
|
||||||
|
def self.find_candidate_posts(
|
||||||
|
limit: 100,
|
||||||
|
min_likes: 5,
|
||||||
|
exclude_first_posts: true,
|
||||||
|
exclude_post_ids: [],
|
||||||
|
category_ids: nil,
|
||||||
|
created_after: 30.days.ago
|
||||||
|
)
|
||||||
|
query = Post.where("posts.like_count >= ?", min_likes)
|
||||||
|
|
||||||
|
# Exclude first posts if specified
|
||||||
|
query = query.where("posts.post_number > 1") if exclude_first_posts
|
||||||
|
|
||||||
|
# Apply additional filters
|
||||||
|
query = query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present?
|
||||||
|
query = query.where("posts.created_at >= ?", created_after) if created_after.present?
|
||||||
|
|
||||||
|
# Filter by category if specified
|
||||||
|
if category_ids.present?
|
||||||
|
query = query.joins(:topic).where("topics.category_id IN (?)", category_ids)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Exclude posts that already have concepts
|
||||||
|
posts_with_concepts = <<~SQL
|
||||||
|
SELECT DISTINCT post_id
|
||||||
|
FROM posts_inferred_concepts
|
||||||
|
SQL
|
||||||
|
|
||||||
|
query = query.where("posts.id NOT IN (#{posts_with_concepts})")
|
||||||
|
|
||||||
|
# Order by engagement (likes)
|
||||||
|
query = query.order(like_count: :desc)
|
||||||
|
|
||||||
|
# Return limited number of posts
|
||||||
|
query.limit(limit)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -3,6 +3,17 @@
|
||||||
module DiscourseAi
|
module DiscourseAi
|
||||||
module InferredConcepts
|
module InferredConcepts
|
||||||
class Manager
|
class Manager
|
||||||
|
# Get a list of existing concepts
|
||||||
|
# @param limit [Integer, nil] Optional maximum number of concepts to return
|
||||||
|
# @return [Array<String>] Names of the existing concepts, ordered alphabetically
|
||||||
|
def self.list_concepts(limit: nil)
|
||||||
|
query = InferredConcept.all.order("name ASC")
|
||||||
|
|
||||||
|
# Apply limit if provided
|
||||||
|
query = query.limit(limit) if limit.present?
|
||||||
|
|
||||||
|
query.pluck(:name)
|
||||||
|
end
|
||||||
# Generate new concepts for a topic and apply them
|
# Generate new concepts for a topic and apply them
|
||||||
# @param topic [Topic] A Topic instance
|
# @param topic [Topic] A Topic instance
|
||||||
# @return [Array<InferredConcept>] The concepts that were applied
|
# @return [Array<InferredConcept>] The concepts that were applied
|
||||||
|
@ -12,6 +23,15 @@ module DiscourseAi
|
||||||
Applier.analyze_and_apply(topic)
|
Applier.analyze_and_apply(topic)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Generate new concepts for a post and apply them
|
||||||
|
# @param post [Post] A Post instance
|
||||||
|
# @return [Array<InferredConcept>] The concepts that were applied
|
||||||
|
def self.analyze_post(post)
|
||||||
|
return [] if post.blank?
|
||||||
|
|
||||||
|
Applier.analyze_and_apply_post(post)
|
||||||
|
end
|
||||||
|
|
||||||
# Extract new concepts from arbitrary content
|
# Extract new concepts from arbitrary content
|
||||||
# @param content [String] The content to analyze
|
# @param content [String] The content to analyze
|
||||||
# @return [Array<String>] The identified concept names
|
# @return [Array<String>] The identified concept names
|
||||||
|
@ -47,6 +67,20 @@ module DiscourseAi
|
||||||
generate_concepts_from_content(content)
|
generate_concepts_from_content(content)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Generate concepts from a post's content without applying them to the post
|
||||||
|
# @param post [Post] A Post instance
|
||||||
|
# @return [Array<InferredConcept>] The created or found concepts
|
||||||
|
def self.generate_concepts_from_post(post)
|
||||||
|
return [] if post.blank?
|
||||||
|
|
||||||
|
# Get content to analyze
|
||||||
|
content = Applier.post_content_for_analysis(post)
|
||||||
|
return [] if content.blank?
|
||||||
|
|
||||||
|
# Generate concepts from the content
|
||||||
|
generate_concepts_from_content(content)
|
||||||
|
end
|
||||||
|
|
||||||
# Match a topic against existing concepts
|
# Match a topic against existing concepts
|
||||||
# @param topic [Topic] A Topic instance
|
# @param topic [Topic] A Topic instance
|
||||||
# @return [Array<InferredConcept>] The concepts that were applied
|
# @return [Array<InferredConcept>] The concepts that were applied
|
||||||
|
@ -56,6 +90,15 @@ module DiscourseAi
|
||||||
Applier.match_existing_concepts(topic)
|
Applier.match_existing_concepts(topic)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Match a post against existing concepts
|
||||||
|
# @param post [Post] A Post instance
|
||||||
|
# @return [Array<InferredConcept>] The concepts that were applied
|
||||||
|
def self.match_post_to_concepts(post)
|
||||||
|
return [] if post.blank?
|
||||||
|
|
||||||
|
Applier.match_existing_concepts_for_post(post)
|
||||||
|
end
|
||||||
|
|
||||||
# Find topics that have a specific concept
|
# Find topics that have a specific concept
|
||||||
# @param concept_name [String] The name of the concept to search for
|
# @param concept_name [String] The name of the concept to search for
|
||||||
# @return [Array<Topic>] Topics that have the specified concept
|
# @return [Array<Topic>] Topics that have the specified concept
|
||||||
|
@ -65,6 +108,15 @@ module DiscourseAi
|
||||||
concept.topics
|
concept.topics
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Find posts that have a specific concept
|
||||||
|
# @param concept_name [String] The name of the concept to search for
|
||||||
|
# @return [Array<Post>] Posts that have the specified concept
|
||||||
|
def self.search_posts_by_concept(concept_name)
|
||||||
|
concept = ::InferredConcept.find_by(name: concept_name)
|
||||||
|
return [] unless concept
|
||||||
|
concept.posts
|
||||||
|
end
|
||||||
|
|
||||||
# Match arbitrary content against existing concepts
|
# Match arbitrary content against existing concepts
|
||||||
# @param content [String] The content to analyze
|
# @param content [String] The content to analyze
|
||||||
# @return [Array<String>] Names of matching concepts
|
# @return [Array<String>] Names of matching concepts
|
||||||
|
@ -89,6 +141,20 @@ module DiscourseAi
|
||||||
def self.find_candidate_topics(opts = {})
|
def self.find_candidate_topics(opts = {})
|
||||||
Finder.find_candidate_topics(opts)
|
Finder.find_candidate_topics(opts)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Find candidate posts that are good for concept generation
|
||||||
|
# @param opts [Hash] Options to pass to the finder
|
||||||
|
# @return [Array<Post>] Array of Post objects that are good candidates
|
||||||
|
def self.find_candidate_posts(opts = {})
|
||||||
|
Finder.find_candidate_posts(
|
||||||
|
limit: opts[:limit],
|
||||||
|
min_likes: opts[:min_likes],
|
||||||
|
exclude_first_posts: opts[:exclude_first_posts],
|
||||||
|
exclude_post_ids: opts[:exclude_post_ids],
|
||||||
|
category_ids: opts[:category_ids],
|
||||||
|
created_after: opts[:created_after],
|
||||||
|
)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -17,7 +17,11 @@ module DiscourseAi
|
||||||
:context_post_ids,
|
:context_post_ids,
|
||||||
:feature_name,
|
:feature_name,
|
||||||
:resource_url,
|
:resource_url,
|
||||||
|
<<<<<<< HEAD
|
||||||
:cancel_manager
|
:cancel_manager
|
||||||
|
=======
|
||||||
|
:inferred_concepts
|
||||||
|
>>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts)
|
||||||
|
|
||||||
def initialize(
|
def initialize(
|
||||||
post: nil,
|
post: nil,
|
||||||
|
@ -35,7 +39,11 @@ module DiscourseAi
|
||||||
context_post_ids: nil,
|
context_post_ids: nil,
|
||||||
feature_name: "bot",
|
feature_name: "bot",
|
||||||
resource_url: nil,
|
resource_url: nil,
|
||||||
|
<<<<<<< HEAD
|
||||||
cancel_manager: nil
|
cancel_manager: nil
|
||||||
|
=======
|
||||||
|
inferred_concepts: []
|
||||||
|
>>>>>>> 44391e27 (FEATURE: Extend inferred concepts to include posts)
|
||||||
)
|
)
|
||||||
@participants = participants
|
@participants = participants
|
||||||
@user = user
|
@user = user
|
||||||
|
@ -54,7 +62,7 @@ module DiscourseAi
|
||||||
@resource_url = resource_url
|
@resource_url = resource_url
|
||||||
|
|
||||||
@feature_name = feature_name
|
@feature_name = feature_name
|
||||||
@resource_url = resource_url
|
@inferred_concepts = inferred_concepts
|
||||||
|
|
||||||
@cancel_manager = cancel_manager
|
@cancel_manager = cancel_manager
|
||||||
|
|
||||||
|
@ -68,7 +76,15 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
# these are strings that can be safely interpolated into templates
|
# these are strings that can be safely interpolated into templates
|
||||||
TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url]
|
TEMPLATE_PARAMS = %w[
|
||||||
|
time
|
||||||
|
site_url
|
||||||
|
site_title
|
||||||
|
site_description
|
||||||
|
participants
|
||||||
|
resource_url
|
||||||
|
inferred_concepts
|
||||||
|
]
|
||||||
|
|
||||||
def lookup_template_param(key)
|
def lookup_template_param(key)
|
||||||
public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key)
|
public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key)
|
||||||
|
@ -114,6 +130,7 @@ module DiscourseAi
|
||||||
skip_tool_details: @skip_tool_details,
|
skip_tool_details: @skip_tool_details,
|
||||||
feature_name: @feature_name,
|
feature_name: @feature_name,
|
||||||
resource_url: @resource_url,
|
resource_url: @resource_url,
|
||||||
|
inferred_concepts: @inferred_concepts,
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -4,6 +4,16 @@ module DiscourseAi
|
||||||
module Personas
|
module Personas
|
||||||
class ConceptFinder < Persona
|
class ConceptFinder < Persona
|
||||||
def system_prompt
|
def system_prompt
|
||||||
|
existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts(limit: 100)
|
||||||
|
existing_concepts_text = ""
|
||||||
|
|
||||||
|
existing_concepts_text = <<~CONCEPTS if existing_concepts.present?
|
||||||
|
The following concepts already exist in the system:
|
||||||
|
#{existing_concepts.join(", ")}
|
||||||
|
|
||||||
|
You can reuse these existing concepts if they apply to the content, or suggest new concepts.
|
||||||
|
CONCEPTS
|
||||||
|
|
||||||
<<~PROMPT.strip
|
<<~PROMPT.strip
|
||||||
You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text.
|
You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text.
|
||||||
Your job is to extract meaningful labels that can be used to categorize content.
|
Your job is to extract meaningful labels that can be used to categorize content.
|
||||||
|
@ -16,7 +26,7 @@ module DiscourseAi
|
||||||
- Ensure concepts are relevant to the core content
|
- Ensure concepts are relevant to the core content
|
||||||
- Do not include proper nouns unless they represent key technologies or methodologies
|
- Do not include proper nouns unless they represent key technologies or methodologies
|
||||||
- Maintain the original language of the text being analyzed
|
- Maintain the original language of the text being analyzed
|
||||||
|
#{existing_concepts_text}
|
||||||
Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value.
|
Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value.
|
||||||
Your output should be in the following format:
|
Your output should be in the following format:
|
||||||
<o>
|
<o>
|
||||||
|
|
|
@ -8,6 +8,7 @@ module DiscourseAi
|
||||||
You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content.
|
You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content.
|
||||||
Your job is to analyze the content and determine which concepts from the list apply to it.
|
Your job is to analyze the content and determine which concepts from the list apply to it.
|
||||||
|
|
||||||
|
#{concepts_text}
|
||||||
Guidelines for matching concepts:
|
Guidelines for matching concepts:
|
||||||
- Only select concepts that are clearly relevant to the content
|
- Only select concepts that are clearly relevant to the content
|
||||||
- The content must substantially discuss or relate to the concept
|
- The content must substantially discuss or relate to the concept
|
||||||
|
@ -18,6 +19,10 @@ module DiscourseAi
|
||||||
- IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts
|
- IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts
|
||||||
- If no concepts from the list match the content, return an empty array
|
- If no concepts from the list match the content, return an empty array
|
||||||
|
|
||||||
|
The list of available concepts is:
|
||||||
|
|
||||||
|
{inferred_concepts}
|
||||||
|
|
||||||
Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list.
|
Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list.
|
||||||
Your output should be in the following format:
|
Your output should be in the following format:
|
||||||
<o>
|
<o>
|
||||||
|
|
|
@ -11,6 +11,8 @@ module DiscourseAi
|
||||||
-> { where(classification_type: "sentiment") },
|
-> { where(classification_type: "sentiment") },
|
||||||
class_name: "ClassificationResult",
|
class_name: "ClassificationResult",
|
||||||
as: :target
|
as: :target
|
||||||
|
|
||||||
|
has_and_belongs_to_many :inferred_concepts
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -17,6 +17,7 @@ class TestPersona < DiscourseAi::Personas::Persona
|
||||||
{participants}
|
{participants}
|
||||||
{time}
|
{time}
|
||||||
{resource_url}
|
{resource_url}
|
||||||
|
{inferred_concepts}
|
||||||
PROMPT
|
PROMPT
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -38,6 +39,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
||||||
end
|
end
|
||||||
|
|
||||||
let(:resource_url) { "https://path-to-resource" }
|
let(:resource_url) { "https://path-to-resource" }
|
||||||
|
let(:inferred_concepts) { %w[bulbassaur charmander squirtle] }
|
||||||
|
|
||||||
let(:context) do
|
let(:context) do
|
||||||
DiscourseAi::Personas::BotContext.new(
|
DiscourseAi::Personas::BotContext.new(
|
||||||
|
@ -47,6 +49,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
||||||
time: Time.zone.now,
|
time: Time.zone.now,
|
||||||
participants: topic_with_users.allowed_users.map(&:username).join(", "),
|
participants: topic_with_users.allowed_users.map(&:username).join(", "),
|
||||||
resource_url: resource_url,
|
resource_url: resource_url,
|
||||||
|
inferred_concepts: inferred_concepts,
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -66,6 +69,7 @@ RSpec.describe DiscourseAi::Personas::Persona do
|
||||||
expect(system_message).to include("joe, jane")
|
expect(system_message).to include("joe, jane")
|
||||||
expect(system_message).to include(Time.zone.now.to_s)
|
expect(system_message).to include(Time.zone.now.to_s)
|
||||||
expect(system_message).to include(resource_url)
|
expect(system_message).to include(resource_url)
|
||||||
|
expect(system_message).to include(inferred_concepts)
|
||||||
|
|
||||||
tools = rendered.tools
|
tools = rendered.tools
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue