172 lines
6.4 KiB
Ruby
172 lines
6.4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module InferredConcepts
|
|
class Finder
|
|
# Identifies potential concepts from provided content.
#
# The content is sent as a single user message to the persona whose id is
# stored in SiteSetting.inferred_concepts_generate_persona, using that
# persona's default LLM. The concepts returned by Manager.list_concepts are
# passed to the bot through the context.
#
# @param content [String] text to extract concepts from
# @return [Array] value of the "concepts" key in the bot's JSON reply; an
#   empty array when content is blank, or when the reply is missing, is not
#   valid JSON, or does not carry a "concepts" array
# @raise [NoMethodError] when no persona matches the configured id (the
#   lookup yields nil and .new is then called on it)
def self.identify_concepts(content)
  return [] if content.blank?

  # Use the ConceptFinder persona to identify concepts
  persona_id = SiteSetting.inferred_concepts_generate_persona.to_i
  persona =
    AiPersona
      .all_personas(enabled_only: false)
      .find { |persona_klass| persona_klass.id == persona_id }
      .new

  llm = LlmModel.find(persona.class.default_llm_id)
  context =
    DiscourseAi::Personas::BotContext.new(
      messages: [{ type: :user, content: content }],
      user: Discourse.system_user,
      inferred_concepts: DiscourseAi::InferredConcepts::Manager.list_concepts,
    )

  bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)
  response = bot.reply(context)

  # The reply is model-generated text, so it can be absent or malformed.
  # Anything other than a JSON object holding a "concepts" array is treated
  # as "no concepts found" rather than an error.
  first_entry = Array(response).first
  raw_reply = first_entry.is_a?(Array) ? first_entry.first : nil
  return [] unless raw_reply.is_a?(String)

  parsed =
    begin
      JSON.parse(raw_reply)
    rescue JSON::ParserError
      nil
    end

  concepts = parsed.is_a?(Hash) ? parsed["concepts"] : nil
  concepts.is_a?(Array) ? concepts : []
end
|
|
|
|
# Resolves concept names to database records, creating a record for any
# name that does not have one yet.
#
# @param concept_names [Array<String>] concept names to resolve
# @return [Array<InferredConcept>] one record per given name, in input
#   order; an empty array when concept_names is blank
def self.create_or_find_concepts(concept_names)
  if concept_names.present?
    concept_names.map { |concept_name| InferredConcept.find_or_create_by(name: concept_name) }
  else
    []
  end
end
|
|
|
|
# Finds candidate topics to use for concept generation.
#
# Only topics of the default archetype that have no row in
# topics_inferred_concepts are considered. Results are ordered by an
# engagement score computed in SQL as
# like_count * 2 + posts_count * 3 + views * 0.1, highest first.
#
# @param limit [Integer] Maximum number of topics to return
# @param min_posts [Integer] Minimum number of posts in topic
# @param min_likes [Integer] Minimum topics.like_count
# @param min_views [Integer] Minimum number of views
# @param exclude_topic_ids [Array<Integer>] Topic IDs to exclude
# @param category_ids [Array<Integer>, nil] Only include topics from these categories (optional)
# @param created_after [Time, nil] Only include topics created at or after this time;
#   defaults to 30 days ago, pass nil to disable the filter
# @return [ActiveRecord::Relation] the chained Topic query (not a materialized Array);
#   each loaded record also exposes the selected engagement_score column
def self.find_candidate_topics(
  limit: 100,
  min_posts: 5,
  min_likes: 10,
  min_views: 100,
  exclude_topic_ids: [],
  category_ids: nil,
  created_after: 30.days.ago
)
  query =
    Topic.where(
      "topics.posts_count >= ? AND topics.views >= ? AND topics.like_count >= ?",
      min_posts,
      min_views,
      min_likes,
    )

  # Apply additional filters
  query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present?
  query = query.where("topics.category_id IN (?)", category_ids) if category_ids.present?
  query = query.where("topics.created_at >= ?", created_after) if created_after.present?

  # Exclude PM topics (if they exist in Discourse)
  query = query.where(archetype: Archetype.default)

  # Exclude topics that already have concepts. NOT EXISTS is used instead of
  # NOT IN (subquery): it is unaffected by NULLs in the subquery and lets the
  # database plan an anti-join.
  query = query.where(<<~SQL)
    NOT EXISTS (
      SELECT 1
      FROM topics_inferred_concepts
      WHERE topics_inferred_concepts.topic_id = topics.id
    )
  SQL

  # Score and order topics by engagement (combination of views, likes, and posts)
  scored_columns =
    "topics.*, (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score"

  query.select(scored_columns).order("engagement_score DESC").limit(limit)
end
|
|
|
|
# Find candidate posts that are good for concept generation.
#
# Only posts that have no row in posts_inferred_concepts are considered.
# Results are ordered by like_count, highest first.
#
# NOTE(review): unlike find_candidate_topics, nothing here filters on the
# topic archetype, so posts from private-message topics are not excluded —
# confirm whether that is intended.
#
# @param limit [Integer] Maximum number of posts to return
# @param min_likes [Integer] Minimum number of likes
# @param exclude_first_posts [Boolean] Exclude first posts in topics (post_number 1)
# @param exclude_post_ids [Array<Integer>] Post IDs to exclude
# @param category_ids [Array<Integer>, nil] Only include posts from topics in these categories
# @param created_after [Time, nil] Only include posts created at or after this time;
#   defaults to 30 days ago, pass nil to disable the filter
# @return [ActiveRecord::Relation] the chained Post query (not a materialized Array)
def self.find_candidate_posts(
  limit: 100,
  min_likes: 5,
  exclude_first_posts: true,
  exclude_post_ids: [],
  category_ids: nil,
  created_after: 30.days.ago
)
  query = Post.where("posts.like_count >= ?", min_likes)

  # Exclude first posts if specified
  query = query.where("posts.post_number > 1") if exclude_first_posts

  # Apply additional filters
  query = query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present?
  query = query.where("posts.created_at >= ?", created_after) if created_after.present?

  # Filter by category if specified; the category lives on the topic, hence the join
  if category_ids.present?
    query = query.joins(:topic).where("topics.category_id IN (?)", category_ids)
  end

  # Exclude posts that already have concepts. NOT EXISTS is used instead of
  # NOT IN (subquery): it is unaffected by NULLs in the subquery and lets the
  # database plan an anti-join.
  query = query.where(<<~SQL)
    NOT EXISTS (
      SELECT 1
      FROM posts_inferred_concepts
      WHERE posts_inferred_concepts.post_id = posts.id
    )
  SQL

  # Order by engagement (likes) and cap the result size
  query.order(like_count: :desc).limit(limit)
end
|
|
|
|
# Deduplicate and standardize a list of concepts.
#
# The names are joined with ", " and sent as a single user message to the
# persona whose id is stored in SiteSetting.inferred_concepts_deduplicate_persona,
# using that persona's default LLM.
#
# @param concept_names [Array<String>] List of concept names to deduplicate
# @return [Array, nil] value of the "streamlined_tags" key in the bot's JSON
#   reply (nil when the reply has no such key); an empty array when
#   concept_names is blank, so both paths hand back the same kind of value
# @raise [NoMethodError] when no persona matches the configured id (the
#   lookup yields nil and .new is then called on it)
# @raise [JSON::ParserError] when the reply is not valid JSON; deliberately
#   not swallowed so a bad reply is never mistaken for an empty result
def self.deduplicate_concepts(concept_names)
  return [] if concept_names.blank?

  # Use the ConceptDeduplicator persona to deduplicate concepts
  persona_id = SiteSetting.inferred_concepts_deduplicate_persona.to_i
  persona =
    AiPersona
      .all_personas(enabled_only: false)
      .find { |persona_klass| persona_klass.id == persona_id }
      .new

  llm = LlmModel.find(persona.class.default_llm_id)

  # Create the input for the deduplicator
  input = { type: :user, content: concept_names.join(", ") }

  context =
    DiscourseAi::Personas::BotContext.new(messages: [input], user: Discourse.system_user)

  bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm)

  response = bot.reply(context)

  JSON.parse(response[0][0]).dig("streamlined_tags")
end
|
|
end
|
|
end
|
|
end
|