220 lines
8.0 KiB
Ruby
220 lines
8.0 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module InferredConcepts
|
|
class Manager
|
|
# List the names of the stored concepts in ascending name order.
# @param limit [Integer, nil] Optional cap on how many names are returned
# @return [Array<String>] Concept names (the `name` column is plucked, so
#   callers receive strings rather than InferredConcept records)
def self.list_concepts(limit: nil)
  sorted = InferredConcept.order("name ASC")

  # Only narrow the query when a limit was actually supplied
  (limit.present? ? sorted.limit(limit) : sorted).pluck(:name)
end
|
|
|
|
# Deduplicate concepts in batches by letter.
# This method will:
# 1. Group concept names by their upcased first character; names that do
#    not start with A-Z (or are empty) share a "#" group
# 2. Send each group through Finder.deduplicate_concepts in slices of
#    +per_letter_batch+
# 3. Send the combined survivors through the deduplicator again in slices
#    of +full_pass_batch+ and drop exact repeats
# 4. Replace the contents of the InferredConcept table with the result
#
# Step 4 is destructive, so it runs inside a transaction and is skipped
# entirely when the deduplicator produced no names; otherwise a failed or
# empty insert would leave the table wiped.
# NOTE(review): destroy_all presumably also removes each concept's links to
# topics/posts through association callbacks - confirm whether callers are
# expected to re-link afterwards.
#
# @param per_letter_batch [Integer] Slice size used for the per-letter pass
# @param full_pass_batch [Integer] Slice size used for the final pass
# @return [Object, nil] The value returned by InferredConcept.insert_all, or
#   nil when there was nothing to deduplicate or nothing to write
def self.deduplicate_concepts_by_letter(per_letter_batch: 50, full_pass_batch: 150)
  all_concepts = list_concepts
  return if all_concepts.empty?

  # group_by keeps first-seen order, so groups are processed in name order
  letter_groups =
    all_concepts.group_by do |concept|
      first_char = concept[0]&.upcase
      first_char&.match?(/[A-Z]/) ? first_char : "#"
    end

  # Array() tolerates a deduplicator that hands back nil for a batch
  letter_deduplicated_concepts =
    letter_groups.each_value.flat_map do |concepts|
      concepts
        .each_slice(per_letter_batch)
        .flat_map { |batch| Array(Finder.deduplicate_concepts(batch)) }
    end
  return if letter_deduplicated_concepts.empty?

  final_result =
    letter_deduplicated_concepts
      .each_slice(full_pass_batch)
      .flat_map { |batch| Array(Finder.deduplicate_concepts(batch)) }
      .uniq

  # Never wipe the table when there is nothing to put back
  return if final_result.empty?

  InferredConcept.transaction do
    InferredConcept.destroy_all
    InferredConcept.insert_all(final_result.map { |name| { name: name } })
  end
end
|
|
|
|
# Generate new concepts for a topic and apply them by delegating to
# Applier.analyze_and_apply.
# @param topic [Topic] A Topic instance
# @return [Array<InferredConcept>] The concepts that were applied, or an
#   empty array when +topic+ is blank
def self.analyze_topic(topic)
  topic.blank? ? [] : Applier.analyze_and_apply(topic)
end
|
|
|
|
# Generate new concepts for a post and apply them by delegating to
# Applier.analyze_and_apply_post.
# @param post [Post] A Post instance
# @return [Array<InferredConcept>] The concepts that were applied, or an
#   empty array when +post+ is blank
def self.analyze_post(post)
  if post.present?
    Applier.analyze_and_apply_post(post)
  else
    []
  end
end
|
|
|
|
# Extract new concepts from arbitrary content. Pure pass-through to
# Finder.identify_concepts; no blank check is performed here.
# @param content [String] The content to analyze
# @return [Array<String>] The identified concept names
def self.identify_concepts(content) = Finder.identify_concepts(content)
|
|
|
|
# Identify concepts in the given content and make sure each one exists,
# without attaching them to any topic or post.
# @param content [String] The content to analyze
# @return [Array<InferredConcept>] The created or found concepts; an empty
#   array when the content is blank or no concept names were identified
def self.generate_concepts_from_content(content)
  return [] unless content.present?

  names = Finder.identify_concepts(content)

  # Nothing identified means nothing to create or look up
  names.blank? ? [] : Finder.create_or_find_concepts(names)
end
|
|
|
|
# Generate concepts from a topic's content without applying them to the
# topic. The text comes from Applier.topic_content_for_analysis.
# @param topic [Topic] A Topic instance
# @return [Array<InferredConcept>] The created or found concepts; an empty
#   array when the topic or its extracted content is blank
def self.generate_concepts_from_topic(topic)
  return [] unless topic.present?

  text = Applier.topic_content_for_analysis(topic)
  text.present? ? generate_concepts_from_content(text) : []
end
|
|
|
|
# Generate concepts from a post's content without applying them to the
# post. The text comes from Applier.post_content_for_analysis.
# @param post [Post] A Post instance
# @return [Array<InferredConcept>] The created or found concepts; an empty
#   array when the post or its extracted content is blank
def self.generate_concepts_from_post(post)
  # A blank post never reaches the content extractor
  text = post.blank? ? nil : Applier.post_content_for_analysis(post)
  return [] if text.blank?

  generate_concepts_from_content(text)
end
|
|
|
|
# Match a topic against the existing concepts by delegating to
# Applier.match_existing_concepts.
# @param topic [Topic] A Topic instance
# @return [Array<InferredConcept>] The concepts that were applied, or an
#   empty array when +topic+ is blank
def self.match_topic_to_concepts(topic)
  if topic.blank?
    []
  else
    Applier.match_existing_concepts(topic)
  end
end
|
|
|
|
# Match a post against the existing concepts by delegating to
# Applier.match_existing_concepts_for_post.
# @param post [Post] A Post instance
# @return [Array<InferredConcept>] The concepts that were applied, or an
#   empty array when +post+ is blank
def self.match_post_to_concepts(post)
  post.present? ? Applier.match_existing_concepts_for_post(post) : []
end
|
|
|
|
# Find topics that have a specific concept. The concept is looked up by
# exact name.
# @param concept_name [String] The name of the concept to search for
# @return [Array<Topic>] The value of the concept's +topics+, or an empty
#   array when no concept has that name
def self.search_topics_by_concept(concept_name)
  found = ::InferredConcept.find_by(name: concept_name)

  if found
    found.topics
  else
    []
  end
end
|
|
|
|
# Find posts that have a specific concept. The concept is looked up by
# exact name.
# @param concept_name [String] The name of the concept to search for
# @return [Array<Post>] The value of the concept's +posts+, or an empty
#   array when no concept has that name
def self.search_posts_by_concept(concept_name)
  found = ::InferredConcept.find_by(name: concept_name)
  found ? found.posts : []
end
|
|
|
|
# Match arbitrary content against existing concepts.
# Blank content short-circuits before any concept names are loaded, the same
# way generate_concepts_from_content guards its input, so no query or
# matcher call is made for empty input.
# @param content [String] The content to analyze
# @return [Array<String>] Names of matching concepts as returned by
#   Applier.match_concepts_to_content; an empty array when the content is
#   blank or no concepts exist
def self.match_content_to_concepts(content)
  return [] if content.blank?

  existing_concepts = InferredConcept.pluck(:name)
  return [] if existing_concepts.empty?

  Applier.match_concepts_to_content(content, existing_concepts)
end
|
|
|
|
# Find candidate topics that are good for concept generation.
#
# The options hash is splatted unchanged into Finder.find_candidate_topics,
# so the defaults listed below are the ones expected from the finder rather
# than values set here.
#
# @param opts [Hash] Options to pass to the finder
# @option opts [Integer] :limit (100) Maximum number of topics to return
# @option opts [Integer] :min_posts (5) Minimum number of posts in topic
# @option opts [Integer] :min_likes (10) Minimum number of likes across all posts
# @option opts [Integer] :min_views (100) Minimum number of views
# @option opts [Array<Integer>] :exclude_topic_ids ([]) Topic IDs to exclude
# @option opts [Array<Integer>] :category_ids (nil) Only include topics from these categories
# @option opts [DateTime] :created_after (30.days.ago) Only include topics created after this time
# @return [Array<Topic>] Array of Topic objects that are good candidates
def self.find_candidate_topics(opts = {}) = Finder.find_candidate_topics(**opts)
|
|
|
|
# Find candidate posts that are good for concept generation.
#
# Only the recognised options that the caller actually supplied are
# forwarded. Omitted options are left out of the call instead of being sent
# as explicit nils, which would override the finder's keyword defaults.
# Unrecognised keys are ignored. An option supplied with an explicit nil is
# still forwarded as nil.
# NOTE(review): assumes Finder.find_candidate_posts declares a default for
# each of these keywords - confirm against the finder.
#
# @param opts [Hash] Options to pass to the finder
# @option opts [Integer] :limit Maximum number of posts to return
# @option opts [Integer] :min_likes Minimum number of likes
# @option opts [Boolean] :exclude_first_posts Whether to skip first posts
# @option opts [Array<Integer>] :exclude_post_ids Post IDs to exclude
# @option opts [Array<Integer>] :category_ids Only include posts from these categories
# @option opts [Time] :created_after Only include posts created after this time
# @return [Array<Post>] Array of Post objects that are good candidates
def self.find_candidate_posts(opts = {})
  forwarded =
    opts.slice(
      :limit,
      :min_likes,
      :exclude_first_posts,
      :exclude_post_ids,
      :category_ids,
      :created_after,
    )

  Finder.find_candidate_posts(**forwarded)
end
|
|
end
|
|
end
|
|
end
|