diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 00000000..650de3e3 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,8 @@ +{ + "permissions": { + "allow": [ + "Bash(bundle exec rails g migration:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/app/jobs/regular/apply_inferred_concepts.rb b/app/jobs/regular/apply_inferred_concepts.rb deleted file mode 100644 index 916c3f0c..00000000 --- a/app/jobs/regular/apply_inferred_concepts.rb +++ /dev/null @@ -1,47 +0,0 @@ -# frozen_string_literal: true - -module Jobs - class ApplyInferredConcepts < ::Jobs::Base - sidekiq_options queue: 'low' - - # Process a batch of topics to apply existing concepts to them - # - # @param args [Hash] Contains job arguments - # @option args [Array] :topic_ids Required - List of topic IDs to process - # @option args [Integer] :batch_size (100) Number of topics to process in each batch - def execute(args = {}) - return if args[:topic_ids].blank? 
- - # Process topics in smaller batches to avoid memory issues - batch_size = args[:batch_size] || 100 - - # Get the list of topic IDs - topic_ids = args[:topic_ids] - - # Process topics in batches - topic_ids.each_slice(batch_size) do |batch_topic_ids| - process_batch(batch_topic_ids) - end - end - - private - - def process_batch(topic_ids) - topics = Topic.where(id: topic_ids) - - topics.each do |topic| - begin - process_topic(topic) - rescue => e - Rails.logger.error("Error applying concepts to topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}") - end - end - end - - def process_topic(topic) - # Match topic against existing concepts and apply them - # Pass the topic object directly - DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(topic) - end - end -end \ No newline at end of file diff --git a/app/jobs/regular/generate_inferred_concepts.rb b/app/jobs/regular/generate_inferred_concepts.rb index d0b73b89..61b1d4be 100644 --- a/app/jobs/regular/generate_inferred_concepts.rb +++ b/app/jobs/regular/generate_inferred_concepts.rb @@ -4,44 +4,64 @@ module Jobs class GenerateInferredConcepts < ::Jobs::Base sidekiq_options queue: 'low' - # Process a batch of topics to generate new concepts (without applying them to topics) + # Process items to generate new concepts # # @param args [Hash] Contains job arguments - # @option args [Array] :topic_ids Required - List of topic IDs to process - # @option args [Integer] :batch_size (100) Number of topics to process in each batch + # @option args [String] :item_type Required - Type of items to process ('topics' or 'posts') + # @option args [Array] :item_ids Required - List of item IDs to process + # @option args [Integer] :batch_size (100) Number of items to process in each batch + # @option args [Boolean] :match_only (false) Only match against existing concepts without generating new ones def execute(args = {}) - return if args[:topic_ids].blank? + return if args[:item_ids].blank? 
|| args[:item_type].blank? - # Process topics in smaller batches to avoid memory issues + unless ['topics', 'posts'].include?(args[:item_type]) + Rails.logger.error("Invalid item_type for GenerateInferredConcepts: #{args[:item_type]}") + return + end + + # Process items in smaller batches to avoid memory issues batch_size = args[:batch_size] || 100 - # Get the list of topic IDs - topic_ids = args[:topic_ids] + # Get the list of item IDs + item_ids = args[:item_ids] + match_only = args[:match_only] || false - # Process topics in batches - topic_ids.each_slice(batch_size) do |batch_topic_ids| - process_batch(batch_topic_ids) + # Process items in batches + item_ids.each_slice(batch_size) do |batch_item_ids| + process_batch(batch_item_ids, args[:item_type], match_only) end end private - def process_batch(topic_ids) - topics = Topic.where(id: topic_ids) + def process_batch(item_ids, item_type, match_only) + klass = item_type.singularize.classify.constantize + items = klass.where(id: item_ids) - topics.each do |topic| + items.each do |item| begin - process_topic(topic) + process_item(item, item_type, match_only) rescue => e - Rails.logger.error("Error generating concepts from topic #{topic.id}: #{e.message}\n#{e.backtrace.join("\n")}") + Rails.logger.error("Error generating concepts from #{item_type.singularize} #{item.id}: #{e.message}\n#{e.backtrace.join("\n")}") end end end - def process_topic(topic) + def process_item(item, item_type, match_only) # Use the Manager method that handles both identifying and creating concepts - # Pass the topic object directly - DiscourseAi::InferredConcepts::Manager.generate_concepts_from_topic(topic) + if match_only + if item_type == 'topics' + DiscourseAi::InferredConcepts::Manager.match_topic_to_concepts(item) + else # posts + DiscourseAi::InferredConcepts::Manager.match_post_to_concepts(item) + end + else + if item_type == 'topics' + DiscourseAi::InferredConcepts::Manager.analyze_topic(item) + else # posts + 
DiscourseAi::InferredConcepts::Manager.analyze_post(item) + end + end end end end \ No newline at end of file diff --git a/app/jobs/scheduled/generate_concepts_from_popular_items.rb b/app/jobs/scheduled/generate_concepts_from_popular_items.rb new file mode 100644 index 00000000..a9a03493 --- /dev/null +++ b/app/jobs/scheduled/generate_concepts_from_popular_items.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module Jobs + class GenerateConceptsFromPopularItems < ::Jobs::Scheduled + every 1.day + + # This job runs daily and generates new concepts from popular topics and posts + # It selects items based on engagement metrics and generates concepts from their content + def execute(args = {}) + return unless SiteSetting.inferred_concepts_enabled + + process_popular_topics + process_popular_posts + end + + private + + def process_popular_topics + + # Find candidate topics that are popular and don't have concepts yet + candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_topics( + limit: SiteSetting.inferred_concepts_daily_topics_limit || 20, + min_posts: SiteSetting.inferred_concepts_min_posts || 5, + min_likes: SiteSetting.inferred_concepts_min_likes || 10, + min_views: SiteSetting.inferred_concepts_min_views || 100, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago + ) + + return if candidates.blank? 
+ + # Process candidate topics - first generate concepts, then match + Jobs.enqueue( + :generate_inferred_concepts, + item_type: 'topics', + item_ids: candidates.map(&:id), + batch_size: 10 + ) + + # Schedule a follow-up job to match existing concepts + Jobs.enqueue_in( + 1.hour, + :generate_inferred_concepts, + item_type: 'topics', + item_ids: candidates.map(&:id), + batch_size: 10, + match_only: true + ) + end + + def process_popular_posts + + # Find candidate posts that are popular and don't have concepts yet + candidates = DiscourseAi::InferredConcepts::Manager.find_candidate_posts( + limit: SiteSetting.inferred_concepts_daily_posts_limit || 30, + min_likes: SiteSetting.inferred_concepts_post_min_likes || 5, + exclude_first_posts: true, + created_after: SiteSetting.inferred_concepts_lookback_days.days.ago + ) + + return if candidates.blank? + + # Process candidate posts - first generate concepts, then match + Jobs.enqueue( + :generate_inferred_concepts, + item_type: 'posts', + item_ids: candidates.map(&:id), + batch_size: 10 + ) + + # Schedule a follow-up job to match against existing concepts + Jobs.enqueue_in( + 1.hour, + :generate_inferred_concepts, + item_type: 'posts', + item_ids: candidates.map(&:id), + batch_size: 10, + match_only: true + ) + end + end +end \ No newline at end of file diff --git a/app/jobs/scheduled/generate_concepts_from_popular_topics.rb b/app/jobs/scheduled/generate_concepts_from_popular_topics.rb deleted file mode 100644 index fe009a1c..00000000 --- a/app/jobs/scheduled/generate_concepts_from_popular_topics.rb +++ /dev/null @@ -1,38 +0,0 @@ -# frozen_string_literal: true - -module Jobs - class GenerateConceptsFromPopularTopics < ::Jobs::Scheduled - every 1.day - - # This job runs daily and generates new concepts from popular topics - # It selects topics based on engagement metrics and generates concepts from their content - def execute(args = {}) - # Find candidate topics that are popular and don't have concepts yet - candidates = 
DiscourseAi::InferredConcepts::Manager.find_candidate_topics( - limit: SiteSetting.inferred_concepts_daily_topics_limit || 20, - min_posts: SiteSetting.inferred_concepts_min_posts || 5, - min_likes: SiteSetting.inferred_concepts_min_likes || 10, - min_views: SiteSetting.inferred_concepts_min_views || 100, - created_after: SiteSetting.inferred_concepts_lookback_days.days.ago - ) - - return if candidates.blank? - - # Process the candidate topics in batches using the regular job - Jobs.enqueue( - :generate_inferred_concepts, - topic_ids: candidates.map(&:id), - batch_size: 10 - ) - - # Schedule a follow-up job to apply the concepts to topics - # This runs after a delay to ensure concepts have been generated - Jobs.enqueue_in( - 1.hour, - :apply_inferred_concepts, - topic_ids: candidates.map(&:id), - batch_size: 10 - ) - end - end -end \ No newline at end of file diff --git a/app/models/inferred_concept.rb b/app/models/inferred_concept.rb index a5b8d877..0248277f 100644 --- a/app/models/inferred_concept.rb +++ b/app/models/inferred_concept.rb @@ -2,6 +2,8 @@ class InferredConcept < ActiveRecord::Base has_and_belongs_to_many :topics + # The join table is created as posts_inferred_concepts, not the Rails default name (inferred_concepts_posts) + has_and_belongs_to_many :posts, join_table: :posts_inferred_concepts validates :name, presence: true, uniqueness: true end diff --git a/app/serializers/ai_inferred_concept_post_serializer.rb b/app/serializers/ai_inferred_concept_post_serializer.rb new file mode 100644 index 00000000..d4bfcd62 --- /dev/null +++ b/app/serializers/ai_inferred_concept_post_serializer.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +class AiInferredConceptPostSerializer < ApplicationSerializer + attributes :id, + :post_number, + :topic_id, + :topic_title, + :username, + :avatar_template, + :created_at, + :updated_at, + :excerpt, + :truncated, + :inferred_concepts + + def avatar_template + User.avatar_template(object.username, object.uploaded_avatar_id) + end + + def excerpt + Post.excerpt(object.cooked) + end + + def truncated + object.cooked.length > SiteSetting.post_excerpt_maxlength + end +
+ def inferred_concepts + ActiveModel::ArraySerializer.new( + object.inferred_concepts, + each_serializer: InferredConceptSerializer + ) + end +end \ No newline at end of file diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 3e4c1064..390da18e 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -326,6 +326,12 @@ en: short_summarizer: name: "Summarizer (short form)" description: "Default persona used to power AI short summaries for topic lists' items" + concept_finder: + name: "Concept Finder" + description: "AI Bot specialized in identifying concepts and themes in content" + concept_matcher: + name: "Concept Matcher" + description: "AI Bot specialized in matching content against existing concepts" topic_not_found: "Summary unavailable, topic not found!" summarizing: "Summarizing topic" searching: "Searching for: '%{query}'" diff --git a/config/settings.yml b/config/settings.yml index 2f166a3e..92371470 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -426,3 +426,11 @@ discourse_ai: default: 30 client: false description: "Only consider topics created within this many days for concept generation" + inferred_concepts_daily_posts_limit: + default: 30 + client: false + description: "Maximum number of posts to process each day for concept generation" + inferred_concepts_post_min_likes: + default: 5 + client: false + description: "Minimum number of likes a post must have to be considered for concept generation" diff --git a/db/migrate/20250509000001_create_posts_inferred_concepts.rb b/db/migrate/20250509000001_create_posts_inferred_concepts.rb new file mode 100644 index 00000000..258d0f14 --- /dev/null +++ b/db/migrate/20250509000001_create_posts_inferred_concepts.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class CreatePostsInferredConcepts < ActiveRecord::Migration[7.0] + def change + create_table :posts_inferred_concepts do |t| + t.integer :post_id, null: false + t.integer 
:inferred_concept_id, null: false + t.timestamps + end + + add_index :posts_inferred_concepts, [:post_id, :inferred_concept_id], unique: true, name: 'idx_unique_post_inferred_concept' + add_index :posts_inferred_concepts, :post_id + add_index :posts_inferred_concepts, :inferred_concept_id + end +end \ No newline at end of file diff --git a/lib/inferred_concepts/applier.rb b/lib/inferred_concepts/applier.rb index 9426ec0a..fa393e75 100644 --- a/lib/inferred_concepts/applier.rb +++ b/lib/inferred_concepts/applier.rb @@ -8,7 +8,7 @@ module DiscourseAi # concepts: an array of InferredConcept instances def self.apply_to_topic(topic, concepts) return if topic.blank? || concepts.blank? - + concepts.each do |concept| # Use the join table to associate the concept with the topic # Avoid duplicates by using find_or_create_by @@ -19,94 +19,163 @@ module DiscourseAi SQL end end - + + # Associates the provided concepts with a post + # post: a Post instance + # concepts: an array of InferredConcept instances + def self.apply_to_post(post, concepts) + return if post.blank? || concepts.blank? + + concepts.each do |concept| + # Use the join table to associate the concept with the post + # Duplicates are skipped by ON CONFLICT on the unique (post_id, inferred_concept_id) index + ActiveRecord::Base.connection.execute(<<~SQL) + INSERT INTO posts_inferred_concepts (post_id, inferred_concept_id, created_at, updated_at) + VALUES (#{post.id}, #{concept.id}, NOW(), NOW()) + ON CONFLICT (post_id, inferred_concept_id) DO NOTHING + SQL + end + end + # Extracts content from a topic for concept analysis # Returns a string with the topic title and first few posts def self.topic_content_for_analysis(topic) return "" if topic.blank?
- + # Combine title and first few posts for analysis posts = Post.where(topic_id: topic.id).order(:post_number).limit(10) - + content = "Title: #{topic.title}\n\n" - content += posts.map do |p| - "#{p.post_number}) #{p.user.username}: #{p.raw}" - end.join("\n\n") - + content += posts.map { |p| "#{p.post_number}) #{p.user.username}: #{p.raw}" }.join("\n\n") + content end - + + # Extracts content from a post for concept analysis + # Returns a string with the post content + def self.post_content_for_analysis(post) + return "" if post.blank? + + # Get the topic title for context + topic_title = post.topic&.title || "" + + content = "Topic: #{topic_title}\n\n" + content += "Post by #{post.user.username}:\n#{post.raw}" + + content + end + # Comprehensive method to analyze a topic and apply concepts def self.analyze_and_apply(topic) return if topic.blank? - + # Get content to analyze content = topic_content_for_analysis(topic) - + # Identify concepts concept_names = Finder.identify_concepts(content) - + # Create or find concepts in the database concepts = Finder.create_or_find_concepts(concept_names) - + # Apply concepts to the topic apply_to_topic(topic, concepts) - + concepts end - + + # Comprehensive method to analyze a post and apply concepts + def self.analyze_and_apply_post(post) + return if post.blank? + + # Get content to analyze + content = post_content_for_analysis(post) + + # Identify concepts + concept_names = Finder.identify_concepts(content) + + # Create or find concepts in the database + concepts = Finder.create_or_find_concepts(concept_names) + + # Apply concepts to the post + apply_to_post(post, concepts) + + concepts + end + # Match a topic with existing concepts def self.match_existing_concepts(topic) return [] if topic.blank? 
- + # Get content to analyze content = topic_content_for_analysis(topic) - + # Get all existing concepts - existing_concepts = InferredConcept.all.pluck(:name) + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts return [] if existing_concepts.empty? - + # Use the ConceptMatcher persona to match concepts matched_concept_names = match_concepts_to_content(content, existing_concepts) - + # Find concepts in the database matched_concepts = InferredConcept.where(name: matched_concept_names) - + # Apply concepts to the topic apply_to_topic(topic, matched_concepts) - + matched_concepts end - + + # Match a post with existing concepts + def self.match_existing_concepts_for_post(post) + return [] if post.blank? + + # Get content to analyze + content = post_content_for_analysis(post) + + # Get all existing concepts + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts + return [] if existing_concepts.empty? + + # Use the ConceptMatcher persona to match concepts + matched_concept_names = match_concepts_to_content(content, existing_concepts) + + # Find concepts in the database + matched_concepts = InferredConcept.where(name: matched_concept_names) + + # Apply concepts to the post + apply_to_post(post, matched_concepts) + + matched_concepts + end + # Use ConceptMatcher persona to match content against provided concepts def self.match_concepts_to_content(content, concept_list) return [] if content.blank? || concept_list.blank? 
- - # Prepare user message with content and concept list + + # Prepare user message with only the content; the concept list is supplied through the context's inferred_concepts template param user_message = <<~MESSAGE Content to analyze: #{content} - - Available concepts to match: - #{concept_list.join(", ")} MESSAGE - + # Use the ConceptMatcher persona to match concepts llm = DiscourseAi::Completions::Llm.default_llm - persona = DiscourseAi::Personas::ConceptMatcher.new - context = DiscourseAi::Personas::BotContext.new( - messages: [{ type: :user, content: user_message }], - user: Discourse.system_user - ) - + persona = DiscourseAi::Personas::ConceptMatcher.new + context = DiscourseAi::Personas::BotContext.new( + messages: [{ type: :user, content: user_message }], + user: Discourse.system_user, + inferred_concepts: concept_list.join(", "), + ) + prompt = persona.craft_prompt(context) response = llm.completion(prompt, extract_json: true) - + return [] unless response.success? - + matching_concepts = response.parsed_output["matching_concepts"] matching_concepts || [] end end end -end \ No newline at end of file +end diff --git a/lib/inferred_concepts/finder.rb b/lib/inferred_concepts/finder.rb index 3e870cf1..56ddcbdc 100644 --- a/lib/inferred_concepts/finder.rb +++ b/lib/inferred_concepts/finder.rb @@ -36,7 +36,7 @@ module DiscourseAi end # Finds candidate topics to use for concept generation - # + # # @param limit [Integer] Maximum number of topics to return # @param min_posts [Integer] Minimum number of posts in topic # @param min_likes [Integer] Minimum number of likes across all posts @@ -46,46 +46,92 @@ module DiscourseAi # @param created_after [DateTime] Only include topics created after this time (optional) # @return [Array] Array of Topic objects that are good candidates def self.find_candidate_topics( - limit: 100, - min_posts: 5, - min_likes: 10, - min_views: 100, + limit: 100, + min_posts: 5, + min_likes: 10, + min_views: 100, exclude_topic_ids: [], category_ids: nil, created_after: 30.days.ago ) query = Topic.where( "topics.posts_count >= ?
AND topics.views >= ? AND topics.like_count >= ?", - min_posts, - min_views, + min_posts, + min_views, min_likes ) - + # Apply additional filters query = query.where("topics.id NOT IN (?)", exclude_topic_ids) if exclude_topic_ids.present? query = query.where("topics.category_id IN (?)", category_ids) if category_ids.present? query = query.where("topics.created_at >= ?", created_after) if created_after.present? - + # Exclude PM topics (if they exist in Discourse) query = query.where(archetype: Topic.public_archetype) - + # Exclude topics that already have concepts topics_with_concepts = <<~SQL - SELECT DISTINCT topic_id + SELECT DISTINCT topic_id FROM topics_inferred_concepts SQL - + query = query.where("topics.id NOT IN (#{topics_with_concepts})") - + # Score and order topics by engagement (combination of views, likes, and posts) query = query.select( - "topics.*, + "topics.*, (topics.like_count * 2 + topics.posts_count * 3 + topics.views * 0.1) AS engagement_score" ).order("engagement_score DESC") - + # Return limited number of topics query.limit(limit) end + + # Find candidate posts that are good for concept generation + # + # @param limit [Integer] Maximum number of posts to return + # @param min_likes [Integer] Minimum number of likes + # @param exclude_first_posts [Boolean] Exclude first posts in topics + # @param exclude_post_ids [Array] Post IDs to exclude + # @param category_ids [Array] Only include posts from topics in these categories + # @param created_after [DateTime] Only include posts created after this time + # @return [Array] Array of Post objects that are good candidates + def self.find_candidate_posts( + limit: 100, + min_likes: 5, + exclude_first_posts: true, + exclude_post_ids: [], + category_ids: nil, + created_after: 30.days.ago + ) + query = Post.where("posts.like_count >= ?", min_likes) + + # Exclude first posts if specified + query = query.where("posts.post_number > 1") if exclude_first_posts + + # Apply additional filters + query = 
query.where("posts.id NOT IN (?)", exclude_post_ids) if exclude_post_ids.present? + query = query.where("posts.created_at >= ?", created_after) if created_after.present? + + # Filter by category if specified + if category_ids.present? + query = query.joins(:topic).where("topics.category_id IN (?)", category_ids) + end + + # Exclude posts that already have concepts + posts_with_concepts = <<~SQL + SELECT DISTINCT post_id + FROM posts_inferred_concepts + SQL + + query = query.where("posts.id NOT IN (#{posts_with_concepts})") + + # Order by engagement (likes) + query = query.order(like_count: :desc) + + # Return limited number of posts + query.limit(limit) + end end end end \ No newline at end of file diff --git a/lib/inferred_concepts/manager.rb b/lib/inferred_concepts/manager.rb index 28246891..3037cd15 100644 --- a/lib/inferred_concepts/manager.rb +++ b/lib/inferred_concepts/manager.rb @@ -3,59 +3,102 @@ module DiscourseAi module InferredConcepts class Manager + # Get the names of existing concepts + # @param limit [Integer, nil] Optional maximum number of concepts to return + # @return [Array] Concept names (the plucked name column), ordered by name ascending + def self.list_concepts(limit: nil) + query = InferredConcept.all.order("name ASC") + + # Apply limit if provided + query = query.limit(limit) if limit.present? + + query.pluck(:name) + end # Generate new concepts for a topic and apply them # @param topic [Topic] A Topic instance # @return [Array] The concepts that were applied def self.analyze_topic(topic) return [] if topic.blank? - + Applier.analyze_and_apply(topic) end - + + # Generate new concepts for a post and apply them + # @param post [Post] A Post instance + # @return [Array] The concepts that were applied + def self.analyze_post(post) + return [] if post.blank?
+ + Applier.analyze_and_apply_post(post) + end + # Extract new concepts from arbitrary content # @param content [String] The content to analyze # @return [Array] The identified concept names def self.identify_concepts(content) Finder.identify_concepts(content) end - + # Identify and create concepts from content without applying them to any topic # @param content [String] The content to analyze # @return [Array] The created or found concepts def self.generate_concepts_from_content(content) return [] if content.blank? - + # Identify concepts concept_names = Finder.identify_concepts(content) return [] if concept_names.blank? - + # Create or find concepts in the database Finder.create_or_find_concepts(concept_names) end - + # Generate concepts from a topic's content without applying them to the topic # @param topic [Topic] A Topic instance # @return [Array] The created or found concepts def self.generate_concepts_from_topic(topic) return [] if topic.blank? - + # Get content to analyze content = Applier.topic_content_for_analysis(topic) return [] if content.blank? - + # Generate concepts from the content generate_concepts_from_content(content) end - + + # Generate concepts from a post's content without applying them to the post + # @param post [Post] A Post instance + # @return [Array] The created or found concepts + def self.generate_concepts_from_post(post) + return [] if post.blank? + + # Get content to analyze + content = Applier.post_content_for_analysis(post) + return [] if content.blank? + + # Generate concepts from the content + generate_concepts_from_content(content) + end + # Match a topic against existing concepts # @param topic [Topic] A Topic instance # @return [Array] The concepts that were applied def self.match_topic_to_concepts(topic) return [] if topic.blank? 
- + Applier.match_existing_concepts(topic) end - + + # Match a post against existing concepts + # @param post [Post] A Post instance + # @return [Array] The concepts that were applied + def self.match_post_to_concepts(post) + return [] if post.blank? + + Applier.match_existing_concepts_for_post(post) + end + # Find topics that have a specific concept # @param concept_name [String] The name of the concept to search for # @return [Array] Topics that have the specified concept @@ -64,19 +107,28 @@ module DiscourseAi return [] unless concept concept.topics end - + + # Find posts that have a specific concept + # @param concept_name [String] The name of the concept to search for + # @return [Array] Posts that have the specified concept + def self.search_posts_by_concept(concept_name) + concept = ::InferredConcept.find_by(name: concept_name) + return [] unless concept + concept.posts + end + # Match arbitrary content against existing concepts # @param content [String] The content to analyze # @return [Array] Names of matching concepts def self.match_content_to_concepts(content) existing_concepts = InferredConcept.all.pluck(:name) return [] if existing_concepts.empty? 
- Applier.match_concepts_to_content(content, existing_concepts) end - + # Find candidate topics that are good for concept generation - # + # # @param opts [Hash] Options to pass to the finder # @option opts [Integer] :limit (100) Maximum number of topics to return # @option opts [Integer] :min_posts (5) Minimum number of posts in topic @@ -89,6 +141,23 @@ module DiscourseAi def self.find_candidate_topics(opts = {}) - Finder.find_candidate_topics(opts) + Finder.find_candidate_topics(**opts) end + + # Find candidate posts that are good for concept generation + # @param opts [Hash] Options to pass to the finder (:limit, :min_likes, :exclude_first_posts, + # :exclude_post_ids, :category_ids, :created_after); omitted keys fall back to the finder's defaults + # @return [Array] Array of Post objects that are good candidates + def self.find_candidate_posts(opts = {}) + Finder.find_candidate_posts( + **opts.slice( + :limit, + :min_likes, + :exclude_first_posts, + :exclude_post_ids, + :category_ids, + :created_after, + ), + ) + end end end -end \ No newline at end of file +end diff --git a/lib/personas/bot_context.rb b/lib/personas/bot_context.rb index 69d86669..83220a52 100644 --- a/lib/personas/bot_context.rb +++ b/lib/personas/bot_context.rb @@ -17,7 +17,8 @@ module DiscourseAi :context_post_ids, :feature_name, :resource_url, - :cancel_manager + :cancel_manager, + :inferred_concepts def initialize( post: nil, @@ -35,7 +36,8 @@ module DiscourseAi context_post_ids: nil, feature_name: "bot", resource_url: nil, - cancel_manager: nil + cancel_manager: nil, + inferred_concepts: [] ) @participants = participants @user = user @@ -54,7 +56,7 @@ module DiscourseAi @resource_url = resource_url @feature_name = feature_name - @resource_url = resource_url + @inferred_concepts = inferred_concepts @cancel_manager = cancel_manager @@ -68,7 +70,15 @@ module DiscourseAi end # these are strings that can be safely interpolated into
templates - TEMPLATE_PARAMS = %w[time site_url site_title site_description participants resource_url] + TEMPLATE_PARAMS = %w[ + time + site_url + site_title + site_description + participants + resource_url + inferred_concepts + ] def lookup_template_param(key) public_send(key.to_sym) if TEMPLATE_PARAMS.include?(key) @@ -114,6 +124,7 @@ module DiscourseAi skip_tool_details: @skip_tool_details, feature_name: @feature_name, resource_url: @resource_url, + inferred_concepts: @inferred_concepts, } end end diff --git a/lib/personas/concept_finder.rb b/lib/personas/concept_finder.rb index 2e0502d0..a713e8b8 100644 --- a/lib/personas/concept_finder.rb +++ b/lib/personas/concept_finder.rb @@ -4,6 +4,16 @@ module DiscourseAi module Personas class ConceptFinder < Persona def system_prompt + existing_concepts = DiscourseAi::InferredConcepts::Manager.list_concepts(limit: 100) + existing_concepts_text = "" + + existing_concepts_text = <<~CONCEPTS if existing_concepts.present? + The following concepts already exist in the system: + #{existing_concepts.join(", ")} + + You can reuse these existing concepts if they apply to the content, or suggest new concepts. + CONCEPTS + <<~PROMPT.strip You are an advanced concept tagging system that identifies key concepts, themes, and topics from provided text. Your job is to extract meaningful labels that can be used to categorize content. @@ -16,7 +26,7 @@ module DiscourseAi - Ensure concepts are relevant to the core content - Do not include proper nouns unless they represent key technologies or methodologies - Maintain the original language of the text being analyzed - + #{existing_concepts_text} Format your response as a JSON object with a single key named "concepts", which has an array of concept strings as the value.
Your output should be in the following format: diff --git a/lib/personas/concept_matcher.rb b/lib/personas/concept_matcher.rb index ce398bcb..bea17e09 100644 --- a/lib/personas/concept_matcher.rb +++ b/lib/personas/concept_matcher.rb @@ -7,7 +7,7 @@ module DiscourseAi <<~PROMPT.strip You are an advanced concept matching system that determines which concepts from a provided list are relevant to a piece of content. Your job is to analyze the content and determine which concepts from the list apply to it. - + Guidelines for matching concepts: - Only select concepts that are clearly relevant to the content - The content must substantially discuss or relate to the concept @@ -17,13 +17,17 @@ module DiscourseAi - Maintain the original language of the text being analyzed - IMPORTANT: Only select from the exact concepts in the provided list - do not add new concepts - If no concepts from the list match the content, return an empty array - + + The list of available concepts is: + + {inferred_concepts} + Format your response as a JSON object with a single key named "matching_concepts", which has an array of concept strings from the provided list. Your output should be in the following format: {"matching_concepts": ["concept1", "concept3", "concept5"]} - + Only include concepts from the provided list that match the content. If no concepts match, return an empty array.
PROMPT end @@ -33,4 +37,4 @@ module DiscourseAi end end end -end \ No newline at end of file +end diff --git a/lib/post_extensions.rb b/lib/post_extensions.rb index 04a28a15..3a06495f 100644 --- a/lib/post_extensions.rb +++ b/lib/post_extensions.rb @@ -11,6 +11,9 @@ module DiscourseAi -> { where(classification_type: "sentiment") }, class_name: "ClassificationResult", as: :target + + # The join table is created as posts_inferred_concepts, not the Rails default name (inferred_concepts_posts) + has_and_belongs_to_many :inferred_concepts, join_table: :posts_inferred_concepts end end end diff --git a/spec/lib/personas/persona_spec.rb b/spec/lib/personas/persona_spec.rb index d3e90568..22670f15 100644 --- a/spec/lib/personas/persona_spec.rb +++ b/spec/lib/personas/persona_spec.rb @@ -17,6 +17,7 @@ class TestPersona < DiscourseAi::Personas::Persona {participants} {time} {resource_url} + {inferred_concepts} PROMPT end end @@ -38,6 +39,7 @@ RSpec.describe DiscourseAi::Personas::Persona do end let(:resource_url) { "https://path-to-resource" } + let(:inferred_concepts) { %w[bulbassaur charmander squirtle].join(", ") } let(:context) do DiscourseAi::Personas::BotContext.new( @@ -47,6 +49,7 @@ RSpec.describe DiscourseAi::Personas::Persona do time: Time.zone.now, participants: topic_with_users.allowed_users.map(&:username).join(", "), resource_url: resource_url, + inferred_concepts: inferred_concepts, ) end @@ -66,6 +69,7 @@ RSpec.describe DiscourseAi::Personas::Persona do expect(system_message).to include("joe, jane") expect(system_message).to include(Time.zone.now.to_s) expect(system_message).to include(resource_url) + expect(system_message).to include(inferred_concepts) tools = rendered.tools