diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index 3c813317..a55342de 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -6,12 +6,59 @@ en: discourse_ai: "Discourse AI" js: discourse_automation: + ai_models: + gpt_4_turbo: GPT 4 Turbo + gpt_4: GPT 4 + gpt_3_5_turbo: GPT 3.5 Turbo + claude_2: Claude 2 + gemini_pro: Gemini Pro scriptables: + llm_report: + fields: + sender: + label: "Sender" + description: "The user that will send the report" + receivers: + label: "Receivers" + description: "The users that will receive the report (can be email or usernames)" + title: + label: "Title" + description: "The title of the report" + days: + label: "Days" + description: "The timespan of the report" + offset: + label: "Offset" + description: "When testing you may want to run the report historically, use offset to start the report at an earlier date" + instructions: + label: "Instructions" + description: "The instructions provided to the large language model" + sample_size: + label: "Sample Size" + description: "The number of posts to sample for the report" + tokens_per_post: + label: "Tokens per post" + description: "The number of LLM tokens to use per post" + model: + label: "Model" + description: "LLM to use for report generation" + categories: + label: "Categories" + description: "Filter topics only to these categories" + tags: + label: "Tags" + description: "Filter topics only to these tags" + allow_secure_categories: + label: "Allow secure categories" + description: "Allow the report to be generated for topics in secure categories" + debug_mode: + label: "Debug Mode" + description: "Enable debug mode to see the raw input and output of the LLM" + priority_group: + label: "Priority Group" + description: "Prioritize content from this group in the report" + llm_triage: - models: - gpt_4: GPT 4 - gpt_3_5_turbo: GPT 3.5 Turbo - claude_2: Claude 2 fields: system_prompt: label: "System Prompt" diff --git 
a/config/locales/server.en.yml b/config/locales/server.en.yml index 390bee93..b7382742 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -5,6 +5,9 @@ en: title: Triage posts using AI description: "Triage posts using a large language model" system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%" + llm_report: + title: Periodic report using AI + description: "Periodic report based on a large language model" site_settings: discourse_ai_enabled: "Enable the discourse AI plugin." ai_toxicity_enabled: "Enable the toxicity module." diff --git a/discourse_automation/llm_report.rb b/discourse_automation/llm_report.rb new file mode 100644 index 00000000..e17ab7d7 --- /dev/null +++ b/discourse_automation/llm_report.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +if defined?(DiscourseAutomation) + module DiscourseAutomation::LlmReport + end + + DiscourseAutomation::Scriptable::LLM_REPORT = "llm_report" + + DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_REPORT) do + version 1 + triggerables %i[recurring] + + field :sender, component: :user, required: true + field :receivers, component: :users, required: true + field :title, component: :text, required: true + field :days, component: :text, required: true, default_value: 7 + field :offset, component: :text, required: true, default_value: 0 + field :instructions, + component: :message, + required: true, + default_value: DiscourseAi::Automation::ReportRunner.default_instructions + field :sample_size, component: :text, required: true, default_value: 100 + field :tokens_per_post, component: :text, required: true, default_value: 150 + + field :model, + component: :choices, + required: true, + extra: { + content: DiscourseAi::Automation::AVAILABLE_MODELS, + } + + field :priority_group, component: :group + field :categories, component: :categories + field :tags, component: :tags + + field :allow_secure_categories, component: 
:boolean + field :debug_mode, component: :boolean + + script do |context, fields, automation| + begin + sender = fields.dig("sender", "value") + receivers = fields.dig("receivers", "value") + title = fields.dig("title", "value") + model = fields.dig("model", "value") + category_ids = fields.dig("categories", "value") + tags = fields.dig("tags", "value") + allow_secure_categories = !!fields.dig("allow_secure_categories", "value") + debug_mode = !!fields.dig("debug_mode", "value") + sample_size = fields.dig("sample_size", "value") + instructions = fields.dig("instructions", "value") + days = fields.dig("days", "value") + offset = fields.dig("offset", "value").to_i + priority_group = fields.dig("priority_group", "value") + tokens_per_post = fields.dig("tokens_per_post", "value") + + DiscourseAi::Automation::ReportRunner.run!( + sender_username: sender, + receivers: receivers, + title: title, + model: model, + category_ids: category_ids, + tags: tags, + allow_secure_categories: allow_secure_categories, + debug_mode: debug_mode, + sample_size: sample_size, + instructions: instructions, + days: days, + offset: offset, + priority_group_id: priority_group, + tokens_per_post: tokens_per_post, + ) + rescue => e + Discourse.warn_exception e, message: "Error running LLM report!" + if Rails.env.development? + p e + puts e.backtrace + end + end + end + end +end diff --git a/discourse_automation/llm_triage.rb b/discourse_automation/llm_triage.rb index e66688e3..99e6c5f0 100644 --- a/discourse_automation/llm_triage.rb +++ b/discourse_automation/llm_triage.rb @@ -1,104 +1,8 @@ # frozen_string_literal: true if defined?(DiscourseAutomation) - module DiscourseAutomation::LlmTriage - def self.handle( - post:, - model:, - search_for_text:, - system_prompt:, - category_id: nil, - tags: nil, - canned_reply: nil, - canned_reply_user: nil, - hide_topic: nil - ) - if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? 
- raise ArgumentError, "llm_triage: no action specified!" - end - - post_template = +"" - post_template << "title: #{post.topic.title}\n" - post_template << "#{post.raw}" - - filled_system_prompt = system_prompt.sub("%%POST%%", post_template) - - if filled_system_prompt == system_prompt - raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder" - end - - result = nil - if model == "claude-2" - # allowing double + 10 tokens - # technically maybe just token count is fine, but this will allow for more creative bad responses - result = - DiscourseAi::Inference::AnthropicCompletions.perform!( - filled_system_prompt, - model, - temperature: 0, - max_tokens: - DiscourseAi::Tokenizer::AnthropicTokenizer.tokenize(search_for_text).length * 2 + 10, - ).dig(:completion) - else - result = - DiscourseAi::Inference::OpenAiCompletions.perform!( - [{ :role => "system", "content" => filled_system_prompt }], - model, - temperature: 0, - max_tokens: - DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(search_for_text).length * 2 + 10, - ).dig(:choices, 0, :message, :content) - end - - if result.strip == search_for_text.strip - user = User.find_by_username(canned_reply_user) if canned_reply_user.present? - user = user || Discourse.system_user - if canned_reply.present? - PostCreator.create!( - user, - topic_id: post.topic_id, - raw: canned_reply, - reply_to_post_number: post.post_number, - skip_validations: true, - ) - end - - changes = {} - changes[:category_id] = category_id if category_id.present? - changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present? - - if changes.present? 
- first_post = post.topic.posts.where(post_number: 1).first - changes[:bypass_bump] = true - changes[:skip_validations] = true - first_post.revise(Discourse.system_user, changes) - end - - post.topic.update!(visible: false) if hide_topic - end - end - end - DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage" - AVAILABLE_MODELS = [ - { - id: "gpt-4", - name: - "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_4", - }, - { - id: "gpt-3-5-turbo", - name: - "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_3_5_turbo", - }, - { - id: "claude-2", - name: - "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.claude_2", - }, - ] - DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do version 1 run_in_background @@ -119,7 +23,12 @@ if defined?(DiscourseAutomation) end, accepts_placeholders: true field :search_for_text, component: :text, required: true - field :model, component: :choices, required: true, extra: { content: AVAILABLE_MODELS } + field :model, + component: :choices, + required: true, + extra: { + content: DiscourseAi::Automation::AVAILABLE_MODELS, + } field :category, component: :category field :tags, component: :tags field :hide_topic, component: :boolean @@ -149,7 +58,7 @@ if defined?(DiscourseAutomation) end begin - DiscourseAutomation::LlmTriage.handle( + DiscourseAi::Automation::LlmTriage.handle( post: post, model: model, search_for_text: search_for_text, diff --git a/lib/automation.rb b/lib/automation.rb new file mode 100644 index 00000000..06c621f4 --- /dev/null +++ b/lib/automation.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module DiscourseAi + module Automation + AVAILABLE_MODELS = [ + { id: "gpt-4-turbo", name: "discourse_automation.ai_models.gpt_4_turbo" }, + { id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" }, + { id: "gpt-3-5-turbo", name: 
"discourse_automation.ai_models.gpt_3_5_turbo" }, + { id: "claude-2", name: "discourse_automation.ai_models.claude_2" }, + { id: "gemini-pro", name: "discourse_automation.ai_models.gemini_pro" }, + ] + end +end diff --git a/lib/automation/llm_triage.rb b/lib/automation/llm_triage.rb new file mode 100644 index 00000000..4dd5c9c7 --- /dev/null +++ b/lib/automation/llm_triage.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true +# +module DiscourseAi + module Automation + module LlmTriage + def self.handle( + post:, + model:, + search_for_text:, + system_prompt:, + category_id: nil, + tags: nil, + canned_reply: nil, + canned_reply_user: nil, + hide_topic: nil + ) + if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? + raise ArgumentError, "llm_triage: no action specified!" + end + + post_template = +"" + post_template << "title: #{post.topic.title}\n" + post_template << "#{post.raw}" + + filled_system_prompt = system_prompt.sub("%%POST%%", post_template) + + if filled_system_prompt == system_prompt + raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder" + end + + result = nil + + llm = DiscourseAi::Completions::Llm.proxy(model) + prompt = { + insts: filled_system_prompt, + params: { + model => { + max_tokens: (llm.tokenizer.tokenize(search_for_text).length * 2 + 10), + temperature: 0, + }, + }, + } + + result = llm.completion!(prompt, Discourse.system_user) + + if result.strip == search_for_text.strip + user = User.find_by_username(canned_reply_user) if canned_reply_user.present? + user = user || Discourse.system_user + if canned_reply.present? + PostCreator.create!( + user, + topic_id: post.topic_id, + raw: canned_reply, + reply_to_post_number: post.post_number, + skip_validations: true, + ) + end + + changes = {} + changes[:category_id] = category_id if category_id.present? + changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present? + + if changes.present? 
+ first_post = post.topic.posts.where(post_number: 1).first + changes[:bypass_bump] = true + changes[:skip_validations] = true + first_post.revise(Discourse.system_user, changes) + end + + post.topic.update!(visible: false) if hide_topic + end + end + end + end +end diff --git a/lib/automation/report_context_generator.rb b/lib/automation/report_context_generator.rb new file mode 100644 index 00000000..9f32332c --- /dev/null +++ b/lib/automation/report_context_generator.rb @@ -0,0 +1,225 @@ +# frozen_string_literal: true + +module DiscourseAi + module Automation + class ReportContextGenerator + def self.generate(**args) + new(**args).generate + end + + def initialize( + start_date:, + duration:, + category_ids: nil, + tags: nil, + allow_secure_categories: false, + max_posts: 200, + tokens_per_post: 100, + tokenizer: nil, + prioritized_group_ids: [] + ) + @start_date = start_date + @duration = duration + @category_ids = category_ids + @tags = tags + @allow_secure_categories = allow_secure_categories + @max_posts = max_posts + @tokenizer = tokenizer || DiscourseAi::Tokenizer::OpenAiTokenizer + @tokens_per_post = tokens_per_post + @prioritized_group_ids = prioritized_group_ids + + @posts = + Post + .where("posts.created_at >= ?", @start_date) + .joins(topic: :category) + .includes(:topic, :user) + .where("posts.created_at < ?", @start_date + @duration) + .where("posts.post_type = ?", Post.types[:regular]) + .where("posts.hidden_at IS NULL") + .where("topics.deleted_at IS NULL") + .where("topics.archetype = ?", Archetype.default) + @posts = @posts.where("categories.read_restricted = ?", false) if !@allow_secure_categories + @posts = @posts.where("categories.id IN (?)", @category_ids) if @category_ids.present? + + if @tags.present? 
+ tag_ids = Tag.where(name: @tags).select(:id) + topic_ids_with_tags = TopicTag.where(tag_id: tag_ids).select(:topic_id) + @posts = @posts.where(topic_id: topic_ids_with_tags) + end + + @solutions = {} + if defined?(::DiscourseSolved) + TopicCustomField + .where(name: ::DiscourseSolved::ACCEPTED_ANSWER_POST_ID_CUSTOM_FIELD) + .where(topic_id: @posts.select(:topic_id)) + .pluck(:topic_id, :value) + .each do |topic_id, post_id| + @solutions[topic_id] ||= Set.new + @solutions[topic_id] << post_id.to_i + end + end + end + + def format_topic(topic) + info = [] + info << "" + info << "### #{topic.title}" + info << "topic_id: #{topic.id}" + info << "solved: true" if @solutions.key?(topic.id) + info << "category: #{topic.category&.name}" + tags = topic.tags.pluck(:name) + info << "tags: #{topic.tags.pluck(:name).join(", ")}" if tags.present? + info << topic.created_at.strftime("%Y-%m-%d %H:%M") + { created_at: topic.created_at, info: info.join("\n"), posts: {} } + end + + def format_post(post) + buffer = [] + buffer << "" + buffer << "post_number: #{post.post_number}" + if @solutions.key?(post.topic_id) && @solutions[post.topic_id].include?(post.id) + buffer << "solution: true" + end + buffer << post.created_at.strftime("%Y-%m-%d %H:%M") + buffer << "user: #{post.user&.username}" + buffer << "likes: #{post.like_count}" + excerpt = @tokenizer.truncate(post.raw, @tokens_per_post) + excerpt = "excerpt: #{excerpt}..." 
if excerpt.length < post.raw.length + buffer << "#{excerpt}" + { likes: post.like_count, info: buffer.join("\n") } + end + + def format_summary + topic_count = + @posts + .where("topics.created_at > ?", @start_date) + .select(:topic_id) + .distinct(:topic_id) + .count + + buffer = [] + buffer << "Start Date: #{@start_date.to_date}" + buffer << "End Date: #{(@start_date + @duration).to_date}" + buffer << "New posts: #{@posts.count}" + buffer << "New topics: #{topic_count}" + + top_users = + Post + .where(id: @posts.select(:id)) + .joins(:user) + .group(:user_id, :username) + .select( + "user_id, username, sum(posts.like_count) like_count, count(posts.id) post_count", + ) + .order("sum(posts.like_count) desc") + .limit(10) + + buffer << "Top users:" + top_users.each do |user| + buffer << "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)" + end + + if @prioritized_group_ids.present? + group_names = + Group + .where(id: @prioritized_group_ids) + .pluck(:name, :full_name) + .map do |name, full_name| + if full_name.present? + "#{name} (#{full_name[0..100].gsub("\n", " ")})" + else + name + end + end + .join(", ") + buffer << "" + buffer << "Top users in #{group_names} group#{group_names.include?(",") ? "s" : ""}:" + + group_users = GroupUser.where(group_id: @prioritized_group_ids).select(:user_id) + top_users + .where(user_id: group_users) + .each do |user| + buffer << "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)" + end + end + + buffer.join("\n") + end + + def format_topics + buffer = [] + topics = {} + + post_count = 0 + + @posts = @posts.order("posts.like_count desc, posts.created_at desc") + + if @prioritized_group_ids.present? 
+ user_groups = GroupUser.where(group_id: @prioritized_group_ids) + prioritized_posts = @posts.where(user_id: user_groups.select(:user_id)).limit(@max_posts) + + post_count += add_posts(prioritized_posts, topics) + end + + add_posts(@posts.limit(@max_posts), topics, limit: @max_posts - post_count) + + # we need last posts in all topics + # they may have important info + last_posts = + @posts.where("posts.post_number = topics.highest_post_number").where( + "topics.id IN (?)", + topics.keys, + ) + + add_posts(last_posts, topics) + + topics.each do |topic_id, topic_info| + topic_info[:post_likes] = topic_info[:posts].sum { |_, post_info| post_info[:likes] } + end + + topics = topics.sort { |a, b| b[1][:post_likes] <=> a[1][:post_likes] } + + topics.each do |topic_id, topic_info| + buffer << topic_info[:info] + + last_post_number = 0 + + topic_info[:posts] + .sort { |a, b| a[0] <=> b[0] } + .each do |post_number, post_info| + buffer << "\n..." if post_number > last_post_number + 1 + buffer << post_info[:info] + last_post_number = post_number + end + end + + buffer.join("\n") + end + + def generate + buffer = [] + + buffer << "## Summary" + buffer << format_summary + buffer << "\n## Topics" + buffer << format_topics + + buffer.join("\n") + end + + def add_posts(relation, topics, limit: nil) + post_count = 0 + relation.each do |post| + topics[post.topic_id] ||= format_topic(post.topic) + if !topics[post.topic_id][:posts][post.post_number] + topics[post.topic_id][:posts][post.post_number] = format_post(post) + post_count += 1 + limit -= 1 if limit + end + break if limit && limit <= 0 + end + post_count + end + end + end +end diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb new file mode 100644 index 00000000..bf9c0a6d --- /dev/null +++ b/lib/automation/report_runner.rb @@ -0,0 +1,146 @@ +# frozen_string_literal: true + +module DiscourseAi + module Automation + class ReportRunner + def self.default_instructions + # not localizing for now cause 
non-English LLMs will require + # a fair bit of experimentation + <<~TEXT + Generate report: + + ## Report Guidelines: + + - Length & Style: Aim for 12 dense paragraphs in a narrative style, focusing on internal forum discussions. + - Accuracy: Only include verified information with no embellishments. + - Sourcing: ALWAYS Back statements with links to forum discussions. + - Markdown Usage: Enhance readability with **bold**, *italic*, and > quotes. + - Linking: Use `#{Discourse.base_url}/t/-/TOPIC_ID/POST_NUMBER` for direct references. + - User Mentions: Reference users with @USERNAME + - Context tips: Staff are denoted with Username *. For example: jane * means that jane is a staff member. Do not render the * in the report. + - Add many topic links: strive to link to at least 30 topics in the report. Topic Id is meaningless to end users if you need to throw in a link use [ref](...) or better still just embed it into the [sentence](...) + - Categories and tags: use the format #TAG and #CATEGORY to denote tags and categories + + ## Structure: + + - Key statistics: Specify date range, call out important stats like number of new topics and posts + - Overview: Briefly state trends within period. + - Highlighted content: 5 paragraphs highlighting important topics people should know about. If possible have each paragraph link to multiple related topics. + - Key insights and trends linking to a selection of posts that back them + TEXT + end + + def self.run!(**args) + new(**args).run! 
+ end + + def initialize( + sender_username:, + receivers:, + title:, + model:, + category_ids:, + tags:, + allow_secure_categories:, + debug_mode:, + sample_size:, + instructions:, + days:, + offset:, + priority_group_id:, + tokens_per_post: + ) + @sender = User.find_by(username: sender_username) + @receivers = User.where(username: receivers) + @title = title + + @model = model + @llm = DiscourseAi::Completions::Llm.proxy(model) + @category_ids = category_ids + @tags = tags + @allow_secure_categories = allow_secure_categories + @debug_mode = debug_mode + @sample_size = sample_size.to_i < 10 ? 10 : sample_size.to_i + @instructions = instructions + @days = days.to_i + @offset = offset.to_i + @priority_group_id = priority_group_id + @tokens_per_post = tokens_per_post.to_i + end + + def run! + start_date = (@offset + @days).days.ago + prioritized_group_ids = [@priority_group_id] if @priority_group_id.present? + context = + DiscourseAi::Automation::ReportContextGenerator.generate( + start_date: start_date, + duration: @days.days, + max_posts: @sample_size, + tags: @tags, + category_ids: @category_ids, + prioritized_group_ids: prioritized_group_ids, + allow_secure_categories: @allow_secure_categories, + tokens_per_post: @tokens_per_post, + tokenizer: @llm.tokenizer, + ) + input = <<~INPUT + #{@instructions} + + + #{context} + + + #{@instructions} + INPUT + + prompt = { + insts: "You are a helpful bot specializing in summarizing activity Discourse sites", + input: input, + final_insts: "Here is the report I generated for you", + params: { + @model => { + temperature: 0, + }, + }, + } + + result = +"" + + puts if Rails.env.development? && @debug_mode + + @llm.completion!(prompt, Discourse.system_user) do |response| + print response if Rails.env.development? 
&& @debug_mode + result << response + end + + post = + PostCreator.create!( + @sender, + raw: result, + title: @title, + archetype: Archetype.private_message, + target_usernames: @receivers.map(&:username).join(","), + skip_validations: true, + ) + + if @debug_mode + input = input.split("\n").map { |line| " #{line}" }.join("\n") + raw = <<~RAW + ``` + start_date: #{start_date}, + duration: #{@days.days}, + max_posts: #{@sample_size}, + tags: #{@tags}, + category_ids: #{@category_ids}, + priority_group: #{@priority_group_id} + LLM context was: + ``` + + #{input} + RAW + PostCreator.create!(@sender, raw: raw, topic_id: post.topic_id, skip_validations: true) + end + end + end + end +end diff --git a/lib/completions/dialects/chat_gpt.rb b/lib/completions/dialects/chat_gpt.rb index dda3e8a2..777f4184 100644 --- a/lib/completions/dialects/chat_gpt.rb +++ b/lib/completions/dialects/chat_gpt.rb @@ -6,7 +6,14 @@ module DiscourseAi class ChatGpt < Dialect class << self def can_translate?(model_name) - %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name) + %w[ + gpt-3.5-turbo + gpt-4 + gpt-3.5-turbo-16k + gpt-4-32k + gpt-4-1106-preview + gpt-4-turbo + ].include?(model_name) end def tokenizer diff --git a/lib/completions/dialects/claude.rb b/lib/completions/dialects/claude.rb index f10c4c49..28fff8ee 100644 --- a/lib/completions/dialects/claude.rb +++ b/lib/completions/dialects/claude.rb @@ -27,7 +27,9 @@ module DiscourseAi claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts] - claude_prompt << "Assistant:\n" + claude_prompt << "Assistant:" + claude_prompt << " #{prompt[:final_insts]}:" if prompt[:final_insts] + claude_prompt << "\n" end def max_prompt_tokens diff --git a/lib/completions/dialects/dialect.rb b/lib/completions/dialects/dialect.rb index 5e6d6b97..cea3bfe2 100644 --- a/lib/completions/dialects/dialect.rb +++ b/lib/completions/dialects/dialect.rb @@ -17,9 +17,10 @@ module DiscourseAi DiscourseAi::Completions::Dialects::OrcaStyle, 
DiscourseAi::Completions::Dialects::Gemini, ] - dialects.detect(-> { raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL }) do |d| - d.can_translate?(model_name) - end + + dialect = dialects.find { |d| d.can_translate?(model_name) } + raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect + dialect end def tokenizer diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb index b0664cd0..0b1d438e 100644 --- a/lib/completions/endpoints/open_ai.rb +++ b/lib/completions/endpoints/open_ai.rb @@ -5,11 +5,18 @@ module DiscourseAi module Endpoints class OpenAi < Base def self.can_contact?(model_name) - %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name) + %w[ + gpt-3.5-turbo + gpt-4 + gpt-3.5-turbo-16k + gpt-4-32k + gpt-4-1106-preview + gpt-4-turbo + ].include?(model_name) end def default_options - { model: model } + { model: model == "gpt-4-turbo" ? "gpt-4-1106-preview" : model } end def provider_id @@ -24,7 +31,11 @@ module DiscourseAi if model.include?("32k") SiteSetting.ai_openai_gpt4_32k_url else - SiteSetting.ai_openai_gpt4_url + if model.include?("1106") || model.include?("turbo") + SiteSetting.ai_openai_gpt4_turbo_url + else + SiteSetting.ai_openai_gpt4_url + end end else if model.include?("16k") diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index bb553366..f262a42e 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -7,6 +7,7 @@ module DiscourseAi foldable_models = [ Models::OpenAi.new("gpt-4", max_tokens: 8192), Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768), + Models::OpenAi.new("gpt-4-1106-preview", max_tokens: 100_000), Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096), Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384), Models::Anthropic.new("claude-2", max_tokens: 100_000), diff --git a/plugin.rb b/plugin.rb index 79996d0e..abfedec9 100644 --- a/plugin.rb +++ b/plugin.rb @@ -40,6 +40,7 @@ register_svg_icon 
"meh" after_initialize do # do not autoload this cause we may have no namespace require_relative "discourse_automation/llm_triage" + require_relative "discourse_automation/llm_report" add_admin_route "discourse_ai.title", "discourse-ai" diff --git a/spec/lib/discourse_automation/llm_report_spec.rb b/spec/lib/discourse_automation/llm_report_spec.rb new file mode 100644 index 00000000..cae9d6f2 --- /dev/null +++ b/spec/lib/discourse_automation/llm_report_spec.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +return if !defined?(DiscourseAutomation) + +describe DiscourseAutomation do + let(:automation) { Fabricate(:automation, script: "llm_report", enabled: true) } + + def add_automation_field(name, value, type: "text") + automation.fields.create!( + component: type, + name: name, + metadata: { + value: value, + }, + target: "script", + ) + end + + it "can trigger via automation" do + user = Fabricate(:user) + + add_automation_field("sender", user.username, type: "user") + add_automation_field("receivers", [user.username], type: "users") + add_automation_field("model", "gpt-4-turbo") + add_automation_field("title", "Weekly report") + + DiscourseAi::Completions::Llm.with_prepared_responses(["An Amazing Report!!!"]) do + automation.trigger! 
+ end + + pm = Topic.where(title: "Weekly report").first + expect(pm.posts.first.raw).to eq("An Amazing Report!!!") + end +end diff --git a/spec/lib/discourse_automation/llm_triage_spec.rb b/spec/lib/discourse_automation/llm_triage_spec.rb index 469daef9..34579c07 100644 --- a/spec/lib/discourse_automation/llm_triage_spec.rb +++ b/spec/lib/discourse_automation/llm_triage_spec.rb @@ -2,106 +2,9 @@ return if !defined?(DiscourseAutomation) -describe DiscourseAutomation::LlmTriage do +describe DiscourseAi::Automation::LlmTriage do fab!(:post) { Fabricate(:post) } - def triage(**args) - DiscourseAutomation::LlmTriage.handle(**args) - end - - it "does nothing if it does not pass triage" do - stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return( - status: 200, - body: { choices: [{ message: { content: "good" } }] }.to_json, - ) - - triage( - post: post, - model: "gpt-4", - hide_topic: true, - system_prompt: "test %%POST%%", - search_for_text: "bad", - ) - - expect(post.topic.reload.visible).to eq(true) - end - - it "can hide topics on triage with claude" do - stub_request(:post, "https://api.anthropic.com/v1/complete").to_return( - status: 200, - body: { completion: "bad" }.to_json, - ) - - triage( - post: post, - model: "claude-2", - hide_topic: true, - system_prompt: "test %%POST%%", - search_for_text: "bad", - ) - - expect(post.topic.reload.visible).to eq(false) - end - - it "can hide topics on triage with claude" do - stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return( - status: 200, - body: { choices: [{ message: { content: "bad" } }] }.to_json, - ) - - triage( - post: post, - model: "gpt-4", - hide_topic: true, - system_prompt: "test %%POST%%", - search_for_text: "bad", - ) - - expect(post.topic.reload.visible).to eq(false) - end - - it "can categorize topics on triage" do - category = Fabricate(:category) - - stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return( - status: 200, - body: { choices: 
[{ message: { content: "bad" } }] }.to_json, - ) - - triage( - post: post, - model: "gpt-4", - category_id: category.id, - system_prompt: "test %%POST%%", - search_for_text: "bad", - ) - - expect(post.topic.reload.category_id).to eq(category.id) - end - - it "can reply to topics on triage" do - user = Fabricate(:user) - - stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return( - status: 200, - body: { choices: [{ message: { content: "bad" } }] }.to_json, - ) - - triage( - post: post, - model: "gpt-4", - system_prompt: "test %%POST%%", - search_for_text: "bad", - canned_reply: "test canned reply 123", - canned_reply_user: user.username, - ) - - reply = post.topic.posts.order(:post_number).last - - expect(reply.raw).to eq("test canned reply 123") - expect(reply.user.id).to eq(user.id) - end - let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) } def add_automation_field(name, value, type: "text") @@ -130,13 +33,10 @@ describe DiscourseAutomation::LlmTriage do add_automation_field("canned_reply", "Yo this is a reply") add_automation_field("canned_reply_user", user.username, type: "user") - stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return( - status: 200, - body: { choices: [{ message: { content: "bad" } }] }.to_json, - ) - - automation.running_in_background! - automation.trigger!({ "post" => post }) + DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do + automation.running_in_background! 
+ automation.trigger!({ "post" => post }) + end topic = post.topic.reload expect(topic.category_id).to eq(category.id) diff --git a/spec/lib/modules/automation/llm_triage_spec.rb b/spec/lib/modules/automation/llm_triage_spec.rb new file mode 100644 index 00000000..911064d4 --- /dev/null +++ b/spec/lib/modules/automation/llm_triage_spec.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true +describe DiscourseAi::Automation::LlmTriage do + fab!(:post) { Fabricate(:post) } + + def triage(**args) + DiscourseAi::Automation::LlmTriage.handle(**args) + end + + it "does nothing if it does not pass triage" do + DiscourseAi::Completions::Llm.with_prepared_responses(["good"]) do + triage( + post: post, + model: "gpt-4", + hide_topic: true, + system_prompt: "test %%POST%%", + search_for_text: "bad", + ) + end + + expect(post.topic.reload.visible).to eq(true) + end + + it "can hide topics on triage with claude" do + DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do + triage( + post: post, + model: "claude-2", + hide_topic: true, + system_prompt: "test %%POST%%", + search_for_text: "bad", + ) + end + + expect(post.topic.reload.visible).to eq(false) + end + + it "can hide topics on triage with claude" do + DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do + triage( + post: post, + model: "gpt-4", + hide_topic: true, + system_prompt: "test %%POST%%", + search_for_text: "bad", + ) + end + + expect(post.topic.reload.visible).to eq(false) + end + + it "can categorize topics on triage" do + category = Fabricate(:category) + + DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do + triage( + post: post, + model: "gpt-4", + category_id: category.id, + system_prompt: "test %%POST%%", + search_for_text: "bad", + ) + end + + expect(post.topic.reload.category_id).to eq(category.id) + end + + it "can reply to topics on triage" do + user = Fabricate(:user) + DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do + triage( + post: post, + 
model: "gpt-4", + system_prompt: "test %%POST%%", + search_for_text: "bad", + canned_reply: "test canned reply 123", + canned_reply_user: user.username, + ) + end + + reply = post.topic.posts.order(:post_number).last + + expect(reply.raw).to eq("test canned reply 123") + expect(reply.user.id).to eq(user.id) + end +end diff --git a/spec/lib/modules/automation/report_context_generator_spec.rb b/spec/lib/modules/automation/report_context_generator_spec.rb new file mode 100644 index 00000000..58cd6347 --- /dev/null +++ b/spec/lib/modules/automation/report_context_generator_spec.rb @@ -0,0 +1,152 @@ +# frozen_string_literal: true + +require "rails_helper" + +module DiscourseAi + module Automation + describe ReportContextGenerator do + describe ".generate" do + fab!(:private_message_post) + fab!(:post_in_other_category) { Fabricate(:post) } + + fab!(:category) + fab!(:topic) { Fabricate(:topic, category: category) } + fab!(:post_in_category) { Fabricate(:post, topic: topic) } + fab!(:reply_in_category) { Fabricate(:post, topic: topic, reply_to_post_number: 1) } + + fab!(:group) + fab!(:private_category) { Fabricate(:private_category, group: group) } + fab!(:secure_topic) do + Fabricate(:topic, title: "category in secure category", category: private_category) + end + fab!(:user_in_group) { Fabricate(:user, groups: [group]) } + fab!(:post_in_private_category) do + Fabricate(:post, user: user_in_group, topic: secure_topic) + end + + fab!(:tag) + fab!(:tag2) { Fabricate(:tag) } + fab!(:topic_with_tag) { Fabricate(:topic, tags: [tag, tag2]) } + fab!(:post_with_tag) { Fabricate(:post, topic: topic_with_tag) } + + fab!(:long_post) do + Fabricate( + :post, + raw: (1..100).map { |i| "testing#{i}" }.join(" "), + topic: Fabricate(:topic, category: category), + ) + end + + fab!(:topic_with_likes) { Fabricate(:topic, like_count: 10) } + + fab!(:post_with_likes) { Fabricate(:post, topic: topic_with_likes, like_count: 10) } + + fab!(:post_with_likes2) { Fabricate(:post, topic: 
topic_with_likes, like_count: 5) } + + fab!(:post_with_likes3) { Fabricate(:post, topic: topic_with_likes, like_count: 3) } + + if defined?(::DiscourseSolved) + it "will correctly denote solved topics" do + topic_with_likes.custom_fields[ + ::DiscourseSolved::ACCEPTED_ANSWER_POST_ID_CUSTOM_FIELD + ] = post_with_likes2.id + topic_with_likes.save_custom_fields + + context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day) + + expect(context).to include("solved: true") + expect(context).to include("solution: true") + end + end + + it "always includes info from last posts on topic" do + context = + ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day, max_posts: 1) + + expect(context).to include("...") + expect(context).to include("post_number: 3") + end + + it "includes a summary" do + context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day) + + expect(context).to include("New posts: 8") + expect(context).to include("New topics: 5") + end + + it "orders so most liked are first" do + context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day) + + regex = "topic_id: #{topic_with_likes.id}.*topic_id: #{long_post.topic.id}" + expect(context).to match(Regexp.new(regex, Regexp::MULTILINE)) + end + + it "allows you to prioritize groups" do + context = + ReportContextGenerator.generate( + start_date: 1.day.ago, + duration: 2.day, + prioritized_group_ids: [group.id], + allow_secure_categories: true, + max_posts: 1, + ) + + expect(context).to include(post_in_private_category.topic.title) + expect(context).not_to include(post_in_other_category.topic.title) + expect(context).to include(group.name) + end + + it "can generate context (excluding PMs)" do + context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day) + + expect(context).to include(post_in_other_category.topic.title) + expect(context).to include(topic.title) + expect(context).not_to 
include(private_message_post.topic.title) + expect(context).not_to include(secure_topic.title) + end + + it "can filter on tag" do + context = + ReportContextGenerator.generate( + start_date: 1.day.ago, + duration: 2.day, + tags: [tag.name], + ) + + expect(context).not_to include(post_in_other_category.topic.title) + expect(context).not_to include(topic.title) + expect(context).not_to include(private_message_post.topic.title) + expect(context).not_to include(secure_topic.title) + expect(context).to include(post_with_tag.topic.title) + end + + it "can optionally include secure categories" do + context = + ReportContextGenerator.generate( + start_date: 1.day.ago, + duration: 2.day, + allow_secure_categories: true, + ) + expect(context).to include(post_in_other_category.topic.title) + expect(context).to include(topic.title) + expect(context).not_to include(private_message_post.topic.title) + expect(context).to include(secure_topic.title) + end + + it "can filter to a categories" do + context = + ReportContextGenerator.generate( + start_date: 1.day.ago, + duration: 2.day, + category_ids: [category.id], + ) + + expect(context).not_to include(post_in_other_category.topic.title) + expect(context).to include(topic.title) + expect(context).not_to include(private_message_post.topic.title) + expect(context).not_to include(secure_topic.title) + end + end + end + end +end diff --git a/spec/lib/modules/automation/report_runner_spec.rb b/spec/lib/modules/automation/report_runner_spec.rb new file mode 100644 index 00000000..13029bec --- /dev/null +++ b/spec/lib/modules/automation/report_runner_spec.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +require "rails_helper" + +module DiscourseAi + module Automation + describe ReportRunner do + fab!(:user) + fab!(:reciever) { Fabricate(:user) } + fab!(:post) { Fabricate(:post, user: user) } + fab!(:group) + fab!(:secure_category) { Fabricate(:private_category, group: group) } + fab!(:secure_topic) { Fabricate(:topic, category: 
secure_category) } + fab!(:secure_post) { Fabricate(:post, raw: "Top secret date !!!!", topic: secure_topic) } + + describe "#run!" do + it "generates correctly respects the params" do + DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do + ReportRunner.run!( + sender_username: user.username, + receivers: [reciever.username], + title: "test report", + model: "gpt-4", + category_ids: nil, + tags: nil, + allow_secure_categories: false, + debug_mode: true, + sample_size: 100, + instructions: "make a magic report", + days: 7, + offset: 0, + priority_group_id: nil, + tokens_per_post: 150, + ) + end + + report = Topic.where(title: "test report").first + expect(report.ordered_posts.first.raw).to eq("magical report") + debugging = report.ordered_posts.last.raw + + expect(debugging).to include(post.raw) + expect(debugging).not_to include(secure_post.raw) + end + end + end + end +end