From 47f5da7e4296f168c16a804f04aba86bcd1ecae3 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 12 Dec 2024 09:17:25 +1100 Subject: [PATCH] FEATURE: Add AI-powered spam detection for new user posts (#1004) This introduces a comprehensive spam detection system that uses LLM models to automatically identify and flag potential spam posts. The system is designed to be both powerful and configurable while preventing false positives. Key Features: * Automatically scans first 3 posts from new users (TL0/TL1) * Creates dedicated AI flagging user to distinguish from system flags * Tracks false positives/negatives for quality monitoring * Supports custom instructions to fine-tune detection * Includes test interface for trying detection on any post Technical Implementation: * New database tables: - ai_spam_logs: Stores scan history and results - ai_moderation_settings: Stores LLM config and custom instructions * Rate limiting and safeguards: - Minimum 10-minute delay between rescans - Only scans significant edits (>10 char difference) - Maximum 3 scans per post - 24-hour maximum age for scannable posts * Admin UI features: - Real-time testing capabilities - 7-day statistics dashboard - Configurable LLM model selection - Custom instruction support Security and Performance: * Respects trust levels - only scans TL0/TL1 users * Skips private messages entirely * Stops scanning users after 3 successful public posts * Includes comprehensive test coverage * Maintains audit log of all scan attempts --------- Co-authored-by: Keegan George Co-authored-by: Martin Brennan --- .../admin-plugins-show-discourse-ai-spam.js | 11 + .../admin-plugins/show/discourse-ai-spam.hbs | 1 + .../discourse_ai/admin/ai_spam_controller.rb | 112 ++++++ app/jobs/regular/ai_spam_scan.rb | 13 + app/models/ai_moderation_setting.rb | 32 ++ app/models/ai_spam_log.rb | 26 ++ app/models/llm_model.rb | 6 +- app/serializers/ai_spam_serializer.rb | 40 ++ .../admin-discourse-ai-plugin-route-map.js | 1 + .../discourse/components/ai-spam.gjs | 243 ++++++++++++ .../components/modal/spam-test-modal.gjs | 101 +++++ .../admin-plugin-configuration-nav.js | 4 + .../stylesheets/modules/llms/common/spam.scss | 124 ++++++ config/locales/client.en.yml | 30 ++ config/locales/server.en.yml | 7 +- config/routes.rb | 3 + config/settings.yml | 15 +- ...241206030229_add_ai_moderation_settings.rb | 15 + db/migrate/20241206051225_add_ai_spam_logs.rb | 16 + lib/ai_moderation/entry_point.rb | 17 + lib/ai_moderation/spam_report.rb | 47 +++ lib/ai_moderation/spam_scanner.rb | 371 ++++++++++++++++++ lib/configuration/llm_enumerator.rb | 15 +- plugin.rb | 2 + .../ai_moderation/spam_scanner_spec.rb | 222 +++++++++++ .../requests/admin/ai_spam_controller_spec.rb | 285 ++++++++++++++ spec/system/ai_moderation/ai_spam_spec.rb | 48 +++ 27 files changed, 1801 insertions(+), 6 deletions(-) create mode 100644 admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-spam.js create mode 100644 admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-spam.hbs create mode 100644 app/controllers/discourse_ai/admin/ai_spam_controller.rb create mode 100644 app/jobs/regular/ai_spam_scan.rb create mode 100644 app/models/ai_moderation_setting.rb create mode 100644 app/models/ai_spam_log.rb create mode 100644 app/serializers/ai_spam_serializer.rb create mode 100644 assets/javascripts/discourse/components/ai-spam.gjs create mode 100644 assets/javascripts/discourse/components/modal/spam-test-modal.gjs create mode 100644 
assets/stylesheets/modules/llms/common/spam.scss create mode 100644 db/migrate/20241206030229_add_ai_moderation_settings.rb create mode 100644 db/migrate/20241206051225_add_ai_spam_logs.rb create mode 100644 lib/ai_moderation/entry_point.rb create mode 100644 lib/ai_moderation/spam_report.rb create mode 100644 lib/ai_moderation/spam_scanner.rb create mode 100644 spec/lib/modules/ai_moderation/spam_scanner_spec.rb create mode 100644 spec/requests/admin/ai_spam_controller_spec.rb create mode 100644 spec/system/ai_moderation/ai_spam_spec.rb diff --git a/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-spam.js b/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-spam.js new file mode 100644 index 00000000..66fd3cf4 --- /dev/null +++ b/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-spam.js @@ -0,0 +1,11 @@ +import { service } from "@ember/service"; +import { ajax } from "discourse/lib/ajax"; +import DiscourseRoute from "discourse/routes/discourse"; + +export default class DiscourseAiSpamRoute extends DiscourseRoute { + @service store; + + model() { + return ajax("/admin/plugins/discourse-ai/ai-spam.json"); + } +} diff --git a/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-spam.hbs b/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-spam.hbs new file mode 100644 index 00000000..f9946ef9 --- /dev/null +++ b/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-spam.hbs @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/app/controllers/discourse_ai/admin/ai_spam_controller.rb b/app/controllers/discourse_ai/admin/ai_spam_controller.rb new file mode 100644 index 00000000..aae13cd1 --- /dev/null +++ b/app/controllers/discourse_ai/admin/ai_spam_controller.rb @@ -0,0 +1,112 @@ +# frozen_string_literal: true + +module DiscourseAi + module Admin + class AiSpamController < ::Admin::AdminController + requires_plugin "discourse-ai" + + def show + render json: AiSpamSerializer.new(spam_config, root: false) + end + + def update + updated_params = {} + if allowed_params.key?(:llm_model_id) + llm_model_id = updated_params[:llm_model_id] = allowed_params[:llm_model_id] + if llm_model_id.to_i < 0 && + !SiteSetting.ai_spam_detection_model_allowed_seeded_models_map.include?( + "custom:#{llm_model_id}", + ) + return( + render_json_error( + I18n.t("discourse_ai.llm.configuration.invalid_seeded_model"), + status: 422, + ) + ) + end + end + updated_params[:data] = { + custom_instructions: allowed_params[:custom_instructions], + } if allowed_params.key?(:custom_instructions) + + if updated_params.present? 
+ # not using upsert cause we will not get the correct validation errors + if AiModerationSetting.spam + AiModerationSetting.spam.update!(updated_params) + else + AiModerationSetting.create!(updated_params.merge(setting_type: :spam)) + end + end + + is_enabled = ActiveModel::Type::Boolean.new.cast(allowed_params[:is_enabled]) + + if allowed_params.key?(:is_enabled) + if is_enabled && !AiModerationSetting.spam&.llm_model_id + return( + render_json_error( + I18n.t("discourse_ai.llm.configuration.must_select_model"), + status: 422, + ) + ) + end + + SiteSetting.ai_spam_detection_enabled = is_enabled + end + + render json: AiSpamSerializer.new(spam_config, root: false) + end + + def test + url = params[:post_url].to_s + post = nil + + if url.match?(/^\d+$/) + post_id = url.to_i + post = Post.find_by(id: post_id) + end + + route = UrlHelper.rails_route_from_url(url) if !post + + if route + if route[:controller] == "topics" + post_number = route[:post_number] || 1 + post = Post.with_deleted.find_by(post_number: post_number, topic_id: route[:topic_id]) + end + end + + raise Discourse::NotFound if !post + + result = + DiscourseAi::AiModeration::SpamScanner.test_post( + post, + custom_instructions: params[:custom_instructions], + llm_id: params[:llm_id], + ) + + render json: result + end + + private + + def allowed_params + params.permit(:is_enabled, :llm_model_id, :custom_instructions) + end + + def spam_config + spam_config = { + enabled: SiteSetting.ai_spam_detection_enabled, + settings: AiModerationSetting.spam, + } + + spam_config[:stats] = DiscourseAi::AiModeration::SpamReport.generate(min_date: 1.week.ago) + + if spam_config[:stats].scanned_count > 0 + spam_config[ + :flagging_username + ] = DiscourseAi::AiModeration::SpamScanner.flagging_user&.username + end + spam_config + end + end + end +end diff --git a/app/jobs/regular/ai_spam_scan.rb b/app/jobs/regular/ai_spam_scan.rb new file mode 100644 index 00000000..483e56ec --- /dev/null +++ b/app/jobs/regular/ai_spam_scan.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Jobs + class AiSpamScan < ::Jobs::Base + def execute(args) + return if !args[:post_id] + post = Post.find_by(id: args[:post_id]) + return if !post + + DiscourseAi::AiModeration::SpamScanner.perform_scan(post) + end + end +end diff --git a/app/models/ai_moderation_setting.rb b/app/models/ai_moderation_setting.rb new file mode 100644 index 00000000..8b440725 --- /dev/null +++ b/app/models/ai_moderation_setting.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true +class AiModerationSetting < ActiveRecord::Base + belongs_to :llm_model + + validates :llm_model_id, presence: true + validates :setting_type, presence: true + validates :setting_type, uniqueness: true + + def self.spam + find_by(setting_type: :spam) + end + + def custom_instructions + data["custom_instructions"] + end +end + +# == Schema Information +# +# Table name: ai_moderation_settings +# +# id :bigint not null, primary key +# setting_type :enum not null +# data :jsonb +# llm_model_id :bigint not null +# created_at :datetime not null +# updated_at :datetime not null +# +# Indexes +# +# index_ai_moderation_settings_on_setting_type (setting_type) UNIQUE +# diff --git a/app/models/ai_spam_log.rb b/app/models/ai_spam_log.rb new file mode 100644 index 00000000..616a64b5 --- /dev/null +++ b/app/models/ai_spam_log.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true +class AiSpamLog < ActiveRecord::Base + belongs_to :post + belongs_to :llm_model + belongs_to :ai_api_audit_log + belongs_to :reviewable +end + +# == 
Schema Information +# +# Table name: ai_spam_logs +# +# id :bigint not null, primary key +# post_id :bigint not null +# llm_model_id :bigint not null +# ai_api_audit_log_id :bigint +# reviewable_id :bigint +# is_spam :boolean not null +# payload :string(20000) default(""), not null +# created_at :datetime not null +# updated_at :datetime not null +# +# Indexes +# +# index_ai_spam_logs_on_post_id (post_id) +# diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb index 98d39623..a78f876c 100644 --- a/app/models/llm_model.rb +++ b/app/models/llm_model.rb @@ -56,7 +56,11 @@ class LlmModel < ActiveRecord::Base end def to_llm - DiscourseAi::Completions::Llm.proxy("custom:#{id}") + DiscourseAi::Completions::Llm.proxy(identifier) + end + + def identifier + "custom:#{id}" end def toggle_companion_user diff --git a/app/serializers/ai_spam_serializer.rb b/app/serializers/ai_spam_serializer.rb new file mode 100644 index 00000000..179d828c --- /dev/null +++ b/app/serializers/ai_spam_serializer.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +class AiSpamSerializer < ApplicationSerializer + attributes :is_enabled, :llm_id, :custom_instructions, :available_llms, :stats, :flagging_username + + def is_enabled + object[:enabled] + end + + def llm_id + settings&.llm_model&.id + end + + def custom_instructions + settings&.custom_instructions + end + + def available_llms + DiscourseAi::Configuration::LlmEnumerator + .values(allowed_seeded_llms: SiteSetting.ai_spam_detection_model_allowed_seeded_models_map) + .map { |hash| { id: hash[:value], name: hash[:name] } } + end + + def flagging_username + object[:flagging_username] + end + + def stats + { + scanned_count: object[:stats].scanned_count.to_i, + spam_detected: object[:stats].spam_detected.to_i, + false_positives: object[:stats].false_positives.to_i, + false_negatives: object[:stats].false_negatives.to_i, + } + end + + def settings + object[:settings] + end +end diff --git a/assets/javascripts/discourse/admin-discourse-ai-plugin-route-map.js b/assets/javascripts/discourse/admin-discourse-ai-plugin-route-map.js index 0a9de7f1..5e526247 100644 --- a/assets/javascripts/discourse/admin-discourse-ai-plugin-route-map.js +++ b/assets/javascripts/discourse/admin-discourse-ai-plugin-route-map.js @@ -18,6 +18,7 @@ export default { this.route("new"); this.route("show", { path: "/:id" }); }); + this.route("discourse-ai-spam", { path: "ai-spam" }); this.route("discourse-ai-usage", { path: "ai-usage" }); }, }; diff --git a/assets/javascripts/discourse/components/ai-spam.gjs b/assets/javascripts/discourse/components/ai-spam.gjs new file mode 100644 index 00000000..43c41e0b --- /dev/null +++ b/assets/javascripts/discourse/components/ai-spam.gjs @@ -0,0 +1,243 @@ +import Component from "@glimmer/component"; +import { tracked } from "@glimmer/tracking"; +import { fn } from "@ember/helper"; +import { on } from "@ember/modifier"; +import { action } from "@ember/object"; +import { LinkTo } from "@ember/routing"; +import { service } from "@ember/service"; +import DButton from "discourse/components/d-button"; +import DToggleSwitch from "discourse/components/d-toggle-switch"; +import DTooltip from "discourse/components/d-tooltip"; +import withEventValue from "discourse/helpers/with-event-value"; +import { ajax } from "discourse/lib/ajax"; +import { popupAjaxError } from "discourse/lib/ajax-error"; +import i18n from "discourse-common/helpers/i18n"; +import getURL from "discourse-common/lib/get-url"; +import AdminConfigAreaCard from 
"admin/components/admin-config-area-card"; +import AdminPageSubheader from "admin/components/admin-page-subheader"; +import ComboBox from "select-kit/components/combo-box"; +import SpamTestModal from "./modal/spam-test-modal"; + +export default class AiSpam extends Component { + @service siteSettings; + @service toasts; + @service modal; + + @tracked + stats = { + scanned_count: 0, + spam_detected: 0, + false_positives: 0, + false_negatives: 0, + daily_data: [], + }; + @tracked isEnabled = false; + @tracked selectedLLM = null; + @tracked customInstructions = ""; + + constructor() { + super(...arguments); + this.initializeFromModel(); + } + + @action + initializeFromModel() { + const model = this.args.model; + this.isEnabled = model.is_enabled; + + if (model.llm_id) { + this.selectedLLM = "custom:" + model.llm_id; + } else { + if (this.availableLLMs.length) { + this.selectedLLM = this.availableLLMs[0].id; + this.autoSelectedLLM = true; + } + } + this.customInstructions = model.custom_instructions; + this.stats = model.stats; + } + + get availableLLMs() { + return this.args.model?.available_llms || []; + } + + @action + async toggleEnabled() { + this.isEnabled = !this.isEnabled; + const data = { is_enabled: this.isEnabled }; + if (this.autoSelectedLLM) { + data.llm_model_id = this.llmId; + } + try { + const response = await ajax("/admin/plugins/discourse-ai/ai-spam.json", { + type: "PUT", + data, + }); + this.autoSelectedLLM = false; + this.isEnabled = response.is_enabled; + } catch (error) { + this.isEnabled = !this.isEnabled; + popupAjaxError(error); + } + } + + get llmId() { + return this.selectedLLM.toString().split(":")[1]; + } + + @action + async updateLLM(value) { + this.selectedLLM = value; + } + + @action + async save() { + try { + await ajax("/admin/plugins/discourse-ai/ai-spam.json", { + type: "PUT", + data: { + llm_model_id: this.llmId, + custom_instructions: this.customInstructions, + }, + }); + this.toasts.success({ + data: { message: i18n("discourse_ai.spam.settings_saved") }, + duration: 2000, + }); + } catch (error) { + popupAjaxError(error); + } + } + + @action + showTestModal() { + this.modal.show(SpamTestModal, { + model: { + customInstructions: this.customInstructions, + llmId: this.llmId, + }, + }); + } + + get metrics() { + const detected = { + label: "discourse_ai.spam.spam_detected", + value: this.stats.spam_detected, + }; + if (this.args.model.flagging_username) { + detected.href = getURL( + "/review?flagged_by=" + this.args.model.flagging_username + ); + } + return [ + { + label: "discourse_ai.spam.scanned_count", + value: this.stats.scanned_count, + }, + detected, + { + label: "discourse_ai.spam.false_positives", + value: this.stats.false_positives, + }, + { + label: "discourse_ai.spam.false_negatives", + value: this.stats.false_negatives, + }, + ]; + } + + +} diff --git a/assets/javascripts/discourse/components/modal/spam-test-modal.gjs b/assets/javascripts/discourse/components/modal/spam-test-modal.gjs new file mode 100644 index 00000000..d1dc47d0 --- /dev/null +++ b/assets/javascripts/discourse/components/modal/spam-test-modal.gjs @@ -0,0 +1,101 @@ +import Component from "@glimmer/component"; +import { tracked } from "@glimmer/tracking"; +import { fn } from "@ember/helper"; +import { on } from "@ember/modifier"; +import { action } from "@ember/object"; +import DButton from "discourse/components/d-button"; +import DModal from "discourse/components/d-modal"; +import withEventValue from "discourse/helpers/with-event-value"; +import { ajax } from 
"discourse/lib/ajax"; +import { popupAjaxError } from "discourse/lib/ajax-error"; +import I18n from "discourse-i18n"; +import AiIndicatorWave from "../ai-indicator-wave"; + +export default class SpamTestModal extends Component { + @tracked testResult; + @tracked isLoading = false; + @tracked postUrl = ""; + @tracked scanLog = ""; + @tracked isSpam; + + @action + async runTest() { + this.isLoading = true; + try { + const response = await ajax( + `/admin/plugins/discourse-ai/ai-spam/test.json`, + { + type: "POST", + data: { + post_url: this.postUrl, + custom_instructions: this.args.model.customInstructions, + llm_id: this.args.model.llmId, + }, + } + ); + + this.isSpam = response.is_spam; + this.testResult = response.is_spam + ? I18n.t("discourse_ai.spam.test_modal.spam") + : I18n.t("discourse_ai.spam.test_modal.not_spam"); + this.scanLog = response.log; + } catch (error) { + popupAjaxError(error); + } finally { + this.isLoading = false; + } + } + + +} diff --git a/assets/javascripts/initializers/admin-plugin-configuration-nav.js b/assets/javascripts/initializers/admin-plugin-configuration-nav.js index d667b468..119744a1 100644 --- a/assets/javascripts/initializers/admin-plugin-configuration-nav.js +++ b/assets/javascripts/initializers/admin-plugin-configuration-nav.js @@ -24,6 +24,10 @@ export default { label: "discourse_ai.tools.short_title", route: "adminPlugins.show.discourse-ai-tools", }, + { + label: "discourse_ai.spam.short_title", + route: "adminPlugins.show.discourse-ai-spam", + }, { label: "discourse_ai.usage.short_title", route: "adminPlugins.show.discourse-ai-usage", diff --git a/assets/stylesheets/modules/llms/common/spam.scss b/assets/stylesheets/modules/llms/common/spam.scss new file mode 100644 index 00000000..ca0021c5 --- /dev/null +++ b/assets/stylesheets/modules/llms/common/spam.scss @@ -0,0 +1,124 @@ +.ai-spam { + --chart-scanned-color: var(--success); + --chart-spam-color: var(--danger); + padding-top: 15px; + + &__settings { + margin-bottom: 2em; + } + + &__enabled { + display: flex; + align-items: center; + gap: 0.4em; + margin-bottom: 1em; + + .fk-d-tooltip__trigger { + color: var(--primary-high); + } + } + + &__settings-title { + margin-bottom: 1em; + } + + &__toggle, + &__llm, + &__instructions { + margin-bottom: 1em; + } + + &__toggle-label, + &__llm-label, + &__instructions-label { + display: block; + margin-bottom: 0.5em; + font-weight: bold; + } + + &__instructions-input { + width: 100%; + min-height: 100px; + margin-bottom: 0.5em; + } + + &__stats { + margin-top: 2em; + } + + &__stats-title { + margin-bottom: 1em; + } + + &__metrics { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + gap: 1em; + margin-bottom: 2em; + } + + &__metrics-item { + display: flex; + flex-direction: column; + padding: 1em; + background: var(--primary-very-low); + border-radius: 0.25em; + } + + &__metrics-label { + color: var(--primary-medium); + font-size: 0.875em; + margin-bottom: 0.5em; + } + + &__metrics-value { + color: var(--primary); + font-size: 1.5em; + font-weight: bold; + } +} + +.spam-test-modal { + &__body { + min-width: 500px; + } + + &__test-result { + margin-top: 1.5em; + padding-top: 1.5em; + border-top: 1px solid var(--primary-low); + } + + &__verdict { + font-size: var(--font-up-2); + font-weight: bold; + padding: 0.5em; + border-radius: 0.25em; + text-align: center; + margin: 1em 0; + + &.is-spam { + background: var(--danger-low); + color: var(--danger); + } + + &.not-spam { + background: var(--success-low); + color: var(--success); + 
} + } + + &__log { + margin-top: 1em; + + pre { + max-height: 300px; + overflow-y: auto; + background: var(--primary-very-low); + padding: 1em; + margin: 0.5em 0; + font-family: monospace; + white-space: pre-wrap; + } + } +} diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index baa85203..776a4e76 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -129,6 +129,35 @@ en: modals: select_option: "Select an option..." + spam: + short_title: "Spam" + title: "Configure spam handling" + select_llm: "Select LLM" + custom_instructions: "Custom instructions" + custom_instructions_help: "Custom instructions specific to your site to help guide the AI in identifying spam, e.g. 'Be more aggressive about scanning posts not in English'." + last_seven_days: "Last 7 days" + scanned_count: "Posts scanned" + false_positives: "Incorrectly flagged" + false_negatives: "Missed spam" + spam_detected: "Spam detected" + custom_instructions_placeholder: "Site-specific instructions for the AI to help identify spam more accurately" + enable: "Enable" + spam_tip: "AI spam detection will scan the first 3 posts by all new users on public topics. It will flag them for review and block users if they are likely spam." + settings_saved: "Settings saved" + spam_description: "Identifies potential spam using the selected LLM and flags it for site moderators to inspect in the review queue" + no_llms: "No LLMs available" + test_button: "Test..." + save_button: "Save changes" + test_modal: + title: "Test spam detection" + post_url_label: "Post URL or ID" + post_url_placeholder: "https://your-forum.com/t/topic/123/4 or post ID" + result: "Result" + scan_log: "Scan log" + run: "Run test" + spam: "Spam" + not_spam: "Not spam" + usage: short_title: "Usage" summary: "Summary" @@ -305,6 +334,7 @@ en: ai_persona: "Persona (%{persona})" ai_summarization: "Summarize" ai_embeddings_semantic_search: "AI search" + ai_spam: "Spam" in_use_warning: one: "This model is currently used by %{settings}. If misconfigured, the feature won't work as expected." other: "This model is currently used by the following: %{settings}. If misconfigured, features won't work as expected. " diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index ea8aeed7..d45aa4cb 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -251,6 +251,8 @@ en: other_content_in_pm: "Personal messages containing posts from other people cannot be shared publicly" failed_to_share: "Failed to share the conversation" conversation_deleted: "Conversation share deleted successfully" + spam_detection: + flag_reason: "Flagged as spam by Discourse AI" ai_bot: reply_error: "Sorry, it looks like our system encountered an unexpected issue while trying to reply.\n\n[details='Error details']\n%{details}\n[/details]" default_pm_prefix: "[Untitled AI bot PM]" @@ -413,9 +415,10 @@ en: llm: configuration: disable_module_first: "You have to disable %{setting} first." - set_llm_first: "Set %{setting} first." + set_llm_first: "Set %{setting} first" model_unreachable: "We couldn't get a response from this model. Check your settings first." - invalid_seeded_model: "You can't use this model with this feature." 
+ invalid_seeded_model: "You can't use this model with this feature" + must_select_model: "You must select a LLM first" endpoints: not_configured: "%{display_name} (not configured)" configuration_hint: diff --git a/config/routes.rb b/config/routes.rb index ae82f77c..f70b5a4b 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -80,6 +80,9 @@ Discourse::Application.routes.draw do get "/ai-usage", to: "discourse_ai/admin/ai_usage#show" get "/ai-usage-report", to: "discourse_ai/admin/ai_usage#report" + get "/ai-spam", to: "discourse_ai/admin/ai_spam#show" + put "/ai-spam", to: "discourse_ai/admin/ai_spam#update" + post "/ai-spam/test", to: "discourse_ai/admin/ai_spam#test" resources :ai_llms, only: %i[index create show update destroy], diff --git a/config/settings.yml b/config/settings.yml index 7c0afba7..b1b86be5 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -25,7 +25,7 @@ discourse_ai: ai_sentiment_backfill_post_max_age_days: default: 60 hidden: true - + ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations" ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings" @@ -321,3 +321,16 @@ discourse_ai: type: list list_type: compact default: "" + + ai_spam_detection_enabled: + default: false + hidden: true + + ai_spam_detection_user_id: + default: "" + hidden: true + + ai_spam_detection_model_allowed_seeded_models: + default: "" + hidden: true + type: list diff --git a/db/migrate/20241206030229_add_ai_moderation_settings.rb b/db/migrate/20241206030229_add_ai_moderation_settings.rb new file mode 100644 index 00000000..12e17782 --- /dev/null +++ b/db/migrate/20241206030229_add_ai_moderation_settings.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true +class AddAiModerationSettings < ActiveRecord::Migration[7.2] + def change + create_enum :ai_moderation_setting_type, %w[spam nsfw custom] + + create_table :ai_moderation_settings do |t| + t.enum :setting_type, enum_type: "ai_moderation_setting_type", null: false + t.jsonb :data, default: {} + t.bigint :llm_model_id, null: false + t.timestamps + end + + add_index :ai_moderation_settings, :setting_type, unique: true + end +end diff --git a/db/migrate/20241206051225_add_ai_spam_logs.rb b/db/migrate/20241206051225_add_ai_spam_logs.rb new file mode 100644 index 00000000..5ef42388 --- /dev/null +++ b/db/migrate/20241206051225_add_ai_spam_logs.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true +class AddAiSpamLogs < ActiveRecord::Migration[7.2] + def change + create_table :ai_spam_logs do |t| + t.bigint :post_id, null: false + t.bigint :llm_model_id, null: false + t.bigint :ai_api_audit_log_id + t.bigint :reviewable_id + t.boolean :is_spam, null: false + t.string :payload, null: false, default: "", limit: 20_000 + t.timestamps + end + + add_index :ai_spam_logs, :post_id + end +end diff --git a/lib/ai_moderation/entry_point.rb b/lib/ai_moderation/entry_point.rb new file mode 100644 index 00000000..a9655fd8 --- /dev/null +++ b/lib/ai_moderation/entry_point.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module DiscourseAi + module AiModeration + class EntryPoint + def inject_into(plugin) + plugin.on(:post_created) { |post| SpamScanner.new_post(post) } + plugin.on(:post_edited) { |post| SpamScanner.edited_post(post) } + plugin.on(:post_process_cooked) { |_doc, post| SpamScanner.after_cooked_post(post) } + + plugin.on(:site_setting_changed) do |name, _old_value, new_value| + SpamScanner.ensure_flagging_user! 
if name == :ai_spam_detection_enabled && new_value + end + end + end + end +end diff --git a/lib/ai_moderation/spam_report.rb b/lib/ai_moderation/spam_report.rb new file mode 100644 index 00000000..5bb3b395 --- /dev/null +++ b/lib/ai_moderation/spam_report.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module DiscourseAi + module AiModeration + class SpamReport + def self.generate(min_date: 1.week.ago) + spam_status = [Reviewable.statuses[:approved], Reviewable.statuses[:deleted]] + ham_status = [Reviewable.statuses[:rejected], Reviewable.statuses[:ignored]] + + sql = <<~SQL + WITH spam_stats AS ( + SELECT + asl.reviewable_id, + asl.post_id, + asl.is_spam, + r.status as reviewable_status, + r.target_type, + r.potential_spam + FROM ai_spam_logs asl + LEFT JOIN reviewables r ON r.id = asl.reviewable_id + WHERE asl.created_at > :min_date + ), + post_reviewables AS ( + SELECT + target_id post_id, + COUNT(DISTINCT target_id) as false_negative_count + FROM reviewables + WHERE target_type = 'Post' + AND status IN (:spam) + AND potential_spam + AND target_id IN (SELECT post_id FROM spam_stats) + GROUP BY target_id + ) + SELECT + COUNT(*) AS scanned_count, + SUM(CASE WHEN is_spam THEN 1 ELSE 0 END) AS spam_detected, + COUNT(CASE WHEN reviewable_status IN (:ham) THEN 1 END) AS false_positives, + COALESCE(SUM(pr.false_negative_count), 0) AS false_negatives + FROM spam_stats + LEFT JOIN post_reviewables pr USING (post_id) + SQL + + DB.query(sql, spam: spam_status, ham: ham_status, min_date: min_date).first + end + end + end +end diff --git a/lib/ai_moderation/spam_scanner.rb b/lib/ai_moderation/spam_scanner.rb new file mode 100644 index 00000000..72b23886 --- /dev/null +++ b/lib/ai_moderation/spam_scanner.rb @@ -0,0 +1,371 @@ +# frozen_string_literal: true + +module DiscourseAi + module AiModeration + class SpamScanner + POSTS_TO_SCAN = 3 + MINIMUM_EDIT_DIFFERENCE = 10 + EDIT_DELAY_MINUTES = 10 + MAX_AGE_TO_SCAN = 1.day + MAX_RAW_SCAN_LENGTH = 5000 + + SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post" + + def self.new_post(post) + return if !enabled? + return if !should_scan_post?(post) + + flag_post_for_scanning(post) + end + + def self.ensure_flagging_user! + if !SiteSetting.ai_spam_detection_user_id.present? + User.transaction do + # prefer a "high" id for this bot + id = User.where("id > -20").minimum(:id) - 1 + id = User.minimum(:id) - 1 if id == -100 + + user = + User.create!( + id: id, + username: UserNameSuggester.suggest("discourse_ai_spam"), + name: "Discourse AI Spam Scanner", + email: "#{SecureRandom.hex(10)}@invalid.invalid", + active: true, + approved: true, + trust_level: TrustLevel[4], + admin: true, + ) + Group.user_trust_level_change!(user.id, user.trust_level) + + SiteSetting.ai_spam_detection_user_id = user.id + end + end + end + + def self.flagging_user + user = nil + if SiteSetting.ai_spam_detection_user_id.present? + user = User.find_by(id: SiteSetting.ai_spam_detection_user_id) + end + user || Discourse.system_user + end + + def self.after_cooked_post(post) + return if !enabled? 
+ return if !should_scan_post?(post) + return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] + return if post.updated_at < MAX_AGE_TO_SCAN.ago + + last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first + + if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago + delay_minutes = + ((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60 + Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id) + else + Jobs.enqueue(:ai_spam_scan, post_id: post.id) + end + end + + def self.edited_post(post) + return if !enabled? + return if !should_scan_post?(post) + return if scanned_max_times?(post) + + previous_version = post.revisions.last&.modifications&.dig("raw", 0) + current_version = post.raw + + return if !significant_change?(previous_version, current_version) + + flag_post_for_scanning(post) + end + + def self.flag_post_for_scanning(post) + post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true" + post.save_custom_fields + end + + def self.enabled? + SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled + end + + def self.should_scan_post?(post) + return false if !post.present? + return false if post.user.trust_level > TrustLevel[1] + return false if post.topic.private_message? + if Post + .where(user_id: post.user_id) + .joins(:topic) + .where(topic: { archetype: Archetype.default }) + .limit(4) + .count > 3 + return false + end + true + end + + def self.scanned_max_times?(post) + AiSpamLog.where(post_id: post.id).count >= 3 + end + + def self.significant_change?(previous_version, current_version) + return true if previous_version.nil? # First edit should be scanned + + # Use Discourse's built-in levenshtein implementation + distance = + ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000]) + + distance >= MINIMUM_EDIT_DIFFERENCE + end + + def self.test_post(post, custom_instructions: nil, llm_id: nil) + settings = AiModerationSetting.spam + llm_model = llm_id ? 
LlmModel.find(llm_id) : settings.llm_model + llm = llm_model.to_llm + custom_instructions = custom_instructions || settings.custom_instructions.presence + context = build_context(post) + prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions) + + result = + llm.generate( + prompt, + temperature: 0.1, + max_tokens: 5, + user: Discourse.system_user, + feature_name: "spam_detection_test", + feature_context: { + post_id: post.id, + }, + )&.strip + + history = nil + AiSpamLog + .where(post: post) + .order(:created_at) + .limit(100) + .each do |log| + history ||= +"Scan History:\n" + history << "date: #{log.created_at} is_spam: #{log.is_spam}\n" + end + + log = +"Scanning #{post.url}\n\n" + + if history + log << history + log << "\n" + end + + log << "LLM: #{llm_model.name}\n\n" + log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n" + log << "Context: #{context}\n\n" + + is_spam = check_if_spam(result) + + prompt.push(type: :model, content: result) + prompt.push(type: :user, content: "Explain your reasoning") + + reasoning = + llm.generate( + prompt, + temperature: 0.1, + max_tokens: 100, + user: Discourse.system_user, + feature_name: "spam_detection_test", + feature_context: { + post_id: post.id, + }, + )&.strip + + log << "#{reasoning}" + + { is_spam: is_spam, log: log } + end + + def self.completion_prompt(post, context:, custom_instructions:) + system_prompt = build_system_prompt(custom_instructions) + prompt = DiscourseAi::Completions::Prompt.new(system_prompt) + args = { type: :user, content: context } + upload_ids = post.upload_ids + args[:upload_ids] = upload_ids.take(3) if upload_ids.present? + prompt.push(**args) + prompt + end + + def self.perform_scan(post) + return if !enabled? + return if !should_scan_post?(post) + + settings = AiModerationSetting.spam + return if !settings || !settings.llm_model + + context = build_context(post) + llm = settings.llm_model.to_llm + custom_instructions = settings.custom_instructions.presence + prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions) + + begin + result = + llm.generate( + prompt, + temperature: 0.1, + max_tokens: 5, + user: Discourse.system_user, + feature_name: "spam_detection", + feature_context: { + post_id: post.id, + }, + )&.strip + + is_spam = check_if_spam(result) + + log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first + AiSpamLog.transaction do + log = + AiSpamLog.create!( + post: post, + llm_model: settings.llm_model, + ai_api_audit_log: log, + is_spam: is_spam, + payload: context, + ) + handle_spam(post, log) if is_spam + end + rescue StandardError => e + # we need retries otherwise stuff will not be handled + Discourse.warn_exception( + e, + message: "Discourse AI: Error in SpamScanner for post #{post.id}", + ) + raise e + end + end + + private + + def self.check_if_spam(result) + (result.present? && result.strip.downcase.start_with?("spam")) + end + + def self.build_context(post) + context = [] + + # Clear distinction between reply and new topic + if post.is_first_post? + context << "NEW TOPIC POST ANALYSIS" + context << "- Topic title: #{post.topic.title}" + context << "- Category: #{post.topic.category&.name}" + else + context << "REPLY POST ANALYSIS" + context << "- In topic: #{post.topic.title}" + context << "- Category: #{post.topic.category&.name}" + context << "- Topic started by: #{post.topic.user.username}" + + # Include parent post context for replies + if post.reply_to_post.present? 
+ parent = post.reply_to_post + context << "\nReplying to #{parent.user.username}'s post:" + context << "#{parent.raw[0..500]}..." if parent.raw.length > 500 + context << parent.raw if parent.raw.length <= 500 + end + end + + context << "\nPost Author Information:" + context << "- Username: #{post.user.username}" + context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days" + context << "- Total posts: #{post.user.post_count}" + context << "- Trust level: #{post.user.trust_level}" + + context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n" + context << post.raw[0..MAX_RAW_SCAN_LENGTH] + context.join("\n") + end + + def self.build_system_prompt(custom_instructions) + base_prompt = +<<~PROMPT + You are a spam detection system. Analyze the following post content and context. + Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate. + + - ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API + + Consider the post type carefully: + - For REPLY posts: Check if the response is relevant and topical to the thread + - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion + + A post is spam if it matches any of these criteria: + - Contains unsolicited commercial content or promotions + - Has suspicious or unrelated external links + - Shows patterns of automated/bot posting + - Contains irrelevant content or advertisements + - For replies: Completely unrelated to the discussion thread + - Uses excessive keywords or repetitive text patterns + - Shows suspicious formatting or character usage + + Be especially strict with: + - Replies that ignore the previous conversation + - Posts containing multiple unrelated external links + - Generic responses that could be posted anywhere + + Be fair to: + - New users making legitimate first contributions + - Non-native speakers making genuine efforts to participate + - Topic-relevant product mentions in appropriate contexts + PROMPT + + base_prompt << "\n\n" + base_prompt << <<~SITE_SPECIFIC + Site Specific Information: + - Site name: #{SiteSetting.title} + - Site URL: #{Discourse.base_url} + - Site description: #{SiteSetting.site_description} + - Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")} + SITE_SPECIFIC + + if custom_instructions.present? 
+ base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}" + end + + base_prompt + end + + def self.handle_spam(post, log) + url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam" + reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url) + + result = + PostActionCreator.new( + flagging_user, + post, + PostActionType.types[:spam], + reason: reason, + queue_for_review: true, + ).perform + + log.update!(reviewable: result.reviewable) + SpamRule::AutoSilence.new(post.user, post).silence_user + # this is required cause tl1 is not auto hidden + # we want to also handle tl1 + hide_posts_and_topics(post.user) + end + + def self.hide_posts_and_topics(user) + Post + .where(user_id: user.id) + .where("created_at > ?", 24.hours.ago) + .update_all( + [ + "hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)", + Post.hidden_reasons[:new_user_spam_threshold_reached], + ], + ) + topic_ids = + Post + .where(user_id: user.id, post_number: 1) + .where("created_at > ?", 24.hours.ago) + .select(:topic_id) + + Topic.where(id: topic_ids).update_all(visible: false) + end + end + end +end diff --git a/lib/configuration/llm_enumerator.rb b/lib/configuration/llm_enumerator.rb index 3b3c6a41..02a266fc 100644 --- a/lib/configuration/llm_enumerator.rb +++ b/lib/configuration/llm_enumerator.rb @@ -38,6 +38,11 @@ module DiscourseAi rval[model_id] << { type: :ai_embeddings_semantic_search } end + if SiteSetting.ai_spam_detection_enabled + model_id = AiModerationSetting.spam[:llm_model_id] + rval[model_id] << { type: :ai_spam } + end + rval end @@ -45,14 +50,20 @@ module DiscourseAi true end - def self.values + def self.values(allowed_seeded_llms: nil) values = DB.query_hash(<<~SQL).map(&:symbolize_keys) SELECT display_name AS name, id AS value FROM llm_models SQL - values.each { |value_h| value_h[:value] = "custom:#{value_h[:value]}" } + if allowed_seeded_llms.is_a?(Array) + values = + values.filter do |value_h| + value_h[:value] > 0 || allowed_seeded_llms.include?("custom:#{value_h[:value]}") + end + end + values.each { |value_h| value_h[:value] = "custom:#{value_h[:value]}" } values end diff --git a/plugin.rb b/plugin.rb index 9cc3a8c2..aac3eee3 100644 --- a/plugin.rb +++ b/plugin.rb @@ -37,6 +37,7 @@ register_asset "stylesheets/modules/sentiment/common/dashboard.scss" register_asset "stylesheets/modules/llms/common/ai-llms-editor.scss" register_asset "stylesheets/modules/llms/common/usage.scss" +register_asset "stylesheets/modules/llms/common/spam.scss" register_asset "stylesheets/modules/ai-bot/common/ai-tools.scss" @@ -71,6 +72,7 @@ after_initialize do DiscourseAi::AiHelper::EntryPoint.new, DiscourseAi::Summarization::EntryPoint.new, DiscourseAi::AiBot::EntryPoint.new, + DiscourseAi::AiModeration::EntryPoint.new, ].each { |a_module| a_module.inject_into(self) } register_reviewable_type ReviewableAiChatMessage diff --git a/spec/lib/modules/ai_moderation/spam_scanner_spec.rb b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb new file mode 100644 index 00000000..ab18b409 --- /dev/null +++ b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb @@ -0,0 +1,222 @@ +# frozen_string_literal: true + +require "rails_helper" + +RSpec.describe DiscourseAi::AiModeration::SpamScanner do + fab!(:user) { Fabricate(:user, trust_level: TrustLevel[0]) } + fab!(:topic) + fab!(:post) { Fabricate(:post, user: user, topic: topic) } + fab!(:llm_model) + fab!(:spam_setting) do + AiModerationSetting.create!( + setting_type: :spam, + llm_model: llm_model, + data: { 
+ custom_instructions: "test instructions", + }, + ) + end + + before do + SiteSetting.discourse_ai_enabled = true + SiteSetting.ai_spam_detection_enabled = true + end + + describe ".enabled?" do + it "returns true when both settings are enabled" do + expect(described_class.enabled?).to eq(true) + end + + it "returns false when discourse_ai is disabled" do + SiteSetting.discourse_ai_enabled = false + expect(described_class.enabled?).to eq(false) + end + + it "returns false when spam detection is disabled" do + SiteSetting.ai_spam_detection_enabled = false + expect(described_class.enabled?).to eq(false) + end + end + + describe ".should_scan_post?" do + it "returns true for new users' posts" do + expect(described_class.should_scan_post?(post)).to eq(true) + end + + it "returns false for trusted users" do + post.user.trust_level = TrustLevel[2] + expect(described_class.should_scan_post?(post)).to eq(false) + end + + it "returns false for users with many public posts" do + Fabricate(:post, user: user, topic: topic) + Fabricate(:post, user: user, topic: topic) + expect(described_class.should_scan_post?(post)).to eq(true) + + pm = Fabricate(:private_message_topic, user: user) + Fabricate(:post, user: user, topic: pm) + + expect(described_class.should_scan_post?(post)).to eq(true) + + topic = Fabricate(:topic, user: user) + Fabricate(:post, user: user, topic: topic) + + expect(described_class.should_scan_post?(post)).to eq(false) + end + + it "returns false for private messages" do + pm_topic = Fabricate(:private_message_topic) + pm_post = Fabricate(:post, topic: pm_topic, user: user) + expect(described_class.should_scan_post?(pm_post)).to eq(false) + end + + it "returns false for nil posts" do + expect(described_class.should_scan_post?(nil)).to eq(false) + end + end + + describe ".scanned_max_times?" do + it "returns true when post has been scanned 3 times" do + 3.times do + AiSpamLog.create!(post: post, llm_model: llm_model, ai_api_audit_log_id: 1, is_spam: false) + end + + expect(described_class.scanned_max_times?(post)).to eq(true) + end + + it "returns false for posts scanned less than 3 times" do + expect(described_class.scanned_max_times?(post)).to eq(false) + end + end + + describe ".significant_change?" 
do + it "returns true for first edits" do + expect(described_class.significant_change?(nil, "new content")).to eq(true) + end + + it "returns true for significant changes" do + old_version = "This is a test post" + new_version = "This is a completely different post with new content" + expect(described_class.significant_change?(old_version, new_version)).to eq(true) + end + + it "returns false for minor changes" do + old_version = "This is a test post" + new_version = "This is a test Post" # Only capitalization change + expect(described_class.significant_change?(old_version, new_version)).to eq(false) + end + end + + describe ".new_post" do + it "enqueues spam scan job for eligible posts" do + expect { + described_class.new_post(post) + described_class.after_cooked_post(post) + }.to change(Jobs::AiSpamScan.jobs, :size).by(1) + end + + it "doesn't enqueue jobs when disabled" do + SiteSetting.ai_spam_detection_enabled = false + expect { described_class.new_post(post) }.not_to change(Jobs::AiSpamScan.jobs, :size) + end + end + + describe ".edited_post" do + it "enqueues spam scan job for eligible edited posts" do + PostRevision.create!( + post: post, + modifications: { + raw: ["old content", "completely new content"], + }, + ) + + expect { + described_class.edited_post(post) + described_class.after_cooked_post(post) + }.to change(Jobs::AiSpamScan.jobs, :size).by(1) + end + + it "schedules delayed job when edited too soon after last scan" do + AiSpamLog.create!( + post: post, + llm_model: llm_model, + ai_api_audit_log_id: 1, + is_spam: false, + created_at: 5.minutes.ago, + ) + + expect { + described_class.edited_post(post) + described_class.after_cooked_post(post) + }.to change(Jobs::AiSpamScan.jobs, :size).by(1) + end + end + + describe "integration test" do + fab!(:llm_model) + let(:api_audit_log) { Fabricate(:api_audit_log) } + fab!(:post_with_uploaded_image) + + before { Jobs.run_immediately! } + + it "Can correctly run tests" do + prompts = nil + result = + DiscourseAi::Completions::Llm.with_prepared_responses( + ["spam", "the reason is just because"], + ) do |_, _, _prompts| + prompts = _prompts + described_class.test_post(post, custom_instructions: "123") + end + + expect(prompts.length).to eq(2) + expect(result[:is_spam]).to eq(true) + expect(result[:log]).to include("123") + expect(result[:log]).to include("just because") + + result = + DiscourseAi::Completions::Llm.with_prepared_responses( + ["not_spam", "the reason is just because"], + ) do |_, _, _prompts| + prompts = _prompts + described_class.test_post(post, custom_instructions: "123") + end + + expect(result[:is_spam]).to eq(false) + end + + it "Correctly handles spam scanning" do + expect(described_class.flagging_user.id).not_to eq(Discourse.system_user.id) + + # flag post for scanning + post = post_with_uploaded_image + + described_class.new_post(post) + + prompt = nil + DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts| + # force a rebake so we actually scan + post.rebake! 
+ prompt = _prompts.first + end + + content = prompt.messages[1][:content] + expect(content).to include(post.topic.title) + expect(content).to include(post.raw) + + upload_ids = prompt.messages[1][:upload_ids] + expect(upload_ids).to be_present + expect(upload_ids).to eq(post.upload_ids) + + log = AiSpamLog.find_by(post: post) + + expect(log.payload).to eq(content) + expect(log.is_spam).to eq(true) + expect(post.user.reload.silenced_till).to be_present + expect(post.topic.reload.visible).to eq(false) + + expect(log.reviewable).to be_present + expect(log.reviewable.created_by_id).to eq(described_class.flagging_user.id) + end + end +end diff --git a/spec/requests/admin/ai_spam_controller_spec.rb b/spec/requests/admin/ai_spam_controller_spec.rb new file mode 100644 index 00000000..5f82d306 --- /dev/null +++ b/spec/requests/admin/ai_spam_controller_spec.rb @@ -0,0 +1,285 @@ +# frozen_string_literal: true + +require "rails_helper" + +RSpec.describe DiscourseAi::Admin::AiSpamController do + fab!(:admin) + fab!(:user) + fab!(:llm_model) + + describe "#update" do + context "when logged in as admin" do + before { sign_in(admin) } + + it "can update settings from scratch" do + put "/admin/plugins/discourse-ai/ai-spam.json", + params: { + is_enabled: true, + llm_model_id: llm_model.id, + custom_instructions: "custom instructions", + } + + expect(response.status).to eq(200) + expect(SiteSetting.ai_spam_detection_enabled).to eq(true) + expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id) + expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions") + end + + it "denies update for disallowed seeded llm" do + seeded_llm = Fabricate(:llm_model, id: -1) + + put "/admin/plugins/discourse-ai/ai-spam.json", + params: { + is_enabled: true, + llm_model_id: seeded_llm.id, + custom_instructions: "custom instructions", + } + + expect(response.status).to eq(422) + + SiteSetting.ai_spam_detection_model_allowed_seeded_models = seeded_llm.identifier + + put "/admin/plugins/discourse-ai/ai-spam.json", + params: { + is_enabled: true, + llm_model_id: seeded_llm.id, + custom_instructions: "custom instructions", + } + + expect(response.status).to eq(200) + end + + it "can not enable spam detection without a model selected" do + put "/admin/plugins/discourse-ai/ai-spam.json", + params: { + custom_instructions: "custom instructions", + } + expect(response.status).to eq(422) + end + + it "can not fiddle with custom instructions without an llm" do + put "/admin/plugins/discourse-ai/ai-spam.json", params: { is_enabled: true } + expect(response.status).to eq(422) + end + + context "when spam detection was already set" do + fab!(:setting) do + AiModerationSetting.create( + { + setting_type: :spam, + llm_model_id: llm_model.id, + data: { + custom_instructions: "custom instructions", + }, + }, + ) + end + + it "can partially update settings" do + put "/admin/plugins/discourse-ai/ai-spam.json", params: { is_enabled: false } + + expect(response.status).to eq(200) + expect(SiteSetting.ai_spam_detection_enabled).to eq(false) + expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id) + expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions") + end + + it "can update pre existing settings" do + put "/admin/plugins/discourse-ai/ai-spam.json", + params: { + is_enabled: true, + llm_model_id: llm_model.id, + custom_instructions: "custom instructions new", + } + + expect(response.status).to eq(200) + expect(SiteSetting.ai_spam_detection_enabled).to eq(true) + 
expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id) + expect(AiModerationSetting.spam.data["custom_instructions"]).to eq( + "custom instructions new", + ) + end + end + end + end + + describe "#test" do + fab!(:spam_post) { Fabricate(:post) } + fab!(:spam_post2) { Fabricate(:post, topic: spam_post.topic, raw: "something special 123") } + fab!(:setting) do + AiModerationSetting.create( + { + setting_type: :spam, + llm_model_id: llm_model.id, + data: { + custom_instructions: "custom instructions", + }, + }, + ) + end + + before { sign_in(admin) } + + it "can scan using post url" do + llm2 = Fabricate(:llm_model, name: "DiffLLM") + + DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "just because"]) do + post "/admin/plugins/discourse-ai/ai-spam/test.json", + params: { + post_url: spam_post2.url, + llm_id: llm2.id, + } + end + + expect(response.status).to eq(200) + + parsed = response.parsed_body + expect(parsed["log"]).to include(spam_post2.raw) + expect(parsed["log"]).to include("DiffLLM") + end + + it "can scan using post id" do + DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because apples"]) do + post "/admin/plugins/discourse-ai/ai-spam/test.json", + params: { + post_url: spam_post.id.to_s, + } + end + + expect(response.status).to eq(200) + + parsed = response.parsed_body + expect(parsed["log"]).to include(spam_post.raw) + end + + it "returns proper spam test results" do + freeze_time DateTime.parse("2000-01-01") + + AiSpamLog.create!( + post: spam_post, + llm_model: llm_model, + is_spam: false, + created_at: 2.days.ago, + ) + + AiSpamLog.create!(post: spam_post, llm_model: llm_model, is_spam: true, created_at: 1.day.ago) + + DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because banana"]) do + post "/admin/plugins/discourse-ai/ai-spam/test.json", + params: { + post_url: spam_post.url, + custom_instructions: "special custom instructions", + } + end + + expect(response.status).to eq(200) + + parsed = response.parsed_body + expect(parsed["log"]).to include("special custom instructions") + expect(parsed["log"]).to include(spam_post.raw) + expect(parsed["is_spam"]).to eq(true) + expect(parsed["log"]).to include("Scan History:") + expect(parsed["log"]).to include("banana") + end + end + + describe "#show" do + context "when logged in as admin" do + before { sign_in(admin) } + + it "correctly filters seeded llms" do + SiteSetting.ai_spam_detection_enabled = true + seeded_llm = Fabricate(:llm_model, id: -1, name: "seeded") + + get "/admin/plugins/discourse-ai/ai-spam.json" + expect(response.status).to eq(200) + json = response.parsed_body + + # only includes fabricated model + expect(json["available_llms"].length).to eq(1) + + SiteSetting.ai_spam_detection_model_allowed_seeded_models = seeded_llm.identifier + + get "/admin/plugins/discourse-ai/ai-spam.json" + expect(response.status).to eq(200) + json = response.parsed_body + + expect(json["available_llms"].length).to eq(2) + end + + it "returns the serialized spam settings" do + SiteSetting.ai_spam_detection_enabled = true + + get "/admin/plugins/discourse-ai/ai-spam.json" + + expect(response.status).to eq(200) + json = response.parsed_body + expect(json["is_enabled"]).to eq(true) + expect(json["selected_llm"]).to eq(nil) + expect(json["custom_instructions"]).to eq(nil) + expect(json["available_llms"]).to be_an(Array) + expect(json["stats"]).to be_present + end + + it "return proper settings when spam detection is enabled" do + SiteSetting.ai_spam_detection_enabled = true + + 
AiModerationSetting.create( + { + setting_type: :spam, + llm_model_id: llm_model.id, + data: { + custom_instructions: "custom instructions", + }, + }, + ) + + flagging_user = DiscourseAi::AiModeration::SpamScanner.flagging_user + expect(flagging_user.id).not_to eq(Discourse.system_user.id) + + AiSpamLog.create!(post_id: 1, llm_model_id: llm_model.id, is_spam: true, payload: "test") + + get "/admin/plugins/discourse-ai/ai-spam.json" + + json = response.parsed_body + expect(json["is_enabled"]).to eq(true) + expect(json["llm_id"]).to eq(llm_model.id) + expect(json["custom_instructions"]).to eq("custom instructions") + + expect(json["stats"].to_h).to eq( + "scanned_count" => 1, + "spam_detected" => 1, + "false_positives" => 0, + "false_negatives" => 0, + ) + + expect(json["flagging_username"]).to eq(flagging_user.username) + end + end + + context "when not logged in as admin" do + it "returns 404 for anonymous users" do + get "/admin/plugins/discourse-ai/ai-spam.json" + expect(response.status).to eq(404) + end + + it "returns 404 for regular users" do + sign_in(user) + get "/admin/plugins/discourse-ai/ai-spam.json" + expect(response.status).to eq(404) + end + end + + context "when plugin is disabled" do + before do + sign_in(admin) + SiteSetting.discourse_ai_enabled = false + end + + it "returns 404" do + get "/admin/plugins/discourse-ai/ai-spam.json" + expect(response.status).to eq(404) + end + end + end +end diff --git a/spec/system/ai_moderation/ai_spam_spec.rb b/spec/system/ai_moderation/ai_spam_spec.rb new file mode 100644 index 00000000..4640b760 --- /dev/null +++ b/spec/system/ai_moderation/ai_spam_spec.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +RSpec.describe "AI Spam Configuration", type: :system, js: true do + fab!(:admin) + let(:llm_model) { Fabricate(:llm_model) } + + before do + SiteSetting.discourse_ai_enabled = true + sign_in(admin) + end + + it "can properly configure spam settings" do + visit "/admin/plugins/discourse-ai/ai-spam" + + expect(page).to have_css(".ai-spam__llm-placeholder") + + toggle = PageObjects::Components::DToggleSwitch.new(".ai-spam__toggle") + + toggle.toggle + dialog = PageObjects::Components::Dialog.new + expect(dialog).to have_content(I18n.t("discourse_ai.llm.configuration.must_select_model")) + dialog.click_ok + + expect(toggle.unchecked?).to eq(true) + + llm_model + visit "/admin/plugins/discourse-ai/ai-spam" + + toggle = PageObjects::Components::DToggleSwitch.new(".ai-spam__toggle") + toggle.toggle + + try_until_success { expect(AiModerationSetting.spam&.llm_model_id).to eq(llm_model.id) } + + find(".ai-spam__instructions-input").fill_in(with: "Test spam detection instructions") + find(".ai-spam__instructions-save").click + + toasts = PageObjects::Components::Toasts.new + expect(toasts).to have_content(I18n.t("js.discourse_ai.spam.settings_saved")) + + expect(AiModerationSetting.spam.custom_instructions).to eq("Test spam detection instructions") + + visit "/admin/plugins/discourse-ai/ai-llms" + + expect(find(".ai-llm-list-editor__usages")).to have_content( + I18n.t("js.discourse_ai.llms.usage.ai_spam"), + ) + end +end
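
Usage sketch (addendum, not part of the diff): the admin "Test" modal added above drives DiscourseAi::AiModeration::SpamScanner.test_post, and the same flow can be exercised from a Rails console. This is a minimal sketch based only on APIs introduced in this patch; the model lookup and post id are placeholders, and it assumes the plugin is enabled with at least one LlmModel configured.

    # Configure spam detection: pick an LLM and store site-specific instructions.
    llm = LlmModel.first                                  # placeholder lookup
    AiModerationSetting.create!(
      setting_type: :spam,
      llm_model_id: llm.id,
      data: { custom_instructions: "Be more aggressive about scanning posts not in English" },
    )
    SiteSetting.ai_spam_detection_enabled = true          # entry point hook also creates the flagging user

    # Dry-run the scanner against a single post (what the admin test endpoint does).
    post = Post.find(123)                                 # placeholder post id
    result = DiscourseAi::AiModeration::SpamScanner.test_post(
      post,
      custom_instructions: "extra instructions for this run only",
      llm_id: llm.id,
    )
    result[:is_spam]   # => true / false
    result[:log]       # => scan history, prompt context and the model's reasoning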