From c881f8b361c2deb87c548943492ad9ceaa0eb68c Mon Sep 17 00:00:00 2001
From: Natalie Tay
Date: Wed, 15 Jan 2025 11:48:57 +0800
Subject: [PATCH] DEV: Add rake task to send topics or posts to spam scanner (#1059)

Adds two rake tasks, ai:spam:scan_topics and ai:spam:scan_posts. Each one
enqueues an :ai_spam_scan job per matching post (first posts of topics, or
all posts) created inside a date range. The range defaults to "1 day ago"
through "now" when the arguments are omitted.

SpamScanner.perform_scan now only applies the should_scan_post? filter and
then delegates to the new SpamScanner.perform_scan!, which keeps the
enabled? check and the remainder of the scan logic.

---
Review notes (ignored by git am):
 * The scan_topics usage example used 2024-02-31, which is not a valid date
   and makes DateTime.parse raise; it now shows 2024-01-31.
 * The ".perform_scan" spec example "scans when post should be scanned" was
   calling perform_scan!; it now calls perform_scan so the positive path of
   the filtered entry point is actually covered.

 lib/ai_moderation/spam_scanner.rb             |  6 ++-
 lib/tasks/modules/ai_moderation/scan.rake     | 29 ++++++++++++++
 .../ai_moderation/spam_scanner_spec.rb        | 31 +++++++++++++++
 spec/tasks/scan_spec.rb                       | 38 +++++++++++++++++++
 4 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 lib/tasks/modules/ai_moderation/scan.rake
 create mode 100644 spec/tasks/scan_spec.rb

diff --git a/lib/ai_moderation/spam_scanner.rb b/lib/ai_moderation/spam_scanner.rb
index 60dffa24..b7b26949 100644
--- a/lib/ai_moderation/spam_scanner.rb
+++ b/lib/ai_moderation/spam_scanner.rb
@@ -193,9 +193,13 @@ module DiscourseAi
       end
 
       def self.perform_scan(post)
-        return if !enabled?
         return if !should_scan_post?(post)
 
+        perform_scan!(post)
+      end
+
+      def self.perform_scan!(post)
+        return if !enabled?
         settings = AiModerationSetting.spam
         return if !settings || !settings.llm_model
 
diff --git a/lib/tasks/modules/ai_moderation/scan.rake b/lib/tasks/modules/ai_moderation/scan.rake
new file mode 100644
index 00000000..a8b295fc
--- /dev/null
+++ b/lib/tasks/modules/ai_moderation/scan.rake
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+desc "Scan first posts of topics from a date, end date is optional. Usage: rake ai:spam:scan_topics[2024-01-01,2024-01-31]"
+task "ai:spam:scan_topics", %i[start_date end_date] => [:environment] do |_, args|
+  start_date = args[:start_date] ? DateTime.parse(args[:start_date]) : 1.day.ago
+  end_date = args[:end_date] ? DateTime.parse(args[:end_date]) : Time.current
+
+  scope = Topic.joins(:posts).where(created_at: start_date..end_date).where("posts.post_number = 1")
+  puts "Processing #{scope.count} topics from #{start_date} to #{end_date}"
+  scope
+    .select("topics.id, posts.id as post_id")
+    .find_each(batch_size: 500) do |record|
+      Jobs.enqueue(:ai_spam_scan, post_id: record.post_id)
+      print "."
+    end
+end
+
+desc "Scan posts from a date, end date is optional. Usage: rake ai:spam:scan_posts[2024-01-31,2024-02-01]"
+task "ai:spam:scan_posts", %i[start_date end_date] => [:environment] do |_, args|
+  start_date = args[:start_date] ? DateTime.parse(args[:start_date]) : 1.day.ago
+  end_date = args[:end_date] ? DateTime.parse(args[:end_date]) : Time.current
+
+  scope = Post.where(created_at: start_date..end_date).select(:id)
+  puts "Processing #{scope.count} posts from #{start_date} to #{end_date}"
+  scope.find_each(batch_size: 500) do |post|
+    Jobs.enqueue(:ai_spam_scan, post_id: post.id)
+    print "."
+  end
+end
diff --git a/spec/lib/modules/ai_moderation/spam_scanner_spec.rb b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb
index 29ab9cd5..cf2519e7 100644
--- a/spec/lib/modules/ai_moderation/spam_scanner_spec.rb
+++ b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb
@@ -75,6 +75,37 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
     end
   end
 
+  describe ".perform_scan" do
+    it "does nothing if post should not be scanned" do
+      post.user.trust_level = TrustLevel[2]
+
+      expect { described_class.perform_scan(post) }.not_to change { AiSpamLog.count }
+    end
+
+    it "scans when post should be scanned" do
+      expect do
+        DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do
+          described_class.perform_scan(post)
+        end
+      end.to change { AiSpamLog.count }.by(1)
+    end
+  end
+
+  describe ".perform_scan!" do
+    it "creates spam log entry when scanning post" do
+      expect do
+        DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do
+          described_class.perform_scan!(post)
+        end
+      end.to change { AiSpamLog.count }.by(1)
+    end
+
+    it "does nothing when disabled" do
+      SiteSetting.ai_spam_detection_enabled = false
+      expect { described_class.perform_scan!(post) }.not_to change { AiSpamLog.count }
+    end
+  end
+
   describe ".scanned_max_times?" do
     it "returns true when post has been scanned 3 times" do
       3.times do
diff --git a/spec/tasks/scan_spec.rb b/spec/tasks/scan_spec.rb
new file mode 100644
index 00000000..53187392
--- /dev/null
+++ b/spec/tasks/scan_spec.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+RSpec.describe "ai:spam rake tasks" do
+  let!(:topic1) { Fabricate(:topic, created_at: 2.days.ago) }
+  let!(:post1) { Fabricate(:post, topic: topic1, created_at: 2.days.ago) }
+  let!(:topic2) { Fabricate(:topic, created_at: 1.hour.ago) }
+  let!(:post2) { Fabricate(:post, topic: topic2, created_at: 1.hour.ago) }
+
+  describe "ai:spam:scan_posts" do
+    it "enqueues posts within date range" do
+      freeze_time do
+        start_date = 1.day.ago.to_s
+        end_date = Time.now.to_s
+
+        expect_enqueued_with(job: :ai_spam_scan, args: { post_id: post2.id }) do
+          Rake::Task["ai:spam:scan_posts"].invoke(start_date, end_date)
+        end
+
+        expect_not_enqueued_with(job: :ai_spam_scan, args: { post_id: post1.id })
+      end
+    end
+  end
+
+  describe "ai:spam:scan_topics" do
+    it "enqueues first posts of topics within date range" do
+      freeze_time do
+        start_date = 1.day.ago.to_s
+        end_date = Time.now.to_s
+
+        expect_enqueued_with(job: :ai_spam_scan, args: { post_id: topic2.first_post.id }) do
+          Rake::Task["ai:spam:scan_topics"].invoke(start_date, end_date)
+        end
+
+        expect_not_enqueued_with(job: :ai_spam_scan, args: { post_id: topic1.first_post.id })
+      end
+    end
+  end
+end