DEV: Add rake task to send topics or posts to spam scanner (#1059)

This commit is contained in:
Natalie Tay 2025-01-15 11:48:57 +08:00 committed by GitHub
parent 92f122c54d
commit c881f8b361
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 103 additions and 1 deletions

View File

@ -193,9 +193,13 @@ module DiscourseAi
end
# Guarded entry point for spam scanning: hands +post+ to perform_scan! only
# when the scanner is enabled and should_scan_post? accepts the post.
# Returns nil without scanning otherwise.
def self.perform_scan(post)
  perform_scan!(post) if enabled? && should_scan_post?(post)
end
def self.perform_scan!(post)
return if !enabled?
settings = AiModerationSetting.spam
return if !settings || !settings.llm_model

View File

@ -0,0 +1,29 @@
# frozen_string_literal: true
# Enqueues an :ai_spam_scan job for the first post (post_number = 1) of every
# topic whose created_at falls inside the requested window.
#
# Task arguments (both optional):
#   start_date - start of the window, parsed with DateTime.parse; defaults to 1.day.ago
#   end_date   - end of the window, parsed with DateTime.parse; defaults to Time.current
#
# DateTime.parse raises on dates that do not exist, so the usage example must
# only show real calendar dates.
desc "Scan first posts of topics from a date, end date is optional. Usage: rake ai:spam:scan_topics[2024-01-01,2024-01-31]"
task "ai:spam:scan_topics", %i[start_date end_date] => [:environment] do |_, args|
  start_date = args[:start_date] ? DateTime.parse(args[:start_date]) : 1.day.ago
  end_date = args[:end_date] ? DateTime.parse(args[:end_date]) : Time.current

  # Join each topic to its posts and keep only the opening post.
  scope = Topic.joins(:posts).where(created_at: start_date..end_date).where("posts.post_number = 1")

  puts "Processing #{scope.count} topics from #{start_date} to #{end_date}"

  # Only the ids are needed; the first post's id is exposed as record.post_id.
  scope
    .select("topics.id, posts.id as post_id")
    .find_each(batch_size: 500) do |record|
      Jobs.enqueue(:ai_spam_scan, post_id: record.post_id)
      print "." # one progress dot per enqueued job
    end
end
# Enqueues an :ai_spam_scan job for every post whose created_at lies inside
# the requested window.
#
# Task arguments (both optional):
#   start_date - start of the window, parsed with DateTime.parse; defaults to 1.day.ago
#   end_date   - end of the window, parsed with DateTime.parse; defaults to Time.current
desc "Scan posts from a date, end date is optional. Usage: rake ai:spam:scan_posts[2024-01-31,2024-02-01]"
task "ai:spam:scan_posts", %i[start_date end_date] => [:environment] do |_, args|
  start_date =
    if args[:start_date]
      DateTime.parse(args[:start_date])
    else
      1.day.ago
    end
  end_date =
    if args[:end_date]
      DateTime.parse(args[:end_date])
    else
      Time.current
    end

  # Only the id column is loaded; nothing else is needed to enqueue the job.
  posts_in_window = Post.where(created_at: start_date..end_date).select(:id)

  puts "Processing #{posts_in_window.count} posts from #{start_date} to #{end_date}"

  posts_in_window.find_each(batch_size: 500) do |row|
    Jobs.enqueue(:ai_spam_scan, post_id: row.id)
    print "." # one progress dot per enqueued job
  end
end

View File

@ -75,6 +75,37 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
end
end
# Covers the guarded entry point: perform_scan must skip posts rejected by
# should_scan_post? and scan the ones that pass.
describe ".perform_scan" do
  it "does nothing if post should not be scanned" do
    # Raising the author's trust level makes the post ineligible for scanning.
    post.user.trust_level = TrustLevel[2]

    expect { described_class.perform_scan(post) }.not_to change { AiSpamLog.count }
  end

  it "scans when post should be scanned" do
    expect do
      DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do
        # Call the guarded method itself (not perform_scan!) so the positive
        # path through perform_scan is actually exercised.
        described_class.perform_scan(post)
      end
    end.to change { AiSpamLog.count }.by(1)
  end
end
# Covers the unguarded scan: it writes an AiSpamLog row when the feature is
# enabled and is a no-op when the site setting is turned off.
describe ".perform_scan!" do
  it "creates spam log entry when scanning post" do
    expect {
      DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do
        described_class.perform_scan!(post)
      end
    }.to change(AiSpamLog, :count).by(1)
  end

  it "does nothing when disabled" do
    SiteSetting.ai_spam_detection_enabled = false

    expect { described_class.perform_scan!(post) }.not_to change(AiSpamLog, :count)
  end
end
describe ".scanned_max_times?" do
it "returns true when post has been scanned 3 times" do
3.times do

38
spec/tasks/scan_spec.rb Normal file
View File

@ -0,0 +1,38 @@
# frozen_string_literal: true
# Specs for the ai:spam:scan_posts and ai:spam:scan_topics rake tasks: each
# task must enqueue :ai_spam_scan jobs only for records created inside the
# requested date window.
RSpec.describe "ai:spam rake tasks" do
  # topic1/post1 are created before the one-day window used below.
  let!(:topic1) { Fabricate(:topic, created_at: 2.days.ago) }
  let!(:post1) { Fabricate(:post, topic: topic1, created_at: 2.days.ago) }
  # topic2/post2 are created inside that window.
  let!(:topic2) { Fabricate(:topic, created_at: 1.hour.ago) }
  let!(:post2) { Fabricate(:post, topic: topic2, created_at: 1.hour.ago) }

  describe "ai:spam:scan_posts" do
    it "enqueues posts within date range" do
      freeze_time do
        # [start_date, end_date] passed to the task as strings.
        window = [1.day.ago.to_s, Time.now.to_s]

        expect_enqueued_with(job: :ai_spam_scan, args: { post_id: post2.id }) do
          Rake::Task["ai:spam:scan_posts"].invoke(*window)
        end

        expect_not_enqueued_with(job: :ai_spam_scan, args: { post_id: post1.id })
      end
    end
  end

  describe "ai:spam:scan_topics" do
    it "enqueues first posts of topics within date range" do
      freeze_time do
        # [start_date, end_date] passed to the task as strings.
        window = [1.day.ago.to_s, Time.now.to_s]

        expect_enqueued_with(job: :ai_spam_scan, args: { post_id: topic2.first_post.id }) do
          Rake::Task["ai:spam:scan_topics"].invoke(*window)
        end

        expect_not_enqueued_with(job: :ai_spam_scan, args: { post_id: topic1.first_post.id })
      end
    end
  end
end