FIX: Multiple topics may have the same post as its solution (#348)

We are seeing some errors when migrating and adding indexes on `answer_post_id`.

```
#<StandardError:"An error has occurred, all later migrations canceled:\n\nPG::UniqueViolation: ERROR:  could not create unique index \"index_discourse_solved_solved_topics_on_answer_post_id\"\nDETAIL:  Key (answer_post_id)=(13006) is duplicated.\n">
```

This PR modifies the earlier migration, and also adds one before the addition of indexes to remove duplicates.
This commit is contained in:
Natalie Tay 2025-03-26 22:21:32 +08:00 committed by GitHub
parent a8a3554cec
commit 1805184cde
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 169 additions and 28 deletions

View File

@ -23,23 +23,31 @@ class CopySolvedTopicCustomFieldToDiscourseSolvedSolvedTopics < ActiveRecord::Mi
created_at, created_at,
updated_at updated_at
) )
SELECT DISTINCT ON (tc.topic_id) SELECT
tc.topic_id, tc.topic_id,
CAST(tc.value AS INTEGER), tc.answer_post_id,
CAST(tc2.value AS INTEGER), tc.topic_timer_id,
COALESCE(ua.acting_user_id, -1), tc.accepter_user_id,
tc.created_at, tc.created_at,
tc.updated_at tc.updated_at
FROM (
SELECT
tc.topic_id,
CAST(tc.value AS INTEGER) AS answer_post_id,
CAST(tc2.value AS INTEGER) AS topic_timer_id,
COALESCE(ua.acting_user_id, -1) AS accepter_user_id,
tc.created_at,
tc.updated_at,
ROW_NUMBER() OVER (PARTITION BY tc.topic_id ORDER BY tc.created_at ASC) AS rn_topic,
ROW_NUMBER() OVER (PARTITION BY CAST(tc.value AS INTEGER) ORDER BY tc.created_at ASC) AS rn_answer
FROM topic_custom_fields tc FROM topic_custom_fields tc
LEFT JOIN topic_custom_fields tc2 LEFT JOIN topic_custom_fields tc2 ON tc2.topic_id = tc.topic_id AND tc2.name = 'solved_auto_close_topic_timer_id'
ON tc2.topic_id = tc.topic_id LEFT JOIN user_actions ua ON ua.target_topic_id = tc.topic_id AND ua.action_type = 15
AND tc2.name = 'solved_auto_close_topic_timer_id'
LEFT JOIN user_actions ua
ON ua.target_topic_id = tc.topic_id
AND ua.action_type = 15
WHERE tc.name = 'accepted_answer_post_id' WHERE tc.name = 'accepted_answer_post_id'
AND tc.id > :last_id AND tc.id > :last_id
AND tc.id <= :last_id + :batch_size AND tc.id <= :last_id + :batch_size
) tc
WHERE tc.rn_topic = 1 AND tc.rn_answer = 1
ON CONFLICT DO NOTHING ON CONFLICT DO NOTHING
SQL SQL

View File

@ -0,0 +1,31 @@
# frozen_string_literal: true
class RemoveDuplicatesFromDiscourseSolvedSolvedTopics < ActiveRecord::Migration[7.2]
def up
# remove duplicates on answer_post_id based on earliest created_at
DB.exec(<<~SQL)
DELETE FROM discourse_solved_solved_topics
WHERE id NOT IN (
SELECT id FROM (
SELECT id, ROW_NUMBER() OVER (PARTITION BY answer_post_id ORDER BY created_at) as row_num
FROM discourse_solved_solved_topics
) t WHERE row_num = 1
)
SQL
# remove duplicates on topic_id based on earliest created_at
DB.exec(<<~SQL)
DELETE FROM discourse_solved_solved_topics
WHERE id NOT IN (
SELECT id FROM (
SELECT id, ROW_NUMBER() OVER (PARTITION BY topic_id ORDER BY created_at) as row_num
FROM discourse_solved_solved_topics
) t WHERE row_num = 1
)
SQL
end
def down
raise ActiveRecord::IrreversibleMigration
end
end

View File

@ -25,23 +25,31 @@ class CopyRemainingSolvedTopicCustomFieldToDiscourseSolvedSolvedTopics < ActiveR
created_at, created_at,
updated_at updated_at
) )
SELECT DISTINCT ON (tc.topic_id) SELECT
tc.topic_id, tc.topic_id,
CAST(tc.value AS INTEGER), tc.answer_post_id,
CAST(tc2.value AS INTEGER), tc.topic_timer_id,
COALESCE(ua.acting_user_id, -1), tc.accepter_user_id,
tc.created_at, tc.created_at,
tc.updated_at tc.updated_at
FROM (
SELECT
tc.topic_id,
CAST(tc.value AS INTEGER) AS answer_post_id,
CAST(tc2.value AS INTEGER) AS topic_timer_id,
COALESCE(ua.acting_user_id, -1) AS accepter_user_id,
tc.created_at,
tc.updated_at,
ROW_NUMBER() OVER (PARTITION BY tc.topic_id ORDER BY tc.created_at ASC) AS rn_topic,
ROW_NUMBER() OVER (PARTITION BY CAST(tc.value AS INTEGER) ORDER BY tc.created_at ASC) AS rn_answer
FROM topic_custom_fields tc FROM topic_custom_fields tc
LEFT JOIN topic_custom_fields tc2 LEFT JOIN topic_custom_fields tc2 ON tc2.topic_id = tc.topic_id AND tc2.name = 'solved_auto_close_topic_timer_id'
ON tc2.topic_id = tc.topic_id LEFT JOIN user_actions ua ON ua.target_topic_id = tc.topic_id AND ua.action_type = 15
AND tc2.name = 'solved_auto_close_topic_timer_id'
LEFT JOIN user_actions ua
ON ua.target_topic_id = tc.topic_id
AND ua.action_type = 15
WHERE tc.name = 'accepted_answer_post_id' WHERE tc.name = 'accepted_answer_post_id'
AND tc.id > :last_id AND tc.id > :last_id
AND tc.id <= :last_id + :batch_size AND tc.id <= :last_id + :batch_size
) tc
WHERE tc.rn_topic = 1 AND tc.rn_answer = 1
ON CONFLICT DO NOTHING ON CONFLICT DO NOTHING
SQL SQL

View File

@ -0,0 +1,42 @@
# frozen_string_literal: true
require_relative "../../db/migrate/20250318024953_copy_solved_topic_custom_field_to_discourse_solved_solved_topics"
RSpec.describe CopySolvedTopicCustomFieldToDiscourseSolvedSolvedTopics, type: :migration do
let(:migration) { described_class.new }
describe "handling duplicates" do
it "ensures only unique topic_id and answer_post_id are inserted" do
topic = Fabricate(:topic)
topic1 = Fabricate(:topic)
post1 = Fabricate(:post, topic: topic)
TopicCustomField.create!(
topic_id: topic.id,
name: "accepted_answer_post_id",
value: post1.id.to_s,
)
# explicit duplicate
TopicCustomField.create!(
topic_id: topic1.id,
name: "accepted_answer_post_id",
value: post1.id.to_s,
)
second_topic = Fabricate(:topic)
post2 = Fabricate(:post, topic: second_topic)
TopicCustomField.create!(
topic_id: second_topic.id,
name: "accepted_answer_post_id",
value: post2.id.to_s,
)
migration.up
expected_count = DiscourseSolved::SolvedTopic.count
expect(expected_count).to eq(2)
expect(DiscourseSolved::SolvedTopic.where(topic_id: topic.id).count).to eq(1)
expect(DiscourseSolved::SolvedTopic.where(answer_post_id: post1.id).count).to eq(1)
end
end
end

View File

@ -0,0 +1,52 @@
# frozen_string_literal: true
require_relative "../../db/migrate/20250318024954_remove_duplicates_from_discourse_solved_solved_topics"
RSpec.describe RemoveDuplicatesFromDiscourseSolvedSolvedTopics, type: :migration do
let(:migration) { described_class.new }
before do
# temp drop unique constraints to allow testing duplicate entries
ActiveRecord::Base.connection.execute(
"DROP INDEX IF EXISTS index_discourse_solved_solved_topics_on_topic_id;",
)
ActiveRecord::Base.connection.execute(
"DROP INDEX IF EXISTS index_discourse_solved_solved_topics_on_answer_post_id;",
)
end
after do
DiscourseSolved::SolvedTopic.delete_all
# reapply unique indexes
ActiveRecord::Base.connection.execute(
"CREATE UNIQUE INDEX index_discourse_solved_solved_topics_on_topic_id ON discourse_solved_solved_topics (topic_id);",
)
ActiveRecord::Base.connection.execute(
"CREATE UNIQUE INDEX index_discourse_solved_solved_topics_on_answer_post_id ON discourse_solved_solved_topics (answer_post_id);",
)
end
describe "removal of duplicate answer_post_ids" do
it "keeps only the earliest record for each answer_post_id" do
topic1 = Fabricate(:topic)
post1 = Fabricate(:post, topic: topic1)
topic2 = Fabricate(:topic)
post2 = Fabricate(:post, topic: topic2)
earlier = Fabricate(:solved_topic, topic: topic1, answer_post: post1, created_at: 2.days.ago)
Fabricate(:solved_topic, topic: topic1, answer_post: post1, created_at: 1.day.ago)
Fabricate(:solved_topic, topic: topic1, answer_post: post1, created_at: Date.today)
another = Fabricate(:solved_topic, topic: topic2, answer_post: post2, created_at: Date.today)
expect(DiscourseSolved::SolvedTopic.count).to eq(4)
migration.up
expect(DiscourseSolved::SolvedTopic.count).to eq(2)
expect(DiscourseSolved::SolvedTopic.pluck(:id, :answer_post_id)).to contain_exactly(
[earlier.id, post1.id],
[another.id, post2.id],
)
end
end
end