DEV: Migration to remove old embeddings tables~ (#1067)

* DEV: Migration to remove old embeddings tables~

* Check for table existence
This commit is contained in:
Roman Rizzi 2025-01-14 17:13:34 -03:00 committed by GitHub
parent c4d2b7de1d
commit 65456c8b30
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 102 additions and 43 deletions

View File

@ -3,24 +3,26 @@ class BackfillTopicEmbeddings < ActiveRecord::Migration[7.2]
disable_ddl_transaction!
def up
loop do
count = execute(<<~SQL).cmd_tuples
INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT source.*
FROM (
SELECT old_table.*
FROM ai_topic_embeddings old_table
LEFT JOIN ai_topics_embeddings target ON (
target.model_id = old_table.model_id AND
target.strategy_id = old_table.strategy_id AND
target.topic_id = old_table.topic_id
)
WHERE target.topic_id IS NULL
LIMIT 10000
) source
SQL
if table_exists?(:ai_topic_embeddings)
loop do
count = execute(<<~SQL).cmd_tuples
INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT source.*
FROM (
SELECT old_table.*
FROM ai_topic_embeddings old_table
LEFT JOIN ai_topics_embeddings target ON (
target.model_id = old_table.model_id AND
target.strategy_id = old_table.strategy_id AND
target.topic_id = old_table.topic_id
)
WHERE target.topic_id IS NULL
LIMIT 10000
) source
SQL
break if count == 0
break if count == 0
end
end
end

View File

@ -3,26 +3,28 @@ class BackfillPostEmbeddings < ActiveRecord::Migration[7.2]
disable_ddl_transaction!
def up
# Copy data from old tables to new tables in batches.
if table_exists?(:ai_post_embeddings)
# Copy data from old tables to new tables in batches.
loop do
count = execute(<<~SQL).cmd_tuples
INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT source.*
FROM (
SELECT old_table.*
FROM ai_post_embeddings old_table
LEFT JOIN ai_posts_embeddings target ON (
target.model_id = old_table.model_id AND
target.strategy_id = old_table.strategy_id AND
target.post_id = old_table.post_id
)
WHERE target.post_id IS NULL
LIMIT 10000
) source
SQL
loop do
count = execute(<<~SQL).cmd_tuples
INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT source.*
FROM (
SELECT old_table.*
FROM ai_post_embeddings old_table
LEFT JOIN ai_posts_embeddings target ON (
target.model_id = old_table.model_id AND
target.strategy_id = old_table.strategy_id AND
target.post_id = old_table.post_id
)
WHERE target.post_id IS NULL
LIMIT 10000
) source
SQL
break if count == 0
break if count == 0
end
end
end

View File

@ -1,15 +1,17 @@
# frozen_string_literal: true
class BackfillRagEmbeddings < ActiveRecord::Migration[7.2]
def up
not_backfilled =
DB.query_single("SELECT COUNT(*) FROM ai_document_fragments_embeddings").first.to_i == 0
if table_exists?(:ai_document_fragment_embeddings)
not_backfilled =
DB.query_single("SELECT COUNT(*) FROM ai_document_fragments_embeddings").first.to_i == 0
if not_backfilled
# Copy data from old tables to new tables
execute <<~SQL
INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT * FROM ai_document_fragment_embeddings;
SQL
if not_backfilled
# Copy data from old tables to new tables
execute <<~SQL
INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT * FROM ai_document_fragment_embeddings;
SQL
end
end
end

View File

@ -0,0 +1,53 @@
# frozen_string_literal: true
# Removes the legacy embeddings tables (ai_topic_embeddings, ai_post_embeddings,
# ai_document_fragment_embeddings) together with their search indexes, after
# copying over any RAG fragment embeddings that are still missing from the
# replacement table.
class DropOldEmbeddingTables2 < ActiveRecord::Migration[7.2]
  def up
    # The old table may not exist (the drops below already use `if_exists`),
    # and an INSERT ... SELECT against a missing relation would abort the
    # migration, so only copy when there is a source table to read from.
    if table_exists?(:ai_document_fragment_embeddings)
      # Copy rag embeddings created during deploy.
      # The anti-join keeps only rows with no matching
      # (model_id, strategy_id, rag_document_fragment_id) in the new table.
      execute <<~SQL
        INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
        (
          SELECT old_table.*
          FROM ai_document_fragment_embeddings old_table
          LEFT OUTER JOIN ai_document_fragments_embeddings target ON (
            target.model_id = old_table.model_id AND
            target.strategy_id = old_table.strategy_id AND
            target.rag_document_fragment_id = old_table.rag_document_fragment_id
          )
          WHERE target.rag_document_fragment_id IS NULL
        )
      SQL
    end

    # Drop the search indexes of the old tables; IF EXISTS makes each
    # statement a no-op when the index is not present.
    execute <<~SQL
      DROP INDEX IF EXISTS ai_topic_embeddings_1_1_search_bit;
      DROP INDEX IF EXISTS ai_topic_embeddings_2_1_search_bit;
      DROP INDEX IF EXISTS ai_topic_embeddings_3_1_search_bit;
      DROP INDEX IF EXISTS ai_topic_embeddings_4_1_search_bit;
      DROP INDEX IF EXISTS ai_topic_embeddings_5_1_search_bit;
      DROP INDEX IF EXISTS ai_topic_embeddings_6_1_search_bit;
      DROP INDEX IF EXISTS ai_topic_embeddings_7_1_search_bit;
      DROP INDEX IF EXISTS ai_topic_embeddings_8_1_search_bit;
      DROP INDEX IF EXISTS ai_post_embeddings_1_1_search_bit;
      DROP INDEX IF EXISTS ai_post_embeddings_2_1_search_bit;
      DROP INDEX IF EXISTS ai_post_embeddings_3_1_search_bit;
      DROP INDEX IF EXISTS ai_post_embeddings_4_1_search_bit;
      DROP INDEX IF EXISTS ai_post_embeddings_5_1_search_bit;
      DROP INDEX IF EXISTS ai_post_embeddings_6_1_search_bit;
      DROP INDEX IF EXISTS ai_post_embeddings_7_1_search_bit;
      DROP INDEX IF EXISTS ai_post_embeddings_8_1_search_bit;
      DROP INDEX IF EXISTS ai_document_fragment_embeddings_1_1_search_bit;
      DROP INDEX IF EXISTS ai_document_fragment_embeddings_2_1_search_bit;
      DROP INDEX IF EXISTS ai_document_fragment_embeddings_3_1_search_bit;
      DROP INDEX IF EXISTS ai_document_fragment_embeddings_4_1_search_bit;
      DROP INDEX IF EXISTS ai_document_fragment_embeddings_5_1_search_bit;
      DROP INDEX IF EXISTS ai_document_fragment_embeddings_6_1_search_bit;
      DROP INDEX IF EXISTS ai_document_fragment_embeddings_7_1_search_bit;
      DROP INDEX IF EXISTS ai_document_fragment_embeddings_8_1_search_bit;
    SQL

    drop_table :ai_topic_embeddings, if_exists: true
    drop_table :ai_post_embeddings, if_exists: true
    drop_table :ai_document_fragment_embeddings, if_exists: true
  end

  # No-op: the dropped tables and indexes are not recreated on rollback.
  def down
  end
end