FIX: Ensure embeddings database outages are handled gracefully (#80)

The rails_failover middleware intercepts all `PG::ConnectionBad` errors and puts the cluster into read-only mode. It has no handling for multiple databases, so an issue with the embeddings database was taking the whole cluster into read-only mode.

This commit fixes the issue by rescuing `PG::Error` around all AI database accesses and re-raising the errors with a different class. It also adds a spec to ensure that an embeddings database outage does not affect the functionality of the topics/show route.

Co-authored-by: David Taylor <david@taylorhq.com>
This commit is contained in:
Rafael dos Santos Silva 2023-05-23 18:57:52 -03:00 committed by GitHub
parent b213fe7f94
commit cfc6e388df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 73 additions and 39 deletions

View File

@ -33,22 +33,27 @@ module DiscourseAi
# Side-by-side diff of asymmetric_semantic_search: on each line the text before
# the change comes first and the text after the change follows it.
#
# Generates an embedding for `query` with `model.generate_embedding`, then reads
# topic ids from the model's `topic_embeddings_*` table, ordered by
# `embedding <model.pg_function> query embedding` and paged with `limit` and
# `offset`. Returns the resulting `candidate_ids`.
#
# After the change, the database call is wrapped in begin/rescue: a `PG::Error`
# is logged via `Rails.logger.error` and replaced by `MissingEmbeddingError`.
#
# NOTE(review): the rescue message interpolates `topic.id`, but the parameters
# here are (model, query, limit, offset). Unless `topic` is defined somewhere
# outside this view, the handler would raise NameError before reaching
# `raise MissingEmbeddingError` - confirm, and consider logging `query` or
# dropping the topic reference. The pre-change `raise StandardError` line has
# the same `topic.id` reference.
def asymmetric_semantic_search(model, query, limit, offset) def asymmetric_semantic_search(model, query, limit, offset)
embedding = model.generate_embedding(query) embedding = model.generate_embedding(query)
candidate_ids = begin
DiscourseAi::Database::Connection candidate_ids =
.db DiscourseAi::Database::Connection
.query(<<~SQL, query_embedding: embedding, limit: limit, offset: offset) .db
SELECT .query(<<~SQL, query_embedding: embedding, limit: limit, offset: offset)
topic_id SELECT
FROM topic_id
topic_embeddings_#{model.name.underscore} FROM
ORDER BY topic_embeddings_#{model.name.underscore}
embedding #{model.pg_function} '[:query_embedding]' ORDER BY
LIMIT :limit embedding #{model.pg_function} '[:query_embedding]'
OFFSET :offset LIMIT :limit
SQL OFFSET :offset
.map(&:topic_id) SQL
.map(&:topic_id)
raise StandardError, "No embeddings found for topic #{topic.id}" if candidate_ids.empty? rescue PG::Error => e
Rails.logger.error(
"Error #{e} querying embeddings for topic #{topic.id} and model #{model.name}",
)
raise MissingEmbeddingError
end
candidate_ids candidate_ids
end end
@ -56,32 +61,49 @@ module DiscourseAi
private private
# Side-by-side diff of query_symmetric_embeddings: on each line the text before
# the change comes first and the text after the change follows it.
#
# Returns up to 100 topic ids from the model's `topic_embeddings_*` table,
# ordered by `model.pg_function` against the stored embedding of `topic`
# (looked up by `topic.id` in the inner SELECT).
#
# After the change, a `PG::Error` from the query is logged via
# `Rails.logger.error` and replaced by `MissingEmbeddingError`, so callers no
# longer see the PG error class.
def query_symmetric_embeddings(model, topic) def query_symmetric_embeddings(model, topic)
DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id) begin
SELECT DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
topic_id SELECT
FROM topic_id
topic_embeddings_#{model.name.underscore} FROM
ORDER BY topic_embeddings_#{model.name.underscore}
embedding #{model.pg_function} ( ORDER BY
SELECT embedding #{model.pg_function} (
embedding SELECT
FROM embedding
topic_embeddings_#{model.name.underscore} FROM
WHERE topic_embeddings_#{model.name.underscore}
topic_id = :topic_id WHERE
LIMIT 1 topic_id = :topic_id
) LIMIT 1
LIMIT 100 )
SQL LIMIT 100
SQL
rescue PG::Error => e
Rails.logger.error(
"Error #{e} querying embeddings for topic #{topic.id} and model #{model.name}",
)
raise MissingEmbeddingError
end
end end
# Side-by-side diff of persist_embedding: on each line the text before the
# change comes first and the text after the change follows it.
#
# Upserts `embedding` for `topic.id` into the model's `topic_embeddings_*`
# table (INSERT ... ON CONFLICT (topic_id) DO UPDATE).
#
# After the change, a `PG::Error` is logged via `Rails.logger.error` and NOT
# re-raised: unlike the query methods, a failed write is swallowed here and the
# method simply returns the logger call's result.
def persist_embedding(topic, model, embedding) def persist_embedding(topic, model, embedding)
DiscourseAi::Database::Connection.db.exec(<<~SQL, topic_id: topic.id, embedding: embedding) begin
INSERT INTO topic_embeddings_#{model.name.underscore} (topic_id, embedding) DiscourseAi::Database::Connection.db.exec(
VALUES (:topic_id, '[:embedding]') <<~SQL,
ON CONFLICT (topic_id) INSERT INTO topic_embeddings_#{model.name.underscore} (topic_id, embedding)
DO UPDATE SET embedding = '[:embedding]' VALUES (:topic_id, '[:embedding]')
SQL ON CONFLICT (topic_id)
DO UPDATE SET embedding = '[:embedding]'
SQL
topic_id: topic.id,
embedding: embedding,
)
rescue PG::Error => e
Rails.logger.error(
"Error #{e} persisting embedding for topic #{topic.id} and model #{model.name}",
)
end
end end
end end
end end

View File

@ -25,6 +25,7 @@ describe ::TopicsController do
.returns([topic1.id, topic2.id, topic3.id]) .returns([topic1.id, topic2.id, topic3.id])
get("#{topic.relative_url}.json") get("#{topic.relative_url}.json")
expect(response.status).to eq(200)
json = response.parsed_body json = response.parsed_body
expect(json["suggested_topics"].length).to eq(0) expect(json["suggested_topics"].length).to eq(0)
@ -38,5 +39,16 @@ describe ::TopicsController do
expect(json["suggested_topics"].length).to eq(0) expect(json["suggested_topics"].length).to eq(0)
expect(json["related_topics"].length).to eq(2) expect(json["related_topics"].length).to eq(2)
end end
# Added example: simulates an embeddings database outage by stubbing
# `DiscourseAi::Database::Connection.db` to raise `PG::ConnectionBad`, then
# checks that the topic JSON endpoint still answers 200, still returns
# suggested topics, and returns no related topics.
it "excludes embeddings when the database is offline" do
DiscourseAi::Database::Connection.stubs(:db).raises(PG::ConnectionBad)
get "#{topic.relative_url}.json"
expect(response.status).to eq(200)
json = response.parsed_body
expect(json["suggested_topics"].length).not_to eq(0)
expect(json["related_topics"].length).to eq(0)
end
end end
end end