diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index bb875df0..9e60eb67 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -61,16 +61,6 @@ module DiscourseAi foldable_models.each do |model| plugin.register_summarization_strategy(Strategies::FoldContent.new(model)) end - - truncatable_models = [ - Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384), - Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024), - Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512), - ] - - truncatable_models.each do |model| - plugin.register_summarization_strategy(Strategies::TruncateContent.new(model)) - end end end end diff --git a/lib/summarization/models/discourse.rb b/lib/summarization/models/discourse.rb deleted file mode 100644 index c37c6cb9..00000000 --- a/lib/summarization/models/discourse.rb +++ /dev/null @@ -1,33 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Summarization - module Models - class Discourse < Base - def display_name - "Discourse AI's #{model}" - end - - def correctly_configured? - SiteSetting.ai_summarization_discourse_service_api_endpoint.present? && - SiteSetting.ai_summarization_discourse_service_api_key.present? - end - - def configuration_hint - I18n.t( - "discourse_ai.summarization.configuration_hint", - count: 2, - settings: - "ai_summarization_discourse_service_api_endpoint, ai_summarization_discourse_service_api_key", - ) - end - - private - - def reserved_tokens - 0 - end - end - end - end -end diff --git a/lib/summarization/strategies/truncate_content.rb b/lib/summarization/strategies/truncate_content.rb deleted file mode 100644 index afbfa5f9..00000000 --- a/lib/summarization/strategies/truncate_content.rb +++ /dev/null @@ -1,68 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Summarization - module Strategies - class TruncateContent < ::Summarization::Base - def initialize(completion_model) - @completion_model = completion_model - end - - attr_reader :completion_model - - delegate :correctly_configured?, - :display_name, - :configuration_hint, - :model, - to: :completion_model - - def summarize(content, _user, &on_partial_blk) - opts = content.except(:contents) - - { - summary: summarize_with_truncation(content[:contents], opts, &on_partial_blk), - chunks: [], - } - end - - private - - def format_content_item(item) - "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " - end - - def summarize_with_truncation(contents, opts) - text_to_summarize = contents.map { |c| format_content_item(c) }.join - truncated_content = - ::DiscourseAi::Tokenizer::BertTokenizer.truncate( - text_to_summarize, - completion_model.available_tokens, - ) - - completion(truncated_content) - end - - def completion(prompt) - ::DiscourseAi::Inference::DiscourseClassifier.perform!( - "#{endpoint}/api/v1/classify", - completion_model.model, - prompt, - SiteSetting.ai_summarization_discourse_service_api_key, - ).dig(:summary_text) - end - - def endpoint - if SiteSetting.ai_summarization_discourse_service_api_endpoint_srv.present? - service = - DiscourseAi::Utils::DnsSrv.lookup( - SiteSetting.ai_summarization_discourse_service_api_endpoint_srv, - ) - "https://#{service.target}:#{service.port}" - else - SiteSetting.ai_summarization_discourse_service_api_endpoint - end - end - end - end - end -end diff --git a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb deleted file mode 100644 index 0857c46f..00000000 --- a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb +++ /dev/null @@ -1,46 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do - subject(:strategy) { described_class.new(model) } - - before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" } - - let(:summarize_text) { "This is a text" } - let(:full_text) { "(1 asd said: #{summarize_text} " } - let(:model_tokens) { ::DiscourseAi::Tokenizer::BertTokenizer.size(full_text) - 5 } - - let(:model) do - DiscourseAi::Summarization::Models::Discourse.new( - "flan-t5-base-samsum", - max_tokens: model_tokens, - ) - end - - let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } } - - let(:summarized_text) { "this is a single summary" } - - let(:user) { User.new } - - describe "#summary" do - it "truncates the content and requests a summary" do - truncated = - ::DiscourseAi::Tokenizer::BertTokenizer.truncate( - "(1 asd said: This is a text ", - model_tokens, - ) - - WebMock - .stub_request( - :post, - "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", - ) - .with(body: JSON.dump(model: model.model, content: truncated)) - .to_return(status: 200, body: JSON.dump({ summary_text: summarized_text })) - - summary = strategy.summarize(content, user).dig(:summary) - - expect(summary).to eq(summarized_text) - end - end -end