diff --git a/config/settings.yml b/config/settings.yml
index 0ad3e68f..f869c212 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -114,7 +114,10 @@ plugins:
   ai_hugging_face_api_key:
     default: ""
     secret: true
-
+  ai_hugging_face_token_limit:
+    default: 4096
+  ai_hugging_face_model_display_name:
+    default: ""
   ai_google_custom_search_api_key:
     default: ""
 
diff --git a/lib/modules/summarization/entry_point.rb b/lib/modules/summarization/entry_point.rb
index c4a856c2..38506991 100644
--- a/lib/modules/summarization/entry_point.rb
+++ b/lib/modules/summarization/entry_point.rb
@@ -9,6 +9,7 @@ module DiscourseAi
         require_relative "models/discourse"
         require_relative "models/open_ai"
         require_relative "models/llama2"
+        require_relative "models/llama2_fine_tuned_orca_style"
 
         require_relative "strategies/fold_content"
         require_relative "strategies/truncate_content"
@@ -22,7 +23,11 @@ module DiscourseAi
           Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
           Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
           Models::Anthropic.new("claude-2", max_tokens: 100_000),
-          Models::Llama2.new("Llama-2-7b-chat-hf", max_tokens: 4096),
+          Models::Llama2.new("Llama2-chat-hf", max_tokens: SiteSetting.ai_hugging_face_token_limit),
+          Models::Llama2FineTunedOrcaStyle.new(
+            "StableBeluga2",
+            max_tokens: SiteSetting.ai_hugging_face_token_limit,
+          ),
         ]
 
         foldable_models.each do |model|
diff --git a/lib/modules/summarization/models/llama2.rb b/lib/modules/summarization/models/llama2.rb
index a6e5ef37..55cf3ac0 100644
--- a/lib/modules/summarization/models/llama2.rb
+++ b/lib/modules/summarization/models/llama2.rb
@@ -5,7 +5,7 @@ module DiscourseAi
     module Models
       class Llama2 < Base
         def display_name
-          "Llama2's #{model}"
+          "Llama2's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
         end
 
         def correctly_configured?
@@ -42,6 +42,7 @@ module DiscourseAi
 
           Summarize the following in up to 400 words:
           #{truncated_content} [/INST]
+          Here is a summary of the above topic:
         TEXT
       end
 
@@ -66,6 +67,7 @@ module DiscourseAi
 
           #{summary_instruction}
           #{chunk_text} [/INST]
+          Here is a summary of the above topic:
         TEXT
       end
 
@@ -90,20 +92,14 @@ module DiscourseAi
       end
 
       def completion(prompt)
-        ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
-          prompt,
-          model,
-          token_limit: token_limit,
-        ).dig(:generated_text)
+        ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
+          :generated_text,
+        )
       end
 
       def tokenizer
         DiscourseAi::Tokenizer::Llama2Tokenizer
       end
-
-      def token_limit
-        4096
-      end
     end
   end
 end
diff --git a/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
new file mode 100644
index 00000000..1ed1c130
--- /dev/null
+++ b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Summarization
+    module Models
+      class Llama2FineTunedOrcaStyle < Llama2
+        def display_name
+          "Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
+        end
+
+        def concatenate_summaries(summaries)
+          completion(<<~TEXT)
+            ### System:
+            You are a helpful bot
+
+            ### User:
+            Concatenate these disjoint summaries, creating a cohesive narrative:
+            #{summaries.join("\n")}
+
+            ### Assistant:
+          TEXT
+        end
+
+        def summarize_with_truncation(contents, opts)
+          text_to_summarize = contents.map { |c| format_content_item(c) }.join
+          truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
+
+          completion(<<~TEXT)
+            ### System:
+            #{build_base_prompt(opts)}
+
+            ### User:
+            Summarize the following in up to 400 words:
+            #{truncated_content}
+
+            ### Assistant:
+            Here is a summary of the above topic:
+          TEXT
+        end
+
+        private
+
+        def summarize_chunk(chunk_text, opts)
+          summary_instruction =
+            if opts[:single_chunk]
+              "Summarize the following forum discussion, creating a cohesive narrative:"
+            else
+              "Summarize the following in up to 400 words:"
+            end
+
+          completion(<<~TEXT)
+            ### System:
+            #{build_base_prompt(opts)}
+
+            ### User:
+            #{summary_instruction}
+            #{chunk_text}
+
+            ### Assistant:
+            Here is a summary of the above topic:
+          TEXT
+        end
+      end
+    end
+  end
+end
diff --git a/lib/shared/inference/hugging_face_text_generation.rb b/lib/shared/inference/hugging_face_text_generation.rb
index 52b8d174..fac27a03 100644
--- a/lib/shared/inference/hugging_face_text_generation.rb
+++ b/lib/shared/inference/hugging_face_text_generation.rb
@@ -17,7 +17,7 @@ module ::DiscourseAi
         repetition_penalty: 1.1,
         user_id: nil,
         tokenizer: DiscourseAi::Tokenizer::Llama2Tokenizer,
-        token_limit: 4096
+        token_limit: nil
       )
 
         raise CompletionFailed if model.blank?
@@ -33,6 +33,8 @@ module ::DiscourseAi
           headers["Authorization"] = "Bearer #{SiteSetting.ai_hugging_face_api_key}"
         end
 
+        token_limit = token_limit || SiteSetting.ai_hugging_face_token_limit
+
        parameters = {}
        payload = { inputs: prompt, parameters: parameters }
        prompt_size = tokenizer.size(prompt)