From eb7fff3a551a8cc97b87e55d99fd7e9ffa1d416c Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Silva Date: Thu, 3 Aug 2023 15:29:30 -0300 Subject: [PATCH] FEATURE: Add support for StableBeluga and Upstage Llama2 instruct (#126) * FEATURE: Add support for StableBeluga and Upstage Llama2 instruct This means we support all models in the top 3 of the Open LLM Leaderboard Since some of those models have RoPE, we now have a setting so you can customize the token limit depending on which model you use. --- config/settings.yml | 5 +- lib/modules/summarization/entry_point.rb | 7 +- lib/modules/summarization/models/llama2.rb | 16 ++--- .../models/llama2_fine_tuned_orca_style.rb | 66 +++++++++++++++++++ .../inference/hugging_face_text_generation.rb | 4 +- 5 files changed, 85 insertions(+), 13 deletions(-) create mode 100644 lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb diff --git a/config/settings.yml b/config/settings.yml index 0ad3e68f..f869c212 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -114,7 +114,10 @@ plugins: ai_hugging_face_api_key: default: "" secret: true - + ai_hugging_face_token_limit: + default: 4096 + ai_hugging_face_model_display_name: + default: "" ai_google_custom_search_api_key: default: "" diff --git a/lib/modules/summarization/entry_point.rb b/lib/modules/summarization/entry_point.rb index c4a856c2..38506991 100644 --- a/lib/modules/summarization/entry_point.rb +++ b/lib/modules/summarization/entry_point.rb @@ -9,6 +9,7 @@ module DiscourseAi require_relative "models/discourse" require_relative "models/open_ai" require_relative "models/llama2" + require_relative "models/llama2_fine_tuned_orca_style" require_relative "strategies/fold_content" require_relative "strategies/truncate_content" @@ -22,7 +23,11 @@ module DiscourseAi Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384), Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384), Models::Anthropic.new("claude-2", max_tokens: 100_000), 
- Models::Llama2.new("Llama-2-7b-chat-hf", max_tokens: 4096), + Models::Llama2.new("Llama2-chat-hf", max_tokens: SiteSetting.ai_hugging_face_token_limit), + Models::Llama2FineTunedOrcaStyle.new( + "StableBeluga2", + max_tokens: SiteSetting.ai_hugging_face_token_limit, + ), ] foldable_models.each do |model| diff --git a/lib/modules/summarization/models/llama2.rb b/lib/modules/summarization/models/llama2.rb index a6e5ef37..55cf3ac0 100644 --- a/lib/modules/summarization/models/llama2.rb +++ b/lib/modules/summarization/models/llama2.rb @@ -5,7 +5,7 @@ module DiscourseAi module Models class Llama2 < Base def display_name - "Llama2's #{model}" + "Llama2's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}" end def correctly_configured? @@ -42,6 +42,7 @@ module DiscourseAi Summarize the following in up to 400 words: #{truncated_content} [/INST] + Here is a summary of the above topic: TEXT end @@ -66,6 +67,7 @@ module DiscourseAi #{summary_instruction} #{chunk_text} [/INST] + Here is a summary of the above topic: TEXT end @@ -90,20 +92,14 @@ module DiscourseAi end def completion(prompt) - ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!( - prompt, - model, - token_limit: token_limit, - ).dig(:generated_text) + ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig( + :generated_text, + ) end def tokenizer DiscourseAi::Tokenizer::Llama2Tokenizer end - - def token_limit - 4096 - end end end end diff --git a/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb new file mode 100644 index 00000000..1ed1c130 --- /dev/null +++ b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Models + class Llama2FineTunedOrcaStyle < Llama2 + def display_name + "Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || 
model}" + end + + def concatenate_summaries(summaries) + completion(<<~TEXT) + ### System: + You are a helpful bot + + ### User: + Concatenate these disjoint summaries, creating a cohesive narrative: + #{summaries.join("\n")} + + ### Assistant: + TEXT + end + + def summarize_with_truncation(contents, opts) + text_to_summarize = contents.map { |c| format_content_item(c) }.join + truncated_content = tokenizer.truncate(text_to_summarize, available_tokens) + + completion(<<~TEXT) + ### System: + #{build_base_prompt(opts)} + + ### User: + Summarize the following in up to 400 words: + #{truncated_content} + + ### Assistant: + Here is a summary of the above topic: + TEXT + end + + private + + def summarize_chunk(chunk_text, opts) + summary_instruction = + if opts[:single_chunk] + "Summarize the following forum discussion, creating a cohesive narrative:" + else + "Summarize the following in up to 400 words:" + end + + completion(<<~TEXT) + ### System: + #{build_base_prompt(opts)} + + ### User: + #{summary_instruction} + #{chunk_text} + + ### Assistant: + Here is a summary of the above topic: + TEXT + end + end + end + end +end diff --git a/lib/shared/inference/hugging_face_text_generation.rb b/lib/shared/inference/hugging_face_text_generation.rb index 52b8d174..fac27a03 100644 --- a/lib/shared/inference/hugging_face_text_generation.rb +++ b/lib/shared/inference/hugging_face_text_generation.rb @@ -17,7 +17,7 @@ module ::DiscourseAi repetition_penalty: 1.1, user_id: nil, tokenizer: DiscourseAi::Tokenizer::Llama2Tokenizer, - token_limit: 4096 + token_limit: nil ) raise CompletionFailed if model.blank? @@ -33,6 +33,8 @@ module ::DiscourseAi headers["Authorization"] = "Bearer #{SiteSetting.ai_hugging_face_api_key}" end + token_limit = token_limit || SiteSetting.ai_hugging_face_token_limit + parameters = {} payload = { inputs: prompt, parameters: parameters } prompt_size = tokenizer.size(prompt)