FEATURE: streaming mode for the FoldContent strategy. (#134)

Roman Rizzi 2023-08-11 15:08:54 -03:00, committed by GitHub
parent 7077c31ab8
commit b076e43d67
9 changed files with 139 additions and 62 deletions

View File

@@ -20,7 +20,7 @@ module DiscourseAi
         )
       end
 
-      def concatenate_summaries(summaries)
+      def concatenate_summaries(summaries, &on_partial_blk)
         instructions = <<~TEXT
           Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
           Include only the summary inside <ai> tags.
@@ -29,10 +29,10 @@
         instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
         instructions += "Assistant:\n"
 
-        completion(instructions)
+        completion(instructions, &on_partial_blk)
       end
 
-      def summarize_with_truncation(contents, opts)
+      def summarize_with_truncation(contents, opts, &on_partial_blk)
         instructions = build_base_prompt(opts)
 
         text_to_summarize = contents.map { |c| format_content_item(c) }.join
@@ -40,17 +40,20 @@
         instructions += "<input>#{truncated_content}</input>\nAssistant:\n"
 
-        completion(instructions)
+        completion(instructions, &on_partial_blk)
       end
 
-      def summarize_single(chunk_text, opts)
-        summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+      def summarize_single(chunk_text, opts, &on_partial_blk)
+        summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
       end
 
       private
 
-      def summarize_chunk(chunk_text, opts)
-        completion(build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n")
+      def summarize_chunk(chunk_text, opts, &on_partial_blk)
+        completion(
+          build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n",
+          &on_partial_blk
+        )
       end
 
       def build_base_prompt(opts)
@@ -79,9 +82,33 @@ module DiscourseAi
         base_prompt
       end
 
-      def completion(prompt)
-        response =
-          ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(:completion)
+      def completion(prompt, &on_partial_blk)
+        # We need to discard any text that might come before the <ai> tag.
+        # Instructing the model to reply only with the summary seems impossible.
+        pre_tag_partial = +""
+
+        if on_partial_blk
+          on_partial_read =
+            Proc.new do |partial|
+              if pre_tag_partial.include?("<ai>")
+                on_partial_blk.call(partial[:completion])
+              else
+                pre_tag_partial << partial[:completion]
+              end
+            end
+
+          response =
+            ::DiscourseAi::Inference::AnthropicCompletions.perform!(
+              prompt,
+              model,
+              &on_partial_read
+            )
+        else
+          response =
+            ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(
+              :completion,
+            )
+        end
 
         Nokogiri::HTML5.fragment(response).at("ai").text
       end
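
The streamed branch above works around a prompt-compliance problem: the model tends to emit preamble before the <ai> tag, so partials are buffered until the tag has been seen and only later deltas reach the caller's block. A minimal sketch of that gating idea in isolation, where partials and emit are hypothetical stand-ins for the streamed chunks and the caller's block:

    # Buffer everything until "<ai>" has been seen, then forward each delta.
    pre_tag_partial = +""
    emit = ->(text) { print text }

    partials = [{ completion: "Sure! <ai>" }, { completion: "A summary." }]

    partials.each do |partial|
      if pre_tag_partial.include?("<ai>")
        emit.call(partial[:completion]) # past the tag: stream to the caller
      else
        pre_tag_partial << partial[:completion] # still inside the preamble
      end
    end

Note that the partial containing the tag itself is swallowed into the buffer; only deltas arriving after it are forwarded, which mirrors the hunk above.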

View File

@@ -20,8 +20,8 @@ module DiscourseAi
         )
       end
 
-      def concatenate_summaries(summaries)
-        completion(<<~TEXT)
+      def concatenate_summaries(summaries, &on_partial_blk)
+        prompt = <<~TEXT
           [INST] <<SYS>>
           You are a helpful bot
           <</SYS>>
@@ -29,13 +29,15 @@
           Concatenate these disjoint summaries, creating a cohesive narrative:
           #{summaries.join("\n")} [/INST]
         TEXT
+
+        completion(prompt, &on_partial_blk)
       end
 
-      def summarize_with_truncation(contents, opts)
+      def summarize_with_truncation(contents, opts, &on_partial_blk)
         text_to_summarize = contents.map { |c| format_content_item(c) }.join
         truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
 
-        completion(<<~TEXT)
+        prompt = <<~TEXT
           [INST] <<SYS>>
           #{build_base_prompt(opts)}
           <</SYS>>
@@ -44,15 +46,17 @@
           #{truncated_content} [/INST]
           Here is a summary of the above topic:
         TEXT
+
+        completion(prompt, &on_partial_blk)
       end
 
-      def summarize_single(chunk_text, opts)
-        summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+      def summarize_single(chunk_text, opts, &on_partial_blk)
+        summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
       end
 
       private
 
-      def summarize_chunk(chunk_text, opts)
+      def summarize_chunk(chunk_text, opts, &on_partial_blk)
         summary_instruction =
           if opts[:single_chunk]
             "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -60,7 +64,7 @@
             "Summarize the following in up to 400 words:"
           end
 
-        completion(<<~TEXT)
+        prompt = <<~TEXT
           [INST] <<SYS>>
           #{build_base_prompt(opts)}
           <</SYS>>
@@ -69,6 +73,8 @@
           #{chunk_text} [/INST]
           Here is a summary of the above topic:
         TEXT
+
+        completion(prompt, &on_partial_blk)
       end
 
       def build_base_prompt(opts)
@@ -91,10 +97,21 @@
         base_prompt
       end
 
-      def completion(prompt)
-        ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
-          :generated_text,
-        )
+      def completion(prompt, &on_partial_blk)
+        if on_partial_blk
+          on_partial_read =
+            Proc.new { |partial| on_partial_blk.call(partial.dig(:token, :text).to_s) }
+
+          ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
+            prompt,
+            model,
+            &on_partial_read
+          )
+        else
+          ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
+            :generated_text,
+          )
+        end
       end
 
       def tokenizer

View File

@@ -8,8 +8,8 @@ module DiscourseAi
         "Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
       end
 
-      def concatenate_summaries(summaries)
-        completion(<<~TEXT)
+      def concatenate_summaries(summaries, &on_partial_blk)
+        prompt = <<~TEXT
           ### System:
           You are a helpful bot
@@ -19,28 +19,32 @@
           ### Assistant:
         TEXT
+
+        completion(prompt, &on_partial_blk)
       end
 
-      def summarize_with_truncation(contents, opts)
+      def summarize_with_truncation(contents, opts, &on_partial_blk)
         text_to_summarize = contents.map { |c| format_content_item(c) }.join
         truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
 
-        completion(<<~TEXT)
+        prompt = <<~TEXT
           ### System:
           #{build_base_prompt(opts)}
 
           ### User:
           Summarize the following in up to 400 words:
           #{truncated_content}
 
           ### Assistant:
           Here is a summary of the above topic:
         TEXT
+
+        completion(prompt, &on_partial_blk)
       end
 
       private
 
-      def summarize_chunk(chunk_text, opts)
+      def summarize_chunk(chunk_text, opts, &on_partial_blk)
         summary_instruction =
           if opts[:single_chunk]
             "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -48,7 +52,7 @@
             "Summarize the following in up to 400 words:"
           end
 
-        completion(<<~TEXT)
+        prompt = <<~TEXT
           ### System:
           #{build_base_prompt(opts)}
@@ -59,6 +63,8 @@
           ### Assistant:
           Here is a summary of the above topic:
         TEXT
+
+        completion(prompt, &on_partial_blk)
       end
     end
   end

View File

@@ -20,7 +20,7 @@ module DiscourseAi
         )
       end
 
-      def concatenate_summaries(summaries)
+      def concatenate_summaries(summaries, &on_partial_blk)
         messages = [
           { role: "system", content: "You are a helpful bot" },
           {
@@ -30,10 +30,10 @@
           },
         ]
 
-        completion(messages)
+        completion(messages, &on_partial_blk)
       end
 
-      def summarize_with_truncation(contents, opts)
+      def summarize_with_truncation(contents, opts, &on_partial_blk)
         messages = [{ role: "system", content: build_base_prompt(opts) }]
 
         text_to_summarize = contents.map { |c| format_content_item(c) }.join
@@ -44,16 +44,16 @@
           content: "Summarize the following in 400 words:\n#{truncated_content}",
         }
 
-        completion(messages)
+        completion(messages, &on_partial_blk)
       end
 
-      def summarize_single(chunk_text, opts)
-        summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+      def summarize_single(chunk_text, opts, &on_partial_blk)
+        summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
       end
 
       private
 
-      def summarize_chunk(chunk_text, opts)
+      def summarize_chunk(chunk_text, opts, &on_partial_blk)
         summary_instruction =
           if opts[:single_chunk]
             "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -66,6 +66,7 @@
             { role: "system", content: build_base_prompt(opts) },
             { role: "user", content: "#{summary_instruction}\n#{chunk_text}" },
           ],
+          &on_partial_blk
         )
       end
 
@@ -89,13 +90,22 @@
         base_prompt
       end
 
-      def completion(prompt)
-        ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
-          :choices,
-          0,
-          :message,
-          :content,
-        )
+      def completion(prompt, &on_partial_blk)
+        if on_partial_blk
+          on_partial_read =
+            Proc.new do |partial|
+              on_partial_blk.call(partial.dig(:choices, 0, :delta, :content).to_s)
+            end
+
+          ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, &on_partial_read)
+        else
+          ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
+            :choices,
+            0,
+            :message,
+            :content,
+          )
+        end
       end
 
       def tokenizer
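
Each backend streams a differently shaped partial, so every model class wraps the caller's block in an on_partial_read proc that digs the text delta out before forwarding it. A hedged sketch of the three shapes as used in the hunks above; the extract_delta lambda is illustrative, not part of the plugin:

    # Per-backend text-delta extraction, mirroring the dig calls above.
    extract_delta =
      lambda do |backend, partial|
        case backend
        when :anthropic
          partial[:completion].to_s
        when :hugging_face
          partial.dig(:token, :text).to_s
        when :open_ai
          partial.dig(:choices, 0, :delta, :content).to_s
        end
      end

    extract_delta.call(:open_ai, { choices: [{ delta: { content: "Hi" } }] }) # => "Hi"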

View File

@@ -16,17 +16,24 @@
                :model,
                to: :completion_model
 
-      def summarize(content)
+      def summarize(content, &on_partial_blk)
         opts = content.except(:contents)
 
         chunks = split_into_chunks(content[:contents])
 
         if chunks.length == 1
-          { summary: completion_model.summarize_single(chunks.first[:summary], opts), chunks: [] }
+          {
+            summary:
+              completion_model.summarize_single(chunks.first[:summary], opts, &on_partial_blk),
+            chunks: [],
+          }
         else
           summaries = completion_model.summarize_in_chunks(chunks, opts)
 
-          { summary: completion_model.concatenate_summaries(summaries), chunks: summaries }
+          {
+            summary: completion_model.concatenate_summaries(summaries, &on_partial_blk),
+            chunks: summaries,
+          }
         end
       end
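
With this change a caller can pass a block to the strategy and receive the summary as it streams in. A hypothetical usage sketch; the strategy and content values are assumptions based on the signatures above:

    # Stream partial summaries to stdout as they arrive.
    result = strategy.summarize(content) { |partial_summary| print partial_summary }

    puts "\nchunks used: #{result[:chunks].length}"

Note that in the multi-chunk path only the final concatenate_summaries call receives the block; summarize_in_chunks still runs without streaming, so nothing reaches the block until the per-chunk summaries are done.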

View File

@@ -16,11 +16,12 @@
                :model,
                to: :completion_model
 
-      def summarize(content)
+      def summarize(content, &on_partial_blk)
         opts = content.except(:contents)
 
         {
-          summary: completion_model.summarize_with_truncation(content[:contents], opts),
+          summary:
+            completion_model.summarize_with_truncation(content[:contents], opts, &on_partial_blk),
           chunks: [],
         }
       end

View File

@@ -68,10 +68,11 @@ module ::DiscourseAi
           return parsed_response
         end
 
+        response_data = +""
+
         begin
           cancelled = false
           cancel = lambda { cancelled = true }
-          response_data = +""
           response_raw = +""
 
           response.read_body do |chunk|
@@ -111,6 +112,8 @@
             )
           end
         end
+
+        return response_data
       end
     end
   end
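
Both hunks serve one goal: response_data is now initialized before the begin block and explicitly returned after it, so a streaming call yields partials along the way and still hands the accumulated full text back when the read loop finishes. A sketch of the pattern under assumed helper names, with parse_partial and extract_text as hypothetical placeholders:

    def stream_and_collect(response)
      response_data = +"" # hoisted outside begin so it survives the loop

      begin
        response.read_body do |chunk|
          partial = parse_partial(chunk) # hypothetical SSE/JSON parsing
          response_data << extract_text(partial) # accumulate every delta
          yield partial if block_given?
        end
      end

      response_data # full text, even when the caller consumed it in pieces
    end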

View File

@@ -85,10 +85,11 @@
           return parsed_response
         end
 
+        response_data = +""
+
         begin
           cancelled = false
           cancel = lambda { cancelled = true }
-          response_data = +""
           response_raw = +""
 
           response.read_body do |chunk|
@@ -102,7 +103,7 @@
               chunk
                 .split("\n")
                 .each do |line|
-                  data = line.split("data: ", 2)[1]
+                  data = line.split("data:", 2)[1]
                   next if !data || data.squish == "[DONE]"
 
                   if !cancelled
@@ -113,7 +114,7 @@
                     # this is the last chunk and contains the full response
                     next if partial[:token][:special] == true
 
-                    response_data = partial[:token][:text].to_s
+                    response_data << partial[:token][:text].to_s
 
                     yield partial, cancel
                   rescue JSON::ParserError
@@ -131,6 +132,8 @@
             )
           end
         end
+
+        return response_data
       end
     end
   end
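
This file also picks up two streaming fixes: the SSE prefix is now split on "data:" without a trailing space, presumably because the endpoint does not always emit one, and token texts are appended with << instead of assignment, so response_data accumulates the whole summary rather than keeping only the last token. A quick illustration of the split change:

    line = 'data:{"token":{"text":"Hi","special":false}}'
    line.split("data:", 2)[1]  # => '{"token":{"text":"Hi","special":false}}'
    line.split("data: ", 2)[1] # => nil, which the old code would skip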

View File

@@ -121,10 +121,11 @@
           return parsed_response
         end
 
+        response_data = +""
+
         begin
           cancelled = false
           cancel = lambda { cancelled = true }
-          response_data = +""
           response_raw = +""
           leftover = ""
 
@@ -170,6 +171,8 @@
             )
           end
         end
+
+        return response_data
      end
    end
  end