FEATURE: GPT-4 turbo vision support (#575)
The recent release of GPT-4 Turbo adds vision support; this commit adds the pipeline for sending images to OpenAI.
This commit is contained in:
parent
a77658e2b1
commit
23d12c8927
|
@ -65,6 +65,7 @@ module DiscourseAi
|
||||||
user_message[:name] = msg[:id]
|
user_message[:name] = msg[:id]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
user_message[:content] = inline_images(user_message[:content], msg)
|
||||||
user_message
|
user_message
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -106,6 +107,30 @@ module DiscourseAi
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
def inline_images(content, message)
|
||||||
|
if model_name.include?("gpt-4-vision") || model_name == "gpt-4-turbo"
|
||||||
|
content = message[:content]
|
||||||
|
encoded_uploads = prompt.encoded_uploads(message)
|
||||||
|
if encoded_uploads.present?
|
||||||
|
new_content = []
|
||||||
|
new_content.concat(
|
||||||
|
encoded_uploads.map do |details|
|
||||||
|
{
|
||||||
|
type: "image_url",
|
||||||
|
image_url: {
|
||||||
|
url: "data:#{details[:mime_type]};base64,#{details[:base64]}",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
end,
|
||||||
|
)
|
||||||
|
new_content << { type: "text", text: content }
|
||||||
|
content = new_content
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
content
|
||||||
|
end
|
||||||
|
|
||||||
def per_message_overhead
|
def per_message_overhead
|
||||||
# open ai defines about 4 tokens per message of overhead
|
# open ai defines about 4 tokens per message of overhead
|
||||||
4
|
4
|
||||||
|
|
|
@ -165,6 +165,58 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
|
||||||
EndpointsCompliance.new(self, endpoint, DiscourseAi::Completions::Dialects::ChatGpt, user)
|
EndpointsCompliance.new(self, endpoint, DiscourseAi::Completions::Dialects::ChatGpt, user)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
let(:image100x100) { plugin_file_from_fixtures("100x100.jpg") }
|
||||||
|
let(:upload100x100) do
|
||||||
|
UploadCreator.new(image100x100, "image.jpg").create_for(Discourse.system_user.id)
|
||||||
|
end
|
||||||
|
|
||||||
|
describe "image support" do
|
||||||
|
it "can handle images" do
|
||||||
|
llm = DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4-turbo")
|
||||||
|
prompt =
|
||||||
|
DiscourseAi::Completions::Prompt.new(
|
||||||
|
"You are image bot",
|
||||||
|
messages: [type: :user, id: "user1", content: "hello", upload_ids: [upload100x100.id]],
|
||||||
|
)
|
||||||
|
|
||||||
|
encoded = prompt.encoded_uploads(prompt.messages.last)
|
||||||
|
|
||||||
|
parsed_body = nil
|
||||||
|
|
||||||
|
stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
|
||||||
|
body:
|
||||||
|
proc do |req_body|
|
||||||
|
parsed_body = JSON.parse(req_body, symbolize_names: true)
|
||||||
|
true
|
||||||
|
end,
|
||||||
|
).to_return(status: 200, body: { choices: [message: { content: "nice pic" }] }.to_json)
|
||||||
|
|
||||||
|
completion = llm.generate(prompt, user: user)
|
||||||
|
|
||||||
|
expect(completion).to eq("nice pic")
|
||||||
|
expected_body = {
|
||||||
|
model: "gpt-4-turbo",
|
||||||
|
messages: [
|
||||||
|
{ role: "system", content: "You are image bot" },
|
||||||
|
{
|
||||||
|
role: "user",
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: "image_url",
|
||||||
|
image_url: {
|
||||||
|
url: "data:#{encoded[0][:mime_type]};base64,#{encoded[0][:base64]}",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{ type: "text", text: "hello" },
|
||||||
|
],
|
||||||
|
name: "user1",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
expect(parsed_body).to eq(expected_body)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
describe "#perform_completion!" do
|
describe "#perform_completion!" do
|
||||||
context "when using regular mode" do
|
context "when using regular mode" do
|
||||||
context "with simple prompts" do
|
context "with simple prompts" do
|
||||||
|
|
Loading…
Reference in New Issue