# frozen_string_literal: true

# A single evaluation case loaded from a YAML file.
#
# The YAML document supplies the eval's metadata (:id, :name, :description,
# :vision), the :type of call to perform, the :args passed to that call, and at
# most one expectation that #run checks, in this order of precedence:
# :expected_output, :expected_output_regex, :expected_tool_call.
class DiscourseAi::Evals::Eval
  attr_reader :type,
              :path,
              :name,
              :description,
              :id,
              :args,
              :vision,
              :expected_output,
              :expected_output_regex,
              :expected_tool_call

  # Loads the eval definition stored at +path+.
  #
  # @param path [String] location of the YAML definition file
  def initialize(path:)
    @yaml = YAML.load_file(path).symbolize_keys
    @path = path
    @name = @yaml[:name]
    @id = @yaml[:id]
    @description = @yaml[:description]
    @vision = @yaml[:vision]
    @args = @yaml[:args]&.symbolize_keys
    @type = @yaml[:type]
    @expected_output = @yaml[:expected_output]

    # The regex is compiled once here, in multiline mode.
    regex_source = @yaml[:expected_output_regex]
    @expected_output_regex = regex_source && Regexp.new(regex_source, Regexp::MULTILINE)

    @expected_tool_call = @yaml[:expected_tool_call]
    @expected_tool_call.symbolize_keys! if @expected_tool_call

    # A :path argument is resolved relative to the directory of the YAML file.
    if @args&.key?(:path)
      @args[:path] = File.expand_path(File.join(File.dirname(path), @args[:path]))
    end
  end

  # Executes the eval against +llm+ and compares the outcome with the
  # configured expectation.
  #
  # @param llm [Object] wrapper exposing the model; the private call methods
  #   read +llm_model+ and +llm_proxy+ from it
  # @return [Hash] +{ result: :pass }+, or
  #   +{ result: :fail, expected_output:, actual_output: }+, or
  #   +{ result: :unknown, actual_output: }+ when no expectation is configured
  def run(llm:)
    result =
      case type
      when "helper"
        helper(llm, **args)
      when "pdf_to_text"
        pdf_to_text(llm, **args)
      when "image_to_text"
        image_to_text(llm, **args)
      when "prompt"
        prompt_call(llm, **args)
      end

    if expected_output
      if result == expected_output
        { result: :pass }
      else
        { result: :fail, expected_output: expected_output, actual_output: result }
      end
    elsif expected_output_regex
      # Guard against results that cannot be pattern-matched (nil for an
      # unrecognised type, an Array of partials for a streamed prompt): those
      # are reported as failures instead of raising NoMethodError.
      if result.respond_to?(:match?) && result.match?(expected_output_regex)
        { result: :pass }
      else
        { result: :fail, expected_output: expected_output_regex, actual_output: result }
      end
    elsif expected_tool_call
      if tool_call_matches?(result)
        { result: :pass }
      else
        { result: :fail, expected_output: expected_tool_call, actual_output: result }
      end
    else
      { result: :unknown, actual_output: result }
    end
  end

  # Writes a one-line "id: description" summary to standard output.
  def print
    puts "#{id}: #{description}"
  end

  # Returns the eval's attributes as a Hash with nil entries removed.
  #
  # NOTE(review): despite the name this returns a Hash, not a JSON String, and
  # it does not include expected_tool_call -- confirm both are intentional.
  #
  # @return [Hash]
  def to_json
    {
      type: @type,
      path: @path,
      name: @name,
      description: @description,
      id: @id,
      args: @args,
      vision: @vision,
      expected_output: @expected_output,
      expected_output_regex: @expected_output_regex,
    }.compact
  end

  private

  # True when +result+ is (or, for an Array, contains) a ToolCall whose name
  # and parameters equal the :name and :params of the expected tool call.
  def tool_call_matches?(result)
    tool_call = result
    if result.is_a?(Array)
      tool_call = result.find { |r| r.is_a?(DiscourseAi::Completions::ToolCall) }
    end

    tool_call.is_a?(DiscourseAi::Completions::ToolCall) &&
      tool_call.name == expected_tool_call[:name] &&
      tool_call.parameters == expected_tool_call[:params]
  end

  # Runs the completion prompt called +name+ on +input+ through the AI helper
  # assistant and returns the first suggestion.
  def helper(llm, input:, name:)
    completion_prompt = CompletionPrompt.find_by(name: name)
    assistant = DiscourseAi::AiHelper::Assistant.new(helper_llm: llm.llm_proxy)
    result =
      assistant.generate_and_send_prompt(
        completion_prompt,
        input,
        Discourse.system_user, # current_user
        false, # force_default_locale
      )

    result[:suggestions].first
  end

  # Uploads the image at +path+, extracts its text with the model and returns
  # it. The temporary upload is destroyed afterwards, even on failure.
  def image_to_text(llm, path:)
    upload = create_upload(path)
    extract_text_from(upload, llm)
  ensure
    upload&.destroy
  end

  # Uploads the PDF at +path+, converts it to page images, extracts the text of
  # every page in order and returns the concatenation.
  def pdf_to_text(llm, path:)
    upload = create_upload(path)

    page_uploads =
      DiscourseAi::Utils::PdfToImages.new(
        upload: upload,
        user: Discourse.system_user,
      ).uploaded_pages

    text = +""
    page_uploads.each do |page_upload|
      text << extract_text_from(page_upload, llm)
    ensure
      # Destroy the page image itself. The source PDF upload is released once,
      # by the method-level ensure below.
      page_upload.destroy
    end

    text
  ensure
    upload&.destroy
  end

  # Creates an upload owned by the system user from the file at +path+. The
  # file handle is closed as soon as the upload has been created.
  def create_upload(path)
    File.open(path) do |file|
      UploadCreator.new(file, File.basename(path)).create_for(Discourse.system_user.id)
    end
  end

  # Collects every text chunk yielded by ImageToText for +upload+, appending a
  # blank line after each chunk. The second block argument is ignored.
  def extract_text_from(upload, llm)
    text = +""
    DiscourseAi::Utils::ImageToText
      .new(upload: upload, llm_model: llm.llm_model, user: Discourse.system_user)
      .extract_text { |chunk, _error| text << chunk << "\n\n" if chunk }
    text
  end

  # Sends a single user +message+ (with +system_prompt+ and optional +tools+)
  # to the model.
  #
  # @return [Object, Array] the value returned by generate, or, when +stream+
  #   is truthy, an Array of the partials yielded while generating
  def prompt_call(llm, system_prompt:, message:, tools: nil, stream: false)
    # Tool definitions come from YAML with string keys; symbolize them in place.
    tools&.each do |tool|
      tool.symbolize_keys!
      tool[:parameters].symbolize_keys! if tool[:parameters]
    end

    prompt =
      DiscourseAi::Completions::Prompt.new(
        system_prompt,
        messages: [{ type: :user, content: message }],
        tools: tools,
      )

    llm_client = llm.llm_model.to_llm

    if stream
      partials = []
      llm_client.generate(prompt, user: Discourse.system_user) { |partial| partials << partial }
      partials
    else
      llm_client.generate(prompt, user: Discourse.system_user)
    end
  end
end