From 03e689deb778e7260cc6861826164aa92a26cc00 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 8 Aug 2023 15:41:57 +1000 Subject: [PATCH] FIX: Google command was including full payload (#128) * FIX: Google command was including full payload Additionally, no truncation was happening, meaning you could easily blow the token budget on a single search. This made Google search mostly useless, and it meant that after using Google we would revert to a clean slate, which is very confusing. * no need for nil there --- lib/modules/ai_bot/commands/google_command.rb | 22 ++++++++++++------- .../ai_bot/commands/google_command_spec.rb | 3 +++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/lib/modules/ai_bot/commands/google_command.rb b/lib/modules/ai_bot/commands/google_command.rb index ce9daad2..85147b13 100644 --- a/lib/modules/ai_bot/commands/google_command.rb +++ b/lib/modules/ai_bot/commands/google_command.rb @@ -48,22 +48,28 @@ module DiscourseAi::AiBot::Commands URI("https://www.googleapis.com/customsearch/v1?key=#{api_key}&cx=#{cx}&q=#{query}&num=10") body = Net::HTTP.get(uri) - parse_search_json(body) + parse_search_json(body, query) end - def parse_search_json(json_data) + def minimize_field(result, field, max_tokens: 100) + data = result[field].squish + data = ::DiscourseAi::Tokenizer::BertTokenizer.truncate(data, max_tokens).squish + data + end + + def parse_search_json(json_data, query) parsed = JSON.parse(json_data) results = parsed["items"] @last_num_results = parsed.dig("searchInformation", "totalResults").to_i - format_results(results, args: json_data) do |result| + format_results(results, args: query) do |result| { - title: result["title"], - link: result["link"], - snippet: result["snippet"], - displayLink: result["displayLink"], - formattedUrl: result["formattedUrl"], + title: minimize_field(result, "title"), + link: minimize_field(result, "link"), + snippet: minimize_field(result, "snippet", max_tokens: 120), + displayLink:
minimize_field(result, "displayLink"), + formattedUrl: minimize_field(result, "formattedUrl"), } end end diff --git a/spec/lib/modules/ai_bot/commands/google_command_spec.rb b/spec/lib/modules/ai_bot/commands/google_command_spec.rb index b34ecb16..054e0de4 100644 --- a/spec/lib/modules/ai_bot/commands/google_command_spec.rb +++ b/spec/lib/modules/ai_bot/commands/google_command_spec.rb @@ -23,6 +23,7 @@ RSpec.describe DiscourseAi::AiBot::Commands::GoogleCommand do snippet: "snippet1", displayLink: "displayLink1", formattedUrl: "formattedUrl1", + oops: "do no include me ... oops", }, ], }.to_json @@ -38,6 +39,8 @@ RSpec.describe DiscourseAi::AiBot::Commands::GoogleCommand do expect(google.description_args[:count]).to eq(1) expect(info).to include("title1") expect(info).to include("snippet1") + expect(info).to include("some+search+term") + expect(info).to_not include("oops") end end end