From 86b7faf9c21f5402ad1a12ba02978c1716abb6a6 Mon Sep 17 00:00:00 2001
From: Gregorio Galante <me@gregoriogalante.com>
Date: Mon, 25 Mar 2024 02:25:57 +0100
Subject: [PATCH] Added remove_texts on base, elasticsearch, pgvector vectorsearch (#525)

* Added remove_texts on base vectorsearch

* Added remove_texts on elasticsearch vectorsearch

* Added remove_texts on pgvector vectorsearch

* Update tests

* Update pgvector_spec.rb

---------

Co-authored-by: Andrei Bondarev <andrei@sourcelabs.io>
---
 lib/langchain/vectorsearch/pgvector.rb       |  7 +++++
 spec/langchain/vectorsearch/pgvector_spec.rb | 30 ++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/lib/langchain/vectorsearch/pgvector.rb b/lib/langchain/vectorsearch/pgvector.rb
index a13cbf662..f789678bb 100644
--- a/lib/langchain/vectorsearch/pgvector.rb
+++ b/lib/langchain/vectorsearch/pgvector.rb
@@ -89,6 +89,13 @@ def update_texts(texts:, ids:)
       upsert_texts(texts: texts, ids: ids)
     end
 
+    # Remove a list of texts from the index
+    # @param ids [Array<Integer>] The ids of the texts to remove from the index
+    # @return [Integer] The number of texts removed from the index
+    def remove_texts(ids:)
+      @db[table_name.to_sym].where(id: ids).delete
+    end
+
     # Create default schema
     def create_default_schema
       db.run "CREATE EXTENSION IF NOT EXISTS vector"
diff --git a/spec/langchain/vectorsearch/pgvector_spec.rb b/spec/langchain/vectorsearch/pgvector_spec.rb
index 1b52a14da..5b3d6a725 100644
--- a/spec/langchain/vectorsearch/pgvector_spec.rb
+++ b/spec/langchain/vectorsearch/pgvector_spec.rb
@@ -98,6 +98,36 @@
     end
   end
 
+  describe "#remove_texts" do
+    before do
+      allow_any_instance_of(
+        OpenAI::Client
+      ).to receive(:embeddings)
+        .with(
+          parameters: {
+            dimensions: 1536,
+            model: "text-embedding-3-small",
+            input: "Hello World"
+          }
+        )
+        .and_return({
+          "object" => "list",
+          "data" => [
+            {"embedding" => 1536.times.map { rand }}
+          ]
+        })
+    end
+
+    it "removes texts" do
+      values = subject.add_texts(texts: ["Hello World", "Hello World"])
+      ids = values.flatten
+      expect(ids.length).to eq(2)
+
+      result = subject.remove_texts(ids: ids)
+      expect(result).to eq(2)
+    end
+  end
+
   describe "#similarity_search" do
     before do
       allow_any_instance_of(