From 86b7faf9c21f5402ad1a12ba02978c1716abb6a6 Mon Sep 17 00:00:00 2001
From: Gregorio Galante <me@gregoriogalante.com>
Date: Mon, 25 Mar 2024 02:25:57 +0100
Subject: [PATCH] Added remove_texts on base, elasticsearch, pgvector
 vectorsearch (#525)

* Added remove_texts on base vectorsearch

* Added remove_texts on elasticsearch vectorsearch

* Added remove_texts on pgvector vectorsearch

* Update tests

* Update pgvector_spec.rb

---------

Co-authored-by: Andrei Bondarev <andrei@sourcelabs.io>
---
 lib/langchain/vectorsearch/pgvector.rb       |  7 +++++
 spec/langchain/vectorsearch/pgvector_spec.rb | 30 ++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/lib/langchain/vectorsearch/pgvector.rb b/lib/langchain/vectorsearch/pgvector.rb
index a13cbf662..f789678bb 100644
--- a/lib/langchain/vectorsearch/pgvector.rb
+++ b/lib/langchain/vectorsearch/pgvector.rb
@@ -89,6 +89,13 @@ def update_texts(texts:, ids:)
       upsert_texts(texts: texts, ids: ids)
     end
 
+    # Remove the texts with the given ids from the index table
+    # @param ids [Array<Integer>] The ids of the texts to remove from the index
+    # @return [Integer] The number of rows deleted, as reported by the dataset's delete
+    def remove_texts(ids:)
+      db[table_name.to_sym].where(id: ids).delete
+    end
+
     # Create default schema
     def create_default_schema
       db.run "CREATE EXTENSION IF NOT EXISTS vector"
diff --git a/spec/langchain/vectorsearch/pgvector_spec.rb b/spec/langchain/vectorsearch/pgvector_spec.rb
index 1b52a14da..5b3d6a725 100644
--- a/spec/langchain/vectorsearch/pgvector_spec.rb
+++ b/spec/langchain/vectorsearch/pgvector_spec.rb
@@ -98,6 +98,36 @@
       end
     end
 
+    describe "#remove_texts" do
+      # Request parameters the stubbed OpenAI client expects for each text.
+      let(:embedding_request) do
+        {dimensions: 1536, model: "text-embedding-3-small", input: "Hello World"}
+      end
+
+      # Canned embeddings payload holding one random 1536-element vector.
+      let(:embedding_response) do
+        {
+          "object" => "list",
+          "data" => [{"embedding" => Array.new(1536) { rand }}]
+        }
+      end
+
+      before do
+        allow_any_instance_of(OpenAI::Client)
+          .to receive(:embeddings)
+          .with(parameters: embedding_request)
+          .and_return(embedding_response)
+      end
+
+      # Inserts two rows, then checks that deleting them reports two removals.
+      it "removes texts" do
+        inserted_ids = subject.add_texts(texts: ["Hello World", "Hello World"]).flatten
+        expect(inserted_ids.length).to eq(2)
+
+        expect(subject.remove_texts(ids: inserted_ids)).to eq(2)
+      end
+    end
+
     describe "#similarity_search" do
       before do
         allow_any_instance_of(