The Enterprise-Grade Production-Ready RAG Framework
🐦 Twitter • 📢 Discord • Swarms Platform • 📙 Documentation
The table below lists each supported RAG system with its status, a short description, and links to its documentation and website:
RAG System | Status | Description | Documentation | Website |
---|---|---|---|---|
ChromaDB | Available | A high-performance, distributed database optimized for handling large-scale AI tasks. | ChromaDB Documentation | ChromaDB |
Pinecone | Available | A fully managed vector database that makes it easy to add vector search to your applications. | Pinecone Documentation | Pinecone |
Redis | Coming Soon | An open-source, in-memory data structure store, used as a database, cache, and message broker. | Redis Documentation | Redis |
Faiss | Available | A library for efficient similarity search and clustering of dense vectors, developed by Facebook AI. | Faiss Documentation | Faiss |
SingleStore | Available | A distributed SQL database that provides high-performance vector similarity search. | SingleStore Documentation | SingleStore |
HNSW | Coming Soon | A graph-based algorithm for approximate nearest neighbor search. | HNSW Documentation | HNSW |
This table includes a brief description of each system, their current status, links to their documentation, and their respective websites for further information.
Python 3.10
A `.env` file with your respective keys, such as `PINECONE_API_KEY`; a template can be found in `.env.examples`.
$ pip install swarms-memory
from typing import List, Dict, Any
from swarms_memory import PineconeMemory
# Example usage
if __name__ == "__main__":
    from transformers import AutoTokenizer, AutoModel
    import torch

    # Load the tokenizer and model once, up front. Calling from_pretrained()
    # inside the embedding function would repeat the load for every document
    # added and every query issued.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    model = AutoModel.from_pretrained("bert-base-uncased")

    # Custom embedding function using a HuggingFace model
    def custom_embedding_function(text: str) -> List[float]:
        """Embed ``text`` by averaging the model's last hidden state.

        Args:
            text: The text to embed. Inputs longer than 512 tokens are
                truncated by the tokenizer.

        Returns:
            The averaged hidden state converted to a Python list.
        """
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        # Inference only: skip gradient tracking.
        with torch.no_grad():
            outputs = model(**inputs)
        # Average over dim=1, drop singleton dimensions, convert to a list.
        embeddings = (
            outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
        )
        return embeddings

    # Custom preprocessing function
    def custom_preprocess(text: str) -> str:
        """Lower-case ``text`` and strip surrounding whitespace."""
        return text.lower().strip()

    # Custom postprocessing function
    def custom_postprocess(
        results: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Add a ``custom_score`` (twice ``score``) to every result in place.

        Args:
            results: Result dicts, each carrying a ``"score"`` entry.

        Returns:
            The same list object that was passed in.
        """
        for result in results:
            result["custom_score"] = (
                result["score"] * 2
            )  # Example modification
        return results

    # Initialize the wrapper with custom functions
    wrapper = PineconeMemory(
        api_key="your-api-key",
        environment="your-environment",
        index_name="your-index-name",
        embedding_function=custom_embedding_function,
        preprocess_function=custom_preprocess,
        postprocess_function=custom_postprocess,
        logger_config={
            "handlers": [
                {
                    "sink": "custom_rag_wrapper.log",
                    "rotation": "1 GB",
                },
                {
                    # Callable sink: echo each log message to stdout.
                    "sink": lambda msg: print(
                        f"Custom log: {msg}", end=""
                    )
                },
            ],
        },
    )

    # Adding documents
    wrapper.add(
        "This is a sample document about artificial intelligence.",
        {"category": "AI"},
    )
    wrapper.add(
        "Python is a popular programming language for data science.",
        {"category": "Programming"},
    )

    # Querying
    results = wrapper.query("What is AI?", filter={"category": "AI"})
    for result in results:
        print(
            f"Score: {result['score']}, Custom Score: {result['custom_score']}, Text: {result['metadata']['text']}"
        )
from swarms_memory import ChromaDB
# Collect the constructor options in one place, then build the store.
chroma_options = {
    "metric": "cosine",
    "output_dir": "results",
    "limit_tokens": 1000,
    "n_results": 2,
    "docs_folder": "path/to/docs",
    "verbose": True,
}
chromadb = ChromaDB(**chroma_options)

# Add a document
doc_id = chromadb.add("This is a test document.")

# Query the document
result = chromadb.query("This is a test query.")

# Traverse a directory
chromadb.traverse_directory()

# Display the result of the query issued above
print(result)
from typing import List, Dict, Any
from swarms_memory.faiss_wrapper import FAISSDB
from transformers import AutoTokenizer, AutoModel
import torch
# Custom embedding function using a HuggingFace model
# Shared tokenizer/model pair, filled on first use so the pretrained weights
# are loaded once instead of on every embedding call.
_bert_components: Dict[str, Any] = {}


def _load_bert_components() -> Dict[str, Any]:
    """Return the shared tokenizer/model pair, loading it on first use."""
    if not _bert_components:
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        model = AutoModel.from_pretrained("bert-base-uncased")
        # Populate in one step so a failed load never leaves a partial cache.
        _bert_components.update(tokenizer=tokenizer, model=model)
    return _bert_components


def custom_embedding_function(text: str) -> List[float]:
    """Embed ``text`` by averaging the model's last hidden state.

    Args:
        text: The text to embed. Inputs longer than 512 tokens are
            truncated by the tokenizer.

    Returns:
        The averaged hidden state converted to a Python list.
    """
    components = _load_bert_components()
    tokenizer = components["tokenizer"]
    model = components["model"]
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)
    # Average over dim=1, drop singleton dimensions, convert to a list.
    embeddings = (
        outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
    )
    return embeddings
# Custom preprocessing function
def custom_preprocess(text: str) -> str:
    """Normalize text: lower-case it, then trim surrounding whitespace."""
    lowered = text.lower()
    return lowered.strip()
# Custom postprocessing function
def custom_postprocess(
    results: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Attach a ``custom_score`` equal to twice ``score`` to each result.

    The result dicts are updated in place and the same list is returned.
    """
    for hit in results:
        # Example modification
        hit.update({"custom_score": hit["score"] * 2})
    return results
# Initialize the wrapper with custom functions
log_handlers = [
    # File sink
    {"sink": "custom_faiss_rag_wrapper.log", "rotation": "1 GB"},
    # Callable sink: echo each log message to stdout
    {"sink": lambda msg: print(f"Custom log: {msg}", end="")},
]
wrapper = FAISSDB(
    dimension=768,
    index_type="Flat",
    embedding_function=custom_embedding_function,
    preprocess_function=custom_preprocess,
    postprocess_function=custom_postprocess,
    metric="cosine",
    logger_config={"handlers": log_handlers},
)

# Adding documents
sample_documents = [
    (
        "This is a sample document about artificial intelligence.",
        {"category": "AI"},
    ),
    (
        "Python is a popular programming language for data science.",
        {"category": "Programming"},
    ),
]
for document_text, document_metadata in sample_documents:
    wrapper.add(document_text, document_metadata)

# Querying
results = wrapper.query("What is AI?")
for result in results:
    print(
        f"Score: {result['score']}, Custom Score: {result['custom_score']}, Text: {result['metadata']['text']}"
    )
from swarms_memory.vector_dbs.singlestore_wrapper import SingleStoreDB
# Initialize SingleStore with explicit connection settings.
# NOTE(review): the values below are placeholder literals, not environment
# lookups; replace them (or read them from your environment) before running.
db = SingleStoreDB(
    host="your_host",
    port=3306,
    user="your_user",
    password="your_password",
    database="your_database",
    table_name="example_vectors",
    # NOTE(review): all-MiniLM-L6-v2 is documented as producing
    # 384-dimensional embeddings, not 768; confirm this value matches the
    # output size of the embedding model actually in use.
    dimension=768,
    namespace="example"
)
# Custom embedding function example (optional)
def custom_embedding_function(text: str) -> List[float]:
    """Template for a user-supplied embedding function.

    Replace the body with logic that turns ``text`` into its embedding
    vector (presumably of the same length as the ``dimension`` passed to
    ``SingleStoreDB``; confirm against the library).

    Args:
        text: The text to embed.

    Returns:
        The embedding as a list of floats, once implemented.

    Raises:
        NotImplementedError: Always, until the placeholder is replaced.
    """
    # Your custom embedding logic here
    raise NotImplementedError(
        "Replace custom_embedding_function with your own embedding logic."
    )
# Initialize with custom functions
def _lowercase(text):
    """Simple preprocessing: lower-case the incoming text."""
    return text.lower()


def _most_similar_first(hits):
    """Sort results by their 'similarity' value, highest first."""
    return sorted(hits, key=lambda hit: hit['similarity'], reverse=True)


db = SingleStoreDB(
    host="your_host",
    port=3306,
    user="your_user",
    password="your_password",
    database="your_database",
    table_name="example_vectors",
    dimension=768,
    namespace="example",
    embedding_function=custom_embedding_function,
    preprocess_function=_lowercase,
    postprocess_function=_most_similar_first,
)

# Add documents with metadata
doc_id = db.add(
    document="SingleStore is a distributed SQL database that combines horizontal scalability with ACID guarantees.",
    metadata={"source": "docs", "category": "database"},
)

# Query similar documents
results = db.query(
    query="How does SingleStore scale?",
    top_k=3,
    metadata_filter={"source": "docs"},
)

# Process results
for result in results:
    print(f"Document: {result['document']}")
    print(f"Similarity: {result['similarity']:.4f}")
    print(f"Metadata: {result['metadata']}\n")

# Delete the document added above
db.delete(doc_id)
# Key features:
# - Built on SingleStore's native vector similarity search
# - Supports custom embedding models and functions
# - Automatic table creation with optimized vector indexing
# - Metadata filtering for refined searches
# - Document preprocessing and postprocessing
# - Namespace support for document organization
# - SSL support for secure connections
# For more examples, see the [SingleStore example](examples/singlestore_wrapper_example.py).
MIT
Please cite Swarms in your paper or your project if you found it beneficial in any way! Appreciate you.
@misc{swarms,
author = {Gomez, Kye},
title = {{Swarms: The Multi-Agent Collaboration Framework}},
howpublished = {\url{https://github.com/kyegomez/swarms}},
year = {2023},
note = {Accessed: Date}
}