This section describes the procedure for storing embeddings of domain-specific documents in the Redis vector store.
This section provides an example of how to ingest web document content into a Redis vector store.
Note: To ingest the content of PDF documents into a Redis vector store, follow this procedure.
Creating an index requires a Redis cluster and a Redis database with at least 2 GB of memory (to match the initial index cap).
# Base parameters, the Redis information:
# Replace "server" and "port" with the host and port of the Redis database.
redis_url = "redis://server:port"
# Name of the Redis index that the documents are added to.
index_name = "dellwebdocs"
# Imports:
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.redis import Redis
# Ingesting new documents:
# Load the web pages whose content is to be ingested.
loader = WebBaseLoader(
    [
        "https://infohub.delltechnologies.com/l/design-guide-sql-server-2022-database-solution-with-object-storage-on-dell-hardware-stack/business-challenge-193/",
        "https://infohub.delltechnologies.com/l/design-guide-sql-server-2022-database-solution-with-object-storage-on-dell-hardware-stack/solution-introduction-81/",
        "https://infohub.delltechnologies.com/l/design-guide-sql-server-2022-database-solution-with-object-storage-on-dell-hardware-stack/design-guide-introduction-28/",
    ]
)
data = loader.load()

# Split the loaded pages into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=40)
all_splits = text_splitter.split_documents(data)

# No model is specified, so the library's default embedding model is used.
embeddings = HuggingFaceEmbeddings()

# Open the index described by the schema file and add the new chunks to it.
# The index must already exist; this call does not create one.
rds = Redis.from_existing_index(
    embeddings,
    redis_url=redis_url,
    index_name=index_name,
    schema="dellwebdocs_redis_schema.yaml",
)
rds.add_documents(all_splits)
# Write the index schema to a YAML file so that the index can be reopened
# later (for example with Redis.from_existing_index).
# NOTE(review): the index is opened with "dellwebdocs_redis_schema.yaml" but
# the schema is written to "redis_schema.yaml" -- confirm whether the two
# file names are meant to differ.
rds.write_schema("redis_schema.yaml")