
RAG Applications: Build AI with Your Own Data

Master Retrieval-Augmented Generation for LLM applications. Learn document processing, embeddings, and vector search, and build production-ready RAG systems.

Moshiour Rahman

What is RAG?

Retrieval-Augmented Generation (RAG) enhances LLMs by retrieving relevant information from external knowledge bases before generating responses. This allows AI to answer questions using your specific data.

Why RAG?

LLM Alone                 | RAG-Enhanced
--------------------------|---------------------------
Limited to training data  | Uses your documents
Can hallucinate           | Grounded in retrieved facts
Frozen at training cutoff | As current as your data
Generic answers           | Domain-specific answers

RAG Architecture

User Query → Embed Query → Vector Search → Retrieve Documents → Augment Prompt → LLM → Response

Core Components

  1. Document Loader: Ingest documents (PDF, web, database)
  2. Text Splitter: Chunk documents into smaller pieces
  3. Embeddings: Convert text to vectors
  4. Vector Store: Store and search embeddings
  5. Retriever: Find relevant documents
  6. LLM: Generate final response
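
Before building each piece, here is the whole flow compressed into a few lines; a minimal sketch assuming the vectorstore and llm objects set up in the sections below:

# Minimal sketch of the pipeline above (assumes `vectorstore` and `llm`
# are configured as in the following sections)
def answer(question: str) -> str:
    docs = vectorstore.similarity_search(question, k=4)      # embed query + vector search
    context = "\n\n".join(doc.page_content for doc in docs)  # retrieve + augment
    prompt = f"Context:\n{context}\n\nQuestion: {question}"
    return llm.invoke(prompt).content                        # generate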

Building a RAG System

Installation

pip install langchain langchain-openai langchain-community chromadb
pip install pypdf unstructured python-docx beautifulsoup4
pip install sentence-transformers rank_bm25  # local embeddings and BM25, used later

Document Loading

from langchain_community.document_loaders import (
    PyPDFLoader,
    TextLoader,
    WebBaseLoader,
    DirectoryLoader
)

# Load PDF
pdf_loader = PyPDFLoader("document.pdf")
pdf_docs = pdf_loader.load()

# Load text file
text_loader = TextLoader("notes.txt")
text_docs = text_loader.load()

# Load from web
web_loader = WebBaseLoader("https://example.com/article")
web_docs = web_loader.load()

# Load directory of files
dir_loader = DirectoryLoader(
    "./documents",
    glob="**/*.pdf",
    loader_cls=PyPDFLoader
)
all_docs = dir_loader.load()

print(f"Loaded {len(all_docs)} documents")

Text Splitting

from langchain.text_splitter import (
    Language,
    RecursiveCharacterTextSplitter,
    CharacterTextSplitter
)

# Recursive splitter (recommended)
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    separators=["\n\n", "\n", " ", ""]
)

chunks = splitter.split_documents(all_docs)
print(f"Created {len(chunks)} chunks")

# For code, split on language-aware boundaries
code_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=500,
    chunk_overlap=50
)
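
The overlap repeats a slice of text between neighboring chunks so that a sentence straddling a boundary still appears whole in at least one chunk; larger chunks preserve more context, smaller ones make retrieval more precise.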

Creating Embeddings

from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings

# OpenAI embeddings
openai_embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small"
)

# Local embeddings (free)
local_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Test embeddings
text = "Machine learning is fascinating"
embedding = openai_embeddings.embed_query(text)
print(f"Embedding dimension: {len(embedding)}")

Vector Store

from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# Create vector store
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db",
    collection_name="my_documents"
)

# Load existing vector store
vectorstore = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings,
    collection_name="my_documents"
)

# Similarity search
results = vectorstore.similarity_search(
    "What is machine learning?",
    k=4
)

for doc in results:
    print(doc.page_content[:200])
    print("---")

Complete RAG Chain

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Setup components
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings
)
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4}
)
llm = ChatOpenAI(model="gpt-4", temperature=0)

# RAG prompt
template = """Answer the question based only on the following context.
If you cannot answer from the context, say "I don't have enough information."

Context:
{context}

Question: {question}

Answer:"""

prompt = ChatPromptTemplate.from_template(template)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Build chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Query
response = rag_chain.invoke("What are the main topics covered?")
print(response)
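
Because the chain is composed of LCEL runnables, it can also stream the answer token by token instead of waiting for the full response:

# Stream the response as it is generated
for chunk in rag_chain.stream("What are the main topics covered?"):
    print(chunk, end="", flush=True)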

Advanced RAG Techniques

Hybrid Search

from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# Keyword search (BM25)
bm25_retriever = BM25Retriever.from_documents(chunks)
bm25_retriever.k = 4

# Vector search
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# Combine both
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.4, 0.6]  # Balance keyword and semantic
)

results = ensemble_retriever.get_relevant_documents("query")
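
Under the hood, EnsembleRetriever fuses the two ranked lists with weighted Reciprocal Rank Fusion, so a document ranked highly by either keyword or semantic search surfaces in the combined results.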

Multi-Query Retrieval

from langchain.retrievers.multi_query import MultiQueryRetriever

# Generate multiple query variations
multi_retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(),
    llm=llm
)

# Original query: "What is machine learning?"
# Generated queries:
# - "Define machine learning"
# - "How does ML work?"
# - "Explain artificial intelligence learning"

results = multi_retriever.get_relevant_documents(
    "What is machine learning?"
)
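
To see which query variations the LLM actually generated, turn on logging for the retriever:

import logging

# Log the generated query variations
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)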

Contextual Compression

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# Compress retrieved documents to relevant parts only
compressor = LLMChainExtractor.from_llm(llm)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectorstore.as_retriever()
)

compressed_docs = compression_retriever.get_relevant_documents(
    "What are the benefits?"
)
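
LLMChainExtractor makes one LLM call per retrieved document, which adds latency and cost. A cheaper option is an embeddings-based filter that simply drops chunks below a similarity threshold (the 0.76 here is an illustrative starting point to tune for your data, not a recommended value):

from langchain.retrievers.document_compressors import EmbeddingsFilter

# Drop retrieved chunks that fall below the similarity threshold
embeddings_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.76
)

filter_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter,
    base_retriever=vectorstore.as_retriever()
)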

Parent Document Retrieval

from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore

# Store full parent chunks in memory
docstore = InMemoryStore()

# Index small chunks for precise matching, return the larger parent chunk
parent_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=docstore,
    child_splitter=RecursiveCharacterTextSplitter(chunk_size=200),
    parent_splitter=RecursiveCharacterTextSplitter(chunk_size=2000),
)

parent_retriever.add_documents(all_docs)
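
Queries now match against the fine-grained child chunks, but the retriever hands the LLM the larger parent chunk they came from:

# Matches are found in the small chunks; parent chunks are returned
results = parent_retriever.get_relevant_documents("What is machine learning?")
print(results[0].page_content[:500])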

Chat with Documents

Conversational RAG

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer"
)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True
)

# First question
result = qa_chain.invoke({"question": "What is this document about?"})
print(result["answer"])

# Follow-up question (remembers context)
result = qa_chain.invoke({"question": "Can you elaborate on that?"})
print(result["answer"])
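
Behind the scenes, the chain uses the chat history to rewrite each follow-up into a standalone question before retrieval, which is why the vague "Can you elaborate on that?" still pulls back relevant documents.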

Citations and Sources

from langchain.prompts import ChatPromptTemplate

template = """Answer based on the context below. Include citations [1], [2], etc.

Context:
{context}

Question: {question}

Provide your answer with citations:"""

prompt = ChatPromptTemplate.from_template(template)

def format_docs_with_citations(docs):
    formatted = []
    for i, doc in enumerate(docs, 1):
        formatted.append(f"[{i}] {doc.page_content}")
    return "\n\n".join(formatted)

chain = (
    {
        "context": retriever | format_docs_with_citations,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)
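
Invoking it works exactly like the earlier chain; the answer now carries bracketed references back to the numbered context chunks:

response = chain.invoke("What are the main topics covered?")
print(response)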

Production Considerations

Document Processing Pipeline

from datetime import datetime

class DocumentProcessor:
    def __init__(self, embeddings, vectorstore_path):
        self.embeddings = embeddings
        self.vectorstore_path = vectorstore_path
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )

    def process_file(self, file_path: str):
        # Load document
        if file_path.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif file_path.endswith('.txt'):
            loader = TextLoader(file_path)
        else:
            raise ValueError(f"Unsupported file type: {file_path}")

        docs = loader.load()

        # Add metadata
        for doc in docs:
            doc.metadata["source"] = file_path
            doc.metadata["processed_at"] = datetime.now().isoformat()

        # Split
        chunks = self.splitter.split_documents(docs)

        # Add to vector store
        vectorstore = Chroma(
            persist_directory=self.vectorstore_path,
            embedding_function=self.embeddings
        )
        vectorstore.add_documents(chunks)

        return len(chunks)

    def delete_source(self, source: str):
        vectorstore = Chroma(
            persist_directory=self.vectorstore_path,
            embedding_function=self.embeddings
        )
        # The LangChain Chroma wrapper deletes by ID, so first look up
        # the chunk IDs that match the metadata filter
        matches = vectorstore.get(where={"source": source})
        if matches["ids"]:
            vectorstore.delete(ids=matches["ids"])
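
Usage is then a couple of lines (the file path here is illustrative):

processor = DocumentProcessor(
    embeddings=OpenAIEmbeddings(),
    vectorstore_path="./chroma_db"
)
num_chunks = processor.process_file("./documents/report.pdf")
print(f"Indexed {num_chunks} chunks")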

Evaluation

from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall
)

# Prepare evaluation data by running the RAG chain
# (column names follow ragas 0.1.x conventions)
questions = ["What is ML?", "How does RAG work?"]
ground_truths = ["ML is...", "RAG is..."]
answers = [rag_chain.invoke(q) for q in questions]
contexts = [
    [doc.page_content for doc in retriever.get_relevant_documents(q)]
    for q in questions
]

dataset = Dataset.from_dict({
    "question": questions,
    "answer": answers,
    "contexts": contexts,
    "ground_truth": ground_truths
})

# Run evaluation
results = evaluate(
    dataset,
    metrics=[
        faithfulness,
        answer_relevancy,
        context_precision,
        context_recall
    ]
)

print(results)
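
Roughly: faithfulness checks that the answer is grounded in the retrieved context, answer relevancy that it actually addresses the question, and context precision/recall that the retriever returned the right chunks and didn't miss any.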

Summary

Component       | Purpose
----------------|-----------------------------
Document Loader | Ingest various file types
Text Splitter   | Chunk documents optimally
Embeddings      | Convert text to vectors
Vector Store    | Efficient similarity search
Retriever       | Find relevant context
LLM             | Generate final answer

RAG enables building AI applications that leverage your specific knowledge base while maintaining accuracy and reducing hallucinations.
