Comprehensive configuration guide for LlamaIndex applications including LLM settings, embeddings, vector stores, and data connectors.
LlamaIndex is configured through Python code and environment variables. This guide covers the main configuration options.
LLMs are configured directly in code. OpenAI:

from llama_index.llms.openai import OpenAI

# Basic configuration
llm = OpenAI(model="gpt-4o", temperature=0.7)

# Advanced configuration
llm = OpenAI(
    model="gpt-4o",
    temperature=0.7,
    max_tokens=2000,
    timeout=30,
    max_retries=3,
    api_key="sk-...",  # Or use OPENAI_API_KEY env var
)
Anthropic:

from llama_index.llms.anthropic import Anthropic

llm = Anthropic(
    model="claude-sonnet-4-20250514",
    temperature=0.7,
    max_tokens=2000,
)
Ollama (local models):

from llama_index.llms.ollama import Ollama

llm = Ollama(
    model="llama3.1",
    base_url="http://localhost:11434",
    temperature=0.7,
    request_timeout=60.0,
)
Azure OpenAI:

from llama_index.llms.azure_openai import AzureOpenAI

llm = AzureOpenAI(
    model="gpt-4o",
    deployment_name="gpt-4o",
    azure_endpoint="https://your-endpoint.openai.azure.com/",
    api_version="2025-01-01-preview",
    api_key="...",  # Or use AZURE_OPENAI_API_KEY env var
)
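Whichever provider you configure, a direct completion call is a quick smoke test (a minimal sketch; the prompt is arbitrary):

# Verify the LLM is reachable and the key is valid
response = llm.complete("Say hello in one sentence.")
print(response.text)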
Embedding models follow the same pattern. OpenAI:

from llama_index.embeddings.openai import OpenAIEmbedding

# Basic
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Advanced
embed_model = OpenAIEmbedding(
    model="text-embedding-3-large",
    dimensions=1024,
    timeout=30,
)
HuggingFace (local):

from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
    device="cpu",  # or "cuda" for GPU
    query_instruction="Represent this sentence for searching relevant passages: ",
)
Ollama:

from llama_index.embeddings.ollama import OllamaEmbedding

# A dedicated embedding model (e.g. "nomic-embed-text") is usually
# preferred over a chat model here
embed_model = OllamaEmbedding(
    model_name="llama3.1",
    base_url="http://localhost:11434",
)
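All embedding models share the same interface, so any of them can be sanity-checked by embedding a test string (a minimal sketch):

# Embed one string and inspect the dimensionality
vector = embed_model.get_text_embedding("A test sentence.")
print(len(vector))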
Vector stores hold the embedded documents. Chroma:

from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# In-memory
chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Persistent
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
Pinecone:

from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone

pc = Pinecone(api_key="...")
index = pc.Index("your-index")
vector_store = PineconeVectorStore(pinecone_index=index)
Weaviate:

from llama_index.vector_stores.weaviate import WeaviateVectorStore
import weaviate

# weaviate-client v4 API; on v3, use weaviate.Client(url=...) instead
client = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": "..."}
)
vector_store = WeaviateVectorStore(weaviate_client=client)
Qdrant:

from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

client = QdrantClient(host="localhost", port=6333)
vector_store = QdrantVectorStore(client=client, collection_name="your_collection")
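On its own, a vector store object does nothing; it is attached to an index through a StorageContext. A minimal sketch, using whichever vector_store was created above and assuming documents have already been loaded (see the data connectors below):

from llama_index.core import StorageContext, VectorStoreIndex

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# To reconnect to a store that already contains embeddings:
index = VectorStoreIndex.from_vector_store(vector_store)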
Configure global settings for your LlamaIndex application:
from llama_index.core import Settings
# LLM
Settings.llm = OpenAI(model="gpt-4o", temperature=0.7)
# Embeddings
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
# Chunk size for document splitting
Settings.chunk_size = 512
# Chunk overlap
Settings.chunk_overlap = 20
# Callback managers for logging/monitoring
# Settings.callback_manager = ...
# Transformations (applied to all documents)
from llama_index.core.node_parser import SentenceSplitter
Settings.text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=20)
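Global settings act as defaults; most components also accept the same objects directly, overriding the global value for that call. A minimal sketch, assuming an index already exists:

# Use a different LLM for one query engine without touching Settings
query_engine = index.as_query_engine(llm=OpenAI(model="gpt-4o-mini"))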
Data connectors load documents. SimpleDirectoryReader:

from llama_index.core import SimpleDirectoryReader

# Basic
documents = SimpleDirectoryReader("data").load_data()

# Restricted to certain file types (use required_exts;
# there is no glob parameter)
documents = SimpleDirectoryReader("data", required_exts=[".pdf"]).load_data()

# With recursive loading
documents = SimpleDirectoryReader("data", recursive=True).load_data()

# With file metadata
documents = SimpleDirectoryReader(
    "data",
    file_metadata=lambda path: {"filename": path},
).load_data()
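load_data() returns a list of Document objects; inspecting them is a quick way to confirm the reader picked up what you expected (a minimal sketch):

print(len(documents))           # number of documents loaded
print(documents[0].metadata)    # e.g. file path and name
print(documents[0].text[:200])  # first 200 characters of content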
PDF files:

from llama_index.readers.file import PDFReader

reader = PDFReader()
documents = reader.load_data(file="document.pdf")
Databases:

from llama_index.readers.database import DatabaseReader

db_reader = DatabaseReader(
    scheme="postgresql",
    host="localhost",
    port=5432,
    user="user",
    password="password",
    dbname="database",
)
documents = db_reader.load_data(query="SELECT * FROM documents")
Web pages:

from llama_index.readers.web import SimpleWebPageReader

web_reader = SimpleWebPageReader(html_to_text=True)
documents = web_reader.load_data(["https://example.com/page1", "https://example.com/page2"])
Configure how documents are split into nodes:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(
    chunk_size=512,
    chunk_overlap=20,
    separator=" ",
    paragraph_separator="\n\n\n",
)
nodes = splitter.get_nodes_from_documents(documents)
Semantic splitting (breaks at embedding-detected topic boundaries):

from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings.openai import OpenAIEmbedding

splitter = SemanticSplitterNodeParser(
    buffer_size=1,
    breakpoint_percentile_threshold=95,
    embed_model=OpenAIEmbedding(),
)
nodes = splitter.get_nodes_from_documents(documents)
Token-based splitting:

from llama_index.core.node_parser import TokenTextSplitter

splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=20,
)
nodes = splitter.get_nodes_from_documents(documents)
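Nodes produced by any of these splitters can be passed straight to an index, bypassing the default splitting that from_documents would apply (a minimal sketch):

from llama_index.core import VectorStoreIndex

# Build the index from pre-split nodes
index = VectorStoreIndex(nodes)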
Retrieval is configured on the index:

from llama_index.core import VectorStoreIndex

index = VectorStoreIndex.from_documents(documents)

# Basic retriever
retriever = index.as_retriever(similarity_top_k=5)

# Advanced retriever
retriever = index.as_retriever(
    similarity_top_k=5,
    vector_store_query_mode="default",  # or "hybrid"
)
# A minimum-similarity cutoff is applied with SimilarityPostprocessor
# (see the node postprocessor example below), not as a retriever argument.
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

# Two retrievers over the same index (a true keyword retriever such as
# BM25Retriever could be substituted for the second one)
vector_retriever = index.as_retriever(similarity_top_k=3)
keyword_retriever = index.as_retriever(similarity_top_k=3)

# Combine with a recursive retriever; root_id must match a key in retriever_dict
retriever = RecursiveRetriever(
    root_id="vector",
    retriever_dict={"vector": vector_retriever, "keyword": keyword_retriever},
)
from llama_index.core.retrievers import QueryFusionRetriever

fusion_retriever = QueryFusionRetriever(
    retrievers=[vector_retriever, keyword_retriever],
    similarity_top_k=5,
    num_queries=1,  # set > 1 to have the LLM generate query variations
    mode="reciprocal_rerank",  # or "relative_score", "dist_based_score", "simple"
)
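A standalone retriever only returns nodes; to get synthesized answers, wrap it in a RetrieverQueryEngine (a minimal sketch):

from llama_index.core.query_engine import RetrieverQueryEngine

query_engine = RetrieverQueryEngine.from_args(fusion_retriever)
response = query_engine.query("Your question here")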
Query engines combine retrieval with response synthesis:

query_engine = index.as_query_engine(
    similarity_top_k=5,
    response_mode="compact",  # or "refine", "tree_summarize", "accumulate"
)
from llama_index.core.response_synthesizers import get_response_synthesizer

synthesizer = get_response_synthesizer(
    response_mode="refine",
    streaming=True,
)
query_engine = index.as_query_engine(
    response_synthesizer=synthesizer,
)
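With streaming=True, queries return a streaming response whose tokens can be printed as they arrive (a minimal sketch; the question is arbitrary):

streaming_response = query_engine.query("Summarize the documents.")
streaming_response.print_response_stream()

# Or consume the tokens yourself:
# for token in streaming_response.response_gen:
#     print(token, end="")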
Node postprocessors filter or rerank retrieved nodes before synthesis:

from llama_index.core.postprocessor import SimilarityPostprocessor

query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.7),
    ],
)
Set these environment variables for configuration:
# OpenAI
export OPENAI_API_KEY="sk-..."
# Anthropic
export ANTHROPIC_API_KEY="sk-ant-..."
# Azure OpenAI
export AZURE_OPENAI_API_KEY="..."
export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/"
# HuggingFace
export HUGGING_FACE_HUB_TOKEN="..."
# Pinecone
export PINECONE_API_KEY="..."
export PINECONE_ENVIRONMENT="us-east-1"
# Weaviate
export WEAVIATE_URL="http://localhost:8080"
# Qdrant
export QDRANT_URL="http://localhost:6333"
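Most integrations read these variables automatically, but you can also fetch them yourself to fail fast on a missing key (a minimal sketch):

import os

api_key = os.getenv("OPENAI_API_KEY")
if api_key is None:
    raise RuntimeError("OPENAI_API_KEY is not set")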
For advanced document parsing with LlamaParse:
from llama_parse import LlamaParse
# Basic
parser = LlamaParse(api_key="...")
# Advanced
parser = LlamaParse(
    api_key="...",
    result_type="markdown",  # or "text"
    verbose=True,
    num_workers=4,
    show_progress=True,
)
documents = parser.load_data("document.pdf")
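LlamaParse also plugs into SimpleDirectoryReader as a file extractor, so parsed PDFs flow through the normal loading pipeline (a minimal sketch):

from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    "data",
    file_extractor={".pdf": parser},
).load_data()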
A complete configuration example, putting the pieces together:

from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter

# Configure global settings
Settings.llm = OpenAI(
    model="gpt-4o",
    temperature=0.7,
    max_tokens=2000,
)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-3-small",
    dimensions=1024,
)
Settings.text_splitter = SentenceSplitter(
    chunk_size=512,
    chunk_overlap=20,
)
Settings.chunk_size = 512
Settings.chunk_overlap = 20
# Now create your index
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("Your question here")
print(response)
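To avoid re-embedding on every run, the default in-memory index can be persisted to disk and reloaded later (a minimal sketch; "./storage" is an arbitrary directory):

from llama_index.core import StorageContext, load_index_from_storage

# Save
index.storage_context.persist(persist_dir="./storage")

# Load
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)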