The LM Studio Python SDK (lmstudio) provides a clean interface for interacting with local LLMs through LM Studio. It’s MIT licensed and developed openly on GitHub.
License: MIT
Repository: lmstudio-ai/lmstudio-python
Stars: 757+
PyPI: lmstudio
pip install lmstudio
git clone https://github.com/lmstudio-ai/lmstudio-python
cd lmstudio-python
pip install -e .
import lmstudio as lms

# Grab the currently loaded default model and run a plain text completion.
llm = lms.llm()
result = llm.complete("Once upon a time,")
print(result)
import lmstudio as lms

# Seed a conversation with a system prompt, then add the user's turn.
conversation = lms.Chat("You are a helpful shopkeeper")
conversation.add_user_message("Hello! What do you sell?")

# Ask the default model to answer the conversation so far.
assistant = lms.llm()
reply = assistant.respond(conversation)
print(reply)
import lmstudio as lms

llm = lms.llm()

# Print tokens as they arrive rather than waiting for the full completion.
for fragment in llm.complete_stream("Write a haiku about coding:"):
    print(fragment, end="", flush=True)
import lmstudio as lms

# With no arguments the client auto-connects to the local LM Studio server.
client = lms.Client()

# A server on a non-default address can be targeted explicitly.
client = lms.Client("http://localhost:1234")

# Inspect what the server has available.
models = client.list_loaded_models()          # loaded into memory right now
downloaded = client.list_downloaded_models()  # present on disk
import lmstudio as lms

# Load a model by its identifier.
model = lms.llm("llama-3-8b-instruct")

# Load with explicit runtime parameters.
model = lms.llm(
    "llama-3-8b-instruct",
    context_length=8192,
    gpu_offload="max",
)

# Release one model — or everything — from memory when finished.
lms.unload("llama-3-8b-instruct")
lms.unload_all()
import lmstudio as lms

llm = lms.llm()

# Completion with library defaults.
text = llm.complete("The future of AI is")

# Completion with explicit sampling parameters.
text = llm.complete(
    "Write a story about",
    max_tokens=500,
    temperature=0.7,
    top_p=0.9,
)

# Streamed completion, emitted token by token.
for piece in llm.complete_stream("Write a poem:"):
    print(piece, end="", flush=True)
import lmstudio as lms

# Build up a multi-turn conversation, including a canned assistant turn.
dialogue = lms.Chat("You are a helpful assistant")
dialogue.add_user_message("Hello!")
dialogue.add_assistant_message("Hi! How can I help?")
dialogue.add_user_message("Tell me a joke")

llm = lms.llm()

# One-shot response to the whole conversation.
answer = llm.respond(dialogue)
print(answer)

# The same call, streamed.
for token in llm.respond_stream(dialogue):
    print(token, end="", flush=True)
import ast
import operator


def get_weather(location: str) -> str:
    """Get weather for a location (mock implementation)."""
    return f"Weather in {location}: Sunny, 25°C"


# Arithmetic operators the calculator tool is permitted to evaluate.
_SAFE_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod,
    ast.Pow: operator.pow,
    ast.USub: operator.neg,
    ast.UAdd: operator.pos,
}


def _eval_node(node):
    """Recursively evaluate a whitelisted arithmetic AST node.

    Raises ValueError for anything outside plain numeric arithmetic.
    """
    if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
        return node.value
    if isinstance(node, ast.BinOp) and type(node.op) in _SAFE_OPS:
        return _SAFE_OPS[type(node.op)](_eval_node(node.left), _eval_node(node.right))
    if isinstance(node, ast.UnaryOp) and type(node.op) in _SAFE_OPS:
        return _SAFE_OPS[type(node.op)](_eval_node(node.operand))
    raise ValueError("Unsupported expression element")


def calculate(expression: str) -> str:
    """Calculate mathematical expression.

    The expression comes from the model — i.e. untrusted input — so it is
    evaluated via an AST whitelist instead of eval(), which would allow
    arbitrary code execution.
    """
    tree = ast.parse(expression, mode="eval")
    return str(_eval_node(tree.body))


if __name__ == "__main__":
    import lmstudio as lms

    # Register the functions as tools the model may call.
    model = lms.llm(tools=[get_weather, calculate])

    # Chat with function calling.
    chat = lms.Chat()
    chat.add_user_message("What's the weather in Paris?")
    response = model.respond(chat)
    print(response)
import lmstudio as lms

# Fetch the default embedding model.
embedder = lms.embedding()

# Embed a single text and report the vector length.
vector = embedder.embed("Hello, world!")
print(f"Embedding shape: {len(vector)}")

# Embed several texts in one batched call.
vectors = embedder.embed_batch([
    "First text",
    "Second text",
    "Third text",
])
import lmstudio as lms

# Client configured with explicit connection and retry behavior.
configured = lms.Client(
    base_url="http://localhost:1234",
    timeout=30,
    max_retries=3,
)

# Route model requests through that client.
model = lms.llm(client=configured)
import lmstudio as lms

llm = lms.llm("llama-3-8b-instruct")

# Query and display the model's metadata.
details = llm.info()
print(f"Model: {details['id']}")
print(f"Context length: {details['context_length']}")
print(f"Parameters: {details['parameter_count']}")
import lmstudio as lms
from concurrent.futures import ThreadPoolExecutor

llm = lms.llm()


def generate(prompt):
    """Run one completion for *prompt* on the shared model."""
    return llm.complete(prompt)


# Fan the prompts out across worker threads; results keep input order.
prompts = ["Story about cats", "Story about dogs", "Story about birds"]
with ThreadPoolExecutor() as pool:
    outputs = list(pool.map(generate, prompts))

for text in outputs:
    print(text)
import lmstudio as lms
from lmstudio import LMStudioError

# Sentinel so a failed load is detectable below. In the original flow,
# if lms.llm() raised, `model` was never bound and the second try block
# crashed with a NameError that LMStudioError does not catch.
model = None
try:
    model = lms.llm("nonexistent-model")
except LMStudioError as e:
    print(f"Error loading model: {e}")

# Only attempt a completion when the model actually loaded.
if model is not None:
    try:
        response = model.complete("Test")
    except LMStudioError as e:
        print(f"Completion error: {e}")
from fastapi import FastAPI
from pydantic import BaseModel
import lmstudio as lms

app = FastAPI()


class Prompt(BaseModel):
    # Request payload for the /complete endpoint.
    text: str
    max_tokens: int = 500


@app.post("/complete")
async def complete(prompt: Prompt):
    """Run a one-shot completion for the posted prompt."""
    llm = lms.llm()
    completion = llm.complete(prompt.text, max_tokens=prompt.max_tokens)
    return {"response": completion}


@app.post("/chat")
async def chat(message: str):
    """Answer a single user message with a system-prompted assistant."""
    conversation = lms.Chat("You are a helpful assistant")
    conversation.add_user_message(message)
    llm = lms.llm()
    reply = llm.respond(conversation)
    return {"response": reply}
from langchain_community.llms import LMStudio
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Point LangChain's LMStudio wrapper at the local server.
llm = LMStudio(server_url="http://localhost:1234")

# Prompt template with a single input variable.
qa_template = """Question: {question}
Answer: Let's think step by step."""
qa_prompt = PromptTemplate(template=qa_template, input_variables=["question"])

# Chain the model and the prompt, then run a question through it.
pipeline = LLMChain(llm=llm, prompt=qa_prompt)
answer = pipeline.run("What is the capital of France?")
print(answer)
import lmstudio as lms
import pandas as pd

# Reviews to classify, one per row.
df = pd.read_csv("reviews.csv")

llm = lms.llm()


def analyze_sentiment(review):
    """Classify *review* as POSITIVE, NEGATIVE, or NEUTRAL via the model."""
    prompt = lms.Chat("You are a sentiment analyzer. Reply with POSITIVE, NEGATIVE, or NEUTRAL.")
    prompt.add_user_message(review)
    return llm.respond(prompt).strip()


# Label every review, then summarize the label distribution.
df["sentiment"] = df["review"].apply(analyze_sentiment)
print(df["sentiment"].value_counts())
The SDK uses LM Studio's configuration directory, which varies by platform:

- Windows: %APPDATA%/LM Studio
- macOS: ~/Library/Application Support/LM Studio
- Linux: ~/.config/LM Studio

# Custom API endpoint
export LMSTUDIO_API_BASE=http://localhost:1234
# Authentication token
export LMSTUDIO_API_KEY=your-token
# Timeout settings
export LMSTUDIO_TIMEOUT=30
# Troubleshooting: ensure LM Studio is running and its server is started.
import lmstudio as lms

lms.server_start()

# Confirm the SDK can actually reach the server.
try:
    lms.Client()
    print("Connected!")
except Exception as e:
    print(f"Connection error: {e}")

# Troubleshooting: list models on disk, downloading one if it is missing.
print(lms.list_downloaded_models())
lms.download("llama-3-8b-instruct")

# Troubleshooting: slow hardware may need a longer request timeout.
session = lms.Client(timeout=60)
model = lms.llm(client=session)
Any questions?
Feel free to contact us. Find all contact information on our contact page.