This guide provides a complete Ansible playbook for deploying LocalAI as a native installation (Go binary) on Debian 10+, Ubuntu LTS, and RHEL 9+ compatible hosts. LocalAI exposes an OpenAI-compatible API for running models locally, with support for LLMs, image generation, audio, and more.
Note: For Docker-based deployment, see LocalAI Docker Ansible Setup.
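The play targets a host group named localai. A minimal inventory.ini for a single host might look like the following sketch (the IP address and SSH user are placeholders; adjust for your environment):

# inventory.ini (example; substitute your own host address and SSH user)
[localai]
localai-server ansible_host=192.168.1.50 ansible_user=admin

Save the playbook below as deploy-localai-native.yml: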
- name: Deploy LocalAI (Native)
  hosts: localai
  become: true

  vars:
    # Application settings
    app_root: /opt/localai
    app_user: localai
    app_group: localai
    app_port: 8080

    # LocalAI settings
    localai_version: "v4.1.3"
    localai_repo_url: https://github.com/mudler/LocalAI.git
    models_dir: "{{ app_root }}/models"
    data_dir: "{{ app_root }}/data"

    # GPU settings (set to false for CPU-only)
    enable_gpu: false
    gpu_backend: "cuda" # cuda, rocm, or cpu

    # Model settings
    default_model: "" # e.g., "llama-3.2-3b-instruct.Q4_K_M.gguf"
    context_size: 4096
    threads: "{{ ansible_processor_vcpus | default(4) }}"

  tasks:
    # ====================
    # System Dependencies
    # ====================
    - name: Install system dependencies (Debian/Ubuntu)
      apt:
        name:
          - git
          - curl
          - wget
          - build-essential
          - cmake
          - libssl-dev
          - libffi-dev
          - pkg-config
          - libopenblas-dev
          - libopencv-dev
          - libgtk-3-dev
          - libavcodec-dev
          - libavformat-dev
          - libswscale-dev
          - nginx
        state: present
        update_cache: true
      when: ansible_os_family == "Debian"

    - name: Install system dependencies (RHEL family)
      dnf:
        name:
          - git
          - curl
          - wget
          - "@Development Tools"
          - cmake
          - openssl-devel
          - libffi-devel
          - pkgconfig
          - openblas-devel
          - opencv-devel
          - gtk3-devel
          - ffmpeg-devel
          - nginx
        state: present
      when: ansible_os_family == "RedHat"

    - name: Install Go (Debian/Ubuntu)
      apt:
        name: golang-go
        state: present
      when: ansible_os_family == "Debian"

    - name: Install Go (RHEL)
      dnf:
        name: golang
        state: present
      when: ansible_os_family == "RedHat"

    # ====================
    # GPU Dependencies (Optional)
    # ====================
    - name: Install NVIDIA CUDA toolkit
      apt:
        name:
          - nvidia-cuda-toolkit
          - nvidia-cuda-dev
        state: present
      when:
        - ansible_os_family == "Debian"
        - enable_gpu
        - gpu_backend == "cuda"
      ignore_errors: true

    - name: Install ROCm (AMD GPU)
      apt:
        name:
          - rocm-libs
          - rocm-dev
        state: present
      when:
        - ansible_os_family == "Debian"
        - enable_gpu
        - gpu_backend == "rocm"
      ignore_errors: true

    # ====================
    # Create Application User
    # ====================
    - name: Create LocalAI group
      group:
        name: "{{ app_group }}"
        state: present

    - name: Create LocalAI user
      user:
        name: "{{ app_user }}"
        group: "{{ app_group }}"
        shell: /bin/false
        system: true
        create_home: false

    # ====================
    # Create Directories
    # ====================
    - name: Create application directories
      file:
        path: "{{ item }}"
        state: directory
        owner: "{{ app_user }}"
        group: "{{ app_group }}"
        mode: "0755"
      loop:
        - "{{ app_root }}"
        - "{{ models_dir }}"
        - "{{ data_dir }}"
        - "{{ data_dir }}/generated"
        - "{{ data_dir }}/assets"
        - "{{ data_dir }}/config"

    # ====================
    # Download LocalAI Binary
    # ====================
    - name: Download LocalAI binary
      get_url:
        url: "https://github.com/mudler/LocalAI/releases/download/{{ localai_version }}/localai-{{ ansible_system | lower }}-{{ ansible_machine | lower }}-{{ gpu_backend }}"
        dest: "{{ app_root }}/localai"
        mode: "0755"
        owner: "{{ app_user }}"
        group: "{{ app_group }}"
      when: not enable_gpu
      register: localai_binary_cpu
      ignore_errors: true

    - name: Download LocalAI binary (CUDA)
      get_url:
        url: "https://github.com/mudler/LocalAI/releases/download/{{ localai_version }}/localai-{{ ansible_system | lower }}-{{ ansible_machine | lower }}-cuda-12"
        dest: "{{ app_root }}/localai"
        mode: "0755"
        owner: "{{ app_user }}"
        group: "{{ app_group }}"
      when:
        - enable_gpu
        - gpu_backend == "cuda"
      register: localai_binary_cuda
      ignore_errors: true

    - name: Build LocalAI from source (fallback)
      block:
        - name: Clone LocalAI repository
          git:
            repo: "{{ localai_repo_url }}"
            dest: "{{ app_root }}/src"
            version: "{{ localai_version }}"
            depth: 1
          become_user: "{{ app_user }}"

        - name: Build LocalAI
          command: make build
          args:
            chdir: "{{ app_root }}/src"
          become_user: "{{ app_user }}"
          environment:
            GRPC_BACKENDS: "all"

        - name: Copy built binary
          copy:
            src: "{{ app_root }}/src/localai"
            dest: "{{ app_root }}/localai"
            mode: "0755"
            owner: "{{ app_user }}"
            group: "{{ app_group }}"
            remote_src: true
      when: >-
        (localai_binary_cpu is skipped or localai_binary_cpu is failed) and
        (localai_binary_cuda is skipped or localai_binary_cuda is failed)

    # ====================
    # Create Configuration
    # ====================
    - name: Create LocalAI configuration
      copy:
        dest: "{{ data_dir }}/config/config.yaml"
        owner: "{{ app_user }}"
        group: "{{ app_group }}"
        mode: "0644"
        content: |
          # LocalAI Configuration
          # Generated by Ansible

          # Model configuration directory
          models: {{ models_dir }}

          # Default model settings
          context_size: {{ context_size }}
          threads: {{ threads }}

          # Example model configuration
          # Uncomment and modify as needed
          #
          # - name: llama-3.2
          #   backend: llama-cpp
          #   parameters:
          #     model: llama-3.2-3b-instruct.Q4_K_M.gguf
          #   context_size: {{ context_size }}
          #   threads: {{ threads }}
          #
          # - name: whisper
          #   backend: whisper
          #   parameters:
          #     model: whisper-base.bin
          #
          # - name: stable-diffusion
          #   backend: stablediffusion
          #   parameters:
          #     model: stable-diffusion-v1-5.bin

    - name: Create LocalAI environment file
      copy:
        dest: "{{ app_root }}/.env"
        owner: "{{ app_user }}"
        group: "{{ app_group }}"
        mode: "0600"
        content: |
          # LocalAI Environment Variables
          # Generated by Ansible

          # Server settings
          ADDRESS=0.0.0.0:{{ app_port }}
          THREADS={{ threads }}

          # Paths
          MODELS_PATH={{ models_dir }}
          DATA_PATH={{ data_dir }}

          # Performance
          CONTEXT_SIZE={{ context_size }}

          # GPU settings
          {% if enable_gpu and gpu_backend == "cuda" %}
          NVIDIA_VISIBLE_DEVICES=all
          {% endif %}

          # Optional: Upload limits
          # IMAGE_UPLOAD_LIMIT=10MB
          # AUDIO_UPLOAD_LIMIT=10MB

    # ====================
    # Systemd Service
    # ====================
    - name: Create LocalAI systemd service
      copy:
        dest: /etc/systemd/system/localai.service
        mode: "0644"
        content: |
          [Unit]
          Description=LocalAI - OpenAI-compatible API
          After=network.target

          [Service]
          Type=simple
          User={{ app_user }}
          Group={{ app_group }}
          WorkingDirectory={{ app_root }}
          EnvironmentFile={{ app_root }}/.env
          ExecStart={{ app_root }}/localai --address 0.0.0.0:{{ app_port }} --models-path {{ models_dir }} --data-path {{ data_dir }}
          Restart=always
          RestartSec=10
          LimitNOFILE=65535
          LimitMEMLOCK=infinity
          {% if enable_gpu %}
          Environment="NVIDIA_VISIBLE_DEVICES=all"
          {% endif %}

          [Install]
          WantedBy=multi-user.target

    - name: Reload systemd
      systemd:
        daemon_reload: true

    - name: Enable and start LocalAI
      systemd:
        name: localai
        enabled: true
        state: started

    # ====================
    # Nginx Reverse Proxy
    # ====================
    - name: Create Nginx configuration
      copy:
        dest: "{{ '/etc/nginx/sites-available/localai' if ansible_os_family == 'Debian' else '/etc/nginx/conf.d/localai.conf' }}"
        mode: "0644"
        content: |
          upstream localai {
              server 127.0.0.1:{{ app_port }};
          }

          server {
              listen 80;
              server_name _;

              # Increase client body size for model uploads
              client_max_body_size 100M;

              # API endpoints
              location / {
                  proxy_pass http://localai;
                  proxy_set_header Host $host;
                  proxy_set_header X-Real-IP $remote_addr;
                  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                  proxy_set_header X-Forwarded-Proto $scheme;
                  proxy_read_timeout 600s;
                  proxy_connect_timeout 75s;
              }

              # OpenAI-compatible chat completions
              location /v1/chat/completions {
                  proxy_pass http://localai;
                  proxy_set_header Host $host;
                  proxy_set_header X-Real-IP $remote_addr;
                  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                  proxy_set_header X-Forwarded-Proto $scheme;
                  proxy_read_timeout 600s;
              }

              # OpenAI-compatible completions
              location /v1/completions {
                  proxy_pass http://localai;
                  proxy_set_header Host $host;
                  proxy_set_header X-Real-IP $remote_addr;
                  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                  proxy_set_header X-Forwarded-Proto $scheme;
                  proxy_read_timeout 600s;
              }

              # Embeddings
              location /v1/embeddings {
                  proxy_pass http://localai;
                  proxy_set_header Host $host;
                  proxy_set_header X-Real-IP $remote_addr;
                  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                  proxy_set_header X-Forwarded-Proto $scheme;
                  proxy_read_timeout 300s;
              }

              # Images API
              location /v1/images {
                  proxy_pass http://localai;
                  proxy_set_header Host $host;
                  proxy_set_header X-Real-IP $remote_addr;
                  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                  proxy_set_header X-Forwarded-Proto $scheme;
                  proxy_read_timeout 300s;
              }

              # Audio API
              location /v1/audio {
                  proxy_pass http://localai;
                  proxy_set_header Host $host;
                  proxy_set_header X-Real-IP $remote_addr;
                  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                  proxy_set_header X-Forwarded-Proto $scheme;
                  proxy_read_timeout 300s;
              }
          }
      notify: Restart Nginx

    - name: Enable Nginx site
      file:
        src: /etc/nginx/sites-available/localai
        dest: /etc/nginx/sites-enabled/localai
        state: link
      when: ansible_os_family == "Debian"

    - name: Enable and start Nginx
      service:
        name: nginx
        state: started
        enabled: true

    # ====================
    # Firewall Configuration
    # ====================
    - name: Allow HTTP through firewall
      ufw:
        rule: allow
        port: "80"
        proto: tcp
      when: ansible_os_family == "Debian"
      ignore_errors: true

    # ====================
    # Final Status
    # ====================
    - name: Wait for LocalAI to be ready
      uri:
        url: "http://localhost:{{ app_port }}/readyz"
        method: GET
        status_code: 200
      register: health_check
      retries: 30
      delay: 10
      until: health_check.status == 200
      ignore_errors: true

    - name: Display deployment information
      debug:
        msg: |
          LocalAI has been deployed successfully!

          ACCESS:
          - URL: http://{{ ansible_host | default(inventory_hostname) }}
          - API: http://localhost:{{ app_port }}

          SERVICES:
          - LocalAI: http://localhost:{{ app_port }} (systemctl status localai)

          PATHS:
          - Models: {{ models_dir }}
          - Data: {{ data_dir }}
          - Config: {{ data_dir }}/config

          API ENDPOINTS:
          - Chat: /v1/chat/completions
          - Completions: /v1/completions
          - Embeddings: /v1/embeddings
          - Images: /v1/images/generations
          - Audio: /v1/audio/transcriptions

          IMPORTANT:
          - Download models to {{ models_dir }}
          - Configure model YAML files in {{ data_dir }}/config
          - Configure TLS/HTTPS before exposing to production
          - For GPU: Ensure NVIDIA drivers are installed
          - First request may be slow as models load

  handlers:
    - name: Restart Nginx
      service:
        name: nginx
        state: restarted
# Basic run
ansible-playbook -i inventory.ini deploy-localai-native.yml
# Limit to specific host
ansible-playbook -i inventory.ini deploy-localai-native.yml --limit localai-server
# With GPU support
ansible-playbook -i inventory.ini deploy-localai-native.yml -e "enable_gpu=true gpu_backend=cuda"
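Overrides can also be kept in a YAML vars file and passed with -e instead of individual key=value pairs. A minimal sketch using the playbook's own variables (the filename and values are illustrative):

# localai-overrides.yml (example values)
enable_gpu: true
gpu_backend: "cuda"
context_size: 8192
threads: 8

# Apply the overrides file
ansible-playbook -i inventory.ini deploy-localai-native.yml -e @localai-overrides.yml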
Test the API with curl:
# Check health
curl http://localhost:8080/readyz
# List models
curl http://localhost:8080/v1/models
# Chat completion (after adding a model)
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-3.2",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
# Download a GGUF model (example URL -- substitute the repository and file you actually want)
cd /opt/localai/models
wget https://huggingface.co/TheBloke/Llama-3.2-3B-Instruct-GGUF/resolve/main/llama-3.2-3b-instruct.Q4_K_M.gguf
# Create model config
cat > /opt/localai/data/config/llama-3.2.yaml << EOF
name: llama-3.2
backend: llama-cpp
parameters:
  model: llama-3.2-3b-instruct.Q4_K_M.gguf
context_size: 4096
threads: 4
EOF
# Restart LocalAI
sudo systemctl restart localai
# Check LocalAI status
systemctl status localai
# View logs
journalctl -u localai -f
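The manual model setup above can also be expressed as Ansible tasks appended to the deployment playbook, so model downloads stay reproducible. A rough sketch reusing the playbook's variables; the download URL is a placeholder and the config mirrors the example above:

- name: Download a GGUF model
  get_url:
    url: "https://example.com/models/llama-3.2-3b-instruct.Q4_K_M.gguf" # placeholder URL, replace with a real GGUF download
    dest: "{{ models_dir }}/llama-3.2-3b-instruct.Q4_K_M.gguf"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: "0644"

- name: Create model configuration
  copy:
    dest: "{{ data_dir }}/config/llama-3.2.yaml"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: "0644"
    content: |
      name: llama-3.2
      backend: llama-cpp
      parameters:
        model: llama-3.2-3b-instruct.Q4_K_M.gguf
      context_size: 4096
      threads: 4

- name: Restart LocalAI to load the new model
  systemd:
    name: localai
    state: restarted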
# SSH to server
ssh user@server
# Stop service
sudo systemctl stop localai
# Download new binary
sudo -u localai wget -O /opt/localai/localai \
https://github.com/mudler/LocalAI/releases/download/v4.1.3/localai-linux-amd64
# Make executable
sudo chmod +x /opt/localai/localai
# Restart service
sudo systemctl start localai
- name: Backup LocalAI models and config
  hosts: localai
  become: true

  vars:
    app_root: /opt/localai
    backup_dir: /backup/localai

  tasks:
    - name: Create backup directory
      file:
        path: "{{ backup_dir }}"
        state: directory
        mode: "0700"

    - name: Backup models directory
      archive:
        path: "{{ app_root }}/models"
        dest: "{{ backup_dir }}/localai-models-{{ ansible_date_time.date }}.tar.gz"
        exclude_path:
          - "{{ app_root }}/models/*.gguf" # Exclude large model files

    - name: Backup config directory
      archive:
        path: "{{ app_root }}/data/config"
        dest: "{{ backup_dir }}/localai-config-{{ ansible_date_time.date }}.tar.gz"
# LocalAI
systemctl status localai
# Nginx
systemctl status nginx
# LocalAI logs
journalctl -u localai -f
# Nginx error log
tail -f /var/log/nginx/error.log
# Health check
curl http://localhost:8080/readyz
# List available models
curl http://localhost:8080/v1/models
# Check NVIDIA GPU
nvidia-smi
# Check the installed LocalAI binary version
/opt/localai/localai --version
# Check model files exist
ls -la /opt/localai/models
# Check model config
cat /opt/localai/data/config/*.yaml