This guide provides a complete Ansible playbook that deploys Ollama natively (a single Go binary, no containers) on Debian 10+, Ubuntu LTS, and RHEL 9+ compatible hosts. Ollama is a local model runner that exposes a simple HTTP API for downloading and running LLMs, with optional GPU acceleration.
Note: For Docker-based deployment, see Ollama Docker Ansible Setup.
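The playbook targets hosts in an `ollama` inventory group. A minimal `inventory.ini` might look like this (host name, address, and user are placeholders):
[ollama]
ollama-server ansible_host=192.0.2.10 ansible_user=admin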
- name: Deploy Ollama (Native)
hosts: ollama
become: true
vars:
# Application settings
app_user: ollama
app_group: ollama
app_port: 11434
ollama_host: "0.0.0.0"
# Ollama settings
ollama_version: "0.17.7"
models_dir: /home/ollama/ollama-models
keep_alive: "5m"
# GPU settings
enable_gpu: false
gpu_type: "" # nvidia, amd, or empty for CPU
# Optional: Environment variables
ollama_origins: "*" # Allow CORS
ollama_debug: false
tasks:
# ====================
# System Dependencies
# ====================
- name: Install system dependencies (Debian/Ubuntu)
apt:
name:
- git
- curl
- wget
- nginx
state: present
update_cache: true
when: ansible_os_family == "Debian"
- name: Install system dependencies (RHEL family)
dnf:
name:
- git
- curl
- wget
- nginx
state: present
when: ansible_os_family == "RedHat"
# ====================
# GPU Dependencies (Optional)
# ====================
- name: Install NVIDIA CUDA toolkit
apt:
name:
- nvidia-cuda-toolkit
state: present
when:
- ansible_os_family == "Debian"
- enable_gpu
- gpu_type == "nvidia"
ignore_errors: true
- name: Install NVIDIA driver (Debian/Ubuntu)
apt:
name:
- nvidia-driver-535
state: present
when:
- ansible_os_family == "Debian"
- enable_gpu
- gpu_type == "nvidia"
ignore_errors: true
- name: Install ROCm (AMD GPU)
apt:
name:
- rocm-libs
state: present
when:
- ansible_os_family == "Debian"
- enable_gpu
- gpu_type == "amd"
ignore_errors: true
# ====================
# Create Application User
# ====================
- name: Create Ollama group
group:
name: "{{ app_group }}"
state: present
- name: Create Ollama user
user:
name: "{{ app_user }}"
group: "{{ app_group }}"
shell: /bin/bash
system: false
create_home: true
home: /home/ollama
- name: Create models directory
file:
path: "{{ models_dir }}"
state: directory
owner: "{{ app_user }}"
group: "{{ app_group }}"
mode: "0755"
# ====================
# Install Ollama
# ====================
- name: Download Ollama install script
get_url:
url: https://ollama.com/install.sh
dest: /tmp/install-ollama.sh
mode: "0755"
- name: Install Ollama (official script)
command: /tmp/install-ollama.sh
environment:
OLLAMA_VERSION: "{{ ollama_version }}"
register: install_result
changed_when: "'installed' in install_result.stdout or 'Updated' in install_result.stdout"
failed_when: install_result.rc != 0 and 'already installed' not in install_result.stderr
- name: "Alternative: Download Ollama binary directly"
  # New toggle, off by default, so this block does not overwrite the script install above
  when: ollama_direct_download | default(false)
  block:
- name: Download Ollama binary
get_url:
url: "https://github.com/ollama/ollama/releases/download/v{{ ollama_version }}/ollama-linux-{{ ansible_machine | lower }}"
dest: /usr/local/bin/ollama
mode: "0755"
when: not ansible_check_mode
rescue:
- name: Fallback to official install
debug:
msg: "Direct binary download failed, Ollama should be installed via script"
# ====================
# Configure Ollama Environment
# ====================
- name: Create Ollama environment directory
file:
path: /etc/ollama
state: directory
mode: "0755"
- name: Create Ollama environment file
copy:
dest: /etc/ollama/ollama.env
mode: "0644"
content: |
# Ollama Environment Variables
# Generated by Ansible
# Server binding
OLLAMA_HOST={{ ollama_host }}:{{ app_port }}
# Origins (CORS)
OLLAMA_ORIGINS={{ ollama_origins }}
# Debug mode
OLLAMA_DEBUG={{ ollama_debug | lower }}
# Keep alive time for models in memory
OLLAMA_KEEP_ALIVE={{ keep_alive }}
# Models directory
OLLAMA_MODELS={{ models_dir }}
# GPU settings
{% if enable_gpu and gpu_type == "nvidia" %}
NVIDIA_VISIBLE_DEVICES=all
{% endif %}
{% if enable_gpu and gpu_type == "amd" %}
HSA_OVERRIDE_GFX_VERSION=11.0.0
{% endif %}
# ====================
# Update Systemd Service
# ====================
- name: Create Ollama systemd service
copy:
dest: /etc/systemd/system/ollama.service
mode: "0644"
content: |
[Unit]
Description=Ollama Service
After=network-online.target
[Service]
Type=exec
User={{ app_user }}
Group={{ app_group }}
EnvironmentFile=/etc/ollama/ollama.env
ExecStart=/usr/local/bin/ollama serve
Restart=always
RestartSec=10
LimitNOFILE=65535
LimitMEMLOCK=infinity
{% if enable_gpu and gpu_type == "nvidia" %}
Environment="NVIDIA_VISIBLE_DEVICES=all"
{% endif %}
[Install]
WantedBy=multi-user.target
- name: Reload systemd
systemd:
daemon_reload: true
- name: Enable and start Ollama
systemd:
name: ollama
enabled: true
state: started
# ====================
# Nginx Reverse Proxy
# ====================
- name: Create Nginx configuration
copy:
dest: "{{ '/etc/nginx/sites-available/ollama' if ansible_os_family == 'Debian' else '/etc/nginx/conf.d/ollama.conf' }}"
mode: "0644"
content: |
upstream ollama {
server 127.0.0.1:{{ app_port }};
}
server {
listen 80;
server_name _;
# Increase client body size for model downloads
client_max_body_size 10G;
# Increase timeouts for model pulls
proxy_read_timeout 600s;
proxy_connect_timeout 75s;
proxy_send_timeout 300s;
location / {
proxy_pass http://ollama;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Forwarded-Host $host;
proxy_buffering off;
proxy_cache off;
chunked_transfer_encoding on;
}
# WebSocket support (if needed)
location /ws {
proxy_pass http://ollama;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
}
notify: Restart Nginx
- name: Enable Nginx site
file:
src: /etc/nginx/sites-available/ollama
dest: /etc/nginx/sites-enabled/ollama
state: link
when: ansible_os_family == "Debian"
- name: Enable and start Nginx
service:
name: nginx
state: started
enabled: true
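    # A quick validation task (a sketch) so a broken rendered config fails the play visibly
    - name: Validate Nginx configuration
      command: nginx -t
      changed_when: false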
# ====================
# Firewall Configuration
# ====================
- name: Allow HTTP through firewall
ufw:
rule: allow
port: "80"
proto: tcp
when: ansible_os_family == "Debian"
ignore_errors: true
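    # The ufw task above covers Debian/Ubuntu only; a matching rule for RHEL-family hosts
    # (a sketch, assuming firewalld is running and the ansible.posix collection is installed)
    - name: Allow HTTP through firewall (RHEL family)
      firewalld:
        port: 80/tcp
        permanent: true
        state: enabled
        immediate: true
      when: ansible_os_family == "RedHat"
      ignore_errors: true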
# ====================
# Pre-download Models (Optional)
# ====================
- name: Wait for Ollama to be ready
uri:
url: "http://localhost:{{ app_port }}/api/tags"
method: GET
status_code: 200
register: health_check
retries: 30
delay: 10
until: health_check.status == 200
ignore_errors: true
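    # Optional pre-download, as the section header above suggests. A sketch, assuming a
    # user-supplied preload_models list variable (e.g. ["llama3.2"]); empty by default.
    - name: Pre-download models
      command: ollama pull {{ item }}
      loop: "{{ preload_models | default([]) }}"
      environment:
        OLLAMA_HOST: "127.0.0.1:{{ app_port }}"
      changed_when: true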
- name: Display deployment information
debug:
msg: |
Ollama has been deployed successfully!
ACCESS:
- URL: http://{{ ansible_host | default(inventory_hostname) }}
- API: http://localhost:{{ app_port }}
SERVICES:
- Ollama: http://localhost:{{ app_port }} (systemctl status ollama)
PATHS:
- Models: {{ models_dir }}
API ENDPOINTS:
- List models: /api/tags
- Generate: /api/generate
- Chat: /api/chat
- Pull model: /api/pull
- Push model: /api/push
USAGE EXAMPLES:
# Pull a model
ollama pull llama3.2
# Run a model
ollama run llama3.2 "Hello!"
# List models
ollama list
# Via API
curl http://localhost:{{ app_port }}/api/generate -d '{
"model": "llama3.2",
"prompt": "Hello!"
}'
IMPORTANT:
- Models are stored in {{ models_dir }}
- Configure TLS/HTTPS before exposing to production
- Ollama binds to {{ ollama_host }}:{{ app_port }}; set ollama_host to 127.0.0.1 if only Nginx should be reachable
- For GPU: Ensure NVIDIA/AMD drivers are installed
- First model pull may take time depending on size
- Default model storage is ~/.ollama/models; this playbook overrides it via OLLAMA_MODELS={{ models_dir }}
handlers:
- name: Restart Nginx
service:
name: nginx
state: restarted
Run the playbook:
# Basic run (CPU-only)
ansible-playbook -i inventory.ini deploy-ollama-native.yml
# With GPU support
ansible-playbook -i inventory.ini deploy-ollama-native.yml -e "enable_gpu=true gpu_type=nvidia"
# Limit to specific host
ansible-playbook -i inventory.ini deploy-ollama-native.yml --limit ollama-server
# Dry run (check mode)
ansible-playbook -i inventory.ini deploy-ollama-native.yml --check
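# Syntax check only (no connection to hosts)
ansible-playbook -i inventory.ini deploy-ollama-native.yml --syntax-check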
Test the API with curl:
# Check health / list models
curl http://localhost:11434/api/tags
# Pull a model
ollama pull llama3.2
# Run a model
ollama run llama3.2 "Hello, how are you?"
# Generate via API
curl http://localhost:11434/api/generate -d '{
"model": "llama3.2",
"prompt": "Hello!"
}'
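# Chat via API (the /api/chat endpoint listed above takes a messages array instead of a prompt)
curl http://localhost:11434/api/chat -d '{
  "model": "llama3.2",
  "messages": [{"role": "user", "content": "Hello!"}],
  "stream": false
}'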
# Check Ollama status
systemctl status ollama
# View logs
journalctl -u ollama -f
# Check GPU (NVIDIA)
nvidia-smi
# Create symlink if needed
sudo -u ollama ln -s /home/ollama/ollama-models /home/ollama/.ollama/models
# Or set OLLAMA_MODELS environment variable
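# Example: append the override to the env file this playbook manages, then restart
echo 'OLLAMA_MODELS=/home/ollama/ollama-models' | sudo tee -a /etc/ollama/ollama.env
sudo systemctl restart ollama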
# SSH to server
ssh user@server
# Stop service
sudo systemctl stop ollama
# Run install script with new version
curl -fsSL https://ollama.com/install.sh | sh
# Or download specific version
sudo wget -O /usr/local/bin/ollama \
https://github.com/ollama/ollama/releases/download/v0.20.6/ollama-linux-amd64
sudo chmod +x /usr/local/bin/ollama
# Restart service
sudo systemctl start ollama
# Verify version
ollama --version
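# Alternatively, re-run the deployment playbook with the new version pinned
ansible-playbook -i inventory.ini deploy-ollama-native.yml -e "ollama_version=0.20.6"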
A companion playbook backs up the model manifests (small metadata files) rather than the multi-gigabyte blobs:
- name: Backup Ollama models
hosts: ollama
become: true
vars:
models_dir: /home/ollama/ollama-models
backup_dir: /backup/ollama
tasks:
- name: Create backup directory
file:
path: "{{ backup_dir }}"
state: directory
mode: "0700"
- name: Backup models manifest
archive:
path: "{{ models_dir }}/manifests"
dest: "{{ backup_dir }}/ollama-manifests-{{ ansible_date_time.date }}.tar.gz"
- name: "Note: Model blobs are large"
debug:
msg: "Model blobs in {{ models_dir }}/blobs are large. Consider backing up only manifests and re-downloading models."
- name: Export Ollama model list
hosts: ollama
become: true
vars:
backup_dir: /backup/ollama
tasks:
  - name: Ensure backup directory exists
    file:
      path: "{{ backup_dir }}"
      state: directory
      mode: "0700"
- name: Get model list
command: ollama list
register: model_list
changed_when: false
- name: Save model list
copy:
dest: "{{ backup_dir }}/ollama-models-list-{{ ansible_date_time.date }}.txt"
content: "{{ model_list.stdout }}"
Troubleshooting. Check service status:
# Ollama
systemctl status ollama
# Nginx
systemctl status nginx
# Ollama logs
journalctl -u ollama -f
# Enable debug logging (OLLAMA_DEBUG must be set in the service environment, not on journalctl)
sudo sed -i 's/^OLLAMA_DEBUG=.*/OLLAMA_DEBUG=true/' /etc/ollama/ollama.env
sudo systemctl restart ollama
journalctl -u ollama -f
# List models
curl http://localhost:11434/api/tags
# Test generate
curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "prompt": "Hello"}'
# Check NVIDIA GPU
nvidia-smi
# Check ROCm (AMD)
rocminfo
# Test Ollama with GPU
ollama run llama3.2 --verbose
# Check network connectivity
curl -I https://ollama.com
# Try pulling with verbose output
OLLAMA_DEBUG=1 ollama pull llama3.2
# Check disk space
df -h /home/ollama
# Fix models directory permissions
sudo chown -R ollama:ollama /home/ollama/ollama-models
sudo chmod -R 755 /home/ollama/ollama-models
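# Restart and confirm the API responds after fixing permissions
sudo systemctl restart ollama
curl -s http://localhost:11434/api/tags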