SigNoz in a Nomad (+ Consul) cluster
by Afanasy Barbarov
Complete Guide: Installing SigNoz APM on Nomad with Consul Service Mesh
A comprehensive guide for deploying a production-ready observability stack using SigNoz on HashiCorp Nomad with Consul Connect service mesh. This builds on the Nomad cluster established in the previous guide.
Prerequisites
- Nomad cluster with Consul Connect (from previous guide)
- Host volumes configured for persistent storage
- Traefik load balancer deployed
- At least 6GB RAM and 3 CPU cores available across cluster
Architecture Overview
┌─────────────────────────────────────────────────────────────────┐
│                            Internet                             │
└────────────────────────────────┬────────────────────────────────┘
                                 │
                        ┌────────▼────────┐
                        │ Traefik/Ingress │
                        │     Gateway     │
                        └────────┬────────┘
                                 │
┌────────────────────────────────┼────────────────────────────────┐
│          Consul Service Mesh (Encrypted Communication)          │
│                                │                                │
│ ┌─────────────────┐   ┌────────▼────────┐   ┌─────────────────┐ │
│ │    ZooKeeper    │◄──│     SigNoz      │   │  OpenTelemetry  │ │
│ │      1,2,3      │   │    Frontend     │   │    Collector    │ │
│ │ (Coordination)  │   └────────┬────────┘   └────────┬────────┘ │
│ └─────────────────┘            │                     │          │
│                                │                     │          │
│ ┌─────────────────┐   ┌────────▼────────┐   ┌────────▼────────┐ │
│ │   ClickHouse    │◄──│  SigNoz Query   │◄──│ Metrics/Traces/ │ │
│ │      1,2,3      │   │     Service     │   │ Logs Processing │ │
│ │ (Data Storage)  │   └────────┬────────┘   └─────────────────┘ │
│ └─────────────────┘            │                                │
│                                │                                │
│                       ┌────────▼────────┐                       │
│                       │  AlertManager   │                       │
│                       │                 │                       │
│                       └─────────────────┘                       │
└─────────────────────────────────────────────────────────────────┘

Step 1: Prepare Host Volumes
1.1 Create Directory Structure
On each Nomad node, create the required directories:
# On all nodes (10.0.1.3, 10.0.1.4, 10.0.1.5): create one data directory per
# SigNoz component under the Nomad data root.
sudo mkdir -p /opt/nomad/data/apm/{zookeeper,clickhouse,queryservice,dashboards,alertmanager,otel}

# Set proper ownership: ZooKeeper and ClickHouse data are owned by 1001:1001,
# the collector's by 10001:10001, everything else by the nomad user.
for component in zookeeper clickhouse; do
  sudo chown -R 1001:1001 "/opt/nomad/data/apm/${component}"
done
sudo chown -R 10001:10001 /opt/nomad/data/apm/otel
for component in queryservice dashboards alertmanager; do
  sudo chown -R nomad:nomad "/opt/nomad/data/apm/${component}"
done

# Set permissions (rwxr-xr-x, recursively) on the three trees that are not
# owned by the nomad user.
for component in zookeeper clickhouse otel; do
  sudo chmod -R 755 "/opt/nomad/data/apm/${component}"
done
sudo chmod -R 755 /opt/nomad/data/apm/otel

1.2 Update Nomad Configuration
Add host volumes to each node's /etc/nomad.d/nomad.hcl:
# Nomad client stanza: registers the host directories that the SigNoz jobs
# claim by name through their `volume` blocks.
client {
  # ... existing configuration ...

  # Writable data directories, one per stateful component.
  host_volume "zookeeper" {
    read_only = false
    path      = "/opt/nomad/data/apm/zookeeper"
  }

  host_volume "clickhouse" {
    read_only = false
    path      = "/opt/nomad/data/apm/clickhouse"
  }

  host_volume "signoz_queryservice" {
    read_only = false
    path      = "/opt/nomad/data/apm/queryservice"
  }

  host_volume "signoz_dashboards" {
    read_only = false
    path      = "/opt/nomad/data/apm/dashboards"
  }

  host_volume "signoz_alertmanager" {
    read_only = false
    path      = "/opt/nomad/data/apm/alertmanager"
  }

  host_volume "otel" {
    read_only = false
    path      = "/opt/nomad/data/apm/otel"
  }

  # Read-only views of the host itself: the root filesystem and Docker's
  # per-container directory.
  host_volume "hostfs" {
    read_only = true
    path      = "/"
  }

  host_volume "containers" {
    read_only = true
    path      = "/var/lib/docker/containers"
  }
}

# Restart Nomad on all nodes
sudo systemctl restart nomad

Step 2: ZooKeeper Cluster (Foundation)
2.1 Deploy ZooKeeper
Create 1.zookeeper.nomad.hcl:
job "zookeeper" {
datacenters = ["eu-central"]
type = "service"
update {
max_parallel = 1
}
# ZooKeeper ensemble member 1: a single allocation pinned to the node whose
# meta.node_id is "1", running in a bridge network namespace.
group "zookeeper-1" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "1"
  }

  restart {
    mode     = "delay"
    attempts = 3
    interval = "2m"
    delay    = "30s"
  }

  network {
    mode = "bridge"
  }

  volume "zookeeper" {
    type      = "host"
    source    = "zookeeper"
    read_only = false
  }

  # Client port (2181). The health check sends the four-letter word "ruok"
  # from inside the task and fails with exit code 2 unless the reply is "imok".
  service {
    name     = "zookeeper-1-client"
    provider = "consul"
    port     = 2181
    tags     = ["client", "zookeeper-1"]

    check {
      name     = "Zookeeper Client Check"
      type     = "script"
      task     = "zookeeper"
      command  = "bash"
      args     = ["-c", "status=$(echo ruok | nc localhost 2181); echo $status; if [ \"$status\" != \"imok\" ]; then exit 2; fi"]
      interval = "30s"
      timeout  = "5s"
    }

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-2-client"
            local_bind_port  = 2182
          }
          upstreams {
            destination_name = "zookeeper-3-client"
            local_bind_port  = 2183
          }
        }
      }
    }
  }

  # Quorum ("leader") port (2888); peers are reached on local ports 2889/2890.
  service {
    name     = "zookeeper-1-leader"
    provider = "consul"
    port     = 2888
    tags     = ["leader", "zookeeper-1"]

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-2-leader"
            local_bind_port  = 2889
          }
          upstreams {
            destination_name = "zookeeper-3-leader"
            local_bind_port  = 2890
          }
        }
      }
    }
  }

  # Election port (3888); peers are reached on local ports 3889/3890.
  service {
    name     = "zookeeper-1-election"
    provider = "consul"
    port     = 3888
    tags     = ["election", "zookeeper-1"]

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-2-election"
            local_bind_port  = 3889
          }
          upstreams {
            destination_name = "zookeeper-3-election"
            local_bind_port  = 3890
          }
        }
      }
    }
  }

  task "zookeeper" {
    driver = "docker"

    config {
      image          = "bitnami/zookeeper:3.7.1"
      privileged     = true
      auth_soft_fail = true
    }

    # Rendered into the task environment (env = true). The ZOO_SERVERS entries
    # are this server's own ports followed by the local upstream listeners
    # declared above (2889/3889 -> zookeeper-2, 2890/3890 -> zookeeper-3).
    template {
      env         = true
      destination = "local/env"
      data        = <<EOF
ZOO_SERVER_ID="1"
ZOO_AUTOPURGE_INTERVAL=1
ALLOW_ANONYMOUS_LOGIN=yes
ZOO_PORT_NUMBER="2181"
ZOO_4LW_COMMANDS_WHITELIST="ruok"
ZOO_SERVERS="127.0.0.1:2888:3888,127.0.0.1:2889:3889,127.0.0.1:2890:3890"
EOF
    }

    resources {
      cpu    = 500
      memory = 512
    }

    volume_mount {
      volume      = "zookeeper"
      destination = "/bitnami/zookeeper"
      read_only   = false
    }
  }
}
# ZooKeeper ensemble member 2: a single allocation pinned to the node whose
# meta.node_id is "2", running in a bridge network namespace.
group "zookeeper-2" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "2"
  }

  restart {
    attempts = 3
    interval = "2m"
    delay    = "30s"
    mode     = "delay"
  }

  network {
    mode = "bridge"
  }

  volume "zookeeper" {
    type      = "host"
    source    = "zookeeper"
    read_only = false
  }

  # Client port (2181). The health check sends "ruok" from inside the task and
  # fails with exit code 2 unless the reply is "imok".
  service {
    name     = "zookeeper-2-client"
    port     = 2181
    tags     = ["client", "zookeeper-2"]
    provider = "consul"

    check {
      task     = "zookeeper"
      type     = "script"
      name     = "Zookeeper Client Check"
      command  = "bash"
      args     = ["-c", "status=$(echo ruok | nc localhost 2181); echo $status; if [ \"$status\" != \"imok\" ]; then exit 2; fi"]
      interval = "30s"
      timeout  = "5s"
    }

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-1-client"
            local_bind_port  = 2182
          }
          upstreams {
            destination_name = "zookeeper-3-client"
            local_bind_port  = 2183
          }
        }
      }
    }
  }

  # Quorum ("leader") port (2888): zookeeper-1 is reached on local port 2889,
  # zookeeper-3 on 2890.
  service {
    name     = "zookeeper-2-leader"
    port     = 2888
    tags     = ["leader", "zookeeper-2"]
    provider = "consul"

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-1-leader"
            local_bind_port  = 2889
          }
          upstreams {
            destination_name = "zookeeper-3-leader"
            local_bind_port  = 2890
          }
        }
      }
    }
  }

  # Election port (3888): zookeeper-1 is reached on local port 3889,
  # zookeeper-3 on 3890.
  service {
    name     = "zookeeper-2-election"
    port     = 3888
    tags     = ["election", "zookeeper-2"]
    provider = "consul"

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-1-election"
            local_bind_port  = 3889
          }
          upstreams {
            destination_name = "zookeeper-3-election"
            local_bind_port  = 3890
          }
        }
      }
    }
  }

  task "zookeeper" {
    driver = "docker"

    config {
      image          = "bitnami/zookeeper:3.7.1"
      auth_soft_fail = true
      privileged     = true
    }

    # Rendered into the task environment (env = true).
    #
    # ZOO_SERVERS is positional: entry N becomes server.N. In this group the
    # local quorum/election ports (2888/3888) belong to server 2, while
    # 2889/3889 are the upstream listeners for zookeeper-1 and 2890/3890 those
    # for zookeeper-3, so the list is ordered accordingly. Listing the local
    # ports first (as in group 1) would map server.2 onto ports already bound
    # by the sidecar proxy.
    template {
      destination = "local/env"
      env         = true
      data        = <<EOF
ZOO_SERVER_ID="2"
ZOO_AUTOPURGE_INTERVAL=1
ALLOW_ANONYMOUS_LOGIN=yes
ZOO_PORT_NUMBER="2181"
ZOO_4LW_COMMANDS_WHITELIST="ruok"
ZOO_SERVERS="127.0.0.1:2889:3889,127.0.0.1:2888:3888,127.0.0.1:2890:3890"
EOF
    }

    resources {
      cpu    = 500
      memory = 512
    }

    volume_mount {
      volume      = "zookeeper"
      destination = "/bitnami/zookeeper"
      read_only   = false
    }
  }
}
# ZooKeeper ensemble member 3: a single allocation pinned to the node whose
# meta.node_id is "3", running in a bridge network namespace.
group "zookeeper-3" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "3"
  }

  restart {
    attempts = 3
    interval = "2m"
    delay    = "30s"
    mode     = "delay"
  }

  network {
    mode = "bridge"
  }

  volume "zookeeper" {
    type      = "host"
    source    = "zookeeper"
    read_only = false
  }

  # Client port (2181). The health check sends "ruok" from inside the task and
  # fails with exit code 2 unless the reply is "imok".
  service {
    name     = "zookeeper-3-client"
    port     = 2181
    tags     = ["client", "zookeeper-3"]
    provider = "consul"

    check {
      task     = "zookeeper"
      type     = "script"
      name     = "Zookeeper Client Check"
      command  = "bash"
      args     = ["-c", "status=$(echo ruok | nc localhost 2181); echo $status; if [ \"$status\" != \"imok\" ]; then exit 2; fi"]
      interval = "30s"
      timeout  = "5s"
    }

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-1-client"
            local_bind_port  = 2182
          }
          upstreams {
            destination_name = "zookeeper-2-client"
            local_bind_port  = 2183
          }
        }
      }
    }
  }

  # Quorum ("leader") port (2888): zookeeper-1 is reached on local port 2889,
  # zookeeper-2 on 2890.
  service {
    name     = "zookeeper-3-leader"
    port     = 2888
    tags     = ["leader", "zookeeper-3"]
    provider = "consul"

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-1-leader"
            local_bind_port  = 2889
          }
          upstreams {
            destination_name = "zookeeper-2-leader"
            local_bind_port  = 2890
          }
        }
      }
    }
  }

  # Election port (3888): zookeeper-1 is reached on local port 3889,
  # zookeeper-2 on 3890.
  service {
    name     = "zookeeper-3-election"
    port     = 3888
    tags     = ["election", "zookeeper-3"]
    provider = "consul"

    connect {
      sidecar_service {
        disable_default_tcp_check = true

        proxy {
          upstreams {
            destination_name = "zookeeper-1-election"
            local_bind_port  = 3889
          }
          upstreams {
            destination_name = "zookeeper-2-election"
            local_bind_port  = 3890
          }
        }
      }
    }
  }

  task "zookeeper" {
    driver = "docker"

    config {
      image          = "bitnami/zookeeper:3.7.1"
      auth_soft_fail = true
      privileged     = true
    }

    # Rendered into the task environment (env = true).
    #
    # ZOO_SERVERS is positional: entry N becomes server.N. In this group
    # 2889/3889 are the upstream listeners for zookeeper-1, 2890/3890 those for
    # zookeeper-2, and the local quorum/election ports (2888/3888) belong to
    # server 3, so the local entry comes last. Listing it first (as in group 1)
    # would map server.3 onto ports already bound by the sidecar proxy.
    template {
      destination = "local/env"
      env         = true
      data        = <<EOF
ZOO_SERVER_ID="3"
ZOO_AUTOPURGE_INTERVAL=1
ALLOW_ANONYMOUS_LOGIN=yes
ZOO_PORT_NUMBER="2181"
ZOO_4LW_COMMANDS_WHITELIST="ruok"
ZOO_SERVERS="127.0.0.1:2889:3889,127.0.0.1:2890:3890,127.0.0.1:2888:3888"
EOF
    }

    resources {
      cpu    = 500
      memory = 512
    }

    volume_mount {
      volume      = "zookeeper"
      destination = "/bitnami/zookeeper"
      read_only   = false
    }
  }
}
}

Deploy ZooKeeper:
nomad job run 1.zookeeper.nomad.hcl
# Verify cluster formation
nomad alloc logs $(nomad job allocs zookeeper | grep zookeeper-1 | head -1 | awk '{print $1}') zookeeper

Step 3: ClickHouse Database Cluster
3.1 Create ClickHouse User Scripts
Copy histogram quantile function files to each node:
# Create the custom function for histogram quantiles: the directory lives in
# the ClickHouse host volume.
sudo mkdir -p /opt/nomad/data/apm/clickhouse/user_scripts

# Create histogram quantile executable. The quoted 'EOF' keeps "$@" literal so
# the wrapper forwards its own arguments at run time.
# NOTE(review): the wrapper execs /usr/bin/histogram_quantile_go, which nothing
# in this guide installs - confirm the binary exists inside the container.
sudo tee /opt/nomad/data/apm/clickhouse/user_scripts/histogramQuantile << 'EOF'
#!/bin/bash
exec /usr/bin/histogram_quantile_go "$@"
EOF

sudo chmod +x /opt/nomad/data/apm/clickhouse/user_scripts/histogramQuantile
sudo chown -R 1001:1001 /opt/nomad/data/apm/clickhouse/user_scripts

3.2 Deploy ClickHouse
Create 2.clickhouse.nomad.hcl:
job "clickhouse" {
datacenters = ["eu-central"]
type = "service"
# ClickHouse node 1: a single allocation pinned to the node whose
# meta.node_id is "1", running in a bridge network namespace.
group "clickhouse-1" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "1"
  }

  network {
    mode = "bridge"
  }

  volume "clickhouse" {
    type      = "host"
    source    = "clickhouse"
    read_only = false
  }

  # HTTP interface (8121); health is probed from inside the task via /ping.
  service {
    name     = "clickhouse-1-http"
    port     = 8121
    tags     = ["clickhouse-1"]
    provider = "consul"

    check {
      task     = "clickhouse"
      type     = "script"
      name     = "Clickhouse client check"
      command  = "bash"
      args     = ["-c", "wget --spider -q 0.0.0.0:8121/ping"]
      interval = "30s"
      timeout  = "5s"
    }

    connect {
      sidecar_service {}
    }
  }

  # Native TCP interface (8221). Its sidecar also provides the local listeners
  # used by clickhouse-cluster.xml: 8222/8223 for the other two ClickHouse
  # nodes and 2181-2183 for the three ZooKeeper servers.
  service {
    name     = "clickhouse-1-tcp"
    port     = 8221
    tags     = ["clickhouse-1"]
    provider = "consul"

    connect {
      sidecar_service {
        proxy {
          upstreams {
            destination_name = "clickhouse-2-tcp"
            local_bind_port  = 8222
          }
          upstreams {
            destination_name = "clickhouse-3-tcp"
            local_bind_port  = 8223
          }
          upstreams {
            destination_name = "zookeeper-1-client"
            local_bind_port  = 2181
          }
          upstreams {
            destination_name = "zookeeper-2-client"
            local_bind_port  = 2182
          }
          upstreams {
            destination_name = "zookeeper-3-client"
            local_bind_port  = 2183
          }
        }
      }
    }
  }

  task "clickhouse" {
    driver = "docker"

    config {
      image          = "clickhouse/clickhouse-server:24.3.12-alpine"
      tty            = true
      auth_soft_fail = true
      privileged     = true

      # Bind the rendered templates over the image's configuration files.
      volumes = [
        "local/clickhouse-config.xml:/etc/clickhouse-server/config.xml",
        "local/clickhouse-users.xml:/etc/clickhouse-server/users.xml",
        "local/custom-function.xml:/etc/clickhouse-server/custom-function.xml",
        "local/clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml"
      ]

      ulimit {
        nofile = "262144:262144"
        nproc  = "65535:65535"
      }
    }

    # Configuration templates

    # Main server config. Because this file replaces the image's config.xml
    # entirely, it must itself point ClickHouse at the executable-UDF
    # definition (user_defined_executable_functions_config); without that
    # setting custom-function.xml is never read and histogramQuantile is
    # not registered.
    template {
      destination = "local/clickhouse-config.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <logger>
    <level>information</level>
    <formatting>
      <type>json</type>
    </formatting>
  </logger>
  <display_name>signoz-clickhouse-1</display_name>
  <http_port>8121</http_port>
  <tcp_port>8221</tcp_port>
  <interserver_http_port>8321</interserver_http_port>
  <listen_host>0.0.0.0</listen_host>
  <max_connections>4096</max_connections>
  <path>/var/lib/clickhouse/</path>
  <user_scripts_path>/var/lib/clickhouse/user_scripts/</user_scripts_path>
  <user_defined_executable_functions_config>*function.xml</user_defined_executable_functions_config>
  <opentelemetry_span_log>
    <engine>
      engine MergeTree
      partition by toYYYYMM(finish_date)
      order by (finish_date, finish_time_us, trace_id)
    </engine>
    <database>system</database>
    <table>opentelemetry_span_log</table>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
  </opentelemetry_span_log>
</clickhouse>
EOF
    }

    # Users and profiles: a passwordless "default" user reachable from any
    # address.
    template {
      destination = "local/clickhouse-users.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <profiles>
    <default>
      <max_memory_usage>1000000000</max_memory_usage>
      <load_balancing>random</load_balancing>
    </default>
  </profiles>
  <users>
    <default>
      <password></password>
      <networks>
        <ip>::/0</ip>
      </networks>
      <profile>default</profile>
      <quota>default</quota>
    </default>
  </users>
</clickhouse>
EOF
    }

    # Cluster topology: every host is 127.0.0.1 because all peers are reached
    # through the local upstream listeners declared on the tcp service.
    template {
      destination = "local/clickhouse-cluster.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <zookeeper>
    <node index="1">
      <host>127.0.0.1</host>
      <port>2181</port>
    </node>
    <node index="2">
      <host>127.0.0.1</host>
      <port>2182</port>
    </node>
    <node index="3">
      <host>127.0.0.1</host>
      <port>2183</port>
    </node>
  </zookeeper>
  <remote_servers>
    <cluster>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8221</port>
        </replica>
      </shard>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8222</port>
        </replica>
      </shard>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8223</port>
        </replica>
      </shard>
    </cluster>
  </remote_servers>
</clickhouse>
EOF
    }

    # Executable UDF definition; the command is resolved relative to
    # user_scripts_path from the main config.
    template {
      destination = "local/custom-function.xml"
      data        = <<EOF
<functions>
  <function>
    <type>executable</type>
    <name>histogramQuantile</name>
    <return_type>Float64</return_type>
    <argument>
      <type>Array(Float64)</type>
      <name>buckets</name>
    </argument>
    <argument>
      <type>Array(Float64)</type>
      <name>counts</name>
    </argument>
    <argument>
      <type>Float64</type>
      <name>quantile</name>
    </argument>
    <format>CSV</format>
    <command>./histogramQuantile</command>
  </function>
</functions>
EOF
    }

    volume_mount {
      volume      = "clickhouse"
      destination = "/var/lib/clickhouse/"
      read_only   = false
    }

    resources {
      cpu    = 1000
      memory = 2048
    }
  }
}
# ClickHouse node 2: a single allocation pinned to the node whose
# meta.node_id is "2", running in a bridge network namespace.
group "clickhouse-2" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "2"
  }

  network {
    mode = "bridge"
  }

  volume "clickhouse" {
    type      = "host"
    source    = "clickhouse"
    read_only = false
  }

  # HTTP interface (8121); health is probed from inside the task via /ping.
  service {
    name     = "clickhouse-2-http"
    port     = 8121
    tags     = ["clickhouse-2"]
    provider = "consul"

    check {
      task     = "clickhouse"
      type     = "script"
      name     = "Clickhouse client check"
      command  = "bash"
      args     = ["-c", "wget --spider -q 0.0.0.0:8121/ping"]
      interval = "30s"
      timeout  = "5s"
    }

    connect {
      sidecar_service {}
    }
  }

  # Native TCP interface (8221). Its sidecar also provides the local listeners
  # used by clickhouse-cluster.xml: 8222/8223 for the other two ClickHouse
  # nodes and 2181-2183 for the three ZooKeeper servers.
  service {
    name     = "clickhouse-2-tcp"
    port     = 8221
    tags     = ["clickhouse-2"]
    provider = "consul"

    connect {
      sidecar_service {
        proxy {
          upstreams {
            destination_name = "clickhouse-1-tcp"
            local_bind_port  = 8222
          }
          upstreams {
            destination_name = "clickhouse-3-tcp"
            local_bind_port  = 8223
          }
          upstreams {
            destination_name = "zookeeper-1-client"
            local_bind_port  = 2181
          }
          upstreams {
            destination_name = "zookeeper-2-client"
            local_bind_port  = 2182
          }
          upstreams {
            destination_name = "zookeeper-3-client"
            local_bind_port  = 2183
          }
        }
      }
    }
  }

  task "clickhouse" {
    driver = "docker"

    config {
      image          = "clickhouse/clickhouse-server:24.3.12-alpine"
      tty            = true
      auth_soft_fail = true
      privileged     = true

      # Bind the rendered templates over the image's configuration files.
      volumes = [
        "local/clickhouse-config.xml:/etc/clickhouse-server/config.xml",
        "local/clickhouse-users.xml:/etc/clickhouse-server/users.xml",
        "local/custom-function.xml:/etc/clickhouse-server/custom-function.xml",
        "local/clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml"
      ]

      ulimit {
        nofile = "262144:262144"
        nproc  = "65535:65535"
      }
    }

    # Main server config. Because this file replaces the image's config.xml
    # entirely, it must itself point ClickHouse at the executable-UDF
    # definition (user_defined_executable_functions_config); without that
    # setting custom-function.xml is never read and histogramQuantile is
    # not registered.
    template {
      destination = "local/clickhouse-config.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <logger>
    <level>information</level>
    <formatting>
      <type>json</type>
    </formatting>
  </logger>
  <display_name>signoz-clickhouse-2</display_name>
  <http_port>8121</http_port>
  <tcp_port>8221</tcp_port>
  <interserver_http_port>8321</interserver_http_port>
  <listen_host>0.0.0.0</listen_host>
  <max_connections>4096</max_connections>
  <path>/var/lib/clickhouse/</path>
  <user_scripts_path>/var/lib/clickhouse/user_scripts/</user_scripts_path>
  <user_defined_executable_functions_config>*function.xml</user_defined_executable_functions_config>
  <opentelemetry_span_log>
    <engine>
      engine MergeTree
      partition by toYYYYMM(finish_date)
      order by (finish_date, finish_time_us, trace_id)
    </engine>
    <database>system</database>
    <table>opentelemetry_span_log</table>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
  </opentelemetry_span_log>
</clickhouse>
EOF
    }

    # Users and profiles: a passwordless "default" user reachable from any
    # address.
    template {
      destination = "local/clickhouse-users.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <profiles>
    <default>
      <max_memory_usage>1000000000</max_memory_usage>
      <load_balancing>random</load_balancing>
    </default>
  </profiles>
  <users>
    <default>
      <password></password>
      <networks>
        <ip>::/0</ip>
      </networks>
      <profile>default</profile>
      <quota>default</quota>
    </default>
  </users>
</clickhouse>
EOF
    }

    # Cluster topology: every host is 127.0.0.1 because all peers are reached
    # through the local upstream listeners declared on the tcp service.
    template {
      destination = "local/clickhouse-cluster.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <zookeeper>
    <node index="1">
      <host>127.0.0.1</host>
      <port>2181</port>
    </node>
    <node index="2">
      <host>127.0.0.1</host>
      <port>2182</port>
    </node>
    <node index="3">
      <host>127.0.0.1</host>
      <port>2183</port>
    </node>
  </zookeeper>
  <remote_servers>
    <cluster>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8221</port>
        </replica>
      </shard>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8222</port>
        </replica>
      </shard>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8223</port>
        </replica>
      </shard>
    </cluster>
  </remote_servers>
</clickhouse>
EOF
    }

    # Executable UDF definition; the command is resolved relative to
    # user_scripts_path from the main config.
    template {
      destination = "local/custom-function.xml"
      data        = <<EOF
<functions>
  <function>
    <type>executable</type>
    <name>histogramQuantile</name>
    <return_type>Float64</return_type>
    <argument>
      <type>Array(Float64)</type>
      <name>buckets</name>
    </argument>
    <argument>
      <type>Array(Float64)</type>
      <name>counts</name>
    </argument>
    <argument>
      <type>Float64</type>
      <name>quantile</name>
    </argument>
    <format>CSV</format>
    <command>./histogramQuantile</command>
  </function>
</functions>
EOF
    }

    volume_mount {
      volume      = "clickhouse"
      destination = "/var/lib/clickhouse/"
      read_only   = false
    }

    resources {
      cpu    = 1000
      memory = 2048
    }
  }
}
# ClickHouse node 3: a single allocation pinned to the node whose
# meta.node_id is "3", running in a bridge network namespace.
group "clickhouse-3" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "3"
  }

  network {
    mode = "bridge"
  }

  volume "clickhouse" {
    type      = "host"
    source    = "clickhouse"
    read_only = false
  }

  # HTTP interface (8121); health is probed from inside the task via /ping.
  service {
    name     = "clickhouse-3-http"
    port     = 8121
    tags     = ["clickhouse-3"]
    provider = "consul"

    check {
      task     = "clickhouse"
      type     = "script"
      name     = "Clickhouse client check"
      command  = "bash"
      args     = ["-c", "wget --spider -q 0.0.0.0:8121/ping"]
      interval = "30s"
      timeout  = "5s"
    }

    connect {
      sidecar_service {}
    }
  }

  # Native TCP interface (8221). Its sidecar also provides the local listeners
  # used by clickhouse-cluster.xml: 8222/8223 for the other two ClickHouse
  # nodes and 2181-2183 for the three ZooKeeper servers.
  service {
    name     = "clickhouse-3-tcp"
    port     = 8221
    tags     = ["clickhouse-3"]
    provider = "consul"

    connect {
      sidecar_service {
        proxy {
          upstreams {
            destination_name = "clickhouse-1-tcp"
            local_bind_port  = 8222
          }
          upstreams {
            destination_name = "clickhouse-2-tcp"
            local_bind_port  = 8223
          }
          upstreams {
            destination_name = "zookeeper-1-client"
            local_bind_port  = 2181
          }
          upstreams {
            destination_name = "zookeeper-2-client"
            local_bind_port  = 2182
          }
          upstreams {
            destination_name = "zookeeper-3-client"
            local_bind_port  = 2183
          }
        }
      }
    }
  }

  task "clickhouse" {
    driver = "docker"

    config {
      image          = "clickhouse/clickhouse-server:24.3.12-alpine"
      tty            = true
      auth_soft_fail = true
      privileged     = true

      # Bind the rendered templates over the image's configuration files.
      volumes = [
        "local/clickhouse-config.xml:/etc/clickhouse-server/config.xml",
        "local/clickhouse-users.xml:/etc/clickhouse-server/users.xml",
        "local/custom-function.xml:/etc/clickhouse-server/custom-function.xml",
        "local/clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml"
      ]

      ulimit {
        nofile = "262144:262144"
        nproc  = "65535:65535"
      }
    }

    # Main server config. Because this file replaces the image's config.xml
    # entirely, it must itself point ClickHouse at the executable-UDF
    # definition (user_defined_executable_functions_config); without that
    # setting custom-function.xml is never read and histogramQuantile is
    # not registered.
    template {
      destination = "local/clickhouse-config.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <logger>
    <level>information</level>
    <formatting>
      <type>json</type>
    </formatting>
  </logger>
  <display_name>signoz-clickhouse-3</display_name>
  <http_port>8121</http_port>
  <tcp_port>8221</tcp_port>
  <interserver_http_port>8321</interserver_http_port>
  <listen_host>0.0.0.0</listen_host>
  <max_connections>4096</max_connections>
  <path>/var/lib/clickhouse/</path>
  <user_scripts_path>/var/lib/clickhouse/user_scripts/</user_scripts_path>
  <user_defined_executable_functions_config>*function.xml</user_defined_executable_functions_config>
  <opentelemetry_span_log>
    <engine>
      engine MergeTree
      partition by toYYYYMM(finish_date)
      order by (finish_date, finish_time_us, trace_id)
    </engine>
    <database>system</database>
    <table>opentelemetry_span_log</table>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
  </opentelemetry_span_log>
</clickhouse>
EOF
    }

    # Users and profiles: a passwordless "default" user reachable from any
    # address.
    template {
      destination = "local/clickhouse-users.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <profiles>
    <default>
      <max_memory_usage>1000000000</max_memory_usage>
      <load_balancing>random</load_balancing>
    </default>
  </profiles>
  <users>
    <default>
      <password></password>
      <networks>
        <ip>::/0</ip>
      </networks>
      <profile>default</profile>
      <quota>default</quota>
    </default>
  </users>
</clickhouse>
EOF
    }

    # Cluster topology: every host is 127.0.0.1 because all peers are reached
    # through the local upstream listeners declared on the tcp service.
    template {
      destination = "local/clickhouse-cluster.xml"
      data        = <<EOF
<?xml version="1.0"?>
<clickhouse>
  <zookeeper>
    <node index="1">
      <host>127.0.0.1</host>
      <port>2181</port>
    </node>
    <node index="2">
      <host>127.0.0.1</host>
      <port>2182</port>
    </node>
    <node index="3">
      <host>127.0.0.1</host>
      <port>2183</port>
    </node>
  </zookeeper>
  <remote_servers>
    <cluster>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8221</port>
        </replica>
      </shard>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8222</port>
        </replica>
      </shard>
      <shard>
        <replica>
          <host>127.0.0.1</host>
          <port>8223</port>
        </replica>
      </shard>
    </cluster>
  </remote_servers>
</clickhouse>
EOF
    }

    # Executable UDF definition; the command is resolved relative to
    # user_scripts_path from the main config.
    template {
      destination = "local/custom-function.xml"
      data        = <<EOF
<functions>
  <function>
    <type>executable</type>
    <name>histogramQuantile</name>
    <return_type>Float64</return_type>
    <argument>
      <type>Array(Float64)</type>
      <name>buckets</name>
    </argument>
    <argument>
      <type>Array(Float64)</type>
      <name>counts</name>
    </argument>
    <argument>
      <type>Float64</type>
      <name>quantile</name>
    </argument>
    <format>CSV</format>
    <command>./histogramQuantile</command>
  </function>
</functions>
EOF
    }

    volume_mount {
      volume      = "clickhouse"
      destination = "/var/lib/clickhouse/"
      read_only   = false
    }

    resources {
      cpu    = 1000
      memory = 2048
    }
  }
}
}

Deploy ClickHouse:
nomad job run 2.clickhouse.nomad.hcl
# Verify cluster
nomad alloc logs $(nomad job allocs clickhouse | grep clickhouse-1 | head -1 | awk '{print $1}') clickhouse

Step 4: Schema Migration
4.1 Synchronous Schema Migration
Create 3.signoz-schema-migrator-sync.nomad.hcl:
job "signoz-schema-migrator-sync" {
datacenters = ["eu-central"]
type = "batch"
# Synchronous schema migration: one batch allocation on the node whose
# meta.node_id is "1". It talks to clickhouse-1 through a Connect upstream
# bound to local port 9000.
group "schema-sync" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "1"
  }

  restart {
    mode     = "fail"
    attempts = 2
    interval = "30m"
    delay    = "15s"
  }

  network {
    mode = "bridge"
  }

  # Registered only so the group gets a sidecar with the ClickHouse upstream.
  service {
    name     = "signoz-schema-migrator-sync"
    provider = "consul"

    connect {
      sidecar_service {
        proxy {
          upstreams {
            destination_name = "clickhouse-1-tcp"
            local_bind_port  = 9000
          }
        }
      }
    }
  }

  task "schema-migrator-sync" {
    driver = "docker"

    config {
      image          = "signoz/schema-migrator:0.111.5"
      auth_soft_fail = true

      # The DSN points at the local upstream listener declared above.
      command = "./schema-migrator"
      args = [
        "-dsn=tcp://127.0.0.1:9000/signoz_traces",
        "-db=signoz_traces",
      ]
    }

    resources {
      cpu    = 100
      memory = 128
    }
  }
}
}

4.2 Asynchronous Schema Migration
Create 4.signoz-schema-migrator-async.nomad.hcl:
job "signoz-schema-migrator-async" {
datacenters = ["eu-central"]
type = "batch"
# Asynchronous schema migration: one batch allocation on the node whose
# meta.node_id is "1". It talks to clickhouse-1 through a Connect upstream
# bound to local port 9000.
group "schema-async" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "1"
  }

  restart {
    mode     = "fail"
    attempts = 2
    interval = "30m"
    delay    = "15s"
  }

  network {
    mode = "bridge"
  }

  # Registered only so the group gets a sidecar with the ClickHouse upstream.
  service {
    name     = "signoz-schema-migrator-async"
    provider = "consul"

    connect {
      sidecar_service {
        proxy {
          upstreams {
            destination_name = "clickhouse-1-tcp"
            local_bind_port  = 9000
          }
        }
      }
    }
  }

  task "schema-migrator-async" {
    driver = "docker"

    config {
      image          = "signoz/schema-migrator:0.111.5"
      auth_soft_fail = true

      # The DSN points at the local upstream listener declared above;
      # -migration-type=async selects the asynchronous migrations.
      command = "./schema-migrator"
      args = [
        "-dsn=tcp://127.0.0.1:9000/signoz_logs",
        "-db=signoz_logs",
        "-migration-type=async",
      ]
    }

    resources {
      cpu    = 100
      memory = 128
    }
  }
}
}

Deploy schema migrations:
# Deploy synchronous migration first
nomad job run 3.signoz-schema-migrator-sync.nomad.hcl
# Wait for completion, then deploy async
nomad job run 4.signoz-schema-migrator-async.nomad.hcl
# Verify migrations completed successfully
nomad job status signoz-schema-migrator-sync
nomad job status signoz-schema-migrator-async

Step 5: SigNoz Query Service
Create 5.signoz-query-service.hcl:
job "signoz-query-service" {
datacenters = ["eu-central"]
type = "service"
# SigNoz query service: a single allocation on the node whose meta.node_id is
# "1", running in a bridge network namespace. ClickHouse and AlertManager are
# reached through Connect upstreams bound to 127.0.0.1.
group "query-service" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "1"
  }

  network {
    mode = "bridge"
  }

  volume "signoz_queryservice" {
    type      = "host"
    source    = "signoz_queryservice"
    read_only = false
  }

  volume "signoz_dashboards" {
    type      = "host"
    source    = "signoz_dashboards"
    read_only = false
  }

  # Public API (8080).
  service {
    name     = "signoz-query-service-api"
    port     = 8080
    provider = "consul"

    check {
      name     = "Query Service Health"
      type     = "http"
      path     = "/api/v1/health"
      interval = "30s"
      timeout  = "10s"

      # The service listens inside the bridge namespace, so Consul can only
      # probe it through an exposed path on the sidecar proxy.
      expose = true
    }

    connect {
      sidecar_service {
        proxy {
          # ClickHouse native protocol; used by ClickHouseUrl below.
          upstreams {
            destination_name = "clickhouse-1-tcp"
            local_bind_port  = 9000
          }

          # AlertManager API; ALERTMANAGER_API_PREFIX below targets
          # 127.0.0.1:9093, which only exists if this upstream is declared.
          upstreams {
            destination_name = "signoz-alertmanager"
            local_bind_port  = 9093
          }
        }
      }
    }
  }

  # Internal API (8085); consumed by the AlertManager job.
  service {
    name     = "signoz-query-service-internal"
    port     = 8085
    provider = "consul"

    connect {
      sidecar_service {}
    }
  }

  # Port 4320; consumed by the frontend and the OpenTelemetry collector jobs.
  service {
    name     = "signoz-query-service-ws"
    port     = 4320
    provider = "consul"

    connect {
      sidecar_service {}
    }
  }

  task "query-service" {
    driver = "docker"

    config {
      image          = "signoz/query-service:0.111.5"
      auth_soft_fail = true

      args = [
        "-config=/root/config/prometheus.yml",
        "--prefer-delta=true",
      ]

      # Make the rendered template available at the path passed to -config;
      # without this bind the template below is rendered but never read.
      volumes = [
        "local/prometheus.yml:/root/config/prometheus.yml"
      ]
    }

    # Prometheus-style configuration. YAML nesting is significant, so the
    # indentation inside the heredoc must be preserved.
    template {
      destination = "local/prometheus.yml"
      data        = <<EOF
global:
  scrape_interval: 60s
  evaluation_interval: 60s
rule_files:
  - "/root/config/rules/*.yml"
scrape_configs:
  - job_name: 'signoz-query-service'
    static_configs:
      - targets: ['localhost:8080']
  - job_name: 'nomad-client'
    consul_sd_configs:
      - server: 'localhost:8500'
        services: ['nomad-client']
EOF
    }

    env {
      ClickHouseUrl           = "tcp://127.0.0.1:9000"
      ALERTMANAGER_API_PREFIX = "http://127.0.0.1:9093/api/"
      TELEMETRY_ENABLED       = "true"
      DEPLOYMENT_TYPE         = "docker-standalone-amd"
    }

    volume_mount {
      volume      = "signoz_queryservice"
      destination = "/var/lib/signoz/"
      read_only   = false
    }

    volume_mount {
      volume      = "signoz_dashboards"
      destination = "/root/config/dashboards/"
      read_only   = false
    }

    resources {
      cpu    = 100
      memory = 256
    }
  }
}
}

Deploy query service:
nomad job run 5.signoz-query-service.hcl
# Verify service health
curl -k http://localhost:8080/api/v1/health

Step 6: AlertManager
Create 6.signoz-alertmanager.nomad.hcl:
job "signoz-alertmanager" {
datacenters = ["eu-central"]
type = "service"
# SigNoz AlertManager: a single allocation on the node whose meta.node_id is
# "1", running in a bridge network namespace. The query service's internal
# API is reached through a Connect upstream bound to 127.0.0.1:8085.
group "alertmanager" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "1"
  }

  network {
    mode = "bridge"
  }

  volume "signoz_alertmanager" {
    type      = "host"
    source    = "signoz_alertmanager"
    read_only = false
  }

  service {
    name     = "signoz-alertmanager"
    port     = 9093
    provider = "consul"

    connect {
      sidecar_service {
        proxy {
          upstreams {
            destination_name = "signoz-query-service-internal"
            local_bind_port  = 8085
          }
        }
      }
    }
  }

  task "alertmanager" {
    driver = "docker"

    config {
      image          = "signoz/alertmanager:0.23.4"
      auth_soft_fail = true

      args = [
        "--queryService.url=http://127.0.0.1:8085",
        "--storage.path=/data",
        "--config.file=/etc/alertmanager/config.yml",
      ]

      # Make the rendered template available at the path passed to
      # --config.file; without this bind the template below is rendered but
      # never read.
      volumes = [
        "local/config.yml:/etc/alertmanager/config.yml"
      ]
    }

    # AlertManager configuration. YAML nesting is significant, so the
    # indentation inside the heredoc must be preserved.
    # NOTE(review): smtp_from is a placeholder - the address in the source
    # text was destroyed by email obfuscation; set a real sender.
    template {
      destination = "local/config.yml"
      data        = <<EOF
global:
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alertmanager@example.com'
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:9093/api/v1/alerts'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
EOF
    }

    volume_mount {
      volume      = "signoz_alertmanager"
      destination = "/data"
      read_only   = false
    }

    resources {
      cpu    = 50
      memory = 128
    }
  }
}
}

Deploy AlertManager:
nomad job run 6.signoz-alertmanager.nomad.hcl

Step 7: Frontend and Ingress
7.1 SigNoz Frontend
Create 7.1.signoz-frontend.nomad.hcl:
job "signoz-frontend" {
datacenters = ["eu-central"]
type = "service"
# SigNoz frontend (nginx): a single allocation on the node whose meta.node_id
# is "1", running in a bridge network namespace. The query service is reached
# through Connect upstreams bound to 127.0.0.1:8080 and 127.0.0.1:4320.
group "frontend" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "1"
  }

  network {
    mode = "bridge"
  }

  service {
    name     = "signoz-frontend"
    port     = 3301
    provider = "consul"

    check {
      name     = "Frontend Health"
      type     = "http"
      path     = "/"
      interval = "30s"
      timeout  = "10s"

      # nginx listens inside the bridge namespace, so Consul can only probe
      # it through an exposed path on the sidecar proxy.
      expose = true
    }

    connect {
      sidecar_service {
        proxy {
          upstreams {
            destination_name = "signoz-query-service-api"
            local_bind_port  = 8080
          }
          upstreams {
            destination_name = "signoz-query-service-ws"
            local_bind_port  = 4320
          }
        }
      }
    }
  }

  task "frontend" {
    driver = "docker"

    config {
      image          = "signoz/frontend:0.111.5"
      auth_soft_fail = true

      # Replace the image's nginx.conf with the rendered template.
      volumes = [
        "local/nginx.conf:/etc/nginx/nginx.conf"
      ]
    }

    # nginx serves the static UI on 3301 and proxies /api/ and /ws/ to the
    # local upstream listeners declared above; every other path falls back to
    # index.html.
    template {
      destination = "local/nginx.conf"
      data        = <<EOF
events {
  worker_connections 1024;
}
http {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  gzip on;
  gzip_types text/plain text/css application/json application/javascript text/xml application/xml;
  server {
    listen 3301;
    root /usr/share/nginx/html;
    index index.html;
    location /api/ {
      proxy_pass http://127.0.0.1:8080/api/;
      proxy_set_header Host $host;
      proxy_set_header X-Real-IP $remote_addr;
    }
    location /ws/ {
      proxy_pass http://127.0.0.1:4320/;
      proxy_http_version 1.1;
      proxy_set_header Upgrade $http_upgrade;
      proxy_set_header Connection "upgrade";
    }
    location / {
      try_files $uri $uri/ /index.html;
    }
  }
}
EOF
    }

    resources {
      cpu    = 100
      memory = 128
    }
  }
}
}

7.2 Ingress Gateway
Create 7.2.signoz-ingress.nomad.hcl:
job "signoz-ingress-gateway" {
datacenters = ["eu-central"]
type = "service"
# Consul Connect ingress gateway for the frontend: a single allocation on the
# node whose meta.node_id is "1", using host networking.
group "ingress" {
  count = 1

  constraint {
    attribute = "${meta.node_id}"
    value     = "1"
  }

  network {
    mode = "host"
  }

  service {
    name = "signoz-ingress"
    port = 3301

    # Traefik picks these tags up to route signoz.example.com (TLS, "https"
    # entrypoint) to this service.
    tags = [
      "traefik.enable=true",
      "traefik.http.routers.signoz.rule=Host(`signoz.example.com`)",
      "traefik.http.routers.signoz.entrypoints=https",
      "traefik.http.routers.signoz.service=signoz-ingress",
      "traefik.http.routers.signoz.tls=true"
    ]

    connect {
      gateway {
        proxy {}

        # One TCP listener on 3301 that forwards to the signoz-frontend
        # mesh service.
        ingress {
          listener {
            port     = 3301
            protocol = "tcp"

            service {
              name = "signoz-frontend"
            }
          }
        }
      }
    }
  }
}
}

Deploy frontend and ingress:
nomad job run 7.1.signoz-frontend.nomad.hcl
nomad job run 7.2.signoz-ingress.nomad.hcl
# Verify frontend access
curl -k https://signoz.example.com

Step 8: OpenTelemetry Collector
Create 8.signoz-otel-collector.nomad.hcl:
# Nomad service job for the SigNoz OpenTelemetry Collector.
# The collector receives OTLP, Jaeger and logspout traffic, scrapes host and
# self metrics, and writes everything to ClickHouse through a Consul Connect
# upstream bound on 127.0.0.1:9090.
job "signoz-otel-collector" {
  datacenters = ["eu-central"]
  type        = "service"

  group "otel-collector" {
    count = 1

    # Run only on the client whose node metadata has node_id = "1".
    constraint {
      attribute = "${meta.node_id}"
      value     = "1"
    }

    # NOTE(review): no `port` mappings are declared for this bridge network, so
    # 4317/4318/8888/14250/14268 are only reachable inside the allocation's
    # network namespace or through the Connect sidecars below. The host-IP
    # URLs used later in this guide (http://10.0.1.3:4317 etc.) need explicit
    # port mappings to work - confirm the intended exposure.
    network {
      mode = "bridge"
    }

    # NOTE(review): this volume is declared but the task below does not mount
    # it - confirm whether it is still needed.
    volume "otel" {
      type      = "host"
      source    = "otel"
      read_only = false
    }

    # Host root filesystem, mounted read-only at /hostfs for the hostmetrics
    # receiver (root_path: /hostfs).
    volume "hostfs" {
      type      = "host"
      source    = "hostfs"
      read_only = true
    }

    # Docker container directory, mounted read-only into the task.
    volume "containers" {
      type      = "host"
      source    = "containers"
      read_only = true
    }

    # OTLP/HTTP endpoint. This service's sidecar also provides the upstreams
    # the collector dials on localhost: ClickHouse on 9090 (used by all three
    # exporters) and the query-service OpAMP websocket on 4320.
    service {
      name     = "signoz-otel-http"
      provider = "consul"
      port     = 4318

      connect {
        sidecar_service {
          proxy {
            upstreams {
              destination_name = "clickhouse-1-tcp"
              local_bind_port  = 9090
            }
            upstreams {
              destination_name = "signoz-query-service-ws"
              local_bind_port  = 4320
            }
          }
        }
      }
    }

    # OTLP/gRPC endpoint.
    service {
      name     = "signoz-otel-grpc"
      provider = "consul"
      port     = 4317

      connect {
        sidecar_service {}
      }
    }

    # TCP log endpoint fed by logspout (tcplog/docker receiver on 2255).
    service {
      name     = "signoz-otel-logspout"
      provider = "consul"
      port     = 2255

      connect {
        sidecar_service {}
      }
    }

    task "otel-collector" {
      driver = "docker"

      config {
        image          = "signoz/signoz-otel-collector:0.111.5"
        auth_soft_fail = true
        privileged     = true
        args = [
          "--config=/etc/otel-collector-config.yaml",
          "--manager-config=/etc/manager-config.yaml",
          "--feature-gates=-pkg.translator.prometheus.NormalizeName"
        ]
        # Both files are rendered by the template blocks below.
        volumes = [
          "local/otel-collector-config.yaml:/etc/otel-collector-config.yaml",
          "local/otel-collector-opamp-config.yaml:/etc/manager-config.yaml"
        ]
      }

      env {
        OTEL_RESOURCE_ATTRIBUTES        = "host.name=signoz-host,os.type=linux"
        LOW_CARDINAL_EXCEPTION_GROUPING = "false"
      }

      # Collector pipeline configuration. The heredoc body and its terminator
      # are kept flush-left so the rendered YAML carries exactly the
      # indentation shown here; YAML nesting is significant.
      template {
        destination = "local/otel-collector-config.yaml"
        data        = <<EOF
receivers:
  tcplog/docker:
    listen_address: "0.0.0.0:2255"
    operators:
      - type: regex_parser
        regex: '^<([0-9]+)>[0-9]+ (?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?) (?P<container_id>\S+) (?P<container_name>\S+) [0-9]+ - -( (?P<body>.*))?'
        timestamp:
          parse_from: attributes.timestamp
          layout: '%Y-%m-%dT%H:%M:%S.%LZ'
      - type: move
        from: attributes["body"]
        to: body
      - type: remove
        field: attributes.timestamp
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318
  jaeger:
    protocols:
      grpc:
        endpoint: 0.0.0.0:14250
      thrift_http:
        endpoint: 0.0.0.0:14268
  hostmetrics:
    collection_interval: 30s
    root_path: /hostfs
    scrapers:
      cpu: {}
      load: {}
      memory: {}
      disk: {}
      filesystem: {}
      network: {}
  prometheus:
    config:
      global:
        scrape_interval: 60s
      scrape_configs:
        - job_name: otel-collector
          static_configs:
            - targets:
                - localhost:8888
processors:
  batch:
    send_batch_size: 10000
    send_batch_max_size: 11000
    timeout: 10s
  signozspanmetrics/cumulative:
    metrics_exporter: clickhousemetricswrite
    metrics_flush_interval: 60s
    latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
    dimensions_cache_size: 100000
  resourcedetection:
    detectors: [env, system]
    timeout: 2s
exporters:
  clickhousetraces:
    datasource: tcp://127.0.0.1:9090/signoz_traces
    low_cardinal_exception_grouping: false
  clickhousemetricswrite:
    endpoint: tcp://127.0.0.1:9090/signoz_metrics
    resource_to_telemetry_conversion:
      enabled: true
  clickhouselogsexporter:
    dsn: tcp://127.0.0.1:9090/signoz_logs
    timeout: 10s
    use_new_schema: true
service:
  telemetry:
    logs:
      encoding: json
    metrics:
      address: 0.0.0.0:8888
  pipelines:
    traces:
      receivers: [jaeger, otlp]
      processors: [signozspanmetrics/cumulative, batch]
      exporters: [clickhousetraces]
    metrics:
      receivers: [otlp]
      processors: [batch]
      exporters: [clickhousemetricswrite]
    metrics/generic:
      receivers: [hostmetrics]
      processors: [resourcedetection, batch]
      exporters: [clickhousemetricswrite]
    metrics/prometheus:
      receivers: [prometheus]
      processors: [batch]
      exporters: [clickhousemetricswrite]
    logs:
      receivers: [otlp, tcplog/docker]
      processors: [batch]
      exporters: [clickhouselogsexporter]
EOF
      }

      # OpAMP manager configuration; 127.0.0.1:4320 is the local bind port of
      # the signoz-query-service-ws upstream declared above.
      template {
        destination = "local/otel-collector-opamp-config.yaml"
        data        = <<EOF
server_endpoint: ws://127.0.0.1:4320/v1/opamp
EOF
      }

      volume_mount {
        volume      = "hostfs"
        destination = "/hostfs"
        read_only   = true
      }

      volume_mount {
        volume      = "containers"
        destination = "/var/lib/docker/containers"
        read_only   = true
      }

      resources {
        cpu    = 350
        memory = 256
      }
    }
  }
}
Deploy OTEL collector:
nomad job run 8.signoz-otel-collector.nomad.hcl
Step 9: Log Collection
Create 9.signoz-logspout.nomad.hcl:
# Nomad system job that runs logspout and forwards container logs over
# syslog+tcp to the collector's "signoz-otel-logspout" service via Consul
# Connect.
job "signoz-logspout" {
  type        = "system"
  datacenters = ["eu-central"]

  group "logspout" {
    network {
      mode = "bridge"
    }

    # Up to 3 restarts per 5 minutes, 25s apart; once exhausted, wait for the
    # next interval instead of failing the allocation.
    restart {
      mode     = "delay"
      delay    = "25s"
      interval = "5m"
      attempts = 3
    }

    # The sidecar exposes the collector's log endpoint on 127.0.0.1:2255,
    # which is the address logspout is pointed at below.
    service {
      name     = "signoz-logspout"
      provider = "consul"

      connect {
        sidecar_service {
          proxy {
            upstreams {
              destination_name = "signoz-otel-logspout"
              local_bind_port  = 2255
            }
          }
        }
      }
    }

    task "logspout" {
      driver = "docker"

      config {
        image          = "gliderlabs/logspout:latest"
        auth_soft_fail = true
        command        = "syslog+tcp://127.0.0.1:2255"
        # logspout reads container output through the Docker socket.
        volumes = [
          "/var/run/docker.sock:/var/run/docker.sock"
        ]
      }

      # The {{...}} values are passed to logspout verbatim.
      env {
        LOGSPOUT        = "ignore"
        SYSLOG_FACILITY = "16"
        SYSLOG_HOSTNAME = "{{.Container.Config.Hostname}}"
        SYSLOG_TAG      = "{{.Container.Name}}"
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}
Deploy logspout:
nomad job run 9.signoz-logspout.nomad.hcl
Step 10: Deployment and Verification
10.1 Complete Deployment Sequence
# Deploy in exact order
# Each tier depends on the ones submitted before it, so the sequence stops at
# the first `nomad job run` that fails instead of deploying on top of a broken
# dependency. The steps live in functions that `return` on failure, which
# keeps the snippet safe to paste into an interactive shell.

# Submit every job file passed as an argument, in order.
# Returns non-zero (and names the file on stderr) at the first failure.
run_jobs() {
  local job_file
  for job_file in "$@"; do
    if ! nomad job run "$job_file"; then
      echo "Deployment failed at $job_file" >&2
      return 1
    fi
  done
}

# Roll out the whole stack tier by tier; the sleeps give each tier time to
# settle before the next one is submitted.
deploy_signoz() {
  echo "Deploying ZooKeeper cluster..."
  run_jobs 1.zookeeper.nomad.hcl || return
  sleep 60

  echo "Deploying ClickHouse cluster..."
  run_jobs 2.clickhouse.nomad.hcl || return
  sleep 90

  echo "Running schema migrations..."
  run_jobs 3.signoz-schema-migrator-sync.nomad.hcl \
    4.signoz-schema-migrator-async.nomad.hcl || return
  sleep 30

  echo "Deploying SigNoz services..."
  # NOTE(review): this is the only job file without the `.nomad.hcl` suffix;
  # confirm it matches the file name created in Step 5.
  run_jobs 5.signoz-query-service.hcl || return
  sleep 30
  run_jobs 6.signoz-alertmanager.nomad.hcl || return
  sleep 15

  echo "Deploying frontend and ingress..."
  run_jobs 7.1.signoz-frontend.nomad.hcl \
    7.2.signoz-ingress.nomad.hcl || return
  sleep 30

  echo "Deploying data collection..."
  run_jobs 8.signoz-otel-collector.nomad.hcl \
    9.signoz-logspout.nomad.hcl || return

  echo "Deployment complete!"
}

deploy_signoz
10.2 Health Verification
# Check all jobs are running
nomad job status | grep -E "(zookeeper|clickhouse|signoz|otel)"

# Verify ZooKeeper cluster
# The `ruok` probe is run inside each allocation: `nomad alloc exec` needs the
# task passed with -task before the allocation ID, and the whole pipeline has
# to be handed to a shell in the task, otherwise `nc` runs on the operator's
# machine. A healthy server answers `imok`.
# NOTE(review): assumes the task is named `zookeeper`, listens on 218$i inside
# the allocation, and its image ships `sh` and `nc` - confirm against
# 1.zookeeper.nomad.hcl.
for i in {1..3}; do
  echo "Checking ZooKeeper node $i..."
  # First allocation row mentioning this node's group; column 1 is the ID.
  alloc_id=$(nomad job allocs zookeeper \
    | awk -v group="zookeeper-$i" 'index($0, group) { print $1; exit }')
  if [ -z "$alloc_id" ]; then
    echo "No allocation found for zookeeper-$i" >&2
    continue
  fi
  nomad alloc exec -task zookeeper "$alloc_id" \
    sh -c "echo ruok | nc localhost 218$i"
done

# Verify ClickHouse cluster
for i in {1..3}; do
  echo "Checking ClickHouse node $i..."
  curl -s "http://10.0.1.$((i+2)):812$i/ping"
done

# Check SigNoz frontend
curl -k https://signoz.example.com

# Check OTEL collector endpoints
curl -s http://10.0.1.3:4318/v1/traces
curl -s http://10.0.1.3:8888/metrics
10.3 Access URLs
- SigNoz UI: https://signoz.example.com
- OTEL HTTP: http://10.0.1.3:4318/v1/traces
- OTEL gRPC: http://10.0.1.3:4317
- Jaeger: http://10.0.1.3:14268/api/traces
Step 11: Application Integration
11.1 Configure Your Applications
Update your existing applications to send telemetry to SigNoz:
# Example OTEL configuration
# Fragment of a Collector config: only the exporter and the pipeline wiring
# are shown; receivers and processors come from your existing configuration.
exporters:
  otlp:
    endpoint: "http://10.0.1.3:4317"
    # Plaintext gRPC to the SigNoz collector. Current Collector releases read
    # this flag from the `tls` block, not from the exporter's top level.
    tls:
      insecure: true
service:
  pipelines:
    traces:
      exporters: [otlp]
    metrics:
      exporters: [otlp]
11.2 Update Traefik for OTEL Metrics
Add to your Traefik configuration:
# In traefik.nomad.hcl, add these args:
"--metrics.otlp=true",
"--metrics.otlp.grpc=true",
"--metrics.otlp.grpc.endpoint=10.0.1.3:4317",
"--metrics.otlp.grpc.insecure=true",
"--tracing.otlp=true",
"--tracing.otlp.grpc=true",
"--tracing.otlp.grpc.endpoint=10.0.1.3:4317",
"--tracing.otlp.grpc.insecure=true"