# documentation: https://posthog.com
# slogan: The single platform to analyze, test, observe, and deploy new features
# tags: analytics, product, open-source, self-hosted, ab-testing, event-tracking
# logo: svgs/posthog.svg
# minversion: 4.0.0-beta.222
services:
# Primary Postgres database for the PostHog app (also seeds Temporal below).
db:
image: postgres:12-alpine
volumes:
- posthog-postgres-data:/var/lib/postgresql/data
environment:
- POSTGRES_USER=posthog
- POSTGRES_DB=posthog
- POSTGRES_PASSWORD=$SERVICE_PASSWORD_POSTGRES
# Readiness gate used by temporal's depends_on (condition: service_healthy).
healthcheck:
test: ["CMD-SHELL", "pg_isready -U posthog"]
interval: 2s
timeout: 10s
retries: 15
# Redis cache / Celery broker (referenced by REDIS_URL in the app services).
redis:
image: redis:6.2.7-alpine
# Cap memory at 200mb and evict least-recently-used keys when full.
command: redis-server --maxmemory-policy allkeys-lru --maxmemory 200mb
# ClickHouse analytics datastore. The bind mounts that follow inject IDL
# JSON schemas (copied into format_schemas by init-db.sh below) and the
# server configuration files.
clickhouse:
image: clickhouse/clickhouse-server:23.11.2.11-alpine
volumes:
- type: bind
source: ./idl/events_dead_letter_queue.json
target: /idl/events_dead_letter_queue.json
content: |
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "file://posthog/idl/events_dead_letter_queue.json",
"title": "events_dead_letter_queue",
"description": "Events that failed to be validated or processed and are sent to the DLQ",
"type": "object",
"properties": {
"id": {
"description": "uuid for the submission",
"type": "string"
},
"event_uuid": {
"description": "uuid for the event",
"type": "string"
},
"event": {
"description": "event type",
"type": "string"
},
"properties": {
"description": "String representation of the properties json object",
"type": "string"
},
"distinct_id": {
"description": "PostHog distinct_id",
"type": "string"
},
"team_id": {
"description": "team_id (maps to the project under the organization)",
"type": "number"
},
"elements_chain": {
"description": "Used for autocapture. DOM element hierarchy",
"type": "string"
},
"created_at": {
"description": "Used for autocapture. DOM element hierarchy",
"type": "number"
},
"ip": {
"description": "IP Address of the associated with the event",
"type": "string"
},
"site_url": {
"description": "Site URL associated with the event the event",
"type": "string"
},
"now": {
"description": "Timestamp of the DLQ event",
"type": "number"
},
"raw_payload": {
"description": "Raw payload of the event that failed to be consumed",
"type": "string"
},
"error_timestamp": {
"description": "Timestamp that the error of ingestion occurred",
"type": "number"
},
"error_location": {
"description": "Source of error if known",
"type": "string"
},
"error": {
"description": "Error if known",
"type": "string"
},
"tags": {
"description": "Tags associated with the error or event",
"type": "array",
"items": {
"type": "string"
}
}
},
"required": ["raw_payload"]
}
- type: bind
source: ./idl/events_json.json
target: /idl/events_json.json
content: |
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "file://posthog/idl/events_json.json",
"title": "events_json",
"description": "Event schema that is destined for ClickHouse",
"type": "object",
"properties": {
"uuid": {
"description": "uuid for the event",
"type": "string"
},
"event": {
"description": "event type",
"type": "string"
},
"properties": {
"description": "String representation of the properties json object",
"type": "string"
},
"timestamp": {
"description": "Timestamp that the event occurred",
"type": "number"
},
"team_id": {
"description": "team_id (maps to the project under the organization)",
"type": "number"
},
"distinct_id": {
"description": "PostHog distinct_id",
"type": "string"
},
"elements_chain": {
"description": "Used for autocapture. DOM element hierarchy",
"type": "string"
},
"created_at": {
"description": "Timestamp when event was created",
"type": "number"
},
"person_id": {
"description": "UUID for the associated person if available",
"type": "string"
},
"person_created_at": {
"description": "Timestamp for when the associated person was created",
"type": "number"
},
"person_properties": {
"description": "String representation of the person JSON object",
"type": "string"
},
"group0_properties": {
"description": "String representation of a group's properties",
"type": "string"
},
"group1_properties": {
"description": "String representation of a group's properties",
"type": "string"
},
"group2_properties": {
"description": "String representation of a group's properties",
"type": "string"
},
"group3_properties": {
"description": "String representation of a group's properties",
"type": "string"
},
"group4_properties": {
"description": "String representation of a group's properties",
"type": "string"
},
"group0_created_at": {
"description": "Group's creation timestamp",
"type": "number"
},
"group1_created_at": {
"description": "Group's creation timestamp",
"type": "number"
},
"group2_created_at": {
"description": "Group's creation timestamp",
"type": "number"
},
"group3_created_at": {
"description": "Group's creation timestamp",
"type": "number"
},
"group4_created_at": {
"description": "Group's creation timestamp",
"type": "number"
}
},
"required": ["uuid", "event", "properties", "timestamp", "team_id"]
}
- type: bind
source: ./idl/groups.json
target: /idl/groups.json
content: |
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "file://posthog/idl/groups.json",
"title": "groups",
"description": "Groups schema that is destined for ClickHouse",
"type": "object",
"properties": {
"group_type_index": {
"description": "Group type index",
"type": "number"
},
"group_key": {
"description": "Group Key",
"type": "string"
},
"created_at": {
"description": "Group creation timestamp",
"type": "number"
},
"team_id": {
"description": "Team ID associated with group",
"type": "number"
},
"group_properties": {
"description": "String representation of group JSON properties object",
"type": "string"
}
},
"required": ["group_type_index", "group_key", "created_at", "team_id", "group_properties"]
}
- type: bind
source: ./idl/idl.md
target: /idl/idl.md
content: |
# IDL - Interface Definition Language
This directory is responsible for defining the schemas of the data between services.
Primarily this will be between services and ClickHouse, but can be really anything at the boundary of services.
The reason why we do this is because it makes generating code, validating data, and understanding the system a whole lot easier. We've had a few customers request this of us for engineering a deeper integration with us.
- type: bind
source: ./idl/person.json
target: /idl/person.json
content: |
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "file://posthog/idl/person.json",
"title": "person",
"description": "Person schema that is destined for ClickHouse",
"type": "object",
"properties": {
"id": {
"description": "UUID for the person",
"type": "string"
},
"created_at": {
"description": "Person creation timestamp",
"type": "number"
},
"team_id": {
"description": "Team ID associated with person",
"type": "number"
},
"properties": {
"description": "String representation of person JSON properties object",
"type": "string"
},
"is_identified": {
"description": "Boolean is the person identified?",
"type": "boolean"
},
"is_deleted": {
"description": "Boolean is the person deleted?",
"type": "boolean"
},
"version": {
"description": "Version field for collapsing later (psuedo-tombstone)",
"type": "number"
}
},
"required": ["id", "created_at", "team_id", "properties", "is_identified", "is_deleted", "version"]
}
- type: bind
source: ./idl/person_distinct_id.json
target: /idl/person_distinct_id.json
content: |
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "file://posthog/idl/person_distinct_id.json",
"title": "person_distinct_id",
"description": "Person distinct id schema that is destined for ClickHouse",
"type": "object",
"properties": {
"distinct_id": {
"description": "User provided ID for the distinct user",
"type": "string"
},
"person_id": {
"description": "UUID of the person",
"type": "string"
},
"team_id": {
"description": "Team ID associated with person_distinct_id",
"type": "number"
},
"_sign": {
"description": "Used for collapsing later different versions of a distinct id (psuedo-tombstone)",
"type": "number"
},
"is_deleted": {
"description": "Boolean is the person distinct_id deleted?",
"type": "boolean"
}
},
"required": ["distinct_id", "person_id", "team_id", "_sign", "is_deleted"]
}
- type: bind
source: ./idl/person_distinct_id2.json
target: /idl/person_distinct_id2.json
content: |
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "file://posthog/idl/person_distinct_id2.json",
"title": "person_distinct_id2",
"description": "Person distinct id2 schema that is destined for ClickHouse",
"type": "object",
"properties": {
"distinct_id": {
"description": "User provided ID for the distinct user",
"type": "string"
},
"person_id": {
"description": "UUID of the person",
"type": "string"
},
"team_id": {
"description": "Team ID associated with person_distinct_id",
"type": "number"
},
"version": {
"description": "Used for collapsing later different versions of a distinct id (psuedo-tombstone)",
"type": "number"
},
"is_deleted": {
"description": "Boolean is the person distinct_id deleted?",
"type": "boolean"
}
},
"required": ["distinct_id", "person_id", "team_id", "version", "is_deleted"]
}
- type: bind
source: ./idl/plugin_log_entries.json
target: /idl/plugin_log_entries.json
content: |
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "file://posthog/idl/plugin_log_entries.json",
"title": "plugin_log_entries",
"description": "Plugin log entries that are destined for ClickHouse",
"type": "object",
"properties": {
"id": {
"description": "UUID for the log entry",
"type": "string"
},
"team_id": {
"description": "Team ID associated with person_distinct_id",
"type": "number"
},
"plugin_id": {
"description": "Plugin ID associated with the log entry",
"type": "number"
},
"plugin_config_id": {
"description": "Plugin Config ID associated with the log entry",
"type": "number"
},
"timestamp": {
"description": "Timestamp for when the log entry was created",
"type": "number"
},
"source": {
"description": "Source of the log entry",
"type": "string"
},
"type": {
"description": "Log entry type",
"type": "string"
},
"message": {
"description": "Log entry body",
"type": "string"
},
"instance_id": {
"description": "UUID of the instance that generated the log entry",
"type": "string"
}
},
"required": [
"id",
"team_id",
"plugin_id",
"plugin_config_id",
"timestamp",
"source",
"type",
"message",
"instance_id"
]
}
# Init script: on first boot, copy the mounted IDL schemas into
# ClickHouse's format_schemas directory.
- type: bind
source: ./docker/clickhouse/docker-entrypoint-initdb.d/init-db.sh
target: /docker-entrypoint-initdb.d/init-db.sh
content: |
#!/bin/bash
set -e
cp -r /idl/* /var/lib/clickhouse/format_schemas/
# NOTE(review): the config.xml payload below appears to have lost all of its
# XML element markup — only bare text values remain (e.g. port numbers,
# log paths, and fragments of a CDATA comment). Restore this file from the
# upstream PostHog hobby deployment's docker/clickhouse/config.xml before use.
- type: bind
source: ./docker/clickhouse/config.xml
target: /etc/clickhouse-server/config.xml
content: |
trace
/var/log/clickhouse-server/clickhouse-server.log
/var/log/clickhouse-server/clickhouse-server.err.log
1000M
10
8123
9000
9004
9005
8443
9440
9009
4096
3
false
/path/to/ssl_cert_file
/path/to/ssl_key_file
false
/path/to/ssl_ca_cert_file
none
0
-1
-1
false
/etc/clickhouse-server/server.crt
/etc/clickhouse-server/server.key
/etc/clickhouse-server/dhparam.pem
none
true
true
sslv2,sslv3
true
true
true
sslv2,sslv3
true
RejectCertificateHandler
100
0
10000
0.9
4194304
0
8589934592
5368709120
1000
134217728
10000
/var/lib/clickhouse/
/var/lib/clickhouse/tmp/
/var/lib/clickhouse/user_files/
users.xml
/var/lib/clickhouse/access/
default
default
true
false
' | sed -e 's|.*>\(.*\)<.*|\1|')
wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb
apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb
clickhouse-jdbc-bridge &
* [CentOS/RHEL]
export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge
export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|')
wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm
yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm
clickhouse-jdbc-bridge &
Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information.
]]>
localhost
9000
.*
zookeeper
2181
01
ch1
3600
3600
60
system
toYYYYMM(event_date)
7500
system
toYYYYMM(event_date)
7500
system
toYYYYMM(event_date)
7500
system
toYYYYMM(event_date)
7500
system
toYYYYMM(event_date)
7500
system
7500
1000
system
7000
engine MergeTree
partition by toYYYYMM(finish_date)
order by (finish_date, finish_time_us, trace_id)
system
7500
system
1000
system
toYYYYMM(event_date)
7500
*_dictionary.xml
*_function.xml
/clickhouse/task_queue/ddl
click_cost
any
0
3600
86400
60
max
0
60
3600
300
86400
3600
/var/lib/clickhouse/format_schemas/
hide encrypt/decrypt arguments
((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)
\1(???)
false
false
https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277
# NOTE(review): like config.xml above, the users.xml payload below has lost
# its XML markup — only bare values remain. Restore from the upstream
# PostHog hobby deployment's docker/clickhouse/users.xml before use.
- type: bind
source: ./docker/clickhouse/users.xml
target: /etc/clickhouse-server/users.xml
content: |
10000000000
random
1
1
::/0
default
default
3600
0
0
0
0
0
# Persistent ClickHouse data volume; Kafka/ZooKeeper must start first.
- clickhouse-data:/var/lib/clickhouse
depends_on:
- kafka
- zookeeper
# ZooKeeper coordination service used by Kafka and ClickHouse (see the
# zookeeper host/port references in their configs above).
zookeeper:
image: zookeeper:3.7.0
volumes:
- zookeeper-datalog:/datalog
- zookeeper-data:/data
- zookeeper-logs:/logs
# Kafka event bus between the app/plugin services and ClickHouse.
kafka:
image: ghcr.io/posthog/kafka-container:v2.8.2
depends_on:
- zookeeper
environment:
- KAFKA_BROKER_ID=1001
- KAFKA_CFG_RESERVED_BROKER_MAX_ID=1001
- KAFKA_CFG_LISTENERS=PLAINTEXT://:9092
# Advertise the compose-internal hostname so sibling services can connect.
- KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092
- KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181
- ALLOW_PLAINTEXT_LISTENER=yes
# MinIO S3-compatible object storage.
object_storage:
image: minio/minio:RELEASE.2022-06-25T15-50-16Z
environment:
- MINIO_ROOT_USER=$SERVICE_USER_MINIO
- MINIO_ROOT_PASSWORD=$SERVICE_PASSWORD_MINIO
entrypoint: sh
# Non-default ports 19000/19001 — presumably to avoid clashing with
# ClickHouse's 9000 on the shared network.
command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data'
volumes:
- object_storage:/data
# Local SMTP catcher with a web UI for inspecting outgoing mail.
maildev:
image: maildev/maildev:2.0.5
# Flower dashboard for monitoring Celery via the shared Redis broker.
flower:
image: mher/flower:2.0.0
environment:
FLOWER_PORT: 5555
CELERY_BROKER_URL: redis://redis:6379
# PostHog web application. Starts via /compose/start, which first waits for
# ClickHouse and Postgres, then runs migrations and the server.
web:
  image: posthog/posthog:latest
  command: /compose/start
  volumes:
    - type: bind
      source: ./compose/start
      target: /compose/start
      content: |
        #!/bin/bash
        /compose/wait
        ./bin/migrate
        ./bin/docker-server
    - type: bind
      source: ./compose/wait
      target: /compose/wait
      content: |
        #!/usr/bin/env python3
        import socket
        import time

        # Block until ClickHouse (9000) and Postgres (5432) accept TCP
        # connections. A while-loop replaces the previous recursive retry,
        # which deepened the stack on every attempt and would eventually hit
        # the interpreter's recursion limit on long waits. OSError is caught
        # instead of only ConnectionRefusedError so that DNS-not-yet-ready
        # errors (socket.gaierror) during compose startup are retried too.
        while True:
            print("Waiting for ClickHouse and Postgres to be ready")
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    s.connect(('clickhouse', 9000))
                print("Clickhouse is ready")
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    s.connect(('db', 5432))
                print("Postgres is ready")
                break
            except OSError:
                time.sleep(5)
  environment:
    # Coolify magic variable: exposes the service on port 8000 via the FQDN.
    - SERVICE_FQDN_WEB_8000
    - OPT_OUT_CAPTURING=true
    - DISABLE_SECURE_SSL_REDIRECT=true
    - IS_BEHIND_PROXY=true
    - TRUST_ALL_PROXIES=true
    - DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog
    - CLICKHOUSE_HOST=clickhouse
    - CLICKHOUSE_DATABASE=posthog
    - CLICKHOUSE_SECURE=false
    - CLICKHOUSE_VERIFY=false
    - KAFKA_HOSTS=kafka
    - REDIS_URL=redis://redis:6379/
    - PGHOST=db
    - PGUSER=posthog
    - PGPASSWORD=$SERVICE_PASSWORD_POSTGRES
    - DEPLOYMENT=hobby
    - SITE_URL=$SERVICE_FQDN_WEB
    - SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY
  depends_on:
    - db
    - redis
    - clickhouse
    - kafka
    - object_storage
# Celery background worker with beat scheduler; mirrors the web service's
# environment so tasks see the same datastores.
worker:
image: posthog/posthog:latest
command: ./bin/docker-worker-celery --with-scheduler
environment:
- OPT_OUT_CAPTURING=true
- DISABLE_SECURE_SSL_REDIRECT=true
- IS_BEHIND_PROXY=true
- TRUST_ALL_PROXIES=true
- DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog
- CLICKHOUSE_HOST=clickhouse
- CLICKHOUSE_DATABASE=posthog
- CLICKHOUSE_SECURE=false
- CLICKHOUSE_VERIFY=false
- KAFKA_HOSTS=kafka
- REDIS_URL=redis://redis:6379/
- PGHOST=db
- PGUSER=posthog
- PGPASSWORD=$SERVICE_PASSWORD_POSTGRES
- DEPLOYMENT=hobby
- SITE_URL=$SERVICE_FQDN_WEB
- SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY
depends_on:
- db
- redis
- clickhouse
- kafka
- object_storage
# capture:
# image: ghcr.io/posthog/capture:main
# environment:
# ADDRESS: "0.0.0.0:3000"
# KAFKA_TOPIC: "events_plugin_ingestion"
# KAFKA_HOSTS: "kafka:9092"
# REDIS_URL: "redis://redis:6379/"
# depends_on:
# - redis
# - kafka
# Plugin server: consumes events from Kafka and writes to ClickHouse.
plugins:
image: posthog/posthog:latest
command: ./bin/plugin-server --no-restart-loop
environment:
- DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog
# NOTE(review): KAFKA_HOSTS carries an explicit port here but not in the
# web/worker services — confirm both forms are accepted by their consumers.
- KAFKA_HOSTS=kafka:9092
- REDIS_URL=redis://redis:6379/
- CLICKHOUSE_HOST=clickhouse
- CLICKHOUSE_DATABASE=posthog
- CLICKHOUSE_SECURE=false
- CLICKHOUSE_VERIFY=false
- SITE_URL=$SERVICE_FQDN_WEB
- SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY
depends_on:
- db
- redis
- clickhouse
- kafka
- object_storage
# migrate:
# image: posthog/posthog:latest
# restart: "no"
# command: sh -c "python manage.py migrate && python manage.py migrate_clickhouse && python manage.py run_async_migrations"
# environment:
# - DISABLE_SECURE_SSL_REDIRECT=true
# - IS_BEHIND_PROXY=true
# - TRUST_ALL_PROXIES=true
# - DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog
# - CLICKHOUSE_HOST=clickhouse
# - CLICKHOUSE_DATABASE=posthog
# - CLICKHOUSE_SECURE=false
# - CLICKHOUSE_VERIFY=false
# - KAFKA_HOSTS=kafka
# - REDIS_URL=redis://redis:6379/
# - PGHOST=db
# - PGUSER=posthog
# - PGPASSWORD=$SERVICE_PASSWORD_POSTGRES
# - DEPLOYMENT=hobby
# - SITE_URL=$SERVICE_FQDN_WEB
# - SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY
# depends_on:
# - db
# - redis
# - clickhouse
# - kafka
# - object_storage
# Temporal containers
# Single-node Elasticsearch for Temporal visibility (currently unused: the
# temporal service below ends up with ENABLE_ES=false).
elasticsearch:
image: elasticsearch:7.16.2
environment:
- cluster.routing.allocation.disk.threshold_enabled=true
# NOTE(review): these look inverted versus percentage watermarks, but as
# absolute values they denote remaining free space, so low > high >
# flood_stage is the intended ordering — confirm against the ES
# disk-allocator documentation.
- cluster.routing.allocation.disk.watermark.low=512mb
- cluster.routing.allocation.disk.watermark.high=256mb
- cluster.routing.allocation.disk.watermark.flood_stage=128mb
- discovery.type=single-node
- ES_JAVA_OPTS=-Xms256m -Xmx256m
- xpack.security.enabled=false
volumes:
- elasticsearch-data:/var/lib/elasticsearch/data
# Temporal server; auto-setup provisions its schema in the posthog Postgres.
temporal:
  image: temporalio/auto-setup:1.20.0
  environment:
    - DB=postgresql
    - DB_PORT=5432
    - POSTGRES_USER=posthog
    - POSTGRES_PWD=$SERVICE_PASSWORD_POSTGRES
    - POSTGRES_SEEDS=db
    - DYNAMIC_CONFIG_FILE_PATH=config/dynamicconfig/development-sql.yaml
    # Elasticsearch visibility is disabled. A duplicate ENABLE_ES=true entry
    # was removed: with list-style environment the last occurrence wins, so
    # the effective value was already false. The ES_* seeds are kept so ES
    # can be re-enabled easily.
    - ES_SEEDS=elasticsearch
    - ES_VERSION=v7
    - ENABLE_ES=false
  depends_on:
    # Wait for the Postgres healthcheck before running schema setup.
    db:
      condition: service_healthy
  volumes:
    - type: bind
      source: ./docker/temporal/dynamicconfig/development-sql.yaml
      target: /etc/temporal/config/dynamicconfig/development-sql.yaml
      content: |
        limit.maxIDLength:
          - value: 255
            constraints: {}
        system.forceSearchAttributesCacheRefreshOnRead:
          - value: false
            constraints: {}
# CLI container for Temporal administration; kept attached via stdin/tty so
# it stays running for interactive use.
temporal-admin-tools:
image: temporalio/admin-tools:1.20.0
depends_on:
- temporal
environment:
- TEMPORAL_CLI_ADDRESS=temporal:7233
stdin_open: true
tty: true
# Web UI for inspecting Temporal workflows.
temporal-ui:
image: temporalio/ui:2.10.3
depends_on:
- temporal
environment:
- TEMPORAL_ADDRESS=temporal:7233
- TEMPORAL_CORS_ORIGINS=http://localhost:3000
# Django-based Temporal worker; mirrors the web environment and adds
# TEMPORAL_HOST so it can reach the temporal service.
temporal-django-worker:
image: posthog/posthog:latest
command: ./bin/temporal-django-worker
environment:
- DISABLE_SECURE_SSL_REDIRECT=true
- IS_BEHIND_PROXY=true
- TRUST_ALL_PROXIES=true
- DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog
- CLICKHOUSE_HOST=clickhouse
- CLICKHOUSE_DATABASE=posthog
- CLICKHOUSE_SECURE=false
- CLICKHOUSE_VERIFY=false
- KAFKA_HOSTS=kafka
- REDIS_URL=redis://redis:6379/
- PGHOST=db
- PGUSER=posthog
- PGPASSWORD=$SERVICE_PASSWORD_POSTGRES
- DEPLOYMENT=hobby
- SITE_URL=$SERVICE_FQDN_WEB
- SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY
- TEMPORAL_HOST=temporal
depends_on:
- db
- redis
- clickhouse
- kafka
- object_storage
- temporal