Skip to content

Configuration

The below example configuration ships with every Tenzir package. Head over to the explanation of the configuration for details on how the various settings work.

tenzir.yaml
# This is an example configuration file for Tenzir that shows all available
# options. Options in angle brackets have their default value determined at
# runtime.
# Options that concern Tenzir.
tenzir:
  # The host and port to listen at for node-to-node connections in the form
  # `<host>:<port>`. Host or port may be omitted to use their defaults, which
  # are localhost and 5158, respectively. Set the port to zero to automatically
  # choose a port. Set to false to disable exposing an endpoint.
  endpoint: localhost:5158

  # The timeout for connecting to a Tenzir server. Set to 0 seconds to wait
  # indefinitely.
  connection-timeout: 5m

  # The delay between two connection attempts. Set to 0s to try connecting
  # without retries.
  connection-retry-delay: 3s

  # Configure retention policies.
  retention:
    # How long to keep metrics for. Set to 0s to disable metrics retention
    # entirely.
    # WARNING: A low retention period may negatively impact the usability of
    # pipeline activity in the Tenzir Platform.
    #metrics: 7d

    # How long to keep diagnostics for. Set to 0s to disable diagnostics
    # retention entirely.
    # WARNING: A low retention period may negatively impact the usability of
    # diagnostics in the Tenzir Platform.
    #diagnostics: 30d

  # Configure the behavior of the `cache` operator. The Tenzir Platform uses the
  # cache operator to store and retrieve data efficiently.
  cache:
    # Specifies the default lifetime for the `cache` operator.
    #lifetime: 10min

    # Specifies an upper bound for the total memory usage in bytes across all
    # caches in a node. If the memory usage exceeds this limit, the node will
    # start evicting caches to make room for new data. The node requires a
    # minimum total cache capacity of 64MiB.
    #capacity: 1Gi

  # Always use TQL2 for pipelines.
  #tql2: false

  # The file system path used for persistent state.
  # Defaults to one of the following paths, selecting the first that is
  # available:
  #   - $STATE_DIRECTORY
  #   - $PWD/tenzir.db
  #state-directory:

  # The file system path used for recoverable state.
  # In a node process, defaults to the first of the following paths that is
  # available:
  #   - $CACHE_DIRECTORY
  #   - $XDG_CACHE_HOME
  #   - $XDG_HOME_DIR/.cache/tenzir (linux) or $XDG_HOME_DIR/Libraries/caches/tenzir (mac)
  #   - $HOME/.cache/tenzir (linux) or $HOME/Libraries/caches/tenzir (mac)
  #   - $TEMPORARY_DIRECTORY/tenzir-cache-<uid>
  # To determine $TEMPORARY_DIRECTORY, the values of TMPDIR, TMP, TEMP, TEMPDIR are
  # checked in that order, and as a last resort "/tmp" is used.
  # In a client process, this setting is ignored and
  # `$TEMPORARY_DIRECTORY/tenzir-client-cache-<uid>` is used as cache directory.
  #cache-directory:

  # The file system path used for log files.
  # Defaults to one of the following paths, selecting the first that is
  # available:
  #   - $LOGS_DIRECTORY/server.log
  #   - <state-directory>/server.log
  #log-file:

  # The file system path used for client log files relative to the current
  # working directory of the client. Note that this is disabled by default.
  # If not specified no log files are written for clients at all.
  client-log-file: "client.log"

  # Format for printing individual log entries to the log-file.
  # For a list of valid format specifiers, see spdlog format specification
  # at https://github.com/gabime/spdlog/wiki/3.-Custom-formatting.
  file-format: "[%Y-%m-%dT%T.%e%z] [%n] [%l] [%s:%#] %v"

  # Configures the minimum severity of messages written to the log file.
  # Possible values: quiet, error, warning, info, verbose, debug, trace.
  # File logging is only available for commands that start a node (e.g.,
  # tenzir-node). The levels above 'verbose' are usually not available in
  # release builds.
  file-verbosity: debug

  # Whether to disable automatic log rotation. If set to false, a new log file
  # will be created when the size of the current log file exceeds 10 MiB.
  disable-log-rotation: false

  # The size limit when a log file should be rotated.
  log-rotation-threshold: 10MiB

  # Maximum number of log messages in the logger queue.
  log-queue-size: 1000000

  # The sink type to use for console logging. Possible values: stderr,
  # syslog, journald. Note that 'journald' can only be selected on linux
  # systems, and only if Tenzir was built with journald support.
  # The journald sink is used as default if Tenzir is started as a systemd
  # service and the service is configured to use the journal for stderr,
  # otherwise the default is the unstructured stderr sink.
  #console-sink: stderr/journald

  # Mode for console log output generation. Automatic renders color only when
  # writing to a tty.
  # Possible values: always, automatic, never. (default automatic)
  console: automatic

  # Format for printing individual log entries to the console. For a list
  # of valid format specifiers, see spdlog format specification at
  # https://github.com/gabime/spdlog/wiki/3.-Custom-formatting.
  console-format: "%^[%T.%e] %v%$"

  # Configures the minimum severity of messages written to the console.
  # For a list of valid log levels, see file-verbosity.
  console-verbosity: info

  # List of directories to look for schema files in ascending order of
  # priority.
  schema-dirs: []

  # Additional directories to load plugins specified using `tenzir.plugins`
  # from.
  plugin-dirs: []

  # List of paths that contain statically configured packages.
  # This setting is ignored unless the package manager plugin is enabled.
  package-dirs: []

  # The plugins to load at startup. For relative paths, Tenzir tries to find
  # the files in the specified `tenzir.plugin-dirs`. The special values
  # 'bundled' and 'all' enable autoloading of bundled and all plugins
  # respectively. Note: Add `example` or `/path/to/libtenzir-plugin-example.so`
  # to load the example plugin.
  plugins: []

  # Names of plugins and builtins to explicitly forbid from being used in
  # Tenzir. For example, adding `shell` will prohibit use of the `shell`
  # operator builtin, and adding `kafka` will prohibit use of the `kafka`
  # connector plugin.
  disable-plugins: []

  # The unique ID of this node.
  node-id: "node"

  # Forbid unsafe location overrides for pipelines with the 'local' and 'remote'
  # keywords, e.g., remotely reading from a file.
  no-location-overrides: false

  # The size of an index shard, expressed in number of events. This should
  # be a power of 2.
  max-partition-size: 4Mi

  # Timeout after which an active partition is forcibly flushed, regardless of
  # its size.
  active-partition-timeout: 30 seconds

  # Automatically rebuild undersized and outdated partitions in the background.
  # The given number controls how much resources to spend on it. Set to 0 to
  # disable.
  automatic-rebuild: 1

  # Timeout after which an automatic rebuild is triggered.
  rebuild-interval: 2 hours

  # Zstd compression level applied to the Feather store backend.
  # zstd-compression-level: <default>

  # Control how operators calculate demand from their upstream operator. Note
  # that this is an expert feature and should only be changed if you know what
  # you are doing. All values may either be set to a number, or to a record
  # containing `bytes` and `events` fields with numbers depending on the
  # operator's input type.
  demand:
    # Issue demand only if room for at least this many elements is available.
    # Must be greater than zero.
    min-elements:
      bytes: 128Ki
      events: 8Ki

    # Controls how many elements may be buffered until the operator stops
    # issuing demand. Must be greater or equal to min-elements.
    max-elements:
      bytes: 4Mi
      events: 254Ki

    # Controls how many batches of elements may be buffered until the operator
    # stops issuing demand. Must be greater than zero.
    max-batches: 20

  # Contexts configured as part of the configuration that are always available.
  contexts:
    # A unique name for the context that's used in the context, enrich, and
    # lookup operators to refer to the context.
    indicators:
      # The type of the context.
      type: bloom-filter

      # Arguments for creating the context, depending on the type. Refer to the
      # documentation of the individual context types to see the arguments they
      # require. Note that changes to these arguments do not apply to any
      # contexts that were previously created.
      arguments:
        capacity: 1B
        fp-probability: 0.001

  # The `index` key is used to adjust the false-positive rate of
  # the first-level lookup data structures (called synopses) in the
  # catalog. The lower the false-positive rate the more space will be
  # required, so this setting can be used to manually tune the trade-off
  # of performance vs. space.
  index:
    # The default false-positive rate for type synopses.
    default-fp-rate: 0.01

    # rules:
    #   Every rule adjusts the behaviour of Tenzir for a set of targets.
    #   Tenzir creates one synopsis per target. Targets can be either types
    #   or field names.
    #
    #   fp-rate - false positive rate. Has effect on string and address type
    #             targets
    #
    #   partition-index - Tenzir will not create dense index when set to false
    #   - targets: [:ip]
    #     fp-rate: 0.01

  # The `tenzir-ctl start` command starts a new Tenzir server process.
  start:
    # Prints the endpoint for clients when the server is ready to accept
    # connections. This comes in handy when letting the OS choose an
    # available random port, i.e., when specifying 0 as port value.
    print-endpoint: false

    # Writes the endpoint for clients when the server is ready to accept
    # connections to the specified destination. This comes in handy when letting
    # the OS choose an available random port, i.e., when specifying 0 as port
    # value, and `print-endpoint` is not sufficient.
    #write-endpoint: /tmp/tenzir-node-endpoint

    # An ordered list of commands to run inside the node after starting.
    # As an example, to configure an auto-starting PCAP source that listens
    # on the interface 'en0' and lives inside the Tenzir node, add `spawn
    # source pcap -i en0`.
    # Note that commands are not executed sequentially but in parallel.
    commands: []

    # Triggers removal of old data when the disk budget is exceeded.
    disk-budget-high: 0GiB

    # When the budget was exceeded, data is erased until the disk space is
    # below this value.
    disk-budget-low: 0GiB

    # Seconds between successive disk space checks.
    disk-budget-check-interval: 90

    # When erasing, how many partitions to erase in one go before rechecking
    # the size of the database directory.
    disk-budget-step-size: 1

    # Binary to use for checking the size of the database directory. If left
    # unset, Tenzir will recursively add up the size of all files in the
    # database directory to compute the size. Mainly useful for e.g.
    # compressed filesystems where raw file size is not the correct metric.
    # Must be the absolute path to an executable file, which will get passed
    # the database directory as its first and only argument.
    #disk-budget-check-binary: /opt/tenzir/libexec/tenzir-df-percent.sh

  # User-defined operators.
  operators:
    # The Zeek operator is an example that takes raw bytes in the form of a
    # PCAP and then parses Zeek's output via the `zeek-json` format to generate
    # a stream of events.
    # The definition is a folded scalar: its lines are joined with single
    # spaces into one pipeline string.
    zeek: >-
      shell "zeek -r - LogAscii::output_to_stdout=T
      JSONStreaming::disable_default_logs=T
      JSONStreaming::enable_log_rotation=F
      json-streaming-logs"
      | read zeek-json

    # The Suricata operator is analogous to the above Zeek example, with the
    # difference that we are using Suricata. The command line configures
    # Suricata such that it reads PCAP on stdin and produces EVE JSON logs on
    # stdout, which we then parse with the `suricata` format.
    suricata: >-
      shell "suricata -r /dev/stdin
      --set outputs.1.eve-log.filename=/dev/stdout
      --set logging.outputs.0.console.enabled=no"
      | read suricata

  # In addition to running pipelines interactively, you can also deploy
  # *Pipelines as Code*. This infrastructure-as-code-like method differs from
  # pipelines run on the command-line or through app.tenzir.com in two ways:
  # 1. Pipelines deployed as code always start alongside the Tenzir node.
  # 2. Deletion via the user interface is not allowed for pipelines configured
  #    as code.
  pipelines:
    # A unique identifier for the pipeline that's used for metrics, diagnostics,
    # and API calls interacting with the pipeline.
    publish-suricata:
      # An optional user-facing name for the pipeline. Defaults to the id.
      name: Import Suricata from TCP

      # The definition of the pipeline. Configured pipelines that fail to start
      # cause the node to fail to start.
      definition: |
        from tcp://0.0.0.0:34343 read suricata --no-infer
        | where event_type != "stats"
        | publish suricata

      # Pipelines that encounter an error stop running and show an error state.
      # This option causes pipelines to automatically restart when they
      # encounter an error instead. The first restart happens immediately, and
      # subsequent restarts after the configured delay, defaulting to 1 minute.
      # The following values are valid for this option:
      # - Omit the option, or set it to null or false to disable.
      # - Set the option to true to enable with the default delay of 1 minute.
      # - Set the option to a valid duration to enable with a custom delay.
      restart-on-error: 1 minute

      # Pipelines that are unstoppable will run automatically and indefinitely.
      # They are not able to pause or stop.
      # If they do complete, they will end up in a failed state.
      # If `restart-on-error` is enabled, they will restart after the specified
      # duration.
      unstoppable: false
# The below settings are internal to CAF, and aren't checked by Tenzir directly.
# Please be careful when changing these options. Note that some CAF options may
# be in conflict with Tenzir options, and are only listed here for completeness.
caf:
  # Options affecting the internal scheduler.
  scheduler:
    # Accepted alternative: "sharing".
    policy: stealing

    # Configures whether the scheduler generates profiling output.
    enable-profiling: false

    # Output file for profiler data (only if profiling is enabled).
    #profiling-output-file: </dev/null>

    # Measurement resolution in milliseconds (only if profiling is enabled).
    profiling-resolution: 100ms

    # Forces a fixed number of threads if set. Defaults to the number of
    # available CPU cores if starting a Tenzir node, or *2* for client commands.
    #max-threads: <number of cores>

    # Maximum number of messages actors can consume in one run.
    max-throughput: 500

  # When using "stealing" as scheduler policy.
  work-stealing:
    # Number of zero-sleep-interval polling attempts.
    aggressive-poll-attempts: 100

    # Frequency of steal attempts during aggressive polling.
    aggressive-steal-interval: 10

    # Number of moderately aggressive polling attempts.
    moderate-poll-attempts: 500

    # Frequency of steal attempts during moderate polling.
    moderate-steal-interval: 5

    # Sleep interval between poll attempts.
    moderate-sleep-duration: 50us

    # Frequency of steal attempts during relaxed polling.
    relaxed-steal-interval: 1

    # Sleep interval between poll attempts.
    relaxed-sleep-duration: 10ms

  # Options affecting streams: batching and credit computation.
  stream:
    # Maximum delay for partial batches.
    max-batch-delay: 15ms

    # Selects an implementation for credit computation.
    # Accepted alternative: "size-based".
    credit-policy: token-based

    # When using "size-based" as credit-policy.
    size-based-policy:
      # Desired batch size in bytes.
      bytes-per-batch: 32

      # Maximum input buffer size in bytes.
      buffer-capacity: 256

      # Frequency of collecting batch sizes.
      sampling-rate: 100

      # Frequency of re-calibrations.
      calibration-interval: 1

      # Factor for discounting older samples.
      smoothing-factor: 2.5

    # When using "token-based" as credit-policy.
    token-based-policy:
      # Number of elements per batch.
      batch-size: 1

      # Max. number of elements in the input buffer.
      buffer-size: 64

  # Collecting metrics can be resource consuming. This section is used for
  # filtering what should and what should not be collected
  metrics-filters:
    # Rules for actor based metrics filtering.
    actors:
      # List of selected actors for run-time metrics.
      includes: []

      # List of excluded actors from run-time metrics.
      excludes: []

  # Configure using OpenSSL for node-to-node connections.
  # NOTE: Use the tenzir.endpoint variable to configure the endpoint.
  openssl:
    # Path to the PEM-formatted certificate file.
    certificate:

    # Path to the private key file for this node.
    key:

    # Passphrase to decrypt the private key.
    passphrase:

    # Path to an OpenSSL-style directory of trusted certificates.
    capath:

    # Path to a file of concatenated PEM-formatted certificates.
    cafile:

    # Colon-separated list of OpenSSL cipher strings to use.
    cipher-list: