From d98c705cbf7a4d4ff460141daab9400dadb9847a Mon Sep 17 00:00:00 2001 From: Michael Hausenblas Date: Wed, 10 May 2023 19:24:56 +0100 Subject: [PATCH] inits Deployment contrib (#2498) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Patrice Chalin Co-authored-by: Alex Boten Co-authored-by: Patrice Chalin Co-authored-by: Severin Neumann Co-authored-by: Phillip Carter Co-authored-by: Juraci Paixão Kröhling --- content/en/blog/2023/end-user-q-and-a-02.md | 2 +- content/en/docs/collector/deployment.md | 32 -- .../en/docs/collector/deployment/_index.md | 20 ++ content/en/docs/collector/deployment/agent.md | 134 ++++++++ .../en/docs/collector/deployment/gateway.md | 170 ++++++++++ .../docs/collector/deployment/no-collector.md | 32 ++ content/en/docs/collector/img/agent-sdk.svg | 126 +++++++ .../en/docs/collector/img/gateway-lb-sdk.svg | 313 ++++++++++++++++++ content/en/docs/collector/img/gateway-sdk.svg | 222 +++++++++++++ content/en/docs/collector/img/sdk.svg | 40 +++ static/refcache.json | 8 + 11 files changed, 1066 insertions(+), 33 deletions(-) delete mode 100644 content/en/docs/collector/deployment.md create mode 100644 content/en/docs/collector/deployment/_index.md create mode 100644 content/en/docs/collector/deployment/agent.md create mode 100644 content/en/docs/collector/deployment/gateway.md create mode 100644 content/en/docs/collector/deployment/no-collector.md create mode 100644 content/en/docs/collector/img/agent-sdk.svg create mode 100644 content/en/docs/collector/img/gateway-lb-sdk.svg create mode 100644 content/en/docs/collector/img/gateway-sdk.svg create mode 100644 content/en/docs/collector/img/sdk.svg diff --git a/content/en/blog/2023/end-user-q-and-a-02.md b/content/en/blog/2023/end-user-q-and-a-02.md index 8099d5120..b969ac28e 100644 --- a/content/en/blog/2023/end-user-q-and-a-02.md +++ b/content/en/blog/2023/end-user-q-and-a-02.md @@ -198,7 +198,7 @@ Uplight currently has a few different 
Collector configurations: Doug’s ultimate goal is for any deployment in any environment to be able to easily send telemetry to an -[OTel Collector gateway](/docs/collector/deployment/#gateway). +[OTel Collector gateway](/docs/collector/deployment/gateway/). Collectors at Uplight are typically run and maintained by the infrastructure team, unless individual teams decide to take ownership of their own Collectors. diff --git a/content/en/docs/collector/deployment.md b/content/en/docs/collector/deployment.md deleted file mode 100644 index bf66cff0b..000000000 --- a/content/en/docs/collector/deployment.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: Deployment -weight: 2 ---- - -The OpenTelemetry Collector consists of a single binary and two primary -deployment methods: - -- **Agent:** A Collector instance running with the application or on the same - host as the application (e.g. binary, sidecar, or daemonset). -- **Gateway:** One or more Collector instances running as a standalone service - (e.g. container or deployment) typically per cluster, data center or region. - -## Agent - -It is recommended to deploy the Agent on every host within an environment. In -doing so, the Agent is capable of receiving telemetry data (push and pull based) -as well as enhancing telemetry data with metadata such as custom tags or -infrastructure information. In addition, the Agent can offload responsibilities -that client instrumentation would otherwise need to handle including batching, -retry, encryption, compression and more. - -## Gateway - -Additionally, a Gateway cluster can be deployed in every cluster, data center, -or region. A Gateway cluster runs as a standalone service and can offer advanced -capabilities over the Agent including tail-based sampling. In addition, a -Gateway cluster can limit the number of egress points required to send data as -well as consolidate API token management. 
Each Collector instance in a Gateway -cluster operates independently so it is easy to scale the architecture based on -performance needs with a simple load balancer. If a gateway cluster is deployed, -it usually receives data from Agents deployed within an environment. diff --git a/content/en/docs/collector/deployment/_index.md b/content/en/docs/collector/deployment/_index.md new file mode 100644 index 000000000..66896f4b3 --- /dev/null +++ b/content/en/docs/collector/deployment/_index.md @@ -0,0 +1,20 @@ +--- +title: Deployment +description: Patterns you can apply to deploy the OpenTelemetry collector +weight: 2 +--- + +The OpenTelemetry collector consists of a single binary which you can use in +different ways, for different use cases. This section describes deployment +patterns, their use cases along with pros and cons and best practices for +collector configurations for cross-environment and multi-backend deployments. + +## Resources + +- KubeCon NA 2021 Talk on [OpenTelemetry Collector Deployment + Patterns][y-patterns] +- [Deployment Patterns][gh-patterns] accompanying the talk + +[gh-patterns]: + https://github.com/jpkrohling/opentelemetry-collector-deployment-patterns/ +[y-patterns]: https://www.youtube.com/watch?v=WhRrwSHDBFs diff --git a/content/en/docs/collector/deployment/agent.md b/content/en/docs/collector/deployment/agent.md new file mode 100644 index 000000000..b9206bc46 --- /dev/null +++ b/content/en/docs/collector/deployment/agent.md @@ -0,0 +1,134 @@ +--- +title: Agent +description: + Why and how to send signals to collectors and from there to backends +weight: 2 +--- + +The agent collector deployment pattern consists of applications — +[instrumented][instrumentation] with an OpenTelemetry SDK using [OpenTelemetry +protocol (OTLP)][otlp] — or other collectors (using the OTLP exporter) that send +telemetry signals to a [collector][collector] instance running with the +application or on the same host as the application (such as a sidecar or a 
+daemonset). + +Each client-side SDK or downstream collector is configured with a collector +location: + +![Decentralized collector deployment concept](../../img/agent-sdk.svg) + +1. In the app, the SDK is configured to send OTLP data to a collector. +1. The collector is configured to send telemetry data to one or more backends. + +## Example + +A concrete example of the agent collector deployment pattern could look as +follows: you manually instrument, say, a [Java application to export +metrics][instrument-java-metrics] using the OpenTelemetry Java SDK. In the +context of the app, you would set the `OTEL_METRICS_EXPORTER` to `otlp` (which +is the default value) and configure the [OTLP exporter][otlp-exporter] with the +address of your collector, for example (in Bash or `zsh` shell): + +``` +export OTEL_EXPORTER_OTLP_ENDPOINT=http://collector.example.com:4318 +``` + +The collector serving at `collector.example.com:4318` would then be configured +like so: + + +{{< tabpane lang=yaml persistLang=false >}} +{{< tab Traces >}} +receivers: + otlp: # the OTLP receiver the app is sending traces to + protocols: + grpc: + +processors: + batch: + +exporters: + jaeger: # the Jaeger exporter, to ingest traces to backend + endpoint: "https://jaeger.example.com:14250" + insecure: true + +service: + pipelines: + traces/dev: + receivers: [otlp] + processors: [batch] + exporters: [jaeger] +{{< /tab >}} +{{< tab Metrics >}} +receivers: + otlp: # the OTLP receiver the app is sending metrics to + protocols: + grpc: + +processors: + batch: + +exporters: + prometheusremotewrite: # the PRW exporter, to ingest metrics to backend + endpoint: "https://prw.example.com/v1/api/remote_write" + +service: + pipelines: + metrics/prod: + receivers: [otlp] + processors: [batch] + exporters: [prometheusremotewrite] + +{{< /tab >}} +{{< tab Logs >}} +receivers: + otlp: # the OTLP receiver the app is sending logs to + protocols: + grpc: + +processors: + batch: + +exporters: + file: # the File Exporter, 
to ingest logs to local file + path: "./app42_example.log" + rotation: + +service: + pipelines: + logs/dev: + receivers: [otlp] + processors: [batch] + exporters: [file] +{{< /tab >}} +{{< /tabpane>}} + + +If you want to try it out for yourself, you can have a look at the end-to-end +[Java][java-otlp-example] or [Python][py-otlp-example] examples. + +## Tradeoffs + +Pros: + +- Simple to get started +- Clear 1:1 mapping between application and collector + +Cons: + +- Scalability (human and load-wise) +- Inflexible + +[instrumentation]: /docs/instrumentation/ +[otlp]: /docs/reference/specification/protocol/ +[collector]: /docs/collector/ +[instrument-java-metrics]: /docs/instrumentation/java/manual/#metrics +[otlp-exporter]: /docs/reference/specification/protocol/exporter/ +[java-otlp-example]: + https://github.com/open-telemetry/opentelemetry-java-docs/tree/main/otlp +[py-otlp-example]: + https://opentelemetry-python.readthedocs.io/en/stable/examples/metrics/instruments/README.html +[lb-exporter]: + https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/loadbalancingexporter +[spanmetrics-processor]: + https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/spanmetricsprocessor diff --git a/content/en/docs/collector/deployment/gateway.md b/content/en/docs/collector/deployment/gateway.md new file mode 100644 index 000000000..81fa16dba --- /dev/null +++ b/content/en/docs/collector/deployment/gateway.md @@ -0,0 +1,170 @@ +--- +title: Gateway +description: + Why and how to send signals to a single OTLP end-point and from there to + backends +weight: 3 +--- + +The gateway collector deployment pattern consists of applications (or other +collectors) sending telemetry signals to a single OTLP endpoint provided by one +or more collector instances running as a standalone service (for example, a +deployment in Kubernetes), typically per cluster, per data center or per region. 
+ +In the general case you can use an out-of-the-box load balancer to distribute +the load amongst the collectors: + +![Gateway deployment concept](../../img/gateway-sdk.svg) + +For use cases where the processing of the telemetry data has to +happen in a specific collector, you would use a two-tiered setup with a +collector that has a pipeline configured with the [Trace ID/Service-name aware +load-balancing exporter][lb-exporter] in the first tier and the collectors +handling the scale out in the second tier. For example, you will need to use the +load-balancing exporter when using the [Tail Sampling +processor][tailsample-processor] so that all spans for a given trace reach the +same collector instance where the tail sampling policy is applied. + +Let's have a look at such a case where we are using the load-balancing exporter: + +![Gateway deployment with load-balancing exporter](../../img/gateway-lb-sdk.svg) + +1. In the app, the SDK is configured to send OTLP data to a central location. +1. A collector is configured with the load-balancing exporter, which distributes + signals to a group of collectors. +1. The collectors are configured to send telemetry data to one or more backends. + +{{% alert title="Note" color="info" %}} Currently, the load-balancing exporter +only supports pipelines of the `traces` type. {{% /alert %}} + +## Example + +For a concrete example of the centralized collector deployment pattern we first +need to have a closer look at the load-balancing exporter. It has two main +configuration fields: + +- The `resolver`, which determines where to find the downstream collectors (or: + backends). If you use the `static` sub-key here, you will have to manually + enumerate the collector URLs. The other supported resolver is the DNS resolver + which will periodically check for updates and resolve IP addresses. For this + resolver type, the `hostname` sub-key specifies the hostname to query in order + to obtain the list of IP addresses. 
+- With the `routing_key` field you tell the load-balancing exporter to route + spans to specific downstream collectors. If you set this field to `traceID` + (default) then the load-balancing exporter exports spans based on their + `traceID`. Otherwise, if you use `service` as the value for `routing_key`, it + exports spans based on their service name which is useful when using + connectors like the [Span Metrics connector][spanmetrics-connector], so all + spans of a service will be sent to the same downstream collector for metric + collection, guaranteeing accurate aggregations. + +The first-tier collector servicing the OTLP endpoint would be configured as +shown below: + + +{{< tabpane lang=yaml persistLang=false >}} +{{< tab Static >}} +receivers: + otlp: + protocols: + grpc: + +exporters: + loadbalancing: + protocol: + otlp: + insecure: true + resolver: + static: + hostnames: + - collector-1.example.com:4317 + - collector-2.example.com:5317 + - collector-3.example.com + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [loadbalancing] +{{< /tab >}} +{{< tab DNS >}} +receivers: + otlp: + protocols: + grpc: + +exporters: + loadbalancing: + protocol: + otlp: + insecure: true + resolver: + dns: + hostname: collectors.example.com + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [loadbalancing] +{{< /tab >}} +{{< tab "DNS with service" >}} +receivers: + otlp: + protocols: + grpc: + +exporters: + loadbalancing: + routing_key: "service" + protocol: + otlp: + insecure: true + resolver: + dns: + hostname: collectors.example.com + port: 5317 + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [loadbalancing] +{{< /tab >}} +{{< /tabpane>}} + + +The load-balancing exporter emits metrics including +`otelcol_loadbalancer_num_backends` and `otelcol_loadbalancer_backend_latency` +that you can use for health and performance monitoring of the OTLP endpoint +collector. 
+ +## Tradeoffs + +Pros: + +- Separation of concerns such as centrally managed credentials +- Centralized policy management (for example, filtering certain logs or + sampling) + +Cons: + +- It's one more thing to maintain and that can fail (complexity) +- Added latency in case of cascaded collectors +- Higher overall resource usage (costs) + +[instrumentation]: /docs/instrumentation/ +[otlp]: /docs/reference/specification/protocol/ +[collector]: /docs/collector/ +[instrument-java-metrics]: /docs/instrumentation/java/manual/#metrics +[otlp-exporter]: /docs/reference/specification/protocol/exporter/ +[java-otlp-example]: + https://github.com/open-telemetry/opentelemetry-java-docs/tree/main/otlp +[py-otlp-example]: + https://opentelemetry-python.readthedocs.io/en/stable/examples/metrics/instruments/README.html +[lb-exporter]: + https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/loadbalancingexporter +[tailsample-processor]: + https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor +[spanmetrics-connector]: + https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector diff --git a/content/en/docs/collector/deployment/no-collector.md b/content/en/docs/collector/deployment/no-collector.md new file mode 100644 index 000000000..f43d6e3b9 --- /dev/null +++ b/content/en/docs/collector/deployment/no-collector.md @@ -0,0 +1,32 @@ +--- +title: No Collector +description: Why and how to send signals directly from app to backends +weight: 1 +--- + +The simplest pattern is not to use a collector at all. 
This pattern consists of +applications [instrumented][instrumentation] with an OpenTelemetry SDK that +export telemetry signals (traces, metrics, logs) directly into a backend: + +![No collector deployment concept](../../img/sdk.svg) + +## Example + +See the [code instrumentation for programming languages][instrumentation] for +concrete end-to-end examples for how to export signals from your app directly +into a backend. + +## Tradeoffs + +Pros: + +- Simple to use (especially in a dev/test environment) +- No additional moving parts to operate (in production environments) + +Cons: + +- Requires code changes if collection, processing, or ingestion changes +- Strong coupling between the application code and the backend +- There are limited number of exporters per language implementation + +[instrumentation]: /docs/instrumentation/ diff --git a/content/en/docs/collector/img/agent-sdk.svg b/content/en/docs/collector/img/agent-sdk.svg new file mode 100644 index 000000000..a7436e543 --- /dev/null +++ b/content/en/docs/collector/img/agent-sdk.svg @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/en/docs/collector/img/gateway-lb-sdk.svg b/content/en/docs/collector/img/gateway-lb-sdk.svg new file mode 100644 index 000000000..0a3a419b6 --- /dev/null +++ b/content/en/docs/collector/img/gateway-lb-sdk.svg @@ -0,0 +1,313 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/en/docs/collector/img/gateway-sdk.svg b/content/en/docs/collector/img/gateway-sdk.svg new file mode 100644 index 000000000..2ad4fdb96 --- /dev/null +++ b/content/en/docs/collector/img/gateway-sdk.svg @@ -0,0 +1,222 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/en/docs/collector/img/sdk.svg b/content/en/docs/collector/img/sdk.svg new file mode 100644 index 000000000..94620657c --- /dev/null +++ b/content/en/docs/collector/img/sdk.svg @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/static/refcache.json b/static/refcache.json index 26f46254d..aedd58296 100644 --- a/static/refcache.json +++ b/static/refcache.json @@ -1675,6 +1675,10 @@ "StatusCode": 200, "LastSeen": "2023-02-20T08:10:49.246765-05:00" }, + "https://github.com/jpkrohling/opentelemetry-collector-deployment-patterns/": { + "StatusCode": 200, + "LastSeen": "2023-03-14T06:35:50.116854Z" + }, "https://github.com/jufab/opentelemetry-angular-interceptor": { "StatusCode": 200, "LastSeen": "2023-02-20T07:43:48.729669-05:00" @@ -3347,6 +3351,10 @@ "StatusCode": 200, "LastSeen": "2023-02-16T17:43:51.469854-05:00" }, + "https://opentelemetry-python.readthedocs.io/en/stable/examples/metrics/instruments/README.html": { + "StatusCode": 200, + "LastSeen": "2023-03-14T06:35:50.907539Z" + }, 
"https://opentelemetry-python.readthedocs.io/en/stable/shim/opentracing_shim/opentracing_shim.html": { "StatusCode": 200, "LastSeen": "2023-02-16T17:45:15.666125-05:00"