From 7db44d743a9f6c577c3f0c0eea23bd23ea87718b Mon Sep 17 00:00:00 2001
From: Sebastian Poxhofer <secustor@users.noreply.github.com>
Date: Mon, 23 Dec 2024 15:46:34 +0100
Subject: [PATCH] docs(examples/opentelemetry): use modern components and
 simplify (#33226)

Co-authored-by: HonkingGoose <34918129+HonkingGoose@users.noreply.github.com>
---
 docs/usage/examples/opentelemetry.md | 195 +++++++++++++++------------
 1 file changed, 111 insertions(+), 84 deletions(-)

diff --git a/docs/usage/examples/opentelemetry.md b/docs/usage/examples/opentelemetry.md
index 2e6181fb36..c007e6bea9 100644
--- a/docs/usage/examples/opentelemetry.md
+++ b/docs/usage/examples/opentelemetry.md
@@ -9,29 +9,42 @@ Requirements:
 Create a `docker-compose.yaml` and `otel-collector-config.yml` file as seen below in a folder.
 
 ```yaml title="docker-compose.yaml"
-version: '3'
+name: renovate-otel-demo
+
 services:
-  # Jaeger
+  # Jaeger for storing traces
   jaeger:
-    image: jaegertracing/all-in-one:1.64.0
+    image: jaegertracing/jaeger:2.1.0
+    ports:
+      - '16686:16686' # Web UI
+      - '4317' # OTLP gRPC
+      - '4318' # OTLP HTTP
+
+  # Prometheus for storing metrics
+  prometheus:
+    image: prom/prometheus:v3.0.1
     ports:
-      - '16686:16686'
-      - '4317'
+      - '9090:9090' # Web UI
+      - '4318' # OTLP HTTP
+    command:
+      - --web.enable-otlp-receiver
+      # Mirror these flags from the Dockerfile, because `command` overwrites the default flags.
+      # https://github.com/prometheus/prometheus/blob/5b5fee08af4c73230b2dae35964816f7b3c29351/Dockerfile#L23-L24
+      - --config.file=/etc/prometheus/prometheus.yml
+      - --storage.tsdb.path=/prometheus
 
   otel-collector:
+    # Using the Contrib version to access the spanmetrics connector.
+    # If you don't need the spanmetrics connector, you can use the standard version.
     image: otel/opentelemetry-collector-contrib:0.116.1
-    command: ['--config=/etc/otel-collector-config.yml']
     volumes:
-      - ./otel-collector-config.yml:/etc/otel-collector-config.yml
+      - ./otel-collector-config.yml:/etc/otelcol-contrib/config.yaml
     ports:
-      - '1888:1888' # pprof extension
-      - '13133:13133' # health_check extension
-      - '55679:55679' # zpages extension
-      - '4318:4318' # OTLP HTTP
-      - '4317:4317' # OTLP GRPC
-      - '9123:9123' # Prometheus exporter
+      - '4318:4318' # OTLP HTTP ( exposed to the host )
+      - '4317:4317' # OTLP gRPC ( exposed to the host )
     depends_on:
       - jaeger
+      - prometheus
 ```
 
 ```yaml title="otel-collector-config.yml"
@@ -39,28 +52,36 @@ receivers:
   otlp:
     protocols:
       grpc:
+        endpoint: 0.0.0.0:4317
       http:
+        endpoint: 0.0.0.0:4318
 
 exporters:
   otlp/jaeger:
     endpoint: jaeger:4317
     tls:
       insecure: true
-  logging:
-  prometheus:
-    endpoint: '0.0.0.0:9123'
+  otlphttp/prometheus:
+    endpoint: http://prometheus:9090/api/v1/otlp
+  debug:
+    # verbosity: normal
 
-processors:
-  batch:
+connectors:
   spanmetrics:
-    metrics_exporter: prometheus
-    latency_histogram_buckets: [10ms, 100ms, 250ms, 1s, 30s, 1m, 5m]
+    histogram:
+      exponential:
     dimensions:
       - name: http.method
+        default: GET
       - name: http.status_code
       - name: http.host
     dimensions_cache_size: 1000
     aggregation_temporality: 'AGGREGATION_TEMPORALITY_CUMULATIVE'
+    exemplars:
+      enabled: true
+
+processors:
+  batch:
 
 extensions:
   health_check:
@@ -72,12 +93,23 @@ service:
   pipelines:
     traces:
       receivers: [otlp]
-      exporters: [otlp/jaeger, logging]
-      processors: [spanmetrics, batch]
+      exporters:
+        - otlp/jaeger
+        # Send traces to connector for metrics calculation
+        - spanmetrics
+        # Enable debug exporter to see traces in the logs
+        # - debug
+      processors: [batch]
 
     metrics:
-      receivers: [otlp]
-      exporters: [prometheus]
+      receivers:
+        - otlp # Receive metrics from Renovate.
+        - spanmetrics # Receive metrics calculated by the spanmetrics connector.
+      processors: [batch]
+      exporters:
+        - otlphttp/prometheus
+        # Enable debug exporter to see metrics in the logs
+        # - debug
 ```
 
 Start setup using this command inside the folder containing the files created in the earlier steps:
@@ -86,7 +118,11 @@ Start setup using this command inside the folder containing the files created in
 docker-compose up
 ```
 
-This command will start an [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector-contrib) and an instance of [Jaeger](https://www.jaegertracing.io/).
+This command will start:
+
+- an [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector-contrib)
+- an instance of [Jaeger for traces](https://www.jaegertracing.io/)
+- and an instance of [Prometheus for metrics](https://prometheus.io/)
 
 Jaeger will be now reachable under [http://localhost:16686](http://localhost:16686).
 
@@ -97,7 +133,8 @@ To start Renovate with OpenTelemetry enabled run following command, after pointi
 ```
 docker run \
   --rm \
-  -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 \
+  --network renovate-otel-demo_default \
+  -e OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 \
   -v "/path/to/your/config.js:/usr/src/app/config.js" \
   renovate/renovate:latest
 ```
@@ -130,100 +167,90 @@ You should be able to see now the full trace view which shows each HTTP request
 ### Metrics
 
 Additional to the received traces some metrics are calculated.
-This is achieved using the [spanmetricsprocessor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/spanmetricsprocessor).
-The previous implemented setup will produce following metrics, which are exposed under [http://localhost:9123/metrics](http://localhost:9123/metrics):
+This is achieved using the [spanmetrics connector](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector).
+The previously implemented setup will produce the following metrics, which are pushed to [Prometheus](http://localhost:9090):
 
 ```
-# HELP calls_total
-# TYPE calls_total counter
-
 ### Example of internal spans
-calls_total{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 3
-calls_total{operation="run",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 1
-### Example of http calls from Renovate to external services
-calls_total{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET"} 9
+traces_span_metrics_calls_total{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 2
+traces_span_metrics_calls_total{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="run", status_code="STATUS_CODE_UNSET"} 2
 
-...
-
-# HELP latency
-# TYPE latency histogram
-### Example of internal spans
-latency_bucket{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET",le="0.1"} 0
-...
-latency_bucket{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET",le="9.223372036854775e+12"} 3
-latency_bucket{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET",le="+Inf"} 3
-latency_sum{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 30947.4689
-latency_count{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 3
+### Example of http calls from Renovate to external services
+traces_span_metrics_calls_total{http_host="api.github.com:443", http_method="POST", http_status_code="200", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_CLIENT", span_name="POST", status_code="STATUS_CODE_UNSET"} 4
 
-...
 
-### Example of http calls from Renovate to external services
-latency_bucket{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET",le="0.1"} 0
+### Example histogram metrics
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="8", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 0
 ...
-latency_bucket{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET",le="250"} 3
-latency_bucket{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET",le="9.223372036854775e+12"} 9
-latency_bucket{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET",le="+Inf"} 9
-latency_sum{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET"} 2306.1385999999998
-latency_count{http_host="api.github.com:443",http_method="POST",http_status_code="200",operation="HTTPS POST",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET"} 9
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="2000", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="5000", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="10000", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="15000", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_method="GET", job="renovatebot.com/renovate", le="+Inf", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
+
+traces_span_metrics_duration_milliseconds_sum{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 4190.694209
+traces_span_metrics_duration_milliseconds_count{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 1
 ```
 
-The [spanmetricsprocessor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/spanmetricsprocessor) creates two sets of metrics.
+The [spanmetrics connector](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector) creates two sets of metrics.
 
 #### Calls metric
 
-At first there are the `calls_total` metrics which display how often specific trace spans have been observed.
+First, there are the `traces_span_metrics_calls_total` metrics.
+These metrics show how often _specific_ trace spans have been observed.
 
 For example:
-`calls_total{operation="renovate repository",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 3` signals that 3 repositories have been renovated.
-`calls_total{operation="run",service_name="renovate",span_kind="SPAN_KIND_INTERNAL",status_code="STATUS_CODE_UNSET"} 1` represents how often Renovate has been run.
+
+- `traces_span_metrics_calls_total{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="repository", status_code="STATUS_CODE_UNSET"} 2` signals that 2 repositories have been renovated.
+- `traces_span_metrics_calls_total{http_method="GET", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_INTERNAL", span_name="run", status_code="STATUS_CODE_UNSET"} 1` represents how often Renovate has been run.
 
 If we combine this using the PrometheusQueryLanguage ( PromQL ), we can calculate the average count of repositories each Renovate run handles.
 
 ```
-calls_total{operation="renovate repository",service_name="renovate"} / calls_total{operation="run",service_name="renovate"}
+traces_span_metrics_calls_total{span_name="repository",service_name="renovate"} / traces_span_metrics_calls_total{span_name="run",service_name="renovate"}
 ```
 
-This metrics is also for spans generated by http calls:
+These metrics are generated for HTTP call spans too:
 
 ```yaml
-calls_total{http_host="registry.terraform.io:443",http_method="GET",http_status_code="200",operation="HTTPS GET",service_name="renovate",span_kind="SPAN_KIND_CLIENT",status_code="STATUS_CODE_UNSET"} 5
+traces_span_metrics_calls_total{http_host="prometheus-community.github.io:443", http_method="GET", http_status_code="200", job="renovatebot.com/renovate", service_name="renovate", span_kind="SPAN_KIND_CLIENT", span_name="GET", status_code="STATUS_CODE_UNSET"} 5
 ```
 
 #### Latency buckets
 
-The second class of metrics exposed are the latency focused latency buckets which allow to create [heatmaps](https://grafana.com/docs/grafana/latest/basics/intro-histograms/#heatmaps).
+The second class of metrics exposed are the latency-focused buckets, which allow you to create [heatmaps](https://grafana.com/docs/grafana/latest/basics/intro-histograms/#heatmaps).
 A request is added to a backed if the latency is bigger than the bucket value (`le`). `request_duration => le`
 
 As an example if we receive a request which need `1.533s` to complete get following metrics:
 
 ```
-latency_bucket{http_host="api.github.com:443",le="0.1"} 0
-latency_bucket{http_host="api.github.com:443",le="1"} 0
-latency_bucket{http_host="api.github.com:443",le="2"} 1
-latency_bucket{http_host="api.github.com:443",le="6"} 1
-latency_bucket{http_host="api.github.com:443",le="10"} 1
-latency_bucket{http_host="api.github.com:443",le="100"} 1
-latency_bucket{http_host="api.github.com:443",le="250"} 1
-latency_bucket{http_host="api.github.com:443",le="9.223372036854775e+12"} 1
-latency_bucket{http_host="api.github.com:443",le="+Inf"} 1
-latency_sum{http_host="api.github.com:443"} 1.533
-latency_count{http_host="api.github.com:443"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="0.1"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="1"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="2"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="6"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="10"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="100"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="250"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="9.223372036854775e+12"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="+Inf"} 1
+traces_span_metrics_duration_milliseconds_sum{http_host="api.github.com:443"} 1.533
+traces_span_metrics_duration_milliseconds_count{http_host="api.github.com:443"} 1
 ```
 
 Now we have another request which this time takes 10s to complete:
 
 ```
-latency_bucket{http_host="api.github.com:443",le="0.1"} 0
-latency_bucket{http_host="api.github.com:443",le="1"} 0
-latency_bucket{http_host="api.github.com:443",le="2"} 1
-latency_bucket{http_host="api.github.com:443",le="6"} 1
-latency_bucket{http_host="api.github.com:443",le="10"} 2
-latency_bucket{http_host="api.github.com:443",le="100"} 2
-latency_bucket{http_host="api.github.com:443",le="250"} 2
-latency_bucket{http_host="api.github.com:443",le="9.223372036854775e+12"} 2
-latency_bucket{http_host="api.github.com:443",le="+Inf"} 2
-latency_sum{http_host="api.github.com:443"} 11.533
-latency_count{http_host="api.github.com:443"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="0.1"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="1"} 0
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="2"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="6"} 1
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="10"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="100"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="250"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="9.223372036854775e+12"} 2
+traces_span_metrics_duration_milliseconds_bucket{http_host="api.github.com:443",le="+Inf"} 2
+traces_span_metrics_duration_milliseconds_sum{http_host="api.github.com:443"} 11.533
+traces_span_metrics_duration_milliseconds_count{http_host="api.github.com:443"} 2
 ```
 
 More about the functionality can be found on the Prometheus page for [metric types](https://prometheus.io/docs/concepts/metric_types/#histogram).
-- 
GitLab