Files
Mayne0213 7e61af372b PERF(observability): remove CPU limits for stability
- Remove CPU limits from all observability components
- Prevents CPU throttling issues across monitoring stack
2026-01-12 02:10:54 +09:00

2629 lines
90 KiB
JSON

{
"__inputs": [
{
"name": "DS_GRAFANACLOUD-CLECLERC-PROM",
"label": "grafanacloud-cleclerc-prom",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
},
{
"name": "DS_GRAFANACLOUD-CLECLERC-LOGS",
"label": "grafanacloud-cleclerc-logs",
"description": "",
"type": "datasource",
"pluginId": "loki",
"pluginName": "Loki"
},
{
"name": "DS_GRAFANACLOUD-CLECLERC-TRACES",
"label": "grafanacloud-cleclerc-traces",
"description": "",
"type": "datasource",
"pluginId": "tempo",
"pluginName": "Tempo"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "12.1.0-91094"
},
{
"type": "panel",
"id": "logs",
"name": "Logs",
"version": ""
},
{
"type": "datasource",
"id": "loki",
"name": "Loki",
"version": "12.1.0-91094"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
},
{
"type": "datasource",
"id": "tempo",
"name": "Tempo",
"version": "12.1.0-91094"
},
{
"type": "panel",
"id": "text",
"name": "Text",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Lightweight APM dashboard for monitoring OpenTelemetry-based services. \n\nInstrument your applications using OpenTelemetry SDKs and send traces, metrics, and logs to Tempo for traces, a Prometheus-compatible database like Mimir for metrics, and Loki for logs. This dashboard provides a centralized view of your application's health and performance. \n\nFor a fully managed observability stack, consider using Grafana Cloud. \n\nLearn more about this dashboard on https://github.com/cyrille-leclerc/opentelemetry-service-dashboard.",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"gridPos": {
"h": 7,
"w": 20,
"x": 0,
"y": 0
},
"id": 42,
"options": {
"code": {
"language": "plaintext",
"showLineNumbers": false,
"showMiniMap": false
},
"content": "<h1 style=\"background-color:yellow ; color: black\">Dashboard setup instructions (delete this panel after setup)</h1>\n\nThis dashboard requires:\n\n## Metrics\n\nSend OpenTelemetry metrics to the OTLP endpoint of a Prometheus database.\n\n### Prometheus\n\nSend OpenTelemetry metrics to the Prometheus OTLP Endpoint and configure the parameters `keep_identifying_resource_attributes` and `promote_resource_attributes` on the OTLP endpoint. \n\nExample Prometheus OTLP Endpoint configuration snippet:\n\n```yml\notlp:\n keep_identifying_resource_attributes: true\n promote_resource_attributes:\n # REQUIRED FOR THIS DASHBOARD\n - service.instance.id\n - service.name\n - service.namespace\n - deployment.environment.name\n # RECOMMENDED FOR OTEL METRICS IN GENERAL\n - service.version\n - cloud.availability_zone\n - cloud.region\n - container.name\n - deployment.environment\n - k8s.cluster.name\n - k8s.container.name\n - k8s.cronjob.name\n - k8s.daemonset.name\n - k8s.deployment.name\n - k8s.job.name\n - k8s.namespace.name\n - k8s.pod.name\n - k8s.replicaset.name\n - k8s.statefulset.name\n```\n\nLearn more in Prometheus [configuration reference](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) and [OpenTelemetry guide](https://prometheus.io/docs/guides/opentelemetry/).\n\n### Mimir OTLP Endpoint configuration\n\nSend OpenTelemetry metrics to the Mimir OTLP Endpoint and configure the parameters `otel_keep_identifying_resource_attributes` and `promote_otel_resource_attributes` on the OTLP endpoint. 
\n\nExample Mimir OTLP Endpoint configuration snippet:\n\n```yml\n# (experimental) Whether to keep identifying OTel resource attributes in the\n# target_info metric on top of converting to job and instance labels.\n# CLI flag: -distributor.otel-keep-identifying-resource-attributes\notel_keep_identifying_resource_attributes: true\n# (experimental) Optionally specify OTel resource attributes to promote to\n# labels.\n# CLI flag: -distributor.otel-promote-resource-attributes\npromote_otel_resource_attributes: \"service.instance.id, service.name, service.namespace, service.version, cloud.availability_zone, cloud.region, container.name, deployment.environment, deployment.environment.name, k8s.cluster.name, k8s.container.name, k8s.cronjob.name, k8s.daemonset.name, k8s.deployment.name, k8s.job.name, k8s.namespace.name, k8s.pod.name, k8s.replicaset.name, k8s.statefulset.name\"\n```\n\nLearn more in Mimir [configuration parameters](https://github.com/grafana/mimir/blob/main/docs/sources/mimir/configure/configuration-parameters/index.md).\n\n### Grafana Cloud Metrics\n\nSend OpenTelemetry metrics to the Grafana Cloud OTLP Endpoint as documented in [Grafana Cloud / Send OTLP data](https://grafana.com/docs/grafana-cloud/send-data/otlp/send-data-otlp/) and open a support ticket to activate `otel_keep_identifying_resource_attributes`.\n\nNote that the Grafana Cloud OTLP Endpoint is configured by default to promote the following resource attributes, this list can be modified through a support ticket:\n\n```\n# REQUIRED FOR THIS DASHBOARD\n- service.instance.id\n- service.name\n- service.namespace\n- deployment.environment.name\n# RECOMMENDED FOR OTEL METRICS IN GENERAL\n- service.version\n- cloud.availability_zone\n- cloud.region\n- container.name\n- deployment.environment\n- k8s.cluster.name\n- k8s.container.name\n- k8s.cronjob.name\n- k8s.daemonset.name\n- k8s.deployment.name\n- k8s.job.name\n- k8s.namespace.name\n- k8s.pod.name\n- k8s.replicaset.name\n- 
k8s.statefulset.name\n```\n\n## Logs\n\n### Grafana Cloud Logs\n\nSend OpenTelemetry logs to the Grafana Cloud OTLP Endpoint as documented in [Grafana Cloud / Send OTLP data](https://grafana.com/docs/grafana-cloud/send-data/otlp/send-data-otlp/) and open a support ticket to activate `otel_keep_identifying_resource_attributes`.\n\n### Loki\n\nSend OpenTelemetry logs to the Loki OTLP Endpoint as documented in [Loki / Send data / OpenTelemetry](https://grafana.com/docs/loki/latest/send-data/otel/).\n\n## Traces\n\n### Grafana Cloud Traces\n\nSend OpenTelemetry traces to the Grafana Cloud OTLP Endpoint as documented in [Grafana Cloud / Send OTLP data](https://grafana.com/docs/grafana-cloud/send-data/otlp/send-data-otlp/).\n\n### Tempo\n\nSend OpenTelemetry traces to the Tempo OTLP Endpoint which supports both OTLP protocols: HTTP/Protobuf and gRPC.\n\n## Grafana\n\nTo prevent PromQL `rate` function issues with OpenTelemetry metrics in Grafana, either set your Prometheus Datasource's **\"scrape interval\"** to `60s` or, if that's not possible, configure each affected dashboard panel's **\"Min Step\"** option to `60s`.",
"mode": "markdown"
},
"pluginVersion": "12.1.0-91094",
"type": "text"
},
{
"description": "service.namespace=${service_namespace}, service.name=${service_name}, deployment.environment.name=${deployment_environment_name}",
"gridPos": {
"h": 2,
"w": 10,
"x": 0,
"y": 7
},
"id": 20,
"options": {
"code": {
"language": "plaintext",
"showLineNumbers": false,
"showMiniMap": false
},
"content": "<h1><img src=\"https://opentelemetry.io/img/logos/opentelemetry-logo-nav.png\" alt=\"OpenTelemetry Icon\" width=\"25\" height=\"\"> Service ${service_namespace}/${service_name} (env: ${deployment_environment_name})</h1>\n",
"mode": "html"
},
"pluginVersion": "12.1.0-91094",
"type": "text"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "Shows the timestamp of the latest metrics received in the past 24h.",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "#24292e",
"mode": "fixed"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 3,
"x": 10,
"y": 7
},
"hideTimeOverride": true,
"id": 39,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "/^Time$/",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"editorMode": "code",
"expr": "timestamp(sum by (deployment_environment_name, service_namespace, service_name) (target_info{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}) or absent{})\n",
"interval": "60s",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"timeFrom": "now-24h",
"title": "Latest metrics received",
"type": "stat"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Shows the timestamp of the latest logs received in the past 24h.",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "#24292e",
"mode": "fixed"
},
"mappings": [],
"noValue": "No data",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "dateTimeFromNow"
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 3,
"x": 13,
"y": 7
},
"hideTimeOverride": true,
"id": 40,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "/^Time$/",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${DS_GRAFANACLOUD-CLECLERC-LOGS}"
},
"direction": "backward",
"editorMode": "code",
"expr": "sum(count_over_time({service_name=\"$service_name\", deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\"} [5m]))",
"queryType": "range",
"refId": "A"
}
],
"timeFrom": "now-24h",
"title": "Latest logs received",
"type": "stat"
},
{
"datasource": {
"type": "tempo",
"uid": "${tempo_datasource}"
},
"description": "Shows the timestamp of the latest span received in the past 24h.",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "#24292e",
"mode": "fixed"
},
"mappings": [],
"noValue": "No data",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "dateTimeFromNow"
},
"overrides": []
},
"gridPos": {
"h": 2,
"w": 4,
"x": 16,
"y": 7
},
"hideTimeOverride": true,
"id": 41,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "/^time$/",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "tempo",
"uid": "${DS_GRAFANACLOUD-CLECLERC-TRACES}"
},
"filters": [
{
"id": "0344fb49",
"operator": "=",
"scope": "resource",
"tag": "service.namespace",
"value": [
"$service_namespace"
],
"valueType": "string"
},
{
"id": "service-name",
"operator": "=",
"scope": "resource",
"tag": "service.name",
"value": [
"$service_name"
],
"valueType": "string"
}
],
"limit": 20,
"metricsQueryType": "range",
"query": "{resource.service.namespace=\"$service_namespace\" && resource.service.name=\"$service_name\"} | count_over_time()",
"queryType": "traceql",
"refId": "A",
"tableType": "traces"
}
],
"timeFrom": "now-24h",
"title": "Latest traces received",
"type": "stat"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 9
},
"id": 15,
"panels": [],
"title": "Server HTTP RED Metrics",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "HTTP endpoints aggregation on the `http.server.request.duration` metric.\n\nSee https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpserverrequestduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"noValue": "No HTTP Operation",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 0,
"y": 10
},
"id": 17,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.99, sum by(le, deployment_environment_name, service_namespace, service_name) (rate(http_server_request_duration_seconds_bucket{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "P99",
"range": true,
"refId": "P99",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.9, sum by(le, deployment_environment_name, service_namespace, service_name) (rate(http_server_request_duration_seconds_bucket{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "P90",
"range": true,
"refId": "P90",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg by(deployment_environment_name, service_namespace, service_name) (rate(http_server_request_duration_seconds_sum{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval])) / avg by(deployment_environment_name, service_namespace, service_name) (rate(http_server_request_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "AVG",
"range": true,
"refId": "AVG",
"useBackend": false
}
],
"title": "Duration",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "HTTP endpoints aggregation on the `http.server.request.duration` metric. \n\nErrors are identified by `http.response.status_code=~\"5..\"`.\n\nSee https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpserverrequestduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 100,
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"noValue": "No HTTP Operation",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 6,
"y": 10
},
"id": 18,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "(\n sum by(deployment_environment_name, service_namespace, service_name) (\n rate(\n http_server_request_duration_seconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\",\n http_response_status_code=~\"5..\"\n }[$__rate_interval]\n )\n ) * 100\n)\n/\n(\n sum by(deployment_environment_name, service_namespace, service_name) (\n rate(\n http_server_request_duration_seconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[$__rate_interval]\n )\n )\n)\nor\n(\n 0\n *\n sum by(deployment_environment_name, service_namespace, service_name) (\n rate(\n http_server_request_duration_seconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[$__rate_interval]\n )\n )\n)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"interval": "60s",
"legendFormat": "5xx",
"range": true,
"refId": "5xx",
"useBackend": false
}
],
"title": "Error",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "HTTP endpoints aggregation on the `http.server.request.duration` metric.\n\nSee https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpserverrequestduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"noValue": "No HTTP Operation",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 5,
"x": 12,
"y": 10
},
"id": 19,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"editorMode": "code",
"expr": "(sum(rate(http_server_request_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval])) by (deployment_environment_name, service_namespace, service_name)) ",
"hide": false,
"interval": "60s",
"legendFormat": "Requests",
"range": true,
"refId": "RequestRate"
}
],
"title": "Request Rate",
"type": "timeseries"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 31,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "gRPC endpoints aggregation on the `rpc.server.duration` metric.\n\nSee https://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/#metric-rpcserverduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"noValue": "No RPC operation",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 0,
"y": 17
},
"id": 33,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum by(le, deployment_environment_name, service_namespace, service_name) (rate(rpc_server_duration_milliseconds_bucket{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "P99",
"range": true,
"refId": "P99",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.9, sum by(le, deployment_environment_name, service_namespace, service_name) (rate(rpc_server_duration_milliseconds_bucket{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "P90",
"range": true,
"refId": "P90",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg by(deployment_environment_name, service_namespace, service_name) (rate(rpc_server_duration_milliseconds_sum{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval])) / avg by(deployment_environment_name, service_namespace, service_name) (rate(rpc_server_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "AVG",
"range": true,
"refId": "AVG",
"useBackend": false
}
],
"title": "Duration",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "RPC endpoints aggregation based on the `rpc.server.duration` metric.\n\nErrors are identified by `rpc.grpc.status_code != 0` which makes the panel specific to the gRPC protocol.\n\nSee https://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/#metric-rpcserverduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"noValue": "No RPC operation",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 6,
"x": 6,
"y": 17
},
"id": 34,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "(\n sum without (rpc_grpc_status_code, instance) (\n rate(\n rpc_server_duration_milliseconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\",\n rpc_grpc_status_code!=\"0\"\n }[$__rate_interval]\n )\n ) * 100\n)\n/\n(\n sum without (rpc_grpc_status_code, instance) (\n rate(\n rpc_server_duration_milliseconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[$__rate_interval]\n )\n )\n)\nor \n(\n 0\n *\n sum without (rpc_grpc_status_code, instance) (\n rate(\n rpc_server_duration_milliseconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[$__rate_interval]\n )\n )\n)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"interval": "60s",
"legendFormat": "Error",
"range": true,
"refId": "ERR",
"useBackend": false
}
],
"title": "gRPC Error",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "gRPC endpoints aggregation on the `rpc.server.duration` metric.\n\nSee https://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/#metric-rpcserverduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 10,
"type": "log"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"noValue": "No RPC operation",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 5,
"x": 12,
"y": 17
},
"id": 35,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"editorMode": "code",
"expr": "(sum(rate(rpc_server_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval])) by (deployment_environment_name, service_namespace, service_name))",
"hide": false,
"interval": "60s",
"legendFormat": "Requests",
"range": true,
"refId": "RequestRate"
}
],
"title": "Request Rate",
"type": "timeseries"
}
],
"title": "Server RPC RED Metrics",
"type": "row"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 17
},
"id": 22,
"panels": [],
"title": "Inbound Operations",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "Inbound HTTP operations of the service (aka HTTP endpoints) based on the `http.server.request.duration` metric.\n\nErrors are identified by `http.response.status_code=~\"5..\"`.\n\nSee https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpserverrequestduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"noValue": "No HTTP operation",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Duration (p99)"
},
"properties": [
{
"id": "unit",
"value": "s"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Rate"
},
"properties": [
{
"id": "unit",
"value": "reqps"
},
{
"id": "custom.width",
"value": 219
}
]
},
{
"matcher": {
"id": "byName",
"options": "Error"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 18
},
"id": 21,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "Operation"
}
]
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_GRAFANACLOUD-CLECLERC-PROM}"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "\n sum by (operation) (\n label_join(\n rate(http_server_request_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"operation\",\n \" \",\n \"http_request_method\",\n \"http_route\"\n )\n )\n ",
"fullMetaSearch": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "{{operation}}",
"range": true,
"refId": "RPS",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "(\n sum by (operation) (\n label_join(\n rate(http_server_request_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\", http_response_status_code=~\"5..\"}[$__rate_interval]),\n \"operation\",\n \" \",\n \"http_request_method\",\n \"http_route\"\n )\n )\n / \n sum by (operation) (\n label_join(\n rate(http_server_request_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"operation\",\n \" \",\n \"http_request_method\",\n \"http_route\"\n )\n )\n ) or (0 * \n sum by (operation) (\n label_join(\n rate(http_server_request_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"operation\",\n \" \",\n \"http_request_method\",\n \"http_route\"\n )\n )\n )",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "{{operation}}",
"range": true,
"refId": "ERR_PCT"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "\n histogram_quantile(\n 0.99,\n sum by (le, operation) (\n label_join(\n rate(http_server_request_duration_seconds_bucket{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[5m]),\n \"operation\",\n \" \",\n \"http_request_method\",\n \"http_route\"\n )\n )\n )\n ",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "{{operation}}",
"range": true,
"refId": "Duration"
}
],
"title": "HTTP Operations",
"transformations": [
{
"id": "timeSeriesTable",
"options": {
"Duration": {
"timeField": "Time"
},
"ERR_PCT": {
"timeField": "Time"
},
"RPS": {
"timeField": "Time"
}
}
},
{
"id": "joinByField",
"options": {
"byField": "operation",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"includeByName": {},
"indexByName": {
"Trend #Duration": 1,
"Trend #ERR_PCT": 2,
"Trend #RPS": 3,
"operation": 0
},
"renameByName": {
"Trend #Duration": "Duration (p99)",
"Trend #ERR_PCT": "Error",
"Trend #RPS": "Rate",
"operation": "Operation"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "Inbound RPC operations of the service (aka RPC endpoints) based on the `rpc.server.duration` metric.\n\nErrors are identified by `rpc.grpc.status_code != 0`, which makes the panel specific to the gRPC protocol.\n\nhttps://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/#metric-rpcserverduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"noValue": "No RPC operation",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Duration (p99)"
},
"properties": [
{
"id": "unit",
"value": "ms"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Rate"
},
"properties": [
{
"id": "unit",
"value": "reqps"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Error"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 18
},
"id": 27,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "Operation"
}
]
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "\nsum by (operation) (\n label_join(\n rate(rpc_server_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"operation\",\n \"/\",\n \"rpc_service\",\n \"rpc_method\"\n )\n)\n ",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "__auto",
"range": true,
"refId": "RPS",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "(\n sum by (operation) (\n label_join(\n rate(rpc_server_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\", rpc_grpc_status_code!=\"0\"}[$__rate_interval]),\n \"operation\",\n \"/\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n / \n sum by (operation) (\n label_join(\n rate(rpc_server_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"operation\",\n \"/\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n ) or (0 * \n sum by (operation) (\n label_join(\n rate(rpc_server_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"operation\",\n \"/\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n )\n ",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "__auto",
"range": true,
"refId": "ERR_PCT"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "\n histogram_quantile(\n 0.99,\n sum by (le, operation) (\n label_join(\n rate(rpc_server_duration_milliseconds_bucket{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[5m]),\n \"operation\",\n \"/\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n )\n ",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "{{operation}}",
"range": true,
"refId": "Duration"
}
],
"title": "RPC Operations",
"transformations": [
{
"id": "timeSeriesTable",
"options": {
"Duration": {
"timeField": "Time"
},
"ERR_PCT": {
"timeField": "Time"
},
"RPS": {
"timeField": "Time"
}
}
},
{
"id": "joinByField",
"options": {
"byField": "operation",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"includeByName": {},
"indexByName": {
"Trend #Duration": 1,
"Trend #ERR_PCT": 2,
"Trend #RPS": 3,
"operation": 0
},
"renameByName": {
"Trend #Duration": "Duration (p99)",
"Trend #ERR_PCT": "Error",
"Trend #RPS": "Rate",
"operation": "Operation"
}
}
}
],
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 24
},
"id": 28,
"panels": [],
"title": "Outbound Services and Databases",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "HTTP calls made by the service based on the `http.client.request.duration` metric.\n\nCalls broken down by remote `server.address`, `http.request.method`, and `url.template`.\n\nSee https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpclientrequestduration",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"noValue": "No HTTP call",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Duration (P99)"
},
"properties": [
{
"id": "unit",
"value": "s"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Rate"
},
"properties": [
{
"id": "unit",
"value": "reqps"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Error"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 25
},
"id": 23,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum by (outbound_service) (\n label_join(\n rate(\n http_client_request_duration_seconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[$__rate_interval]\n ),\n \"outbound_service\",\n \" \",\n \"server_address\",\n \"http_request_method\",\n \"url_template\"\n )\n)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "{{server_address}} {{http_request_method}} {{url_template}}",
"range": true,
"refId": "RPS",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "(\n sum by (outbound_service) (\n label_join(\n rate(\n http_client_request_duration_seconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\",\n http_response_status_code=~\"5..\"\n }[$__rate_interval]\n ),\n \"outbound_service\",\n \" \",\n \"server_address\",\n \"http_request_method\",\n \"url_template\"\n )\n )\n /\n sum by (outbound_service) (\n label_join(\n rate(\n http_client_request_duration_seconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[$__rate_interval]\n ),\n \"outbound_service\",\n \" \",\n \"server_address\",\n \"http_request_method\",\n \"url_template\"\n )\n )\n)\nor\n(\n 0\n *\n sum by (outbound_service) (\n label_join(\n rate(\n http_client_request_duration_seconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[$__rate_interval]\n ),\n \"outbound_service\",\n \" \",\n \"server_address\",\n \"http_request_method\",\n \"url_template\"\n )\n )\n)",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "{{server_address}} {{http_request_method}} {{url_template}}",
"range": true,
"refId": "ERR_PCT"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "histogram_quantile(\n 0.99,\n sum by (le, outbound_service) (\n label_join(\n rate(\n http_client_request_duration_seconds_bucket{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[5m]\n ),\n \"outbound_service\",\n \" \",\n \"server_address\",\n \"http_request_method\",\n \"url_template\"\n )\n )\n)",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "{{server_address}} {{http_request_method}} {{url_template}}",
"range": true,
"refId": "DURATION"
}
],
"title": "Outbound HTTP Services",
"transformations": [
{
"id": "timeSeriesTable",
"options": {
"Duration": {
"timeField": "Time"
},
"ERR_PCT": {
"timeField": "Time"
},
"RPS": {
"timeField": "Time"
}
}
},
{
"id": "joinByField",
"options": {
"byField": "outbound_service",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"includeByName": {},
"indexByName": {
"Trend #DURATION": 1,
"Trend #ERR_PCT": 2,
"Trend #RPS": 3,
"outbound_service": 0
},
"renameByName": {
"Trend #DURATION": "Duration (P99)",
"Trend #Duration": "Duration (p99)",
"Trend #ERR_PCT": "Error",
"Trend #RPS": "Rate",
"operation": "Operation",
"outbound_service": "Service"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "DB calls made by the service based on the `db.client.operation.duration` metric.\n\nCalls broken down by `server.address` and `db.namespace`.\n\nSee https://opentelemetry.io/docs/specs/semconv/database/database-metrics/#metric-dbclientoperationduration\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"noValue": "No database call",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Duration (P99)"
},
"properties": [
{
"id": "unit",
"value": "s"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Rate"
},
"properties": [
{
"id": "unit",
"value": "reqps"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Error"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 25
},
"id": 24,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": []
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum by (database) (\n label_join(\n rate(\n db_client_operation_duration_seconds_count{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[$__rate_interval]\n ),\n \"database\",\n \"/\",\n \"server_address\",\n \"db_namespace\"\n )\n)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "{{server_address}} {{db_namespace}}",
"range": true,
"refId": "RPS",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "(\n sum by (database) (\n label_join(\n rate(db_client_operation_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\", error_type!=\"\"}[$__rate_interval]),\n \"database\",\n \"/\",\n \"server_address\",\n \"db_namespace\"\n )\n )\n / \n sum by (database) (\n label_join(\n rate(db_client_operation_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"database\",\n \"/\",\n \"server_address\",\n \"db_namespace\"\n )\n )\n ) or (0 * \n sum by (database) (\n label_join(\n rate(db_client_operation_duration_seconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"database\",\n \"/\",\n \"server_address\",\n \"db_namespace\"\n )\n )\n )",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "{{server_address}} {{db_namespace}}",
"range": true,
"refId": "ERR_PCT"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "histogram_quantile(\n 0.99,\n sum by (le, database) (\n label_join(\n rate(\n db_client_operation_duration_seconds_bucket{\n deployment_environment_name=~\"$deployment_environment_name\",\n service_namespace=~\"$service_namespace\",\n service_name=\"$service_name\"\n }[5m]\n ),\n \"database\",\n \"/\",\n \"server_address\",\n \"db_namespace\"\n )\n )\n)",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "{{server_address}} {{db_namespace}}",
"range": true,
"refId": "DURATION"
}
],
"title": "Outbound Databases",
"transformations": [
{
"id": "timeSeriesTable",
"options": {
"DURATION": {
"timeField": "Time"
},
"Duration": {
"timeField": "Time"
},
"ERR_PCT": {
"timeField": "Time"
},
"RPS": {
"timeField": "Time"
}
}
},
{
"id": "joinByField",
"options": {
"byField": "database",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"includeByName": {},
"indexByName": {
"Trend #DURATION": 1,
"Trend #ERR_PCT": 2,
"Trend #RPS": 3,
"database": 0
},
"renameByName": {
"Trend #DURATION": "Duration (P99)",
"Trend #Duration": "Duration (p99)",
"Trend #ERR_PCT": "Error",
"Trend #RPS": "Rate",
"database": "Database",
"database_operation": "Database Operation",
"operation": "Operation",
"outbound_service": "Service"
}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "RPC calls made by the service based on the `rpc.client.duration` metric.\n\nSpecific to gRPC due to the usage of the `rpc.grpc.status_code` attribute to identify errors.\n\nCalls broken down by `server.address`, `rpc.service`, and `rpc.method`.\n\nSee https://opentelemetry.io/docs/specs/semconv/rpc/rpc-metrics/#rpc-client\n\n",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"noValue": "No RPC call",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Duration (P99)"
},
"properties": [
{
"id": "unit",
"value": "ms"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Rate"
},
"properties": [
{
"id": "unit",
"value": "reqps"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Error"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 30
},
"id": 32,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "\n sum by (outbound_service) (\n label_join(\n rate(rpc_client_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"outbound_service\",\n \"/\",\n \"server_address\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n ",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"interval": "60s",
"legendFormat": "__auto",
"range": true,
"refId": "RPS",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "(\n sum by (outbound_service) (\n label_join(\n rate(rpc_client_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\", rpc_grpc_status_code!=\"0\"}[$__rate_interval]),\n \"outbound_service\",\n \"/\",\n \"server_address\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n / \n sum by (outbound_service) (\n label_join(\n rate(rpc_client_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"outbound_service\",\n \"/\",\n \"server_address\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n ) or (0 * \n sum by (outbound_service) (\n label_join(\n rate(rpc_client_duration_milliseconds_count{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]),\n \"outbound_service\",\n \"/\",\n \"server_address\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n )",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "__auto",
"range": true,
"refId": "ERR_PCT"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "\nhistogram_quantile(\n 0.99,\n sum by (le, outbound_service) (\n label_join(\n rate(rpc_client_duration_milliseconds_bucket{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[5m]),\n \"outbound_service\",\n \"/\",\n \"server_address\",\n \"rpc_service\",\n \"rpc_method\"\n )\n )\n)",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "__auto",
"range": true,
"refId": "DURATION"
}
],
"title": "Outbound RPC Services",
"transformations": [
{
"id": "timeSeriesTable",
"options": {
"Duration": {
"timeField": "Time"
},
"ERR_PCT": {
"timeField": "Time"
},
"RPS": {
"timeField": "Time"
}
}
},
{
"id": "joinByField",
"options": {
"byField": "outbound_service",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"includeByName": {},
"indexByName": {
"Trend #DURATION": 1,
"Trend #ERR_PCT": 2,
"Trend #RPS": 3,
"outbound_service": 0
},
"renameByName": {
"Trend #DURATION": "Duration (P99)",
"Trend #Duration": "Duration (p99)",
"Trend #ERR_PCT": "Error",
"Trend #RPS": "Rate",
"operation": "Operation",
"outbound_service": "Service Method"
}
}
}
],
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 35
},
"id": 25,
"panels": [],
"title": "Logs",
"type": "row"
},
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"description": "Logs of the service, filtered by `deployment.environment.name`, `service.namespace`, and `service.name`.\n\nTo explore the logs, open the menu by clicking on the icon `⋮` of this panel and click on `Explore`.",
"gridPos": {
"h": 15,
"w": 24,
"x": 0,
"y": 36
},
"id": 26,
"options": {
"dedupStrategy": "none",
"enableInfiniteScrolling": true,
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": true,
"sortOrder": "Ascending",
"wrapLogMessage": false
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "${loki_datasource}"
},
"direction": "backward",
"editorMode": "code",
"expr": "{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"} | line_format `\u001b[1m{{if .level }}{{alignRight 5 .level}}{{else if .severity_text}}{{alignRight 5 .severity_text}}{{end}}\u001b[0m \u001b[90m[{{alignRight 10 .service_instance_id}}{{if .thread_name}}/{{alignRight 20 .thread_name}}{{else if eq \"java\" .telemetry_sdk_language }} {{end}}]\u001b[0m \u001b[36m{{if .scope_name }}{{alignRight 40 .scope_name}}{{end}}{{if .exception_type}} \u001b[1;101m \u001b[0m{{end}} {{if .exception_type}}\u001b[1;91m{{.exception_type}}\u001b[0m{{end}}{{if .exception_message}} \u001b[1;91m{{.exception_message}}\u001b[0m {{end}} \u001b[0m {{__line__}} {{if .trace_id}} \u001b[37m\u001b[3m[trace_id={{.trace_id}}]{{end}}`",
"queryType": "range",
"refId": "A"
}
],
"type": "logs"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 51
},
"id": 29,
"panels": [],
"title": "Traces",
"type": "row"
},
{
"datasource": {
"type": "tempo",
"uid": "${tempo_datasource}"
},
"description": "Traces containing a span emitted by the service",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"noValue": "No traces",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Trace Service"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Span ID"
},
"properties": [
{
"id": "custom.hidden",
"value": false
}
]
},
{
"matcher": {
"id": "byName",
"options": "deployment.environment.name"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "service.name"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "service.namespace"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 15,
"x": 0,
"y": 52
},
"id": 30,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 0,
"showHeader": true
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "tempo",
"uid": "${tempo_datasource}"
},
"filters": [
{
"id": "service-name",
"operator": "=",
"scope": "resource",
"tag": "service.name",
"value": [
"$service_name"
],
"valueType": "string"
},
{
"id": "6997e808",
"operator": "=",
"scope": "resource",
"tag": "deployment.environment.name",
"value": [
"$deployment_environment_name"
],
"valueType": "string"
},
{
"id": "e9f0e855",
"operator": "=",
"scope": "resource",
"tag": "service.namespace",
"value": [
"$service_namespace"
],
"valueType": "string"
}
],
"limit": 20,
"metricsQueryType": "range",
"query": "{resource.service.name=\"$service_name\" && resource.service.namespace=~\"$service_namespace\"} | select(status)",
"queryType": "traceql",
"refId": "A",
"tableType": "spans"
}
],
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 60
},
"id": 37,
"panels": [],
"title": "Runtime",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"description": "For JVM based services like Java or Kotlin, the JVM metrics.\n\nSee https://opentelemetry.io/docs/specs/semconv/runtime/jvm-metrics/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"noValue": "No JVM metrics",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Trend #CPU_PCT"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Trend #GC_PCT"
},
"properties": [
{
"id": "unit",
"value": "percentunit"
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 61
},
"id": 38,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "12.1.0-91094",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum by(service_instance_id) (jvm_cpu_recent_utilization_ratio{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"})",
"hide": false,
"interval": "60s",
"legendFormat": "__auto",
"range": true,
"refId": "CPU_PCT"
},
{
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"editorMode": "code",
"expr": "sum by(service_instance_id) (rate(jvm_gc_duration_seconds_sum{deployment_environment_name=~\"$deployment_environment_name\", service_namespace=~\"$service_namespace\", service_name=\"$service_name\"}[$__rate_interval]))",
"hide": false,
"instant": false,
"interval": "60s",
"legendFormat": "__auto",
"range": true,
"refId": "GC_PCT"
}
],
"title": "JVM",
"transformations": [
{
"id": "timeSeriesTable",
"options": {
"A": {
"timeField": "Time"
},
"CPU_PCT": {
"timeField": "Time"
},
"GC_PCT": {
"timeField": "Time"
}
}
},
{
"id": "joinByField",
"options": {
"byField": "service_instance_id",
"mode": "outer"
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"includeByName": {},
"indexByName": {},
"renameByName": {
"Trend #CPU_PCT": "CPU",
"Trend #GC_PCT": "Garbage Collector Time",
"instance": "Instance",
"service_instance_id": "Instance"
}
}
}
],
"type": "table"
}
],
"refresh": "30s",
"schemaVersion": 41,
"tags": [],
"templating": {
"list": [
{
"allowCustomValue": false,
"current": {},
"description": "OpenTelemetry metrics. \nSend metrics using the Prometheus OTLP endpoint activating `keep_identifying_resource_attributes` and resource attribute promotion (aka `promote_resource_attributes`) including `service.name`, `service.namespace`, `service.instance.id`, and `deployment.environment.name`",
"label": "Metrics",
"name": "prometheus_datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "(?!grafanacloud-usage|grafanacloud-ml-metrics).+",
"type": "datasource"
},
{
"allowCustomValue": false,
"current": {},
"description": "OpenTelemetry traces",
"label": "Traces",
"name": "tempo_datasource",
"options": [],
"query": "tempo",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allowCustomValue": false,
"current": {},
"description": "OpenTelemetry logs.\n\nSend logs using the Loki OTLP endpoint activating resource attribute promotion (aka `default_resource_attributes_as_index_labels`) including `service.name`, `service.namespace`, and `deployment.environment.name`",
"label": "Logs",
"name": "loki_datasource",
"options": [],
"query": "loki",
"refresh": 1,
"regex": "(?!grafanacloud-cleclerc-alert-state-history|grafanacloud-.*-usage-insights).+",
"type": "datasource"
},
{
"allowCustomValue": false,
"current": {},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(target_info,deployment_environment_name)",
"description": "Deployment environment (e.g. \"production\").\nResource attribute `deployment.environment.name` via `target_info`",
"includeAll": true,
"label": "Environment",
"name": "deployment_environment_name",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(target_info,deployment_environment_name)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"type": "query"
},
{
"allowCustomValue": false,
"current": {},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(target_info{deployment_environment_name=~\"$deployment_environment_name\"},service_namespace)",
"description": "Service namespace.\nResource attribute `service.namespace` via `target_info`",
"includeAll": true,
"label": "Namespace",
"name": "service_namespace",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(target_info{deployment_environment_name=~\"$deployment_environment_name\"},service_namespace)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "${prometheus_datasource}"
},
"definition": "label_values(target_info{service_namespace=~\"$service_namespace\", deployment_environment_name=~\"$deployment_environment_name\"},service_name)",
"description": "Service name.\nResource attribute `service.name` via `target_info`.",
"label": "Name",
"name": "service_name",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(target_info{service_namespace=~\"$service_namespace\", deployment_environment_name=~\"$deployment_environment_name\"},service_name)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 2,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Lightweight APM for OpenTelemetry",
"uid": "febljk0a32qyoa",
"version": 41,
"weekStart": "",
"id": null,
"gnetId": 22784
}