diff --git a/docs/_files/cp-dashboard-p1.png b/docs/_files/cp-dashboard-p1.png new file mode 100644 index 00000000..67dbaacf Binary files /dev/null and b/docs/_files/cp-dashboard-p1.png differ diff --git a/docs/_files/cp-dashboard-p2.png b/docs/_files/cp-dashboard-p2.png new file mode 100644 index 00000000..ddb63b90 Binary files /dev/null and b/docs/_files/cp-dashboard-p2.png differ diff --git a/docs/guides/installation.md b/docs/guides/installation.md index 9a3cc12f..22adf704 100644 --- a/docs/guides/installation.md +++ b/docs/guides/installation.md @@ -328,3 +328,30 @@ tk create helmrelease sealed-secrets \ --chart-name=sealed-secrets \ --chart-version="^1.10.0" ``` + +### Monitoring with Prometheus and Grafana + +The GitOps Toolkit comes with an optional monitoring stack. +You can install the stack in the `gitops-system` namespace with: + +```yaml +kustomize build github.com/fluxcd/toolkit/manifests/monitoring?ref=master | kubectl apply -f- +``` + +The monitoring stack is composed of: + +* Prometheus server - collects metrics from the toolkit controllers and stores them for 2h +* Grafana dashboards - displays the control plane resource usage and reconciliation stats + +![](../_files/cp-dashboard-p1.png) + +![](../_files/cp-dashboard-p2.png) + +If you wish to use your own Prometheus and Grafana instances, then you can import the dashboards from +[GitHub](https://github.com/fluxcd/toolkit/tree/master/manifests/monitoring/grafana/dashboards). + +!!! hint + Note that the toolkit controllers expose the `/metrics` endpoint on port `8080`. + When using Prometheus Operator you should create `PodMonitor` objects to configure scraping. + When Prometheus is running outside of the `gitops-system` namespace, you have to create a network policy + that allows traffic on port `8080` from the namespace where Prometheus is deployed. diff --git a/manifests/monitoring/grafana/dashboards/control-plane.json b/manifests/monitoring/grafana/dashboards/control-plane.json new file mode 100644 index 00000000..2f4a9fff --- /dev/null +++ b/manifests/monitoring/grafana/dashboards/control-plane.json @@ -0,0 +1,1412 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1596541513301, + "links": [], + "panels": [ + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 24, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "7.1.1", + "targets": [ + { + "expr": "sum(go_info{kubernetes_namespace=\"$namespace\",kubernetes_pod_name=~\".*-controller-.*\"})", + "interval": "", + "legendFormat": "pods", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Controllers", + "type": "stat" + }, + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 23, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7.1.1", + "targets": [ + { + "expr": "max(workqueue_longest_running_processor_seconds{kubernetes_namespace=\"$namespace\",kubernetes_pod_name=~\".*-controller-.*\"})", + "hide": false, + "interval": "", + "legendFormat": "seconds", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Max Work Queue", + "type": "stat" + }, + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "#EAB839", + "value": 500000000 + }, + { + "color": "red", + "value": 900000000 + } + ] + }, + "unit": "decbits" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 25, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "7.1.1", + "targets": [ + { + "expr": "sum(go_memstats_alloc_bytes{kubernetes_namespace=\"$namespace\",kubernetes_pod_name=~\".*-controller-.*\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "type": "gauge" + }, + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 26, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7.1.1", + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{kubernetes_namespace=\"$namespace\",kubernetes_pod_name=~\".*-controller-.*\"}[1m]))", + "interval": "", + "legendFormat": "requests", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "API Requests", + "type": "stat" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 15, + "panels": [], + "title": "Resource Usage", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(rest_client_request_latency_seconds_bucket{kubernetes_namespace=\"$namespace\"}[5m])) by (le))", + "interval": "", + "legendFormat": "P50", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.90, sum(rate(rest_client_request_latency_seconds_bucket{kubernetes_namespace=\"$namespace\"}[5m])) by (le))", + "hide": true, + "interval": "", + "legendFormat": "P90", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{kubernetes_namespace=\"$namespace\"}[5m])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "P99", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kubernetes API Requests Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:912", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:913", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": null, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 21, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(rest_client_requests_total{kubernetes_namespace=\"$namespace\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "total", + "refId": "A" + }, + { + "expr": "sum(rate(rest_client_requests_total{kubernetes_namespace=\"$namespace\",code!~\"2..\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "errors", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kubernetes API Requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:912", + "decimals": null, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:913", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{kubernetes_namespace=\"$namespace\",kubernetes_pod_name=~\".*-controller-.*\"}[1m])", + "interval": "", + "legendFormat": "{{kubernetes_pod_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:93", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:94", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 14 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(go_memstats_alloc_bytes_total{kubernetes_namespace=\"$namespace\",kubernetes_pod_name=~\".*-controller-.*\"}[1m])", + "hide": false, + "interval": "", + "legendFormat": "{{kubernetes_pod_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:93", + "decimals": 0, + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:94", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 17, + "panels": [], + "title": "Reconciliation Stats", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "workqueue_longest_running_processor_seconds{name=\"kustomization\"}", + "hide": false, + "interval": "", + "legendFormat": "kustomizations", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cluster Reconciliation Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:912", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:913", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 34 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"kustomization\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful reconciliations ", + "refId": "A" + }, + { + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"kustomization\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed reconciliations ", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cluster Reconciliations ops/min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1171", + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1172", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 34 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"gitrepository\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful git pulls", + "refId": "A" + }, + { + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"gitrepository\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed git pulls", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Git Sources ops/min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:285", + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:286", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 19, + "panels": [], + "title": "Helm Stats", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 44 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.50, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": true, + "interval": "", + "legendFormat": "P50", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.90, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": true, + "interval": "", + "legendFormat": "P90", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "P99", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Helm Release Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1612", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1613", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 52 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrelease\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful reconciliations ", + "refId": "A" + }, + { + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrelease\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed reconciliations ", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Helm Releases ops/min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1102", + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1103", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 52 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmchart\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful chart pulls", + "refId": "A" + }, + { + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmchart\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed chart pulls", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Helm Charts ops/min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1892", + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1893", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 26, + "style": "light", + "tags": [ + "gitops-toolkit" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "gitops-system", + "value": "gitops-system" + }, + "datasource": "prometheus", + "definition": "workqueue_work_duration_seconds_count", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [], + "query": "workqueue_work_duration_seconds_count", + "refresh": 2, + "regex": "/.*namespace=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "GitOps Toolkit Control Plane", + "uid": "gitops-toolkit-control-plane", + "version": 1 +} diff --git a/manifests/monitoring/grafana/datasources.yaml b/manifests/monitoring/grafana/datasources.yaml new file mode 100644 index 00000000..d50bd1e2 --- /dev/null +++ b/manifests/monitoring/grafana/datasources.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasources + namespace: gitops-system +data: + datasources.yaml: |- + apiVersion: 1 + deleteDatasources: + - name: prometheus + datasources: + - name: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true + version: 1 diff --git a/manifests/monitoring/grafana/deployment.yaml b/manifests/monitoring/grafana/deployment.yaml new file mode 100644 index 00000000..19c349a3 --- /dev/null +++ b/manifests/monitoring/grafana/deployment.yaml @@ -0,0 +1,60 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + labels: + app: grafana +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + annotations: + prometheus.io/scrape: 'false' + spec: + containers: + - name: grafana + image: "grafana/grafana:7.1.1" + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 3000 + protocol: TCP + env: + - name: GF_PATHS_PROVISIONING + value: /etc/grafana/provisioning/ + - name: GF_AUTH_BASIC_ENABLED + value: "false" + - name: GF_AUTH_ANONYMOUS_ENABLED + value: "true" + - name: GF_AUTH_ANONYMOUS_ORG_ROLE + value: Admin + - name: GF_DEFAULT_THEME + value: "Light" + volumeMounts: + - name: grafana + mountPath: /var/lib/grafana + - name: dashboards + mountPath: /etc/grafana/dashboards + - name: datasources + mountPath: /etc/grafana/provisioning/datasources + - name: providers + mountPath: /etc/grafana/provisioning/dashboards + resources: + {} + volumes: + - name: grafana + emptyDir: {} + - name: dashboards + configMap: + name: grafana-dashboards + - name: providers + configMap: + name: grafana-providers + - name: datasources + configMap: + name: grafana-datasources diff --git a/manifests/monitoring/grafana/kustomization.yaml b/manifests/monitoring/grafana/kustomization.yaml new file mode 100644 index 00000000..0472ee72 --- /dev/null +++ b/manifests/monitoring/grafana/kustomization.yaml @@ -0,0 +1,13 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: gitops-system +resources: + - service.yaml + - deployment.yaml + - providers.yaml + - datasources.yaml +configMapGenerator: + - name: grafana-dashboards + files: + - dashboards/control-plane.json + diff --git a/manifests/monitoring/grafana/providers.yaml b/manifests/monitoring/grafana/providers.yaml new file mode 100644 index 00000000..ccbd4cc0 --- /dev/null +++ b/manifests/monitoring/grafana/providers.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-providers + namespace: gitops-system +data: + providers.yaml: |+ + apiVersion: 1 + providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /etc/grafana/dashboards diff --git a/manifests/monitoring/grafana/service.yaml b/manifests/monitoring/grafana/service.yaml new file mode 100644 index 00000000..8c6c2ec5 --- /dev/null +++ b/manifests/monitoring/grafana/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: grafana + namespace: gitops-system + labels: + app: grafana +spec: + type: ClusterIP + ports: + - port: 3000 + targetPort: http + protocol: TCP + name: http + selector: + app: grafana diff --git a/manifests/monitoring/kustomization.yaml b/manifests/monitoring/kustomization.yaml new file mode 100644 index 00000000..f18a3b2a --- /dev/null +++ b/manifests/monitoring/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: gitops-system +resources: +- prometheus +- grafana diff --git a/manifests/monitoring/prometheus/account.yaml b/manifests/monitoring/prometheus/account.yaml new file mode 100644 index 00000000..81de3172 --- /dev/null +++ b/manifests/monitoring/prometheus/account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus + namespace: gitops-system diff --git a/manifests/monitoring/prometheus/deployment.yaml b/manifests/monitoring/prometheus/deployment.yaml new file mode 100644 index 00000000..c57c437d --- /dev/null +++ b/manifests/monitoring/prometheus/deployment.yaml @@ -0,0 +1,52 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus + namespace: gitops-system +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus + template: + metadata: + labels: + app: prometheus + annotations: + appmesh.k8s.aws/sidecarInjectorWebhook: disabled + sidecar.istio.io/inject: "false" + spec: + serviceAccountName: prometheus + containers: + - name: prometheus + image: prom/prometheus:v2.20.0 + imagePullPolicy: IfNotPresent + args: + - '--storage.tsdb.retention=2h' + - '--config.file=/etc/prometheus/prometheus.yml' + ports: + - containerPort: 9090 + name: http + livenessProbe: + httpGet: + path: /-/healthy + port: 9090 + readinessProbe: + httpGet: + path: /-/ready + port: 9090 + resources: + requests: + cpu: 10m + memory: 128Mi + volumeMounts: + - name: config-volume + mountPath: /etc/prometheus + - name: data-volume + mountPath: /prometheus/data + volumes: + - name: config-volume + configMap: + name: prometheus + - name: data-volume + emptyDir: {} \ No newline at end of file diff --git a/manifests/monitoring/prometheus/kustomization.yaml b/manifests/monitoring/prometheus/kustomization.yaml new file mode 100644 index 00000000..4d8de750 --- /dev/null +++ b/manifests/monitoring/prometheus/kustomization.yaml @@ -0,0 +1,12 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: gitops-system +resources: + - account.yaml + - rbac.yaml + - service.yaml + - deployment.yaml +configMapGenerator: + - name: prometheus + files: + - prometheus.yml diff --git a/manifests/monitoring/prometheus/prometheus.yml b/manifests/monitoring/prometheus/prometheus.yml new file mode 100644 index 00000000..ca72bf8b --- /dev/null +++ b/manifests/monitoring/prometheus/prometheus.yml @@ -0,0 +1,52 @@ +global: + scrape_interval: 10s +scrape_configs: + +# Kubernetes API +- job_name: kubernetes-apiserver + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - default + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: kubernetes;https + +# Kubernetes pods +- job_name: kubernetes-pods + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: true + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_scrape + - action: replace + regex: (.+) + source_labels: + - __meta_kubernetes_pod_annotation_prometheus_io_path + target_label: __metrics_path__ + - action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + source_labels: + - __address__ + - __meta_kubernetes_pod_annotation_prometheus_io_port + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: kubernetes_namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: kubernetes_pod_name diff --git a/manifests/monitoring/prometheus/rbac.yaml b/manifests/monitoring/prometheus/rbac.yaml new file mode 100644 index 00000000..5f796c19 --- /dev/null +++ b/manifests/monitoring/prometheus/rbac.yaml @@ -0,0 +1,32 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-gitops-system +rules: + - apiGroups: [""] + resources: + - nodes + - services + - endpoints + - pods + - nodes/proxy + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: + - configmaps + verbs: ["get"] + - nonResourceURLs: ["/metrics"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-gitops-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-gitops-system +subjects: +- kind: ServiceAccount + name: prometheus + namespace: gitops-system diff --git a/manifests/monitoring/prometheus/service.yaml b/manifests/monitoring/prometheus/service.yaml new file mode 100644 index 00000000..27ada3c0 --- /dev/null +++ b/manifests/monitoring/prometheus/service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: prometheus + namespace: gitops-system +spec: + selector: + app: prometheus + ports: + - name: http + protocol: TCP + port: 9090 \ No newline at end of file