INIT(repo): apps setup with all user-facing servic
This commit is contained in:
20
docusaurus/.gitignore
vendored
Normal file
20
docusaurus/.gitignore
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
# Dependencies
# NOTE: package-lock.json is deliberately NOT ignored. Commit it so that
# every build of the site resolves the same dependency versions.
node_modules/

# Production build output and Docusaurus caches
build/
.docusaurus/
.cache-loader/

# OS-generated files
.DS_Store
Thumbs.db

# Local environment files and package-manager debug logs
.env
.env.local
.env.*.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*
|
||||
53
docusaurus/argocd/docusaurus.yaml
Normal file
53
docusaurus/argocd/docusaurus.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
---
# Argo CD Application that syncs the manifests stored under `docusaurus/`
# in the infrastructure repository into the `docusaurus` namespace.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: docusaurus
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  revisionHistoryLimit: 10

  # Deploy into the cluster Argo CD itself runs in.
  destination:
    server: https://kubernetes.default.svc
    namespace: docusaurus

  sources:
    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      targetRevision: main
      path: docusaurus

  syncPolicy:
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true

    automated:
      prune: true
      selfHeal: true
      allowEmpty: false

    # Retry a failed sync up to 5 times: 5s, 10s, 20s, ... capped at 3m.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m

  # Do not report drift for annotations on Deployments (checksum and
  # manual-restart annotations) or for the cluster-assigned Service IP.
  ignoreDifferences:
    - group: apps
      kind: Deployment
      jqPathExpressions:
        - .spec.template.metadata.annotations
        - .metadata.annotations

    - group: ''
      kind: Service
      name: docusaurus
      namespace: docusaurus
      jsonPointers:
        - /spec/clusterIP
|
||||
25
docusaurus/blog/2025-12-17-welcome.md
Normal file
25
docusaurus/blog/2025-12-17-welcome.md
Normal file
@@ -0,0 +1,25 @@
|
||||
---
|
||||
slug: welcome
|
||||
title: Welcome to Infrastructure Docs
|
||||
authors:
|
||||
name: Bluemayne
|
||||
title: Infrastructure Engineer
|
||||
tags: [welcome, infrastructure]
|
||||
---
|
||||
|
||||
# Welcome to Infrastructure Documentation
|
||||
|
||||
Welcome to our infrastructure documentation site! This is where we'll document all our infrastructure setup, guides, and best practices.
|
||||
|
||||
<!--truncate-->
|
||||
|
||||
## What's New
|
||||
|
||||
We've just launched this documentation site using Docusaurus. Here you'll find:
|
||||
|
||||
- Complete infrastructure setup guides
|
||||
- Service documentation
|
||||
- Troubleshooting tips
|
||||
- Best practices
|
||||
|
||||
Stay tuned for more updates!
|
||||
117
docusaurus/deployment.yaml
Normal file
117
docusaurus/deployment.yaml
Normal file
@@ -0,0 +1,117 @@
|
||||
---
# Namespace that the Deployment and ConfigMap in this file are created in.
apiVersion: v1
kind: Namespace
metadata:
  name: docusaurus
|
||||
---
|
||||
# Serves the documentation site. An init container clones the repository
# and builds the static site into a shared emptyDir volume; the nginx
# container then serves that volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: docusaurus
  namespace: docusaurus
  labels:
    app: docusaurus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: docusaurus
  template:
    metadata:
      labels:
        app: docusaurus
    spec:
      initContainers:
        - name: build-docusaurus
          image: node:18-alpine
          workingDir: /workspace
          command:
            - sh
            - -c
            - |
              # Abort on the first failing command. Without this the script
              # always ended with a successful `echo`, so a failed clone,
              # install or build still let nginx start on an empty volume.
              set -eu
              apk add --no-cache git
              echo "Cloning repository..."
              # Only the latest revision is needed for the build.
              git clone --depth 1 https://gitea0213.kro.kr/bluemayne/infrastructure.git /tmp/repo
              cd /tmp/repo/docusaurus
              echo "Installing dependencies..."
              npm install --legacy-peer-deps
              echo "Building Docusaurus site..."
              npm run build
              echo "Copying build output..."
              cp -r build/* /build/
              echo "Build complete!"
          volumeMounts:
            # Shared with the nginx container, which serves its contents.
            - name: build-output
              mountPath: /build
      containers:
        - name: nginx
          image: nginx:alpine
          ports:
            - containerPort: 80
              name: http
          volumeMounts:
            - name: build-output
              mountPath: /usr/share/nginx/html
            # Mount only default.conf so the rest of conf.d stays untouched.
            - name: nginx-config
              mountPath: /etc/nginx/conf.d/default.conf
              subPath: default.conf
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              cpu: 200m
              memory: 128Mi
          livenessProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 10
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        # Scratch volume: the site is rebuilt every time the pod starts.
        - name: build-output
          emptyDir: {}
        - name: nginx-config
          configMap:
            name: nginx-config
|
||||
---
|
||||
# nginx virtual-host configuration for the Docusaurus site. The Deployment
# mounts `default.conf` at /etc/nginx/conf.d/default.conf.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-config
  namespace: docusaurus
data:
  default.conf: |
    server {
        listen 80;
        server_name _;
        root /usr/share/nginx/html;
        index index.html;

        # Enable gzip compression
        gzip on;
        gzip_vary on;
        gzip_min_length 1024;
        gzip_types text/plain text/css text/xml text/javascript application/x-javascript application/xml+rss application/javascript application/json;

        # Security headers (inherited only by blocks that declare no
        # add_header of their own)
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header X-XSS-Protection "1; mode=block" always;

        # SPA fallback
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Cache static assets
        location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
            expires 1y;
            add_header Cache-Control "public, immutable";

            # nginx drops every inherited add_header as soon as a block
            # defines one itself, so the security headers are repeated here.
            add_header X-Frame-Options "SAMEORIGIN" always;
            add_header X-Content-Type-Options "nosniff" always;
            add_header X-XSS-Protection "1; mode=block" always;
        }
    }
|
||||
103
docusaurus/docs/getting-started/architecture.md
Normal file
103
docusaurus/docs/getting-started/architecture.md
Normal file
@@ -0,0 +1,103 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
---
|
||||
|
||||
# Architecture
|
||||
|
||||
## System Architecture
|
||||
|
||||
Our infrastructure follows modern DevOps practices with GitOps at its core.
|
||||
|
||||
### GitOps Workflow
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
A[Developer] -->|Git Push| B[Gitea]
|
||||
B -->|Webhook| C[ArgoCD]
|
||||
C -->|Deploy| D[Kubernetes]
|
||||
D -->|Metrics| E[Prometheus]
|
||||
E -->|Visualize| F[Grafana]
|
||||
```
|
||||
|
||||
### Network Architecture
|
||||
|
||||
#### Ingress Layer
|
||||
- **nginx-ingress-controller**: Routes external traffic
|
||||
- **cert-manager**: Manages SSL certificates via Let's Encrypt
|
||||
- **DNS**: kro.kr domain with wildcard support
|
||||
|
||||
#### Application Layer
|
||||
- Each service runs in its own namespace
|
||||
- Resource limits and requests defined
|
||||
- Health checks and readiness probes
|
||||
|
||||
#### Data Layer
|
||||
- **PostgreSQL**: Primary database for Gitea, Grafana
|
||||
- **Persistent Volumes**: Using local-path provisioner
|
||||
- **Backup**: Automated backups to S3-compatible storage (MinIO)
|
||||
|
||||
### Security Architecture
|
||||
|
||||
```yaml
|
||||
Security Layers:
|
||||
1. Network Level:
|
||||
- Ingress with TLS termination
|
||||
- Network policies between namespaces
|
||||
|
||||
2. Application Level:
|
||||
- External Secrets for sensitive data
|
||||
- HashiCorp Vault integration
|
||||
- Secret rotation policies
|
||||
|
||||
3. Access Control:
|
||||
- RBAC for Kubernetes
|
||||
- SSO integration (future)
|
||||
- Audit logging
|
||||
```
|
||||
|
||||
## Design Principles
|
||||
|
||||
### 1. Everything as Code
|
||||
- Infrastructure: Kubernetes YAML
|
||||
- Configuration: Kustomize
|
||||
- Secrets: External Secrets Operator
|
||||
- Monitoring: Prometheus rules as code
|
||||
|
||||
### 2. GitOps First
|
||||
- Single source of truth: Git repository
|
||||
- Automatic synchronization via ArgoCD
|
||||
- Rollback capability through Git history
|
||||
|
||||
### 3. Observability
|
||||
- Metrics: Prometheus
|
||||
- Logs: Loki
|
||||
- Traces: (Future: Jaeger/Tempo)
|
||||
- Dashboards: Grafana
|
||||
|
||||
### 4. High Availability
|
||||
- Multi-node Kubernetes cluster
|
||||
- Replicated stateful services
|
||||
- Automated failover
|
||||
|
||||
## Technology Stack
|
||||
|
||||
| Layer | Technology |
|
||||
|-------|-----------|
|
||||
| Container Orchestration | Kubernetes (K3s) |
|
||||
| GitOps | ArgoCD |
|
||||
| Service Mesh | (Future: Istio/Linkerd) |
|
||||
| Ingress | nginx-ingress |
|
||||
| Certificate Management | cert-manager |
|
||||
| Secrets Management | External Secrets + Vault |
|
||||
| Monitoring | Prometheus + Grafana |
|
||||
| Logging | Loki + Promtail |
|
||||
| Storage | local-path, MinIO |
|
||||
| Database | PostgreSQL |
|
||||
| Git | Gitea |
|
||||
| Documentation | Docusaurus |
|
||||
|
||||
## Further Reading
|
||||
|
||||
- [Kubernetes Setup Details](../services/kubernetes)
|
||||
- [ArgoCD Configuration](../services/argocd)
|
||||
- [Monitoring Stack](../services/monitoring)
|
||||
67
docusaurus/docs/getting-started/overview.md
Normal file
67
docusaurus/docs/getting-started/overview.md
Normal file
@@ -0,0 +1,67 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
---
|
||||
|
||||
# Overview
|
||||
|
||||
## Infrastructure at a Glance
|
||||
|
||||
Our infrastructure is designed for **high availability**, **automation**, and **easy management**.
|
||||
|
||||
### Key Components
|
||||
|
||||
#### 🎯 Kubernetes (K3s)
|
||||
- Lightweight Kubernetes distribution
|
||||
- Running on Oracle Cloud Infrastructure
|
||||
- Multi-node cluster for redundancy
|
||||
|
||||
#### 🔄 ArgoCD
|
||||
- GitOps-based deployment
|
||||
- Automatic synchronization from Git
|
||||
- Declarative infrastructure management
|
||||
|
||||
#### 🔐 Security
|
||||
- **cert-manager**: Automatic SSL/TLS certificates
|
||||
- **External Secrets**: Vault integration for secrets management
|
||||
- **Network Policies**: Fine-grained network access control
|
||||
|
||||
#### 📊 Monitoring
|
||||
- **Prometheus**: Metrics collection
|
||||
- **Grafana**: Visualization and dashboards
|
||||
- **Loki**: Log aggregation
|
||||
- **Alertmanager**: Alert management
|
||||
|
||||
### Infrastructure Layout
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────┐
|
||||
│ Load Balancer / Ingress │
|
||||
│ (nginx-ingress-controller) │
|
||||
└─────────────────┬───────────────────────┘
|
||||
│
|
||||
┌──────────┴──────────┐
|
||||
│ │
|
||||
┌──────▼──────┐ ┌────────▼────────┐
|
||||
│ Master │ │ Worker Nodes │
|
||||
│ Node │ │ │
|
||||
│ │ │ - Applications │
|
||||
│ - Control │ │ - Services │
|
||||
│ Plane │ │ - Monitoring │
|
||||
│ - ArgoCD │ │ │
|
||||
└─────────────┘ └─────────────────┘
|
||||
```
|
||||
|
||||
### Services Running
|
||||
|
||||
| Service | Purpose | URL |
|
||||
|---------|---------|-----|
|
||||
| Homer | Dashboard | https://homer0213.kro.kr |
|
||||
| Gitea | Git Service | https://gitea0213.kro.kr |
|
||||
| Grafana | Monitoring | https://grafana0213.kro.kr |
|
||||
| Docusaurus | Documentation | https://docusaurus0213.kro.kr |
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Learn about the architecture](./architecture)
|
||||
- [Explore Kubernetes setup](../services/kubernetes)
|
||||
- [Set up monitoring](../services/monitoring)
|
||||
40
docusaurus/docs/intro.md
Normal file
40
docusaurus/docs/intro.md
Normal file
@@ -0,0 +1,40 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
---
|
||||
|
||||
# Introduction
|
||||
|
||||
Welcome to the **Infrastructure Documentation**!
|
||||
|
||||
This documentation site contains comprehensive guides and references for our entire infrastructure setup.
|
||||
|
||||
## What's Inside?
|
||||
|
||||
- **Getting Started**: Learn about our infrastructure architecture and how to get started
|
||||
- **Services**: Detailed documentation for each service we run
|
||||
- **Guides**: Step-by-step tutorials and best practices
|
||||
- **Blog**: Updates, announcements, and technical insights
|
||||
|
||||
## Infrastructure Overview
|
||||
|
||||
Our infrastructure is built on:
|
||||
|
||||
- **Kubernetes (K3s)**: Container orchestration platform
|
||||
- **ArgoCD**: GitOps continuous delivery
|
||||
- **Gitea**: Self-hosted Git service
|
||||
- **Prometheus & Grafana**: Monitoring and observability
|
||||
- **cert-manager**: Automatic SSL certificate management
|
||||
|
||||
## Quick Links
|
||||
|
||||
- [Architecture Overview](./getting-started/architecture)
|
||||
- [Kubernetes Setup](./services/kubernetes)
|
||||
- [Monitoring Stack](./services/monitoring)
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you have questions or need assistance:
|
||||
|
||||
1. Check the documentation sections
|
||||
2. Search for specific topics
|
||||
3. Visit our [Gitea repository](https://gitea0213.kro.kr/bluemayne/infrastructure)
|
||||
169
docusaurus/docs/services/argocd.md
Normal file
169
docusaurus/docs/services/argocd.md
Normal file
@@ -0,0 +1,169 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
---
|
||||
|
||||
# ArgoCD
|
||||
|
||||
## Overview
|
||||
|
||||
**ArgoCD** is our GitOps continuous delivery tool. It automatically synchronizes applications from our Git repository to Kubernetes.
|
||||
|
||||
## How It Works
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
A[Git Repository] -->|Watch| B[ArgoCD]
|
||||
B -->|Compare| C[Desired State]
|
||||
B -->|Compare| D[Actual State in K8s]
|
||||
B -->|Sync| E[Deploy Changes]
|
||||
```
|
||||
|
||||
## Application Structure
|
||||
|
||||
Each service has an ArgoCD Application definition:
|
||||
|
||||
```yaml
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: example-service
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
|
||||
targetRevision: main
|
||||
path: example-service
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: example-service
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
```
|
||||
|
||||
## Key Features
|
||||
|
||||
### Automated Sync
|
||||
|
||||
- **prune: true**: Removes resources deleted from Git
|
||||
- **selfHeal: true**: Reverts manual changes to match Git
|
||||
- **allowEmpty: false**: Prevents accidental empty deployments
|
||||
|
||||
### Manual Operations
|
||||
|
||||
```bash
|
||||
# List applications
|
||||
sudo kubectl get applications -n argocd
|
||||
|
||||
# View application status
|
||||
sudo kubectl get application -n argocd <app-name> -o yaml
|
||||
|
||||
# Force refresh
|
||||
argocd app get <app-name> --refresh
|
||||
```
|
||||
|
||||
## Deployment Workflow
|
||||
|
||||
### 1. Make Changes
|
||||
|
||||
Edit files in the infrastructure repository:
|
||||
|
||||
```bash
|
||||
cd /path/to/infrastructure
|
||||
vim example-service/deployment.yaml
|
||||
git add .
|
||||
git commit -m "update deployment"
|
||||
git push
|
||||
```
|
||||
|
||||
### 2. ArgoCD Detects Changes
|
||||
|
||||
- Polls Git repository every 3 minutes
|
||||
- Or immediately via webhook (if configured)
|
||||
|
||||
### 3. Automatic Sync
|
||||
|
||||
- Compares desired state (Git) vs actual state (K8s)
|
||||
- Applies changes automatically
|
||||
- Reports status
|
||||
|
||||
### 4. Monitor Deployment
|
||||
|
||||
```bash
|
||||
# Check application sync status
|
||||
sudo kubectl get applications -n argocd
|
||||
|
||||
# Watch pod rollout
|
||||
sudo kubectl rollout status deployment/<name> -n <namespace>
|
||||
```
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Adding a New Service
|
||||
|
||||
1. Create service directory: `infrastructure/my-service/`
|
||||
2. Add Kubernetes manifests
|
||||
3. Create kustomization.yaml
|
||||
4. Add ArgoCD Application: `infrastructure/my-service/argocd/my-service.yaml`
|
||||
5. Reference in main kustomization.yaml
|
||||
6. Git push → ArgoCD deploys automatically
|
||||
|
||||
### Updating a Service
|
||||
|
||||
1. Edit deployment.yaml or other files
|
||||
2. Git commit & push
|
||||
3. Wait for ArgoCD sync (or force refresh)
|
||||
4. Verify deployment
|
||||
|
||||
### Rolling Back
|
||||
|
||||
```bash
|
||||
# Method 1: Git revert
|
||||
git revert <commit-hash>
|
||||
git push
|
||||
|
||||
# Method 2: Sync to specific commit
|
||||
argocd app sync <app-name> --revision <commit-hash>
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Sync Stuck
|
||||
|
||||
```bash
|
||||
# Force hard refresh
|
||||
sudo kubectl patch application -n argocd <app-name> \
|
||||
-p '{"metadata": {"annotations": {"argocd.argoproj.io/refresh": "hard"}}}' \
|
||||
--type merge
|
||||
```
|
||||
|
||||
### Out of Sync
|
||||
|
||||
Check what's different:
|
||||
|
||||
```bash
|
||||
argocd app diff <app-name>
|
||||
```
|
||||
|
||||
### Sync Failed
|
||||
|
||||
View detailed error:
|
||||
|
||||
```bash
|
||||
sudo kubectl describe application -n argocd <app-name>
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Always use Git**: Don't make manual kubectl changes
|
||||
2. **Small commits**: Easier to review and rollback
|
||||
3. **Test locally**: Use `kubectl apply --dry-run=client`
|
||||
4. **Use pruning**: Keep cluster clean with `prune: true`
|
||||
5. **Enable selfHeal**: Prevent configuration drift
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Monitoring with Prometheus](./monitoring)
|
||||
- [Kubernetes Operations](./kubernetes)
|
||||
174
docusaurus/docs/services/kubernetes.md
Normal file
174
docusaurus/docs/services/kubernetes.md
Normal file
@@ -0,0 +1,174 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
---
|
||||
|
||||
# Kubernetes (K3s)
|
||||
|
||||
## Overview
|
||||
|
||||
We use **K3s**, a lightweight Kubernetes distribution, as our container orchestration platform.
|
||||
|
||||
## Cluster Setup
|
||||
|
||||
### Nodes
|
||||
|
||||
- **Master Node**: `oracle-master`
|
||||
- Control plane components
|
||||
- etcd database
|
||||
- ArgoCD installation
|
||||
|
||||
- **Worker Nodes**: `mayne-worker-1`, `mayne-worker-2`, etc.
|
||||
- Application workloads
|
||||
- Monitoring stack
|
||||
- Service deployments
|
||||
|
||||
### Access
|
||||
|
||||
```bash
|
||||
# SSH to master node
|
||||
ssh oracle-master
|
||||
|
||||
# Use kubectl (requires sudo on master)
|
||||
sudo kubectl get nodes
|
||||
sudo kubectl get pods -A
|
||||
```
|
||||
|
||||
## Common Operations
|
||||
|
||||
### View All Resources
|
||||
|
||||
```bash
|
||||
# Get all namespaces
|
||||
sudo kubectl get namespaces
|
||||
|
||||
# Get all pods in all namespaces
|
||||
sudo kubectl get pods -A
|
||||
|
||||
# Get services
|
||||
sudo kubectl get services -A
|
||||
```
|
||||
|
||||
### Check Application Status
|
||||
|
||||
```bash
|
||||
# Check specific namespace
|
||||
sudo kubectl get all -n <namespace>
|
||||
|
||||
# View pod logs
|
||||
sudo kubectl logs -n <namespace> <pod-name>
|
||||
|
||||
# Describe pod for troubleshooting
|
||||
sudo kubectl describe pod -n <namespace> <pod-name>
|
||||
```
|
||||
|
||||
### Managing Applications
|
||||
|
||||
Most applications are managed by ArgoCD, so manual kubectl operations are rarely needed.
|
||||
|
||||
```bash
|
||||
# Check ArgoCD applications
|
||||
sudo kubectl get applications -n argocd
|
||||
|
||||
# Force sync an application (if needed)
|
||||
sudo kubectl patch application -n argocd <app-name> \
|
||||
-p '{"metadata": {"annotations": {"argocd.argoproj.io/refresh": "hard"}}}' \
|
||||
--type merge
|
||||
```
|
||||
|
||||
## Namespaces
|
||||
|
||||
Each service runs in its own namespace for isolation:
|
||||
|
||||
| Namespace | Purpose |
|
||||
|-----------|---------|
|
||||
| `argocd` | ArgoCD deployment |
|
||||
| `cert-manager` | Certificate management |
|
||||
| `ingress-nginx` | Ingress controller |
|
||||
| `monitoring` | Prometheus, Grafana |
|
||||
| `gitea` | Git service |
|
||||
| `vault` | Secrets management |
|
||||
|
||||
## Storage
|
||||
|
||||
### Storage Classes
|
||||
|
||||
- **local-path**: Default storage class
|
||||
- Uses local disk on worker nodes
|
||||
- Good for development and non-critical data
|
||||
|
||||
### Persistent Volumes
|
||||
|
||||
```bash
|
||||
# View PVCs
|
||||
sudo kubectl get pvc -A
|
||||
|
||||
# View PVs
|
||||
sudo kubectl get pv
|
||||
```
|
||||
|
||||
## Networking
|
||||
|
||||
### Ingress
|
||||
|
||||
External traffic flows through nginx-ingress-controller:
|
||||
|
||||
```
|
||||
Internet → nginx-ingress → Service → Pod
|
||||
```
|
||||
|
||||
### Services
|
||||
|
||||
- **ClusterIP**: Internal only (default)
|
||||
- **LoadBalancer**: External access (rarely used)
|
||||
- **Ingress** (a separate resource, not a Service type): routes HTTPS traffic for custom domains to ClusterIP Services
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Pod Not Starting
|
||||
|
||||
```bash
|
||||
# Check pod events
|
||||
sudo kubectl describe pod -n <namespace> <pod-name>
|
||||
|
||||
# Check logs
|
||||
sudo kubectl logs -n <namespace> <pod-name>
|
||||
|
||||
# Check previous container logs (if crashed)
|
||||
sudo kubectl logs -n <namespace> <pod-name> --previous
|
||||
```
|
||||
|
||||
### Resource Issues
|
||||
|
||||
```bash
|
||||
# Check node resources
|
||||
sudo kubectl top nodes
|
||||
|
||||
# Check pod resources
|
||||
sudo kubectl top pods -A
|
||||
```
|
||||
|
||||
### Network Issues
|
||||
|
||||
```bash
|
||||
# Check services
|
||||
sudo kubectl get svc -A
|
||||
|
||||
# Check ingress
|
||||
sudo kubectl get ingress -A
|
||||
|
||||
# Test connectivity from a pod
|
||||
sudo kubectl run -it --rm debug --image=nicolaka/netshoot --restart=Never -- /bin/bash
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Use namespaces** for logical separation
|
||||
2. **Set resource limits** on all containers
|
||||
3. **Use health checks** (readiness and liveness probes)
|
||||
4. **Label everything** for better organization
|
||||
5. **Don't run as root** (use securityContext)
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [ArgoCD Setup](./argocd)
|
||||
- [Monitoring Stack](./monitoring)
|
||||
225
docusaurus/docs/services/monitoring.md
Normal file
225
docusaurus/docs/services/monitoring.md
Normal file
@@ -0,0 +1,225 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
|
||||
# Monitoring Stack
|
||||
|
||||
## Overview
|
||||
|
||||
Our monitoring stack provides complete observability with metrics, logs, and visualization.
|
||||
|
||||
## Components
|
||||
|
||||
### Prometheus
|
||||
|
||||
**Metrics collection and storage**
|
||||
|
||||
- Scrapes metrics from all services
|
||||
- Stores time-series data
|
||||
- Powers alerting rules
|
||||
|
||||
Access: Internal only (no direct UI exposure)
|
||||
|
||||
### Grafana
|
||||
|
||||
**Visualization and dashboards**
|
||||
|
||||
- Beautiful dashboards
|
||||
- Query Prometheus data
|
||||
- Alert management UI
|
||||
|
||||
Access: https://grafana0213.kro.kr
|
||||
|
||||
### Loki
|
||||
|
||||
**Log aggregation**
|
||||
|
||||
- Collects logs from all pods
|
||||
- Indexed for fast searching
|
||||
- Integrated with Grafana
|
||||
|
||||
### Promtail
|
||||
|
||||
**Log shipping agent**
|
||||
|
||||
- Runs on each node
|
||||
- Forwards logs to Loki
|
||||
- Adds metadata labels
|
||||
|
||||
### Alertmanager
|
||||
|
||||
**Alert routing and notification**
|
||||
|
||||
- Receives alerts from Prometheus
|
||||
- Routes to correct channels
|
||||
- Deduplication and grouping
|
||||
|
||||
## Dashboards
|
||||
|
||||
### Pre-built Dashboards
|
||||
|
||||
1. **Cluster Overview**
|
||||
- Node health
|
||||
- Resource usage
|
||||
- Pod status
|
||||
|
||||
2. **Application Metrics**
|
||||
- Request rate
|
||||
- Error rate
|
||||
- Response time
|
||||
|
||||
3. **Infrastructure**
|
||||
- CPU, Memory, Disk
|
||||
- Network traffic
|
||||
- Storage usage
|
||||
|
||||
### Creating Custom Dashboards
|
||||
|
||||
```bash
|
||||
# Export existing dashboard
|
||||
curl -s http://grafana:3000/api/dashboards/uid/<uid> > dashboard.json
|
||||
|
||||
# Import via UI
|
||||
# (not a shell command) Grafana → Dashboards → Import → Upload JSON
|
||||
```
|
||||
|
||||
## Querying Metrics
|
||||
|
||||
### PromQL Examples
|
||||
|
||||
```promql
|
||||
# CPU usage by pod
|
||||
rate(container_cpu_usage_seconds_total[5m])
|
||||
|
||||
# Memory usage
|
||||
container_memory_working_set_bytes
|
||||
|
||||
# HTTP request rate
|
||||
rate(http_requests_total[5m])
|
||||
|
||||
# Error rate
|
||||
rate(http_requests_total{status=~"5.."}[5m])
|
||||
```
|
||||
|
||||
## Alerts
|
||||
|
||||
### Viewing Alerts
|
||||
|
||||
```bash
|
||||
# List Prometheus rules
|
||||
sudo kubectl get prometheusrules -n monitoring
|
||||
|
||||
# View Alertmanager status
|
||||
sudo kubectl get alertmanagers -n monitoring
|
||||
```
|
||||
|
||||
### Common Alerts
|
||||
|
||||
- **HighCPUUsage**: Pod using >80% CPU
|
||||
- **HighMemoryUsage**: Pod using >80% memory
|
||||
- **PodCrashLooping**: Pod restarting frequently
|
||||
- **DiskSpaceLow**: Node disk >85% full
|
||||
|
||||
## Log Queries
|
||||
|
||||
### LogQL Examples
|
||||
|
||||
```logql
|
||||
# All logs from a namespace
|
||||
{namespace="my-app"}
|
||||
|
||||
# Error logs
|
||||
{namespace="my-app"} |= "error"
|
||||
|
||||
# Parse JSON logs
|
||||
{namespace="my-app"} | json | level="error"
|
||||
|
||||
# Count errors
|
||||
count_over_time({namespace="my-app"} |= "error" [5m])
|
||||
```
|
||||
|
||||
## Accessing Monitoring Data
|
||||
|
||||
### Grafana UI
|
||||
|
||||
1. Navigate to https://grafana0213.kro.kr
|
||||
2. Log in with credentials
|
||||
3. Browse dashboards or create queries
|
||||
|
||||
### Port Forwarding (Development)
|
||||
|
||||
```bash
|
||||
# Prometheus UI
|
||||
sudo kubectl port-forward -n monitoring svc/prometheus-operated 9090:9090
|
||||
|
||||
# Access at http://localhost:9090
|
||||
|
||||
# Alertmanager UI
|
||||
sudo kubectl port-forward -n monitoring svc/alertmanager-operated 9093:9093
|
||||
|
||||
# Access at http://localhost:9093
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### No Metrics Showing
|
||||
|
||||
```bash
|
||||
# Validate the Prometheus configuration file
|
||||
sudo kubectl exec -n monitoring prometheus-0 -- promtool check config /etc/prometheus/prometheus.yml
|
||||
|
||||
# Verify service monitors
|
||||
sudo kubectl get servicemonitors -A
|
||||
```
|
||||
|
||||
### Grafana Not Loading Data
|
||||
|
||||
```bash
|
||||
# Check Grafana logs
|
||||
sudo kubectl logs -n monitoring deployment/grafana
|
||||
|
||||
# Verify datasource configuration
|
||||
sudo kubectl get secret -n monitoring grafana-datasources -o yaml
|
||||
```
|
||||
|
||||
### High Cardinality Issues
|
||||
|
||||
Too many unique label combinations can cause performance issues:
|
||||
|
||||
```bash
|
||||
# Check series count
|
||||
curl http://prometheus:9090/api/v1/status/tsdb | jq '.data.seriesCountByMetricName'
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Set up alerts proactively**: Don't wait for incidents
|
||||
2. **Use labels wisely**: Avoid high cardinality
|
||||
3. **Create focused dashboards**: One purpose per dashboard
|
||||
4. **Set retention policies**: Balance storage vs history
|
||||
5. **Document custom metrics**: Help future maintainers
|
||||
|
||||
## Metrics to Monitor
|
||||
|
||||
### Application Level
|
||||
- Request rate
|
||||
- Error rate
|
||||
- Response time (latency)
|
||||
- Saturation (queue depth)
|
||||
|
||||
### Infrastructure Level
|
||||
- CPU usage
|
||||
- Memory usage
|
||||
- Disk I/O
|
||||
- Network throughput
|
||||
|
||||
### Business Level (Optional)
|
||||
- User signups
|
||||
- Active sessions
|
||||
- Feature usage
|
||||
- Transaction volume
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Kubernetes Operations](./kubernetes)
|
||||
- [ArgoCD Configuration](./argocd)
|
||||
146
docusaurus/docusaurus.config.js
Normal file
146
docusaurus/docusaurus.config.js
Normal file
@@ -0,0 +1,146 @@
|
||||
// @ts-check
|
||||
// `@type` JSDoc annotations allow editor autocompletion and type checking
|
||||
// (when paired with `@ts-check`).
|
||||
// There are various equivalent ways to declare your Docusaurus config.
|
||||
// See: https://docusaurus.io/docs/api/docusaurus-config
|
||||
|
||||
import {themes as prismThemes} from 'prism-react-renderer';
|
||||
|
||||
/**
 * Docusaurus site configuration for the infrastructure documentation.
 *
 * The docs plugin is mounted at the site root (`routeBasePath: '/'`), so a
 * document such as `docs/intro.md` is served at `/intro`, not `/docs/intro`.
 * Internal links in the navbar and footer must use the root-based paths.
 *
 * @type {import('@docusaurus/types').Config}
 */
const config = {
  title: 'Infrastructure Documentation',
  tagline: 'Complete guide to our infrastructure and services',
  favicon: 'img/favicon.ico',

  // Production URL of the site and the pathname it is served under.
  url: 'https://docusaurus0213.kro.kr',
  baseUrl: '/',

  // Only used by `docusaurus deploy` (GitHub Pages); kept for completeness.
  organizationName: 'bluemayne',
  projectName: 'infrastructure',

  // Broken links are reported but do not fail the build.
  onBrokenLinks: 'warn',
  onBrokenMarkdownLinks: 'warn',

  // Korean is the default locale; English is offered through the dropdown.
  i18n: {
    defaultLocale: 'ko',
    locales: ['ko', 'en'],
  },

  presets: [
    [
      'classic',
      /** @type {import('@docusaurus/preset-classic').Options} */
      ({
        docs: {
          routeBasePath: '/', // Serve docs at the site's root
          sidebarPath: './sidebars.js',
          // Base URL for the "edit this page" links.
          editUrl:
            'https://gitea0213.kro.kr/bluemayne/infrastructure/src/branch/main/docusaurus/',
        },
        blog: {
          showReadingTime: true,
          // Base URL for the "edit this page" links.
          editUrl:
            'https://gitea0213.kro.kr/bluemayne/infrastructure/src/branch/main/docusaurus/',
        },
        theme: {
          customCss: './src/css/custom.css',
        },
      }),
    ],
  ],

  themeConfig:
    /** @type {import('@docusaurus/preset-classic').ThemeConfig} */
    ({
      // Social card image used in link previews.
      image: 'img/docusaurus-social-card.jpg',
      navbar: {
        title: 'Infrastructure Docs',
        logo: {
          alt: 'Infrastructure Logo',
          src: 'img/logo.svg',
        },
        items: [
          {
            type: 'docSidebar',
            sidebarId: 'tutorialSidebar',
            position: 'left',
            label: 'Documentation',
          },
          {to: '/blog', label: 'Blog', position: 'left'},
          {
            type: 'localeDropdown',
            position: 'right',
          },
          {
            href: 'https://gitea0213.kro.kr/bluemayne/infrastructure',
            label: 'Gitea',
            position: 'right',
          },
        ],
      },
      footer: {
        style: 'dark',
        links: [
          {
            title: 'Docs',
            items: [
              {
                label: 'Documentation',
                // Docs live at the site root, so the intro page is `/intro`.
                to: '/intro',
              },
            ],
          },
          {
            title: 'Services',
            items: [
              {
                label: 'Homer',
                href: 'https://homer0213.kro.kr',
              },
              {
                label: 'Gitea',
                href: 'https://gitea0213.kro.kr',
              },
              {
                label: 'Grafana',
                href: 'https://grafana0213.kro.kr',
              },
            ],
          },
          {
            title: 'More',
            items: [
              {
                label: 'Blog',
                to: '/blog',
              },
              {
                label: 'Repository',
                href: 'https://gitea0213.kro.kr/bluemayne/infrastructure',
              },
            ],
          },
        ],
        copyright: `Copyright © ${new Date().getFullYear()} Infrastructure Documentation. Built with Docusaurus.`,
      },
      prism: {
        theme: prismThemes.github,
        darkTheme: prismThemes.dracula,
      },
    }),
};
|
||||
|
||||
export default config;
|
||||
27
docusaurus/ingress.yaml
Normal file
27
docusaurus/ingress.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
---
# Ingress that publishes the docusaurus Service at docusaurus0213.kro.kr via
# the nginx ingress class, serving TLS from the docusaurus-tls Secret.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: docusaurus
  namespace: docusaurus
  annotations:
    # Certificate issuance is delegated to the letsencrypt-prod ClusterIssuer.
    cert-manager.io/cluster-issuer: 'letsencrypt-prod'
    # '0' turns off the request-body size check in ingress-nginx.
    nginx.ingress.kubernetes.io/proxy-body-size: '0'
    # Upstream read/send timeouts, in seconds.
    nginx.ingress.kubernetes.io/proxy-read-timeout: '600'
    nginx.ingress.kubernetes.io/proxy-send-timeout: '600'
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - docusaurus0213.kro.kr
      secretName: docusaurus-tls
  rules:
    - host: docusaurus0213.kro.kr
      http:
        paths:
          # Every path under / goes to port 80 of the docusaurus Service.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: docusaurus
                port:
                  number: 80
|
||||
11
docusaurus/kustomization.yaml
Normal file
11
docusaurus/kustomization.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
---
# Kustomize entry point for the docusaurus workload manifests.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Namespace set on the resources listed below.
namespace: docusaurus

resources:
  # The ArgoCD Application resource is managed from
  # infrastructure/kustomization.yaml, so it is deliberately left out here.
  # - argocd/docusaurus.yaml
  - deployment.yaml
  - service.yaml
  - ingress.yaml
|
||||
44
docusaurus/package.json
Normal file
44
docusaurus/package.json
Normal file
@@ -0,0 +1,44 @@
|
||||
{
|
||||
"name": "infrastructure-docs",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"docusaurus": "docusaurus",
|
||||
"start": "docusaurus start",
|
||||
"build": "docusaurus build",
|
||||
"swizzle": "docusaurus swizzle",
|
||||
"deploy": "docusaurus deploy",
|
||||
"clear": "docusaurus clear",
|
||||
"serve": "docusaurus serve",
|
||||
"write-translations": "docusaurus write-translations",
|
||||
"write-heading-ids": "docusaurus write-heading-ids"
|
||||
},
|
||||
"dependencies": {
|
||||
"@docusaurus/core": "^3.1.0",
|
||||
"@docusaurus/preset-classic": "^3.1.0",
|
||||
"@mdx-js/react": "^3.0.0",
|
||||
"clsx": "^2.0.0",
|
||||
"prism-react-renderer": "^2.3.0",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@docusaurus/module-type-aliases": "^3.1.0",
|
||||
"@docusaurus/types": "^3.1.0"
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
">0.5%",
|
||||
"not dead",
|
||||
"not op_mini all"
|
||||
],
|
||||
"development": [
|
||||
"last 1 chrome version",
|
||||
"last 1 firefox version",
|
||||
"last 1 safari version"
|
||||
]
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0"
|
||||
}
|
||||
}
|
||||
16
docusaurus/service.yaml
Normal file
16
docusaurus/service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
---
# ClusterIP Service in front of the pods labelled app=docusaurus.
apiVersion: v1
kind: Service
metadata:
  name: docusaurus
  namespace: docusaurus
  labels:
    app: docusaurus
spec:
  type: ClusterIP
  # Pods carrying this label receive the traffic.
  selector:
    app: docusaurus
  ports:
    # Service port 80 forwards to port 80 on the selected pods.
    - name: http
      protocol: TCP
      port: 80
      targetPort: 80
|
||||
45
docusaurus/sidebars.js
Normal file
45
docusaurus/sidebars.js
Normal file
@@ -0,0 +1,45 @@
|
||||
/**
|
||||
* Creating a sidebar enables you to:
|
||||
- create an ordered group of docs
|
||||
- render a sidebar for each doc of that group
|
||||
- provide next/previous navigation
|
||||
|
||||
The sidebars can be generated from the filesystem, or explicitly defined here.
|
||||
|
||||
Create as many sidebars as you want.
|
||||
*/
|
||||
|
||||
// @ts-check
|
||||
|
||||
/**
 * Sidebar definitions for the documentation site.
 *
 * `tutorialSidebar` is maintained by hand (it is NOT generated from the docs
 * folder): the intro page first, then two categories that list doc ids
 * explicitly. The navbar's `docSidebar` item refers to this sidebar by the
 * id `tutorialSidebar`, so renaming the key requires updating that item too.
 *
 * To let Docusaurus derive the sidebar from the folder structure instead,
 * replace the array with `[{type: 'autogenerated', dirName: '.'}]`.
 *
 * @type {import('@docusaurus/plugin-content-docs').SidebarsConfig}
 */
const sidebars = {
  tutorialSidebar: [
    // Landing page of the docs section.
    'intro',
    {
      type: 'category',
      label: 'Getting Started',
      items: [
        'getting-started/overview',
        'getting-started/architecture',
      ],
    },
    {
      type: 'category',
      label: 'Services',
      items: [
        'services/kubernetes',
        'services/argocd',
        'services/monitoring',
      ],
    },
  ],
};
|
||||
|
||||
export default sidebars;
|
||||
30
docusaurus/src/css/custom.css
Normal file
30
docusaurus/src/css/custom.css
Normal file
@@ -0,0 +1,30 @@
|
||||
/**
|
||||
* Any CSS included here will be global. The classic template
|
||||
* bundles Infima by default. Infima is a CSS framework designed to
|
||||
* work well for content-centric websites.
|
||||
*/
|
||||
|
||||
/* Light-mode overrides of the default Infima variables. */
:root {
  /* Primary colour and its darker / lighter shades. */
  --ifm-color-primary: #2e8555;
  --ifm-color-primary-dark: #29784c;
  --ifm-color-primary-darker: #277148;
  --ifm-color-primary-darkest: #205d3b;
  --ifm-color-primary-light: #33925d;
  --ifm-color-primary-lighter: #359962;
  --ifm-color-primary-lightest: #3cad6e;

  /* Code rendering. */
  --ifm-code-font-size: 95%;
  --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1);
}
|
||||
|
||||
/* Dark-mode overrides: a lighter primary palette keeps text readable on dark backgrounds. */
[data-theme='dark'] {
  /* Primary colour and its darker / lighter shades. */
  --ifm-color-primary: #25c2a0;
  --ifm-color-primary-dark: #21af90;
  --ifm-color-primary-darker: #1fa588;
  --ifm-color-primary-darkest: #1a8870;
  --ifm-color-primary-light: #29d5b0;
  --ifm-color-primary-lighter: #32d8b4;
  --ifm-color-primary-lightest: #4fddbf;

  /* Background of highlighted code lines. */
  --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3);
}
|
||||
Reference in New Issue
Block a user