REFACTOR(app): use native Python clients

Major changes:
- Kubernetes tools: Replace subprocess kubectl calls with kubernetes-client library
  - Supports in-cluster config for pod execution
  - Fallback to local kubeconfig for development
  - All k8s tools (nodes, pods, deployments, logs, describe) now use Python API

- PostgreSQL tools: Replace kubectl exec psql with direct psycopg2 connection
  - Connect via Kubernetes service DNS
  - Support for environment-based configuration
  - Improved error handling with proper pgcode/pgerror

- Prometheus tools: Replace kubectl exec wget with direct HTTP requests
  - Use requests library to query Prometheus API
  - Connect via Kubernetes service DNS
  - Configurable via PROMETHEUS_URL env var

- Deployment updates: Add explicit PostgreSQL connection env vars
  - POSTGRES_HOST, POSTGRES_PORT, POSTGRES_USER
  - Already had POSTGRES_PASSWORD from secret

Benefits:
- No longer requires kubectl binary in container
- Faster execution (no subprocess overhead)
- Better error handling and type safety
- Works seamlessly in Kubernetes pods with RBAC
This commit is contained in:
2025-12-24 00:47:17 +09:00
parent 7197e94baf
commit 6f984e5b6f
3 changed files with 333 additions and 128 deletions

View File

@@ -42,6 +42,12 @@ spec:
# SQLAlchemy format (if needed) # SQLAlchemy format (if needed)
- name: DATABASE_URL - name: DATABASE_URL
value: "postgresql+asyncpg://bluemayne:$(POSTGRES_PASSWORD)@postgresql-primary.postgresql.svc.cluster.local:5432/mas" value: "postgresql+asyncpg://bluemayne:$(POSTGRES_PASSWORD)@postgresql-primary.postgresql.svc.cluster.local:5432/mas"
- name: POSTGRES_HOST
value: "postgresql-primary.postgresql.svc.cluster.local"
- name: POSTGRES_PORT
value: "5432"
- name: POSTGRES_USER
value: "bluemayne"
- name: POSTGRES_PASSWORD - name: POSTGRES_PASSWORD
valueFrom: valueFrom:
secretKeyRef: secretKeyRef:

View File

@@ -16,5 +16,5 @@ commonLabels:
# 이미지 태그 설정 (ArgoCD Image Updater가 자동으로 업데이트) # 이미지 태그 설정 (ArgoCD Image Updater가 자동으로 업데이트)
images: images:
- name: gitea0213.kro.kr/bluemayne/mas - name: gitea0213.kro.kr/bluemayne/mas
newTag: main-sha-28c6e6f8b827900a25c03f3e63c874ddedecf535 newTag: main-sha-22b0840afde85b864df82a5c9408da8c78e28195

View File

@@ -13,6 +13,10 @@ import subprocess
import json import json
import requests import requests
from datetime import datetime from datetime import datetime
from kubernetes import client, config
from kubernetes.client.rest import ApiException
import psycopg2
from urllib.parse import quote_plus
class AgentState(TypedDict): class AgentState(TypedDict):
@@ -23,6 +27,26 @@ class AgentState(TypedDict):
result: dict result: dict
# ===== Kubernetes client initialization =====
# Module-level, runs once at import time. Chooses the credential source:
# in-cluster service account first, local kubeconfig as a fallback.
try:
    # Try in-cluster config first (when running inside a Pod).
    config.load_incluster_config()
    print("✅ Loaded in-cluster Kubernetes config")
except config.ConfigException:
    # Fallback to local kubeconfig (local development).
    try:
        config.load_kube_config()
        print("✅ Loaded local Kubernetes config")
    except config.ConfigException:
        # Deliberately fail-open: clients below are still constructed and
        # every K8s tool call will surface the error at request time instead.
        print("⚠️ No Kubernetes config found - K8s tools will fail")

# Kubernetes API clients — module-level singletons shared by all tools.
k8s_core_v1 = client.CoreV1Api()            # Pods, Services, Nodes, logs
k8s_apps_v1 = client.AppsV1Api()            # Deployments, StatefulSets, DaemonSets
k8s_batch_v1 = client.BatchV1Api()          # Jobs / CronJobs
k8s_networking_v1 = client.NetworkingV1Api()  # Ingresses
# ===== MCP Tools ===== # ===== MCP Tools =====
# === 1. Kubernetes MCP Tools === # === 1. Kubernetes MCP Tools ===
@@ -32,34 +56,35 @@ def k8s_get_nodes() -> str:
Get Kubernetes cluster nodes information including status, roles, CPU and memory. Get Kubernetes cluster nodes information including status, roles, CPU and memory.
""" """
try: try:
result = subprocess.run( nodes = k8s_core_v1.list_node()
["kubectl", "get", "nodes", "-o", "json"], info = []
capture_output=True, text=True, timeout=10
)
if result.returncode == 0:
nodes = json.loads(result.stdout)
info = []
for node in nodes.get("items", []):
name = node["metadata"]["name"]
labels = node["metadata"].get("labels", {})
roles = [k.split("/")[1] for k in labels if "node-role.kubernetes.io" in k]
role_str = ",".join(roles) if roles else "worker"
status = "Unknown" for node in nodes.items:
for cond in node["status"]["conditions"]: name = node.metadata.name
if cond["type"] == "Ready": labels = node.metadata.labels or {}
status = "Ready" if cond["status"] == "True" else "NotReady"
capacity = node["status"]["capacity"] # Extract roles from labels
cpu = capacity.get("cpu", "?") roles = [k.split("/")[1] for k in labels if "node-role.kubernetes.io" in k]
mem = capacity.get("memory", "?") role_str = ",".join(roles) if roles else "worker"
info.append(f"{name} [{role_str}]: {status} | CPU: {cpu}, Memory: {mem}") # Get node status
status = "Unknown"
for cond in node.status.conditions:
if cond.type == "Ready":
status = "Ready" if cond.status == "True" else "NotReady"
return f"📦 Kubernetes Nodes ({len(info)}):\n" + "\n".join(info) # Get capacity
return f"❌ Error: {result.stderr}" capacity = node.status.capacity
cpu = capacity.get("cpu", "?")
mem = capacity.get("memory", "?")
info.append(f"{name} [{role_str}]: {status} | CPU: {cpu}, Memory: {mem}")
return f"📦 Kubernetes Nodes ({len(info)}):\n" + "\n".join(info)
except ApiException as e:
return f"❌ Kubernetes API error: {e.status} {e.reason}"
except Exception as e: except Exception as e:
return f"kubectl error: {str(e)}" return f"Error: {str(e)}"
@tool @tool
@@ -71,31 +96,36 @@ def k8s_get_pods(namespace: str = "", label_selector: str = "") -> str:
label_selector: Filter by labels (e.g., "app=myapp") label_selector: Filter by labels (e.g., "app=myapp")
""" """
try: try:
cmd = ["kubectl", "get", "pods", "-o", "json"] # Get pods based on namespace filter
if namespace: if namespace:
cmd.extend(["-n", namespace]) pods = k8s_core_v1.list_namespaced_pod(
namespace=namespace,
label_selector=label_selector if label_selector else None
)
else: else:
cmd.append("--all-namespaces") pods = k8s_core_v1.list_pod_for_all_namespaces(
if label_selector: label_selector=label_selector if label_selector else None
cmd.extend(["-l", label_selector]) )
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10) info = []
if result.returncode == 0: for pod in pods.items:
pods = json.loads(result.stdout) name = pod.metadata.name
info = [] ns = pod.metadata.namespace
for pod in pods.get("items", []): phase = pod.status.phase
name = pod["metadata"]["name"]
ns = pod["metadata"]["namespace"]
phase = pod["status"]["phase"]
restarts = sum(c.get("restartCount", 0) for c in pod["status"].get("containerStatuses", []))
emoji = "" if phase == "Running" else "⚠️" if phase == "Pending" else "" # Calculate total restarts
info.append(f"{emoji} {ns}/{name}: {phase} (restarts: {restarts})") restarts = 0
if pod.status.container_statuses:
restarts = sum(c.restart_count for c in pod.status.container_statuses)
return f"🐳 Pods ({len(info)}):\n" + "\n".join(info[:50]) # Limit to 50 emoji = "" if phase == "Running" else "⚠️" if phase == "Pending" else ""
return f"❌ Error: {result.stderr}" info.append(f"{emoji} {ns}/{name}: {phase} (restarts: {restarts})")
return f"🐳 Pods ({len(info)}):\n" + "\n".join(info[:50]) # Limit to 50
except ApiException as e:
return f"❌ Kubernetes API error: {e.status} {e.reason}"
except Exception as e: except Exception as e:
return f"kubectl error: {str(e)}" return f"Error: {str(e)}"
@tool @tool
@@ -106,29 +136,27 @@ def k8s_get_deployments(namespace: str = "") -> str:
namespace: Filter by namespace (empty = all namespaces) namespace: Filter by namespace (empty = all namespaces)
""" """
try: try:
cmd = ["kubectl", "get", "deployments", "-o", "json"] # Get deployments based on namespace filter
if namespace: if namespace:
cmd.extend(["-n", namespace]) deployments = k8s_apps_v1.list_namespaced_deployment(namespace=namespace)
else: else:
cmd.append("--all-namespaces") deployments = k8s_apps_v1.list_deployment_for_all_namespaces()
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10) info = []
if result.returncode == 0: for deploy in deployments.items:
deployments = json.loads(result.stdout) name = deploy.metadata.name
info = [] ns = deploy.metadata.namespace
for deploy in deployments.get("items", []): desired = deploy.spec.replicas or 0
name = deploy["metadata"]["name"] ready = deploy.status.ready_replicas or 0
ns = deploy["metadata"]["namespace"]
desired = deploy["spec"].get("replicas", 0)
ready = deploy["status"].get("readyReplicas", 0)
emoji = "" if ready == desired else "⚠️" emoji = "" if ready == desired else "⚠️"
info.append(f"{emoji} {ns}/{name}: {ready}/{desired} ready") info.append(f"{emoji} {ns}/{name}: {ready}/{desired} ready")
return f"📦 Deployments ({len(info)}):\n" + "\n".join(info[:30]) return f"📦 Deployments ({len(info)}):\n" + "\n".join(info[:30])
return f"❌ Error: {result.stderr}" except ApiException as e:
return f"❌ Kubernetes API error: {e.status} {e.reason}"
except Exception as e: except Exception as e:
return f"kubectl error: {str(e)}" return f"Error: {str(e)}"
@tool @tool
@@ -141,15 +169,16 @@ def k8s_get_pod_logs(namespace: str, pod_name: str, tail: int = 50) -> str:
tail: Number of lines to show (default: 50) tail: Number of lines to show (default: 50)
""" """
try: try:
result = subprocess.run( logs = k8s_core_v1.read_namespaced_pod_log(
["kubectl", "logs", "-n", namespace, pod_name, f"--tail={tail}"], name=pod_name,
capture_output=True, text=True, timeout=10 namespace=namespace,
tail_lines=tail
) )
if result.returncode == 0: return f"📜 Logs for {namespace}/{pod_name}:\n```\n{logs}\n```"
return f"📜 Logs for {namespace}/{pod_name}:\n```\n{result.stdout}\n```" except ApiException as e:
return f"Error: {result.stderr}" return f"Kubernetes API error: {e.status} {e.reason}"
except Exception as e: except Exception as e:
return f"kubectl error: {str(e)}" return f"Error: {str(e)}"
@tool @tool
@@ -162,20 +191,65 @@ def k8s_describe_resource(resource_type: str, name: str, namespace: str = "defau
namespace: Namespace (default: default) namespace: Namespace (default: default)
""" """
try: try:
result = subprocess.run( resource_type = resource_type.lower()
["kubectl", "describe", resource_type, name, "-n", namespace],
capture_output=True, text=True, timeout=10 if resource_type in ["pod", "pods"]:
) resource = k8s_core_v1.read_namespaced_pod(name=name, namespace=namespace)
if result.returncode == 0: elif resource_type in ["deployment", "deployments", "deploy"]:
# Return last 50 lines to keep it manageable resource = k8s_apps_v1.read_namespaced_deployment(name=name, namespace=namespace)
lines = result.stdout.split("\n") elif resource_type in ["service", "services", "svc"]:
return f"🔍 Describe {resource_type}/{name} in {namespace}:\n```\n" + "\n".join(lines[-50:]) + "\n```" resource = k8s_core_v1.read_namespaced_service(name=name, namespace=namespace)
return f"❌ Error: {result.stderr}" elif resource_type in ["statefulset", "statefulsets", "sts"]:
resource = k8s_apps_v1.read_namespaced_stateful_set(name=name, namespace=namespace)
elif resource_type in ["daemonset", "daemonsets", "ds"]:
resource = k8s_apps_v1.read_namespaced_daemon_set(name=name, namespace=namespace)
elif resource_type in ["ingress", "ingresses", "ing"]:
resource = k8s_networking_v1.read_namespaced_ingress(name=name, namespace=namespace)
else:
return f"❌ Unsupported resource type: {resource_type}"
# Format resource information
result_lines = [
f"Name: {resource.metadata.name}",
f"Namespace: {resource.metadata.namespace}",
f"Created: {resource.metadata.creation_timestamp}",
f"Labels: {resource.metadata.labels}",
f"Annotations: {resource.metadata.annotations}",
]
# Add status if available
if hasattr(resource, 'status') and resource.status:
result_lines.append(f"Status: {resource.status}")
output = "\n".join(str(line) for line in result_lines)
return f"🔍 Describe {resource_type}/{name} in {namespace}:\n```\n{output}\n```"
except ApiException as e:
return f"❌ Kubernetes API error: {e.status} {e.reason}"
except Exception as e: except Exception as e:
return f"kubectl error: {str(e)}" return f"Error: {str(e)}"
# === 2. PostgreSQL MCP Tools === # === 2. PostgreSQL MCP Tools ===
def get_postgres_connection(database: str = "postgres"):
    """Open a psycopg2 connection to the cluster PostgreSQL service.

    Connection parameters are read from POSTGRES_* environment variables,
    falling back to the in-cluster service DNS name and default credentials.

    Args:
        database: Database name to connect to (default: "postgres").

    Returns:
        An open psycopg2 connection (caller is responsible for closing it).
    """
    # Resolve every parameter up front so the connect call stays declarative.
    params = {
        "host": os.getenv("POSTGRES_HOST", "postgresql-primary.postgresql.svc.cluster.local"),
        "port": os.getenv("POSTGRES_PORT", "5432"),
        "user": os.getenv("POSTGRES_USER", "bluemayne"),
        "password": os.getenv("POSTGRES_PASSWORD", ""),
        "database": database,
        "connect_timeout": 10,
    }
    return psycopg2.connect(**params)
@tool @tool
def postgres_query(query: str, database: str = "postgres") -> str: def postgres_query(query: str, database: str = "postgres") -> str:
""" """
@@ -188,20 +262,28 @@ def postgres_query(query: str, database: str = "postgres") -> str:
return "❌ Only SELECT queries are allowed for safety" return "❌ Only SELECT queries are allowed for safety"
try: try:
# Use kubectl exec to run psql in the PostgreSQL pod conn = get_postgres_connection(database)
pg_password = os.getenv("POSTGRES_PASSWORD", "") cursor = conn.cursor()
cmd = [ cursor.execute(query)
"kubectl", "exec", "-n", "postgresql", "postgresql-primary-0", "--",
"env", f"PGPASSWORD={pg_password}",
"psql", "-U", "bluemayne", "-d", database, "-c", query
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=15) # Fetch results
if result.returncode == 0: rows = cursor.fetchall()
return f"📊 Query Result:\n```\n{result.stdout}\n```" colnames = [desc[0] for desc in cursor.description]
return f"❌ Error: {result.stderr}"
# Format output
result_lines = [" | ".join(colnames)]
result_lines.append("-" * len(result_lines[0]))
for row in rows[:100]: # Limit to 100 rows
result_lines.append(" | ".join(str(val) for val in row))
cursor.close()
conn.close()
return f"📊 Query Result ({len(rows)} rows):\n```\n" + "\n".join(result_lines) + "\n```"
except psycopg2.Error as e:
return f"❌ PostgreSQL error: {e.pgcode} - {e.pgerror}"
except Exception as e: except Exception as e:
return f"PostgreSQL error: {str(e)}" return f"Error: {str(e)}"
@tool @tool
@@ -210,19 +292,23 @@ def postgres_list_databases() -> str:
List all databases in PostgreSQL cluster. List all databases in PostgreSQL cluster.
""" """
try: try:
pg_password = os.getenv("POSTGRES_PASSWORD", "") conn = get_postgres_connection("postgres")
cmd = [ cursor = conn.cursor()
"kubectl", "exec", "-n", "postgresql", "postgresql-primary-0", "--", cursor.execute("SELECT datname, pg_size_pretty(pg_database_size(datname)) AS size FROM pg_database WHERE datistemplate = false ORDER BY datname;")
"env", f"PGPASSWORD={pg_password}",
"psql", "-U", "bluemayne", "-c", "\\l"
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10) rows = cursor.fetchall()
if result.returncode == 0: result_lines = ["Database | Size", "---------|-----"]
return f"🗄️ Databases:\n```\n{result.stdout}\n```" for row in rows:
return f"❌ Error: {result.stderr}" result_lines.append(f"{row[0]} | {row[1]}")
cursor.close()
conn.close()
return f"🗄️ Databases ({len(rows)}):\n```\n" + "\n".join(result_lines) + "\n```"
except psycopg2.Error as e:
return f"❌ PostgreSQL error: {e.pgcode} - {e.pgerror}"
except Exception as e: except Exception as e:
return f"PostgreSQL error: {str(e)}" return f"Error: {str(e)}"
@tool @tool
@@ -261,9 +347,9 @@ def git_recent_commits(repo: str, limit: int = 10) -> str:
limit: Number of commits to show (default: 10) limit: Number of commits to show (default: 10)
""" """
try: try:
repo_path = f"/Users/bluemayne/Projects/{repo}" repo_path = f"/app/repos/{repo}" # Container 내부 경로
if not os.path.exists(repo_path): if not os.path.exists(repo_path):
return f"❌ Repository not found: {repo_path}" return f"❌ Repository not found: {repo_path}. Use git_clone_repo first."
result = subprocess.run( result = subprocess.run(
["git", "-C", repo_path, "log", f"-{limit}", "--oneline"], ["git", "-C", repo_path, "log", f"-{limit}", "--oneline"],
@@ -276,6 +362,111 @@ def git_recent_commits(repo: str, limit: int = 10) -> str:
return f"❌ Git error: {str(e)}" return f"❌ Git error: {str(e)}"
@tool
def git_clone_repo(repo_url: str, repo_name: str = "") -> str:
    """
    Clone a Git repository locally.

    Args:
        repo_url: Full repository URL (e.g., https://gitea0213.kro.kr/bluemayne/harbor.git)
        repo_name: Optional local directory name (default: extracted from URL)
    """
    try:
        # Derive the checkout directory name from the URL when not given.
        name = repo_name or repo_url.split("/")[-1].replace(".git", "")
        repo_path = f"/app/repos/{name}"
        os.makedirs("/app/repos", exist_ok=True)

        # Idempotent: an existing checkout counts as success.
        if os.path.exists(repo_path):
            return f"✅ Repository already exists: {repo_path}"

        # Embed the Gitea token into the URL for authenticated clones
        # of the internal Gitea host only.
        token = os.getenv("GITEA_TOKEN", "")
        if token and "gitea0213.kro.kr" in repo_url:
            repo_url = repo_url.replace("https://", f"https://{token}@")

        proc = subprocess.run(
            ["git", "clone", repo_url, repo_path],
            capture_output=True, text=True, timeout=60
        )
        if proc.returncode != 0:
            return f"❌ Error: {proc.stderr}"
        return f"✅ Cloned repository: {name} to {repo_path}"
    except Exception as e:
        return f"❌ Git clone error: {str(e)}"
@tool
def git_create_file(repo_name: str, file_path: str, content: str, commit_message: str = "") -> str:
    """
    Create or update a file in a Git repository and commit it.

    Args:
        repo_name: Repository name (must be cloned first with git_clone_repo)
        file_path: File path relative to repo root
        content: File content (written as UTF-8)
        commit_message: Commit message (default: "Add/Update {file_path}")

    Returns:
        A status string: success, "commit failed" warning, or an error message.
    """
    try:
        repo_path = f"/app/repos/{repo_name}"
        if not os.path.exists(repo_path):
            return f"❌ Repository not found: {repo_path}. Use git_clone_repo first."

        # file_path is untrusted (LLM-supplied): resolve it and refuse any
        # path that escapes the repository root (e.g. "../../etc/passwd").
        repo_root = os.path.realpath(repo_path)
        full_path = os.path.realpath(os.path.join(repo_root, file_path))
        if os.path.commonpath([full_path, repo_root]) != repo_root:
            return f"❌ Invalid file path: {file_path} (escapes repository root)"

        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        # Explicit encoding so content survives non-UTF-8 container locales.
        with open(full_path, "w", encoding="utf-8") as f:
            f.write(content)

        # Stage the file; check=True raises on failure and is caught below.
        subprocess.run(["git", "-C", repo_path, "add", file_path], check=True, timeout=10)

        if not commit_message:
            commit_message = f"Add/Update {file_path}"
        result = subprocess.run(
            ["git", "-C", repo_path, "commit", "-m", commit_message],
            capture_output=True, text=True, timeout=10
        )
        if result.returncode == 0:
            return f"✅ Created/Updated {file_path} and committed"
        # Commit can legitimately fail (e.g. content unchanged → nothing to commit).
        return f"⚠️ File created but commit failed: {result.stderr}"
    except Exception as e:
        return f"❌ Git file error: {str(e)}"
@tool
def git_push(repo_name: str, branch: str = "main") -> str:
    """
    Push commits to remote repository.

    Args:
        repo_name: Repository name
        branch: Branch name (default: main)
    """
    try:
        repo_path = f"/app/repos/{repo_name}"
        if not os.path.exists(repo_path):
            return f"❌ Repository not found: {repo_path}"

        # Ensure a committer identity exists inside the container; failures
        # here are intentionally ignored (best-effort, like the original).
        for key, value in (("user.name", "mas-agent"), ("user.email", "mas-agent@mas.local")):
            subprocess.run(["git", "-C", repo_path, "config", key, value], timeout=5)

        proc = subprocess.run(
            ["git", "-C", repo_path, "push", "origin", branch],
            capture_output=True, text=True, timeout=30
        )
        if proc.returncode != 0:
            return f"❌ Push failed: {proc.stderr}"
        return f"✅ Pushed to {branch} branch"
    except Exception as e:
        return f"❌ Git push error: {str(e)}"
# === 4. Prometheus MCP Tools === # === 4. Prometheus MCP Tools ===
@tool @tool
def prometheus_query(query: str) -> str: def prometheus_query(query: str) -> str:
@@ -285,15 +476,21 @@ def prometheus_query(query: str) -> str:
query: PromQL query (e.g., "up", "node_cpu_seconds_total") query: PromQL query (e.g., "up", "node_cpu_seconds_total")
""" """
try: try:
# Prometheus is accessible via kubectl port-forward or ingress # Access Prometheus via Kubernetes service
# For now, use kubectl proxy approach prometheus_url = os.getenv(
result = subprocess.run( "PROMETHEUS_URL",
["kubectl", "exec", "-n", "monitoring", "deployment/prometheus-kube-prometheus-operator", "--", "http://prometheus-kube-prometheus-prometheus.monitoring.svc.cluster.local:9090"
"wget", "-qO-", f"http://localhost:9090/api/v1/query?query={query}"],
capture_output=True, text=True, timeout=10
) )
if result.returncode == 0:
data = json.loads(result.stdout) # Make HTTP request
response = requests.get(
f"{prometheus_url}/api/v1/query",
params={"query": query},
timeout=10
)
if response.status_code == 200:
data = response.json()
if data.get("status") == "success": if data.get("status") == "success":
results = data.get("data", {}).get("result", []) results = data.get("data", {}).get("result", [])
output = [] output = []
@@ -301,11 +498,13 @@ def prometheus_query(query: str) -> str:
metric = r.get("metric", {}) metric = r.get("metric", {})
value = r.get("value", [None, "N/A"])[1] value = r.get("value", [None, "N/A"])[1]
output.append(f"{metric}: {value}") output.append(f"{metric}: {value}")
return f"📈 Prometheus Query Result:\n" + "\n".join(output) return f"📈 Prometheus Query Result ({len(results)} metrics):\n" + "\n".join(output)
return f"❌ Query failed: {data}" return f"❌ Query failed: {data}"
return f"❌ Error: {result.stderr}" return f" HTTP Error: {response.status_code} - {response.text}"
except requests.RequestException as e:
return f"❌ Prometheus request error: {str(e)}"
except Exception as e: except Exception as e:
return f"Prometheus error: {str(e)}" return f"Error: {str(e)}"
@tool @tool
@@ -327,8 +526,8 @@ def fs_read_file(file_path: str, max_lines: int = 100) -> str:
max_lines: Maximum lines to read (default: 100) max_lines: Maximum lines to read (default: 100)
""" """
try: try:
# Only allow reading from safe directories # Allow reading from safe directories
safe_dirs = ["/var/log", "/tmp", os.path.expanduser("~/Projects")] safe_dirs = ["/app/repos", "/var/log", "/tmp"]
if not any(file_path.startswith(d) for d in safe_dirs): if not any(file_path.startswith(d) for d in safe_dirs):
return f"❌ Access denied: {file_path} (not in safe directories)" return f"❌ Access denied: {file_path} (not in safe directories)"