diff --git a/.gitea/README.md b/.gitea/README.md
new file mode 100644
index 0000000..956c237
--- /dev/null
+++ b/.gitea/README.md
@@ -0,0 +1,57 @@
+# Gitea Actions 配置说明
+
+本目录为本项目的 Gitea CI/CD 工作流。运行环境为 **self-hosted Linux runner**,且 runner 必须能在目标主机上以 root 身份执行 eBPF/bcc。
+
+## 工作流
+
+- [`workflows/ci.yml`](workflows/ci.yml) — push / PR 到 `main` 触发。语法检查 + 干净 venv 安装依赖。**不**需要 root。
+- [`workflows/cd.yml`](workflows/cd.yml) — 推送 `v*` tag 或手动触发。流程:
+  1. `stop.sh` 停掉旧实例(若存在)
+  2. `rsync` 当前 checkout 到固定目录 `/opt/syscall_monitor`
+  3. `setup.sh` 装 venv 依赖
+  4. `run.sh` 启动
+  5. 校验 `.pid` 进程存活
+  6. 重试 10 次 `curl http://127.0.0.1:5000` 做健康检查
+  7. tail 启动日志
+
+  **需要 root**。部署目录和健康检查 URL 在 workflow 顶部 `env:` 块里改。
+
+## Runner 准备
+
+1. 在目标 Linux 主机上注册 act_runner,labels 至少包含 `self-hosted`。
+2. 安装系统依赖:
+
+   ```
+   sudo apt install -y python3-venv python3-bpfcc bpfcc-tools rsync curl linux-headers-$(uname -r)
+   ```
+
+3. 创建固定部署目录并把所有权给 root(脚本以 root 运行):
+
+   ```
+   sudo mkdir -p /opt/syscall_monitor
+   ```
+
+4. 给 runner 用户配置免密 sudo。CD 用到的 root 命令有 4 个:rsync、setup.sh、run.sh、stop.sh,外加 tail 读 root 拥有的日志。写 `/etc/sudoers.d/syscall-monitor`:
+
+   ```
+   RUNNER_USER ALL=(root) NOPASSWD: /usr/bin/rsync, /opt/syscall_monitor/setup.sh, /opt/syscall_monitor/run.sh, /opt/syscall_monitor/stop.sh, /usr/bin/tail
+   ```
+
+   只授权这些,不要 `ALL` 通配。`RUNNER_USER` 换成运行 act_runner 的用户名;`rsync` 路径以 `which rsync` 为准。
+
+## 触发方式
+
+- 日常 push / PR → 自动跑 CI。
+- 发版部署 → 打 tag:
+
+  ```
+  git tag v0.1.0 && git push origin v0.1.0
+  ```
+
+  也可在 Gitea Actions 页面用 `workflow_dispatch` 手动触发。
+
+## 注意
+
+- 部署目录固定为 `/opt/syscall_monitor`。每次 CD 用 `rsync --delete` 全量同步代码(保留 `.venv` / `logs` / `.pid`),所以 venv 不会每次重建,依赖增量装。
+- 健康检查用 `HEALTH_URL` 默认 `http://127.0.0.1:5000`,重试 10 次每次间隔 2 秒。如果你的 Flask 路由没有根路径,把 `env.HEALTH_URL` 改成 `http://127.0.0.1:5000/具体路径`。
+- `run.sh` 是 `nohup` 后台启动,runner 当前 workspace 跟部署目录是分开的——`.pid` 落在 `/opt/syscall_monitor/.pid`,CD 校验的就是这个。
diff --git a/.gitea/workflows/cd.yml b/.gitea/workflows/cd.yml
new file mode 100644
index 0000000..1446f34
--- /dev/null
+++ b/.gitea/workflows/cd.yml
@@ -0,0 +1,85 @@
+name: CD
+
+on:
+  push:
+    tags:
+      - "v*"
+  workflow_dispatch:
+
+env:
+  DEPLOY_DIR: /opt/syscall_monitor
+  HEALTH_URL: http://127.0.0.1:5000
+
+jobs:
+  deploy:
+    runs-on: self-hosted
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Show target host info
+        run: |
+          echo "deploying on: $(hostname)"
+          echo "user: $(id -un) ($(id -u))"
+          echo "ref: ${{ gitea.ref }}"
+          echo "deploy dir: $DEPLOY_DIR"
+
+      - name: Stop running instance (if any)
+        run: |
+          if [ -x "$DEPLOY_DIR/stop.sh" ]; then
+            sudo -n "$DEPLOY_DIR/stop.sh" || true
+          else
+            echo "no prior install at $DEPLOY_DIR, skipping stop"
+          fi
+
+      - name: Sync code to deploy dir
+        run: |
+          # Set exec bits in the workspace instead of `sudo chmod` on the target:
+          # chmod is not in the documented sudoers allow-list; rsync -a keeps modes.
+          chmod +x setup.sh run.sh stop.sh
+          sudo -n /usr/bin/rsync -a --delete \
+            --exclude='.git' \
+            --exclude='.venv' --exclude='.venv-ci' \
+            --exclude='logs' --exclude='.pid' \
+            ./ "$DEPLOY_DIR/"
+
+      - name: Setup venv and dependencies
+        run: sudo -n "$DEPLOY_DIR/setup.sh"
+
+      - name: Start service
+        run: sudo -n "$DEPLOY_DIR/run.sh"
+
+      - name: Verify pid is alive
+        run: |
+          sleep 2
+          PID_FILE="$DEPLOY_DIR/.pid"
+          if [ ! -f "$PID_FILE" ]; then
+            echo "no .pid file at $PID_FILE" >&2
+            sudo -n tail -n 50 "$DEPLOY_DIR/logs/app.log" >&2 || true
+            exit 1
+          fi
+          PID=$(cat "$PID_FILE")
+          if [ -z "$PID" ] || [ ! -d "/proc/$PID" ]; then  # not kill -0: it gets EPERM on a root-owned pid when this step is unprivileged
+            echo "pid $PID not alive" >&2
+            sudo -n tail -n 50 "$DEPLOY_DIR/logs/app.log" >&2 || true
+            exit 1
+          fi
+          echo "service running, pid=$PID"
+
+      - name: HTTP health check
+        run: |
+          for i in 1 2 3 4 5 6 7 8 9 10; do
+            if curl -fsS -o /dev/null "$HEALTH_URL"; then
+              echo "health check passed on attempt $i"
+              exit 0
+            fi
+            echo "attempt $i failed, retrying..."
+            sleep 2
+          done
+          echo "health check failed: $HEALTH_URL unreachable" >&2
+          sudo -n tail -n 80 "$DEPLOY_DIR/logs/app.log" >&2 || true
+          exit 1
+
+      - name: Tail startup log
+        if: always()
+        run: sudo -n tail -n 30 "$DEPLOY_DIR/logs/app.log" || true
diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml
new file mode 100644
index 0000000..349c65d
--- /dev/null
+++ b/.gitea/workflows/ci.yml
@@ -0,0 +1,37 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  lint-and-build:
+    runs-on: self-hosted
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Python syntax check
+        run: |
+          python3 -m compileall -q collector web main.py
+
+      - name: Optional ruff lint
+        run: |
+          if command -v ruff >/dev/null 2>&1; then
+            ruff check collector web main.py || true
+          else
+            echo "ruff not installed, skipping"
+          fi
+
+      - name: Build venv (dry-run install)
+        run: |
+          python3 -m venv --system-site-packages .venv-ci
+          .venv-ci/bin/pip install --upgrade pip
+          .venv-ci/bin/pip install -r requirements.txt
+          .venv-ci/bin/python -c "import flask; print('flask', flask.__version__)"
+
+      - name: Cleanup
+        if: always()
+        run: rm -rf .venv-ci
diff --git a/collector/__init__.py b/collector/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/collector/syscall_tracer.py b/collector/syscall_tracer.py
new file mode 100644
index 0000000..c5534b8
--- /dev/null
+++ b/collector/syscall_tracer.py
@@ -0,0 +1,147 @@
+"""eBPF syscall counter.
+
+Attaches a single BPF program to the raw_syscalls:sys_enter tracepoint and
+counts every syscall by its numeric id in a BPF_HASH map. The Python side
+periodically reads the map and exposes per-syscall counts for the names
+listed in config/monitors.json.
+
+Why count all syscalls, not just configured ones?
+  - Config changes at runtime; if we filtered inside BPF we'd have to
+    rebuild and reattach the program. Counting everything in-kernel is
+    cheap (one hash increment) and lets the web layer pick which names to
+    surface.
+"""
+
+import json
+import logging
+import os
+import threading
+import time
+from pathlib import Path
+
+from bcc import BPF
+from bcc.syscall import syscall_name
+
+
+logger = logging.getLogger(__name__)
+
+BPF_TEXT = r"""
+BPF_HASH(counts, u32, u64);
+
+TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
+    u32 id = (u32)args->id;
+    u64 zero = 0, *val;
+    val = counts.lookup_or_try_init(&id, &zero);
+    if (val) {
+        __sync_fetch_and_add(val, 1);
+    }
+    return 0;
+}
+"""
+
+
+class SyscallTracer:
+    """Count syscalls in-kernel and publish totals for the configured names.
+
+    A daemon thread re-reads the config file and the BPF map once per second
+    and swaps in a fresh name -> count snapshot, served by get_counts().
+    """
+
+    def __init__(self, config_path: Path):
+        self.config_path = Path(config_path)
+        self._bpf = None
+        self._lock = threading.Lock()
+        self._snapshot: dict[str, int] = {}
+        self._stop = threading.Event()
+        self._thread: threading.Thread | None = None
+
+    def load_config(self) -> list[str]:
+        """Return the configured syscall names, stripped, blanks dropped.
+
+        A missing file, malformed JSON, or an unexpected document shape
+        yields an empty list instead of raising.
+        """
+        try:
+            with self.config_path.open("r", encoding="utf-8") as f:
+                data = json.load(f)
+        except FileNotFoundError:
+            return []
+        except json.JSONDecodeError as exc:
+            logger.warning("ignoring malformed %s: %s", self.config_path, exc)
+            return []
+        names = data.get("syscalls", []) if isinstance(data, dict) else []
+        if not isinstance(names, list):
+            return []
+        return [str(n).strip() for n in names if str(n).strip()]
+
+    def start(self) -> None:
+        """Load the BPF program and start polling; no-op if already started."""
+        if self._bpf is not None:
+            return
+        # stop() leaves the event set; clear it or a restarted poll thread
+        # would exit before its first refresh.
+        self._stop.clear()
+        self._bpf = BPF(text=BPF_TEXT)
+        self._thread = threading.Thread(target=self._poll_loop, daemon=True)
+        self._thread.start()
+
+    def stop(self) -> None:
+        """Stop the poll thread and release the BPF program."""
+        self._stop.set()
+        if self._thread:
+            self._thread.join(timeout=2)
+            self._thread = None
+        if self._bpf is not None:
+            # cleanup() is bcc's explicit teardown; without it the program
+            # presumably stays attached until interpreter exit (verify).
+            self._bpf.cleanup()
+            self._bpf = None
+
+    def _poll_loop(self) -> None:
+        while not self._stop.wait(1.0):
+            try:
+                self._refresh_snapshot()
+            except Exception:
+                # Keep the previous snapshot and retry on the next tick; an
+                # uncaught error would end the thread and freeze the counts.
+                logger.exception("snapshot refresh failed")
+
+    def _refresh_snapshot(self) -> None:
+        bpf = self._bpf  # local ref: stop() may clear the attribute meanwhile
+        if bpf is None:
+            return
+        names = set(self.load_config())
+        if not names:
+            with self._lock:
+                self._snapshot = {}
+            return
+        totals: dict[str, int] = dict.fromkeys(names, 0)
+        for k, v in bpf["counts"].items():
+            try:
+                name = syscall_name(k.value).decode("utf-8", "replace")
+            except Exception:
+                continue
+            if name in totals:
+                totals[name] += v.value
+        with self._lock:
+            self._snapshot = totals
+
+    def get_counts(self) -> dict[str, int]:
+        """Return a copy of the latest name -> count snapshot."""
+        with self._lock:
+            return dict(self._snapshot)
+
+
+_tracer: SyscallTracer | None = None
+
+
+def get_tracer(config_path: str | os.PathLike) -> SyscallTracer:
+    """Return the process-wide tracer, creating and starting it on first use.
+
+    config_path only matters on the call that creates the tracer.
+    """
+    global _tracer
+    if _tracer is None:
+        _tracer = SyscallTracer(Path(config_path))
+        _tracer.start()
+    return _tracer
diff --git a/config/monitors.json b/config/monitors.json
new file mode 100644
index 0000000..427c23f
--- /dev/null
+++ b/config/monitors.json
@@ -0,0 +1,3 @@
+{
+  "syscalls": ["openat", "read", "write", "execve", "close"]
+}
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..e6ac829
--- /dev/null
+++ b/main.py
@@ -0,0 +1,20 @@
+"""Entry point.
Must run as root for eBPF."""
+
+import os
+import sys
+
+from web.app import create_app
+
+
+def main():
+    if os.geteuid() != 0:
+        print("error: must run as root (eBPF requires CAP_BPF/root)", file=sys.stderr)
+        sys.exit(1)
+    app = create_app()
+    host = os.environ.get("HOST", "0.0.0.0")
+    port = int(os.environ.get("PORT", "5000"))
+    app.run(host=host, port=port, debug=False, use_reloader=False, threaded=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7e10602
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+flask
diff --git a/run.sh b/run.sh
new file mode 100644
index 0000000..911011b
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Start the monitor in the background; writes its pid to .pid, output to logs/app.log.
+set -e
+cd "$(dirname "$0")"
+
+if [ "$(id -u)" -ne 0 ]; then
+  echo "error: must run as root (eBPF requires root)" >&2
+  exit 1
+fi
+
+# Refuse to start a second copy: it would overwrite .pid and leave the
+# first instance running with nothing tracking it.
+if [ -f .pid ] && kill -0 "$(cat .pid)" 2>/dev/null; then
+  echo "error: already running, pid=$(cat .pid) (run stop.sh first)" >&2
+  exit 1
+fi
+
+if [ ! -x .venv/bin/python ]; then
+  echo "error: .venv/bin/python not found, run setup.sh first" >&2
+  exit 1
+fi
+
+mkdir -p logs
+nohup .venv/bin/python main.py > logs/app.log 2>&1 &
+echo $! > .pid
+echo "started, pid=$(cat .pid), log: logs/app.log"
diff --git a/setup.sh b/setup.sh
new file mode 100644
index 0000000..534dee2
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# Create venv with system-site-packages (so bcc from apt is visible) and install Python deps.
+set -e
+cd "$(dirname "$0")"
+
+if [ ! -d .venv ]; then
+  python3 -m venv --system-site-packages .venv
+fi
+
+.venv/bin/pip install --upgrade pip
+.venv/bin/pip install -r requirements.txt
+echo "setup done."
diff --git a/stop.sh b/stop.sh
new file mode 100644
index 0000000..7ddc367
--- /dev/null
+++ b/stop.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# Stop the instance recorded in .pid and wait until it has actually exited.
+cd "$(dirname "$0")" || exit 1
+if [ -f .pid ]; then
+  PID=$(cat .pid)
+  if [ -n "$PID" ] && kill -0 "$PID" 2>/dev/null; then
+    kill "$PID"
+    # SIGTERM is asynchronous: wait up to ~5s so a following run.sh does not
+    # race the old process, then fall back to SIGKILL.
+    for _ in 1 2 3 4 5 6 7 8 9 10; do
+      kill -0 "$PID" 2>/dev/null || break
+      sleep 0.5
+    done
+    if kill -0 "$PID" 2>/dev/null; then
+      kill -9 "$PID"
+      echo "pid $PID ignored SIGTERM, sent SIGKILL" >&2
+    fi
+    echo "stopped pid=$PID"
+  else
+    echo "pid $PID not running"
+  fi
+  rm -f .pid
+else
+  echo "no .pid file"
+fi
diff --git a/web/__init__.py b/web/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/web/app.py b/web/app.py
new file mode 100644
index 0000000..6539bc7
--- /dev/null
+++ b/web/app.py
@@ -0,0 +1,83 @@
+"""Flask web layer for the syscall monitor."""
+
+import json
+import re
+import threading
+from pathlib import Path
+
+from flask import Flask, jsonify, redirect, render_template, request, url_for
+
+from collector.syscall_tracer import get_tracer
+
+
+BASE_DIR = Path(__file__).resolve().parent.parent
+CONFIG_PATH = BASE_DIR / "config" / "monitors.json"
+_config_lock = threading.Lock()
+# Accept only short lowercase identifiers (e.g. "openat", "pread64") from the
+# config form; anything else submitted for "add" is ignored.
+_SYSCALL_NAME_RE = re.compile(r"[a-z0-9_]{1,64}")
+
+
+def _read_config() -> dict:
+    """Load the monitor config, falling back to an empty one.
+
+    A missing file, malformed JSON, or a non-object document yields
+    {"syscalls": []} so the pages keep rendering instead of raising.
+    """
+    try:
+        with CONFIG_PATH.open("r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (FileNotFoundError, json.JSONDecodeError):
+        return {"syscalls": []}
+    if not isinstance(data, dict):
+        return {"syscalls": []}
+    if not isinstance(data.get("syscalls"), list):
+        data["syscalls"] = []
+    return data
+
+
+def _write_config(data: dict) -> None:
+    """Write the config to a temp file, then rename it over the target."""
+    CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
+    tmp = CONFIG_PATH.with_suffix(".json.tmp")
+    with tmp.open("w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2, ensure_ascii=False)
+    tmp.replace(CONFIG_PATH)
+
+
+def create_app() -> Flask:
+    """Build the Flask app and obtain (starting if needed) the shared tracer."""
+    app = Flask(__name__, template_folder="templates", static_folder="static")
+    tracer = get_tracer(CONFIG_PATH)
+
+    @app.get("/")
+    def index():
+        cfg = _read_config()
+        return render_template("index.html", syscalls=cfg.get("syscalls", []))
+
+    @app.get("/api/counts")
+    def api_counts():
+        return jsonify(tracer.get_counts())
+
+    @app.route("/config", methods=["GET", "POST"])
+    def config_page():
+        with _config_lock:
+            cfg = _read_config()
+            syscalls = list(cfg.get("syscalls", []))
+
+            if request.method == "POST":
+                action = request.form.get("action", "")
+                name = (request.form.get("name") or "").strip()
+                # Form input is untrusted: only well-formed names may be added.
+                valid = bool(_SYSCALL_NAME_RE.fullmatch(name))
+                if action == "add" and valid and name not in syscalls:
+                    syscalls.append(name)
+                elif action == "remove" and name in syscalls:
+                    syscalls.remove(name)
+                cfg["syscalls"] = syscalls
+                _write_config(cfg)
+                return redirect(url_for("config_page"))
+
+        return render_template("config.html", syscalls=syscalls)
+
+    return app
diff --git a/web/static/style.css b/web/static/style.css
new file mode 100644
index 0000000..8205f41
--- /dev/null
+++ b/web/static/style.css
@@ -0,0 +1,19 @@
+body { font-family: -apple-system, "Segoe UI", Helvetica, Arial, sans-serif; margin: 0; background: #f5f6fa; color: #1f2330; }
+header { background: #1f2330; color: #fff; padding: 16px 24px; display: flex; align-items: center; justify-content: space-between; }
+header h1 { margin: 0; font-size: 18px; font-weight: 600; }
+nav a { color: #c9cbd1; margin-left: 16px; text-decoration: none; font-size: 14px; }
+nav a:hover { color: #fff; }
+main { max-width: 900px; margin: 24px auto; padding: 0 16px; }
+.card { background: #fff; border-radius: 8px; padding: 20px; box-shadow: 0 1px 3px rgba(0,0,0,0.08); margin-bottom: 16px; }
+table { width: 100%; border-collapse: collapse; }
+th, td { padding: 10px 12px; text-align: left; border-bottom: 1px solid #eef0f4; font-size: 14px; }
+th { background: #fafbfd; color: #5a6172; font-weight: 600; }
+td.count { font-family: "SF Mono", Consolas, monospace; text-align: right; color: #2f6feb; }
+form.inline { display: inline; }
+input[type=text] { padding: 8px 10px; border: 1px solid #d6d9e0; border-radius: 6px; font-size: 14px; min-width: 220px; }
+button { padding: 8px 14px; border: 0; border-radius: 6px; font-size: 14px; cursor: pointer; }
+button.primary { background: #2f6feb; color: #fff; }
+button.danger { background: #fff; color: #d33; border: 1px solid #f0c2c2; }
+button:hover { opacity: 0.9; }
+.muted { color: #8a90a0; font-size: 13px; }
+.tag { display: inline-block; padding: 2px 8px; border-radius: 12px;
background: #eef2ff; color: #2f6feb; font-size: 12px; } diff --git a/web/templates/config.html b/web/templates/config.html new file mode 100644 index 0000000..d936ece --- /dev/null +++ b/web/templates/config.html @@ -0,0 +1,53 @@ + + + + + 监控配置 + + + +
+

Syscall Monitor

+ +
+
+
+

添加监控项

+
+ + + +
+

名称需为内核 syscall 名(可参考 man syscalls)。新增后会立即出现在监控页面。

+
+ +
+

当前监控项 ({{ syscalls|length }})

+ {% if syscalls %} + + + + {% for name in syscalls %} + + + + + {% endfor %} + +
名称操作
{{ name }} +
+ + + +
+
+ {% else %} +

尚无监控项。

+ {% endif %} +
+
+ + diff --git a/web/templates/index.html b/web/templates/index.html new file mode 100644 index 0000000..68da721 --- /dev/null +++ b/web/templates/index.html @@ -0,0 +1,48 @@ + + + + + Syscall Monitor + + + +
+

Syscall Monitor

+ +
+
+
+

当前监控 {{ syscalls|length }} 项,每秒刷新。计数自 Agent 启动起累计。

+ {% if syscalls %} + + + + {% for name in syscalls %} + + {% endfor %} + +
System Call调用次数
{{ name }}
+ {% else %} +

尚未配置监控项,前往 配置页面 添加。

+ {% endif %} +
+
+ + +