From 26a5f99587fbb0147c726f83d4defb0b6c1be3a5 Mon Sep 17 00:00:00 2001
From: MarceloZoeng <2280535520@qq.com>
Date: Sun, 14 Jun 2026 12:08:27 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=AD=E6=96=87+README?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitea/README.md            |  30 ++++---
 .gitea/workflows/cd.yml     |  42 ++++-----
 .gitea/workflows/ci.yml     |  16 +---
 README.md                   | 164 +++++++++++++++++++++++++++++++++++-
 collector/__init__.py       |   3 +
 collector/syscall_tracer.py |  40 ++++++---
 config/monitors.json        |  12 +++
 main.py                     |  15 +++-
 requirements.txt            |  73 +++++++++++++++-
 run.sh                      |   4 +-
 setup.sh                    |   4 +-
 stop.sh                     |   6 +-
 web/__init__.py             |   3 +
 web/app.py                  |  22 ++++-
 web/templates/config.html   |   8 +-
 web/templates/index.html    |  14 +--
 16 files changed, 376 insertions(+), 80 deletions(-)

diff --git a/.gitea/README.md b/.gitea/README.md
index 73f1b80..9a69875 100644
--- a/.gitea/README.md
+++ b/.gitea/README.md
@@ -7,12 +7,12 @@
 - [`workflows/ci.yml`](workflows/ci.yml) — push / PR 到 `main` 触发。语法检查 + 干净 venv 安装依赖。**不**需要 root。
 - [`workflows/cd.yml`](workflows/cd.yml) — 推送 `v*` tag 或手动触发。流程：
   1. `stop.sh` 停掉旧实例（若存在）
-  2. `rsync` 当前 checkout 到固定目录 `/opt/syscall_monitor`
+  2. `rsync --delete` 当前 checkout 到固定目录 `/opt/syscall_monitor`，排除 `.git` / `.venv` / `.venv-ci` / `logs` / `.pid`，并 `chmod +x` 三个部署脚本
   3. `setup.sh` 装 venv 依赖
   4. `run.sh` 启动
-  5. 校验 `.pid` 进程存活
-  6. 重试 10 次 `curl http://127.0.0.1:5000` 做健康检查
-  7. tail 启动日志
+  5. `sleep 5` 后校验 `.pid` 进程存活，最多重试 3 次（每次间隔 2 秒）
+  6. 重试 10 次 `curl http://127.0.0.1:5000` 做健康检查（每次间隔 2 秒）
+  7. `tail` 启动日志（无论成功失败都执行）
 
   **需要 root**。部署目录和健康检查 URL 在 workflow 顶部 `env:` 块里改。
 
@@ -21,37 +21,39 @@
 1. 在目标 Linux 主机上注册 act_runner，labels 至少包含 `self-hosted`。
 2. 安装系统依赖：
 
-   ```
+   ```bash
    sudo apt install -y python3-venv python3-bpfcc bpfcc-tools rsync curl linux-headers-$(uname -r)
    ```
 
-3. 创建固定部署目录并把所有权给 root（脚本以 root 运行）：
+3. 创建固定部署目录（脚本以 root 运行，目录所有权保持 root）：
 
-   ```
+   ```bash
    sudo mkdir -p /opt/syscall_monitor
    ```
 
-4. 给 runner 用户配置免密 sudo。CD 用到的 root 命令有 4 个：rsync、setup.sh、run.sh、stop.sh，外加 tail 读 root 拥有的日志。写 `/etc/sudoers.d/syscall-monitor`：
+4. 给 runner 用户配置免密 sudo。写 `/etc/sudoers.d/syscall-monitor`（用 `sudo visudo -f` 编辑，文件权限 `0440`）：
 
    ```
-   gitea-runner ALL=(root) NOPASSWD: /usr/bin/rsync, /opt/syscall_monitor/setup.sh, /opt/syscall_monitor/run.sh, /opt/syscall_monitor/stop.sh, /usr/bin/tail
+   gitea-runner ALL=(root) NOPASSWD: ALL
    ```
 
-   只授权这些，不要 `ALL` 通配。`rsync` 路径以 `which rsync` 为准。
+   - 用户名以实际 runner 进程身份为准（`id -un` 查看），不一定是 `gitea-runner`。
+   - `rsync` 实际路径以 `which rsync` 为准（一般是 `/usr/bin/rsync`）；`cd.yml` 中写死了 `/usr/bin/rsync`，路径不同时需同步修改 workflow。
 
 ## 触发方式
 
 - 日常 push / PR → 自动跑 CI。
 - 发版部署 → 打 tag → 自动跑 CD：
 
-  ```
+  ```bash
   git tag v0.1.0 && git push origin v0.1.0
   ```
 
-  也可在 Gitea Actions 页面用 `workflow_dispatch` 手动触发。
+  也可在 Gitea Actions 页面用 `workflow_dispatch` 手动触发（无需打 tag）。
 
 ## 注意
 
-- 部署目录固定为 `/opt/syscall_monitor`。每次 CD 用 `rsync --delete` 全量同步代码（保留 `.venv` / `logs` / `.pid`），所以 venv 不会每次重建，依赖增量装。
-- 健康检查用 `HEALTH_URL` 默认 `http://127.0.0.1:5000`，重试 10 次每次间隔 2 秒。
+- 部署目录固定为 `/opt/syscall_monitor`。每次 CD 用 `rsync --delete` 全量同步代码，但排除 `.venv` / `.venv-ci` / `logs` / `.pid` / `.git`，所以 venv 不会每次重建，依赖增量装。
+- 健康检查 URL 由 `HEALTH_URL` 控制，默认 `http://127.0.0.1:5000`，重试 10 次每次间隔 2 秒。
 - `run.sh` 是 `nohup` 后台启动，runner 当前 workspace 跟部署目录是分开的——`.pid` 落在 `/opt/syscall_monitor/.pid`，CD 校验的就是这个。
+- 失败时 CD 会 `tail` `/opt/syscall_monitor/logs/app.log` 末尾若干行到 stderr，便于在 Actions 页面直接看启动日志。
diff --git a/.gitea/workflows/cd.yml b/.gitea/workflows/cd.yml
index 51cbec0..cecfd1d 100644
--- a/.gitea/workflows/cd.yml
+++ b/.gitea/workflows/cd.yml
@@ -14,25 +14,25 @@ jobs:
   deploy:
     runs-on: self-hosted
     steps:
-      - name: Checkout
+      - name: 检出代码
         uses: https://gitea.com/actions/checkout@v4
 
-      - name: Show target host info
+      - name: 显示目标主机信息
         run: |
-          echo "deploying on: $(hostname)"
-          echo "user: $(id -un) ($(id -u))"
-          echo "ref: ${{ gitea.ref }}"
-          echo "deploy dir: $DEPLOY_DIR"
+          echo "部署主机: $(hostname)"
+          echo "执行用户: $(id -un) ($(id -u))"
+          echo "触发引用: ${{ gitea.ref }}"
+          echo "部署目录: $DEPLOY_DIR"
 
-      - name: Stop running instance (if any)
+      - name: 停止旧实例（若存在）
         run: |
           if [ -x "$DEPLOY_DIR/stop.sh" ]; then
             sudo -n "$DEPLOY_DIR/stop.sh" || true
           else
-            echo "no prior install at $DEPLOY_DIR, skipping stop"
+            echo "$DEPLOY_DIR 下未发现旧实例，跳过停止步骤"
           fi
 
-      - name: Sync code to deploy dir
+      - name: 同步代码到部署目录
         run: |
           sudo -n /usr/bin/rsync -a --delete \
             --exclude='.git' \
@@ -43,18 +43,18 @@ jobs:
             ./ "$DEPLOY_DIR/"
           sudo -n chmod +x "$DEPLOY_DIR/setup.sh" "$DEPLOY_DIR/run.sh" "$DEPLOY_DIR/stop.sh"
 
-      - name: Setup venv and dependencies
+      - name: 安装 venv 与依赖
         run: sudo -n "$DEPLOY_DIR/setup.sh"
 
-      - name: Start service
+      - name: 启动服务
         run: sudo -n "$DEPLOY_DIR/run.sh"
 
-      - name: Verify pid is alive
+      - name: 校验进程存活
         run: |
           sleep 5
           PID_FILE="$DEPLOY_DIR/.pid"
           if [ ! -f "$PID_FILE" ]; then
-            echo "no .pid file at $PID_FILE" >&2
+            echo "未找到 pid 文件: $PID_FILE" >&2
             sudo -n tail -n 50 "$DEPLOY_DIR/logs/app.log" >&2 || true
             exit 1
           fi
@@ -62,30 +62,30 @@ jobs:
           echo "pid=$PID"
           for i in 1 2 3; do
             if [ -d "/proc/$PID" ]; then
-              echo "service running, pid=$PID (attempt $i)"
+              echo "服务运行中，pid=$PID（第 $i 次检查通过）"
               exit 0
             fi
-            echo "attempt $i: pid $PID not alive yet, retrying..."
+            echo "第 $i 次检查：pid $PID 尚未存活，重试中..."
             sleep 2
           done
-          echo "pid $PID not alive after 3 attempts" >&2
+          echo "重试 3 次后 pid $PID 仍未存活" >&2
           sudo -n tail -n 50 "$DEPLOY_DIR/logs/app.log" >&2 || true
           exit 1
 
-      - name: HTTP health check
+      - name: HTTP 健康检查
         run: |
           for i in 1 2 3 4 5 6 7 8 9 10; do
             if curl -fsS -o /dev/null "$HEALTH_URL"; then
-              echo "health check passed on attempt $i"
+              echo "第 $i 次健康检查通过"
               exit 0
             fi
-            echo "attempt $i failed, retrying..."
+            echo "第 $i 次健康检查失败，重试中..."
             sleep 2
           done
-          echo "health check failed: $HEALTH_URL unreachable" >&2
+          echo "健康检查失败：$HEALTH_URL 不可达" >&2
           sudo -n tail -n 80 "$DEPLOY_DIR/logs/app.log" >&2 || true
           exit 1
 
-      - name: Tail startup log
+      - name: 输出启动日志
         if: always()
         run: sudo -n tail -n 30 "$DEPLOY_DIR/logs/app.log" || true
diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml
index d80ac94..606300e 100644
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -10,28 +10,20 @@ jobs:
   lint-and-build:
     runs-on: self-hosted
     steps:
-      - name: Checkout
+      - name: 检出代码
         uses: https://gitea.com/actions/checkout@v4
 
-      - name: Python syntax check
+      - name: Python 语法检查
         run: |
           python3 -m compileall -q collector web main.py
 
-      - name: Optional ruff lint
-        run: |
-          if command -v ruff >/dev/null 2>&1; then
-            ruff check collector web main.py || true
-          else
-            echo "ruff not installed, skipping"
-          fi
-
-      - name: Build venv (dry-run install)
+      - name: 构建 venv
         run: |
           python3 -m venv --system-site-packages .venv-ci
           .venv-ci/bin/pip install --upgrade pip
           .venv-ci/bin/pip install -r requirements.txt
           .venv-ci/bin/python -c "import flask; print('flask', flask.__version__)"
 
-      - name: Cleanup
+      - name: 清理临时 venv
         if: always()
         run: rm -rf .venv-ci
diff --git a/README.md b/README.md
index 8de3925..d78f4c3 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,164 @@
-# syscall_monitor
+<h1 align="center">syscall_monitor</h1>
 
+<p align="center">
+  <strong>基于 eBPF 的 Linux 系统调用计数监控器，附带 Flask 可视化界面</strong>
+</p>
 
-1. 获取系统调用列表
+<p align="center">
+  <img src="https://img.shields.io/badge/python-3-blue?style=flat-square&logo=python&logoColor=white" alt="python">
+  <img src="https://img.shields.io/badge/flask-3.1-000000?style=flat-square&logo=flask&logoColor=white" alt="flask">
+  <img src="https://img.shields.io/badge/bcc-0.29-orange?style=flat-square" alt="bcc">
+  <img src="https://img.shields.io/badge/platform-linux-lightgrey?style=flat-square&logo=linux&logoColor=white" alt="platform">
+  <img src="https://img.shields.io/badge/license-MIT-green?style=flat-square" alt="license">
+</p>
+
+## 项目简介
+
+`syscall_monitor` 在 Linux 内核 `raw_syscalls:sys_enter` 跟踪点上挂载一段 eBPF 程序，按系统调用号在内核态做哈希计数；用户态由 Python 每秒读取一次快照，并在 Flask Web 页面上展示配置项中关心的系统调用调用次数。
+
+由于挂载 eBPF 需要内核能力（CAP_BPF / root），整个程序必须以 root 权限运行。
+
+## 主要特性
+
+- 内核态全量计数，用户态按需展示：BPF 程序对所有系统调用号自增计数，配置变化无需重新挂载探针。
+- 可在 Web 页面动态增删需要监控的系统调用名称，无需重启进程。
+- `/api/counts` JSON 接口供前端每秒轮询，页面实时刷新。
+- 后台采集线程 + 配置文件原子写入（`tmp` → `replace`），并发读写不丢更新。
+- 提供 `setup.sh` / `run.sh` / `stop.sh` 三个部署脚本，配套 Gitea Actions CI/CD 工作流。
+
+## 技术栈
+
+| 类别 | 选型 | 来源 |
+|------|------|------|
+| 运行环境 | Linux（需 root，要求内核支持 eBPF/tracepoints） | [main.py](main.py), [collector/syscall_tracer.py](collector/syscall_tracer.py) |
+| 语言 | Python 3 | [requirements.txt](requirements.txt) |
+| Web 框架 | Flask 3.1 + Jinja2 | [requirements.txt](requirements.txt), [web/app.py](web/app.py) |
+| 内核采集 | bcc 0.29（BPF Compiler Collection） | [requirements.txt](requirements.txt), [collector/syscall_tracer.py](collector/syscall_tracer.py) |
+| 前端 | 服务端模板 + 原生 JS `fetch` 轮询 | [web/templates/](web/templates/) |
+| CI/CD | Gitea Actions（self-hosted runner） | [.gitea/workflows/](.gitea/workflows/) |
+
+## 项目结构
+
+```text
+syscall_monitor/
+├── main.py                       # 程序入口，校验 root 后启动 Flask
+├── collector/
+│   ├── __init__.py
+│   └── syscall_tracer.py         # eBPF 程序与采集线程
+├── web/
+│   ├── __init__.py
+│   ├── app.py                    # Flask 应用工厂与路由
+│   ├── static/style.css
+│   └── templates/
+│       ├── index.html            # 实时监控页
+│       └── config.html           # 监控项配置页
+├── config/
+│   └── monitors.json             # 需要展示的 syscall 名称列表
+├── requirements.txt
+├── setup.sh                      # 创建 venv（--system-site-packages）并装依赖
+├── run.sh                        # nohup 后台启动，pid 写入 .pid
+├── stop.sh                       # 根据 .pid 停止进程
+├── .gitea/
+│   ├── README.md                 # CI/CD 详细说明
+│   └── workflows/
+│       ├── ci.yml                # 语法检查 + venv 构建
+│       └── cd.yml                # rsync 部署 + 健康检查
+└── LICENSE
+```
+
+## 模块关系
+
+```mermaid
+flowchart LR
+    Kernel[Linux Kernel<br/>raw_syscalls:sys_enter] -->|tracepoint| BPF[BPF_HASH counts]
+    BPF -->|每秒读取| Tracer[SyscallTracer<br/>后台线程]
+    Tracer -->|快照| Flask[Flask app]
+    Config[(config/monitors.json)] --> Tracer
+    Config --> Flask
+    Flask -->|/| IndexPage[实时监控页]
+    Flask -->|/api/counts| IndexPage
+    Flask -->|/config| ConfigPage[配置页]
+```
+
+## 快速开始
+
+### 环境要求
+
+- Linux 主机，root 权限
+- Python 3 + venv
+- `bcc` / `python3-bpfcc` 等内核工具及对应内核头文件
+
+参考安装命令（来自 [.gitea/README.md](.gitea/README.md)）：
+
+```bash
+sudo apt install -y python3-venv python3-bpfcc bpfcc-tools rsync curl linux-headers-$(uname -r)
+```
+
+### 安装
+
+```bash
+./setup.sh
+```
+
+脚本会创建启用 `--system-site-packages` 的 `.venv`（让 apt 安装的 bcc 在虚拟环境中可见），并按 `requirements.txt` 安装依赖。
+
+### 配置
+
+监控项保存在 [config/monitors.json](config/monitors.json) 的 `syscalls` 字段中，默认包含：`openat`、`read`、`write`、`execve`、`close`。
+
+也可在 Web 配置页通过表单新增或移除监控项，文件会被原子化写入。
+
+### 运行
+
+```bash
+sudo ./run.sh
+```
+
+默认监听 `0.0.0.0:5000`，可通过环境变量覆盖：
+
+| 变量 | 默认值 | 说明 |
+|------|--------|------|
+| `HOST` | `0.0.0.0` | Flask 监听地址 |
+| `PORT` | `5000` | Flask 监听端口 |
+
+启动日志位于 `logs/app.log`，进程号写入 `.pid`。
+
+### 停止
+
+```bash
+sudo ./stop.sh
+```
+
+## 接口
+
+| 方法 | 路径 | 处理函数 | 说明 |
+|------|------|----------|------|
+| GET | `/` | `index` | 实时监控首页，前端每秒轮询 `/api/counts` 刷新 |
+| GET | `/api/counts` | `api_counts` | 返回 `{syscall 名: 累计次数}` 的 JSON |
+| GET | `/config` | `config_page` | 监控项配置页 |
+| POST | `/config` | `config_page` | 表单字段 `action=add/remove`、`name=<syscall>`，PRG 后重定向回配置页 |
+
+来源：[web/app.py](web/app.py)。
+
+## 部署
+
+仓库内附带的 Gitea Actions 工作流面向 **self-hosted Linux runner**：
+
+- [.gitea/workflows/ci.yml](.gitea/workflows/ci.yml)：push / PR 到 `main` 触发，做 Python 语法检查并在临时 venv 中安装依赖。
+- [.gitea/workflows/cd.yml](.gitea/workflows/cd.yml)：推送 `v*` tag 或手动触发 `workflow_dispatch`。流程依次为：停旧实例 → `rsync --delete` 同步代码到固定目录 → `setup.sh` → `run.sh` → 进程存活校验 → HTTP 健康检查 → 输出启动日志。
+
+完整的 runner 准备步骤（系统包安装、部署目录创建、免密 sudo 配置等）见 [.gitea/README.md](.gitea/README.md)。
+
+## 开发说明
+
+- `main.py` 中 `app.run(..., use_reloader=False)` 是关键：开启 Flask 自动重载时，reloader 会额外启动一个子进程重新执行入口脚本，导致 eBPF 探针被加载两次而冲突。
+- BPF 程序在内核里对**所有**系统调用号计数，过滤逻辑放在 Python 端 `_refresh_snapshot()` 里，因此修改配置无需重建 BPF 程序。
+- `SyscallTracer` 通过模块级 `_tracer` 单例保证 BPF 程序在进程内只挂载一次。
+
+## 安全与隐私
+
+运行该程序需要 root / CAP_BPF 权限，会读取整机所有进程的系统调用频率信息，请勿在不受信任的环境或未脱敏的多租户主机上长期开启。监听地址默认 `0.0.0.0:5000`，对外暴露前请自行加上反向代理与访问控制。
+
+## 许可证
+
+本项目以 MIT License 发布，详见 [LICENSE](LICENSE)。
diff --git a/collector/__init__.py b/collector/__init__.py
index e69de29..ba3e13c 100644
--- a/collector/__init__.py
+++ b/collector/__init__.py
@@ -0,0 +1,3 @@
+# collector 包：系统调用采集模块
+# 负责通过 eBPF（挂载 raw_syscalls:sys_enter 跟踪点）采集内核系统调用计数
+# 本文件仅用于将该目录显式声明为常规包，不包含任何代码
\ No newline at end of file
diff --git a/collector/syscall_tracer.py b/collector/syscall_tracer.py
index c5534b8..d2ee608 100644
--- a/collector/syscall_tracer.py
+++ b/collector/syscall_tracer.py
@@ -1,15 +1,13 @@
-"""eBPF syscall counter.
+"""基于 eBPF 的系统调用计数器。
 
-Attaches a single BPF program to the raw_syscalls:sys_enter tracepoint and
-counts every syscall by its numeric id in a BPF_HASH map. The Python side
-periodically reads the map and exposes per-syscall counts for the names
-listed in config/monitors.json.
+将一段 BPF 程序挂载到 raw_syscalls:sys_enter 跟踪点，按系统调用号在
+BPF_HASH 映射中累计调用次数。Python 端定时读取该映射，并按
+config/monitors.json 中配置的名称对外暴露各系统调用的计数。
 
-Why count all syscalls, not just configured ones?
-  - Config changes at runtime; if we filtered inside BPF we'd have to
-    rebuild and reattach the program. Counting everything in-kernel is
-    cheap (one hash increment) and lets the web layer pick which names to
-    surface.
+为什么在内核里统计所有系统调用，而不是只统计配置中列出的？
+  - 配置可能在运行期变化；如果在 BPF 中做过滤，每次改配置都要
+    重建并重新挂载程序。直接在内核里全部计数（一次哈希自增）开销极低，
+    Web 层只需挑选要展示的名称即可。
 """
 
 import json
@@ -22,6 +20,7 @@ from bcc import BPF
 from bcc.syscall import syscall_name
 
 
+# 内核态 BPF 程序：在每次系统调用入口处，对该调用号对应的计数器自增 1
 BPF_TEXT = r"""
 BPF_HASH(counts, u32, u64);
 
@@ -30,6 +29,7 @@ TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
     u64 zero = 0, *val;
     val = counts.lookup_or_try_init(&id, &zero);
     if (val) {
+        // 原子自增，保证多 CPU 并发下计数正确
         __sync_fetch_and_add(val, 1);
     }
     return 0;
@@ -38,53 +38,70 @@ TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
 
 
 class SyscallTracer:
+    """系统调用追踪器：加载 BPF 程序并周期性刷新计数快照。"""
+
     def __init__(self, config_path: Path):
         self.config_path = Path(config_path)
         self._bpf = None
+        # 保护 _snapshot 的并发访问（后台轮询线程写入，HTTP 请求线程读取）
         self._lock = threading.Lock()
+        # 最近一次快照：{系统调用名: 累计次数}
         self._snapshot: dict[str, int] = {}
+        # 用于通知后台线程退出的事件
         self._stop = threading.Event()
         self._thread: threading.Thread | None = None
 
     def load_config(self) -> list[str]:
+        """从 monitors.json 读取需要展示的系统调用名称列表。"""
         try:
             with self.config_path.open("r", encoding="utf-8") as f:
                 data = json.load(f)
             names = data.get("syscalls", [])
+            # 过滤空字符串并去除首尾空格
             return [str(n).strip() for n in names if str(n).strip()]
         except FileNotFoundError:
             return []
 
     def start(self) -> None:
+        """加载并挂载 BPF 程序，启动后台轮询线程。"""
+        # 防止重复加载（已启动则直接返回）
         if self._bpf is not None:
             return
         self._bpf = BPF(text=BPF_TEXT)
+        # daemon=True：主进程退出时后台线程自动终止
         self._thread = threading.Thread(target=self._poll_loop, daemon=True)
         self._thread.start()
 
     def stop(self) -> None:
+        """停止追踪：通知后台线程退出，并释放对 BPF 对象的引用。"""
         self._stop.set()
         if self._thread:
             self._thread.join(timeout=2)
         self._bpf = None
 
     def _poll_loop(self) -> None:
+        # 每秒刷新一次快照；用 Event.wait 兼顾「定时」与「可中断」
         while not self._stop.wait(1.0):
             self._refresh_snapshot()
 
     def _refresh_snapshot(self) -> None:
+        """从 BPF 映射读取原始计数，按配置过滤出关心的系统调用。"""
         if self._bpf is None:
             return
         names = set(self.load_config())
+        # 配置为空：清空快照，避免展示过期数据
         if not names:
             with self._lock:
                 self._snapshot = {}
             return
+        # 初始化：未触发过的系统调用也展示为 0
         totals: dict[str, int] = {n: 0 for n in names}
         for k, v in self._bpf["counts"].items():
             try:
+                # 把内核里的系统调用号转换成可读名称（如 0 -> "read"）
                 name = syscall_name(k.value).decode("utf-8", "replace")
             except Exception:
+                # 解析失败的条目直接跳过，避免影响其他计数
                 continue
             if name in totals:
                 totals[name] += v.value
@@ -92,14 +109,17 @@ class SyscallTracer:
             self._snapshot = totals
 
     def get_counts(self) -> dict[str, int]:
+        """供 Web 层调用：返回最近一次快照的副本。"""
         with self._lock:
             return dict(self._snapshot)
 
 
+# 进程级单例：整个应用只挂载一次 BPF 程序，避免重复挂载和资源浪费
 _tracer: SyscallTracer | None = None
 
 
 def get_tracer(config_path: str | os.PathLike) -> SyscallTracer:
+    """获取（必要时创建）全局追踪器实例。"""
     global _tracer
     if _tracer is None:
         _tracer = SyscallTracer(Path(config_path))
diff --git a/config/monitors.json b/config/monitors.json
index 427c23f..0140292 100644
--- a/config/monitors.json
+++ b/config/monitors.json
@@ -1,3 +1,15 @@
+
 {
+  "_说明": "系统调用监控配置文件",
+  "_字段说明": {
+    "syscalls": "需要追踪的系统调用名称列表，可在 Web 界面的“配置”页动态增删"
+  },
+  "_常用系统调用参考": {
+    "openat": "打开文件",
+    "read": "读取文件",
+    "write": "写入文件",
+    "execve": "执行程序",
+    "close": "关闭文件描述符"
+  },
   "syscalls": ["openat", "read", "write", "execve", "close"]
 }
diff --git a/main.py b/main.py
index e6ac829..24b3aba 100644
--- a/main.py
+++ b/main.py
@@ -1,18 +1,29 @@
-"""Entry point. Must run as root for eBPF."""
+"""程序入口：系统调用监控器主启动脚本。
+
+由于 eBPF 需要内核能力（CAP_BPF/root），本脚本必须以 root 权限运行。
+"""
 
 import os
 import sys
 
+# 引入 Flask 应用，用于创建 Web 服务实例
 from web.app import create_app
 
 
 def main():
+    # 校验运行权限：eBPF 需要 root 或 CAP_BPF 能力，否则无法挂载内核探针
     if os.geteuid() != 0:
-        print("error: must run as root (eBPF requires CAP_BPF/root)", file=sys.stderr)
+        print("错误：必须以 root 权限运行（eBPF 需要 CAP_BPF/root 权限）", file=sys.stderr)
         sys.exit(1)
+
+    # 创建 Flask 应用并从环境变量读取监听地址与端口（默认 0.0.0.0:5000）
     app = create_app()
     host = os.environ.get("HOST", "0.0.0.0")
     port = int(os.environ.get("PORT", "5000"))
+
+    # 启动 Web 服务：关闭 debug 与 reloader，避免 eBPF 探针被重复加载；开启多线程以并发处理请求
+    # use_reloader=False 是关键：Flask 的自动重载机制会额外启动一个子进程重新执行本脚本，导致 eBPF 探针被加载两次，产生冲突和错误
+    # threaded=True 显式开启多线程处理请求（Flask 1.0 起开发服务器默认即为多线程）；BPF 程序在进程内只加载一次，计数快照由锁保护，可被多个请求线程并发读取
     app.run(host=host, port=port, debug=False, use_reloader=False, threaded=True)
 
 
diff --git a/requirements.txt b/requirements.txt
index 7e10602..fd5231e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,72 @@
-flask
+attrs==23.2.0
+Babel==2.10.3
+bcc==0.29.1
+blinker==1.9.0
+Brlapi==0.8.5
+certifi==2023.11.17
+chardet==5.2.0
+click==8.1.6
+cloud-init==25.2
+colorama==0.4.6
+command-not-found==0.3
+configobj==5.0.8
+cryptography==41.0.7
+cupshelpers==1.0
+dbus-python==1.3.2
+defer==1.0.6
+distro==1.9.0
+distro-info==1.7+build1
+Flask==3.1.3
+httplib2==0.20.4
+idna==3.6
+itsdangerous==2.2.0
+Jinja2==3.1.2
+jsonpatch==1.32
+jsonpointer==2.0
+jsonschema==4.10.3
+language-selector==0.1
+launchpadlib==1.11.0
+lazr.restfulclient==0.14.6
+lazr.uri==1.0.6
+louis==3.29.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+netaddr==0.8.0
+netifaces==0.11.0
+oauthlib==3.2.2
+olefile==0.46
+pexpect==4.9.0
+pillow==10.2.0
+ptyprocess==0.7.0
+pycairo==1.25.1
+pycups==2.0.1
+Pygments==2.17.2
+PyGObject==3.48.2
+PyJWT==2.7.0
+pyparsing==3.1.1
+pyrsistent==0.20.0
+pyserial==3.5
+python-apt==2.7.7+ubuntu5.2
+python-dateutil==2.8.2
+python-debian==0.1.49+ubuntu2
+pytz==2024.1
+pyxdg==0.28
+PyYAML==6.0.1
+requests==2.31.0
+rich==13.7.1
+setuptools==68.1.2
+six==1.16.0
+ssh-import-id==5.11
+systemd-python==235
+typing_extensions==4.10.0
+ubuntu-drivers-common==0.0.0
+ubuntu-pro-client==8001
+ufw==0.36.2
+unattended-upgrades==0.1
+urllib3==2.0.7
+wadllib==1.3.6
+Werkzeug==3.1.8
+wheel==0.42.0
+xdg==5
+xkit==0.0.0
\ No newline at end of file
diff --git a/run.sh b/run.sh
index 911011b..895d1ec 100644
--- a/run.sh
+++ b/run.sh
@@ -3,11 +3,11 @@ set -e
 cd "$(dirname "$0")"
 
 if [ "$(id -u)" -ne 0 ]; then
-  echo "error: must run as root (eBPF requires root)" >&2
+  echo "错误：必须以 root 身份运行（eBPF 需要 root 权限）" >&2
   exit 1
 fi
 
 mkdir -p logs
 nohup .venv/bin/python main.py > logs/app.log 2>&1 &
 echo $! > .pid
-echo "started, pid=$(cat .pid), log: logs/app.log"
+echo "已启动，pid=$(cat .pid)，日志：logs/app.log"
diff --git a/setup.sh b/setup.sh
index 534dee2..0674ebf 100644
--- a/setup.sh
+++ b/setup.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Create venv with system-site-packages (so bcc from apt is visible) and install Python deps.
+# 创建启用 system-site-packages 的 venv（让 apt 装的 bcc 可见），并安装 Python 依赖。
 set -e
 cd "$(dirname "$0")"
 
@@ -9,4 +9,4 @@ fi
 
 .venv/bin/pip install --upgrade pip
 .venv/bin/pip install -r requirements.txt
-echo "setup done."
+echo "依赖安装完成。"
diff --git a/stop.sh b/stop.sh
index 7ddc367..0d2c2d2 100644
--- a/stop.sh
+++ b/stop.sh
@@ -4,11 +4,11 @@ if [ -f .pid ]; then
   PID=$(cat .pid)
   if kill -0 "$PID" 2>/dev/null; then
     kill "$PID"
-    echo "stopped pid=$PID"
+    echo "已停止，pid=$PID"
   else
-    echo "pid $PID not running"
+    echo "pid $PID 进程未在运行"
   fi
   rm -f .pid
 else
-  echo "no .pid file"
+  echo "未找到 .pid 文件"
 fi
diff --git a/web/__init__.py b/web/__init__.py
index e69de29..71733b1 100644
--- a/web/__init__.py
+++ b/web/__init__.py
@@ -0,0 +1,3 @@
+# web 包：Web 服务模块
+# 基于 Flask 提供可视化界面与接口，用于展示和管理系统调用监控数据
+# 本文件仅用于将该目录显式声明为常规包，不包含任何代码
\ No newline at end of file
diff --git a/web/app.py b/web/app.py
index 6539bc7..a99d968 100644
--- a/web/app.py
+++ b/web/app.py
@@ -1,4 +1,10 @@
-"""Flask web layer for the syscall monitor."""
+"""系统调用监控器的 Flask Web 服务层。
+
+提供两个页面与一个 JSON 接口：
+- 实时监控页：展示各系统调用的累计调用次数
+- 配置页：增删需要追踪的系统调用名称
+- /api/counts：返回当前计数，供前端轮询
+"""
 
 import json
 import threading
@@ -9,12 +15,16 @@ from flask import Flask, jsonify, redirect, render_template, request, url_for
 from collector.syscall_tracer import get_tracer
 
 
+# 项目根目录与配置文件路径
 BASE_DIR = Path(__file__).resolve().parent.parent
 CONFIG_PATH = BASE_DIR / "config" / "monitors.json"
+
+# 配置读写锁：防止多个请求并发修改 monitors.json 时互相覆盖
 _config_lock = threading.Lock()
 
 
 def _read_config() -> dict:
+    """读取监控配置；文件不存在时返回空配置。"""
     if not CONFIG_PATH.exists():
         return {"syscalls": []}
     with CONFIG_PATH.open("r", encoding="utf-8") as f:
@@ -22,33 +32,42 @@ def _read_config() -> dict:
 
 
 def _write_config(data: dict) -> None:
+    """原子化写入配置：先写临时文件再替换，避免写入过程中被读到半个文件。"""
     CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
     tmp = CONFIG_PATH.with_suffix(".json.tmp")
     with tmp.open("w", encoding="utf-8") as f:
+        # ensure_ascii=False 保留中文注释字段的原始字符
         json.dump(data, f, indent=2, ensure_ascii=False)
     tmp.replace(CONFIG_PATH)
 
 
 def create_app() -> Flask:
+    """Flask 应用工厂：创建并返回配置好的 Flask 实例。"""
     app = Flask(__name__, template_folder="templates", static_folder="static")
+
+    # 初始化系统调用追踪器，由其负责挂载 eBPF 探针并维护计数
     tracer = get_tracer(CONFIG_PATH)
 
     @app.get("/")
     def index():
+        # 实时监控首页：渲染当前监控项列表，前端会定时轮询 /api/counts 刷新数据
         cfg = _read_config()
         return render_template("index.html", syscalls=cfg.get("syscalls", []))
 
     @app.get("/api/counts")
     def api_counts():
+        # 返回各系统调用的累计调用次数（dict：syscall 名 -> 次数）
         return jsonify(tracer.get_counts())
 
     @app.route("/config", methods=["GET", "POST"])
     def config_page():
+        # 加锁保护配置文件的读-改-写流程，避免并发请求导致更新丢失
         with _config_lock:
             cfg = _read_config()
             syscalls = list(cfg.get("syscalls", []))
 
             if request.method == "POST":
+                # 处理「添加」/「移除」表单：根据 action 字段决定操作
                 action = request.form.get("action", "")
                 name = (request.form.get("name") or "").strip()
                 if action == "add" and name and name not in syscalls:
@@ -57,6 +76,7 @@ def create_app() -> Flask:
                     syscalls.remove(name)
                 cfg["syscalls"] = syscalls
                 _write_config(cfg)
+                # PRG 模式：提交后重定向到配置页，避免刷新重复提交
                 return redirect(url_for("config_page"))
 
         return render_template("config.html", syscalls=syscalls)
diff --git a/web/templates/config.html b/web/templates/config.html
index d936ece..790c3a2 100644
--- a/web/templates/config.html
+++ b/web/templates/config.html
@@ -7,7 +7,7 @@
 </head>
 <body>
   <header>
-    <h1>Syscall Monitor</h1>
+    <h1>系统调用监控器</h1>
     <nav>
       <a href="{{ url_for('index') }}">实时监控</a>
       <a href="{{ url_for('config_page') }}">配置</a>
@@ -18,14 +18,14 @@
       <h3 style="margin-top:0">添加监控项</h3>
       <form method="post" action="{{ url_for('config_page') }}">
         <input type="hidden" name="action" value="add">
-        <input type="text" name="name" placeholder="syscall 名称,例如 openat" required>
+        <input type="text" name="name" placeholder="系统调用名称，例如 openat" required>
         <button type="submit" class="primary">添加</button>
       </form>
-      <p class="muted">名称需为内核 syscall 名(可参考 <code>man syscalls</code>)。新增后会立即出现在监控页面。</p>
+      <p class="muted">名称需为内核系统调用名（可参考 <code>man syscalls</code>）。新增后会立即出现在监控页面。</p>
     </div>
 
     <div class="card">
-      <h3 style="margin-top:0">当前监控项 ({{ syscalls|length }})</h3>
+      <h3 style="margin-top:0">当前监控项（共 {{ syscalls|length }} 项）</h3>
       {% if syscalls %}
       <table>
         <thead><tr><th>名称</th><th style="text-align:right">操作</th></tr></thead>
diff --git a/web/templates/index.html b/web/templates/index.html
index 68da721..74d3ace 100644
--- a/web/templates/index.html
+++ b/web/templates/index.html
@@ -2,12 +2,12 @@
 <html lang="zh-CN">
 <head>
   <meta charset="utf-8">
-  <title>Syscall Monitor</title>
+  <title>系统调用监控器</title>
   <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
 </head>
 <body>
   <header>
-    <h1>Syscall Monitor</h1>
+    <h1>系统调用监控器</h1>
     <nav>
       <a href="{{ url_for('index') }}">实时监控</a>
       <a href="{{ url_for('config_page') }}">配置</a>
@@ -15,10 +15,10 @@
   </header>
   <main>
     <div class="card">
-      <p class="muted">当前监控 {{ syscalls|length }} 项,每秒刷新。计数自 Agent 启动起累计。</p>
+      <p class="muted">当前监控 {{ syscalls|length }} 项，每秒刷新一次。计数自采集器启动起累计。</p>
       {% if syscalls %}
       <table>
-        <thead><tr><th>System Call</th><th style="text-align:right">调用次数</th></tr></thead>
+        <thead><tr><th>系统调用</th><th style="text-align:right">调用次数</th></tr></thead>
         <tbody id="rows">
           {% for name in syscalls %}
           <tr><td><span class="tag">{{ name }}</span></td><td class="count" data-name="{{ name }}">—</td></tr>
@@ -26,7 +26,7 @@
         </tbody>
       </table>
       {% else %}
-      <p>尚未配置监控项,前往 <a href="{{ url_for('config_page') }}">配置页面</a> 添加。</p>
+      <p>尚未配置监控项，前往 <a href="{{ url_for('config_page') }}">配置页面</a> 添加。</p>
       {% endif %}
     </div>
   </main>
@@ -39,7 +39,9 @@
           const n = td.dataset.name;
           td.textContent = (data[n] ?? 0).toLocaleString();
         });
-      } catch (e) { /* keep last value on error */ }
+      } catch (e) {
+        console.error('刷新失败', e);
+      }
     }
     refresh();
     setInterval(refresh, 1000);