"""eBPF-based system call counter.

Attaches a BPF program to the raw_syscalls:sys_enter tracepoint and
accumulates call counts, keyed by syscall number, in a BPF_HASH map. The
Python side reads that map on a timer and exposes the counts of the
syscalls whose names are listed in config/monitors.json.

Why count every syscall in the kernel instead of only the configured ones?
- The configuration may change at runtime; filtering inside BPF would
  require rebuilding and re-attaching the program on every config change.
  Counting everything in the kernel (a single hash increment) is very
  cheap, and the web layer only has to pick the names it wants to show.
"""
|
|
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
import os
|
|
|
|
|
|
import threading
|
|
|
|
|
|
import time
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
from bcc import BPF
|
|
|
|
|
|
from bcc.syscall import syscall_name
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-06-14 12:08:27 +08:00
|
|
|
|
# Kernel-side BPF program: on every syscall entry, add 1 to the counter
# keyed by that syscall's number. The text is handed verbatim to the BPF
# compiler, so it must contain nothing but valid C.
BPF_TEXT = r"""
BPF_HASH(counts, u32, u64);

TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
    u32 id = (u32)args->id;
    u64 zero = 0, *val;
    val = counts.lookup_or_try_init(&id, &zero);
    if (val) {
        // 原子自增,保证多 CPU 并发下计数正确
        __sync_fetch_and_add(val, 1);
    }
    return 0;
}
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SyscallTracer:
    """Syscall tracer: loads the BPF program and periodically refreshes a snapshot.

    Lifecycle: ``start()`` compiles/attaches the program and launches a
    background thread that rebuilds the snapshot once per second;
    ``get_counts()`` returns a copy of the latest snapshot; ``stop()`` ends
    the thread and releases the BPF object. A stopped tracer can be started
    again.
    """

    def __init__(self, config_path: Path):
        """Create an idle tracer; nothing is loaded until ``start()``.

        Args:
            config_path: Path of the JSON file whose ``"syscalls"`` list
                names the syscalls to expose.
        """
        self.config_path = Path(config_path)
        self._bpf = None
        # Guards _snapshot (written by the background poll thread, read by
        # HTTP request threads through get_counts).
        self._lock = threading.Lock()
        # Most recent snapshot: {syscall name: cumulative count}.
        self._snapshot: dict[str, int] = {}
        # Event used to tell the background thread to exit.
        self._stop = threading.Event()
        self._thread: threading.Thread | None = None

    def load_config(self) -> list[str]:
        """Read the list of syscall names to expose from the config file.

        Returns:
            The stripped, non-empty names from the ``"syscalls"`` list. An
            empty list is returned when the file is missing or unreadable,
            is not valid JSON (e.g. caught halfway through being rewritten),
            or does not have the expected ``{"syscalls": [...]}`` shape.
        """
        try:
            with self.config_path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # OSError covers FileNotFoundError; ValueError covers
            # json.JSONDecodeError and UnicodeDecodeError. This method runs
            # on the poll thread every cycle, so a bad file must not raise.
            return []
        if not isinstance(data, dict):
            return []
        names = data.get("syscalls", [])
        if not isinstance(names, list):
            # A bare string would otherwise be iterated character by character.
            return []
        # Strip surrounding whitespace and drop entries that end up empty.
        stripped = (str(n).strip() for n in names)
        return [n for n in stripped if n]

    def start(self) -> None:
        """Load and attach the BPF program, then start the polling thread.

        Does nothing if the tracer is already running.
        """
        if self._bpf is not None:
            return
        # A previous stop() leaves the event set; clear it so the new poll
        # thread does not exit on its first wait.
        self._stop.clear()
        self._bpf = BPF(text=BPF_TEXT)
        # daemon=True: the thread does not keep the process alive at exit.
        self._thread = threading.Thread(target=self._poll_loop, daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Stop tracing: end the polling thread and release the BPF program.

        Safe to call on a tracer that was never started.
        """
        self._stop.set()
        worker, self._thread = self._thread, None
        if worker is not None:
            worker.join(timeout=2)
        # Clear the attribute first so a poll thread that outlived the join
        # timeout sees None, then detach explicitly: dropping the reference
        # alone does not unload the program.
        bpf, self._bpf = self._bpf, None
        if bpf is not None:
            bpf.cleanup()

    def _poll_loop(self) -> None:
        """Refresh the snapshot once per second until ``stop()`` is called."""
        # Event.wait doubles as an interruptible sleep: it returns True as
        # soon as the stop event is set.
        while not self._stop.wait(1.0):
            self._refresh_snapshot()

    def _refresh_snapshot(self) -> None:
        """Read raw counts from the BPF map and keep only configured syscalls."""
        # Work on a local reference: stop() may reset the attribute to None
        # while this method is running.
        bpf = self._bpf
        if bpf is None:
            return
        names = set(self.load_config())
        if not names:
            # Empty config: clear the snapshot so stale data is not shown.
            with self._lock:
                self._snapshot = {}
            return
        # Configured syscalls that have not fired yet are reported as 0.
        totals: dict[str, int] = dict.fromkeys(names, 0)
        for k, v in bpf["counts"].items():
            try:
                # Translate the syscall number into its readable name.
                name = syscall_name(k.value).decode("utf-8", "replace")
            except Exception:
                # Skip entries that cannot be resolved so that one bad key
                # does not discard the other counts.
                continue
            if name in totals:
                totals[name] += v.value
        with self._lock:
            self._snapshot = totals

    def get_counts(self) -> dict[str, int]:
        """Return a copy of the most recent snapshot (for the web layer)."""
        with self._lock:
            return dict(self._snapshot)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-06-14 12:08:27 +08:00
|
|
|
|
# Process-wide singleton: the application attaches the BPF program only
# once, avoiding duplicate attachments and wasted resources.
_tracer: SyscallTracer | None = None
# Serialises first-time creation so concurrent callers cannot each build
# and start their own tracer.
_tracer_lock = threading.Lock()


def get_tracer(config_path: str | os.PathLike) -> SyscallTracer:
    """Return the global tracer, creating and starting it on first use.

    Args:
        config_path: Path of the monitor configuration file. Only the first
            successful call uses it; later calls return the existing tracer
            regardless of the argument.

    Returns:
        The started, process-wide ``SyscallTracer``.

    Raises:
        Whatever ``SyscallTracer.start()`` raises. In that case nothing is
        cached, so the next call tries again.
    """
    global _tracer
    with _tracer_lock:
        if _tracer is None:
            tracer = SyscallTracer(Path(config_path))
            tracer.start()
            # Publish only after a successful start, so a failed start does
            # not leave a dead tracer cached for every later caller.
            _tracer = tracer
        return _tracer
|