# wall_docker/app/core/yolo/detect.py

import os

import cv2
import numpy as np
import onnxruntime as ort

# Defect class id -> display name; the position in the tuple is the class id.
CLASS_NAMES = dict(enumerate(("Hollowing", "Water seepage", "Cracking")))

# Defect class id -> drawing colour, given in BGR channel order.
CLASS_COLORS = dict(
    enumerate(
        (
            (0, 0, 255),  # Hollowing -> red
            (0, 255, 0),  # Water seepage -> green
            (255, 0, 0),  # Cracking -> blue
        )
    )
)

# Default side length (pixels) of the square network input.
IMG_SIZE = 640


class YOLOSeg:
    """YOLO instance-segmentation inference on top of ONNX Runtime.

    The pipeline is: letterbox preprocessing -> ONNX inference -> confidence
    filtering and NMS -> mask decoding -> polygon extraction -> colour mask.
    """

    def __init__(self, onnx_path: str = "model.onnx", imgsz: int = IMG_SIZE):
        """Create the inference session.

        Args:
            onnx_path: Model file, joined onto this module's directory
                (``os.path.join`` leaves an absolute path unchanged).
            imgsz: Side length of the square letterboxed network input.
        """
        real_path = os.path.join(os.path.dirname(__file__), onnx_path)
        # Prefer CUDA when onnxruntime reports a GPU build, with CPU fallback.
        self.session = ort.InferenceSession(
            real_path,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            if ort.get_device() == "GPU"
            else ["CPUExecutionProvider"],
        )
        # Feed the network in the precision its first input declares.
        self.ndtype = (
            np.half
            if self.session.get_inputs()[0].type == "tensor(float16)"
            else np.single
        )
        self.imgsz = imgsz
        self.classes = CLASS_NAMES

    # ---------- preprocessing: letterbox ----------
    def _preprocess(self, img_bgr):
        """Letterbox a BGR image and convert it to a network input tensor.

        The image is resized with its aspect ratio preserved so that it fits
        into an ``imgsz x imgsz`` square, then padded with grey (114) borders.

        Args:
            img_bgr: Image array of shape (H, W, 3) in BGR channel order.

        Returns:
            A tuple ``(tensor, ratio, (pad_w, pad_h))`` where ``tensor`` has
            shape (1, 3, imgsz, imgsz) in RGB order scaled to [0, 1],
            ``ratio`` is the (identical) resize factor for both axes and the
            pads are the per-side horizontal / vertical padding (may be
            fractional).
        """
        h0, w0 = img_bgr.shape[:2]
        new_shape = (self.imgsz, self.imgsz)

        r = min(new_shape[0] / h0, new_shape[1] / w0)
        ratio = (r, r)
        new_unpad = (int(round(w0 * r)), int(round(h0 * r)))  # (width, height)
        pad_w = (new_shape[1] - new_unpad[0]) / 2
        pad_h = (new_shape[0] - new_unpad[1]) / 2

        if (w0, h0) != new_unpad:
            img = cv2.resize(img_bgr, new_unpad, interpolation=cv2.INTER_LINEAR)
        else:
            img = img_bgr.copy()

        # The -0.1 / +0.1 nudges split an odd total padding into two integer
        # borders that still sum to the full amount.
        top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
        left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right,
            borderType=cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )

        # HWC -> CHW, then reverse the channel axis (BGR -> RGB), then /255.
        img = np.ascontiguousarray(np.einsum("HWC->CHW", img)[::-1], dtype=self.ndtype)
        img = img / 255.0
        if img.ndim == 3:
            img = img[None]  # add the batch axis: (1, 3, H, W)
        return img, ratio, (pad_w, pad_h)

    # ---------- mask -> polygon ----------
    @staticmethod
    def _masks2segments(masks):
        """Convert binary masks to one polygon per instance.

        Args:
            masks: Array of shape (N, H, W); non-zero pixels are foreground.

        Returns:
            A list of N float32 arrays of shape (K, 2) holding (x, y) contour
            points. An instance without any contour yields a (0, 2) array.
        """
        segments = []
        for x in masks.astype("uint8"):
            # [-2] selects the contour list on both OpenCV 3.x, which returns
            # (image, contours, hierarchy), and 4.x, which returns
            # (contours, hierarchy).
            cs = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
            if cs:
                # Keep only the contour with the most points.
                c = np.array(cs[np.argmax([len(i) for i in cs])]).reshape(-1, 2)
            else:
                c = np.zeros((0, 2))
            segments.append(c.astype("float32"))
        return segments

    @staticmethod
    def _crop_mask(masks, boxes):
        """Zero every mask outside its own bounding box.

        Args:
            masks: Array of shape (N, H, W).
            boxes: Array of shape (N, 4) in xyxy pixel coordinates.

        Returns:
            Array of shape (N, H, W) with values outside each box set to 0.
        """
        n, h, w = masks.shape
        x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1)  # each (N, 1, 1)
        r = np.arange(w, dtype=x1.dtype)[None, None, :]  # column indices
        c = np.arange(h, dtype=x1.dtype)[None, :, None]  # row indices
        return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))

    @staticmethod
    def _scale_mask(masks, im0_shape, ratio_pad=None):
        """Remove the letterbox padding from masks and resize to the original image.

        Args:
            masks: Array of shape (H, W) or (H, W, N) at letterboxed resolution.
            im0_shape: Shape of the original image; only ``[0]`` (height) and
                ``[1]`` (width) are read.
            ratio_pad: Optional ``(ratio, (pad_w, pad_h))``; when omitted the
                padding is derived from the two shapes.

        Returns:
            Array of shape (im0_h, im0_w, N).

        Raises:
            ValueError: If ``masks`` has fewer than two dimensions.
        """
        # Validate before the shape is indexed so the intended error is raised.
        if masks.ndim < 2:
            raise ValueError("masks ndim 应该是 2 或 3")

        im1_shape = masks.shape[:2]
        if ratio_pad is None:
            gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])
            pad = (
                (im1_shape[1] - im0_shape[1] * gain) / 2,
                (im1_shape[0] - im0_shape[0] * gain) / 2,
            )
        else:
            pad = ratio_pad[1]

        top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))
        bottom = int(round(im1_shape[0] - pad[1] + 0.1))
        right = int(round(im1_shape[1] - pad[0] + 0.1))

        masks = masks[top:bottom, left:right]
        masks = cv2.resize(
            masks, (im0_shape[1], im0_shape[0]), interpolation=cv2.INTER_LINEAR
        )
        # cv2.resize returns a 2-D array for a single-channel input; restore
        # the trailing instance axis in that case.
        if masks.ndim == 2:
            masks = masks[:, :, None]
        return masks

    def _process_mask(self, protos, masks_in, bboxes, im0_shape):
        """Decode per-instance masks from prototypes and mask coefficients.

        Args:
            protos: Prototype masks of shape (C, Hm, Wm).
            masks_in: Mask coefficients of shape (N, C).
            bboxes: Boxes of shape (N, 4), xyxy, in original-image pixels.
            im0_shape: Shape of the original image.

        Returns:
            Boolean array of shape (N, H, W) at original-image resolution.
        """
        c, mh, mw = protos.shape
        masks = (
            np.matmul(masks_in, protos.reshape(c, -1))
            .reshape(-1, mh, mw)
            .transpose(1, 2, 0)
        )  # HWN
        masks = np.ascontiguousarray(masks)
        masks = self._scale_mask(masks, im0_shape)  # HWN at image resolution
        masks = np.einsum("HWN->NHW", masks)  # NHW
        masks = self._crop_mask(masks, bboxes)
        return masks > 0.5

    @staticmethod
    def _get_cid(name):
        """Return the class id whose name equals ``name``, or None if unknown."""
        for k, v in CLASS_NAMES.items():
            if v == name:
                return k
        return None

    @staticmethod
    def _make_color_mask(img_bgr, masks, coords):
        """Paint every instance mask in its class colour on a black canvas.

        Nothing is blended with the source image; ``img_bgr`` only supplies
        the canvas size.

        Args:
            img_bgr: Source image, read for its height and width only.
            masks: Boolean array of shape (N, H, W).
            coords: Per-instance tuples whose first item is the class name.

        Returns:
            uint8 BGR image of shape (H, W, 3).
        """
        h, w = img_bgr.shape[:2]
        color_mask = np.zeros((h, w, 3), dtype=np.uint8)

        for m, inst in zip(masks, coords):
            cid = YOLOSeg._get_cid(inst[0])
            # Classes without a configured colour are drawn in yellow (BGR).
            color = CLASS_COLORS.get(cid, (0, 255, 255))
            # Colour only the pixels covered by this instance.
            color_mask[m] = color

        return color_mask

    @staticmethod
    def _empty_result(h0, w0):
        """Return the "nothing detected" result: a black colour mask and no instances."""
        return np.zeros((h0, w0, 3), dtype=np.uint8), []

    # ---------- inference entry point ----------
    def detect(self, img_input, conf_thres: float = 0.1, iou_thres: float = 0.1):
        """Run segmentation on one image.

        Args:
            img_input: Either an image path (``str`` or ``os.PathLike``) or an
                already decoded BGR image array.
            conf_thres: Minimum class score for a candidate to be kept; also
                passed to NMS as its score threshold.
            iou_thres: IoU threshold used by NMS.

        Returns:
            A tuple ``(color_mask, coords)``:
              - ``color_mask``: uint8 BGR image of shape (H, W, 3), black
                background with each detected region in its class colour.
              - ``coords``: list of ``(class_name, points)`` tuples, where
                ``points`` is the instance polygon as a list of ``[x, y]``.
            When nothing is detected the colour mask is all black and
            ``coords`` is empty, so callers can always unpack two values.

        Raises:
            ValueError: If the image could not be read / is None.
        """
        if isinstance(img_input, (str, os.PathLike)):
            # np.fromfile + imdecode instead of cv2.imread.
            # NOTE(review): presumably chosen so non-ASCII paths work on
            # Windows — confirm.
            img_bgr = cv2.imdecode(np.fromfile(img_input, dtype=np.uint8), cv2.IMREAD_COLOR)
        else:
            img_bgr = img_input

        if img_bgr is None:
            raise ValueError("img_bgr is None, 请检查图片读取是否成功")

        # _preprocess never modifies its input, so no defensive copy is needed.
        h0, w0 = img_bgr.shape[:2]
        im, ratio, (pad_w, pad_h) = self._preprocess(img_bgr)

        # ONNX inference.
        input_name = self.session.get_inputs()[0].name
        preds = self.session.run(None, {input_name: im})
        # Expected layout: x is (1, C, N), protos is (1, nm, Hm, Wm).
        x, protos = preds[0], preds[1]

        # (1, C, N) -> (N, C)
        x = np.einsum("bcn->bnc", x)[0]

        # Derive the number of mask coefficients from protos; the remaining
        # channels after the 4 box values are class scores.
        nm = int(protos.shape[1])
        nc = x.shape[1] - 4 - nm

        # Class scores live in columns [4 : 4 + nc].
        cls_scores = x[:, 4:4 + nc]
        cls_max = np.max(cls_scores, axis=-1)
        keep = cls_max > conf_thres
        x = x[keep]

        if x.size == 0:
            return self._empty_result(h0, w0)

        conf = cls_max[keep]
        cls_id = np.argmax(cls_scores[keep], axis=-1)
        # Repack rows as [cx, cy, w, h, conf, cls_id, mask_coeffs...].
        x = np.c_[x[:, :4], conf, cls_id, x[:, -nm:]]

        # cv2.dnn.NMSBoxes expects [x, y, w, h] with (x, y) the top-left
        # corner, while the rows hold the box centre.
        bboxes_xywh = x[:, :4].copy()
        bboxes_xywh[:, 0] -= bboxes_xywh[:, 2] / 2  # x = cx - w/2
        bboxes_xywh[:, 1] -= bboxes_xywh[:, 3] / 2  # y = cy - h/2

        indices = cv2.dnn.NMSBoxes(
            bboxes_xywh.tolist(), x[:, 4].tolist(), conf_thres, iou_thres
        )

        # Depending on the OpenCV version, indices may be [], [0, 1],
        # [[0], [1]] or an ndarray.
        if indices is None or len(indices) == 0:
            return self._empty_result(h0, w0)

        # Normalise to a flat index array and keep the surviving rows.
        indices = np.array(indices).reshape(-1)
        x = x[indices]

        # cxcywh -> xyxy
        x[:, 0:2] -= x[:, 2:4] / 2
        x[:, 2:4] += x[:, 0:2]

        # Undo the letterbox: remove the padding, then the resize factor.
        x[:, [0, 2]] -= pad_w
        x[:, [1, 3]] -= pad_h
        x[:, :4] /= min(ratio)

        # Clamp the boxes to the image bounds.
        x[:, [0, 2]] = x[:, [0, 2]].clip(0, w0)
        x[:, [1, 3]] = x[:, [1, 3]].clip(0, h0)

        # Decode the masks from prototypes and per-instance coefficients.
        masks = self._process_mask(protos[0], x[:, 6:], x[:, :4], img_bgr.shape)

        # Masks -> polygons.
        segments = self._masks2segments(masks)

        # Pair each instance's class name with its polygon.
        coords = []
        for cls_i, seg in zip(x[:, 5], segments):
            cid = int(cls_i)
            coords.append((self.classes.get(cid, str(cid)), seg.tolist()))

        color_mask = self._make_color_mask(img_bgr, masks, coords)

        return color_mask, coords


if __name__ == "__main__":
    # Manual smoke test: run the detector on a single image and print the
    # colour-mask shape and the detected instances.
    import sys

    # The image path may be given as the first command-line argument;
    # without one, the developer's local sample image is used.
    img_path = (
        sys.argv[1]
        if len(sys.argv) > 1
        else r"D:\Projects\Python\wall\app\core\yolo\test.jpg"
    )
    # ====== load the model ======
    model = YOLOSeg()
    # detect() decodes string paths itself, so the path is passed directly.
    color_mask, coords = model.detect(img_path)
    print(color_mask.shape)
    print(coords)