Boen_Shi fd590d1294 feat(model): add confidence scores to detection results; update Dockerfile for persistent code deployment
- Add a score field to the MaskInfo model to store detection confidence (see the sketch after this header)
- Update the YOLO detection logic to extract and pass through prediction scores
- Extend the coordinate data structure to include confidence information
- Adjust the data-processing pipeline so score data is passed through correctly
- Modify the Dockerfile to support persistent code deployment
- Update the README to document how code persistence is configured
2026-01-29 15:50:14 +08:00
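
The score field mentioned above ends up next to the label and box geometry that detect() already emits. A minimal sketch of what the updated MaskInfo model might look like, assuming a Pydantic model; apart from score, the field names and types are illustrative, not the project's actual schema:

    from typing import List, Tuple

    from pydantic import BaseModel, Field


    # Hypothetical sketch only: `score` is the field added by this commit;
    # the other fields are assumptions about how MaskInfo is shaped.
    class MaskInfo(BaseModel):
        label: str                              # English display label, e.g. "Hollowing"
        score: float = Field(ge=0.0, le=1.0)    # YOLO detection confidence
        polygon: List[Tuple[int, int]]          # four box corners as (x, y) points

Each entry that detect() appends to coords below carries exactly these three pieces of information: a display label, a float confidence, and the four corner points of the box.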

Python · 151 lines · 7.1 KiB

import io
import os

import cv2
import torch
import numpy as np

os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # restrict inference to the first GPU

from PIL import Image, ImageDraw, ImageFont, ImageSequence

from app.core.yolo_detect.yolo import YOLO  # assumes a YOLO base model class is available
from app.core.yolo_detect.utils.utils import (cvtColor, get_classes, preprocess_input,
                                              resize_image, show_config)
# Maps the model's internal class names to English display labels.
CLASS_NAMES = {
    "wall_konggu": "Hollowing",
    "wall_shenshui": "Water seepage",
    "wall_kailie": "Cracking",
    "wall_konggu_gap": "Gap in hollowing",
    "wall": "Wall",
}


class YOLODetect(YOLO):
    def __init__(self):
        super().__init__()
        self.classes = CLASS_NAMES

    def detect(self, img_input, crop=False, count=False):
        try:
            image = Image.open(img_input)
            if image.format == "MPO":
                # MPO (multi-picture JPEG) inputs are flattened to their first frame
                # and re-encoded as a plain JPEG in memory before detection.
                image = next(ImageSequence.Iterator(image))
                jpeg_image_in_memory = io.BytesIO()
                image.save(jpeg_image_in_memory, format="JPEG")
                jpeg_image_in_memory.seek(0)
                image = Image.open(jpeg_image_in_memory)
            # if isinstance(img_input, str):
            #     image = cv2.imdecode(np.fromfile(img_input, dtype=np.uint8), cv2.IMREAD_COLOR)
            #     # image = Image.open(img_input)
            # else:
            #     image = img_input
            # ---------------------------------------------------#
            #   Compute the height and width of the input image.
            # ---------------------------------------------------#
            image_shape = np.array(np.shape(image)[0:2])
            # ---------------------------------------------------------#
            #   Convert the image to RGB so grayscale inputs do not fail at prediction time.
            #   Only RGB prediction is supported; all other image types are converted to RGB.
            # ---------------------------------------------------------#
            image = cvtColor(image)
            # ---------------------------------------------------------#
            #   Pad the image with gray bars for a distortion-free resize.
            #   A plain resize would also work for detection.
            # ---------------------------------------------------------#
            image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
            # ---------------------------------------------------------#
            #   Add the batch dimension:
            #   h, w, 3 => 3, h, w => 1, 3, h, w
            # ---------------------------------------------------------#
            image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
            with torch.no_grad():
                images = torch.from_numpy(image_data)
                if self.cuda:
                    images = images.cuda()
                # ---------------------------------------------------------#
                #   Feed the image into the network for prediction.
                # ---------------------------------------------------------#
                outputs = self.net(images)
                outputs = self.bbox_util.decode_box(outputs)
                # ---------------------------------------------------------#
                #   Stack the predicted boxes, then run non-maximum suppression.
                # ---------------------------------------------------------#
                results = self.bbox_util.non_max_suppression(outputs, self.num_classes, self.input_shape,
                                                             image_shape, self.letterbox_image, conf_thres=self.confidence,
                                                             nms_thres=self.nms_iou)
            if results[0] is None:
                # No detections: return an empty mask and an empty coordinate list
                # so the return type matches the normal path.
                return np.zeros((image.size[1], image.size[0], 3), dtype=np.uint8), []
            # results[0] columns: 0-3 box (top, left, bottom, right), 4 confidence, 5 class index.
            top_label = np.array(results[0][:, 5], dtype='int32')
            top_conf = results[0][:, 4]
            top_boxes = results[0][:, :4]

            # Build an RGB mask the same size as the input image (PIL size is width, height).
            mask = np.zeros((image.size[1], image.size[0], 3), dtype=np.uint8)
            coords = []
            # Collect the wall regions first.
            wall_boxes = []
            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                if predicted_class == "wall":
                    box = top_boxes[i]
                    top, left, bottom, right = box
                    top = max(0, np.floor(top).astype('int32'))
                    left = max(0, np.floor(left).astype('int32'))
                    bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                    right = min(image.size[0], np.floor(right).astype('int32'))
                    wall_boxes.append((left, top, right, bottom))
            # Then handle the defect classes.
            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                if predicted_class != "wall":
                    box = top_boxes[i]
                    score = top_conf[i]
                    top, left, bottom, right = box
                    top = max(0, np.floor(top).astype('int32'))
                    left = max(0, np.floor(left).astype('int32'))
                    bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                    right = min(image.size[0], np.floor(right).astype('int32'))

                    # Keep a defect only if it overlaps some wall box by at least 60% of its own area.
                    special_area = (right - left) * (bottom - top)
                    keep = False
                    for w_left, w_top, w_right, w_bottom in wall_boxes:
                        inter_left = max(left, w_left)
                        inter_top = max(top, w_top)
                        inter_right = min(right, w_right)
                        inter_bottom = min(bottom, w_bottom)
                        if inter_right > inter_left and inter_bottom > inter_top:
                            inter_area = (inter_right - inter_left) * (inter_bottom - inter_top)
                            if inter_area / special_area >= 0.6:  # overlap ratio >= 60%
                                keep = True
                                break

                    if predicted_class == "wall_konggu":
                        # A hollowing box may not cover more than 50% of the whole image.
                        if special_area / (image.size[0] * image.size[1]) > 0.5:
                            keep = False

                    if keep:
                        color = self.colors[int(c)]
                        mask[top:bottom, left:right] = color
                        coords.append(
                            (
                                self.classes.get(predicted_class),
                                float(score),
                                [(int(left), int(top)), (int(right), int(top)), (int(right), int(bottom)), (int(left), int(bottom))]
                            )
                        )
            # The mask was built in RGB order; convert to BGR for OpenCV consumers.
            mask = cv2.cvtColor(mask, cv2.COLOR_RGB2BGR)
            # print("coords:", coords)
            return mask, coords
        except Exception as e:
            print(e)
            # Keep the (mask, coords) return shape even on failure so callers can unpack it.
            return None, []


if __name__ == "__main__":
    model = YOLODetect()
    image = "test.jpg"
    # detect() opens the image itself, so pass the path rather than a PIL Image.
    mask, coords = model.detect(image)
    # The returned mask is a BGR numpy array, so save it with OpenCV.
    cv2.imwrite("mask.jpg", mask, [cv2.IMWRITE_JPEG_QUALITY, 95])
    print(coords)