import io
import os

# Select GPU 0 before torch initialises CUDA so the restriction takes effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont, ImageSequence

from app.core.yolo_detect.yolo import YOLO  # assumes a YOLO model base class is available
from app.core.yolo_detect.utils.utils import (cvtColor, get_classes,
                                              preprocess_input, resize_image,
                                              show_config)

# Mapping from model class names to human-readable labels.
CLASS_NAMES = {
    "wall_konggu": "Hollowing",
    "wall_shenshui": "Water seepage",
    "wall_kailie": "Cracking",
    "wall_konggu_gap": "Gap in hollowing",
    "wall": "Wall",
}


class YOLODetect(YOLO):
    def __init__(self):
        super().__init__()
        self.classes = CLASS_NAMES

    def detect(self, img_input, crop=False, count=False):
        try:
            image = Image.open(img_input)
            # Multi-picture (MPO) files: keep only the first frame and
            # re-encode it as a plain JPEG in memory.
            if image.format == "MPO":
                image = next(ImageSequence.Iterator(image))
                jpeg_image_in_memory = io.BytesIO()
                image.save(jpeg_image_in_memory, format="JPEG")
                jpeg_image_in_memory.seek(0)
                image = Image.open(jpeg_image_in_memory)
            # if isinstance(img_input, str):
            #     image = cv2.imdecode(np.fromfile(img_input, dtype=np.uint8), cv2.IMREAD_COLOR)
            #     # image = Image.open(img_input)
            # else:
            #     image = img_input
            # ---------------------------------------------------#
            #   Record the height and width of the input image.
            # ---------------------------------------------------#
            image_shape = np.array(np.shape(image)[0:2])
            # ---------------------------------------------------------#
            #   Convert the image to RGB to avoid errors on grayscale
            #   inputs; only RGB images are supported for prediction,
            #   so every other mode is converted to RGB here.
            # ---------------------------------------------------------#
            image = cvtColor(image)
            # ---------------------------------------------------------#
            #   Pad the image with gray bars for a distortion-free
            #   resize; a plain resize would also work for detection.
            # ---------------------------------------------------------#
            image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
            # ---------------------------------------------------------#
            #   Add the batch dimension:
            #   h, w, 3 => 3, h, w => 1, 3, h, w
            # ---------------------------------------------------------#
            image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

            with torch.no_grad():
                images = torch.from_numpy(image_data)
                if self.cuda:
                    images = images.cuda()
                # ---------------------------------------------------------#
                #   Feed the image through the network for prediction.
                # ---------------------------------------------------------#
                outputs = self.net(images)
                outputs = self.bbox_util.decode_box(outputs)
                # ---------------------------------------------------------#
                #   Stack the predicted boxes, then apply non-maximum
                #   suppression.
                # ---------------------------------------------------------#
                results = self.bbox_util.non_max_suppression(outputs, self.num_classes, self.input_shape,
                                                             image_shape, self.letterbox_image,
                                                             conf_thres=self.confidence, nms_thres=self.nms_iou)

                if results[0] is None:
                    # No detections: return an empty mask and an empty coordinate list.
                    return np.zeros((image.size[1], image.size[0], 3), dtype=np.uint8), []

                top_label = np.array(results[0][:, 5], dtype='int32')
                top_conf = results[0][:, 4]
                top_boxes = results[0][:, :4]

            mask = np.zeros((image.size[1], image.size[0], 3), dtype=np.uint8)
            coords = []

            # First collect all "wall" regions.
            wall_boxes = []
            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                if predicted_class == "wall":
                    box = top_boxes[i]
                    top, left, bottom, right = box
                    top = max(0, np.floor(top).astype('int32'))
                    left = max(0, np.floor(left).astype('int32'))
                    bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                    right = min(image.size[0], np.floor(right).astype('int32'))
                    wall_boxes.append((left, top, right, bottom))

            # Then handle the defect classes.
            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                if predicted_class != "wall":
                    box = top_boxes[i]
                    score = top_conf[i]
                    top, left, bottom, right = box
                    top = max(0, np.floor(top).astype('int32'))
                    left = max(0, np.floor(left).astype('int32'))
                    bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                    right = min(image.size[0], np.floor(right).astype('int32'))
                    # Compute the overlap with every wall box; keep the detection
                    # only if it lies mostly inside some wall region.
                    special_area = (right - left) * (bottom - top)
                    keep = False
                    for w_left, w_top, w_right, w_bottom in wall_boxes:
                        inter_left = max(left, w_left)
                        inter_top = max(top, w_top)
                        inter_right = min(right, w_right)
                        inter_bottom = min(bottom, w_bottom)
                        if inter_right > inter_left and inter_bottom > inter_top:
                            inter_area = (inter_right - inter_left) * (inter_bottom - inter_top)
                            if inter_area / special_area >= 0.6:  # overlap ratio >= 60%
                                keep = True
                                break
                    if predicted_class == "wall_konggu":
                        # A hollowing box must not cover more than 50% of the whole image.
                        if special_area / (image.size[0] * image.size[1]) > 0.5:
                            keep = False
                    if keep:
                        color = self.colors[int(c)]
                        mask[top:bottom, left:right] = color
                        coords.append(
                            (
                                self.classes.get(predicted_class),
                                float(score),
                                [(int(left), int(top)), (int(right), int(top)),
                                 (int(right), int(bottom)), (int(left), int(bottom))]
                            )
                        )

            mask = cv2.cvtColor(mask, cv2.COLOR_RGB2BGR)
            # print("coords:", coords)
            return mask, coords
        except Exception as e:
            print(e)
            # Keep the (mask, coords) contract even on failure so callers can still unpack the result.
            return None, []


if __name__ == "__main__":
    model = YOLODetect()
    image_path = "test.jpg"
    # detect() opens the file itself, so pass the path rather than a PIL.Image object.
    mask, coords = model.detect(image_path)
    # The returned mask is a BGR numpy array, so save it with OpenCV.
    cv2.imwrite("mask.jpg", mask, [cv2.IMWRITE_JPEG_QUALITY, 95])
    print(coords)
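

# --------------------------------------------------------------------------- #
# Illustrative sketch (not part of the original module): one possible way to
# blend the returned BGR mask over the source image for visual inspection.
# The helper name `overlay_mask` and the 0.4 blending weight are assumptions
# made for this example only.
# --------------------------------------------------------------------------- #
def overlay_mask(image_path, mask, alpha=0.4):
    """Blend a detection mask over the original image; returns a BGR array."""
    base = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    # Resize the mask in case the source image and mask shapes differ.
    mask = cv2.resize(mask, (base.shape[1], base.shape[0]))
    return cv2.addWeighted(base, 1.0, mask, alpha, 0)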