Boen_Shi b054769b40 feat(model): 添加YOLO Detect模型支持并移除预处理功能
- 新增yolo_detect模块,包含backbone、nets、utils等组件
- 在模型配置中添加yolo_detect选项,支持新的检测模型
- 移除SAM3预处理相关代码和配置项
- 更新Dockerfile删除core目录下所有文件以减少镜像体积
- 修改worker服务移除图像标签预处理逻辑,直接进行模型检测
2026-01-27 15:00:27 +08:00

145 lines
6.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import io
import os
import cv2
import torch
import numpy as np
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from PIL import Image, ImageDraw, ImageFont, ImageSequence
from app.core.yolo_detect.yolo import YOLO # 假设你有 YOLO 模型类
from app.core.yolo_detect.utils.utils import (cvtColor, get_classes, preprocess_input,
resize_image, show_config)
# Maps the detector's raw class identifiers to the human-readable English
# labels attached to reported detections.
CLASS_NAMES = dict(
    wall_konggu="Hollowing",
    wall_shenshui="Water seepage",
    wall_kailie="Cracking",
    wall_konggu_gap="Gap in hollowing",
    wall="Wall",
)
class YOLODetect(YOLO):
    """Wall-defect detector built on the project's YOLO base model.

    Detects hollowing, seepage, cracking, etc. on walls; a defect box is
    kept only when it overlaps a detected "wall" region by at least 60%
    of its own area (with an extra size sanity check for hollowing).
    """

    def __init__(self):
        super().__init__()
        # Raw class name -> English display label used in returned coords.
        self.classes = CLASS_NAMES

    @staticmethod
    def _clip_box(box, size):
        """Clip a raw (top, left, bottom, right) box to the image bounds.

        Returns integer (left, top, right, bottom) in pixel coordinates;
        ``size`` is the PIL ``(width, height)`` tuple.
        """
        top, left, bottom, right = box
        top = max(0, np.floor(top).astype('int32'))
        left = max(0, np.floor(left).astype('int32'))
        bottom = min(size[1], np.floor(bottom).astype('int32'))
        right = min(size[0], np.floor(right).astype('int32'))
        return left, top, right, bottom

    @staticmethod
    def _overlaps_wall(box, area, wall_boxes):
        """Return True when ``box`` overlaps any wall box by >= 60% of its own area."""
        left, top, right, bottom = box
        for w_left, w_top, w_right, w_bottom in wall_boxes:
            inter_left = max(left, w_left)
            inter_top = max(top, w_top)
            inter_right = min(right, w_right)
            inter_bottom = min(bottom, w_bottom)
            if inter_right > inter_left and inter_bottom > inter_top:
                inter_area = (inter_right - inter_left) * (inter_bottom - inter_top)
                if inter_area / area >= 0.6:  # overlap ratio >= 60%
                    return True
        return False

    def detect(self, img_input, crop=False, count=False):
        """Run wall-defect detection on one image.

        Args:
            img_input: path or binary file-like object accepted by
                ``PIL.Image.open``.
            crop, count: unused; kept for interface compatibility with the
                base class.

        Returns:
            ``(mask, coords)`` where ``mask`` is an H x W x 3 BGR uint8
            array with kept defect regions filled with the per-class color
            (all-black when there are no detections), and ``coords`` is a
            list of ``(label, [four corner points])`` tuples. On any
            failure the error is printed and ``(None, [])`` is returned.
        """
        try:
            image = Image.open(img_input)
            # MPO files (multi-picture JPEGs from some cameras) confuse
            # later processing; re-encode the first frame as a plain JPEG.
            if image.format == "MPO":
                image = next(ImageSequence.Iterator(image))
                jpeg_image_in_memory = io.BytesIO()
                image.save(jpeg_image_in_memory, format="JPEG")
                jpeg_image_in_memory.seek(0)
                image = Image.open(jpeg_image_in_memory)
            # Original (height, width) — needed to undo the letterboxing
            # when mapping boxes back to image coordinates.
            image_shape = np.array(np.shape(image)[0:2])
            # Force RGB so grayscale/palette inputs do not break inference;
            # the model only supports RGB images.
            image = cvtColor(image)
            # Letterbox-resize to the network input size (gray padding keeps
            # the aspect ratio when self.letterbox_image is enabled).
            image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
            # h, w, 3 -> 3, h, w -> 1, 3, h, w (add the batch dimension).
            image_data = np.expand_dims(
                np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
            with torch.no_grad():
                images = torch.from_numpy(image_data)
                if self.cuda:
                    images = images.cuda()
                # Forward pass, then decode raw outputs into boxes.
                outputs = self.net(images)
                outputs = self.bbox_util.decode_box(outputs)
                # Stack predictions and apply non-max suppression.
                results = self.bbox_util.non_max_suppression(
                    outputs, self.num_classes, self.input_shape,
                    image_shape, self.letterbox_image, conf_thres=self.confidence,
                    nms_thres=self.nms_iou)

            mask = np.zeros((image.size[1], image.size[0], 3), dtype=np.uint8)
            if results[0] is None:
                # No detections: return an empty BGR mask so the return type
                # is consistent with the detection path (the original
                # returned a PIL image here, surprising callers).
                return cv2.cvtColor(mask, cv2.COLOR_RGB2BGR), []

            top_label = np.array(results[0][:, 5], dtype='int32')
            top_conf = results[0][:, 4]
            top_boxes = results[0][:, :4]

            # Pass 1: collect all wall regions.
            wall_boxes = []
            for i, c in enumerate(top_label):
                if self.class_names[int(c)] == "wall":
                    wall_boxes.append(self._clip_box(top_boxes[i], image.size))

            # Pass 2: keep defect boxes that sufficiently overlap a wall.
            coords = []
            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                if predicted_class == "wall":
                    continue
                left, top, right, bottom = self._clip_box(top_boxes[i], image.size)
                special_area = (right - left) * (bottom - top)
                # Guard against degenerate zero-area boxes (would divide by
                # zero in the overlap-ratio test).
                keep = special_area > 0 and self._overlaps_wall(
                    (left, top, right, bottom), special_area, wall_boxes)
                if predicted_class == "wall_konggu":
                    # Reject implausibly large hollowing boxes
                    # (more than 50% of the whole image area).
                    if special_area / (image.size[0] * image.size[1]) > 0.5:
                        keep = False
                if keep:
                    color = self.colors[int(c)]
                    mask[top:bottom, left:right] = color
                    coords.append((self.classes.get(predicted_class),
                                   [(int(left), int(top)), (int(right), int(top)),
                                    (int(right), int(bottom)), (int(left), int(bottom))]))
            mask = cv2.cvtColor(mask, cv2.COLOR_RGB2BGR)
            return mask, coords
        except Exception as e:
            # Best-effort service path: report the failure but return an
            # unpackable result. The original implicitly returned None,
            # which made `mask, coords = detect(...)` raise TypeError.
            print(e)
            return None, []
if __name__ == "__main__":
    # Smoke test: run detection on a local image and save the mask.
    model = YOLODetect()
    image_path = "test.jpg"
    # detect() calls Image.open internally, so pass the path itself —
    # passing an already-opened PIL image (as before) made that call fail.
    mask, coords = model.detect(image_path)
    if mask is not None:
        # mask is a BGR numpy array, not a PIL image; write it with OpenCV
        # (numpy arrays have no .save method).
        cv2.imwrite("mask.jpg", mask)
    print(coords)