Boen_Shi fd590d1294 feat(model): add confidence scores to detection results; update Dockerfile for persistent code deployment
- Add a score field to the MaskInfo model to store detection confidence (see the sketch after this header)
- Update the YOLO detection logic to extract and pass through prediction scores
- Extend the coordinate data structure to include confidence information
- Adjust the data-processing pipeline so score data is passed through correctly
- Modify the Dockerfile to support persistent code deployment
- Update the README to document how code persistence is configured
2026-01-29 15:50:14 +08:00
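
The score field mentioned above ends up next to the label and box geometry that detect() already emits. A minimal sketch of what the updated MaskInfo model might look like, assuming a Pydantic model; apart from score, the field names and types are illustrative, not the project's actual schema:

    from typing import List, Tuple

    from pydantic import BaseModel, Field


    # Hypothetical sketch only: `score` is the field added by this commit;
    # the other fields are assumptions about how MaskInfo is shaped.
    class MaskInfo(BaseModel):
        label: str                              # English display label, e.g. "Hollowing"
        score: float = Field(ge=0.0, le=1.0)    # YOLO detection confidence
        polygon: List[Tuple[int, int]]          # four box corners as (x, y) points

Each entry that detect() appends to coords below carries exactly these three pieces of information: a display label, a float confidence, and the four corner points of the box.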

Python · 151 lines · 7.1 KiB

import io
import os

import cv2
import torch
import numpy as np

os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # restrict inference to the first GPU

from PIL import Image, ImageDraw, ImageFont, ImageSequence

from app.core.yolo_detect.yolo import YOLO  # assumes a YOLO base model class is available
from app.core.yolo_detect.utils.utils import (cvtColor, get_classes, preprocess_input,
                                              resize_image, show_config)
# Maps the model's internal class names to English display labels.
CLASS_NAMES = {
    "wall_konggu": "Hollowing",
    "wall_shenshui": "Water seepage",
    "wall_kailie": "Cracking",
    "wall_konggu_gap": "Gap in hollowing",
    "wall": "Wall",
}


class YOLODetect(YOLO):
    def __init__(self):
        super().__init__()
        self.classes = CLASS_NAMES

    def detect(self, img_input, crop=False, count=False):
        try:
            image = Image.open(img_input)
            if image.format == "MPO":
                # MPO (multi-picture JPEG) inputs are flattened to their first frame
                # and re-encoded as a plain JPEG in memory before detection.
                image = next(ImageSequence.Iterator(image))
                jpeg_image_in_memory = io.BytesIO()
                image.save(jpeg_image_in_memory, format="JPEG")
                jpeg_image_in_memory.seek(0)
                image = Image.open(jpeg_image_in_memory)
            # if isinstance(img_input, str):
            #     image = cv2.imdecode(np.fromfile(img_input, dtype=np.uint8), cv2.IMREAD_COLOR)
            #     # image = Image.open(img_input)
            # else:
            #     image = img_input
            # ---------------------------------------------------#
            #   Compute the height and width of the input image.
            # ---------------------------------------------------#
            image_shape = np.array(np.shape(image)[0:2])
            # ---------------------------------------------------------#
            #   Convert the image to RGB so grayscale inputs do not fail at prediction time.
            #   Only RGB prediction is supported; all other image types are converted to RGB.
            # ---------------------------------------------------------#
            image = cvtColor(image)
            # ---------------------------------------------------------#
            #   Pad the image with gray bars for a distortion-free resize.
            #   A plain resize would also work for detection.
            # ---------------------------------------------------------#
            image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
            # ---------------------------------------------------------#
            #   Add the batch dimension:
            #   h, w, 3 => 3, h, w => 1, 3, h, w
            # ---------------------------------------------------------#
            image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
            with torch.no_grad():
                images = torch.from_numpy(image_data)
                if self.cuda:
                    images = images.cuda()
                # ---------------------------------------------------------#
                #   Feed the image into the network for prediction.
                # ---------------------------------------------------------#
                outputs = self.net(images)
                outputs = self.bbox_util.decode_box(outputs)
                # ---------------------------------------------------------#
                #   Stack the predicted boxes, then run non-maximum suppression.
                # ---------------------------------------------------------#
                results = self.bbox_util.non_max_suppression(outputs, self.num_classes, self.input_shape,
                                                             image_shape, self.letterbox_image, conf_thres=self.confidence,
                                                             nms_thres=self.nms_iou)
            if results[0] is None:
                # No detections: return an empty mask and an empty coordinate list
                # so the return type matches the normal path.
                return np.zeros((image.size[1], image.size[0], 3), dtype=np.uint8), []
            # results[0] columns: 0-3 box (top, left, bottom, right), 4 confidence, 5 class index.
            top_label = np.array(results[0][:, 5], dtype='int32')
            top_conf = results[0][:, 4]
            top_boxes = results[0][:, :4]

            # Build an RGB mask the same size as the input image (PIL size is width, height).
            mask = np.zeros((image.size[1], image.size[0], 3), dtype=np.uint8)
            coords = []
            # Collect the wall regions first.
            wall_boxes = []
            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                if predicted_class == "wall":
                    box = top_boxes[i]
                    top, left, bottom, right = box
                    top = max(0, np.floor(top).astype('int32'))
                    left = max(0, np.floor(left).astype('int32'))
                    bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                    right = min(image.size[0], np.floor(right).astype('int32'))
                    wall_boxes.append((left, top, right, bottom))
            # Then handle the defect classes.
            for i, c in enumerate(top_label):
                predicted_class = self.class_names[int(c)]
                if predicted_class != "wall":
                    box = top_boxes[i]
                    score = top_conf[i]
                    top, left, bottom, right = box
                    top = max(0, np.floor(top).astype('int32'))
                    left = max(0, np.floor(left).astype('int32'))
                    bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                    right = min(image.size[0], np.floor(right).astype('int32'))

                    # Keep a defect only if it overlaps some wall box by at least 60% of its own area.
                    special_area = (right - left) * (bottom - top)
                    keep = False
                    for w_left, w_top, w_right, w_bottom in wall_boxes:
                        inter_left = max(left, w_left)
                        inter_top = max(top, w_top)
                        inter_right = min(right, w_right)
                        inter_bottom = min(bottom, w_bottom)
                        if inter_right > inter_left and inter_bottom > inter_top:
                            inter_area = (inter_right - inter_left) * (inter_bottom - inter_top)
                            if inter_area / special_area >= 0.6:  # overlap ratio >= 60%
                                keep = True
                                break

                    if predicted_class == "wall_konggu":
                        # A hollowing box may not cover more than 50% of the whole image.
                        if special_area / (image.size[0] * image.size[1]) > 0.5:
                            keep = False

                    if keep:
                        color = self.colors[int(c)]
                        mask[top:bottom, left:right] = color
                        coords.append(
                            (
                                self.classes.get(predicted_class),
                                float(score),
                                [(int(left), int(top)), (int(right), int(top)), (int(right), int(bottom)), (int(left), int(bottom))]
                            )
                        )
            # The mask was built in RGB order; convert to BGR for OpenCV consumers.
            mask = cv2.cvtColor(mask, cv2.COLOR_RGB2BGR)
            # print("coords:", coords)
            return mask, coords
        except Exception as e:
            print(e)
            # Keep the (mask, coords) return shape even on failure so callers can unpack it.
            return None, []


if __name__ == "__main__":
    model = YOLODetect()
    image = "test.jpg"
    # detect() opens the image itself, so pass the path rather than a PIL Image.
    mask, coords = model.detect(image)
    # The returned mask is a BGR numpy array, so save it with OpenCV.
    cv2.imwrite("mask.jpg", mask, [cv2.IMWRITE_JPEG_QUALITY, 95])
    print(coords)