
Can you provide visualization inference code? #10

Open
LiuChuanWei opened this issue Jan 16, 2025 · 3 comments

Comments

@LiuChuanWei

Could you provide the visualization inference code behind Figure 6 of the paper ("Visualization Results on M-OWODB. Compared to the open-vocabulary model using prompts with all 80 classes, our approach that extends to open-world only employs 40 class embeddings with an additional 'unknown' wildcard.")?

@wl654655902

wl654655902 commented Jan 17, 2025

I wrote this by referring to the inference code in YOLO-World; with the YOLO-UniOW config and checkpoint it produces visualizations:

```python
import os
import sys

import cv2
import numpy as np
import PIL.Image
import supervision as sv
import torch
from mmengine.config import Config
from mmengine.dataset import Compose
from mmengine.runner import Runner
from mmengine.runner.amp import autocast
from mmyolo.registry import RUNNERS  # noqa: F401 -- import registers mmyolo modules
from torchvision.ops import nms

# Make the yolo_world package importable (adjust to your checkout).
sys.path.append('/home/wl/code/opensource/open_detect/YOLO-World/yolo_world')

bounding_box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)


def run_image(
        runner,
        input_image,
        max_num_boxes=100,
        score_thr=0.05,
        nms_thr=0.5,
):
    # Build prompts: one entry per class plus a trailing " " padding entry.
    texts = [[t.strip()] for t in class_names.split(",")] + [[" "]]
    data_info = runner.pipeline(dict(img_id=0, img_path=input_image,
                                     texts=texts))

    data_batch = dict(
        inputs=data_info["inputs"].unsqueeze(0),
        data_samples=[data_info["data_samples"]],
    )

    with autocast(enabled=False), torch.no_grad():
        output = runner.model.test_step(data_batch)[0]
        runner.model.class_names = texts
        pred_instances = output.pred_instances

    # NMS, score filtering, and top-k truncation.
    keep_idxs = nms(pred_instances.bboxes, pred_instances.scores,
                    iou_threshold=nms_thr)
    pred_instances = pred_instances[keep_idxs]
    pred_instances = pred_instances[pred_instances.scores.float() > score_thr]

    if len(pred_instances.scores) > max_num_boxes:
        indices = pred_instances.scores.float().topk(max_num_boxes)[1]
        pred_instances = pred_instances[indices]
    output.pred_instances = pred_instances

    pred_instances = pred_instances.cpu().numpy()
    infos = []
    for cls, box, conf in zip(pred_instances['labels'],
                              pred_instances['bboxes'],
                              pred_instances['scores']):
        infos.append('%d %.2f %.2f %.2f %.2f %.4f'
                     % (cls, box[0], box[1], box[2], box[3], conf))

    detections = sv.Detections(
        xyxy=pred_instances['bboxes'],
        class_id=pred_instances['labels'],
        confidence=pred_instances['scores'],
    )
    label_names = [t.strip() for t in class_names.split(",")]

    labels = [
        f"{class_id} {label_names[class_id]} {confidence:0.2f}"
        for class_id, confidence
        in zip(detections.class_id, detections.confidence)
    ]

    image = PIL.Image.open(input_image)
    svimage = np.array(image)
    svimage = bounding_box_annotator.annotate(svimage, detections)
    svimage = label_annotator.annotate(svimage, detections, labels)
    # Flip RGB to BGR so cv2.imwrite saves correct colors.
    return svimage[:, :, ::-1], infos


if __name__ == "__main__":
    cfg = Config.fromfile(
        "/configs/pretrain/yolo_uniow_s_lora_bn_5e-4_100e_8gpus_obj365v1_goldg_train_lvis_minival.py"
    )
    cfg.load_from = "yolo_uniow_s_lora_bn_5e-4_100e_8gpus_obj365v1_goldg_train_lvis_minival.pth"

    class_names = "person, bicycle"

    img_dir = './images'
    result_dir = './uniow_s_ft_result'
    os.makedirs(result_dir, exist_ok=True)

    cfg.work_dir = "."
    runner = Runner.from_cfg(cfg)
    runner.call_hook("before_run")
    runner.load_or_resume()
    runner.pipeline = Compose(cfg.test_dataloader.dataset.pipeline)
    runner.model.eval()

    for name in os.listdir(img_dir):
        img_path = os.path.join(img_dir, name)
        result_path = os.path.join(result_dir, name)

        img, info = run_image(runner, img_path)

        cv2.imwrite(result_path, img)
```
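For reference, the `texts` structure the script builds from the comma-separated `class_names` string can be sketched in isolation (a minimal standalone snippet, independent of mmyolo; `build_texts` is a hypothetical helper name):

```python
# Minimal sketch of how the script turns a comma-separated class_names
# string into the nested `texts` list used by the YOLO-World-style
# pipeline: one single-item list per class, plus a trailing [" "]
# padding entry.
def build_texts(class_names: str):
    return [[t.strip()] for t in class_names.split(",")] + [[" "]]

texts = build_texts("person, bicycle")
print(texts)  # [['person'], ['bicycle'], [' ']]
```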

@LiuChuanWei
Author

@wl654655902 Thank you very much for the reply. But that is open-vocabulary inference code; my question is how to run open-world inference, i.e., how to jointly infer with the "object/unknown" wildcard and the known-class prompts so the results contain both known-class objects and unknown objects. From the paper's description, I still don't understand the relationship between the known classes, the "object" wildcard, and the "unknown" wildcard. If you have experience with this, I'd greatly appreciate you sharing it.

@wl654655902

wl654655902 commented Jan 17, 2025

Open-world inference uses the same script; just change the cfg and class_names:

```python
    cfg = Config.fromfile(
        "/configs/owod_ft/yolo_uniow_s_lora_bn_1e-3_20e_8gpus_koala.py"
    )
    cfg.load_from = ".work_dirs/yolo_uniow_s_lora_bn_1e-3_20e_8gpus_koala_koala_train_task1/best_owod_Both_epoch_40.pth"

    class_names = ("person,car, ....,unknown")  # fill in your own trained classes plus "unknown"
```
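In this setup, once "unknown" is appended as the last prompt, detections carry its class index like any known class, so separating known from unknown boxes is a matter of index bookkeeping. A hedged sketch (not from the repo; `split_detections` and the two-class list are illustrative assumptions):

```python
# Assumed setup: known classes from open-world fine-tuning, with the
# "unknown" wildcard appended as the final prompt.
known = ["person", "car"]
prompts = known + ["unknown"]

def split_detections(class_ids):
    """Partition predicted class indices into known names and an unknown count."""
    unknown_idx = len(prompts) - 1
    known_hits = [prompts[i] for i in class_ids if i != unknown_idx]
    unknown_count = sum(1 for i in class_ids if i == unknown_idx)
    return known_hits, unknown_count

print(split_detections([0, 2, 1, 2]))  # (['person', 'car'], 2)
```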
