

Step-by-Step: Generating Grad-CAM Heatmaps from YOLOv5

Requirements

I am using YOLOv5 7.0.

Results first:

[example Grad-CAM heatmap outputs]

The results are for reference only.

Step 1

First, set up the YOLOv5 environment. Installing the dependencies with pip install -r requirements.txt is enough; my other blog posts describe the environment setup in detail. Configure the GPU environment yourself.
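As a reference, a minimal setup might look like the following (this assumes you start from the official Ultralytics repository and want the 7.0 release; adapt it to your own fork or offline copy):

git clone https://github.com/ultralytics/yolov5
cd yolov5
git checkout v7.0            # this guide targets the 7.0 release
pip install -r requirements.txt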

Step 2

Make sure YOLOv5 itself runs without problems and produces normal detection output.
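For example, the stock detection script should run and draw boxes before you change anything (a quick check, assuming the pretrained yolov5s.pt weights):

python detect.py --weights yolov5s.pt --source data/images/bus.jpg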
[screenshot: detect.py detection results]

Step 3

Add a main_gradcam.py file in the project root folder.
[screenshot: project folder with main_gradcam.py]
main_gradcam.py

import os
import random
import time
import argparse
import numpy as np
from models.gradcam import YOLOV5GradCAM, YOLOV5GradCAMPP
from models.yolov5_object_detector import YOLOV5TorchObjectDetector
import cv2

# dataset class names
names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
         'hair drier', 'toothbrush']  # class names

# the three detect layers of the yolov5s network
target_layers = ['model_17_cv3_act', 'model_20_cv3_act', 'model_23_cv3_act']

# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="yolov5s.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='data/images/bus.jpg', help='input image path')
parser.add_argument('--output-dir', type=str, default='runs/result17', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                    help='The layer hierarchical address to which gradcam will applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam method')
parser.add_argument('--device', type=str, default='cuda', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true', help='do not show label and box on the heatmap')
args = parser.parse_args()


def get_res_img(bbox, mask, res_img):
    mask = mask.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy().astype(
        np.uint8)
    heatmap = cv2.applyColorMap(mask, cv2.COLORMAP_JET)
    # n_heatmat = (Box.fill_outer_box(heatmap, bbox) / 255).astype(np.float32)
    n_heatmat = (heatmap / 255).astype(np.float32)
    res_img = res_img / 255
    res_img = cv2.add(res_img, n_heatmat)
    res_img = (res_img / res_img.max())
    return res_img, n_heatmat


def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    # this is a bug in cv2. It does not put box on a converted image from torch unless it's buffered and read again!
    cv2.imwrite('temp.jpg', (img * 255).astype(np.uint8))
    img = cv2.imread('temp.jpg')

    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        outside = c1[1] - t_size[1] - 3 >= 0  # label fits outside box up
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 if outside else c1[1] + t_size[1] + 3
        outsize_right = c2[0] - img.shape[:2][1] > 0  # label fits outside box right
        c1 = c1[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c1[0], c1[1]
        c2 = c2[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c2[0], c2[1]
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2 if outside else c2[1] - 2), 0, tl / 3, [225, 255, 255],
                    thickness=tf, lineType=cv2.LINE_AA)
    return img


# run detection and Grad-CAM on a single image
def main(img_path):
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    device = args.device
    input_size = (args.img_size, args.img_size)
    # read the image (BGR format)
    img = cv2.imread(img_path)
    print('[INFO] Loading the model')
    # instantiate the YOLOv5 model and get the detection results
    model = YOLOV5TorchObjectDetector(args.model_path, device, img_size=input_size, names=names)
    # img[..., ::-1]: BGR --> RGB
    # (480, 640, 3) --> (1, 3, 480, 640)
    torch_img = model.preprocessing(img[..., ::-1])
    tic = time.time()
    # iterate over the three detection layers
    for target_layer in target_layers:
        # choose the grad-cam method
        if args.method == 'gradcam':
            saliency_method = YOLOV5GradCAM(model=model, layer_name=target_layer, img_size=input_size)
        elif args.method == 'gradcampp':
            saliency_method = YOLOV5GradCAMPP(model=model, layer_name=target_layer, img_size=input_size)
        masks, logits, [boxes, _, class_names, conf] = saliency_method(torch_img)  # get the predictions
        result = torch_img.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy()
        result = result[..., ::-1]  # convert to bgr
        # output settings
        image_name = os.path.basename(img_path)  # image file name
        save_path = f'{args.output_dir}{image_name[:-4]}/{args.method}'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        print(f'[INFO] Saving the final image at {save_path}')
        # iterate over every detected object in the image
        for i, mask in enumerate(masks):
            res_img = result.copy()
            # position and class of the object
            bbox, cls_name = boxes[0][i], class_names[0][i]
            label = f'{cls_name} {conf[0][i]}'  # class name + confidence score
            # heatmap for this object
            res_img, heat_map = get_res_img(bbox, mask, res_img)
            res_img = plot_one_box(bbox, res_img, label=label, color=colors[int(names.index(cls_name))],
                                   line_thickness=3)
            # resize back to the original image size
            res_img = cv2.resize(res_img, dsize=(img.shape[:-1][::-1]))
            output_path = f'{save_path}/{target_layer[6:8]}_{i}.jpg'
            cv2.imwrite(output_path, res_img)
            print(f'{target_layer[6:8]}_{i}.jpg done!!')
    print(f'Total time : {round(time.time() - tic, 4)} s')


if __name__ == '__main__':
    # if the image path is a folder
    if os.path.isdir(args.img_path):
        img_list = os.listdir(args.img_path)
        print(img_list)
        for item in img_list:
            # build the full path of each image in the folder
            main(os.path.join(args.img_path, item))
    # single image
    else:
        main(args.img_path)
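A note on target_layers: the three names above address the last activation of each detection branch of yolov5s (layers 17, 20 and 23), written with underscores because find_yolo_layer() in gradcam.py walks the module hierarchy by splitting on '_'. If you use a different model, you can list candidate layer names with a small sketch like the following (my own helper, not part of the original post; run it from the YOLOv5 root with your own weights):

from models.experimental import attempt_load

model = attempt_load('yolov5s.pt', inplace=False, fuse=False)
for name, _ in model.named_modules():
    # e.g. 'model.17.cv3.act' -> 'model_17_cv3_act'
    if name.endswith('act'):
        print(name.replace('.', '_'))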

Step 4

Add the following two files to the models folder: gradcam.py and yolov5_object_detector.py.
![screenshot: gradcam.py and yolov5_object_detector.py under models/](https://img-blog.csdnimg.cn/83c33e99f69d43a0a3c7016aec5f81e8.png)
gradcam.py:

import time
import torch
import torch.nn.functional as F


def find_yolo_layer(model, layer_name):
    """Find yolov5 layer to calculate GradCAM and GradCAM++

    Args:
        model: yolov5 model.
        layer_name (str): the name of layer with its hierarchical information.

    Return:
        target_layer: found layer
    """
    hierarchy = layer_name.split('_')
    target_layer = model.model._modules[hierarchy[0]]
    for h in hierarchy[1:]:
        target_layer = target_layer._modules[h]
    return target_layer


class YOLOV5GradCAM:

    # initialization: resolve the target layer and register the hooks
    def __init__(self, model, layer_name, img_size=(640, 640)):
        self.model = model
        self.gradients = dict()
        self.activations = dict()

        def backward_hook(module, grad_input, grad_output):
            self.gradients['value'] = grad_output[0]
            return None

        def forward_hook(module, input, output):
            self.activations['value'] = output
            return None

        target_layer = find_yolo_layer(self.model, layer_name)
        # record the layer's output and gradient during forward/backward, so we can check the hooks work correctly
        target_layer.register_forward_hook(forward_hook)
        target_layer.register_full_backward_hook(backward_hook)

        device = 'cuda' if next(self.model.model.parameters()).is_cuda else 'cpu'
        self.model(torch.zeros(1, 3, *img_size, device=device))

    def forward(self, input_img, class_idx=True):
        """
        Args:
            input_img: input image with shape of (1, 3, H, W)
        Return:
            mask: saliency map of the same spatial dimension with input
            logit: model output
            preds: The object predictions
        """
        saliency_maps = []
        b, c, h, w = input_img.size()
        preds, logits = self.model(input_img)
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # back-propagate to obtain the gradients
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']
            activations = self.activations['value']
            b, k, u, v = gradients.size()
            alpha = gradients.view(b, k, -1).mean(2)
            weights = alpha.view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds

    def __call__(self, input_img):
        return self.forward(input_img)


class YOLOV5GradCAMPP(YOLOV5GradCAM):
    def __init__(self, model, layer_name, img_size=(640, 640)):
        super(YOLOV5GradCAMPP, self).__init__(model, layer_name, img_size)

    def forward(self, input_img, class_idx=True):
        saliency_maps = []
        b, c, h, w = input_img.size()
        tic = time.time()
        preds, logits = self.model(input_img)
        print("[INFO] model-forward took: ", round(time.time() - tic, 4), 'seconds')
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # back-propagate to obtain the gradients
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']  # dS/dA
            activations = self.activations['value']  # A
            b, k, u, v = gradients.size()
            alpha_num = gradients.pow(2)
            alpha_denom = gradients.pow(2).mul(2) + \
                          activations.mul(gradients.pow(3)).view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
            # torch.where(condition, x, y): take x where the condition holds, otherwise y
            alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom, torch.ones_like(alpha_denom))
            alpha = alpha_num.div(alpha_denom + 1e-7)
            positive_gradients = F.relu(score.exp() * gradients)  # ReLU(dY/dA) == ReLU(exp(S)*dS/dA))
            weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1).view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds
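For reference, the map computed in YOLOV5GradCAM.forward above is the standard Grad-CAM formulation: for each detected box, the class score $y^c$ (the logit selected by score = logit[cls]) is back-propagated to the hooked layer, the gradients are globally average-pooled into per-channel weights, and the weighted activations are passed through a ReLU:

$$\alpha_k^c=\frac{1}{Z}\sum_{i}\sum_{j}\frac{\partial y^c}{\partial A_{ij}^k},\qquad L_{\text{Grad-CAM}}^c=\mathrm{ReLU}\Big(\sum_k \alpha_k^c A^k\Big)$$

where $A^k$ is the $k$-th channel of the hooked activation and $Z = u \cdot v$ is its spatial size. The map is then bilinearly upsampled to the input resolution and min-max normalized, exactly as in the code.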

yolov5_object_detector.py:

import numpy as np
import torch
from models.experimental import attempt_load
from utils.general import xywh2xyxy
from utils.dataloaders import letterbox
import cv2
import time
import torchvision
import torch.nn as nn
from utils.metrics import box_iou


class YOLOV5TorchObjectDetector(nn.Module):
    def __init__(self,
                 model_weight,
                 device,
                 img_size,
                 names=None,
                 mode='eval',
                 confidence=0.45,
                 iou_thresh=0.45,
                 agnostic_nms=False):
        super(YOLOV5TorchObjectDetector, self).__init__()
        self.device = device
        self.model = None
        self.img_size = img_size
        self.mode = mode
        self.confidence = confidence
        self.iou_thresh = iou_thresh
        self.agnostic = agnostic_nms
        self.model = attempt_load(model_weight, inplace=False, fuse=False)
        self.model.requires_grad_(True)
        self.model.to(device)
        if self.mode == 'train':
            self.model.train()
        else:
            self.model.eval()
        # fetch the names
        if names is None:
            self.names = ['your dataset classname']
        else:
            self.names = names

        # preventing cold start
        img = torch.zeros((1, 3, *self.img_size), device=device)
        self.model(img)

    @staticmethod
    def non_max_suppression(prediction, logits, conf_thres=0.3, iou_thres=0.45, classes=None, agnostic=False,
                            multi_label=False, labels=(), max_det=300):
        """Runs Non-Maximum Suppression (NMS) on inference and logits results
        Returns:
            list of detections, on (n,6) tensor per image [xyxy, conf, cls] and pruned input logits (n, number-classes)
        """
        nc = prediction.shape[2] - 5  # number of classes
        xc = prediction[..., 4] > conf_thres  # candidates

        # Checks
        assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
        assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
        time_limit = 10.0  # seconds to quit after
        redundant = True  # require redundant detections
        multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
        merge = False  # use merge-NMS

        t = time.time()
        output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
        logits_output = [torch.zeros((0, nc), device=logits.device)] * logits.shape[0]
        # logits_output = [torch.zeros((0, 80), device=logits.device)] * logits.shape[0]
        for xi, (x, log_) in enumerate(zip(prediction, logits)):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence
            log_ = log_[xc[xi]]

            # Cat apriori labels if autolabelling
            if labels and len(labels[xi]):
                l = labels[xi]
                v = torch.zeros((len(l), nc + 5), device=x.device)
                v[:, :4] = l[:, 1:5]  # box
                v[:, 4] = 1.0  # conf
                v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
                x = torch.cat((x, v), 0)

            # If none remain process next image
            if not x.shape[0]:
                continue

            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = xywh2xyxy(x[:, :4])

            # Detections matrix nx6 (xyxy, conf, cls)
            if multi_label:
                i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
                x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
            else:  # best class only
                conf, j = x[:, 5:].max(1, keepdim=True)
                x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
                log_ = log_[conf.view(-1) > conf_thres]

            # Filter by class
            if classes is not None:
                x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

            # Check shape
            n = x.shape[0]  # number of boxes
            if not n:  # no boxes
                continue
            elif n > max_nms:  # excess boxes
                x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

            # Batched NMS
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
            if i.shape[0] > max_det:  # limit detections
                i = i[:max_det]
            if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy

            output[xi] = x[i]
            logits_output[xi] = log_[i]
            assert log_[i].shape[0] == x[i].shape[0]
            if (time.time() - t) > time_limit:
                print(f'WARNING: NMS time limit {time_limit}s exceeded')
                break  # time limit exceeded

        return output, logits_output

    @staticmethod
    def yolo_resize(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
        return letterbox(img, new_shape=new_shape, color=color, auto=auto, scaleFill=scaleFill, scaleup=scaleup)

    def forward(self, img):
        prediction, logits, _ = self.model(img, augment=False)
        prediction, logits = self.non_max_suppression(prediction, logits, self.confidence, self.iou_thresh,
                                                      classes=None,
                                                      agnostic=self.agnostic)
        self.boxes, self.class_names, self.classes, self.confidences = [[[] for _ in range(img.shape[0])]
                                                                        for _ in range(4)]
        for i, det in enumerate(prediction):  # detections per image
            if len(det):
                for *xyxy, conf, cls in det:
                    # convert the coordinates to integers
                    bbox = [int(b) for b in xyxy]
                    self.boxes[i].append(bbox)
                    self.confidences[i].append(round(conf.item(), 2))
                    cls = int(cls.item())
                    self.classes[i].append(cls)
                    if self.names is not None:
                        self.class_names[i].append(self.names[cls])
                    else:
                        self.class_names[i].append(cls)
        return [self.boxes, self.classes, self.class_names, self.confidences], logits

    def preprocessing(self, img):
        if len(img.shape) != 4:
            img = np.expand_dims(img, axis=0)
        im0 = img.astype(np.uint8)
        img = np.array([self.yolo_resize(im, new_shape=self.img_size)[0] for im in im0])
        img = img.transpose((0, 3, 1, 2))
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img / 255.0
        return img
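A minimal usage sketch of this wrapper (my own illustration, assuming the modified yolo.py from step 5 is already in place and a COCO-pretrained yolov5s.pt is available): forward() returns the per-image boxes, class indices, class names and confidences, plus the pruned class logits that Grad-CAM later back-propagates through.

import cv2
from models.yolov5_object_detector import YOLOV5TorchObjectDetector

class_names = [str(i) for i in range(80)]  # substitute your real class-name list here
model = YOLOV5TorchObjectDetector('yolov5s.pt', 'cpu', img_size=(640, 640), names=class_names)
img = cv2.imread('data/images/bus.jpg')
torch_img = model.preprocessing(img[..., ::-1])  # BGR -> RGB, letterbox, NCHW, scaled to 0-1
[boxes, classes, names_out, confidences], logits = model(torch_img)
print(boxes[0], names_out[0], confidences[0])  # detections of the first (and only) image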

Step 5

Modify models/yolo.py:

[screenshot: location of models/yolo.py]

Specifically, modify the forward function of the Detect class:

def forward(self, x):
    z = []  # inference output
    logits_ = []  # modification ---1
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv
        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

        if not self.training:  # inference
            if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            logits = x[i][..., 5:]  # modification ---2

            if isinstance(self, Segment):  # (boxes + masks)
                xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
            else:  # Detect (boxes only)
                xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf), 4)
            z.append(y.view(bs, self.na * nx * ny, self.no))
            logits_.append(logits.view(bs, -1, self.no - 5))  # modification ---3

    # return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
    return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # modification ---4
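After this change the model returns a 3-tuple (predictions, class logits, raw feature maps) in eval mode instead of the usual 2-tuple. A quick sanity check (a sketch, assuming yolov5s.pt sits in the project root and the edited yolo.py is in place):

import torch
from models.experimental import attempt_load

model = attempt_load('yolov5s.pt', inplace=False, fuse=False).eval()
out = model(torch.zeros(1, 3, 640, 640))
print(len(out))                    # expect 3: (pred, logits, x)
print(out[0].shape, out[1].shape)  # e.g. torch.Size([1, 25200, 85]) and torch.Size([1, 25200, 80])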

In case it is unclear how to modify yolo.py, the fully modified yolo.py is included below.
yolo.py

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
YOLO-specific modules

Usage:
    $ python models/yolo.py --cfg yolov5s.yaml
"""

import argparse
import contextlib
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
if platform.system() != 'Windows':
    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
                               time_sync)

try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None


class Detect(nn.Module):
    # YOLOv5 Detect head for detection models
    stride = None  # strides computed during build
    dynamic = False  # force grid reconstruction
    export = False  # export mode

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
        self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)

    def forward(self, x):
        z = []  # inference output
        logits_ = []  # modification ---1
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                logits = x[i][..., 5:]  # modification ---2

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))
                logits_.append(logits.view(bs, -1, self.no - 5))  # modification ---3

        # return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
        return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # modification ---4

    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
        d = self.anchors[i].device
        t = self.anchors[i].dtype
        shape = 1, self.na, ny, nx, 2  # grid shape
        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
        yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compatibility
        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
        return grid, anchor_grid


class Segment(Detect):
    # YOLOv5 Segment head for segmentation models
    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
        super().__init__(nc, anchors, ch, inplace)
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.no = 5 + nc + self.nm  # number of outputs per anchor
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
        self.detect = Detect.forward

    def forward(self, x):
        p = self.proto(x[0])
        x = self.detect(self, x)
        return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])


class BaseModel(nn.Module):
    # YOLOv5 base model
    def forward(self, x, profile=False, visualize=False):
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_once(self, x, profile=False, visualize=False):
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
            if profile:
                self._profile_one_layer(m, x, dt)
            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output
            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)
        return x

    def _profile_one_layer(self, m, x, dt):
        c = m == self.model[-1]  # is final layer, copy input as inplace fix
        o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPs
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        dt.append((time_sync() - t) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s}  module")
        LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f}  {m.type}')
        if c:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        LOGGER.info('Fusing layers... ')
        for m in self.model.modules():
            if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.forward_fuse  # update forward
        self.info()
        return self

    def info(self, verbose=False, img_size=640):  # print model information
        model_info(self, verbose, img_size)

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self


class DetectionModel(BaseModel):
    # YOLOv5 detection model
    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg, encoding='ascii', errors='ignore') as f:
                self.yaml = yaml.safe_load(f)  # model dict

        # Define model
        ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        if anchors:
            LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
            self.yaml['anchors'] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
        self.inplace = self.yaml.get('inplace', True)

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            s = 256  # 2x min stride
            m.inplace = self.inplace
            forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
            m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))])  # forward
            check_anchor_order(m)
            m.anchors /= m.stride.view(-1, 1, 1)
            self.stride = m.stride
            self._initialize_biases()  # only run once

        # Init weights, biases
        initialize_weights(self)
        self.info()
        LOGGER.info('')

    def forward(self, x, augment=False, profile=False, visualize=False):
        if augment:
            return self._forward_augment(x)  # augmented inference, None
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_augment(self, x):
        img_size = x.shape[-2:]  # height, width
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flips (2-ud, 3-lr)
        y = []  # outputs
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self._forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            yi = self._descale_pred(yi, fi, si, img_size)
            y.append(yi)
        y = self._clip_augmented(y)  # clip augmented tails
        return torch.cat(y, 1), None  # augmented inference, train

    def _descale_pred(self, p, flips, scale, img_size):
        # de-scale predictions following augmented inference (inverse operation)
        if self.inplace:
            p[..., :4] /= scale  # de-scale
            if flips == 2:
                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
            elif flips == 3:
                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
        else:
            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
            if flips == 2:
                y = img_size[0] - y  # de-flip ud
            elif flips == 3:
                x = img_size[1] - x  # de-flip lr
            p = torch.cat((x, y, wh, p[..., 4:]), -1)
        return p

    def _clip_augmented(self, y):
        # Clip YOLOv5 augmented inference tails
        nl = self.model[-1].nl  # number of detection layers (P3-P5)
        g = sum(4 ** x for x in range(nl))  # grid points
        e = 1  # exclude layer count
        i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e))  # indices
        y[0] = y[0][:, :-i]  # large
        i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e))  # indices
        y[-1] = y[-1][:, i:]  # small
        return y

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        # https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)


Model = DetectionModel  # retain YOLOv5 'Model' class for backwards compatibility


class SegmentationModel(DetectionModel):
    # YOLOv5 segmentation model
    def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
        super().__init__(cfg, ch, nc, anchors)


class ClassificationModel(BaseModel):
    # YOLOv5 classification model
    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
        super().__init__()
        self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)

    def _from_detection_model(self, model, nc=1000, cutoff=10):
        # Create a YOLOv5 classification model from a YOLOv5 detection model
        if isinstance(model, DetectMultiBackend):
            model = model.model  # unwrap DetectMultiBackend
        model.model = model.model[:cutoff]  # backbone
        m = model.model[-1]  # last layer
        ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels  # ch into module
        c = Classify(ch, nc)  # Classify()
        c.i, c.f, c.type = m.i, m.f, 'models.common.Classify'  # index, from, type
        model.model[-1] = c  # replace
        self.model = model.model
        self.stride = model.stride
        self.save = []
        self.nc = nc

    def _from_yaml(self, cfg):
        # Create a YOLOv5 classification model from a *.yaml file
        self.model = None


def parse_model(d, ch):  # model_dict, input_channels(3)
    # Parse a YOLOv5 model.yaml dictionary
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
                Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)  # TODO: channel, gw, gd
        elif m in {Detect, Segment}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, 8)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f}  {t:<40}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
    parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--profile', action='store_true', help='profile model speed')
    parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
    parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
    opt = parser.parse_args()
    opt.cfg = check_yaml(opt.cfg)  # check YAML
    print_args(vars(opt))
    device = select_device(opt.device)

    # Create model
    im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
    model = Model(opt.cfg).to(device)

    # Options
    if opt.line_profile:  # profile layer by layer
        model(im, profile=True)

    elif opt.profile:  # profile forward-backward
        results = profile(input=im, ops=[model], n=3)

    elif opt.test:  # test all models
        for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
            try:
                _ = Model(cfg)
            except Exception as e:
                print(f'Error in {cfg}: {e}')

    else:  # report fused model summary
        model.fuse()

Step 6

Run main_gradcam.py. The argument list below can be adjusted as needed.

# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="yolov5s.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='data/images/bus.jpg', help='input image path')
parser.add_argument('--output-dir', type=str, default='runs/result17', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                    help='The layer hierarchical address to which gradcam will applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam method')
parser.add_argument('--device', type=str, default='cuda', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true', help='do not show label and box on the heatmap')
args = parser.parse_args()
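For example (pass --device cpu if no GPU is available):

python main_gradcam.py --model-path yolov5s.pt --img-path data/images/bus.jpg --method gradcam --device cpu

The heatmaps are written under {output-dir}{image name}/{method} (e.g. runs/result17bus/gradcam with the defaults), one file per detection layer and per detected object.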

Done

[final Grad-CAM heatmap outputs]


Reposted from: https://blog.csdn.net/weixin_49321128/article/details/128555192
Copyright belongs to the original author 小小帅呀. In case of infringement, please contact us for removal.
