0 前言
🚩 **基于深度学习的疫情社交安全距离检测算法**
- 难度系数:3分
- 工作量:3分
- 创新点:5分
1 课题背景
2 实现效果
import argparse
from utils.datasets import *
from utils.utils import *


def detect(save_img=False):
    """Run person detection on ``opt.source`` and draw social-distancing overlays.

    All settings are read from the module-level ``opt`` namespace (output
    folder, source, weights, thresholds, ...). For every frame the model
    output is filtered with NMS, each detection labelled ``person`` is
    marked with ``plot_dots_on_people`` and collected, and ``distancing`` is
    called with the collected boxes to draw the connecting lines.

    Args:
        save_img: Initial value of the "save annotated output" flag. It is
            forced to ``True`` whenever the source is not a webcam/stream.

    Raises:
        StopIteration: when the user presses ``q`` in the preview window.

    Side effects:
        Deletes and recreates the ``opt.output`` folder, writes annotated
        images/videos (and optional ``.txt`` label files) into it, and may
        open preview windows through OpenCV.
    """
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled; flip the flag to enable it)
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.names if hasattr(model, 'names') else model.modules.names
    # Only needed by the commented-out plot_one_box call below; kept so the
    # random-number stream is consumed exactly as before.
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   fast=True, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = torch_utils.time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # List to store bounding coordinates of people
        people_coords = []

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt', 'a') as file:
                            file.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        if label is not None:
                            # Only people take part in the distance check.
                            if (label.split())[0] == 'person':
                                people_coords.append(xyxy)
                                # plot_one_box(xyxy, im0, line_thickness=3)
                                plot_dots_on_people(xyxy, im0)

            # Plot lines connecting people
            # NOTE(review): nesting level reconstructed from a whitespace-mangled
            # listing; assumed to run once per image -- confirm against upstream.
            distancing(people_coords, im0, dist_thres_lim=(200, 250))

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
3 相关技术
3.1 YOLOV4
YOLOv4 使用卷积网络 CSPDarknet-53 进行特征提取,网络结构模型如图 2 所示。在 Darknet-53 的每个残差块上加入 CSP(Cross Stage Partial)结构[13],将基础层划分为两部分,再通过跨层次结构的特征融合进行合并。并采用 FPN(Feature Pyramid Networks)结构加强特征金字塔,最后利用不同层的高分辨率特征提取不同尺度的特征图进行对象检测。最终网络输出 3 个不同尺度的特征图,在三个不同尺度特征图上分别使用 3 个不同的先验框(anchors)进行预测识别,使得远近大小目标均能得到较好的检测。
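YOLOv4 的先验框尺寸是基于 PASCAL VOC、COCO 等数据集生成的,这些数据集包含的种类复杂,因此先验框并不一定完全适合行人。本研究旨在研究行人之间的社交距离,针对行人目标检测,利用聚类算法对 YOLOv4 的先验框进行微调。首先将行人数据集 $F$ 依据相似性分为 $i$ 个对象,即 $F=\{x_1, x_2, \dots, x_i\}$,其中每个对象都具有 $m$ 个维度的属性。聚类算法的目的是将 $i$ 个对象依据相似性聚集到指定的 $j$ 个类簇,每个对象属于且仅属于与其距离最小的类簇中心所对应的类簇。初始化 $j$ 个聚类中心 $C=\{c_1, c_2, \dots, c_j\}$,
将每个对象划分到距离最近的聚类中心,得到 $j$ 个类簇 $S=\{s_1, s_2, \dots, s_j\}$。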
def check_anchors(dataset, model, thr=4.0, imgsz=640):
    """Check how well the model's anchors fit the dataset; recompute if poor.

    The label width/height pairs are scaled to ``imgsz`` and compared with
    every anchor. A label counts as "recallable" when, for its best anchor,
    the worse of the two side ratios (and their inverses) exceeds
    ``1 / thr``. The fraction of recallable labels is the Best Possible
    Recall (BPR). If BPR is below 0.99, new anchors are generated with
    ``kmean_anchors`` and written into the detection layer in place, but
    only when they achieve a higher BPR than the current ones.

    Args:
        dataset: Object exposing ``shapes`` (per-image sizes) and ``labels``
            (per-image arrays whose columns 3:5 are read as width/height).
            Assumes ``shapes`` is a NumPy array of shape (n, 2) -- TODO confirm.
        model: Model whose last layer (``model.model[-1]`` or
            ``model.module.model[-1]``) exposes ``anchor_grid``, ``anchors``
            and ``stride``.
        thr: Maximum tolerated label/anchor side ratio.
        imgsz: Image size the label dimensions are scaled to.

    Returns:
        None. Progress is printed and the model may be modified in place.
    """
    print('\nAnalyzing anchors... ', end='')
    m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect()
    # Scale each image's shape so that its longest side equals imgsz.
    shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])).float()  # wh

    def metric(k):
        """Return the best possible recall of anchor set ``k`` against ``wh``."""
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        best = x.max(1)[0]  # best_x
        return (best > 1. / thr).float().mean()  # best possible recall

    bpr = metric(m.anchor_grid.clone().cpu().view(-1, 2))
    print('Best Possible Recall (BPR) = %.4f' % bpr, end='')
    if bpr < 0.99:  # threshold to recompute
        # The message has no placeholder, so the former stray `% bpr` operand
        # was removed; the printed text is unchanged.
        print('. Attempting to generate improved anchors, please wait...')
        na = m.anchor_grid.numel() // 2  # number of anchors
        new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
        new_bpr = metric(new_anchors.reshape(-1, 2))
        if new_bpr > bpr:  # replace anchors
            new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors)
            m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid)  # for inference
            m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1)  # loss
            print('New anchors saved to model. Update model *.yaml to use these anchors in the future.')
        else:
            print('Original anchors better than new anchors. Proceeding with original anchors.')
    print('')  # newline
3.2 基于 DeepSort 算法的行人跟踪
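YOLOv4 完成行人目标检测后生成边界框(Bounding box,Bbox),Bbox 包含行人最小外接矩形的坐标信息。本研究引入 DeepSort 算法[18]对行人的质点进行跟踪,目的是在运动矢量分析时计算行人之间的安全社交距离。首先,对行人进行质点化计算。设 Bbox 的左上角和右下角坐标分别为 $(x_1, y_1)$ 和 $(x_2, y_2)$,其质点计算公式如下:$$(x_c, y_c) = \left(\frac{x_1 + x_2}{2},\ \frac{y_1 + y_2}{2}\right)$$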
确定行人质点后,利用 DeepSort 算法实现对多个目标的精确定位与跟踪,其核心算法流程如图所示:
# NOTE(review): this listing was truncated in the article. The `class
# TrackState:` header, the tail of the `__init__` signature, the
# `features.append` line and the `increment_age` header were restored from
# the names the visible code references -- confirm against the upstream source.
class TrackState:
    """Enumeration of the life-cycle states of a track."""

    Tentative = 1  # newly created, not yet confirmed
    Confirmed = 2  # matched often enough to be trusted
    Deleted = 3  # marked for removal


class Track:
    """A single tracked target with Kalman state and bookkeeping counters.

    Args:
        mean: Initial state mean, passed through to the Kalman filter.
        covariance: Initial state covariance, passed through to the filter.
        track_id: Identifier of this track.
        class_id: Class of the tracked object; stored as ``int``.
        conf: Confidence score of the originating detection.
        n_init: Number of hits required before a tentative track is confirmed.
        max_age: Maximum tolerated ``time_since_update`` before a confirmed
            track is deleted by ``mark_missed``.
        feature: Optional feature stored as the first entry of ``features``.
    """

    def __init__(self, mean, covariance, track_id, class_id, conf, n_init, max_age,
                 feature=None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.class_id = int(class_id)
        self.hits = 1
        self.age = 1
        self.time_since_update = 0

        self.state = TrackState.Tentative
        self.features = []
        if feature is not None:
            self.features.append(feature)

        self.conf = conf
        self._n_init = n_init
        self._max_age = max_age

    def increment_age(self):
        """Advance the track by one frame without a match."""
        self.age += 1  # number of frames the track has existed
        self.time_since_update += 1  # consecutive frames without a match

    def predict(self, kf):
        """Propagate the state one step using Kalman filter ``kf``.

        ``kf.predict(mean, covariance)`` must return the new mean and
        covariance; the age counters are incremented afterwards.
        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.increment_age()

    def update(self, kf, detection, class_id, conf):
        """Correct the state with a matched ``detection``.

        Args:
            kf: Kalman filter providing ``update(mean, covariance, measurement)``.
            detection: Matched detection; ``detection.to_xyah()`` is used as
                the measurement.
            class_id: Class of the matched detection.
            conf: Confidence score of the matched detection.
        """
        self.conf = conf
        self.mean, self.covariance = kf.update(
            self.mean, self.covariance, detection.to_xyah())
        # Stored as a plain int, consistent with __init__ (the listing used
        # `class_id.int()` here, which only works on tensor inputs).
        self.class_id = int(class_id)

        self.hits += 1
        self.time_since_update = 0  # a successful match resets the miss counter
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            # Enough hits: promote from tentative to confirmed.
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark the track as missed (no association in the current frame).

        A tentative track is deleted immediately; any other track is deleted
        once ``time_since_update`` exceeds ``max_age``.
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted
4 最后
