1.1 A Depth Estimation Application Scenario (Tesla Forward-Collision Warning)
[Figure: Tesla Autopilot]
1.2 The Concept of Depth Estimation
Depth estimation is the task of recovering, for every scene point visible in an image, its distance to the camera. The image formed from these per-pixel distances is called a depth map.
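Concretely, a depth map is just an H x W array whose entry at a pixel is the distance from the camera to the scene point imaged there. A minimal illustration with made-up values:

import numpy as np

# A toy 2x3 "depth map": each value is a distance in meters (numbers invented).
depth_map = np.array([[4.2, 4.1, 3.9],
                      [1.5, 1.4, 1.6]])  # bottom row: nearby road surface

row, col = 0, 2
print(f"The point imaged at pixel ({row}, {col}) is {depth_map[row, col]} m away")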
1.3 Overall Architecture of Depth Estimation
1.4 Walking Through the Architecture, Following the Paper
Step 1: Backbone feature extraction
Code implementation:
import torch.nn as nn
from torchvision import models

class deepFeatureExtractor_ResNext101(nn.Module):
    def __init__(self, args, lv6=False):
        super(deepFeatureExtractor_ResNext101, self).__init__()
        self.args = args
        # after passing ReLU   : H/2  x W/2
        # after passing Layer1 : H/4  x W/4
        # after passing Layer2 : H/8  x W/8
        # after passing Layer3 : H/16 x W/16
        self.encoder = models.resnext101_32x8d(pretrained=True)
        self.fixList = ['layer1.0', 'layer1.1', '.bn']
        self.lv6 = lv6
        if lv6 is True:
            self.layerList = ['relu', 'layer1', 'layer2', 'layer3', 'layer4']
            self.dimList = [64, 256, 512, 1024, 2048]
        else:
            del self.encoder.layer4
            del self.encoder.fc
            self.layerList = ['relu', 'layer1', 'layer2', 'layer3']
            self.dimList = [64, 256, 512, 1024]
        # freeze the stem conv and every parameter matched by fixList
        for name, parameters in self.encoder.named_parameters():
            if name == 'conv1.weight':
                parameters.requires_grad = False
            if any(x in name for x in self.fixList):
                parameters.requires_grad = False

    def forward(self, x):
        out_featList = []
        feature = x
        # run the encoder module by module; collect features at the listed stages
        for k, v in self.encoder._modules.items():
            if k == 'avgpool':
                break
            feature = v(feature)
            if any(x in k for x in self.layerList):
                out_featList.append(feature)
        return out_featList
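To make the shapes concrete, here is a minimal usage sketch (my own, not from the original repository; args is only stored by the class, so None suffices, and pretrained weights are downloaded on first use):

import torch

# Hypothetical usage: feed a dummy batch through the extractor and
# inspect the multi-scale feature shapes.
extractor = deepFeatureExtractor_ResNext101(args=None, lv6=False)
x = torch.randn(1, 3, 256, 320)  # N x 3 x H x W
for feat in extractor(x):
    print(feat.shape)
# Expected roughly: 64 x H/2 x W/2, 256 x H/4 x W/4,
#                   512 x H/8 x W/8, 1024 x H/16 x W/16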
Step 2: Difference (Laplacian residual) feature extraction
Code implementation:
import torch.nn.functional as F

def forward(self, x):
    out_featList = self.encoder(x)

    # downsample the input image by factors of 2 down to 1/32 resolution
    rgb_down2 = F.interpolate(x, scale_factor=0.5, mode='bilinear')
    rgb_down4 = F.interpolate(rgb_down2, scale_factor=0.5, mode='bilinear')
    rgb_down8 = F.interpolate(rgb_down4, scale_factor=0.5, mode='bilinear')
    rgb_down16 = F.interpolate(rgb_down8, scale_factor=0.5, mode='bilinear')
    rgb_down32 = F.interpolate(rgb_down16, scale_factor=0.5, mode='bilinear')

    # upsample each level back to the next finer resolution
    rgb_up16 = F.interpolate(rgb_down32, rgb_down16.shape[2:], mode='bilinear')
    rgb_up8 = F.interpolate(rgb_down16, rgb_down8.shape[2:], mode='bilinear')
    rgb_up4 = F.interpolate(rgb_down8, rgb_down4.shape[2:], mode='bilinear')
    rgb_up2 = F.interpolate(rgb_down4, rgb_down2.shape[2:], mode='bilinear')
    rgb_up = F.interpolate(rgb_down2, x.shape[2:], mode='bilinear')

    # Laplacian residuals: each level minus its upsampled coarser version
    lap1 = x - rgb_up
    lap2 = rgb_down2 - rgb_up2
    lap3 = rgb_down4 - rgb_up4
    lap4 = rgb_down8 - rgb_up8
    lap5 = rgb_down16 - rgb_up16

    rgb_list = [rgb_down32, lap5, lap4, lap3, lap2, lap1]
    d_res_list, depth = self.decoder(out_featList, rgb_list)
    return d_res_list, depth
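These differences are exactly the levels of a Laplacian pyramid: the original image can be recovered losslessly by starting from the coarsest level and repeatedly upsampling and adding residuals. A standalone sanity check, written for this article rather than taken from the repository:

import torch
import torch.nn.functional as F

# Rebuild an image from its coarsest level plus the Laplacian residuals
# computed exactly as in the forward pass above.
x = torch.randn(1, 3, 64, 64)
down, laps = x, []
for _ in range(5):
    coarser = F.interpolate(down, scale_factor=0.5, mode='bilinear')
    up = F.interpolate(coarser, down.shape[2:], mode='bilinear')
    laps.append(down - up)    # residual at the current (finer) level
    down = coarser

recon = down                  # start from the coarsest image (cf. rgb_down32)
for lap in reversed(laps):    # lap5 ... lap1
    recon = lap + F.interpolate(recon, lap.shape[2:], mode='bilinear')
print(torch.allclose(recon, x, atol=1e-5))  # True: the pyramid is lossless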
Step 3: Weight standardization
Code implementation:
def forward(self, x):
    # weight standardization: make each output channel's kernel zero-mean, unit-std
    weight = self.weight
    weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
    weight = weight - weight_mean
    std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5
    weight = weight / std.expand_as(weight)
    return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
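This forward normally lives inside a convolution layer that standardizes its weights on the fly. A self-contained sketch of such a layer (the class name Conv2dWS is my own; the repository's wrapper may differ):

import torch
import torch.nn as nn
import torch.nn.functional as F

class Conv2dWS(nn.Conv2d):
    """Hypothetical sketch of a weight-standardized conv layer."""
    def forward(self, x):
        w = self.weight
        # zero-mean over (in_channels, kH, kW) per output channel
        w = w - w.mean(dim=(1, 2, 3), keepdim=True)
        # divide by per-output-channel std (epsilon for numerical stability)
        std = w.view(w.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5
        return F.conv2d(x, w / std, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

layer = Conv2dWS(16, 32, kernel_size=3, padding=1)
print(layer(torch.randn(1, 16, 8, 8)).shape)  # torch.Size([1, 32, 8, 8])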
Step 4: ASPP module (dilated convolutions)
Code implementation:
class Dilated_bottleNeck(nn.Module):
    def __init__(self, norm, act, in_feat):
        super(Dilated_bottleNeck, self).__init__()
        conv = conv_ws
        # in_feat = 1024 in ResNext101 and ResNet101
        self.reduction1 = conv(in_feat, in_feat//2, kernel_size=1, stride=1, bias=False, padding=0)
        self.aspp_d3 = nn.Sequential(
            myConv(in_feat//2, in_feat//4, kSize=1, stride=1, padding=0, dilation=1,
                   bias=False, norm=norm, act=act, num_groups=(in_feat//2)//16),
            myConv(in_feat//4, in_feat//4, kSize=3, stride=1, padding=3, dilation=3,
                   bias=False, norm=norm, act=act, num_groups=(in_feat//4)//16))
        self.aspp_d6 = nn.Sequential(
            myConv(in_feat//2 + in_feat//4, in_feat//4, kSize=1, stride=1, padding=0, dilation=1,
                   bias=False, norm=norm, act=act, num_groups=(in_feat//2 + in_feat//4)//16),
            myConv(in_feat//4, in_feat//4, kSize=3, stride=1, padding=6, dilation=6,
                   bias=False, norm=norm, act=act, num_groups=(in_feat//4)//16))
        self.aspp_d12 = nn.Sequential(
            myConv(in_feat, in_feat//4, kSize=1, stride=1, padding=0, dilation=1,
                   bias=False, norm=norm, act=act, num_groups=in_feat//16),
            myConv(in_feat//4, in_feat//4, kSize=3, stride=1, padding=12, dilation=12,
                   bias=False, norm=norm, act=act, num_groups=(in_feat//4)//16))
        self.aspp_d18 = nn.Sequential(
            myConv(in_feat + in_feat//4, in_feat//4, kSize=1, stride=1, padding=0, dilation=1,
                   bias=False, norm=norm, act=act, num_groups=(in_feat + in_feat//4)//16),
            myConv(in_feat//4, in_feat//4, kSize=3, stride=1, padding=18, dilation=18,
                   bias=False, norm=norm, act=act, num_groups=(in_feat//4)//16))
        self.reduction2 = myConv((in_feat//4)*4 + in_feat//2, in_feat//2, kSize=3, stride=1, padding=1,
                                 bias=False, norm=norm, act=act,
                                 num_groups=((in_feat//4)*4 + in_feat//2)//16)

    def forward(self, x):
        x = self.reduction1(x)
        # densely connected dilated branches: each sees the concat of all earlier outputs
        d3 = self.aspp_d3(x)
        cat1 = torch.cat([x, d3], dim=1)
        d6 = self.aspp_d6(cat1)
        cat2 = torch.cat([cat1, d6], dim=1)
        d12 = self.aspp_d12(cat2)
        cat3 = torch.cat([cat2, d12], dim=1)
        d18 = self.aspp_d18(cat3)
        out = self.reduction2(torch.cat([x, d3, d6, d12, d18], dim=1))
        return out  # 512 x H/16 x W/16
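Here myConv and conv_ws are the author's wrappers (convolution plus normalization and activation, and the weight-standardized convolution from Step 3). The property that lets all branches be concatenated is that a 3x3 convolution with padding equal to its dilation preserves spatial size; a quick standalone check (illustrative, not from the repo):

import torch
import torch.nn as nn

# A dilated 3x3 conv keeps spatial dims whenever padding == dilation,
# which is why the ASPP branches above can all be concatenated.
x = torch.randn(1, 512, 20, 24)  # e.g. an H/16 x W/16 feature map
for d in (3, 6, 12, 18):
    conv = nn.Conv2d(512, 128, kernel_size=3, padding=d, dilation=d, bias=False)
    print(d, conv(x).shape)  # spatial dims stay 20 x 24 for every dilation rate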
Step 5: Coarse-to-fine feature concatenation
Code implementation:
# decoder 1 - Pyramid level 5
lap_lv5 = torch.sigmoid(self.decoder1(dense_feat))  # R5, coarsest depth estimate
lap_lv5_up = self.upscale(lap_lv5, scale_factor=2, mode='bilinear')

# decoder 2 - Pyramid level 4
dec2 = self.decoder2_up1(dense_feat)
dec2 = self.decoder2_reduc1(torch.cat([dec2, cat3], dim=1))
dec2_up = self.decoder2_1(torch.cat([dec2, lap_lv5_up, rgb_lv4], dim=1))
dec2 = self.decoder2_2(dec2_up)
dec2 = self.decoder2_3(dec2)
# if depth range is (0,1), laplacian of image range is (-1,1)
lap_lv4 = torch.tanh(self.decoder2_4(dec2) + (0.1 * rgb_lv4.mean(dim=1, keepdim=True)))
lap_lv4_up = self.upscale(lap_lv4, scale_factor=2, mode='bilinear')

# decoder 2 - Pyramid level 3
dec3 = self.decoder2_1_up2(dec2_up)
dec3 = self.decoder2_1_reduc2(torch.cat([dec3, cat2], dim=1))
dec3_up = self.decoder2_1_1(torch.cat([dec3, lap_lv4_up, rgb_lv3], dim=1))
dec3 = self.decoder2_1_2(dec3_up)
lap_lv3 = torch.tanh(self.decoder2_1_3(dec3) + (0.1 * rgb_lv3.mean(dim=1, keepdim=True)))
lap_lv3_up = self.upscale(lap_lv3, scale_factor=2, mode='bilinear')

# decoder 2 - Pyramid level 2
dec4 = self.decoder2_1_1_up3(dec3_up)
dec4 = self.decoder2_1_1_reduc3(torch.cat([dec4, cat1], dim=1))
dec4_up = self.decoder2_1_1_1(torch.cat([dec4, lap_lv3_up, rgb_lv2], dim=1))
lap_lv2 = torch.tanh(self.decoder2_1_1_2(dec4_up) + (0.1 * rgb_lv2.mean(dim=1, keepdim=True)))
lap_lv2_up = self.upscale(lap_lv2, scale_factor=2, mode='bilinear')

# decoder 2 - Pyramid level 1
dec5 = self.decoder2_1_1_1_up4(dec4_up)
dec5 = self.decoder2_1_1_1_1(torch.cat([dec5, lap_lv2_up, rgb_lv1], dim=1))
dec5 = self.decoder2_1_1_1_2(dec5)
lap_lv1 = torch.tanh(self.decoder2_1_1_1_3(dec5) + (0.1 * rgb_lv1.mean(dim=1, keepdim=True)))

# Laplacian restoration: add each residual to the upsampled coarser reconstruction
lap_lv4_img = lap_lv4 + lap_lv5_up
lap_lv3_img = lap_lv3 + self.upscale(lap_lv4_img, scale_factor=2, mode='bilinear')
lap_lv2_img = lap_lv2 + self.upscale(lap_lv3_img, scale_factor=2, mode='bilinear')
final_depth = lap_lv1 + self.upscale(lap_lv2_img, scale_factor=2, mode='bilinear')
final_depth = torch.sigmoid(final_depth)
# fit laplacian image range (-80,80), depth image range (0,80)
return [lap_lv5 * self.max_depth, lap_lv4 * self.max_depth, lap_lv3 * self.max_depth,
        lap_lv2 * self.max_depth, lap_lv1 * self.max_depth], final_depth * self.max_depth
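The restoration block at the end is the classic Laplacian pyramid collapse applied in depth space: each predicted residual is added to the upsampled coarser reconstruction, and only the full-resolution sum passes through the final sigmoid. A toy sketch of just this collapse (shapes and max_depth = 80 are illustrative assumptions; self.upscale is assumed to be plain bilinear interpolation):

import torch
import torch.nn.functional as F

def upscale(t, scale_factor, mode='bilinear'):
    # stand-in for self.upscale (assumption: plain bilinear interpolation)
    return F.interpolate(t, scale_factor=scale_factor, mode=mode)

# Toy tensors standing in for the decoder outputs (shapes only, values random)
lap_lv5 = torch.rand(1, 1, 8, 10)                                 # coarsest level, H/16
laps = [torch.rand(1, 1, 8 * 2**i, 10 * 2**i) * 2 - 1 for i in range(1, 5)]
lap_lv4, lap_lv3, lap_lv2, lap_lv1 = laps                         # residuals in (-1, 1)

depth = lap_lv5
for lap in (lap_lv4, lap_lv3, lap_lv2, lap_lv1):
    depth = lap + upscale(depth, scale_factor=2)                  # collapse one level
depth = torch.sigmoid(depth) * 80.0                               # e.g. max_depth = 80 m
print(depth.shape)                                                # 1 x 1 x 128 x 160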
Step 6: Loss computation (scale-invariant regularization)
Code implementation:
def scale_invariant_loss(valid_out, valid_gt):
    # difference of log depths; subtracting 0.85 * (mean)^2 from the mean-squared
    # term makes the loss partially invariant to a global scaling of the prediction
    logdiff = torch.log(valid_out) - torch.log(valid_gt)
    scale_inv_loss = torch.sqrt((logdiff ** 2).mean() - 0.85 * (logdiff.mean() ** 2)) * 10.0
    return scale_inv_loss
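This is the scale-invariant log loss of Eigen et al., with a variance-focus weight of 0.85 and a factor of 10. Both inputs must be strictly positive, so pixels without valid ground truth are masked out first. An illustrative usage sketch with synthetic tensors:

import torch

# Mask out invalid ground-truth pixels (e.g. zeros in sparse LiDAR depth)
# before computing the loss. All tensors here are synthetic.
pred = torch.rand(2, 1, 64, 80) * 80 + 0.1   # predicted depth, strictly > 0
gt = torch.rand(2, 1, 64, 80) * 80           # ground truth with holes
gt[gt < 40] = 0                              # pretend these pixels are missing

mask = gt > 0
loss = scale_invariant_loss(pred[mask], gt[mask])
print(loss)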
1.5 Applying Depth Estimation in Projects
If you'd like the complete project code for this article, the papers for the algorithms above, or the dataset resources, feel free to message me!