# Improvement of YOLOv5 - loss function for target detection

flyfish

The improved source code is fully compatible with the original YOLOv5 v5 release. In addition to all of the original backbones, it also supports MobileNetV3 and ShuffleNetV2 as the backbone.

Categories can be related in two ways. They can have an inclusive relationship, where one target belongs to several categories at once (for example, a target can be both a person and a man), or they can be mutually exclusive, such as person, cat, and dog. This post tries to improve the loss function for datasets whose categories are mutually exclusive.

## Categories with an inclusive relationship

BCEWithLogitsLoss can be used for multi-label classification, where a target can belong to one or more categories. For example, a target can be a person, a man, and a child at the same time; the categories have an inclusive relationship.
Because BCEWithLogitsLoss = Sigmoid + BCELoss, the sigmoid is built into the loss function. Sigmoid probabilities do not need to sum to 1.
For example, take one row of the sigmoid output in the sample code: [0.5100, 0.6713, 0.5025]. These values do not sum to 1. If the threshold is defined as "greater than or equal to 0.50", the target belongs to all three classes at the same time. If the target is required to belong to only one class, the class with the largest value can be taken.

## Categories are mutually exclusive

If the detected categories are mutually exclusive, such as person, cat, and dog, how should the loss be changed?
CrossEntropyLoss = LogSoftmax + NLLLoss
The softmax probabilities sum to 1 (or very close to 1 after rounding). With softmax, a larger probability for one class necessarily means smaller probabilities for the other classes. With sigmoid, a larger value gives that class a larger probability, but it does not affect the probabilities of the other classes.
Look at the output [0.2543, 0.4990, 0.2467] in the sample code. The sum of these three numbers is 1.

## Sigmoid and Softmax sample code

```python
import torch
import torch.nn as nn

# Two rows of raw class scores (logits) for three classes.
# Named `logits` rather than `input` so the builtin `input()` is not shadowed.
logits = torch.tensor([[0.0402, 0.7142, 0.01],
                       [0.2214, 0.4781, 0.01]])

# Sigmoid scores every class independently, so a row does not have to sum to 1.
net1 = nn.Sigmoid()
output1 = net1(logits)
print(output1)
# tensor([[0.5100, 0.6713, 0.5025],
#         [0.5551, 0.6173, 0.5025]])

# Softmax normalizes over the class dimension, so every row sums to 1.
net2 = nn.Softmax(dim=-1)
output2 = net2(logits)
print(output2)
# tensor([[0.2543, 0.4990, 0.2467],
#         [0.3224, 0.4167, 0.2609]])
```

Softmax outputs are mutually exclusive, so we try replacing the classification loss with cross-entropy loss.

Change the code as shown below, or download the complete code directly from the YOLOv5-ShuffleNetV2-CrossEntropyLoss repository.

## Training phase

utils/loss.py

```python
class ComputeLoss:
# Compute losses
def __init__(self, model, autobalance=False):
    """Build the loss criteria and copy detection-head settings from ``model``.

    Args:
        model: Model exposing ``hyp`` (hyperparameter mapping), ``gr`` and
            ``parameters()``; its last sub-module is taken as the Detect()
            head.
        autobalance: When true, the index of the stride-16 layer is stored
            in ``self.ssi``; otherwise ``self.ssi`` is 0.
    """
    super(ComputeLoss, self).__init__()
    device = next(model.parameters()).device  # get model device
    hyp = model.hyp  # hyperparameters

    # Define criteria
    # changed by Sisyphus: the class criterion is cross-entropy (softmax
    # based) so that classes are treated as mutually exclusive.
    cls_criterion = nn.CrossEntropyLoss()
    obj_pos_weight = torch.tensor([hyp['obj_pw']], device=device)
    obj_criterion = nn.BCEWithLogitsLoss(pos_weight=obj_pos_weight)

    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    smoothing = hyp.get('label_smoothing', 0.0)
    self.cp, self.cn = smooth_BCE(eps=smoothing)  # positive, negative BCE targets
    print("self.cp, self.cn: ", self.cp, ":", self.cn)

    # Focal loss
    # NOTE(review): FocalLoss is defined outside this excerpt - confirm it
    # accepts a CrossEntropyLoss criterion before using fl_gamma > 0.
    gamma = hyp['fl_gamma']  # focal loss gamma
    if gamma > 0:
        cls_criterion = FocalLoss(cls_criterion, gamma)
        obj_criterion = FocalLoss(obj_criterion, gamma)

    # Detect() module
    if is_parallel(model):
        det = model.module.model[-1]
    else:
        det = model.model[-1]

    # Per-layer objectness weights, P3-P7
    if det.nl == 3:
        self.balance = [4.0, 1.0, 0.4]
    else:
        self.balance = [4.0, 1.0, 0.25, 0.06, .02]

    # stride 16 index
    self.ssi = list(det.stride).index(16) if autobalance else 0

    self.BCEcls = cls_criterion
    self.BCEobj = obj_criterion
    self.gr = model.gr
    self.hyp = hyp
    self.autobalance = autobalance

    # Mirror the detection head's layout attributes on the loss object.
    self.na = det.na
    self.nc = det.nc
    self.nl = det.nl
    self.anchors = det.anchors

def __call__(self, p, targets):  # predictions, targets, model
    """Compute the box, objectness and classification losses for one batch.

    Args:
        p: Sequence of per-layer prediction tensors. Each is indexed as
            ``pi[image, anchor, gridy, gridx]``; along the last dimension
            0:4 is the box, 4 is objectness and 5: are the class logits.
        targets: Ground-truth tensor passed to ``self.build_targets``; its
            device is used for the loss accumulators.

    Returns:
        tuple: ``(loss * batch_size, loss_items)`` where ``loss_items`` is
        the detached concatenation ``(lbox, lobj, lcls, loss)``.
    """
    device = targets.device
    lcls = torch.zeros(1, device=device)
    lbox = torch.zeros(1, device=device)
    lobj = torch.zeros(1, device=device)
    tcls, tbox, indices, anchors = self.build_targets(p, targets)  # targets

    # Losses
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
        tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj

        # Use the first dimension, not the whole torch.Size: a Size object
        # is truthy even when there are zero targets.
        n = b.shape[0]  # number of targets
        if n:
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

            # Regression
            pxy = ps[:, :2].sigmoid() * 2. - 0.5
            pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
            lbox += (1.0 - iou).mean()  # iou loss

            # Objectness
            tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype)  # iou ratio

            # Classification
            if self.nc > 1:  # cls loss (only if multiple classes)
                # changed by Sisyphus 20210914: the BCE form
                #   lcls += self.BCEcls(ps[:, 5:], t)
                # (t = smoothed one-hot targets built from self.cn/self.cp)
                # is replaced by cross-entropy, which takes the class
                # indices directly, so self.cp / self.cn are not used here.
                lcls += self.BCEcls(ps[:, 5:], tcls[i].clone().detach())

        obji = self.BCEobj(pi[..., 4], tobj)
        lobj += obji * self.balance[i]  # obj loss
        if self.autobalance:
            self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

    if self.autobalance:
        self.balance = [x / self.balance[self.ssi] for x in self.balance]
    lbox *= self.hyp['box']
    lobj *= self.hyp['obj']
    lcls *= self.hyp['cls']
    bs = tobj.shape[0]  # batch size

    loss = lbox + lobj + lcls
    return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
```

## Inference phase

models/yolo.py

```python
class Detect(nn.Module):
stride = None  # strides computed during build
export = False  # onnx export

def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
    """Set up the detection head.

    Args:
        nc: Number of classes.
        anchors: One entry per detection layer; each entry is a flat
            sequence of anchor values that is reshaped into (w, h) pairs.
        ch: Input channel count of each detection layer's feature map.
    """
    super(Detect, self).__init__()
    self.nc = nc  # number of classes
    self.no = nc + 5  # number of outputs per anchor
    self.nl = len(anchors)  # number of detection layers
    # Anchors per layer come from ONE layer's entry (two values per anchor),
    # which keeps na consistent with the (nl, na, 2) buffer below and with
    # the no * na output channels of each conv. The guard keeps an empty
    # `anchors` from raising on this line.
    self.na = len(anchors[0]) // 2 if self.nl else 0  # number of anchors
    self.grid = [torch.zeros(1)] * self.nl  # init grid
    a = torch.tensor(anchors).float().view(self.nl, -1, 2)
    self.register_buffer('anchors', a)  # shape(nl,na,2)
    self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
    self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

def forward(self, x):
    """Run the detection head over the per-layer feature maps.

    Each ``x[i]`` is replaced in place by its conv output reshaped to
    (bs, na, ny, nx, no), e.g. x(bs,255,20,20) to x(bs,3,20,20,85).

    Args:
        x: List of feature maps, one per detection layer.

    Returns:
        In training (or export) mode, the list ``x`` of raw outputs.
        Otherwise ``(pred, x)``, where ``pred`` concatenates the decoded
        outputs of all layers along dimension 1 with ``self.no`` values
        each; class scores are a softmax over the raw class logits.
    """
    # x = x.copy()  # for profiling
    self.training |= self.export
    decoded = []  # inference output

    for layer in range(self.nl):
        feat = self.m[layer](x[layer])  # conv
        bs, _, ny, nx = feat.shape
        feat = feat.view(bs, self.na, self.no, ny, nx)
        feat = feat.permute(0, 1, 3, 4, 2).contiguous()
        x[layer] = feat

        if self.training:
            continue

        # inference: rebuild the cached grid when the feature-map size changed
        if self.grid[layer].shape[2:4] != feat.shape[2:4]:
            self.grid[layer] = self._make_grid(nx, ny).to(feat.device)

        y = feat.sigmoid()
        # add by Sisyphus: class scores use softmax over the raw logits
        # instead of the per-class sigmoid.
        cls_prob = feat[..., 5:].softmax(dim=-1)
        xy = (y[..., 0:2] * 2. - 0.5 + self.grid[layer]) * self.stride[layer]
        y[..., 0:2] = xy  # xy
        wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[layer]
        y[..., 2:4] = wh  # wh
        y[..., 5:] = cls_prob
        decoded.append(y.view(bs, -1, self.no))

    if self.training:
        return x
    return torch.cat(decoded, 1), x

@staticmethod
def _make_grid(nx=20, ny=20):
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])