|
| 1 | +import numpy as np |
| 2 | +from itertools import product as product |
| 3 | +import torch |
| 4 | +from torch.autograd import Function |
| 5 | + |
| 6 | + |
def nms_(dets, thresh):
    """Pure-NumPy non-maximum suppression.

    Courtesy of Ross Girshick
    [https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/nms/py_cpu_nms.py]

    Args:
        dets (ndarray): detections of shape [N, 5] as (x1, y1, x2, y2, score).
        thresh (float): IoU threshold; boxes overlapping a kept box by more
            than this are suppressed.

    Returns:
        ndarray: integer indices (into ``dets``) of the kept boxes, in
        descending-score order.
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # NOTE: no "+1" pixel correction — areas/overlaps use pure width*height.
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # indices sorted by descending score

    keep = []
    while order.size > 0:
        i = order[0]  # highest-scoring remaining box
        keep.append(int(i))
        # Intersection of box i with every other remaining box.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        # Keep only boxes whose IoU with box i is at or below the threshold.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]  # +1: ovr was computed against order[1:]

    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # use an explicit fixed-width integer dtype instead.
    return np.asarray(keep, dtype=np.int64)
| 39 | + |
| 40 | + |
def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions in corner form (x1, y1, x2, y2)
    """
    # Undo the variance-scaled offsets: centers shift relative to the
    # prior's size, sizes scale exponentially.
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
    # Convert (cx, cy, w, h) -> (x1, y1, x2, y2).
    top_left = centers - sizes / 2
    bottom_right = top_left + sizes
    return torch.cat((top_left, bottom_right), 1)
| 60 | + |
| 61 | + |
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        (keep, count): ``keep`` is a long tensor of size [num_priors] whose
        first ``count`` entries are the indices of the kept boxes, in
        descending-score order; entries past ``count`` are meaningless.
    """
    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        return keep, 0
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]  # indices of the top-k largest vals

    # Reusable output buffers for index_select in the loop.
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # Intersection of box i with each remaining box: clip the remaining
        # coordinates to box i's extent (scalar bounds), then measure.
        xx1 = torch.clamp(xx1, min=float(x1[i]))
        yy1 = torch.clamp(yy1, min=float(y1[i]))
        xx2 = torch.clamp(xx2, max=float(x2[i]))
        yy2 = torch.clamp(yy2, max=float(y2[i]))
        # FIX: removed dead code — w/h were preallocated with boxes.new()
        # and resize_as_()'d each iteration, then immediately rebound below,
        # so the buffers and resizes had no effect.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas
        union = (rem_areas - inter) + area[i]
        IoU = inter / union
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count
| 127 | + |
| 128 | + |
class Detect(object):
    """Test-time detection head: decodes loc/conf predictions against priors
    and applies per-class non-maximum suppression."""

    def __init__(self, num_classes=2,
                 top_k=750, nms_thresh=0.3, conf_thresh=0.05,
                 variance=[0.1, 0.2], nms_top_k=5000):
        # num_classes: total classes including background (index 0).
        # top_k: max detections kept per class after NMS.
        # nms_thresh: IoU threshold used by nms().
        # conf_thresh: minimum confidence for a box to enter NMS.
        # variance: prior-box variances passed to decode().
        # nms_top_k: max candidates fed into nms().
        # NOTE(review): the mutable default `variance` is shared across
        # instances; it is only read here, so this is harmless as written.
        self.num_classes = num_classes
        self.top_k = top_k
        self.nms_thresh = nms_thresh
        self.conf_thresh = conf_thresh
        self.variance = variance
        self.nms_top_k = nms_top_k

    def forward(self, loc_data, conf_data, prior_data):
        """Select final detections for a batch.

        Args:
            loc_data (tensor): loc predictions, shape [batch, num_priors, 4].
            conf_data (tensor): confidence scores,
                shape [batch * num_priors, num_classes] (viewed below).
            prior_data (tensor): prior boxes in center-offset form,
                shape [num_priors, 4].
        Return:
            tensor of shape [batch, num_classes, top_k, 5] where the last
            dim is (score, x1, y1, x2, y2); unused slots stay zero.
        """
        num = loc_data.size(0)
        num_priors = prior_data.size(0)

        # [batch, num_classes, num_priors] for per-class row access.
        conf_preds = conf_data.view(num, num_priors, self.num_classes).transpose(2, 1)
        # Tile priors across the batch so decode() sees matching rows.
        batch_priors = prior_data.view(-1, num_priors, 4).expand(num, num_priors, 4)
        batch_priors = batch_priors.contiguous().view(-1, 4)

        decoded_boxes = decode(loc_data.view(-1, 4), batch_priors, self.variance)
        decoded_boxes = decoded_boxes.view(num, num_priors, 4)

        output = torch.zeros(num, self.num_classes, self.top_k, 5)

        for i in range(num):
            boxes = decoded_boxes[i].clone()
            conf_scores = conf_preds[i].clone()

            # Class 0 is background and is skipped.
            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]

                # BUG FIX: boolean-mask indexing returns a 1-D tensor even
                # when empty (dim() == 1, numel() == 0), so the original
                # `scores.dim() == 0` check never fired; test element count.
                if scores.numel() == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes_ = boxes[l_mask].view(-1, 4)
                ids, count = nms(boxes_, scores, self.nms_thresh, self.nms_top_k)
                count = count if count < self.top_k else self.top_k

                output[i, cl, :count] = torch.cat(
                    (scores[ids[:count]].unsqueeze(1), boxes_[ids[:count]]), 1)

        return output
| 174 | + |
| 175 | + |
class PriorBox(object):
    """Generate SSD-style prior (anchor) boxes in center-offset form.

    One square prior is produced per feature-map cell, centered on the
    cell and normalized by the image dimensions.
    """

    def __init__(self, input_size, feature_maps,
                 variance=[0.1, 0.2],
                 min_sizes=[16, 32, 64, 128, 256, 512],
                 steps=[4, 8, 16, 32, 64, 128],
                 clip=False):
        # input_size: (image_height, image_width).
        # feature_maps: per-level (feat_h, feat_w) pairs.
        # steps[k]: stride of level k relative to the input image.
        # min_sizes[k]: prior side length (pixels) at level k.
        # clip: clamp coordinates into [0, 1] when True.
        super(PriorBox, self).__init__()

        self.imh = input_size[0]
        self.imw = input_size[1]
        self.feature_maps = feature_maps

        self.variance = variance
        self.min_sizes = min_sizes
        self.steps = steps
        self.clip = clip

    def forward(self):
        """Return priors as a FloatTensor of shape [num_priors, 4],
        each row being (cx, cy, w, h) normalized to the image size."""
        priors = []
        for level, (feat_h, feat_w) in enumerate(self.feature_maps):
            # Per-level invariants: grid resolution and normalized box size.
            cells_w = self.imw / self.steps[level]
            cells_h = self.imh / self.steps[level]
            box_w = self.min_sizes[level] / self.imw
            box_h = self.min_sizes[level] / self.imh

            for row, col in product(range(feat_h), range(feat_w)):
                # Center the prior on the (row, col) cell.
                priors.append([(col + 0.5) / cells_w,
                               (row + 0.5) / cells_h,
                               box_w, box_h])

        output = torch.FloatTensor(priors).view(-1, 4)

        if self.clip:
            output.clamp_(max=1, min=0)

        return output
0 commit comments