Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

operators.py 24KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725
  1. #
  2. # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import logging
  17. import sys
  18. import six
  19. import cv2
  20. import numpy as np
  21. import math
  22. from PIL import Image
  23. class DecodeImage(object):
  24. """ decode image """
  25. def __init__(self,
  26. img_mode='RGB',
  27. channel_first=False,
  28. ignore_orientation=False,
  29. **kwargs):
  30. self.img_mode = img_mode
  31. self.channel_first = channel_first
  32. self.ignore_orientation = ignore_orientation
  33. def __call__(self, data):
  34. img = data['image']
  35. if six.PY2:
  36. assert isinstance(img, str) and len(
  37. img) > 0, "invalid input 'img' in DecodeImage"
  38. else:
  39. assert isinstance(img, bytes) and len(
  40. img) > 0, "invalid input 'img' in DecodeImage"
  41. img = np.frombuffer(img, dtype='uint8')
  42. if self.ignore_orientation:
  43. img = cv2.imdecode(img, cv2.IMREAD_IGNORE_ORIENTATION |
  44. cv2.IMREAD_COLOR)
  45. else:
  46. img = cv2.imdecode(img, 1)
  47. if img is None:
  48. return None
  49. if self.img_mode == 'GRAY':
  50. img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  51. elif self.img_mode == 'RGB':
  52. assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
  53. img.shape)
  54. img = img[:, :, ::-1]
  55. if self.channel_first:
  56. img = img.transpose((2, 0, 1))
  57. data['image'] = img
  58. return data
  59. class StandardizeImage(object):
  60. """normalize image
  61. Args:
  62. mean (list): im - mean
  63. std (list): im / std
  64. is_scale (bool): whether need im / 255
  65. norm_type (str): type in ['mean_std', 'none']
  66. """
  67. def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
  68. self.mean = mean
  69. self.std = std
  70. self.is_scale = is_scale
  71. self.norm_type = norm_type
  72. def __call__(self, im, im_info):
  73. """
  74. Args:
  75. im (np.ndarray): image (np.ndarray)
  76. im_info (dict): info of image
  77. Returns:
  78. im (np.ndarray): processed image (np.ndarray)
  79. im_info (dict): info of processed image
  80. """
  81. im = im.astype(np.float32, copy=False)
  82. if self.is_scale:
  83. scale = 1.0 / 255.0
  84. im *= scale
  85. if self.norm_type == 'mean_std':
  86. mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
  87. std = np.array(self.std)[np.newaxis, np.newaxis, :]
  88. im -= mean
  89. im /= std
  90. return im, im_info
  91. class NormalizeImage(object):
  92. """ normalize image such as subtract mean, divide std
  93. """
  94. def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
  95. if isinstance(scale, str):
  96. scale = eval(scale)
  97. self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
  98. mean = mean if mean is not None else [0.485, 0.456, 0.406]
  99. std = std if std is not None else [0.229, 0.224, 0.225]
  100. shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
  101. self.mean = np.array(mean).reshape(shape).astype('float32')
  102. self.std = np.array(std).reshape(shape).astype('float32')
  103. def __call__(self, data):
  104. img = data['image']
  105. from PIL import Image
  106. if isinstance(img, Image.Image):
  107. img = np.array(img)
  108. assert isinstance(img,
  109. np.ndarray), "invalid input 'img' in NormalizeImage"
  110. data['image'] = (
  111. img.astype('float32') * self.scale - self.mean) / self.std
  112. return data
  113. class ToCHWImage(object):
  114. """ convert hwc image to chw image
  115. """
  116. def __init__(self, **kwargs):
  117. pass
  118. def __call__(self, data):
  119. img = data['image']
  120. from PIL import Image
  121. if isinstance(img, Image.Image):
  122. img = np.array(img)
  123. data['image'] = img.transpose((2, 0, 1))
  124. return data
  125. class KeepKeys(object):
  126. def __init__(self, keep_keys, **kwargs):
  127. self.keep_keys = keep_keys
  128. def __call__(self, data):
  129. data_list = []
  130. for key in self.keep_keys:
  131. data_list.append(data[key])
  132. return data_list
  133. class Pad(object):
  134. def __init__(self, size=None, size_div=32, **kwargs):
  135. if size is not None and not isinstance(size, (int, list, tuple)):
  136. raise TypeError("Type of target_size is invalid. Now is {}".format(
  137. type(size)))
  138. if isinstance(size, int):
  139. size = [size, size]
  140. self.size = size
  141. self.size_div = size_div
  142. def __call__(self, data):
  143. img = data['image']
  144. img_h, img_w = img.shape[0], img.shape[1]
  145. if self.size:
  146. resize_h2, resize_w2 = self.size
  147. assert (
  148. img_h < resize_h2 and img_w < resize_w2
  149. ), '(h, w) of target size should be greater than (img_h, img_w)'
  150. else:
  151. resize_h2 = max(
  152. int(math.ceil(img.shape[0] / self.size_div) * self.size_div),
  153. self.size_div)
  154. resize_w2 = max(
  155. int(math.ceil(img.shape[1] / self.size_div) * self.size_div),
  156. self.size_div)
  157. img = cv2.copyMakeBorder(
  158. img,
  159. 0,
  160. resize_h2 - img_h,
  161. 0,
  162. resize_w2 - img_w,
  163. cv2.BORDER_CONSTANT,
  164. value=0)
  165. data['image'] = img
  166. return data
  167. class LinearResize(object):
  168. """resize image by target_size and max_size
  169. Args:
  170. target_size (int): the target size of image
  171. keep_ratio (bool): whether keep_ratio or not, default true
  172. interp (int): method of resize
  173. """
  174. def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
  175. if isinstance(target_size, int):
  176. target_size = [target_size, target_size]
  177. self.target_size = target_size
  178. self.keep_ratio = keep_ratio
  179. self.interp = interp
  180. def __call__(self, im, im_info):
  181. """
  182. Args:
  183. im (np.ndarray): image (np.ndarray)
  184. im_info (dict): info of image
  185. Returns:
  186. im (np.ndarray): processed image (np.ndarray)
  187. im_info (dict): info of processed image
  188. """
  189. assert len(self.target_size) == 2
  190. assert self.target_size[0] > 0 and self.target_size[1] > 0
  191. _im_channel = im.shape[2]
  192. im_scale_y, im_scale_x = self.generate_scale(im)
  193. im = cv2.resize(
  194. im,
  195. None,
  196. None,
  197. fx=im_scale_x,
  198. fy=im_scale_y,
  199. interpolation=self.interp)
  200. im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
  201. im_info['scale_factor'] = np.array(
  202. [im_scale_y, im_scale_x]).astype('float32')
  203. return im, im_info
  204. def generate_scale(self, im):
  205. """
  206. Args:
  207. im (np.ndarray): image (np.ndarray)
  208. Returns:
  209. im_scale_x: the resize ratio of X
  210. im_scale_y: the resize ratio of Y
  211. """
  212. origin_shape = im.shape[:2]
  213. _im_c = im.shape[2]
  214. if self.keep_ratio:
  215. im_size_min = np.min(origin_shape)
  216. im_size_max = np.max(origin_shape)
  217. target_size_min = np.min(self.target_size)
  218. target_size_max = np.max(self.target_size)
  219. im_scale = float(target_size_min) / float(im_size_min)
  220. if np.round(im_scale * im_size_max) > target_size_max:
  221. im_scale = float(target_size_max) / float(im_size_max)
  222. im_scale_x = im_scale
  223. im_scale_y = im_scale
  224. else:
  225. resize_h, resize_w = self.target_size
  226. im_scale_y = resize_h / float(origin_shape[0])
  227. im_scale_x = resize_w / float(origin_shape[1])
  228. return im_scale_y, im_scale_x
  229. class Resize(object):
  230. def __init__(self, size=(640, 640), **kwargs):
  231. self.size = size
  232. def resize_image(self, img):
  233. resize_h, resize_w = self.size
  234. ori_h, ori_w = img.shape[:2] # (h, w, c)
  235. ratio_h = float(resize_h) / ori_h
  236. ratio_w = float(resize_w) / ori_w
  237. img = cv2.resize(img, (int(resize_w), int(resize_h)))
  238. return img, [ratio_h, ratio_w]
  239. def __call__(self, data):
  240. img = data['image']
  241. if 'polys' in data:
  242. text_polys = data['polys']
  243. img_resize, [ratio_h, ratio_w] = self.resize_image(img)
  244. if 'polys' in data:
  245. new_boxes = []
  246. for box in text_polys:
  247. new_box = []
  248. for cord in box:
  249. new_box.append([cord[0] * ratio_w, cord[1] * ratio_h])
  250. new_boxes.append(new_box)
  251. data['polys'] = np.array(new_boxes, dtype=np.float32)
  252. data['image'] = img_resize
  253. return data
  254. class DetResizeForTest(object):
  255. def __init__(self, **kwargs):
  256. super(DetResizeForTest, self).__init__()
  257. self.resize_type = 0
  258. self.keep_ratio = False
  259. if 'image_shape' in kwargs:
  260. self.image_shape = kwargs['image_shape']
  261. self.resize_type = 1
  262. if 'keep_ratio' in kwargs:
  263. self.keep_ratio = kwargs['keep_ratio']
  264. elif 'limit_side_len' in kwargs:
  265. self.limit_side_len = kwargs['limit_side_len']
  266. self.limit_type = kwargs.get('limit_type', 'min')
  267. elif 'resize_long' in kwargs:
  268. self.resize_type = 2
  269. self.resize_long = kwargs.get('resize_long', 960)
  270. else:
  271. self.limit_side_len = 736
  272. self.limit_type = 'min'
  273. def __call__(self, data):
  274. img = data['image']
  275. src_h, src_w, _ = img.shape
  276. if sum([src_h, src_w]) < 64:
  277. img = self.image_padding(img)
  278. if self.resize_type == 0:
  279. # img, shape = self.resize_image_type0(img)
  280. img, [ratio_h, ratio_w] = self.resize_image_type0(img)
  281. elif self.resize_type == 2:
  282. img, [ratio_h, ratio_w] = self.resize_image_type2(img)
  283. else:
  284. # img, shape = self.resize_image_type1(img)
  285. img, [ratio_h, ratio_w] = self.resize_image_type1(img)
  286. data['image'] = img
  287. data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
  288. return data
  289. def image_padding(self, im, value=0):
  290. h, w, c = im.shape
  291. im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
  292. im_pad[:h, :w, :] = im
  293. return im_pad
  294. def resize_image_type1(self, img):
  295. resize_h, resize_w = self.image_shape
  296. ori_h, ori_w = img.shape[:2] # (h, w, c)
  297. if self.keep_ratio is True:
  298. resize_w = ori_w * resize_h / ori_h
  299. N = math.ceil(resize_w / 32)
  300. resize_w = N * 32
  301. ratio_h = float(resize_h) / ori_h
  302. ratio_w = float(resize_w) / ori_w
  303. img = cv2.resize(img, (int(resize_w), int(resize_h)))
  304. # return img, np.array([ori_h, ori_w])
  305. return img, [ratio_h, ratio_w]
  306. def resize_image_type0(self, img):
  307. """
  308. resize image to a size multiple of 32 which is required by the network
  309. args:
  310. img(array): array with shape [h, w, c]
  311. return(tuple):
  312. img, (ratio_h, ratio_w)
  313. """
  314. limit_side_len = self.limit_side_len
  315. h, w, c = img.shape
  316. # limit the max side
  317. if self.limit_type == 'max':
  318. if max(h, w) > limit_side_len:
  319. if h > w:
  320. ratio = float(limit_side_len) / h
  321. else:
  322. ratio = float(limit_side_len) / w
  323. else:
  324. ratio = 1.
  325. elif self.limit_type == 'min':
  326. if min(h, w) < limit_side_len:
  327. if h < w:
  328. ratio = float(limit_side_len) / h
  329. else:
  330. ratio = float(limit_side_len) / w
  331. else:
  332. ratio = 1.
  333. elif self.limit_type == 'resize_long':
  334. ratio = float(limit_side_len) / max(h, w)
  335. else:
  336. raise Exception('not support limit type, image ')
  337. resize_h = int(h * ratio)
  338. resize_w = int(w * ratio)
  339. resize_h = max(int(round(resize_h / 32) * 32), 32)
  340. resize_w = max(int(round(resize_w / 32) * 32), 32)
  341. try:
  342. if int(resize_w) <= 0 or int(resize_h) <= 0:
  343. return None, (None, None)
  344. img = cv2.resize(img, (int(resize_w), int(resize_h)))
  345. except BaseException:
  346. logging.exception("{} {} {}".format(img.shape, resize_w, resize_h))
  347. sys.exit(0)
  348. ratio_h = resize_h / float(h)
  349. ratio_w = resize_w / float(w)
  350. return img, [ratio_h, ratio_w]
  351. def resize_image_type2(self, img):
  352. h, w, _ = img.shape
  353. resize_w = w
  354. resize_h = h
  355. if resize_h > resize_w:
  356. ratio = float(self.resize_long) / resize_h
  357. else:
  358. ratio = float(self.resize_long) / resize_w
  359. resize_h = int(resize_h * ratio)
  360. resize_w = int(resize_w * ratio)
  361. max_stride = 128
  362. resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
  363. resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
  364. img = cv2.resize(img, (int(resize_w), int(resize_h)))
  365. ratio_h = resize_h / float(h)
  366. ratio_w = resize_w / float(w)
  367. return img, [ratio_h, ratio_w]
  368. class E2EResizeForTest(object):
  369. def __init__(self, **kwargs):
  370. super(E2EResizeForTest, self).__init__()
  371. self.max_side_len = kwargs['max_side_len']
  372. self.valid_set = kwargs['valid_set']
  373. def __call__(self, data):
  374. img = data['image']
  375. src_h, src_w, _ = img.shape
  376. if self.valid_set == 'totaltext':
  377. im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext(
  378. img, max_side_len=self.max_side_len)
  379. else:
  380. im_resized, (ratio_h, ratio_w) = self.resize_image(
  381. img, max_side_len=self.max_side_len)
  382. data['image'] = im_resized
  383. data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
  384. return data
  385. def resize_image_for_totaltext(self, im, max_side_len=512):
  386. h, w, _ = im.shape
  387. resize_w = w
  388. resize_h = h
  389. ratio = 1.25
  390. if h * ratio > max_side_len:
  391. ratio = float(max_side_len) / resize_h
  392. resize_h = int(resize_h * ratio)
  393. resize_w = int(resize_w * ratio)
  394. max_stride = 128
  395. resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
  396. resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
  397. im = cv2.resize(im, (int(resize_w), int(resize_h)))
  398. ratio_h = resize_h / float(h)
  399. ratio_w = resize_w / float(w)
  400. return im, (ratio_h, ratio_w)
  401. def resize_image(self, im, max_side_len=512):
  402. """
  403. resize image to a size multiple of max_stride which is required by the network
  404. :param im: the resized image
  405. :param max_side_len: limit of max image size to avoid out of memory in gpu
  406. :return: the resized image and the resize ratio
  407. """
  408. h, w, _ = im.shape
  409. resize_w = w
  410. resize_h = h
  411. # Fix the longer side
  412. if resize_h > resize_w:
  413. ratio = float(max_side_len) / resize_h
  414. else:
  415. ratio = float(max_side_len) / resize_w
  416. resize_h = int(resize_h * ratio)
  417. resize_w = int(resize_w * ratio)
  418. max_stride = 128
  419. resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
  420. resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
  421. im = cv2.resize(im, (int(resize_w), int(resize_h)))
  422. ratio_h = resize_h / float(h)
  423. ratio_w = resize_w / float(w)
  424. return im, (ratio_h, ratio_w)
  425. class KieResize(object):
  426. def __init__(self, **kwargs):
  427. super(KieResize, self).__init__()
  428. self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[
  429. 'img_scale'][1]
  430. def __call__(self, data):
  431. img = data['image']
  432. points = data['points']
  433. src_h, src_w, _ = img.shape
  434. im_resized, scale_factor, [ratio_h, ratio_w
  435. ], [new_h, new_w] = self.resize_image(img)
  436. resize_points = self.resize_boxes(img, points, scale_factor)
  437. data['ori_image'] = img
  438. data['ori_boxes'] = points
  439. data['points'] = resize_points
  440. data['image'] = im_resized
  441. data['shape'] = np.array([new_h, new_w])
  442. return data
  443. def resize_image(self, img):
  444. norm_img = np.zeros([1024, 1024, 3], dtype='float32')
  445. scale = [512, 1024]
  446. h, w = img.shape[:2]
  447. max_long_edge = max(scale)
  448. max_short_edge = min(scale)
  449. scale_factor = min(max_long_edge / max(h, w),
  450. max_short_edge / min(h, w))
  451. resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float(
  452. scale_factor) + 0.5)
  453. max_stride = 32
  454. resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
  455. resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
  456. im = cv2.resize(img, (resize_w, resize_h))
  457. new_h, new_w = im.shape[:2]
  458. w_scale = new_w / w
  459. h_scale = new_h / h
  460. scale_factor = np.array(
  461. [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
  462. norm_img[:new_h, :new_w, :] = im
  463. return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w]
  464. def resize_boxes(self, im, points, scale_factor):
  465. points = points * scale_factor
  466. img_shape = im.shape[:2]
  467. points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1])
  468. points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0])
  469. return points
  470. class SRResize(object):
  471. def __init__(self,
  472. imgH=32,
  473. imgW=128,
  474. down_sample_scale=4,
  475. keep_ratio=False,
  476. min_ratio=1,
  477. mask=False,
  478. infer_mode=False,
  479. **kwargs):
  480. self.imgH = imgH
  481. self.imgW = imgW
  482. self.keep_ratio = keep_ratio
  483. self.min_ratio = min_ratio
  484. self.down_sample_scale = down_sample_scale
  485. self.mask = mask
  486. self.infer_mode = infer_mode
  487. def __call__(self, data):
  488. imgH = self.imgH
  489. imgW = self.imgW
  490. images_lr = data["image_lr"]
  491. transform2 = ResizeNormalize(
  492. (imgW // self.down_sample_scale, imgH // self.down_sample_scale))
  493. images_lr = transform2(images_lr)
  494. data["img_lr"] = images_lr
  495. if self.infer_mode:
  496. return data
  497. images_HR = data["image_hr"]
  498. _label_strs = data["label"]
  499. transform = ResizeNormalize((imgW, imgH))
  500. images_HR = transform(images_HR)
  501. data["img_hr"] = images_HR
  502. return data
  503. class ResizeNormalize(object):
  504. def __init__(self, size, interpolation=Image.BICUBIC):
  505. self.size = size
  506. self.interpolation = interpolation
  507. def __call__(self, img):
  508. img = img.resize(self.size, self.interpolation)
  509. img_numpy = np.array(img).astype("float32")
  510. img_numpy = img_numpy.transpose((2, 0, 1)) / 255
  511. return img_numpy
  512. class GrayImageChannelFormat(object):
  513. """
  514. format gray scale image's channel: (3,h,w) -> (1,h,w)
  515. Args:
  516. inverse: inverse gray image
  517. """
  518. def __init__(self, inverse=False, **kwargs):
  519. self.inverse = inverse
  520. def __call__(self, data):
  521. img = data['image']
  522. img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  523. img_expanded = np.expand_dims(img_single_channel, 0)
  524. if self.inverse:
  525. data['image'] = np.abs(img_expanded - 1)
  526. else:
  527. data['image'] = img_expanded
  528. data['src_image'] = img
  529. return data
  530. class Permute(object):
  531. """permute image
  532. Args:
  533. to_bgr (bool): whether convert RGB to BGR
  534. channel_first (bool): whether convert HWC to CHW
  535. """
  536. def __init__(self, ):
  537. super(Permute, self).__init__()
  538. def __call__(self, im, im_info):
  539. """
  540. Args:
  541. im (np.ndarray): image (np.ndarray)
  542. im_info (dict): info of image
  543. Returns:
  544. im (np.ndarray): processed image (np.ndarray)
  545. im_info (dict): info of processed image
  546. """
  547. im = im.transpose((2, 0, 1)).copy()
  548. return im, im_info
  549. class PadStride(object):
  550. """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
  551. Args:
  552. stride (bool): model with FPN need image shape % stride == 0
  553. """
  554. def __init__(self, stride=0):
  555. self.coarsest_stride = stride
  556. def __call__(self, im, im_info):
  557. """
  558. Args:
  559. im (np.ndarray): image (np.ndarray)
  560. im_info (dict): info of image
  561. Returns:
  562. im (np.ndarray): processed image (np.ndarray)
  563. im_info (dict): info of processed image
  564. """
  565. coarsest_stride = self.coarsest_stride
  566. if coarsest_stride <= 0:
  567. return im, im_info
  568. im_c, im_h, im_w = im.shape
  569. pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
  570. pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
  571. padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
  572. padding_im[:, :im_h, :im_w] = im
  573. return padding_im, im_info
  574. def decode_image(im_file, im_info):
  575. """read rgb image
  576. Args:
  577. im_file (str|np.ndarray): input can be image path or np.ndarray
  578. im_info (dict): info of image
  579. Returns:
  580. im (np.ndarray): processed image (np.ndarray)
  581. im_info (dict): info of processed image
  582. """
  583. if isinstance(im_file, str):
  584. with open(im_file, 'rb') as f:
  585. im_read = f.read()
  586. data = np.frombuffer(im_read, dtype='uint8')
  587. im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
  588. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  589. else:
  590. im = im_file
  591. im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
  592. im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
  593. return im, im_info
  594. def preprocess(im, preprocess_ops):
  595. # process image by preprocess_ops
  596. im_info = {
  597. 'scale_factor': np.array(
  598. [1., 1.], dtype=np.float32),
  599. 'im_shape': None,
  600. }
  601. im, im_info = decode_image(im, im_info)
  602. for operator in preprocess_ops:
  603. im, im_info = operator(im, im_info)
  604. return im, im_info
  605. def nms(bboxes, scores, iou_thresh):
  606. import numpy as np
  607. x1 = bboxes[:, 0]
  608. y1 = bboxes[:, 1]
  609. x2 = bboxes[:, 2]
  610. y2 = bboxes[:, 3]
  611. areas = (y2 - y1) * (x2 - x1)
  612. indices = []
  613. index = scores.argsort()[::-1]
  614. while index.size > 0:
  615. i = index[0]
  616. indices.append(i)
  617. x11 = np.maximum(x1[i], x1[index[1:]])
  618. y11 = np.maximum(y1[i], y1[index[1:]])
  619. x22 = np.minimum(x2[i], x2[index[1:]])
  620. y22 = np.minimum(y2[i], y2[index[1:]])
  621. w = np.maximum(0, x22 - x11 + 1)
  622. h = np.maximum(0, y22 - y11 + 1)
  623. overlaps = w * h
  624. ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
  625. idx = np.where(ious <= iou_thresh)[0]
  626. index = index[idx + 1]
  627. return indices