Source code for mmcv.image.geometric

# Copyright (c) Open-MMLab. All rights reserved.
import numbers

import cv2
import numpy as np


def _scale_size(size, scale):
    """Rescale a size by a ratio.

    Args:
        size (tuple[int]): (w, h).
        scale (float): Scaling factor.

    Returns:
        tuple[int]: scaled size.
    """
    w, h = size
    return int(w * float(scale) + 0.5), int(h * float(scale) + 0.5)


interp_codes = {
    'nearest': cv2.INTER_NEAREST,
    'bilinear': cv2.INTER_LINEAR,
    'bicubic': cv2.INTER_CUBIC,
    'area': cv2.INTER_AREA,
    'lanczos': cv2.INTER_LANCZOS4
}


[docs]def imresize(img, size, return_scale=False, interpolation='bilinear', out=None): """Resize image to a given size. Args: img (ndarray): The input image. size (tuple[int]): Target size (w, h). return_scale (bool): Whether to return `w_scale` and `h_scale`. interpolation (str): Interpolation method, accepted values are "nearest", "bilinear", "bicubic", "area", "lanczos". out (ndarray): The output destination. Returns: tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or `resized_img`. """ h, w = img.shape[:2] resized_img = cv2.resize( img, size, dst=out, interpolation=interp_codes[interpolation]) if not return_scale: return resized_img else: w_scale = size[0] / w h_scale = size[1] / h return resized_img, w_scale, h_scale
[docs]def imresize_like(img, dst_img, return_scale=False, interpolation='bilinear'): """Resize image to the same size of a given image. Args: img (ndarray): The input image. dst_img (ndarray): The target image. return_scale (bool): Whether to return `w_scale` and `h_scale`. interpolation (str): Same as :func:`resize`. Returns: tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or `resized_img`. """ h, w = dst_img.shape[:2] return imresize(img, (w, h), return_scale, interpolation)
[docs]def rescale_size(old_size, scale, return_scale=False): """Calculate the new size to be rescaled to. Args: old_size (tuple[int]): The old size (w, h) of image. scale (float | tuple[int]): The scaling factor or maximum size. If it is a float number, then the image will be rescaled by this factor, else if it is a tuple of 2 integers, then the image will be rescaled as large as possible within the scale. return_scale (bool): Whether to return the scaling factor besides the rescaled image size. Returns: tuple[int]: The new rescaled image size. """ w, h = old_size if isinstance(scale, (float, int)): if scale <= 0: raise ValueError(f'Invalid scale {scale}, must be positive.') scale_factor = scale elif isinstance(scale, tuple): max_long_edge = max(scale) max_short_edge = min(scale) scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w)) else: raise TypeError( f'Scale must be a number or tuple of int, but got {type(scale)}') new_size = _scale_size((w, h), scale_factor) if return_scale: return new_size, scale_factor else: return new_size
[docs]def imrescale(img, scale, return_scale=False, interpolation='bilinear'): """Resize image while keeping the aspect ratio. Args: img (ndarray): The input image. scale (float | tuple[int]): The scaling factor or maximum size. If it is a float number, then the image will be rescaled by this factor, else if it is a tuple of 2 integers, then the image will be rescaled as large as possible within the scale. return_scale (bool): Whether to return the scaling factor besides the rescaled image. interpolation (str): Same as :func:`resize`. Returns: ndarray: The rescaled image. """ h, w = img.shape[:2] new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) rescaled_img = imresize(img, new_size, interpolation=interpolation) if return_scale: return rescaled_img, scale_factor else: return rescaled_img
[docs]def imflip(img, direction='horizontal'): """Flip an image horizontally or vertically. Args: img (ndarray): Image to be flipped. direction (str): The flip direction, either "horizontal" or "vertical". Returns: ndarray: The flipped image. """ assert direction in ['horizontal', 'vertical'] if direction == 'horizontal': return np.flip(img, axis=1) else: return np.flip(img, axis=0)
[docs]def imflip_(img, direction='horizontal'): """Inplace flip an image horizontally or vertically. Args: img (ndarray): Image to be flipped. direction (str): The flip direction, either "horizontal" or "vertical". Returns: ndarray: The flipped image (inplace). """ assert direction in ['horizontal', 'vertical'] if direction == 'horizontal': return cv2.flip(img, 1, img) else: return cv2.flip(img, 0, img)
[docs]def imrotate(img, angle, center=None, scale=1.0, border_value=0, auto_bound=False): """Rotate an image. Args: img (ndarray): Image to be rotated. angle (float): Rotation angle in degrees, positive values mean clockwise rotation. center (tuple[float], optional): Center point (w, h) of the rotation in the source image. If not specified, the center of the image will be used. scale (float): Isotropic scale factor. border_value (int): Border value. auto_bound (bool): Whether to adjust the image size to cover the whole rotated image. Returns: ndarray: The rotated image. """ if center is not None and auto_bound: raise ValueError('`auto_bound` conflicts with `center`') h, w = img.shape[:2] if center is None: center = ((w - 1) * 0.5, (h - 1) * 0.5) assert isinstance(center, tuple) matrix = cv2.getRotationMatrix2D(center, -angle, scale) if auto_bound: cos = np.abs(matrix[0, 0]) sin = np.abs(matrix[0, 1]) new_w = h * sin + w * cos new_h = h * cos + w * sin matrix[0, 2] += (new_w - w) * 0.5 matrix[1, 2] += (new_h - h) * 0.5 w = int(np.round(new_w)) h = int(np.round(new_h)) rotated = cv2.warpAffine(img, matrix, (w, h), borderValue=border_value) return rotated
def bbox_clip(bboxes, img_shape): """Clip bboxes to fit the image shape. Args: bboxes (ndarray): Shape (..., 4*k) img_shape (tuple[int]): (height, width) of the image. Returns: ndarray: Clipped bboxes. """ assert bboxes.shape[-1] % 4 == 0 cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype) cmin[0::2] = img_shape[1] - 1 cmin[1::2] = img_shape[0] - 1 clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0) return clipped_bboxes def bbox_scaling(bboxes, scale, clip_shape=None): """Scaling bboxes w.r.t the box center. Args: bboxes (ndarray): Shape(..., 4). scale (float): Scaling factor. clip_shape (tuple[int], optional): If specified, bboxes that exceed the boundary will be clipped according to the given shape (h, w). Returns: ndarray: Scaled bboxes. """ if float(scale) == 1.0: scaled_bboxes = bboxes.copy() else: w = bboxes[..., 2] - bboxes[..., 0] + 1 h = bboxes[..., 3] - bboxes[..., 1] + 1 dw = (w * (scale - 1)) * 0.5 dh = (h * (scale - 1)) * 0.5 scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) if clip_shape is not None: return bbox_clip(scaled_bboxes, clip_shape) else: return scaled_bboxes
[docs]def imcrop(img, bboxes, scale=1.0, pad_fill=None): """Crop image patches. 3 steps: scale the bboxes -> clip bboxes -> crop and pad. Args: img (ndarray): Image to be cropped. bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. scale (float, optional): Scale ratio of bboxes, the default value 1.0 means no padding. pad_fill (Number | list[Number]): Value to be filled for padding. Default: None, which means no padding. Returns: list[ndarray] | ndarray: The cropped image patches. """ chn = 1 if img.ndim == 2 else img.shape[2] if pad_fill is not None: if isinstance(pad_fill, (int, float)): pad_fill = [pad_fill for _ in range(chn)] assert len(pad_fill) == chn _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32) clipped_bbox = bbox_clip(scaled_bboxes, img.shape) patches = [] for i in range(clipped_bbox.shape[0]): x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) if pad_fill is None: patch = img[y1:y2 + 1, x1:x2 + 1, ...] else: _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) if chn == 1: patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) else: patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) patch = np.array( pad_fill, dtype=img.dtype) * np.ones( patch_shape, dtype=img.dtype) x_start = 0 if _x1 >= 0 else -_x1 y_start = 0 if _y1 >= 0 else -_y1 w = x2 - x1 + 1 h = y2 - y1 + 1 patch[y_start:y_start + h, x_start:x_start + w, ...] = img[y1:y1 + h, x1:x1 + w, ...] patches.append(patch) if bboxes.ndim == 1: return patches[0] else: return patches
[docs]def impad(img, *, shape=None, padding=None, pad_val=0, padding_mode='constant'): """Pad the given image to a certain shape or pad on all sides with specified padding mode and padding value. Args: img (ndarray): Image to be padded. shape (tuple[int]): Expected padding shape (h, w). Default: None. padding (int or tuple[int]): Padding on each border. If a single int is provided this is used to pad all borders. If tuple of length 2 is provided this is the padding on left/right and top/bottom respectively. If a tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. Default: None. Note that `shape` and `padding` can not be both set. pad_val (Number | Sequence[Number]): Values to be filled in padding areas when padding_mode is 'constant'. Default: 0. padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Default: constant. - constant: pads with a constant value, this value is specified with pad_val. - edge: pads with the last value at the edge of the image. - reflect: pads with reflection of image without repeating the last value on the edge. For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode will result in [3, 2, 1, 2, 3, 4, 3, 2]. - symmetric: pads with reflection of image repeating the last value on the edge. For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode will result in [2, 1, 1, 2, 3, 4, 4, 3] Returns: ndarray: The padded image. """ assert (shape is not None) ^ (padding is not None) if shape is not None: padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) # check pad_val if isinstance(pad_val, tuple): assert len(pad_val) == img.shape[-1] elif not isinstance(pad_val, numbers.Number): raise TypeError('pad_val must be a int or a tuple. ' f'But received {type(pad_val)}') # check padding if isinstance(padding, tuple) and len(padding) in [2, 4]: if len(padding) == 2: padding = (padding[0], padding[1], padding[0], padding[1]) elif isinstance(padding, numbers.Number): padding = (padding, padding, padding, padding) else: raise ValueError('Padding must be a int or a 2, or 4 element tuple.' f'But received {padding}') # check padding mode assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] border_type = { 'constant': cv2.BORDER_CONSTANT, 'edge': cv2.BORDER_REPLICATE, 'reflect': cv2.BORDER_REFLECT_101, 'symmetric': cv2.BORDER_REFLECT } img = cv2.copyMakeBorder( img, padding[1], padding[3], padding[0], padding[2], border_type[padding_mode], value=pad_val) return img
[docs]def impad_to_multiple(img, divisor, pad_val=0): """Pad an image to ensure each edge to be multiple to some number. Args: img (ndarray): Image to be padded. divisor (int): Padded image edges will be multiple to divisor. pad_val (Number | Sequence[Number]): Same as :func:`impad`. Returns: ndarray: The padded image. """ pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor return impad(img, shape=(pad_h, pad_w))