#154 update vision

Merged
hanjr merged 4 commits from erpim into master 1 year ago
  1. ms_adapter/torchvision/transforms/autoaugment.py (+5, -2)
  2. ms_adapter/torchvision/transforms/functional.py (+12, -4)
  3. ms_adapter/torchvision/transforms/functional_tensor.py (+30, -11)

ms_adapter/torchvision/transforms/autoaugment.py (+5, -2)

@@ -6,6 +6,7 @@ from typing import List, Tuple, Optional, Dict
 
 import ms_adapter.pytorch as torch
 from ms_adapter.pytorch import Tensor
+from mindspore.ops import constexpr
 
 from . import functional as F, InterpolationMode
 
@@ -532,11 +533,13 @@ class AugMix(torch.nn.Module):
         )
         return s
 
-    @torch.jit.unused
+    # @torch.jit.unused
+    @constexpr
     def _pil_to_tensor(self, img) -> Tensor:
         return F.pil_to_tensor(img)
 
-    @torch.jit.unused
+    # @torch.jit.unused
+    @constexpr
     def _tensor_to_pil(self, img: Tensor):
         return F.to_pil_image(img)
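
Both PIL helpers drop @torch.jit.unused (left commented out) and are decorated with mindspore.ops.constexpr instead, presumably so that under MindSpore's graph mode they execute as ordinary Python at compile time rather than being traced. A minimal sketch of the constexpr pattern, with an illustrative helper name that is not from this PR:

    import mindspore as ms
    from mindspore import nn
    from mindspore.ops import constexpr

    @constexpr
    def _make_shape(h, w):
        # Runs as plain Python while the graph is compiled; only the
        # returned value is embedded in the graph, as a constant.
        return (1, h, w)

    class Net(nn.Cell):
        def construct(self, x):
            return ms.ops.ones(_make_shape(2, 3), ms.float32) + x

    out = Net()(ms.Tensor(1.0, ms.float32))
    print(out.shape)  # (1, 2, 3)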



ms_adapter/torchvision/transforms/functional.py (+12, -4)

@@ -143,14 +143,18 @@ def to_tensor(pic) -> Tensor:
     if _is_numpy(pic) and not _is_numpy_image(pic):
         raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")
 
-    default_float_dtype = torch.get_default_dtype()
+    # default_float_dtype = torch.get_default_dtype()
+    # TODO:
+    default_float_dtype = torch.float32
 
     if isinstance(pic, np.ndarray):
         # handle numpy array
         if pic.ndim == 2:
             pic = pic[:, :, None]
 
-        img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
+        # img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
+        # TODO:
+        img = torch.Tensor(pic.transpose((2, 0, 1))).contiguous()
         # backward compatibility
         if isinstance(img, torch.ByteTensor):
             return img.to(dtype=default_float_dtype).div(255)
@@ -160,11 +164,15 @@ def to_tensor(pic) -> Tensor:
     if accimage is not None and isinstance(pic, accimage.Image):
         nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
         pic.copyto(nppic)
-        return torch.from_numpy(nppic).to(dtype=default_float_dtype)
+        # return torch.from_numpy(nppic).to(dtype=default_float_dtype)
+        # TODO:
+        return torch.Tensor(nppic).to(dtype=default_float_dtype)
 
     # handle PIL Image
     mode_to_nptype = {"I": np.int32, "I;16": np.int16, "F": np.float32}
-    img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
+    # img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
+    # TODO:
+    img = torch.Tensor(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
 
     if pic.mode == "1":
         img = 255 * img
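
All three conversion sites in to_tensor() swap torch.from_numpy for the adapter's torch.Tensor constructor, and the default float dtype is pinned to float32 (the TODOs mark both as temporary). In PyTorch, from_numpy shares memory with the source array; the adapter presumably cannot offer that on MindSpore tensors, so it copies. A rough sketch of the resulting conversion path in plain MindSpore (illustrative values, not this PR's code):

    import numpy as np
    import mindspore as ms

    pic = np.random.randint(0, 256, size=(4, 4, 3), dtype=np.uint8)
    img = ms.Tensor(pic.transpose((2, 0, 1)))  # HWC -> CHW; copies, no shared memory
    img = img.astype(ms.float32) / 255.0       # scale uint8 pixels into [0, 1]
    print(img.shape, img.dtype)                # (3, 4, 4) Float32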


ms_adapter/torchvision/transforms/functional_tensor.py (+30, -11)

@@ -7,6 +7,7 @@ from typing import Optional, Tuple, List
 # from torch import Tensor
 # from torch.nn.functional import grid_sample, conv2d, interpolate, pad as torch_pad
 
+import mindspore as ms
 import ms_adapter.pytorch as torch
 from ms_adapter.pytorch import Tensor
 from ms_adapter.pytorch.nn.functional import grid_sample, conv2d, interpolate, pad as torch_pad
@@ -621,15 +622,22 @@ def _gen_affine_grid(
     # 1) we normalize grid values after applying theta
     # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate
 
+    # d = 0.5
+    # base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
+    # x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device)
+    # base_grid[..., 0].copy_(x_grid)
+    # y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1)
+    # base_grid[..., 1].copy_(y_grid)
+    # base_grid[..., 2].fill_(1)
     d = 0.5
-    base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
     x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device)
-    base_grid[..., 0].copy_(x_grid)
     # y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1)
+    x_grid = ms.ops.broadcast_to(x_grid, (1, oh, ow))
     y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device)
     y_grid = y_grid.unsqueeze(-1)
-    base_grid[..., 1].copy_(y_grid)
-    base_grid[..., 2].fill_(1)
+    y_grid = ms.ops.broadcast_to(y_grid, (1, oh, ow))
+    z_grid = ms.ops.ones((1, oh, ow), ms.float32)
+    base_grid = ms.ops.stack((x_grid, y_grid, z_grid), 3)
 
     rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device)
     output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta)
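
Rather than allocating base_grid with torch.empty and filling its slices in place via copy_()/fill_(), the grid is now built functionally: broadcast the x and y coordinate vectors over the full (1, oh, ow) plane, add a plane of ones for the homogeneous coordinate, and stack the three along a trailing axis. A standalone sketch with illustrative sizes (oh=2, ow=3), not this PR's code:

    import mindspore as ms

    oh, ow, d = 2, 3, 0.5
    x = ms.ops.linspace(ms.Tensor(-ow * 0.5 + d), ms.Tensor(ow * 0.5 + d - 1), ow)
    y = ms.ops.linspace(ms.Tensor(-oh * 0.5 + d), ms.Tensor(oh * 0.5 + d - 1), oh)
    x = ms.ops.broadcast_to(x, (1, oh, ow))                  # every row is the x vector
    y = ms.ops.broadcast_to(y.expand_dims(-1), (1, oh, ow))  # every column is the y vector
    z = ms.ops.ones((1, oh, ow), ms.float32)                 # homogeneous coordinate
    base_grid = ms.ops.stack((x, y, z), 3)
    print(base_grid.shape)  # (1, 2, 3, 3)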
@@ -713,13 +721,22 @@ def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype,
     )
     theta2 = torch.tensor([[[coeffs[6], coeffs[7], 1.0], [coeffs[6], coeffs[7], 1.0]]], dtype=dtype, device=device)
 
+    # d = 0.5
+    # base_grid = torch.empty(1, oh, ow, 3, dtype=dtype, device=device)
+    # x_grid = torch.linspace(d, ow * 1.0 + d - 1.0, steps=ow, device=device)
+    # base_grid[..., 0].copy_(x_grid)
+    # y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device).unsqueeze_(-1)
+    # base_grid[..., 1].copy_(y_grid)
+    # base_grid[..., 2].fill_(1)
+
     d = 0.5
-    base_grid = torch.empty(1, oh, ow, 3, dtype=dtype, device=device)
     x_grid = torch.linspace(d, ow * 1.0 + d - 1.0, steps=ow, device=device)
-    base_grid[..., 0].copy_(x_grid)
-    y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device).unsqueeze_(-1)
-    base_grid[..., 1].copy_(y_grid)
-    base_grid[..., 2].fill_(1)
+    x_grid = ms.ops.broadcast_to(x_grid, (1, oh, ow))
+    y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device)
+    y_grid = y_grid.unsqueeze(-1)
+    y_grid = ms.ops.broadcast_to(y_grid, (1, oh, ow))
+    z_grid = ms.ops.ones((1, oh, ow), ms.float32)
+    base_grid = ms.ops.stack((x_grid, y_grid, z_grid), 3)
 
     rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=dtype, device=device)
     output_grid1 = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta1)
@@ -963,7 +980,9 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool
         mean = mean.view(-1, 1, 1)
     if std.ndim == 1:
         std = std.view(-1, 1, 1)
-    tensor.sub_(mean).div_(std)
+    # tensor.sub_(mean).div_(std)
+    tensor = tensor.sub(mean)
+    tensor = tensor.div(std)
     return tensor
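
normalize() replaces the fused in-place update tensor.sub_(mean).div_(std) with two out-of-place ops and explicit rebinding, presumably because the adapter cannot lower PyTorch's in-place tensor methods to MindSpore. One behavioral difference worth noting: a caller relying on inplace=True no longer sees its input mutated, so the result must always be taken from the return value. A small sketch of the pattern (illustrative values):

    import mindspore as ms

    tensor = ms.ops.ones((3, 2, 2), ms.float32)
    mean = ms.Tensor([0.5, 0.5, 0.5], ms.float32).view(-1, 1, 1)
    std = ms.Tensor([0.25, 0.25, 0.25], ms.float32).view(-1, 1, 1)
    tensor = tensor.sub(mean)  # new tensor; the operand is untouched
    tensor = tensor.div(std)
    print(tensor[0, 0, 0])     # 2.0, i.e. (1 - 0.5) / 0.25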



