#154 update vision

Merged
hanjr merged 4 commits from erpim into master 1 year ago
  1. ms_adapter/torchvision/transforms/autoaugment.py (+5, -2)
  2. ms_adapter/torchvision/transforms/functional.py (+12, -4)
  3. ms_adapter/torchvision/transforms/functional_tensor.py (+30, -11)

ms_adapter/torchvision/transforms/autoaugment.py (+5, -2)

@@ -6,6 +6,7 @@ from typing import List, Tuple, Optional, Dict
 
 import ms_adapter.pytorch as torch
 from ms_adapter.pytorch import Tensor
+from mindspore.ops import constexpr
 
 from . import functional as F, InterpolationMode
 
@@ -532,11 +533,13 @@ class AugMix(torch.nn.Module):
         )
         return s
 
-    @torch.jit.unused
+    # @torch.jit.unused
+    @constexpr
     def _pil_to_tensor(self, img) -> Tensor:
         return F.pil_to_tensor(img)
 
-    @torch.jit.unused
+    # @torch.jit.unused
+    @constexpr
     def _tensor_to_pil(self, img: Tensor):
         return F.to_pil_image(img)
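
Both PIL helpers drop @torch.jit.unused (left commented out) and are decorated with mindspore.ops.constexpr instead, presumably so that under MindSpore's graph mode they execute as ordinary Python at compile time rather than being traced. A minimal sketch of the constexpr pattern, with an illustrative helper name that is not from this PR:

    import mindspore as ms
    from mindspore import nn
    from mindspore.ops import constexpr

    @constexpr
    def _make_shape(h, w):
        # Runs as plain Python while the graph is compiled; only the
        # returned value is embedded in the graph, as a constant.
        return (1, h, w)

    class Net(nn.Cell):
        def construct(self, x):
            return ms.ops.ones(_make_shape(2, 3), ms.float32) + x

    out = Net()(ms.Tensor(1.0, ms.float32))
    print(out.shape)  # (1, 2, 3)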



ms_adapter/torchvision/transforms/functional.py (+12, -4)

@@ -143,14 +143,18 @@ def to_tensor(pic) -> Tensor:
     if _is_numpy(pic) and not _is_numpy_image(pic):
         raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")
 
-    default_float_dtype = torch.get_default_dtype()
+    # default_float_dtype = torch.get_default_dtype()
+    # TODO:
+    default_float_dtype = torch.float32
 
     if isinstance(pic, np.ndarray):
         # handle numpy array
         if pic.ndim == 2:
             pic = pic[:, :, None]
 
-        img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
+        # img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
+        # TODO:
+        img = torch.Tensor(pic.transpose((2, 0, 1))).contiguous()
         # backward compatibility
         if isinstance(img, torch.ByteTensor):
             return img.to(dtype=default_float_dtype).div(255)
@@ -160,11 +164,15 @@ def to_tensor(pic) -> Tensor:
     if accimage is not None and isinstance(pic, accimage.Image):
         nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
         pic.copyto(nppic)
-        return torch.from_numpy(nppic).to(dtype=default_float_dtype)
+        # return torch.from_numpy(nppic).to(dtype=default_float_dtype)
+        # TODO:
+        return torch.Tensor(nppic).to(dtype=default_float_dtype)
 
     # handle PIL Image
     mode_to_nptype = {"I": np.int32, "I;16": np.int16, "F": np.float32}
-    img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
+    # img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
+    # TODO:
+    img = torch.Tensor(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
 
     if pic.mode == "1":
         img = 255 * img
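
All three conversion sites in to_tensor() swap torch.from_numpy for the adapter's torch.Tensor constructor, and the default float dtype is pinned to float32 (the TODOs mark both as temporary). In PyTorch, from_numpy shares memory with the source array; the adapter presumably cannot offer that on MindSpore tensors, so it copies. A rough sketch of the resulting conversion path in plain MindSpore (illustrative values, not this PR's code):

    import numpy as np
    import mindspore as ms

    pic = np.random.randint(0, 256, size=(4, 4, 3), dtype=np.uint8)
    img = ms.Tensor(pic.transpose((2, 0, 1)))  # HWC -> CHW; copies, no shared memory
    img = img.astype(ms.float32) / 255.0       # scale uint8 pixels into [0, 1]
    print(img.shape, img.dtype)                # (3, 4, 4) Float32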


ms_adapter/torchvision/transforms/functional_tensor.py (+30, -11)

@@ -7,6 +7,7 @@ from typing import Optional, Tuple, List
 # from torch import Tensor
 # from torch.nn.functional import grid_sample, conv2d, interpolate, pad as torch_pad
 
+import mindspore as ms
 import ms_adapter.pytorch as torch
 from ms_adapter.pytorch import Tensor
 from ms_adapter.pytorch.nn.functional import grid_sample, conv2d, interpolate, pad as torch_pad
@@ -621,15 +622,22 @@ def _gen_affine_grid(
     # 1) we normalize grid values after applying theta
     # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate
 
+    # d = 0.5
+    # base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
+    # x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device)
+    # base_grid[..., 0].copy_(x_grid)
+    # y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1)
+    # base_grid[..., 1].copy_(y_grid)
+    # base_grid[..., 2].fill_(1)
     d = 0.5
-    base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
     x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device)
-    base_grid[..., 0].copy_(x_grid)
     # y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1)
+    x_grid = ms.ops.broadcast_to(x_grid, (1, oh, ow))
     y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device)
     y_grid = y_grid.unsqueeze(-1)
-    base_grid[..., 1].copy_(y_grid)
-    base_grid[..., 2].fill_(1)
+    y_grid = ms.ops.broadcast_to(y_grid, (1, oh, ow))
+    z_grid = ms.ops.ones((1, oh, ow), ms.float32)
+    base_grid = ms.ops.stack((x_grid, y_grid, z_grid), 3)
 
     rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device)
     output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta)
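
Rather than allocating base_grid with torch.empty and filling its slices in place via copy_()/fill_(), the grid is now built functionally: broadcast the x and y coordinate vectors over the full (1, oh, ow) plane, add a plane of ones for the homogeneous coordinate, and stack the three along a trailing axis. A standalone sketch with illustrative sizes (oh=2, ow=3), not this PR's code:

    import mindspore as ms

    oh, ow, d = 2, 3, 0.5
    x = ms.ops.linspace(ms.Tensor(-ow * 0.5 + d), ms.Tensor(ow * 0.5 + d - 1), ow)
    y = ms.ops.linspace(ms.Tensor(-oh * 0.5 + d), ms.Tensor(oh * 0.5 + d - 1), oh)
    x = ms.ops.broadcast_to(x, (1, oh, ow))                  # every row is the x vector
    y = ms.ops.broadcast_to(y.expand_dims(-1), (1, oh, ow))  # every column is the y vector
    z = ms.ops.ones((1, oh, ow), ms.float32)                 # homogeneous coordinate
    base_grid = ms.ops.stack((x, y, z), 3)
    print(base_grid.shape)  # (1, 2, 3, 3)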
@@ -713,13 +721,22 @@ def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype,
     )
     theta2 = torch.tensor([[[coeffs[6], coeffs[7], 1.0], [coeffs[6], coeffs[7], 1.0]]], dtype=dtype, device=device)
 
+    # d = 0.5
+    # base_grid = torch.empty(1, oh, ow, 3, dtype=dtype, device=device)
+    # x_grid = torch.linspace(d, ow * 1.0 + d - 1.0, steps=ow, device=device)
+    # base_grid[..., 0].copy_(x_grid)
+    # y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device).unsqueeze_(-1)
+    # base_grid[..., 1].copy_(y_grid)
+    # base_grid[..., 2].fill_(1)
+
     d = 0.5
-    base_grid = torch.empty(1, oh, ow, 3, dtype=dtype, device=device)
     x_grid = torch.linspace(d, ow * 1.0 + d - 1.0, steps=ow, device=device)
-    base_grid[..., 0].copy_(x_grid)
-    y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device).unsqueeze_(-1)
-    base_grid[..., 1].copy_(y_grid)
-    base_grid[..., 2].fill_(1)
+    x_grid = ms.ops.broadcast_to(x_grid, (1, oh, ow))
+    y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device)
+    y_grid = y_grid.unsqueeze(-1)
+    y_grid = ms.ops.broadcast_to(y_grid, (1, oh, ow))
+    z_grid = ms.ops.ones((1, oh, ow), ms.float32)
+    base_grid = ms.ops.stack((x_grid, y_grid, z_grid), 3)
 
     rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=dtype, device=device)
     output_grid1 = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta1)
@@ -963,7 +980,9 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool
         mean = mean.view(-1, 1, 1)
     if std.ndim == 1:
         std = std.view(-1, 1, 1)
-    tensor.sub_(mean).div_(std)
+    # tensor.sub_(mean).div_(std)
+    tensor = tensor.sub(mean)
+    tensor = tensor.div(std)
     return tensor
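
normalize() replaces the fused in-place update tensor.sub_(mean).div_(std) with two out-of-place ops and explicit rebinding, presumably because the adapter cannot lower PyTorch's in-place tensor methods to MindSpore. One behavioral difference worth noting: a caller relying on inplace=True no longer sees its input mutated, so the result must always be taken from the return value. A small sketch of the pattern (illustrative values):

    import mindspore as ms

    tensor = ms.ops.ones((3, 2, 2), ms.float32)
    mean = ms.Tensor([0.5, 0.5, 0.5], ms.float32).view(-1, 1, 1)
    std = ms.Tensor([0.25, 0.25, 0.25], ms.float32).view(-1, 1, 1)
    tensor = tensor.sub(mean)  # new tensor; the operand is untouched
    tensor = tensor.div(std)
    print(tensor[0, 0, 0])     # 2.0, i.e. (1 - 0.5) / 0.25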



