# Repository: jtc/new1 (forked from lsyzz/new1)

import numpy as np
import math

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import __main__
import sys
import os
import time
import glob


# ====================================================
# 第一部分：BiSeNetV2 网络区域
# ====================================================

class ConvBNReLU(nn.Module):
    """Basic building block: Conv2d, then BatchNorm2d, then in-place ReLU.

    Args:
        in_chan: number of input channels of the convolution.
        out_chan: number of output channels (also the BatchNorm width).
        ks: convolution kernel size.
        stride: convolution stride.
        padding: convolution zero padding.
        dilation: convolution dilation.
        groups: convolution groups.
        bias: whether the convolution has a bias term (off by default,
            the BatchNorm that follows has its own shift).
    """

    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1,
                 dilation=1, groups=1, bias=False):
        super().__init__()
        conv_options = dict(
            kernel_size=ks,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        # Attribute names are part of the checkpoint (state_dict) layout.
        self.conv = nn.Conv2d(in_chan, out_chan, **conv_options)
        self.bn = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return relu(bn(conv(x)))."""
        return self.relu(self.bn(self.conv(x)))


class UpSample(nn.Module):
    """Learned upsampling: 1x1 convolution followed by PixelShuffle.

    The 1x1 convolution expands ``n_chan`` channels to
    ``n_chan * factor * factor`` so that PixelShuffle(factor) yields
    ``n_chan`` channels again at ``factor`` times the spatial size.

    Args:
        n_chan: number of input (and output) channels.
        factor: spatial upscaling factor.
    """

    def __init__(self, n_chan, factor=2):
        super().__init__()
        expanded_chan = n_chan * factor * factor
        self.proj = nn.Conv2d(
            n_chan, expanded_chan, kernel_size=1, stride=1, padding=0)
        self.up = nn.PixelShuffle(factor)
        self.init_weight()

    def init_weight(self):
        """Re-initialise the projection weight with Xavier-normal (gain 1)."""
        nn.init.xavier_normal_(self.proj.weight, gain=1.)

    def forward(self, x):
        """Project the channels, then rearrange them into space."""
        return self.up(self.proj(x))


class DetailBranch(nn.Module):
    """Detail branch: three stages of 3x3 ConvBNReLU layers.

    Every stage starts with a stride-2 layer and continues with stride-1
    layers, so the branch contains three stride-2 convolutions in total.
    Channel widths are 3 -> 64 (S1), 64 -> 64 (S2), 64 -> 128 (S3).
    """

    def __init__(self):
        super().__init__()
        self.S1 = self._stage(3, 64, n_refine=1)
        self.S2 = self._stage(64, 64, n_refine=2)
        self.S3 = self._stage(64, 128, n_refine=2)

    @staticmethod
    def _stage(in_chan, out_chan, n_refine):
        """Build one stage: a stride-2 layer, then ``n_refine`` stride-1 layers."""
        layers = [ConvBNReLU(in_chan, out_chan, 3, stride=2)]
        layers.extend(
            ConvBNReLU(out_chan, out_chan, 3, stride=1)
            for _ in range(n_refine))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the three stages in order and return the S3 feature map."""
        return self.S3(self.S2(self.S1(x)))


class StemBlock(nn.Module):
    """Stem of the segment branch.

    A stride-2 convolution is followed by two parallel stride-2 paths
    (a 1x1 + 3x3 convolution path and a 3x3 max-pool path). Their outputs
    are concatenated along the channel axis (16 + 16 = 32) and fused back
    to 16 channels by a 3x3 convolution.
    """

    def __init__(self):
        super().__init__()
        self.conv = ConvBNReLU(3, 16, 3, stride=2)
        bottleneck = ConvBNReLU(16, 8, 1, stride=1, padding=0)
        downsample = ConvBNReLU(8, 16, 3, stride=2)
        self.left = nn.Sequential(bottleneck, downsample)
        self.right = nn.MaxPool2d(
            kernel_size=3, stride=2, padding=1, ceil_mode=False)
        self.fuse = ConvBNReLU(32, 16, 3, stride=1)

    def forward(self, x):
        """Return the fused 16-channel stem feature map."""
        stem = self.conv(x)
        branches = (self.left(stem), self.right(stem))
        return self.fuse(torch.cat(branches, dim=1))


class CEBlock(nn.Module):
    """Context embedding block on 128-channel features.

    The spatial mean of the input is batch-normalised, passed through a 1x1
    ConvBNReLU, broadcast-added back onto the input, and the sum goes
    through a final 3x3 ConvBNReLU.
    """

    def __init__(self):
        super().__init__()
        self.bn = nn.BatchNorm2d(128)
        self.conv_gap = ConvBNReLU(128, 128, 1, stride=1, padding=0)
        # TODO: in the paper this is a plain conv2d without bn-relu
        self.conv_last = ConvBNReLU(128, 128, 3, stride=1)

    def forward(self, x):
        """Return the context-enhanced feature map (same shape as ``x``)."""
        # Global average over H and W; keepdim so it broadcasts against x.
        context = x.mean(dim=(2, 3), keepdim=True)
        context = self.conv_gap(self.bn(context))
        return self.conv_last(context + x)


class GELayerS1(nn.Module):
    """Gather-and-expansion layer with stride 1 and an identity residual.

    Args:
        in_chan: input channels.
        out_chan: output channels; the residual add in ``forward`` requires
            this to be compatible with ``in_chan``.
        exp_ratio: channel multiplier of the depthwise expansion.
    """

    def __init__(self, in_chan, out_chan, exp_ratio=6):
        super().__init__()
        mid_chan = in_chan * exp_ratio
        self.conv1 = ConvBNReLU(in_chan, in_chan, 3, stride=1)

        expand = nn.Conv2d(
            in_chan, mid_chan, kernel_size=3, stride=1,
            padding=1, groups=in_chan, bias=False)
        self.dwconv = nn.Sequential(
            expand,
            nn.BatchNorm2d(mid_chan),
            nn.ReLU(inplace=True),  # not shown in paper
        )

        project = nn.Conv2d(
            mid_chan, out_chan, kernel_size=1, stride=1,
            padding=0, bias=False)
        final_bn = nn.BatchNorm2d(out_chan)
        # Marker checked by BiSeNetV2.init_weights to zero-init this BN weight.
        final_bn.last_bn = True
        self.conv2 = nn.Sequential(project, final_bn)

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return relu(conv2(dwconv(conv1(x))) + x)."""
        residual = self.conv2(self.dwconv(self.conv1(x)))
        return self.relu(residual + x)


class GELayerS2(nn.Module):
    """Gather-and-expansion layer with stride 2 and a projected shortcut.

    The main path expands channels with a stride-2 depthwise convolution,
    applies a second depthwise convolution and projects to ``out_chan``.
    The shortcut downsamples the input with a stride-2 depthwise convolution
    followed by a 1x1 projection so that both paths can be summed.

    Args:
        in_chan: input channels.
        out_chan: output channels.
        exp_ratio: channel multiplier of the depthwise expansion.
    """

    def __init__(self, in_chan, out_chan, exp_ratio=6):
        super().__init__()
        mid_chan = in_chan * exp_ratio
        self.conv1 = ConvBNReLU(in_chan, in_chan, 3, stride=1)

        expand_down = nn.Conv2d(
            in_chan, mid_chan, kernel_size=3, stride=2,
            padding=1, groups=in_chan, bias=False)
        self.dwconv1 = nn.Sequential(expand_down, nn.BatchNorm2d(mid_chan))

        depthwise = nn.Conv2d(
            mid_chan, mid_chan, kernel_size=3, stride=1,
            padding=1, groups=mid_chan, bias=False)
        self.dwconv2 = nn.Sequential(
            depthwise,
            nn.BatchNorm2d(mid_chan),
            nn.ReLU(inplace=True),  # not shown in paper
        )

        project = nn.Conv2d(
            mid_chan, out_chan, kernel_size=1, stride=1,
            padding=0, bias=False)
        final_bn = nn.BatchNorm2d(out_chan)
        # Marker checked by BiSeNetV2.init_weights to zero-init this BN weight.
        final_bn.last_bn = True
        self.conv2 = nn.Sequential(project, final_bn)

        shortcut_down = nn.Conv2d(
            in_chan, in_chan, kernel_size=3, stride=2,
            padding=1, groups=in_chan, bias=False)
        shortcut_project = nn.Conv2d(
            in_chan, out_chan, kernel_size=1, stride=1,
            padding=0, bias=False)
        self.shortcut = nn.Sequential(
            shortcut_down,
            nn.BatchNorm2d(in_chan),
            shortcut_project,
            nn.BatchNorm2d(out_chan),
        )

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return relu(main_path(x) + shortcut(x))."""
        main = self.conv1(x)
        main = self.dwconv2(self.dwconv1(main))
        main = self.conv2(main)
        skip = self.shortcut(x)
        return self.relu(main + skip)


class SegmentBranch(nn.Module):
    """Semantic branch: StemBlock, three gather-and-expansion stages, CEBlock.

    Channel widths per stage: stem 16, S3 32, S4 64, S5 128. Each of S3, S4
    and S5_4 begins with a stride-2 GELayerS2.
    """

    def __init__(self):
        super().__init__()
        self.S1S2 = StemBlock()
        self.S3 = nn.Sequential(GELayerS2(16, 32), GELayerS1(32, 32))
        self.S4 = nn.Sequential(GELayerS2(32, 64), GELayerS1(64, 64))
        self.S5_4 = nn.Sequential(
            GELayerS2(64, 128),
            GELayerS1(128, 128),
            GELayerS1(128, 128),
            GELayerS1(128, 128),
        )
        self.S5_5 = CEBlock()

    def forward(self, x):
        """Return the feature maps of every stage.

        Returns:
            Tuple ``(feat2, feat3, feat4, feat5_4, feat5_5)`` holding the
            outputs of S1S2, S3, S4, S5_4 and S5_5 respectively.
        """
        stem_out = self.S1S2(x)
        s3_out = self.S3(stem_out)
        s4_out = self.S4(s3_out)
        s5_4_out = self.S5_4(s4_out)
        s5_5_out = self.S5_5(s5_4_out)
        return stem_out, s3_out, s4_out, s5_4_out, s5_5_out


class BGALayer(nn.Module):
    """Bilateral guided aggregation of detail and semantic features.

    Both inputs carry 128 channels. The detail input is expected at 4x the
    spatial size of the semantic input: ``left2`` downsamples by 4 (stride-2
    convolution + stride-2 average pooling) and ``up1`` / ``up2`` upsample
    by a fixed factor of 4.
    """

    def __init__(self):
        super().__init__()
        chan = 128
        self.left1 = self._depthwise_bn_pointwise(chan)
        self.left2 = nn.Sequential(
            nn.Conv2d(
                chan, chan, kernel_size=3, stride=2,
                padding=1, bias=False),
            nn.BatchNorm2d(chan),
            nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False),
        )
        self.right1 = nn.Sequential(
            nn.Conv2d(
                chan, chan, kernel_size=3, stride=1,
                padding=1, bias=False),
            nn.BatchNorm2d(chan),
        )
        self.right2 = self._depthwise_bn_pointwise(chan)
        self.up1 = nn.Upsample(scale_factor=4)
        self.up2 = nn.Upsample(scale_factor=4)
        # TODO: does this really have no relu?
        self.conv = nn.Sequential(
            nn.Conv2d(
                chan, chan, kernel_size=3, stride=1,
                padding=1, bias=False),
            nn.BatchNorm2d(chan),
            nn.ReLU(inplace=True),  # not shown in paper
        )

    @staticmethod
    def _depthwise_bn_pointwise(chan):
        """3x3 depthwise conv, BatchNorm, then 1x1 pointwise conv (no bias)."""
        depthwise = nn.Conv2d(
            chan, chan, kernel_size=3, stride=1,
            padding=1, groups=chan, bias=False)
        pointwise = nn.Conv2d(
            chan, chan, kernel_size=1, stride=1,
            padding=0, bias=False)
        return nn.Sequential(depthwise, nn.BatchNorm2d(chan), pointwise)

    def forward(self, x_d, x_s):
        """Fuse detail features ``x_d`` with semantic features ``x_s``.

        Each branch is gated by the sigmoid of the other branch at matching
        resolution; the two gated maps are summed at the detail resolution
        and passed through the final conv-bn-relu.
        """
        detail_full = self.left1(x_d)
        detail_small = self.left2(x_d)
        semantic_gate_full = torch.sigmoid(self.up1(self.right1(x_s)))
        semantic_gate_small = torch.sigmoid(self.right2(x_s))

        gated_detail = detail_full * semantic_gate_full
        gated_semantic = self.up2(detail_small * semantic_gate_small)
        return self.conv(gated_detail + gated_semantic)


class SegmentHead(nn.Module):
    """Segmentation head: ConvBNReLU, dropout, classifier and upsampling.

    Args:
        in_chan: channels of the incoming feature map.
        mid_chan: channels after the first 3x3 ConvBNReLU.
        n_classes: number of output classes (channels of the logits).
        up_factor: total upsampling factor applied to the feature map.
        aux: when true, an extra 2x nearest upsample + ConvBNReLU (to
            ``up_factor ** 2`` channels) precedes the classifier and the
            final bilinear upsample uses ``up_factor // 2``; otherwise the
            classifier works directly on ``mid_chan`` channels and the
            final upsample uses ``up_factor``.
    """

    def __init__(self, in_chan, mid_chan, n_classes, up_factor=8, aux=True):
        super().__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, 3, stride=1)
        self.drop = nn.Dropout(0.1)
        self.up_factor = up_factor

        if aux:
            classifier_in = up_factor * up_factor
            final_scale = up_factor // 2
            refine = nn.Sequential(
                nn.Upsample(scale_factor=2),
                ConvBNReLU(mid_chan, classifier_in, 3, stride=1),
            )
        else:
            classifier_in = mid_chan
            final_scale = up_factor
            refine = nn.Identity()

        self.conv_out = nn.Sequential(
            refine,
            nn.Conv2d(classifier_in, n_classes, 1, 1, 0, bias=True),
            nn.Upsample(
                scale_factor=final_scale, mode='bilinear',
                align_corners=False),
        )

    def forward(self, x):
        """Return upsampled class logits for feature map ``x``."""
        hidden = self.drop(self.conv(x))
        return self.conv_out(hidden)


class BiSeNetV2(nn.Module):
    """BiSeNetV2 segmentation network.

    Args:
        n_classes: number of segmentation classes.
        aux_mode: selects what ``forward`` returns:
            ``'train'`` - main logits plus four auxiliary logits,
            ``'eval'``  - main logits only,
            ``'pred'``  - per-pixel argmax of the main logits.
            Any other value makes ``forward`` raise NotImplementedError.
    """

    def __init__(self, n_classes, aux_mode='train'):
        super().__init__()
        self.aux_mode = aux_mode
        self.detail = DetailBranch()
        self.segment = SegmentBranch()
        self.bga = BGALayer()

        # TODO: what is the number of mid chan ?
        self.head = SegmentHead(128, 1024, n_classes, up_factor=8, aux=False)

        # Auxiliary heads, one per intermediate segment-branch feature map.
        self.aux2 = SegmentHead(16, 128, n_classes, up_factor=4)
        self.aux3 = SegmentHead(32, 128, n_classes, up_factor=8)
        self.aux4 = SegmentHead(64, 128, n_classes, up_factor=16)
        self.aux5_4 = SegmentHead(128, 128, n_classes, up_factor=32)

        self.init_weights()

    def forward(self, x):
        """Run the network; the return value depends on ``self.aux_mode``."""
        detail_feat = self.detail(x)
        feat2, feat3, feat4, feat5_4, semantic_feat = self.segment(x)
        fused = self.bga(detail_feat, semantic_feat)
        logits = self.head(fused)

        mode = self.aux_mode
        if mode == 'eval':
            return logits
        if mode == 'pred':
            return logits.argmax(dim=1)
        if mode != 'train':
            raise NotImplementedError
        return (
            logits,
            self.aux2(feat2),
            self.aux3(feat3),
            self.aux4(feat4),
            self.aux5_4(feat5_4),
        )

    def init_weights(self):
        """Initialise conv/linear and batch-norm parameters.

        Conv2d/Linear weights get Kaiming-normal (fan_out) and zero bias.
        BatchNorm weights are set to one, except layers flagged with a
        truthy ``last_bn`` attribute, whose weights are zeroed; BatchNorm
        biases are zeroed. Finally ``load_pretrain`` is called.
        """
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight, mode='fan_out')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.modules.batchnorm._BatchNorm):
                if getattr(module, 'last_bn', False):
                    nn.init.zeros_(module.weight)
                else:
                    nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)
        self.load_pretrain()

    def load_pretrain(self):
        """No-op hook: pretrained weights are not loaded here at inference."""
        # 230423: for inference there is no need to load a pretrained model here
        pass

    def get_params(self):
        """Split parameters into weight-decay groups.

        Parameters of children whose name contains ``'head'`` or ``'aux'``
        go to the ``lr_mul_*`` lists, all others to the plain lists.
        1-D parameters are "no weight decay", 4-D parameters are "weight
        decay"; for any other rank the child's name is printed and the
        parameter is left out.

        Returns:
            ``(wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params)``
        """
        wd_params, nowd_params = [], []
        lr_mul_wd_params, lr_mul_nowd_params = [], []
        for name, child in self.named_children():
            if 'head' in name or 'aux' in name:
                decay, no_decay = lr_mul_wd_params, lr_mul_nowd_params
            else:
                decay, no_decay = wd_params, nowd_params
            for param in child.parameters():
                rank = param.dim()
                if rank == 1:
                    no_decay.append(param)
                elif rank == 4:
                    decay.append(param)
                else:
                    print(name)
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params


# ====================================================
# 第二部分：推理区域
# ====================================================


# python脚本中获取 Cpp 传过来的变量


def segOperate(model, imgList, cudaIndex):
    """Segment one batch of equally sized RGB images.

    Steps: centre-crop so height and width are multiples of 32, scale to
    [0, 1], normalise with the ImageNet mean/std, run ``model``, take the
    per-pixel argmax over the class axis and paste the result back into a
    zero-filled mask of the original size (the cropped-off border stays 0).

    Args:
        model: callable taking a float32 tensor shaped (batch, 3, H, W) and
            returning logits whose axis 1 is the class axis and whose
            spatial size equals the input's (the argmax is assigned into
            the cropped region).
        imgList: batch of images shaped (batch, height, width, 3); a numpy
            array or a sequence of equally shaped arrays.
        cudaIndex: CUDA device index, used only when CUDA is available.

    Returns:
        A list with one float32 numpy array of shape (height, width) per
        input image, holding the predicted class index per pixel. An empty
        batch yields an empty list.

    Raises:
        ValueError: if the batch is not shaped (batch, height, width, 3).
    """
    batch_size = len(imgList)
    if batch_size == 0:
        return []

    # The device below falls back to CPU, so CUDA-only calls (set_device,
    # NVTX markers) must be skipped when CUDA is not usable.
    useCuda = torch.cuda.is_available()

    def rangePush(name):
        if useCuda:
            torch.cuda.nvtx.range_push(name)

    def rangePop():
        if useCuda:
            torch.cuda.nvtx.range_pop()

    rangePush("preprocess")
    if useCuda:
        torch.cuda.set_device(cudaIndex)
    device = torch.device("cuda" if useCuda else "cpu")

    # One contiguous array; also copes with a list of arrays or with
    # negatively strided views (e.g. BGR->RGB flips).
    imgBatch = np.ascontiguousarray(imgList)
    if imgBatch.ndim != 4 or imgBatch.shape[3] != 3:
        raise ValueError(
            "imgList must be shaped (batch, height, width, 3), got {}".format(
                imgBatch.shape))
    height, width = imgBatch.shape[1], imgBatch.shape[2]

    # Round each side down to a multiple of 32 and centre the crop window.
    newHeight = (height // 32) * 32
    newWidth = (width // 32) * 32
    top = (height - newHeight) // 2
    left = (width - newWidth) // 2
    bottom = top + newHeight
    right = left + newWidth
    rangePop()

    rangePush("to_tensor")
    imgListTensor = torch.tensor(imgBatch, dtype=torch.float32, device=device)
    rangePop()

    rangePush("crop_img")
    imgListCrop = imgListTensor[:, top:bottom, left:right]
    rangePop()

    rangePush("divide255")
    # 230517: multiplying is much faster than dividing
    imgListNorm255 = imgListCrop * (1 / 255)
    rangePop()

    # NHWC -> NCHW
    rangePush("permute")
    imgTensorPermute = imgListNorm255.permute([0, 3, 1, 2])
    rangePop()

    # Subtract the per-channel mean, then divide by the per-channel std;
    # same arithmetic as torchvision's Normalize, without rebuilding a
    # transform pipeline on every call.
    rangePush("norm")
    mean = torch.tensor(
        (0.485, 0.456, 0.406), dtype=torch.float32, device=device
    ).view(1, 3, 1, 1)
    std = torch.tensor(
        (0.229, 0.224, 0.225), dtype=torch.float32, device=device
    ).view(1, 3, 1, 1)
    imgListNorm = (imgTensorPermute - mean) / std
    rangePop()

    rangePush("infer")
    logits = model(imgListNorm)
    rangePop()

    # Per pixel, pick the class with the largest logit.
    rangePush("argmax")
    ansImg_argMax = logits.argmax(dim=1)
    rangePop()

    rangePush("cropToOrigin")
    ansImg_OriginSize = torch.zeros(
        (batch_size, height, width), dtype=torch.float32, device=device)
    ansImg_OriginSize[:, top:bottom, left:right] = ansImg_argMax
    rangePop()

    rangePush("tensor2numpy")
    ansImg_needSave = ansImg_OriginSize.detach().cpu().numpy()
    rangePop()

    # Split along the batch axis directly; no squeeze, so a batch of one
    # (or a side of length one) cannot change the rank of the result.
    rangePush("append")
    ansList = [ansImg_needSave[i] for i in range(batch_size)]
    rangePop()

    return ansList


def initModel(cudaIndex=0, loadModelPath=r"Seg_BiSeNetV2\pth_save\model_final.pth"):
    """Build an 11-class BiSeNetV2 in eval mode and move it to the device.

    Args:
        cudaIndex: CUDA device index; used only when CUDA is available.
        loadModelPath: path of the checkpoint, used as given (it is not
            resolved against the interpreter's directory). Weights are
            loaded only if the path contains ``".pth"``.

    Returns:
        The model with ``aux_mode='eval'``, in ``eval()`` mode, on CUDA when
        available and on CPU otherwise.
    """
    # The device falls back to CPU, so only select a GPU when one exists.
    useCuda = torch.cuda.is_available()
    if useCuda:
        torch.cuda.set_device(cudaIndex)
    device = torch.device("cuda" if useCuda else "cpu")

    n_classes = 11
    model = BiSeNetV2(n_classes, aux_mode='eval')
    if ".pth" in loadModelPath:
        print("加载预训练参数")
        # map_location="cpu": the checkpoint loads regardless of the device
        # it was saved from; the model is moved to `device` afterwards.
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        stateDict = torch.load(loadModelPath, map_location="cpu")
        model.load_state_dict(stateDict)
    model.eval()

    model = model.to(device)
    return model


def segMain(model, imgsTuple, batchSize, cudaIndex=0):
    """Segment a sequence of images in batches of ``batchSize``.

    The images are sliced into consecutive batches, each batch is stacked
    into one numpy array and passed to ``segOperate`` under
    ``torch.no_grad()``; the per-image results are concatenated in input
    order.

    The autograd profiler that used to wrap the loop has been removed: its
    result was never read, so it only added overhead. The NVTX ranges are
    kept for external profilers and are emitted only when CUDA is available.

    Args:
        model: model forwarded to ``segOperate``.
        imgsTuple: sliceable sequence of equally shaped images.
        batchSize: positive number of images per batch.
        cudaIndex: CUDA device index forwarded to ``segOperate``.

    Returns:
        List with one result per input image, in input order.

    Raises:
        ValueError: if ``batchSize`` is not positive.
    """
    if batchSize <= 0:
        raise ValueError(
            "batchSize must be a positive integer, got {}".format(batchSize))

    useCuda = torch.cuda.is_available()

    def rangePush(name):
        if useCuda:
            torch.cuda.nvtx.range_push(name)

    def rangePop():
        if useCuda:
            torch.cuda.nvtx.range_pop()

    print("开始推理........................")
    print("len(imgsTuple): {}".format(len(imgsTuple)))
    print("batchSize: {}".format(batchSize))

    totalNum = len(imgsTuple)
    iterNum = math.ceil(totalNum / batchSize)
    print("iterNum: {}".format(iterNum))

    ansImgListTotal = []
    for beginPos in range(0, totalNum, batchSize):
        rangePush("inferFunc_preProcess")
        print("===========循环推理===========")

        # The last batch may be shorter than batchSize.
        endPos = min(beginPos + batchSize, totalNum)
        print("beginPos, endPos:   {} - {}".format(beginPos, endPos))
        imgListNP = np.array(imgsTuple[beginPos:endPos])
        print("imgListNP.shape: {}".format(imgListNP.shape))
        rangePop()

        rangePush("inferFunc")
        with torch.no_grad():
            rangePush("segOperate")
            ansImgList = segOperate(model, imgListNP, cudaIndex)
            rangePop()
        rangePop()

        ansImgListTotal.extend(ansImgList)

    if useCuda:
        rangePush("empty_cache")
        torch.cuda.empty_cache()
        rangePop()

    return ansImgListTotal


# ====================================================
# 第三部分：推理测试
# ====================================================
def segInferEnter():
    """Sample run: segment every ``*.bmp`` in a fixed folder and save masks.

    Loads the model from a hard-coded checkpoint, reads all BMP files of a
    hard-coded directory, runs ``segMain`` with a batch size of 5 and writes
    each predicted mask (class index multiplied by 20 so the classes are
    visible as grey levels) next to its source image as ``<name>.jpg``.

    Raises:
        IOError: if an image cannot be read by ``cv2.imread``.
    """
    # 1. Load the model
    loadModelPath = "../_230303_pthSave/_230526_pth/lossMin_epoch_425_iter_156400_loss_min_2.9576.pth"
    model = initModel(cudaIndex=0, loadModelPath=loadModelPath)

    # 2. Collect and read the images
    imgBasePath = r"G:\_230428_ZCDataSet\图片_测试模型效果\0515-21-17_crop\孔错位\球形\Image_20230515160058009"
    bmpList = glob.glob(os.path.join(imgBasePath, r'*.bmp'))

    imgList = []
    savePathList = []
    for bmpPath in bmpList:
        # cv2.imread signals failure by returning None instead of raising.
        imgBGR = cv2.imread(bmpPath)
        if imgBGR is None:
            raise IOError("cv2.imread failed to read image: {}".format(bmpPath))
        # BGR -> RGB
        imgList.append(imgBGR[:, :, ::-1])

        # Same directory and base name with a .jpg extension. splitext only
        # strips the final extension, so names containing dots are kept
        # intact, and it does not depend on the path separator.
        savePathList.append(os.path.splitext(bmpPath)[0] + ".jpg")

    # 3. Segment
    batchSize = 5
    ansImgList = segMain(model, imgList, batchSize, cudaIndex=0)

    # 4. Save the results
    for saveImgPath, perImg in zip(savePathList, ansImgList):
        print("saveImgPath:    {}".format(saveImgPath))
        # Scale into a new array so the returned masks are not modified.
        if not cv2.imwrite(saveImgPath, perImg * 20):
            print("failed to write image: {}".format(saveImgPath))


# Run the sample inference only when this file is executed as a script, so
# that importing the module to use its functions does not trigger it.
if __name__ == "__main__":
    segInferEnter()