# ytk_sky / 3D_temp_sality_Fourcastnet

 
			
							from functools import partial
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
from tqdm import tqdm
from timm.models.layers import DropPath, to_2tuple, trunc_normal_
import torch.fft
from params import get_args
from torch.utils.checkpoint import checkpoint_sequential
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error


class Mlp(nn.Module):
    """Feed-forward sub-layer applied along the last (feature) dimension.

    The input is expanded by a linear layer to ``hidden_features``, passed
    through the activation and dropout, and then reduced to ``out_features``
    by adaptive average pooling over the last dimension (the pooling layer
    takes the place of a second linear projection and has no parameters).
    Dropout is applied once more on the pooled result.

    Args:
        in_features: Size of the last dimension of the input.
        hidden_features: Width after the first linear layer; falls back to
            ``in_features`` when falsy.
        out_features: Size of the last dimension of the output; falls back
            to ``in_features`` when falsy.
        act_layer: Zero-argument factory for the activation module.
        drop: Dropout probability used after the activation and after the
            pooling.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        if not hidden_features:
            hidden_features = in_features
        if not out_features:
            out_features = in_features

        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        # Parameter-free reduction of the hidden width down to out_features.
        self.fc2 = nn.AdaptiveAvgPool1d(out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """Return ``drop(pool(drop(act(fc1(x)))))``."""
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))


class AdaptiveFourierNeuralOperator(nn.Module):
    """Token mixer that filters the feature map in the 2-D Fourier domain.

    ``forward`` reshapes the ``(B, N, C)`` token sequence to a
    ``(B, h, w, C)`` grid, takes a real 2-D FFT over the two grid axes,
    splits the channels into ``num_blocks`` groups and applies a two-layer
    complex-valued MLP (ReLU on real and imaginary parts after the first
    layer) independently per group.  The result is optionally soft-shrunk,
    transformed back with the inverse FFT and reshaped to ``(B, N, C)``.
    When ``args.fno_bias`` is set, a 1x1 ``Conv1d`` of the input is added
    to the output.

    Args:
        dim: Channel width ``C``; must be divisible by ``args.fno_blocks``.
        h: Grid height; ``h * w`` must equal the number of tokens ``N``.
        w: Grid width.

    Configuration read from ``get_args()``: ``fno_blocks``, ``fno_bias`` and
    ``fno_softshrink``.
    """

    def __init__(self, dim, h=14, w=8):
        super().__init__()
        args = get_args()
        self.hidden_size = dim
        self.h = h
        self.w = w

        self.num_blocks = args.fno_blocks
        self.block_size = self.hidden_size // self.num_blocks
        assert self.hidden_size % self.num_blocks == 0, 'dim must be divisible by fno_blocks'

        # Index 0 of the leading axis holds the real part, index 1 the
        # imaginary part of each complex weight / bias.
        self.scale = 0.02
        self.w1 = torch.nn.Parameter(self.scale * torch.randn(2, self.num_blocks, self.block_size, self.block_size))
        self.b1 = torch.nn.Parameter(self.scale * torch.randn(2, self.num_blocks, self.block_size))
        self.w2 = torch.nn.Parameter(self.scale * torch.randn(2, self.num_blocks, self.block_size, self.block_size))
        self.b2 = torch.nn.Parameter(self.scale * torch.randn(2, self.num_blocks, self.block_size))
        self.relu = nn.ReLU()

        if args.fno_bias:
            self.bias = nn.Conv1d(self.hidden_size, self.hidden_size, 1)
        else:
            self.bias = None

        self.softshrink = args.fno_softshrink

    def multiply(self, input, weights):
        """Per-block matrix product: ``(..., b, d) x (b, d, k) -> (..., b, k)``."""
        return torch.einsum('...bd,bdk->...bk', input, weights)

    def forward(self, x):
        """Filter ``x`` of shape ``(B, N, C)`` and return a tensor of the same shape."""
        B, N, C = x.shape

        # Spatial-domain shortcut, computed from the unfiltered input.
        bias = None
        if self.bias is not None:
            bias = self.bias(x.permute(0, 2, 1)).permute(0, 2, 1)

        x = x.reshape(B, self.h, self.w, C)
        x = torch.fft.rfft2(x, dim=(1, 2), norm='ortho')
        x = x.reshape(B, x.shape[1], x.shape[2], self.num_blocks, self.block_size)

        # Layer 1: complex multiply-add, then ReLU on each component.
        hid_real = F.relu(self.multiply(x.real, self.w1[0]) - self.multiply(x.imag, self.w1[1]) + self.b1[0],
                          inplace=True)
        hid_imag = F.relu(self.multiply(x.real, self.w1[1]) + self.multiply(x.imag, self.w1[0]) + self.b1[1],
                          inplace=True)

        # Layer 2: both components must be built from the layer-1 outputs.
        # Separate names keep the real result from being overwritten before
        # the imaginary part is computed, which would break the complex
        # product (a + ib)(c + id) = (ac - bd) + i(ad + bc).
        out_real = self.multiply(hid_real, self.w2[0]) - self.multiply(hid_imag, self.w2[1]) + self.b2[0]
        out_imag = self.multiply(hid_real, self.w2[1]) + self.multiply(hid_imag, self.w2[0]) + self.b2[1]

        x = torch.stack([out_real, out_imag], dim=-1)
        if self.softshrink:
            x = F.softshrink(x, lambd=self.softshrink)

        x = torch.view_as_complex(x)
        x = x.reshape(B, x.shape[1], x.shape[2], self.hidden_size)
        x = torch.fft.irfft2(x, s=(self.h, self.w), dim=(1, 2), norm='ortho')
        x = x.reshape(B, N, C)

        return x if bias is None else x + bias


class Block(nn.Module):
    """One AFNO layer: normalised Fourier filter followed by a normalised MLP.

    ``forward`` computes ``filter(norm1(x))`` and then
    ``drop_path(mlp(norm2(.)))`` and adds a residual at the end.  When
    ``args.double_skip`` is true, an additional residual is added right
    after the filter and the second residual starts from that sum.

    Args:
        dim: Channel width of the tokens.
        mlp_ratio: Hidden width of the MLP as a multiple of ``dim``.
        drop: Dropout probability inside the MLP.
        drop_path: Stochastic-depth rate; ``nn.Identity`` is used when it is
            not greater than zero.
        act_layer: Activation factory forwarded to the MLP.
        norm_layer: Factory taking ``dim`` and returning a normalisation
            module.
        h: Grid height forwarded to the Fourier filter.
        w: Grid width forwarded to the Fourier filter.
    """

    def __init__(self, dim, mlp_ratio=4., drop=0., drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, h=14, w=8):
        super().__init__()
        use_double_skip = get_args().double_skip

        self.norm1 = norm_layer(dim)
        self.filter = AdaptiveFourierNeuralOperator(dim, h=h, w=w)

        if drop_path > 0.:
            self.drop_path = DropPath(drop_path)
        else:
            self.drop_path = nn.Identity()

        self.norm2 = norm_layer(dim)
        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)

        self.double_skip = use_double_skip

    def forward(self, x):
        """Apply the layer to ``x`` and return a tensor of the same shape."""
        shortcut = x
        x = self.filter(self.norm1(x))

        if self.double_skip:
            # Close the first residual here and restart the shortcut from it.
            x += shortcut
            shortcut = x

        x = self.drop_path(self.mlp(self.norm2(x)))
        x += shortcut
        return x


class PatchEmbed(nn.Module):
    """Split an image into non-overlapping patches and embed each one.

    A ``Conv2d`` whose kernel size and stride both equal the patch size
    maps a ``(B, in_chans, H, W)`` input to one ``embed_dim`` vector per
    patch; ``forward`` returns them as ``(B, num_patches, embed_dim)``.

    Args:
        img_size: ``(height, width)`` of the expected input; required.
        patch_size: Patch edge length, or a pair, normalised with
            ``to_2tuple``.
        in_chans: Number of input channels.
        embed_dim: Embedding width per patch.

    Raises:
        KeyError: If ``img_size`` is ``None``.
    """

    def __init__(self, img_size=None, patch_size=8, in_chans=13, embed_dim=768):
        super().__init__()

        if img_size is None:
            raise KeyError('img is None')

        patch_size = to_2tuple(patch_size)
        patches_per_col = img_size[0] // patch_size[0]
        patches_per_row = img_size[1] // patch_size[1]

        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = patches_per_row * patches_per_col

        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        """Embed ``x`` of shape ``(B, C, H, W)``; ``H`` and ``W`` must equal ``img_size``."""
        B, C, H, W = x.shape
        # FIXME look at relaxing size constraints
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        embedded = self.proj(x)
        # (B, embed_dim, H', W') -> (B, H' * W', embed_dim)
        return embedded.flatten(2).transpose(1, 2)


class AFNONet(nn.Module):
    """AFNO encoder with a transposed-convolution decoder.

    Pipeline of ``forward``:

    1. ``PatchEmbed`` turns ``(B, in_chans, H, W)`` into
       ``(B, num_patches, embed_dim)`` tokens; a learnable position
       embedding is added and dropout applied.
    2. ``depth`` ``Block`` layers process the tokens (optionally under
       activation checkpointing when ``get_args().checkpoint_activations``
       is set), followed by a final normalisation.
    3. The tokens are reshaped to a ``(B, embed_dim, h, w)`` patch grid and
       upsampled by three stride-2 ``ConvTranspose2d`` layers (8x in each
       spatial direction) down to ``out_chans`` channels.

    Args:
        img_size: ``[height, width]`` of the input; defaults to ``[40, 200]``.
        patch_size: Patch edge length (int) or a ``(ph, pw)`` pair.  The
            decoder upsamples by a fixed factor of 8, so the output only
            matches the input resolution for a patch size of 8.
        in_chans: Number of input channels.
        out_chans: Number of output channels.
        embed_dim: Token width.
        depth: Number of ``Block`` layers.
        mlp_ratio: Hidden-width multiplier inside each block's MLP.
        uniform_drop: Use ``drop_path_rate`` for every block instead of a
            linear ramp from 0 to ``drop_path_rate``.
        drop_rate: Dropout probability for tokens and block MLPs.
        drop_path_rate: Maximum stochastic-depth rate.
        norm_layer: Normalisation factory; defaults to
            ``LayerNorm(eps=1e-6)``.
        dropcls: Dropout probability applied to the encoder output before
            decoding (0 disables it).
    """

    def __init__(self, img_size=None, patch_size=8, in_chans=60, out_chans=1, embed_dim=100, depth=4, mlp_ratio=4.,
                 uniform_drop=False, drop_rate=0., drop_path_rate=0., norm_layer=None, dropcls=0):
        super().__init__()

        if img_size is None:
            img_size = [40, 200]

        self.embed_dim = embed_dim
        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)

        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        # Learnable position embedding, one vector per patch.
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # Patch-grid size.  Use the normalised 2-tuple stored by PatchEmbed
        # so that a (ph, pw) patch size works as well as a plain int.
        patch_h, patch_w = self.patch_embed.patch_size[0], self.patch_embed.patch_size[1]
        self.h = img_size[0] // patch_h
        self.w = img_size[1] // patch_w

        # Stochastic depth decay rule.
        if uniform_drop:
            dpr = [drop_path_rate for _ in range(depth)]
        else:
            dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]

        self.blocks = nn.ModuleList([Block(dim=embed_dim, mlp_ratio=mlp_ratio, drop=drop_rate, drop_path=dpr[i],
                                           norm_layer=norm_layer, h=self.h, w=self.w) for i in range(depth)])
        self.norm = norm_layer(embed_dim)

        # Decoder stage 1 and 2: each kernel-2 / stride-2 transposed
        # convolution doubles height and width; Tanh keeps the shape.
        #   (B, embed_dim, h, w) -> (B, 16 * out_chans, 2h, 2w)
        #                        -> (B,  4 * out_chans, 4h, 4w)
        self.pre_logits = nn.Sequential(OrderedDict([
            ('conv1', nn.ConvTranspose2d(embed_dim, out_chans * 16, kernel_size=(2, 2), stride=(2, 2))),
            ('act1', nn.Tanh()),
            ('conv2', nn.ConvTranspose2d(out_chans * 16, out_chans * 4, kernel_size=(2, 2), stride=(2, 2))),
            ('act2', nn.Tanh())
        ]))

        # Generator head: final doubling, (B, 4 * out_chans, 4h, 4w) -> (B, out_chans, 8h, 8w).
        self.head = nn.ConvTranspose2d(out_chans * 4, out_chans, kernel_size=(2, 2), stride=(2, 2))

        if dropcls > 0:
            print('dropout %.2f before classifier' % dropcls)
            self.final_dropout = nn.Dropout(p=dropcls)
        else:
            self.final_dropout = nn.Identity()

        trunc_normal_(self.pos_embed, std=.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Initialise ``nn.Linear`` and ``nn.LayerNorm`` modules; leave others untouched."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            # Affine parameters are absent when elementwise_affine=False.
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            if m.weight is not None:
                nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        """Return the parameter names that should be excluded from weight decay."""
        return {'pos_embed', 'cls_token'}

    def forward_features(self, x):
        """Encode ``x`` of shape ``(B, in_chans, H, W)`` into ``(B, embed_dim, h, w)``."""
        x = self.patch_embed(x)   # (B, num_patches, embed_dim)
        x += self.pos_embed       # learnable position embedding
        x = self.pos_drop(x)

        num_blocks = len(self.blocks)
        if get_args().checkpoint_activations and num_blocks > 0:
            # checkpoint_sequential needs at least one block per segment;
            # asking for more segments than blocks raises inside it.
            x = checkpoint_sequential(self.blocks, min(4, num_blocks), x)
        else:
            for blk in self.blocks:
                x = blk(x)

        x = self.norm(x).transpose(1, 2)  # (B, embed_dim, num_patches)

        # Unflatten the patch axis back into the (h, w) patch grid.
        x = torch.reshape(x, [-1, self.embed_dim, self.h, self.w])
        return x

    def forward(self, x):
        """Map ``(B, in_chans, H, W)`` to ``(B, out_chans, 8h, 8w)``."""
        x = self.forward_features(x)  # (B, embed_dim, h, w)
        x = self.final_dropout(x)
        x = self.pre_logits(x)        # (B, 4 * out_chans, 4h, 4w)

        x = self.head(x)              # (B, out_chans, 8h, 8w)

        return x


if __name__ == '__main__':
    # Smoke test: push one random batch through a default-configured model.
    sample = torch.randn(10, 60, 40, 200)
    prediction = AFNONet()(sample)
    print(prediction.shape)  # torch.Size([10, 1, 40, 200])
# NOTE(review): the two prints below run unconditionally (they sit outside
# the guard above) and look like leftover debug output - confirm before removing.
print(1)
print(12)


def setup_seed(seed):
    """Seed the torch (CPU and all GPUs), NumPy and ``random`` generators.

    Also switches cuDNN to deterministic algorithms.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True


def rmse(y_true, y_preds):
    """Return the root-mean-square error between ``y_true`` and ``y_preds``."""
    return np.sqrt(mean_squared_error(y_pred=y_preds, y_true=y_true))


class MyDataset(Dataset):
    """Pairs each input sample with its label; both are stored as float tensors."""

    def __init__(self, data, label):
        self.data = torch.Tensor(data)
        self.label = torch.Tensor(label)

    def __len__(self):
        # The label array defines the number of usable samples.
        return len(self.label)

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx]


# Training script.  It runs at module level for every (seed, lead-day)
# combination: load the dataset, split it chronologically, train AFNONet
# and keep the weights with the lowest validation loss.
for seed in range(2023, 2024):
    for date_append in range(0, 1):
        # Fix the random seeds for this run.
        setup_seed(seed)

        # Original note: requires mld, u, v, sss, temp, precipitation,
        # evaporation and the salinity below the mixed layer.
        data = np.load(r'/dataset/10day_for_14day_all_variables_surface_pacific_10_19_SSS.npz')
        print(data.files)

        # Channel order of the model input (dim=1 after concatenation).
        input_names = ('evaporation', 'total_precipitation', 'sss_surface', 'mld', 'u_surface', 'v_surface')
        raw = {name: data[name][:] for name in input_names}
        sss_surface_label = data['sss_surface_label'][:]

        print(raw['sss_surface'].shape)  # original note: (3642, 10, 40, 200)
        # NOTE(review): the original note gives (3638, 40, 200) here, but the
        # array is indexed with four indices below - confirm the real shape.
        print(sss_surface_label.shape)

        train_size = 2208
        # Original note: the first 20% is used for validation and the
        # remaining 20% for testing.
        valid_size = 2912

        # Each variable becomes (samples, 1, 10, 40, 200) and is split
        # chronologically into a training and a validation part.
        train_parts, valid_parts = [], []
        for name in input_names:
            field = torch.Tensor(raw.pop(name).reshape(-1, 1, 10, 40, 200))
            train_parts.append(field[0:train_size])
            valid_parts.append(field[train_size:valid_size])

        train_data = torch.cat(train_parts, dim=1)
        valid_data = torch.cat(valid_parts, dim=1)
        del train_parts, valid_parts

        print(train_data.shape)
        print(valid_data.shape)

        # Labels are offset by 10 samples relative to the inputs;
        # date_append selects the entry along the second label axis.
        sss_train_label = sss_surface_label[10:train_size + 10, date_append, :, :]
        sss_valid_label = sss_surface_label[train_size + 10:valid_size + 10, date_append, :, :]

        train_label = sss_train_label.reshape(-1, 1, 40, 200)
        valid_label = sss_valid_label.reshape(-1, 1, 40, 200)
        print(train_label.shape)

        # Data pipeline.
        batch_size1 = 32
        batch_size2 = 32
        batch_size3 = 3000  # not used in this section

        trainset = MyDataset(train_data, train_label)
        trainloader = DataLoader(trainset, batch_size=batch_size1, shuffle=True, drop_last=False,
                                 pin_memory=True, num_workers=4)

        # Validation must NOT be shuffled: predictions are written into
        # pred_val by position and compared against valid_label in its
        # original order.
        validset = MyDataset(valid_data, valid_label)
        validloader = DataLoader(validset, batch_size=batch_size2, shuffle=False, drop_last=False,
                                 pin_memory=True, num_workers=4)

        model_weights1 = '/model/epo200_lay3_lr0.001_e{}_forecastnet_{}day_model_weights.pth'.format(seed, date_append)
        torch.backends.cudnn.enabled = False

        model = AFNONet().cuda()

        criterion = nn.MSELoss()
        # Optimizer.
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        epochs = 200
        train_losses, valid_losses = [], []
        best_score = float('inf')

        # One prediction slot per validation sample.
        pred_val = np.zeros((len(validset), 1, 40, 200))

        sores = []

        for epoch in range(epochs):
            print('Epoch: {}/{}'.format(epoch + 1, epochs))

            # ---- training ----
            model.train()
            running_loss = 0.0
            for batch, label in tqdm(trainloader):
                batch = batch.cuda()  # (B, 6, 10, 40, 200)
                label = label.cuda()
                optimizer.zero_grad()

                # Fold the variable and time axes into one channel axis.
                B, T, C, H, W = batch.size()
                out = model(batch.reshape(B, T * C, H, W))
                out = out.reshape(-1, 1, 40, 200)
                loss = criterion(out, label)

                loss.backward()
                optimizer.step()

                # .item() detaches the value so the autograd graph of every
                # batch is not kept alive for the whole epoch.
                running_loss += loss.item()

            train_loss = running_loss / len(trainloader)
            train_losses.append(train_loss)

            print('Training Loss: {:.10f}'.format((train_loss)))

            # ---- validation ----
            model.eval()
            running_loss = 0.0
            offset = 0
            with torch.no_grad():
                for batch, label in tqdm(validloader):
                    batch = batch.cuda()
                    label = label.cuda()

                    B, T, C, H, W = batch.size()
                    out = model(batch.reshape(B, T * C, H, W))
                    out = out.reshape(-1, 1, 40, 200)

                    running_loss += criterion(out, label).item()

                    # Store predictions in dataset order.
                    count = out.shape[0]
                    pred_val[offset:offset + count] = out.cpu().numpy()
                    offset += count

            valid_loss = running_loss / len(validloader)
            valid_losses.append(valid_loss)

            s = rmse(valid_label.reshape(-1, 1), pred_val.reshape(-1, 1))
            sores.append(s)
            print('Score: {:.3f}'.format(s))

            # Lower is better.  To maximise a metric instead, flip the
            # comparison and start best_score from -inf.
            if valid_loss < best_score:
                best_score = valid_loss
                checkpoint = {'best_score': valid_loss,
                              'state_dict': model.state_dict()}
                torch.save(checkpoint, model_weights1)
                torch.save(model.state_dict(),
                           '/model/fourcastnet_lr0.001_model_300_layer3_{}day_e{}.pt'.format(date_append, seed))

        print(sores)
        print(best_score)
        print(s)