|
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- import torch.optim as optim
- from numpy import random
- import numpy as np
-
class Qnet(nn.Module):
    """A bank of independent fully-connected Q-networks evaluated side by side.

    Each of the ``net_num`` sub-networks is a three-layer MLP
    (``state_dim -> floor1 -> floor2 -> n_actions``) with ReLU after the first
    two layers.  The sub-networks share no parameters.

    Layers are registered as ``fc1_<k>``, ``fc2_<k>`` and ``fc3_<k>`` with
    ``k`` running from 1 to ``net_num``; the same layer objects are also
    reachable by position through the lists ``fc1``, ``fc2`` and ``fc3``.

    Args:
        net_num: number of independent sub-networks.
        state_dim: size of each sub-network's input vector.
        floor1: width of the first hidden layer.
        floor2: width of the second hidden layer.
        n_actions: number of Q-values (actions) each sub-network outputs.
    """

    def __init__(self, net_num=4, state_dim=2, floor1=128, floor2=128,
                 n_actions=5):
        super(Qnet, self).__init__()
        self.net_num = net_num
        self.state_dim = state_dim
        self.floor1 = floor1
        self.floor2 = floor2
        self.n_actions = n_actions

        # Plain lists on purpose: the layers are registered through the
        # fc*_<k> attributes below, so wrapping them in nn.ModuleList as well
        # would add a second set of keys to state_dict().
        self.fc1 = []
        self.fc2 = []
        self.fc3 = []
        for k in range(1, net_num + 1):
            # Create the three layers of one sub-network back to back so the
            # parameter-initialisation order is net by net.
            first = nn.Linear(state_dim, floor1)
            second = nn.Linear(floor1, floor2)
            last = nn.Linear(floor2, n_actions)
            setattr(self, "fc1_{}".format(k), first)
            setattr(self, "fc2_{}".format(k), second)
            setattr(self, "fc3_{}".format(k), last)
            self.fc1.append(first)
            self.fc2.append(second)
            self.fc3.append(last)

    def forward(self, x):
        """Run every sub-network on its own slice of ``x``.

        Args:
            x: tensor of shape ``[net_num, batch, state_dim]``; slice ``x[k]``
                is fed to sub-network ``k``.

        Returns:
            Tensor of shape ``[net_num, batch, n_actions]`` holding the
            Q-values produced by each sub-network.
        """
        outputs = []
        layers = zip(self.fc1, self.fc2, self.fc3)
        for k, (first, second, last) in enumerate(layers):
            hidden = F.relu(first(x[k, :, :]))
            hidden = F.relu(second(hidden))
            outputs.append(last(hidden))
        return torch.stack(outputs)

    def sample_action(self, s, epsilon):
        """Choose one epsilon-greedy action per sub-network.

        For each sub-network a uniform number in ``[0, 1)`` is drawn from
        NumPy's global RNG.  If it is below ``epsilon`` the sub-network gets
        its own uniformly random action, otherwise the arg-max of its
        Q-values.

        Args:
            s: tensor of shape ``[net_num, 1, state_dim]`` (one state per
                sub-network).
            epsilon: exploration probability.

        Returns:
            ``np.ndarray`` of shape ``[net_num, 1]`` and float dtype whose
            entries are integer-valued action indices in ``[0, n_actions)``.
        """
        # Action selection never back-propagates, so skip graph construction.
        with torch.no_grad():
            q_values = self.forward(s)

        draws = np.random.random([self.net_num, 1])
        explore = draws[:, 0] < epsilon
        actions = np.empty((self.net_num, 1), dtype=float)

        # One independent random action per exploring sub-network; a single
        # scalar draw would force all of them to take the same action.
        actions[explore, 0] = np.random.randint(
            0, self.n_actions, size=int(explore.sum())
        )
        for k in np.flatnonzero(~explore):
            actions[k, 0] = q_values[k].argmax().item()
        return actions
|