|
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- import torch.optim as optim
- from numpy import random
- import numpy as np
-
class Qnet(nn.Module):
    """Multi-head Q-network with a shared trunk and one output head per sub-network.

    The states of all ``net_num`` sub-networks are concatenated and passed
    through a shared first layer (``fc1``). Each sub-network then has its own
    two-layer head (``fc2_i`` -> ``fc3_i``) producing ``action_num`` Q-values.
    """

    def __init__(self):
        super(Qnet, self).__init__()
        self.net_num = 4      # number of sub-networks / output heads
        self.s_num = 2        # state features per sub-network
        self.action_num = 5   # Q-values (discrete actions) produced by each head
        self.floor1 = 128     # width of the shared hidden layer
        self.floor2 = 32      # width of each head's hidden layer

        self.fc1 = nn.Linear(self.s_num * self.net_num, self.floor1)

        # Every head layer is registered as an attribute named ``fc2_<i>`` /
        # ``fc3_<i>`` so that nn.Module tracks its parameters (and so that
        # state_dict keys stay ``fc2_0.weight`` etc.). The plain Python lists
        # only provide indexed access to those same layer objects.
        self.fc2 = []
        for i in range(self.net_num):
            layer = nn.Linear(self.floor1, self.floor2)
            setattr(self, "fc2_{}".format(i), layer)
            self.fc2.append(layer)

        self.fc3 = []
        for i in range(self.net_num):
            layer = nn.Linear(self.floor2, self.action_num)
            setattr(self, "fc3_{}".format(i), layer)
            self.fc3.append(layer)

    def forward(self, x):
        """Compute the Q-values of every head.

        Args:
            x: tensor of size ``[net_num, batch, s_num]``.

        Returns:
            Tensor of size ``[net_num, batch, action_num]``.
        """
        # Concatenate the per-network states along the feature axis:
        # net_num tensors of [batch, s_num] -> [batch, net_num * s_num].
        joint_state = torch.hstack(tuple(x[i] for i in range(self.net_num)))
        shared = F.relu(self.fc1(joint_state))
        head_outputs = [
            fc3(F.relu(fc2(shared))) for fc2, fc3 in zip(self.fc2, self.fc3)
        ]
        return torch.stack(head_outputs)

    def sample_action(self, s, epsilon):
        """Pick an epsilon-greedy action for every head.

        Each head independently explores with probability ``epsilon`` (a
        uniformly random action in ``[0, action_num)``); otherwise it takes
        the arg-max of its own Q-values.

        Args:
            s: tensor of size ``[net_num, 1, s_num]``.
            epsilon: exploration probability.

        Returns:
            ``np.ndarray`` of shape ``[net_num, 1]`` holding the action index
            of each head. The dtype is float64, as in earlier versions of
            this method.
        """
        # Action selection does not need gradients.
        with torch.no_grad():
            q_values = self.forward(s)

        # Arg-max over each head's flattened Q-values (batch size is 1 here).
        greedy = q_values.reshape(self.net_num, -1).argmax(dim=1).cpu().numpy()

        explore = random.random(self.net_num) < epsilon
        # One independent draw per head; a single scalar draw would force all
        # exploring heads to take the same action.
        random_actions = random.randint(0, self.action_num, size=self.net_num)

        actions = np.where(explore, random_actions, greedy).astype(np.float64)
        return actions.reshape(self.net_num, 1)
|