|
- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import paddle.nn as nn
- import parl
-
-
- class AtariModel(parl.Model):
- """ Neural Network to solve Atari problem.
-
- Args:
- act_dim (int): Dimension of action space.
- dueling (bool): True if use dueling architecture else False
- """
-
- def __init__(self, act_dim, dueling=False):
- super().__init__()
- self.conv1 = nn.Conv2D(
- in_channels=4,
- out_channels=32,
- kernel_size=5,
- stride=1,
- padding=2,
- weight_attr=nn.initializer.KaimingNormal())
- self.conv2 = nn.Conv2D(
- in_channels=32,
- out_channels=32,
- kernel_size=5,
- stride=1,
- padding=2,
- weight_attr=nn.initializer.KaimingNormal())
- self.conv3 = nn.Conv2D(
- in_channels=32,
- out_channels=64,
- kernel_size=4,
- stride=1,
- padding=1,
- weight_attr=nn.initializer.KaimingNormal())
- self.conv4 = nn.Conv2D(
- in_channels=64,
- out_channels=64,
- kernel_size=3,
- stride=1,
- padding=1,
- weight_attr=nn.initializer.KaimingNormal())
- self.relu = nn.ReLU()
- self.max_pool = nn.MaxPool2D(kernel_size=2, stride=2)
- self.flatten = nn.Flatten()
-
- self.dueling = dueling
-
- if dueling:
- self.linear_1_adv = nn.Linear(
- in_features=6400,
- out_features=512,
- weight_attr=nn.initializer.KaimingNormal())
- self.linear_2_adv = nn.Linear(
- in_features=512, out_features=act_dim)
- self.linear_1_val = nn.Linear(
- in_features=6400,
- out_features=512,
- weight_attr=nn.initializer.KaimingNormal())
- self.linear_2_val = nn.Linear(in_features=512, out_features=1)
-
- else:
- self.linear_1 = nn.Linear(in_features=6400, out_features=act_dim)
-
- def forward(self, obs):
- """ Perform forward pass
-
- Args:
- obs (paddle.Tensor): shape of (batch_size, 3, 84, 84), mini batch of observations
- """
- obs = obs / 255.0
- out = self.max_pool(self.relu(self.conv1(obs)))
- out = self.max_pool(self.relu(self.conv2(out)))
- out = self.max_pool(self.relu(self.conv3(out)))
- out = self.relu(self.conv4(out))
- out = self.flatten(out)
-
- if self.dueling:
- As = self.relu(self.linear_1_adv(out))
- As = self.linear_2_adv(As)
- V = self.relu(self.linear_1_val(out))
- V = self.linear_2_val(V)
- Q = As + (V - As.mean(axis=1, keepdim=True))
-
- else:
- Q = self.linear_1(out)
-
- return Q
|