import tensorflow as tf

from ares.loss.base import Loss


class CrossEntropyLoss(Loss):
    ''' Cross entropy loss. '''

    def __init__(self, model):
        ''' Initialize CrossEntropyLoss.

        :param model: An instance of ``ClassifierWithLogits``.
        '''
        self.model = model

    def __call__(self, xs, ys):
        logits = self.model.logits(xs)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ys, logits=logits)
        return loss
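

# A minimal usage sketch (hypothetical, not part of the ares API): it assumes
# ``model`` follows the ``ClassifierWithLogits`` interface used above (i.e. it
# exposes a ``logits(xs)`` method) and accepts MNIST-like inputs; the shapes
# below are illustrative only.
def _cross_entropy_example(model):
    xs_ph = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    ys_ph = tf.placeholder(tf.int32, shape=(None,))
    # One cross entropy value per example in the batch.
    loss = CrossEntropyLoss(model)(xs_ph, ys_ph)
    # Input gradient of the summed loss, as used by gradient-based attacks.
    grads = tf.gradients(tf.reduce_sum(loss), xs_ph)[0]
    return loss, grads, xs_ph, ys_ph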


class EnsembleCrossEntropyLoss(Loss):
    ''' Ensemble multiple models' cross entropy loss. '''

    def __init__(self, models, weights):
        ''' Initialize EnsembleCrossEntropyLoss.

        :param models: A list of ``ClassifierWithLogits``.
        :param weights: The weights used to combine these models' losses.
        '''
        self.models, self.weights = models, weights

    def __call__(self, xs, ys):
        losses = []
        for model, weight in zip(self.models, self.weights):
            logits = model.logits(xs)
            losses.append(weight * tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ys, logits=logits))
        # Per-example weighted sum of all models' losses.
        return tf.reduce_sum(losses, axis=0)
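

# A minimal usage sketch (hypothetical, not part of the ares API): it combines
# two models' losses with equal weights. It assumes ``model_a`` and ``model_b``
# follow the ``ClassifierWithLogits`` interface used above and share the same
# input shape; the shapes below are illustrative only.
def _ensemble_example(model_a, model_b):
    xs_ph = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    ys_ph = tf.placeholder(tf.int32, shape=(None,))
    loss_fn = EnsembleCrossEntropyLoss([model_a, model_b], [0.5, 0.5])
    # Per-example weighted sum of both models' cross entropy.
    loss = loss_fn(xs_ph, ys_ph)
    return loss, xs_ph, ys_ph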


class EnsembleRandomnessCrossEntropyLoss(Loss):
    ''' Ensemble a randomized model's cross entropy loss by averaging it over multiple runs. '''

    def __init__(self, model, n, session):
        ''' Initialize EnsembleRandomnessCrossEntropyLoss.

        :param model: An instance of ``ClassifierWithLogits``.
        :param n: Number of samples (model runs) to average over.
        :param session: The ``tf.Session`` used to run the model.
        '''
        assert n > 1

        self.model, self.n = model, n
        self._session = session

    def __call__(self, xs, ys):
        # Placeholder for the gradient flowing back into the loss (grad_ys).
        d_output_ph = tf.placeholder(dtype=xs.dtype)

        xs_ph = tf.placeholder(dtype=xs.dtype, shape=xs.shape)
        ys_ph = tf.placeholder(dtype=ys.dtype, shape=ys.shape)

        logits = self.model.logits(xs_ph)

        # Loss and input gradient for a single run of the (randomized) model.
        one_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ys_ph, logits=logits)
        one_loss_grads = tf.gradients(one_loss, xs_ph, grad_ys=[d_output_ph])[0]

        @tf.custom_gradient
        def fn_loss(xs_tf, ys_tf):
            # Runs eagerly inside tf.py_function, so the arguments can be
            # converted to numpy arrays and fed to the session.
            xs_np = xs_tf.numpy()
            ys_np = ys_tf.numpy()

            # Average the loss over n independent runs of the model.
            loss_np = self._session.run(one_loss, feed_dict={xs_ph: xs_np, ys_ph: ys_np})
            for _ in range(self.n - 1):
                loss_np += self._session.run(one_loss, feed_dict={xs_ph: xs_np, ys_ph: ys_np})
            loss_np /= self.n

            def fn_loss_grads(d_output_tf):
                d_output_np = d_output_tf.numpy()

                # Average the input gradient over n independent runs of the model.
                loss_grads_np = self._session.run(
                    one_loss_grads, feed_dict={xs_ph: xs_np, ys_ph: ys_np, d_output_ph: d_output_np})
                for _ in range(self.n - 1):
                    loss_grads_np += self._session.run(
                        one_loss_grads, feed_dict={xs_ph: xs_np, ys_ph: ys_np, d_output_ph: d_output_np})
                loss_grads_np /= self.n

                # The '1' here should be 'None', since there is actually no gradient for
                # the second parameter ys. However, TensorFlow converts 'None' to '0.0'
                # and then tries to cast '0.0' to ys' dtype ('tf.int32'), which raises an
                # error, so we return a valid integer to work around this behavior.
                return tf.convert_to_tensor(loss_grads_np), 1

            return tf.convert_to_tensor(loss_np), fn_loss_grads

        # Wrap fn_loss as an eager operation in the graph, restoring the shape and
        # dtype of a single run's loss.
        loss = tf.py_function(func=fn_loss, inp=[xs, ys], Tout=one_loss.dtype)
        loss.set_shape(one_loss.shape)

        return loss
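

# A minimal usage sketch (hypothetical, not part of the ares API): it builds
# the averaged loss for a model with internal randomness (e.g. random input
# transforms). It assumes ``model`` follows the ``ClassifierWithLogits``
# interface used above and that the placeholder shapes match the model;
# ``batch_size``, the input shape, and ``n=10`` are illustrative only.
def _ensemble_randomness_example(model, session, batch_size=10):
    xs_ph = tf.placeholder(tf.float32, shape=(batch_size, 32, 32, 3))
    ys_ph = tf.placeholder(tf.int32, shape=(batch_size,))
    # Average the loss over 10 runs to smooth out the model's randomness.
    loss = EnsembleRandomnessCrossEntropyLoss(model, n=10, session=session)(xs_ph, ys_ph)
    # The custom gradient above averages the input gradient over 10 runs as well.
    grads = tf.gradients(tf.reduce_sum(loss), xs_ph)[0]
    return loss, grads, xs_ph, ys_ph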