|
- #!/usr/bin/env python36
- # -*- coding: utf-8 -*-
- """
- Created on July, 2018
-
- @author: Tangrizzly
- """
-
- import datetime
- import math
- import numpy as np
- import torch
- from torch import nn
- from torch.nn import Module, Parameter
- from torch.autograd import Variable
- import torch.nn.functional as F
- from utils import *
- from news_encoder import BiLSTM
- from metric import *
-
-
class proposed_model(nn.Module):
    """Session-based news recommendation model.

    Attention-pools the encoded clicked-news representations into a single
    session vector, then scores each candidate news item by inner product
    with that vector (trained with CrossEntropyLoss over K+1 candidates).
    """

    def __init__(self, opt):
        super(proposed_model, self).__init__()
        self.opt = opt
        self.MAX_CONTENT = opt.MAX_CONTENT
        self.news_encoder = BiLSTM()
        self.text_hidden_size = opt.textHiddenSize
        self.batch_size = opt.batchSize
        self.loss_function = nn.CrossEntropyLoss()
        # Scores each clicked item (2d -> 1) for the attention pooling.
        self.q1 = nn.Linear(2 * self.text_hidden_size, 1, bias=True)
        self.optimizer = torch.optim.Adam(self.parameters(), lr=opt.lr, weight_decay=opt.l2)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)
        self.reset_parameters()

    def forward(self, content_hidden, mask):
        """Attention-pool clicked-news states into one session vector.

        content_hidden: bs x N x 2d encoded clicked news.
        mask: bs x N, 1 for real clicks, 0 for padding.
        Returns: bs x 2d session representation.
        """
        mask = mask.float().unsqueeze(-1)  # bs x N x 1

        logits = self.q1(content_hidden)  # bs x N x 1
        # BUG FIX: mask padded positions *before* the softmax so they get
        # (near-)zero attention and the real positions renormalize to 1.
        # Previously the softmax ran over padding too, leaking probability
        # mass to padded slots. A large negative (not -inf) keeps a
        # fully-padded row NaN-free.
        logits = logits.masked_fill(mask == 0, -1e9)
        alpha = torch.softmax(logits, dim=1)  # bs x N x 1
        # The extra * mask pins a fully-padded row to exactly zero.
        sess_rep_click = torch.sum(alpha * content_hidden * mask, dim=1)  # bs x 2d

        return sess_rep_click

    def reset_parameters(self):
        """Uniformly re-initialize every parameter (incl. the news encoder)."""
        stdv = 1.0 / math.sqrt(self.text_hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def compute_scores(self, sess_rep_content, candidates_content_hidden):
        """Inner-product score of the session vector against each candidate.

        sess_rep_content: bs x 2d; candidates_content_hidden: bs x (K+1) x 2d.
        Returns: bs x (K+1) raw scores suitable for CrossEntropyLoss.
        """
        sess_rep_content = torch.unsqueeze(sess_rep_content, dim=1)  # bs x 1 x 2d
        scores_content = torch.bmm(sess_rep_content, candidates_content_hidden.transpose(1, 2))  # bs x 1 x (K+1)
        scores_content = torch.squeeze(scores_content, dim=1)  # bs x (K+1)
        return scores_content
-
-
def trans_to_cuda(variable):
    """Move *variable* to the GPU when CUDA is available; otherwise return it unchanged."""
    return variable.cuda() if torch.cuda.is_available() else variable
-
-
def trans_to_cpu(variable):
    """Move *variable* to host memory when CUDA is available; otherwise return it unchanged."""
    return variable.cpu() if torch.cuda.is_available() else variable
-
-
def forward(model, i, data, content_cut, word_vectors_300d):
    """Run one batch slice *i* of *data* through *model*.

    Returns (scores, targets): scores bs x (K+1) candidate scores,
    targets bs long tensor of ground-truth indices.
    """
    # inputs, mask: bs x N, candidates: bs x (K+1), targets: bs x 1
    inputs, mask, candidates, targets = data.get_slice(i)
    targets = trans_to_cuda(torch.Tensor(targets).long())

    inputs_content, inputs_length_content = get_text(inputs, word_vectors_300d, content_cut, model.MAX_CONTENT)
    # BUG FIX: use trans_to_cuda() instead of an unconditional .cuda() so
    # this also runs on CPU-only machines, consistent with the rest of the
    # file. The deprecated Variable wrapper (no-op since PyTorch 0.4) is
    # dropped.
    inputs_content = trans_to_cuda(torch.from_numpy(inputs_content).float())  # bs x N x M x d
    inputs_length_content = torch.tensor(inputs_length_content)  # bs x N

    candidates_content, candidates_length_content = get_text(candidates, word_vectors_300d, content_cut, model.MAX_CONTENT)
    candidates_content = trans_to_cuda(torch.from_numpy(candidates_content).float())  # bs x (K+1) x M x d
    candidates_length_content = torch.tensor(candidates_length_content)  # bs x (K+1)

    inputs_content_hidden = model.news_encoder(inputs_content, inputs_length_content)  # bs x N x 2d
    candidates_content_hidden = model.news_encoder(candidates_content, candidates_length_content)  # bs x (K+1) x 2d

    mask = trans_to_cuda(torch.Tensor(mask).long())
    sess_rep_content = model(inputs_content_hidden, mask)  # bs x 2d

    return model.compute_scores(sess_rep_content, candidates_content_hidden), targets
-
-
def train_test(model, train_data, test_data, content_cut, word_vectors_300d):
    """Train for one epoch on *train_data*, then evaluate on *test_data*.

    Returns (AUC, MRR, NDCG@5, NDCG@10), each already scaled by 100.
    """
    print('start training: ', datetime.datetime.now())
    model.train()
    total_loss = 0.0
    slices = train_data.generate_batch(model.batch_size)

    for j, i in enumerate(slices):  # j: batch counter, i: batch slice
        model.optimizer.zero_grad()
        scores, targets = forward(model, i, train_data, content_cut, word_vectors_300d)
        loss = model.loss_function(scores, targets)
        loss.backward()
        model.optimizer.step()
        # BUG FIX: accumulate the python float, not the tensor, so every
        # batch's autograd graph is not kept alive for the whole epoch.
        total_loss += loss.item()
        if j % int(len(slices) / 5 + 1) == 0:
            print('[%d/%d] Loss: %.4f' % (j, len(slices), loss.item()))
    print('\tLoss:\t%.3f' % total_loss)
    # BUG FIX: step the LR scheduler *after* the epoch's optimizer steps,
    # as required since PyTorch 1.1 (stepping first skips the initial LR).
    model.scheduler.step()

    print('start predicting: ', datetime.datetime.now())
    model.eval()
    slices = test_data.generate_batch(model.batch_size)
    with torch.no_grad():
        tes_scores = []
        tes_labels = []
        for i in slices:
            scores, targets = forward(model, i, test_data, content_cut, word_vectors_300d)

            tes_scores = tes_scores + scores.cpu().numpy().tolist()
            # Build one fresh one-hot label row per example. (The old code
            # reused a single tmp_labels list and shadowed the outer loop
            # variable i, which was fragile.)
            for b in range(scores.size(0)):
                row = [0] * scores.size(1)
                row[int(targets[b])] = 1
                tes_labels.append(row)

    # Rank candidates by descending score; `predicts` is the rank positions
    # 1..K+1 and `truths` the relevance labels in that ranked order.
    predicts = []
    truths = []
    for score, label in zip(tes_scores, tes_labels):
        sl_zip = sorted(zip(score, label), key=lambda x: x[0], reverse=True)
        sort_s, sort_l = zip(*sl_zip)
        predicts.append(list(range(1, len(sort_s) + 1, 1)))
        truths.append(sort_l)

    auc_tes, mrr_tes, ndcg5_tes, ndcg10_tes = evaluate(predicts, truths)
    print('AUC: %0.4f\tMRR: %0.4f\tNDCG5: %0.4f\tNDCG10: %0.4f' %
          (auc_tes * 100, mrr_tes * 100, ndcg5_tes * 100, ndcg10_tes * 100))

    return auc_tes * 100, mrr_tes * 100, ndcg5_tes * 100, ndcg10_tes * 100
|