janney
/
LSTM1

 
			
			   
				 
					
						
						
							
							import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import xlrd
import xlwt
import os
import time
from sklearn.preprocessing import minmax_scale

def two_up(time,market):
####设置常量####
        input_size = 19
        time_steps = 2
        hiddens = 64
        layers = 1
        output_size = 3
        batch_size = 16
        learning_rate = 0.001
        #keep_prob = 0.5
        decay_rate = 0.8
        decay_step = 50
        alpha = 1.0
        scale = 1e-4
        train = False
        count = 0
        test_num = 20
        fileName = "E:/Python/tushare/data/everyday/"
        MODEL_NAME = "dichan_LSTM"
        if market == "zhong":
           MODEL_SAVE_PATH = 'E:/Python/tushare/model/中小板/2up'
        if market == "chuang":
           MODEL_SAVE_PATH = 'E:/Python/tushare/model/创业板/2up'
        if market == "zhu":
           MODEL_SAVE_PATH = 'E:/Python/tushare/model/主板/2up'

####从Excel中读出数据####
        readbook = xlrd.open_workbook(fileName + time + '/' + market + '/' + '2up_data.xlsx')
        table = readbook.sheet_by_name('Sheet1')
        writebook = xlwt.Workbook(encoding = 'ascii')
        wtable = writebook.add_sheet('Sheet1')
        row = table.nrows
        col = table.ncols

        stock_inf = []
        for i in range(row-1):
                stock_inf.append([])
                for j in range(2):
                        stock_inf[i].append('s')
        for i in range(row-1):
                for j in range(2):
                        stock_inf[i][j] =table.cell(i + 1,j + 1).value

        data_x = np.zeros((row-1,22))
        data_y = np.zeros((row-1,output_size))
        for i in range(row-1):
                for j in range(23):
                        if j != 22:
                                temp = table.cell(i + 1,j + 3).value
                                if type(temp) == str:
                                        data_x[i][j] = float(temp)
                                if type(temp) != str:
                                        data_x[i][j] = temp
                        else:
                                a = table.cell(i + 1,j + 3).value
                                if a >= 5:
                                        data_y[i] = [1,0,0]
                                elif a > 0 and a < 5:
                                        data_y[i] = [0,1,0]
                                elif a <= 0 and a > -5:
                                        data_y[i] = [0,0,1]
                                else:
                                        data_y[i] = [0,0,1]

####当数据不足被batch_size整除时，末尾添加####

####数据处理，删除data_x中不要列####
        data_x = np.delete(data_x,[17,18,19],1)
        print(data_x.shape)
        print(data_y.shape)

####对输入数据的每一列标注化处理####
        for i in range(input_size):
                data_x[:,i] = minmax_scale(data_x[:,i],copy = True,feature_range = (-1,1))

####把数据变成三维数据####
        data_x = data_x.reshape((-1,time_steps,input_size))
        data_y = data_y.reshape((-1,time_steps,output_size))
        data_x1 = data_x
        data_y1 = data_y
        
####把数据添加为整数个batch_size,用于预测数据####
        while data_x.shape[0] % batch_size != 0:
                data_x = np.row_stack((data_x,data_x[-1].reshape((-1,time_steps,input_size))))
                data_y = np.row_stack((data_y,data_y[-1].reshape((-1,time_steps,output_size))))

##随机打乱训练数据顺序##
        perm = np.random.permutation(data_x1.shape[0])
        data_x1 = data_x1[perm,:,:]
        data_y1 = data_y1[perm,:,:]
        ##train_x = train_x.reshape((-1,input_size))
        ##train_y = train_y.reshape((-1,output_size))

        test_x = data_x1[0:batch_size*test_num,:,:]
        test_y = data_y1[0:batch_size*test_num,:,:]
        train_x = data_x1[batch_size*test_num:data_x.shape[0],:,:]
        train_y = data_y1[batch_size*test_num:data_x.shape[0],:,:]

        print(train_x.shape)
        print(train_y.shape)

        print(test_x.shape)
        print(test_y.shape)

####把数据分成batch_size大小####
        def Next_batch_x(data,batch_size,ii):
                num_b = ii % (data.shape[0] // batch_size)
                return data[num_b * batch_size:(num_b + 1) * batch_size]
        def Next_batch_y(data,batch_size,ii):
                num_b = ii % (data.shape[0] // batch_size)
                return data[num_b * batch_size:(num_b + 1) * batch_size,time_steps - 1,:]

####初始化权重值####
        weights = tf.Variable(tf.truncated_normal([hiddens,output_size],stddev = 0.1,name = 'w',dtype = tf.float32))
        biases = tf.Variable(tf.truncated_normal([output_size],stddev = 0.001,name = 'b',dtype = tf.float32))

        def Weights(n_in,n_out,name = None):
            return tf.Variable(tf.truncated_normal([n_in,n_out],stddev = 0.00001,name = name,dtype = tf.float32))

        def Biases(n_out,name = None):
            return tf.Variable(tf.truncated_normal([n_out],stddev = 0.00001,name = name,dtype = tf.float32))

        def Cell(x,w,b):
            return tf.matmul(x,w) + b

####定义网络结构####
        x = tf.placeholder(tf.float32,[None,time_steps,input_size],name = 'input')
        y = tf.placeholder(tf.float32,[None,output_size],name = 'output')
        keep_prob = tf.placeholder(tf.float32)

        def LSTM1(x):
            def cell():
                cell = rnn.LSTMCell(hiddens,state_is_tuple=True,reuse=tf.get_variable_scope().reuse)
                return rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
            cells = []
            for i in range(layers):
                cells.append(cell())
            lstm_cell = rnn.MultiRNNCell(cells)
            outputs, states = tf.nn.dynamic_rnn(lstm_cell,x,dtype=tf.float32,time_major=False)
            return outputs[:,-1,:]
    #return tf.nn.relu(tf.matmul(outputs[:,-1,:], weights) + biases)
    #return tf.nn.softmax(tf.matmul(tf.tanh(outputs[:,-1,:]), weights) + biases)
        def LSTM2(x):
            rnn_cell = tf.contrib.rnn.BasicLSTMCell(num_units=hiddens)
            outputs,final_state = tf.nn.dynamic_rnn(
                cell=rnn_cell,              # 选择传入的cell
                inputs=x,               # 传入的数据
                initial_state=None,         # 初始状态
                dtype=tf.float32,           # 数据类型
                time_major=False,           # False: (batch, time step, input); True: (time step, batch, input)，这里根据image结构选择False
            )
            return outputs[:, -1, :]

        ##pred = tf.nn.softsign(y4)

        L=LSTM1(x)
        pred = tf.layers.dense(inputs=L, units=output_size)

        ####选择损失函数和优化器####
        #pred = LSTM(x,weights,biases)
        global_step = tf.Variable(0,trainable = False)
        reg = tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(scale),tf.trainable_variables())
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred,labels = y))
        ##cost = tf.reduce_mean(tf.square(y - pred)) + reg
        ##learning_rate = tf.train.exponential_decay(learning_rate,global_step,decay_step,decay_rate,staircase = True)
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost,global_step = global_step)
        ##train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost,global_step = global_step)
        ##train_op = tf.train.MomentumOptimizer(learning_rate,momentum = 0.9).minimize(cost,global_step = global_step)
        ##train_op = tf.train.AdadeltaOptimizer(learning_rate).minimize(cost,global_step = global_step)
        ##train_op = tf.train.RMSPropOptimizer(learning_rate).minimize(cost,global_step = global_step)
        ##accurary = tf.metrics.accuracy(labels = tf.argmax(y,axis = 1),predictions = tf.argmax(pred,axis = 1))[1]
        prediction = tf.equal(tf.argmax(y,1),tf.argmax(pred,1))
        accurary = tf.reduce_mean(tf.cast(prediction,tf.float32))
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()

        with tf.Session() as sess:
                sess.run(init)
                ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                        saver.restore(sess,ckpt.model_checkpoint_path)
                if train == True:
                        for i in range(200000):
                                count = i
                                ii = np.random.randint(0,80000)
                                batch_x = Next_batch_x(train_x,batch_size,ii)
                                batch_y = Next_batch_y(train_y,batch_size,ii)
                                sess.run(train_op,feed_dict={x:batch_x,y:batch_y,keep_prob:0.5})
                                if i % 100 == 0:
                                        print(i,"%.2f"%sess.run(accurary,feed_dict={x:batch_x,y:batch_y,keep_prob:1.0}),"%.4f"%sess.run(cost,feed_dict={x:batch_x,y:batch_y,keep_prob:1}))
                                        saver.save(sess,os.path.join(MODEL_SAVE_PATH,MODEL_NAME),global_step = global_step)
                                        pred_ = sess.run(pred,feed_dict={x:train_x[0:batch_size,:,:],keep_prob:1.0})
        ##                                print("outputs:",pred_)
                                if i % 500 == 0:
                                        count = 0
                                        for i in range(test_num):
                                                accurary_, pred_ = sess.run([accurary,pred],feed_dict={x:test_x[i*batch_size:(i+1)*batch_size,:,:],y:test_y[i*batch_size:(i+1)*batch_size,time_steps - 1,:],keep_prob:1.0})
        ##                                        print("outputs: %.2f"%accurary_)
                                                count += accurary_
                                        print("arg: %.2f" %(count/test_num))
                else:
                        temp = []
                        for i in range(data_x.shape[0] // batch_size):
                                accurary_, pred_ = sess.run([accurary,pred],feed_dict={x:data_x[i*batch_size:(i+1)*batch_size,:,:],y:data_y[i*batch_size:(i+1)*batch_size,time_steps - 1,:],keep_prob:1.0})
        ##                        print("accurary: %.2f"% accurary_)
                                temp.extend(sess.run(tf.argmax(pred_,1)))
                        print(temp)
                        for i in range(len(temp)):
                                wtable.write(i,1,int(temp[i]))
                        writebook.save(fileName + time + '/' + market + '/' + '2up_data_label_pred.xlsx')
                        print('Successy!')

                        stock_chose = []
                        for i in range(len(temp)):
                                if (2*i+1)<= len(stock_inf) and (temp[i] == 0 or temp[i] == 1) and stock_inf[2*i+1][1] == time:
                                        stock_chose.append(stock_inf[2*i+1][0])
                        print("****"+market+"***2up****")
                        print(stock_chose)
        tf.reset_default_graph()

if __name__ == "__main__":
        two_up("20190125","zhong")