Here we add an attention mechanism on top of a GRU/LSTM network. Let's walk through the concrete implementation:
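Before diving into the code, this is the computation the attention layer below performs, written as a minimal NumPy sketch (illustration only, not part of the project; the shapes follow the config that comes later):

# NumPy sketch of the attention step: given Bi-RNN outputs H of shape
# [T, 2*rnn_size], a learned projection (W, b) and a context vector u score
# every time step, and the sequence is collapsed into one weighted-sum vector.
import numpy as np

T, hidden_size, attention_dim = 300, 2 * 128, 100     # seq_len, 2*rnn_size, attention_dim
H = np.random.randn(T, hidden_size)                   # stand-in for the Bi-RNN outputs
W = np.random.randn(hidden_size, attention_dim) * 0.1
b = np.zeros(attention_dim)
u = np.random.randn(attention_dim) * 0.1

v = np.tanh(H @ W + b)                                # [T, attention_dim]
scores = v @ u                                        # one score per time step
alphas = np.exp(scores) / np.exp(scores).sum()        # softmax over time
output = (H * alphas[:, None]).sum(axis=0)            # attention-pooled vector
print(output.shape)                                   # (256,)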
The utility functions (data_utils.py):
#!/usr/bin/python
# -*- coding: utf-8 -*-
from collections import Counter
import tensorflow.contrib.keras as kr
import numpy as np
import os
import codecs
import tensorflow as tf


def _read_file(filename):
    """Read the data file: one `label<TAB>content` pair per line."""
    contents = []
    labels = []
    # Use the codecs module: the Python 2.x open() does not support utf-8
    # encoding, so this makes the code more robust.
    with codecs.open(filename, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            try:
                label, content = line.strip().split('\t')
                contents.append(content.strip().split(" "))
                labels.append(label)
            except Exception:
                pass
    return contents, labels


def _read_vocab(filename):
    """Read the vocabulary list."""
    words = list(map(lambda line: line.strip(),
                     codecs.open(filename, 'r', encoding='utf-8').readlines()))
    word_to_id = dict(zip(words, range(len(words))))
    return words, word_to_id


def read_vocab_predict(filename):
    """Read the vocabulary (prediction-time variant that only returns the mapping)."""
    words = list(map(lambda line: line.strip(),
                     codecs.open(filename, 'r', encoding='utf-8').readlines()))
    word_to_id = dict(zip(words, range(len(words))))
    return word_to_id


def _read_category():
    """Return the target categories and their id mapping."""
    categories = ["0", "1"]
    cat_to_id = dict(zip(categories, range(len(categories))))
    return categories, cat_to_id


def to_words(content, words):
    """Convert id-encoded content back to text."""
    return ''.join(words[x] for x in content)


def _file_to_ids(filename, word_to_id, max_len=300):
    """Convert a file to its id representation."""
    _, cat_to_id = _read_category()
    contents, labels = _read_file(filename)

    data_id = []
    label_id = []
    for i in range(len(contents)):
        data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
        label_id.append(cat_to_id[labels[i]])

    # Use keras' pad_sequences to pad the texts to a fixed length.
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, max_len)
    y_pad = kr.utils.to_categorical(label_id)
    return x_pad, y_pad


def preocess_file(data_path, vocapath, seq_length=300):
    """Return all the data at once."""
    words, word_to_id = _read_vocab(vocapath)
    x_train, y_train = _file_to_ids(data_path, word_to_id, seq_length)
    # x_test, y_test = _file_to_ids(os.path.join(data_path, 'cnews.test.txt'), word_to_id, seq_length)
    # x_val, y_val = _file_to_ids(os.path.join(data_path, 'cnews.val.txt'), word_to_id, seq_length)
    return x_train, y_train, words


# def preocess_file_test(data_path="/Users/shuubiasahi/Desktop/rnn.txt", seq_length=300):
#     """Return all the data at once (debug variant)."""
#     words, word_to_id = _read_vocab("vocab_cnews.txt")
#     print("words length is:", len(words))
#     print("word_to_id length is:", len(word_to_id))
#     x_train, y_train = _file_to_ids_test(data_path, word_to_id, seq_length)
#     # x_test, y_test = _file_to_ids(os.path.join(data_path, 'cnews.test.txt'), word_to_id, seq_length)
#     # x_val, y_val = _file_to_ids(os.path.join(data_path, 'cnews.val.txt'), word_to_id, seq_length)
#     return x_train, y_train, words


# def _file_to_ids_test(filename, word_to_id, max_len=300):
#     """Convert a file to its id representation (debug variant with prints)."""
#     _, cat_to_id = _read_category()
#     contents, labels = _read_file(filename)
#
#     data_id = []
#     label_id = []
#     for i in range(len(contents)):
#         data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
#         label_id.append(cat_to_id[labels[i]])
#     # Use keras' pad_sequences to pad the texts to a fixed length.
#
#     print("contents is:", contents)
#     print("data id is:", data_id)
#     print("labels is:", labels)
#     print("label id is:", label_id)
#     x_pad = kr.preprocessing.sequence.pad_sequences(data_id, max_len)
#     y_pad = kr.utils.to_categorical(label_id)
#     print("xpad is:", x_pad)
#     print("ypad is:", y_pad)
#     return x_pad, y_pad


def file_to_ids_single(content, word_to_id, maxlen=300):
    """Convert a single piece of text to its padded id representation (for prediction)."""
    contents = []
    contents.append(list(content.lower()))
    data_id = []
    for i in range(len(contents)):
        data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
    # print("data_id is:", data_id)
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id, maxlen)
    return x_pad
def batch_iter(data, batch_size=64, num_epochs=5):
    """Generate batches of data."""
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        indices = np.random.permutation(np.arange(data_size))
        shuffled_data = data[indices]
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]


if __name__ == '__main__':
    """data_id is: [[266, 1548, 255]]"""
    words, word_to_id = _read_vocab("vocab_cnews.txt")
    print("len word_to_id:", len(word_to_id))
    result = file_to_ids_single("日你个香蕉芭乐", word_to_id=word_to_id)
    print(result[0][299])
    print(result)
    # build_vocab(Path.baseabusepath)
    # x_train, y_train, words = preocess_file()
    # print(x_train.shape, y_train.shape)
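A quick usage note: `_read_file` expects each line of the training file to be `label<TAB>content`, where the label is "0" or "1" (see `_read_category`) and the content is already split into space-separated tokens; the vocabulary file holds one token per line, and a token's line number is its id. The sketch below shows how the two helpers used by the training script fit together (illustration only; the local file names echo the ones the training script points at, and the shapes assume its defaults):

# Minimal usage sketch: load the data files and iterate over batches.
from data_utils import preocess_file, batch_iter

x_train, y_train, words = preocess_file("cnn.txt", "vocab.txt", seq_length=300)
print(x_train.shape)   # (num_samples, 300): token ids, padded by pad_sequences
print(y_train.shape)   # (num_samples, 2):   one-hot labels from to_categorical

for batch in batch_iter(list(zip(x_train, y_train)), batch_size=64, num_epochs=1):
    x_batch, y_batch = zip(*batch)
    # feed x_batch / y_batch to the model here
    break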
The config (configuration.py):
class AttentionConfig(object):
    embedding_dim = 64      # word-embedding dimension
    seq_len = 300           # sequence length
    num_classes = 2         # number of classes
    vocab_size = 9000       # vocabulary size (overwritten with the real size at runtime)
    num_rnn_layers = 2      # number of hidden RNN layers
    rnn_size = 128          # number of hidden units per RNN layer
    rnn = 'gru'             # 'lstm' or 'gru'
    keep_prob = 0.6         # dropout keep probability
    learning_rate = 1e-3    # learning rate
    batch_size = 128        # batch size
    num_epochs = 10         # total number of epochs
    print_per_batch = 100   # report results every this many batches
    l2_reg_lambda = 0.006   # L2 regularization strength
    attention_dim = 100     # attention layer size
    max_grad_norm = 5       # gradient clipping threshold
    isgru = False           # the flag the model actually checks: True = GRU, False = LSTM
The model (attentionmodelrnn.py):
import tensorflow as tf


class RnnAttention:
    def __init__(self, config):
        # Define the input placeholders.
        self.config = config
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_len], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name="input_y")
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.birnn()
        # self.mubirnn()

    def input_embedding(self):
        """Word embedding.
        The GPU device pinning is removed here: the model is served on CPU,
        and pinning to a GPU would make it fail there.
        """
        # with tf.device('/gpu:0'):
        embeddings = tf.get_variable("embedding", [self.config.vocab_size, self.config.embedding_dim])
        inputs = tf.nn.embedding_lookup(embeddings, self.input_x)
        return inputs

    def birnn(self):
        inputs = self.input_embedding()

        with tf.name_scope("rnn"):
            def gru():
                rnn_cell_fw = tf.contrib.rnn.GRUCell(num_units=self.config.rnn_size)
                rnn_cell_cw = tf.contrib.rnn.GRUCell(num_units=self.config.rnn_size)
                return rnn_cell_cw, rnn_cell_fw

            def lstm():
                rnn_cell_fw = tf.contrib.rnn.LSTMCell(num_units=self.config.rnn_size)
                rnn_cell_cw = tf.contrib.rnn.LSTMCell(num_units=self.config.rnn_size)
                return rnn_cell_cw, rnn_cell_fw

            if self.config.isgru:
                rnn_cell_cw, rnn_cell_fw = gru()
            else:
                rnn_cell_cw, rnn_cell_fw = lstm()

            rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=rnn_cell_fw,
                                                             cell_bw=rnn_cell_cw,
                                                             inputs=inputs,
                                                             dtype=tf.float32)
            rnn_outputs = tf.concat(rnn_outputs, 2)

        # The attention layer
        with tf.name_scope("attention"):
            # Attention mechanism
            sequence_length = rnn_outputs.shape[1].value  # length of the sequences processed by the RNN layer
            hidden_size = rnn_outputs.shape[2].value      # hidden size of the RNN layer (2 * rnn_size)
            W = tf.Variable(tf.truncated_normal([hidden_size, self.config.attention_dim], stddev=0.1), name="W")
            b = tf.Variable(tf.random_normal([self.config.attention_dim], stddev=0.1), name="b")
            u = tf.Variable(tf.random_normal([self.config.attention_dim], stddev=0.1), name="u")
            v = tf.tanh(tf.matmul(tf.reshape(rnn_outputs, [-1, hidden_size]), W) + tf.reshape(b, [1, -1]))
            vu = tf.matmul(v, tf.reshape(u, [-1, 1]))
            exps = tf.reshape(tf.exp(vu), [-1, sequence_length])
            alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])

            # The Bi-RNN output is reduced with the attention vector.
            output = tf.reduce_sum(rnn_outputs * tf.reshape(alphas, [-1, sequence_length, 1]), 1)

            # Add L2 regularization losses for the attention weights.
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(W), tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(b), tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(u), tf.GraphKeys.REGULARIZATION_LOSSES)

        dropout_outputs = tf.nn.dropout(output, self.keep_prob, name="dropout")

        with tf.name_scope("score"):
            W = tf.Variable(tf.truncated_normal([dropout_outputs.shape[1].value, self.config.num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[self.config.num_classes]), name="b")
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(W), tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.losses.add_loss(self.config.l2_reg_lambda * tf.nn.l2_loss(b), tf.GraphKeys.REGULARIZATION_LOSSES)
            self.scores = tf.nn.xw_plus_b(dropout_outputs, W, b, name="scores")
            self.pred_y = tf.nn.softmax(self.scores, name="pred_y")
            tf.add_to_collection('pred_network', self.pred_y)
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Loss
        with tf.name_scope("loss"):
            tf.losses.softmax_cross_entropy(logits=self.scores, onehot_labels=self.input_y)
            self.cost = tf.losses.get_total_loss()

        # Optimizer
        with tf.name_scope("optimize"):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            self.train_op = optimizer.minimize(self.cost)

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.acc = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
The training script (run):
from attentionmodelrnn import RnnAttention
from configuration import AttentionConfig
from data_utils import preocess_file, batch_iter
import time
import tensorflow as tf
import os
from datetime import timedelta

# basepath = "/Users/shuubiasahi/Documents/python"
# noexperience
# business
# together
basepath = "/home/zhoumeixu"
data_path = basepath + "/credit-tftextclassify/tensorflow/noexperience/cnn.txt"
vocapath = basepath + "/credit-tftextclassify/tensorflow/noexperience/vocab.txt"
modelpath = basepath + "/credit-tftextclassify/tensorflow/noexperience/"
print(modelpath, "starting to train the attention model")


def run_epoch(rnn=False):
    # Load the data
    print('Loading data...')
    start_time = time.time()
    x_train, y_train, words = preocess_file(data_path, vocapath)

    config = AttentionConfig()
    if config.isgru:
        print('Using attention gru model...')
    else:
        print('Using attention lstm model...')
    config.vocab_size = len(words)
    print("vocab_size is:", config.vocab_size)
    model = RnnAttention(config)
    tensorboard_dir = basepath + '/boardlog'

    end_time = time.time()
    time_dif = end_time - start_time
    time_dif = timedelta(seconds=int(round(time_dif)))
    print('Time usage:', time_dif)

    print('Constructing TensorFlow Graph...')
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # Configure tensorboard
    tf.summary.scalar("loss", model.cost)
    tf.summary.scalar("accuracy", model.acc)

    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    writer.add_graph(session.graph)

    # Generate batches
    print('Generating batch...')
    batch_train = batch_iter(list(zip(x_train, y_train)), config.batch_size, config.num_epochs)

    def feed_data(batch):
        """Prepare the data to feed into the model."""
        x_batch, y_batch = zip(*batch)
        feed_dict = {
            model.input_x: x_batch,
            model.input_y: y_batch
        }
        return feed_dict, len(x_batch)

    def evaluate(x_, y_):
        """Model evaluation.
        Running all the data at once causes OOM, so evaluate in batches and aggregate.
        """
        batch_eval = batch_iter(list(zip(x_, y_)), 128, 1)
        total_loss = 0.0
        total_acc = 0.0
        cnt = 0
        for batch in batch_eval:
            feed_dict, cur_batch_len = feed_data(batch)
            feed_dict[model.keep_prob] = 1.0
            loss, acc = session.run([model.cost, model.acc], feed_dict=feed_dict)
            total_loss += loss * cur_batch_len
            total_acc += acc * cur_batch_len
            cnt += cur_batch_len
        return total_loss / cnt, total_acc / cnt

    # Training and evaluation
    print('Training and evaluating...')
    start_time = time.time()
    print_per_batch = config.print_per_batch

    for i, batch in enumerate(batch_train):
        feed_dict, _ = feed_data(batch)
        feed_dict[model.keep_prob] = config.keep_prob

        if i % 5 == 0:  # write the training summaries to tensorboard every 5 steps
            s = session.run(merged_summary, feed_dict=feed_dict)
            writer.add_summary(s, i)

        if i % print_per_batch == print_per_batch - 1:  # report training-set performance every print_per_batch steps
            loss_train, acc_train = session.run([model.cost, model.acc], feed_dict=feed_dict)
            # loss, acc = evaluate(x_val, y_val)  # validation set not needed for now

            # Elapsed time
            end_time = time.time()
            time_dif = end_time - start_time
            time_dif = timedelta(seconds=int(round(time_dif)))

            msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                  + ' Time: {3}'
            print(msg.format(i + 1, loss_train, acc_train, time_dif))

        if i % 500 == 0 and i > 0:
            graph = tf.graph_util.convert_variables_to_constants(
                session, session.graph_def, ["keep_prob", "input_x", "score/pred_y"])
            tf.train.write_graph(graph, ".", modelpath + "graphattention.model", as_text=False)
            print("attention model saved at step {0}".format(i))

        session.run(model.train_op, feed_dict=feed_dict)  # run the optimization step

    session.close()


if __name__ == '__main__':
    # run_epoch(rnn=True)
    run_epoch(rnn=False)
Results:
2017-11-25 13:48:37.183112: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 3920 get requests, put_count=6089 evicted_count=2000 eviction_rate=0.328461 and unsatisfied allocation rate=0
2017-11-25 13:48:37.995920: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 5649 get requests, put_count=8853 evicted_count=3000 eviction_rate=0.338868 and unsatisfied allocation rate=0
2017-11-25 13:48:39.200365: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 2297 get requests, put_count=3569 evicted_count=1000 eviction_rate=0.280191 and unsatisfied allocation rate=0
2017-11-25 13:48:40.431985: I tensorflow/core/common_runtime/gpu/pool_allocator.cc:247] PoolAllocator: After 2448 get requests, put_count=3810 evicted_count=1000 eviction_rate=0.262467 and unsatisfied allocation rate=0
Iter: 100, Train Loss: 0.24, Train Acc: 95.31%, Time: 0:00:42
Iter: 200, Train Loss: 0.14, Train Acc: 96.09%, Time: 0:01:23
Iter: 300, Train Loss: 0.14, Train Acc: 95.31%, Time: 0:02:03
Iter: 400, Train Loss: 0.1, Train Acc: 97.66%, Time: 0:02:44
Iter: 500, Train Loss: 0.27, Train Acc: 89.84%, Time: 0:03:25
Converted 10 variables to const ops.
attention model saved at step 500
Iter: 600, Train Loss: 0.16, Train Acc: 93.75%, Time: 0:04:06
Iter: 700, Train Loss: 0.15, Train Acc: 96.09%, Time: 0:04:46
Iter: 800, Train Loss: 0.14, Train Acc: 94.53%, Time: 0:05:26
Iter: 900, Train Loss: 0.1, Train Acc: 95.31%, Time: 0:06:06
Iter: 1000, Train Loss: 0.11, Train Acc: 93.75%, Time: 0:06:47
Converted 10 variables to const ops.
attention model saved at step 1000
Iter: 1100, Train Loss: 0.044, Train Acc: 99.22%, Time: 0:07:28
Iter: 1200, Train Loss: 0.23, Train Acc: 90.62%, Time: 0:08:09
Iter: 1300, Train Loss: 0.11, Train Acc: 96.88%, Time: 0:08:51
Iter: 1400, Train Loss: 0.077, Train Acc: 96.88%, Time: 0:09:31
Iter: 1500, Train Loss: 0.087, Train Acc: 96.09%, Time: 0:10:11
Converted 10 variables to const ops.
attention model saved at step 1500
Iter: 1600, Train Loss: 0.11, Train Acc: 96.88%, Time: 0:10:52
Iter: 1700, Train Loss: 0.099, Train Acc: 95.31%, Time: 0:11:32
Iter: 1800, Train Loss: 0.08, Train Acc: 96.09%, Time: 0:12:13
Iter: 1900, Train Loss: 0.1, Train Acc: 96.88%, Time: 0:12:53
Iter: 2000, Train Loss: 0.13, Train Acc: 94.53%, Time: 0:13:34
Converted 10 variables to const ops.
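The run above freezes the graph with the output nodes input_x, keep_prob and score/pred_y into graphattention.model every 500 steps. A minimal inference sketch using that frozen graph could look like this (the local file paths and the input sentence are placeholders, not the actual project paths):

# Load the frozen graph and score one piece of text.
import tensorflow as tf

from data_utils import read_vocab_predict, file_to_ids_single

word_to_id = read_vocab_predict("vocab.txt")                 # placeholder path
x = file_to_ids_single("some input text", word_to_id, maxlen=300)

with tf.gfile.GFile("graphattention.model", "rb") as f:      # placeholder path
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="")
    input_x = graph.get_tensor_by_name("input_x:0")
    keep_prob = graph.get_tensor_by_name("keep_prob:0")
    pred_y = graph.get_tensor_by_name("score/pred_y:0")

with tf.Session(graph=graph) as sess:
    probs = sess.run(pred_y, feed_dict={input_x: x, keep_prob: 1.0})
    print(probs)   # [[p_class0, p_class1]]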