深度学习十分类loss一直保持在2.3是什么问题,求解答

深度学习问题求大神解答


最近在做基于alexnet网络训练cifar10的分类问题,
现在遇到问题是训练中loss一直保持在2.3左右不变,准确率也不上升
求各位大佬帮我看下代码,指出问题,谢各位大佬
代码如下:

# -*- coding: UTF-8 -*-

import tensorflow as tf
import os
import numpy as np

####### Network hyper-parameters
# Non-trainable step counter; incremented by the optimizer's minimize().
global_step = tf.Variable(0, trainable=False)
# Base LR 0.001, multiplied by 0.9 every 10000 steps (staircase decay).
learning_rate = tf.train.exponential_decay(0.001, global_step, 10000, 0.9, staircase=True)
momentum = 0.9          # momentum for MomentumOptimizer
training_iters = 70000  # number of training iterations
batch_size = 64         # mini-batch size
####### Network parameters
n_classes = 10  # number of output classes (CIFAR-10)
dropout = 0.5   # dropout keep probability used during training
####### Placeholder inputs
# NOTE(review): the pasted code used typographic quotes (“float”), which is a
# Python syntax error — plain ASCII string quotes are required.
x = tf.placeholder("float", [None, 32, 32, 3])
y = tf.placeholder("float", [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

####### 得到训练集图像地址集合和其标签集合
def get_files(filename):
    """Collect training image paths and labels from a class-per-directory tree.

    `filename` is a directory (string ending with a path separator — paths are
    built by plain string concatenation) whose subdirectories are named after
    integer class ids ("0".."9" for CIFAR-10).

    Returns:
        (image_list, label_list): shuffled, index-aligned lists of image file
        paths and their int class labels.
    """
    class_train = []
    label_train = []
    for train_class in os.listdir(filename):
        for pic in os.listdir(filename + train_class):
            # NOTE(review): the pasted code used typographic quotes (’/’);
            # plain ASCII quotes are required.
            class_train.append(filename + train_class + '/' + pic)
            label_train.append(train_class)
    # Pair each path with its label so they stay aligned through the shuffle.
    temp = np.array([class_train, label_train])
    temp = temp.transpose()
    # Shuffle the (path, label) rows together.
    np.random.shuffle(temp)
    # After the transpose, column 0 holds image paths and column 1 labels.
    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    # Directory names double as integer class ids.
    label_list = [int(i) for i in label_list]
    return image_list, label_list

####### 得到一批训练集图像数据和其标签数据
def get_batches(image, label, resize_w, resize_h, batch_size, capacity):
    """Build a TF1 queue-based input pipeline yielding (images, labels) batches.

    Args:
        image, label: index-aligned Python lists of file paths and int labels.
        resize_w, resize_h: target spatial size (crop or pad, no scaling).
        batch_size: number of examples per batch.
        capacity: queue capacity for tf.train.batch.

    Returns:
        images_batch: float32 tensor of standardized images.
        labels_batch: one-hot labels of shape [batch_size, 10].
    """
    # Convert the Python lists to tensors so they can feed the input queue.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int64)
    queue = tf.train.slice_input_producer([image, label])
    label = queue[1]
    image_c = tf.read_file(queue[0])
    image = tf.image.decode_jpeg(image_c, channels=3)
    # Crop or pad to the requested size (no interpolation).
    image = tf.image.resize_image_with_crop_or_pad(image, resize_w, resize_h)
    # Per-image standardization: (x - mean) / adjusted_stddev.
    image = tf.image.per_image_standardization(image)

    images_batch, labels_batch = tf.train.batch(
        [image, label], batch_size=batch_size, num_threads=1, capacity=capacity)
    images_batch = tf.cast(images_batch, tf.float32)
    labels_batch = tf.reshape(labels_batch, [batch_size])
    # One-hot encode the integer labels (depth 10 for CIFAR-10).
    labels_batch = tf.one_hot(labels_batch, depth=10, axis=1)
    return images_batch, labels_batch

####### 初始化w
def init_w(shape, stddev):
    """Create (or reuse) a weight variable named "w" in the current variable
    scope, initialized from a truncated normal with the given stddev."""
    initializer = tf.truncated_normal_initializer(dtype=tf.float32, stddev=stddev)
    # NOTE(review): get_variable("w") means callers must wrap each layer in
    # its own tf.variable_scope, or the names collide.
    w = tf.get_variable("w", shape=shape, initializer=initializer)
    return w

####### 初始化b
def init_b(shape):
    """Create (or reuse) a zero-initialized bias variable named "b" in the
    current variable scope."""
    initializer = tf.constant_initializer(0.0)
    b = tf.get_variable("b", shape=shape, initializer=initializer)
    return b

####### 卷积操作
def conv2d(l_input, w, b):
    """Stride-1, SAME-padded convolution followed by bias add and ReLU."""
    return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(l_input, w, strides=[1, 1, 1, 1], padding='SAME'), b))

####### 最大下采样操作
def max_pool(l_input, k):
    """k x k max-pooling with stride k (halves spatial size when k=2)."""
    return tf.nn.max_pool(l_input, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

####### 定义整个网络
def alex_net():
    """AlexNet-style CNN over the module-level placeholders `x`/`keep_prob`.

    Input is 32x32x3; two 2x poolings reduce it to 8x8, matching the
    hard-coded 8*8*96 flatten size in fc1.

    Returns:
        vector: the 1024-d fc2 activation (pre-dropout).
        out: the 10-way unnormalized logits.
    """
    with tf.variable_scope("conv1"):
        wc1 = init_w([3, 3, 3, 24], 0.01)
        bc1 = init_b([24])
        conv1 = conv2d(x, wc1, bc1)
        pool1 = max_pool(conv1, k=2)  # 32x32 -> 16x16
    with tf.variable_scope("conv2"):
        wc2 = init_w([3, 3, 24, 96], 0.01)
        bc2 = init_b([96])
        conv2 = conv2d(pool1, wc2, bc2)
        pool2 = max_pool(conv2, k=2)  # 16x16 -> 8x8
    with tf.variable_scope("conv3"):
        wc3 = init_w([3, 3, 96, 192], 0.01)
        bc3 = init_b([192])
        conv3 = conv2d(pool2, wc3, bc3)
    with tf.variable_scope("conv4"):
        wc4 = init_w([3, 3, 192, 192], 0.01)
        bc4 = init_b([192])
        conv4 = conv2d(conv3, wc4, bc4)
    with tf.variable_scope("conv5"):
        wc5 = init_w([3, 3, 192, 96], 0.01)
        bc5 = init_b([96])
        conv5 = conv2d(conv4, wc5, bc5)
    with tf.variable_scope("fc1"):
        # Flatten the 8x8x96 feature map into a vector for the FC layer.
        wfc1 = init_w([8 * 8 * 96, 1024], 1e-2)
        bfc1 = init_b([1024])
        shape = conv5.get_shape()
        conv5_reshape = tf.reshape(conv5, [-1, shape[1].value * shape[2].value * shape[3].value])
        fc1 = tf.nn.relu(tf.matmul(conv5_reshape, wfc1) + bfc1)
        fc1 = tf.nn.dropout(fc1, keep_prob)
    with tf.variable_scope("fc2"):
        wfc2 = init_w([1024, 1024], 1e-2)
        bfc2 = init_b([1024])
        vector = tf.nn.relu(tf.matmul(fc1, wfc2) + bfc2)
        fc2 = tf.nn.dropout(vector, keep_prob)
    # Output layer lives outside the "fc2" scope: init_w uses
    # get_variable("w"), so creating it inside "fc2" would collide with wfc2.
    w_out = init_w([1024, 10], 1e-2)
    b_out = init_b([10])
    out = tf.matmul(fc2, w_out) + b_out
    return vector, out

Flag_Is_Train = True  # True: run the training loop; False: do nothing

# NOTE(review): the pasted code read `if name == 'main':`; the double
# underscores were stripped by the forum's formatting.
if __name__ == '__main__':
    image_w, image_h = 32, 32
    # Training data
    data_dir = 'E:/dataSet/cifar10_data_train/'
    image_list, label_list = get_files(data_dir)
    images_data, true_labels = get_batches(image_list, label_list, image_w, image_h, batch_size, 64)

    # Test data
    data_dir_test = 'E:/dataSet/cifar10_data_test/'
    image_list_test, label_list_test = get_files(data_dir_test)
    images_data_test, true_labels_test = get_batches(image_list_test, label_list_test, image_w, image_h, batch_size, 64)

    # Build the model
    vector, pred = alex_net()
    # Loss (softmax cross-entropy on the logits) and training op
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(loss, global_step=global_step)
    # Evaluation: fraction of argmax matches between logits and one-hot labels
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Checkpoint saver
    saver = tf.train.Saver()
    # tf.initialize_all_variables() is deprecated in TF1; this is the
    # drop-in replacement with identical behavior.
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        if Flag_Is_Train:
            sess.run(init)
            # Start the queue runners feeding get_batches' pipelines.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)
            try:
                for step in range(training_iters):
                    # Pull one training batch and take an optimization step.
                    images_data_v, true_labels_v = sess.run([images_data, true_labels])
                    sess.run(optimizer, feed_dict={x: images_data_v, y: true_labels_v, keep_prob: dropout})
                    # Pull one test batch for monitoring.
                    images_data_v_test, true_labels_v_test = sess.run([images_data_test, true_labels_test])
                    # NOTE(review): each sess.run below re-runs the forward
                    # pass; fetching [loss, accuracy] together would be cheaper.
                    print('step %d, train_loss = %.6f, test_loss = %.6f, '
                          'train_accuracy = %.4f, test_accuracy = %.4f' % (
                            step, sess.run(loss, feed_dict={x: images_data_v, y: true_labels_v, keep_prob: 1.0}),
                            sess.run(loss, feed_dict={x: images_data_v_test, y: true_labels_v_test, keep_prob: 1.0}),
                            sess.run(accuracy, feed_dict={x: images_data_v, y: true_labels_v, keep_prob: 1.0}),
                            sess.run(accuracy, feed_dict={x: images_data_v_test, y: true_labels_v_test, keep_prob: 1.0})))
                    print(sess.run(pred, feed_dict={x: images_data_v, keep_prob: 1.0})[0])
                    # NOTE(review): checkpointing on EVERY step is extremely
                    # slow; consider saving every few hundred steps instead.
                    saver.save(sess, "cifar10_model/model.ckpt", global_step=step)
            except tf.errors.OutOfRangeError:
                print("done")
            finally:
                coord.request_stop()
            coord.join(threads)
        else:
            print("else")

版权声明:本文为qq_28063447原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。