Training accuracy is 99%, but test accuracy is only 40%

I'm training AlexNet on ImageNet. Training accuracy reaches 99% (which already looks suspicious), but test accuracy is only 40%. I tried CIFAR-10 as well and hit the same problem: 99.9% training accuracy versus 76% test accuracy. It happens with or without batch normalization, with the tf.data.Dataset pipeline or with a plain data-loading approach, and whether or not the data is shuffled. I just can't see what is wrong in the code.
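For reference, here is a minimal diagnostic sketch (it reuses the accuracy, next_batch and training_init_op ops and the x, y, phase_train placeholders defined in the training script below; n_check is an arbitrary batch count, not something from the script). It re-scores a few training batches in inference mode; if accuracy collapses there as well, the train/test gap would point at the batch-norm moving statistics used at test time rather than at overfitting.

# Diagnostic sketch: run inside the tf.Session below, after some training.
# Assumes the ops/placeholders from the training script: accuracy, next_batch,
# training_init_op, x, y, phase_train. n_check is an arbitrary choice.
sess.run(training_init_op)
acc_infer = 0.0
n_check = 50
for i in range(n_check):
    img_batch, label_batch = sess.run(next_batch)
    acc_infer += sess.run(accuracy, {x: img_batch, y: label_batch, phase_train: False})
print 'train accuracy with phase_train=False: %.5f' % (acc_infer / n_check)

The full training script, model definition (model.py) and data loader (datagenerator.py) follow: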
#coding:utf-8
import os
import tensorflow as tf
import numpy as np
from datetime import *
import datetime
import ConfigParser
from tensorflow.python.ops import control_flow_ops
from dataflow import *
#from model import Cifar10
from model import Alexnet

from datagenerator import ImageDataGenerator

from tensorflow.data import Iterator
# Path to the text files for the training and validation sets
train_file = '/path/ilsvrc_metadata/train.txt'
val_file = '/path/ilsvrc_metadata/val.txt'

Model = Alexnet

params_file = "imagenet.ini"


FLAGS = tf.app.flags.FLAGS
config = ConfigParser.ConfigParser()
config.read(params_file)
tf.app.flags.DEFINE_integer('train_batch_size', config.getint('DEFINE', 'train_batch_size'), '')
tf.app.flags.DEFINE_integer('val_batch_size', config.getint('DEFINE', 'val_batch_size'), '')
tf.app.flags.DEFINE_integer('batch_size', config.getint('DEFINE', 'batch_size'), '')
tf.app.flags.DEFINE_integer('classnum', config.getint('DEFINE', 'classnum'), '')
tf.app.flags.DEFINE_integer('max_steps', config.getint('DEFINE', 'max_steps'), '')
tf.app.flags.DEFINE_integer('display_step', config.getint('DEFINE', 'display_step'), '')
tf.app.flags.DEFINE_string('train_tf_path', config.get('DEFINE', 'train_tf_path'), '')
tf.app.flags.DEFINE_string('val_tf_path', config.get('DEFINE', 'val_tf_path'), '')
#tf.app.flags.DEFINE_integer('height', config.getint('DEFINE', 'input_height'), '')
#tf.app.flags.DEFINE_integer('width', config.getint('DEFINE', 'input_width'), '')
model_name = config.get('DEFINE', 'model_name')

lr = float(config.get('DEFINE', 'learning_rate'))
lr = 1e-3
num_epoch = config.getint('DEFINE', 'num_epoch')



restore_model_path = config.get('DEFINE', 'restore_model_path')
restore_model_path = 'point_data/imagenet_alexnet_1'
filewrite_path = config.get('DEFINE', 'filewrite_path')
checkpoint = config.get('DEFINE', 'checkpoint')
if not os.path.isdir(checkpoint):
    os.mkdir(checkpoint)
input_height = config.getint('DEFINE', 'input_height')
input_width = config.getint('DEFINE', 'input_width')


batch_size = FLAGS.batch_size
num_classes = FLAGS.classnum

tr_data = ImageDataGenerator(train_file,
                             mode='training',
                             batch_size=batch_size,
                             num_classes=num_classes,
                             shuffle=True)
val_data = ImageDataGenerator(val_file,
                              mode='inference',
                              batch_size=batch_size,
                              num_classes=num_classes,
                              shuffle=False)

# create a reinitializable iterator given the dataset structure
iterator = Iterator.from_structure(tr_data.data.output_types,
                                   tr_data.data.output_shapes)
next_batch = iterator.get_next()

# Ops for initializing the two different iterators
training_init_op = iterator.make_initializer(tr_data.data)
validation_init_op = iterator.make_initializer(val_data.data)

x = tf.placeholder(tf.float32, [FLAGS.batch_size, input_height, input_width, 3])
y = tf.placeholder(tf.float32, [FLAGS.batch_size, FLAGS.classnum])
phase_train = tf.placeholder(tf.bool, name='phase_train')


model = Model(x, FLAGS.classnum, phase_train)
score = model.fc3

var_list = [v for v in tf.trainable_variables()]

with tf.name_scope('cross_ent'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=score, labels=y))

with tf.name_scope('train'):
    gradients = tf.gradients(loss, var_list)
    gradients = list(zip(gradients, var_list))
    optimizer = tf.train.GradientDescentOptimizer(lr)
    '''
    update = optimizer.apply_gradients(grads_and_vars=gradients)
    with tf.control_dependencies([update]):
        train_op = tf.no_op(name='train_op')'''
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(extra_update_ops):
        train_op = optimizer.apply_gradients(grads_and_vars=gradients)
        #train_op = optimizer.minimize(loss)

with tf.name_scope('accuracy') as scope:
    pred = tf.equal(tf.argmax(score, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(pred, tf.float32))


saver = tf.train.Saver(max_to_keep=3)



train_n_batch = int(np.floor(tr_data.data_size / batch_size))
test_n_batch = int(np.floor(val_data.data_size / batch_size))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    if restore_model_path:
        model_file = tf.train.latest_checkpoint(restore_model_path)
        saver.restore(sess, model_file)

    max_test_acc = 0.0
    for step in range(FLAGS.max_steps):

        sess.run(training_init_op)
        start_time = datetime.datetime.now()

        t_acc, t_loss = 0.0, 0.0
        for i in range(train_n_batch):

            img_batch, label_batch = sess.run(next_batch)
            _, _acc, _loss = sess.run([train_op, accuracy, loss], {x: img_batch, y: label_batch, phase_train: True})
            if i % 1000 == 0:
                print _acc, _loss
            t_acc += _acc
            t_loss += _loss
        end_time = datetime.datetime.now()

        print 'step %d training finished,training time:%s' % (step, end_time - start_time)
        print "train accuracy : %.5f" % (t_acc / train_n_batch)
        print "train loss : %.8f" % (t_loss / train_n_batch)

        print("{} Start validation".format(datetime.datetime.now()))
        sess.run(validation_init_op)
        test_acc, test_loss = 0.0, 0.0
        for i in range(test_n_batch):
            img_batch, label_batch = sess.run(next_batch)

            acc, _loss = sess.run([accuracy, loss], {x: img_batch, y: label_batch, phase_train: False})
            test_acc += acc
            test_loss += _loss
        print 'test accuracy:%.5f,test loss:%.8f' % ((test_acc / test_n_batch), (test_loss / test_n_batch))
        test_acc /= test_n_batch
        if test_acc > max_test_acc:
            max_test_acc = test_acc
            checkpoint_path = checkpoint + '/' + model_name + '.ckpt'
            saver.save(sess, checkpoint_path, global_step=step + 1)
##########################################
###### model file ##########################
# model.py
import tensorflow as tf
import numpy as np
from tensorflow.python.ops import control_flow_ops

def maxpool(x, k, stride, name, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, stride, stride, 1], padding=padding,
                          name=name)

def norm(x, name, is_training):
    with tf.variable_scope(name) as scope:
        return tf.layers.batch_normalization(x, training=is_training)

def batch_norm(x, n_out, phase_train, scope='bn', affine=True):
    """
    Batch normalization on convolutional maps.
    Args:
        x: Tensor, 4D BHWD input maps.
        n_out: integer, depth of input maps
        phase_train: boolean tf.Variable, true indicates training phase
        scope: string, variable scope
        affine: whether to affine-transform outputs
    Return:
        normed: batch-normalized maps
    """
    with tf.variable_scope(scope):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]), name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]), name='gamma', trainable=affine)
        tf.add_to_collection('biases', beta)
        tf.add_to_collection('weights', gamma)

        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.99)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)
        mean, var = control_flow_ops.cond(phase_train, mean_var_with_update,
                                          lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_norm_with_global_normalization(x, mean, var, beta, gamma,
                                                            1e-3, affine)
    return normed

def conv_layer(x, k, s, channel, filter_num, name, bias=True, padding='SAME'):

    with tf.variable_scope(name) as scope:
        weights = tf.get_variable('weights', shape=[k, k, channel, filter_num])

        conv = tf.nn.conv2d(x, weights, [1, s, s, 1], padding=padding)

        if bias:
            biases = tf.get_variable('biases', shape=[filter_num])
            conv = tf.nn.bias_add(conv, biases)
        conv = tf.nn.relu(conv)
        return conv

def fc_layer(x, num_in, num_out, name, bias=True, relu=True):
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable("weights", shape=[num_in, num_out])

        if bias:
            biases = tf.get_variable("biases", shape=[num_out])
            fc = tf.nn.xw_plus_b(x, weights, biases)
        else:
            fc = tf.matmul(x, weights)
        if relu:
            return tf.nn.relu(fc)
        else:
            return fc

class Alexnet(object):
    def __init__(self, x, classnum, is_training):
        self.X = x
        self.classnum = classnum
        self.is_training = is_training
        self.model()

    def model(self):
        conv1 = conv_layer(self.X, 11, 4, 3, 96, bias=True, name='conv1', padding='VALID')
        pool1 = maxpool(conv1, 3, 2, name='pool1', padding='VALID')
        #norm1 = batch_norm(pool1, n_out=96, scope='norm1', phase_train=self.is_training)
        norm1 = norm(pool1, 'norm1', self.is_training)

        conv2 = conv_layer(norm1, 5, 1, 96, 256, bias=True, name='conv2')
        pool2 = maxpool(conv2, 3, 2, name='pool2', padding='VALID')
        #norm2 = batch_norm(pool2, n_out=256, scope='norm2', phase_train=self.is_training)
        norm2 = norm(pool2, 'norm2', self.is_training)

        conv3 = conv_layer(norm2, 3, 1, 256, 384, name='conv3')
        #norm3 = batch_norm(conv3, n_out=384, scope='norm3', phase_train=self.is_training)
        norm3 = norm(conv3, 'norm3', self.is_training)

        conv4 = conv_layer(norm3, 3, 1, 384, 384, name='conv4')
        #norm4 = batch_norm(conv4, n_out=384, scope='norm4', phase_train=self.is_training)
        norm4 = norm(conv4, 'norm4', self.is_training)

        conv5 = conv_layer(norm4, 3, 1, 384, 256, name='conv5')
        pool5 = maxpool(conv5, 3, 2, name='pool5', padding='VALID')
        #norm5 = batch_norm(pool5, n_out=256, scope='norm5', phase_train=self.is_training)
        norm5 = norm(pool5, 'norm5', self.is_training)

        fcin = tf.reshape(norm5, [-1, 256 * 6 * 6])
        fc1 = fc_layer(fcin, 256 * 6 * 6, 4096, bias=True, name='fc1')
        fc2 = fc_layer(fc1, 4096, 4096, bias=True, name='fc2')
        self.fc3 = fc_layer(fc2, 4096, self.classnum, bias=False, relu=False, name='fc3')

class Cifar10(object):
    def __init__(self, x, classnum, is_training):
        self.X = x
        self.classnum = classnum
        self.is_training = is_training
        self.model()

    def model(self):
        conv1 = conv_layer(self.X, 3, 1, 3, 96, bias=True, name='conv1')
        pool1 = maxpool(conv1, 2, 2, name='pool1', padding='VALID')
        #norm1 = norm(pool1, 'norm1', self.is_training)

        conv2 = conv_layer(pool1, 3, 1, 96, 256, bias=False, name='conv2')
        pool2 = maxpool(conv2, 2, 2, name='pool2', padding='VALID')
        #norm2 = norm(pool2, 'norm2', self.is_training)

        conv3 = conv_layer(pool2, 3, 1, 256, 384, name='conv3')
        #norm3 = norm(conv3, 'norm3', self.is_training)

        conv4 = conv_layer(conv3, 3, 1, 384, 384, name='conv4')
        #norm4 = norm(conv4, 'norm4', self.is_training)

        conv5 = conv_layer(conv4, 3, 1, 384, 256, name='conv5')
        pool5 = maxpool(conv5, 2, 2, name='pool5', padding='VALID')

        fcin = tf.reshape(pool5, [-1, 256 * 4 * 4])
        fc1 = fc_layer(fcin, 256 * 4 * 4, 4096, name='fc1')
        fc2 = fc_layer(fc1, 4096, 4096, name='fc2')
        self.fc3 = fc_layer(fc2, 4096, self.classnum, relu=False, name='fc3')

############################
# load data file ########
# datagenerator.py
"""Contains a helper class for image input pipelines in TensorFlow."""

import tensorflow as tf
import numpy as np

#from tensorflow.contrib.data import Dataset
from tensorflow.data import Dataset
from tensorflow.python.framework import dtypes
from tensorflow.python.framework.ops import convert_to_tensor

IMAGENET_MEAN = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32)


class ImageDataGenerator(object):
    """Wrapper class around the new TensorFlow dataset pipeline.

    Requires TensorFlow >= version 1.12rc0
    """

    def __init__(self, txt_file, mode, batch_size, num_classes, shuffle=True,
                 buffer_size=1000):
        """Create a new ImageDataGenerator.

        Receives a path to a text file in which each line contains a path to
        an image followed by a space and an integer class label. Using this
        data, this class creates TensorFlow datasets that can be used to
        train e.g. a convolutional neural network.

        Args:
            txt_file: Path to the text file.
            mode: Either 'training' or 'inference'. Depending on this value,
                different parsing functions will be used.
            batch_size: Number of images per batch.
            num_classes: Number of classes in the dataset.
            shuffle: Whether or not to shuffle the data in the dataset and the
                initial file list.
            buffer_size: Number of images used as buffer for TensorFlow's
                shuffling of the dataset.

        Raises:
            ValueError: If an invalid mode is passed.

        """
        self.txt_file = txt_file
        self.num_classes = num_classes
        self.mode = mode
        # retrieve the data from the text file
        self._read_txt_file()

        # number of samples in the dataset
        self.data_size = len(self.labels)

        # initial shuffling of the file and label lists (together!)
        if shuffle:
            self._shuffle_lists()

        # convert lists to TF tensor
        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        # create dataset
        data = Dataset.from_tensor_slices((self.img_paths, self.labels))

        # distinguish between train/infer. when calling the parsing functions
        if mode == 'training':
            #data = data.map(self._parse_function_train, num_threads=8,
            #         output_buffer_size=100*batch_size)
            data = data.map(self._parse_function_train)

        elif mode == 'inference':
            #data = data.map(self._parse_function_inference, num_threads=8,
            #          output_buffer_size=100*batch_size)
            data = data.map(self._parse_function_inference)

        else:
            raise ValueError("Invalid mode '%s'." % (mode))

        # shuffle the first `buffer_size` elements of the dataset
        if shuffle:
            data = data.shuffle(buffer_size=buffer_size)

        # create a new dataset with batches of images
        data = data.batch(batch_size)

        self.data = data

    def _read_txt_file(self):
        """Read the content of the text file and store it into lists."""
        self.img_paths = []
        self.labels = []
        path = ''
        if self.mode == 'training':
            path = '/data/ImageNet/imagenet/ILSVRC2015/Data/CLS-LOC/train/'
        elif self.mode == 'inference':
            path = '/data/ImageNet/imagenet/ILSVRC2015/Data/CLS-LOC/val/'
        with open(self.txt_file, 'r') as f:
            lines = f.readlines()
            for line in lines:
                items = line.split(' ')
                self.img_paths.append(path + items[0])
                self.labels.append(int(items[1]))

    def _shuffle_lists(self):
        """Conjoined shuffling of the list of paths and labels."""
        path = self.img_paths
        labels = self.labels
        permutation = np.random.permutation(self.data_size)
        self.img_paths = []
        self.labels = []
        for i in permutation:
            self.img_paths.append(path[i])
            self.labels.append(labels[i])

    def _parse_function_train(self, filename, label):
        """Input parser for samples of the training set."""
        # convert label number into one-hot encoding
        one_hot = tf.one_hot(label, self.num_classes)

        # load and preprocess the image
        img_string = tf.read_file(filename)
        img_decoded = tf.image.decode_png(img_string, channels=3)
        img_resized = tf.image.resize_images(img_decoded, [227, 227])
        """
        Data augmentation comes here.
        """
        img_centered = tf.subtract(img_resized, IMAGENET_MEAN)

        # RGB -> BGR
        img_bgr = img_centered[:, :, ::-1]

        return img_bgr, one_hot

    def _parse_function_inference(self, filename, label):
        """Input parser for samples of the validation/test set."""
        # convert label number into one-hot encoding
        one_hot = tf.one_hot(label, self.num_classes)

        # load and preprocess the image
        img_string = tf.read_file(filename)
        img_decoded = tf.image.decode_png(img_string, channels=3)
        img_resized = tf.image.resize_images(img_decoded, [227, 227])
        img_centered = tf.subtract(img_resized, IMAGENET_MEAN)

        # RGB -> BGR
        img_bgr = img_centered[:, :, ::-1]

        return img_bgr, one_hot
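For reference, _read_txt_file expects train.txt / val.txt to contain one sample per line: an image path relative to the train/val image directory, a space, and an integer class label. The file names below are made-up placeholders, only to illustrate the format:

n01440764/n01440764_10026.JPEG 0
n01440764/n01440764_10027.JPEG 0
n01443537/n01443537_1000.JPEG 1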