TensorFlow Models GitHub: https://github.com/tensorflow/models Document: https://github.com/jikexueyuanwiki/tensorflow-zh CIFAR-10 數據集 Web: http://www.cs.toronto.edu/~kriz/cifar.html
目標(biāo):(建立一個(gè)用于識(shí)別圖像的相對(duì)較小的卷積神經(jīng)網(wǎng)絡(luò))對(duì)一組32x32RGB的圖像進(jìn)行分類 數(shù)據(jù)集:60000張32*32*3的彩色圖片,,其中50000張訓(xùn)練集,,10000張測(cè)試集,涵蓋10個(gè)類別:飛機(jī),, 汽車,, 鳥, 貓,, 鹿,, 狗, 青蛙,, 馬,, 船以及卡車 CIFAR-10 模型訓(xùn)練GitHub:https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10 流程:首先讀取圖片,對(duì)圖片預(yù)處理,,進(jìn)行數(shù)據(jù)增強(qiáng),,然后將圖片存放到隊(duì)列中打亂之后用于網(wǎng)絡(luò)輸入。其次構(gòu)造模型,,損失函數(shù)計(jì)算,,學(xué)習(xí)率指數(shù)衰減,計(jì)算梯度,,用梯度來求解最優(yōu)值,。最后開始訓(xùn)練,。 1)導(dǎo)入庫# cifar10_train.py from __future__ import absolute_import from __future__ import division from __future__ import print_function from datetime import datetime import time import tensorflow as tf import cifar10 # cifar10.py from __future__ import absolute_import from __future__ import division from __future__ import print_function import re import tensorflow as tf import cifar10_input import os import sys import urllib import tarfile # cifar10_input.py from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import tensorflow_datasets as tfds 2)使用FLAGS設(shè)置參數(shù)# cifar10_train.py # 定義全局變量 FLAGS = tf.app.flags.FLAGS # 初始化 參數(shù)1為變量名,如train_dir,,可通過FLAGS.train_dir取得該變量的值 tf.app.flags.DEFINE_string('train_dir', './tmp/cifar10_train', """Directory where to write event logs """"""and checkpoint.""") tf.app.flags.DEFINE_integer('max_steps', 1000000, """Number of batches to run.""") tf.app.flags.DEFINE_boolean('log_device_placement', False, """Whether to log device placement.""") # cifar10.py' # 基本模型參數(shù) tf.app.flags.DEFINE_integer('batch_size', 128, """Number of images to process in a batch.""") tf.app.flags.DEFINE_boolean('use_fp16', True, """Train the model using fp16.""") tf.app.flags.DEFINE_string('data_dir', './tmp/cifar10_data', """Path to the CIFAR-10 data directory.""") tf.app.flags.DEFINE_integer('log_frequency', 10, """How often to log results to the console.""") DATA_URL = 
'http://www.cs./~kriz/cifar-10-binary.tar.gz' 3)下載數(shù)據(jù)集國內(nèi)網(wǎng)絡(luò)環(huán)境的原因,,源碼中下載數(shù)據(jù)集代碼可能執(zhí)行不成功,,可以去官網(wǎng)下載好數(shù)據(jù)集,然后放置在 下載:cifar-10-binary.tar.gz 下載至:*\tutorials\image\cifar10\tmp\cifar10_data(FLAGS.data_dir ='./tmp/cifar10_data') # cifar10.py # 檢測(cè)本地是否有數(shù)據(jù)集 def maybe_download_and_extract(): """Download and extract the tarball from Alex's website.""" dest_directory = FLAGS.data_dir # /tmp/cifar10_data # 判斷文件夾是否存在,,不存在則創(chuàng)建 if not os.path.exists(dest_directory): os.makedirs(dest_directory) # 從URL中獲得文件名:DATA_URL定義為cifar10數(shù)據(jù)集下載地址,,這里將URL最后一個(gè)斜杠后面的內(nèi)容作為文件名 filename = DATA_URL.split('/')[-1] # 合并文件路徑:將文件名與數(shù)據(jù)文件夾結(jié)合得到下載文件存放的路徑 filepath = os.path.join(dest_directory, filename) # 判斷文件是否存在,如果存在,,表明數(shù)據(jù)集已經(jīng)下載,,就無需再下載,如果還沒下載,,則通過urllib.request.urlretrieve直接下載文件 if not os.path.exists(filepath): # 定義下載過程中打印日志的回調(diào)函數(shù):回調(diào)函數(shù)用于顯示下載進(jìn)度,,下載進(jìn)度為當(dāng)前下載量除以總下載量 def _progress(count, block_size, total_size): sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, float(count * block_size) / float(total_size) * 100.0)) sys.stdout.flush() # 下載數(shù)據(jù)集 filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath,reporthook=_progress) print() # 獲得文件信息 statinfo = os.stat(filepath) print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') # 定義解壓路徑 extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin') # 解壓縮:判斷解壓文件夾是否存在,若存在表明數(shù)據(jù)集已經(jīng)下載并解壓了,,就不需要操作 if not os.path.exists(extracted_dir_path): tarfile.open(filepath, 'r:gz').extractall(dest_directory) 4)導(dǎo)入數(shù)據(jù)和標(biāo)簽# cifar10_train.py def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(): # 定義記錄訓(xùn)練步數(shù)的變量 global_step = tf.train.get_or_create_global_step() # Get images and labels for CIFAR-10. # Force input pipeline to CPU:0 to avoid operations sometimes ending up on # GPU and resulting in a slow down. 
# cifar10_train.py -- this statement continues the body of train(), inside its
# `with tf.Graph().as_default():` block.
with tf.device('/cpu:0'):
    # Load images and labels from CIFAR-10.
    images, labels = cifar10.distorted_inputs()


# cifar10.py
def distorted_inputs():
    """Construct distorted input for CIFAR training using the Reader ops.

    Reads from the 'cifar-10-batches-bin' directory under FLAGS.data_dir, so
    the input pipeline looks where the download step put the data.

    Returns:
      images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.
    """
    data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    images, labels = cifar10_input.distorted_inputs(
        batch_size=FLAGS.batch_size, data_dir=data_dir)
    if FLAGS.use_fp16:
        images = tf.cast(images, tf.float16)
        labels = tf.cast(labels, tf.float16)
    return images, labels


# cifar10_input.py
# Process images of this size. Note that this differs from the original CIFAR
# image size of 32 x 32. If one alters this number, then the entire model
# architecture will change and any model would need to be retrained.
IMAGE_SIZE = 24

# Global constants describing the CIFAR-10 data set.
NUM_CLASSES = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000


def distorted_inputs(batch_size,
                     data_dir='./tmp/cifar10_data/cifar-10-batches-bin/'):
    """Construct distorted input for CIFAR training using the Reader ops.

    Args:
      batch_size: Number of images per batch.
      data_dir: Directory holding data_batch_1.bin .. data_batch_5.bin. The
        default is the location that used to be hard-coded here.

    Returns:
      images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.

    Raises:
      ValueError: If one of the five data files is missing.
    """
    # Imported locally so this function does not depend on the file-level
    # import list providing `os`.
    import os

    # The data files to read.
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                 for i in range(1, 6)]
    # Fail early if any data file is missing.
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Wrap all file names in a TensorFlow queue; readers take their next file
    # from this queue.
    filename_queue = tf.train.string_input_producer(filenames)

    with tf.name_scope('data_augmentation'):
        # Read examples from files in the filename queue.
        read_input = read_cifar10(filename_queue)
        reshaped_image = tf.cast(read_input.uint8image, tf.float32)

        height = IMAGE_SIZE
        width = IMAGE_SIZE

        # Image processing for training the network. Note the many random
        # distortions applied to the image.

        # Randomly crop a [height, width] section of the image.
        distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

        # Randomly flip the image horizontally.
        distorted_image = tf.image.random_flip_left_right(distorted_image)

        # These operations are not commutative, so one could consider
        # randomising their order.
        # Randomly adjust the brightness within a range.
        distorted_image = tf.image.random_brightness(distorted_image,
                                                     max_delta=63)
        # Randomly adjust the contrast within a range.
        distorted_image = tf.image.random_contrast(distorted_image,
                                                   lower=0.2, upper=1.8)

        # Subtract the mean and divide by the standard deviation of the
        # pixels (per-image standardisation).
        float_image = tf.image.per_image_standardization(distorted_image)

        # Set the static shapes of the tensors.
        float_image.set_shape([height, width, 3])
        read_input.label.set_shape([1])

        # Keep enough examples queued that random shuffling mixes well.
        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                                 min_fraction_of_examples_in_queue)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size)


def read_cifar10(filename_queue):
    """Read and parse one example from the CIFAR10 data files.

    Recommendation: for N-way read parallelism call this function N times.
    That gives N independent readers at different files and positions, which
    mixes the examples better.

    Args:
      filename_queue: A queue of strings with the file names to read from.

    Returns:
      An object representing a single example, with the following fields:
        height: number of rows in the result (32)
        width: number of columns in the result (32)
        depth: number of colour channels in the result (3)
        key: scalar string tensor describing the file name and record number
          of this example
        label: an int32 tensor with the label in the range 0..9
        uint8image: a [height, width, depth] uint8 tensor with the image data
    """

    # Plain attribute container for the result.
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()

    # Only 10 classes, so the label fits in one byte.
    label_bytes = 1  # 2 for CIFAR-100
    # 32x32 RGB images.
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    # Every record has the fixed layout label + image, hence a fixed length.
    record_bytes = label_bytes + image_bytes

    # Fixed-length reader. The CIFAR-10 format has no header or footer, so
    # header_bytes and footer_bytes keep their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    # Read one record from a file in filename_queue. `key` identifies the
    # file and record (useful for debugging); `value` is the raw record as a
    # scalar string.
    result.key, value = reader.read(filename_queue)

    # Convert the string into a uint8 vector that is record_bytes long. A
    # separate name is used so the integer `record_bytes` is not rebound.
    record = tf.decode_raw(value, tf.uint8)

    # The first byte is the label, converted uint8 -> int32. Unlike ordinary
    # slicing, the third argument of tf.slice is the slice length.
    result.label = tf.cast(tf.slice(record, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label are the image, reshaped from
    # [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(tf.slice(record, [label_bytes], [image_bytes]),
                             [result.depth, result.height, result.width])
    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result


def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size):
    """Construct a queued batch of images and labels.

    Args:
      image: 3-D tensor of [height, width, 3] of type float32.
      label: 1-D tensor of type int32.
      min_queue_examples: int32, minimum number of examples to retain in the
        queue that provides the batches.
      batch_size: Number of images per batch.

    Returns:
      images: Images. 4D tensor of [batch_size, height, width, 3] size.
      labels: Labels. 1D tensor of [batch_size] size.
    """
    # Create a queue that shuffles the examples, then read batch_size
    # image + label pairs from it.
    num_preprocess_threads = 16
    images, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples)

    # At the end of the input pipeline another queue feeds the examples to
    # training, evaluation and inference; tf.train.shuffle_batch is used so
    # that the queued examples are shuffled.

    # Display the training images in the visualizer.
    tf.summary.image('images', images)

    return images, tf.reshape(label_batch, [batch_size])


# 5) Build the graph
# cifar10_train.py
# Build a Graph that computes the logits predictions from the
# inference model.
# cifar10_train.py (statement from the training graph construction)
logits = cifar10.inference(images)

# cifar10.py
# Build the graph as far as is needed to run the network forward and make
# predictions.
TOWER_NAME = 'tower'


def inference(images):
    """Build the CIFAR-10 model.

    Args:
      images: Images returned from distorted_inputs() or inputs().

    Returns:
      Logits.
    """
    # All variables are instantiated with tf.get_variable() rather than
    # tf.Variable() so they can be shared when training across multiple GPUs.
    # When running on a single GPU only, every tf.get_variable() could be
    # replaced by tf.Variable().

    # conv1 - first convolution layer
    with tf.variable_scope('conv1') as scope:
        # Everything created inside this scope carries its name as a prefix.
        # 64 kernels of 5*5.
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 3, 64],
                                             stddev=1e-4,
                                             wd=0.0)
        # Stride 1 with SAME padding keeps width and height; the channel
        # count becomes 64.
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv1)

    # pool1 - 3*3 max pooling with stride 2
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool1')
    # norm1 - local response normalisation
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm1')

    # conv2 - second convolution layer
    with tf.variable_scope('conv2') as scope:
        # 64 kernels of 5*5.
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 64, 64],
                                             stddev=1e-4,
                                             wd=0.0)
        conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name=scope.name)
        _activation_summary(conv2)

    # norm2 - local response normalisation
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm2')
    # pool2 - second max pooling layer
    pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    # local3 - fully connected layer with 384 units
    with tf.variable_scope('local3') as scope:
        # Flatten the features of each example into one vector so a single
        # matrix multiplication can be applied.
        dim = 1
        for d in pool2.get_shape()[1:].as_list():
            dim *= d
        reshape = tf.reshape(pool2, [FLAGS.batch_size, dim])

        weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                              stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [384],
                                  tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases,
                            name=scope.name)
        _activation_summary(local3)

    # local4 - fully connected layer with 192 units
    with tf.variable_scope('local4') as scope:
        weights = _variable_with_weight_decay('weights', shape=[384, 192],
                                              stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [192],
                                  tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases,
                            name=scope.name)
        _activation_summary(local4)

    # Output layer: the linear part WX + b only. No softmax is applied here;
    # loss() passes these logits to a softmax cross-entropy op.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                              stddev=1/192.0, wd=0.0)
        biases = _variable_on_cpu('biases', [NUM_CLASSES],
                                  tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases,
                                name=scope.name)
        _activation_summary(softmax_linear)

    return softmax_linear


def _variable_with_weight_decay(name, shape, stddev, wd):
    """Create a variable initialised from a truncated normal, with weight decay.

    A weight decay term is added to the 'losses' collection only when `wd` is
    not None.

    Args:
      name: Name of the variable.
      shape: List of ints.
      stddev: Standard deviation of the truncated Gaussian.
      wd: The L2 loss of the variable is multiplied by this float and added
        as weight decay. If None, no weight decay is added for this variable.

    Returns:
      Variable tensor.
    """
    # Follow FLAGS.use_fp16 so the weights have the same dtype as the inputs.
    dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
    var = _variable_on_cpu(
        name, shape,
        tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
    return var


def _variable_on_cpu(name, shape, initializer):
    """Create a variable placed on /cpu:0.

    Args:
      name: Name of the variable.
      shape: List of ints.
      initializer: Initializer for the variable.

    Returns:
      Variable tensor.
    """
    # Ops created under tf.device are pinned to the named device.
    with tf.device('/cpu:0'):
        # Without an explicit dtype the variable would be float32 and would
        # not match inputs that were cast to float16.
        dtype = tf.float16 if FLAGS.use_fp16 else tf.float32
        var = tf.get_variable(name, shape, initializer=initializer,
                              dtype=dtype)
    return var


def _activation_summary(x):
    """Create summaries for an activation.

    Adds a histogram summary of the activations and a scalar summary of
    their sparsity (fraction of zeros).

    Args:
      x: Tensor.

    Returns:
      Nothing.
    """
    # For multi-GPU training, strip 'tower_[0-9]/' from the name so the
    # TensorBoard display stays readable.
    tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
    tf.summary.histogram(tensor_name + '/activations', x)
    tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))


# 6) Add the loss function to the inference graph
# cifar10_train.py
# Calculate loss.
# cifar10_train.py (statement from the training graph construction)
loss = cifar10.loss(logits, labels)


# cifar10.py
# Describe the loss function: add to the inference graph the ops needed to
# produce the loss.
def loss(logits, labels):
    """Compute the total loss: mean cross entropy plus the collected losses.

    The mean cross entropy is added to the 'losses' collection, and
    everything in that collection (including any weight decay terms placed
    there earlier) is summed.

    Args:
      logits: Logits from inference().
      labels: Labels from distorted_inputs() or inputs(). 1-D tensor of shape
        [batch_size].

    Returns:
      Loss tensor of type float.
    """
    labels = tf.cast(labels, tf.int64)
    # Average cross entropy over the batch. The sparse variant takes class
    # indices as labels rather than one-hot vectors.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)

    # The total loss is the cross entropy plus all weight decay (L2) terms.
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


# 7) Build the training op (gradient descent)
# cifar10_train.py
# Build a Graph that trains the model with one batch of examples and
# updates the model parameters.
train_op = cifar10.train(loss, global_step)

# cifar10.py
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0      # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1       # Initial learning rate.


def train(total_loss, global_step):
    """Train the CIFAR-10 model.

    Creates an optimizer and applies it to all trainable variables, and adds
    moving averages for all trainable variables.

    Args:
      total_loss: Total loss from loss().
      global_step: Integer variable counting the training steps.

    Returns:
      train_op: op for training.
    """
    # Quantities that determine the learning rate schedule.
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)
    # Summaries observe tensor values without affecting the data flow.
    tf.summary.scalar('learning_rate', lr)

    # Generate moving averages of all losses and the associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients, only after the loss averages have been updated.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    # Apply gradients.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables; the averages are
    # updated only after the gradients have been applied.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    with tf.control_dependencies([apply_gradient_op]):
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

    return variables_averages_op


def _add_loss_summaries(total_loss):
    """Add summaries for the losses in the CIFAR-10 model.

    Generates moving averages for all losses and the associated summaries so
    the performance of the network can be visualised.

    Args:
      total_loss: Total loss from loss().

    Returns:
      loss_averages_op: op for generating the moving averages of the losses.
    """
    # Moving average of every individual loss and of the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to every individual loss and the total loss,
    # plus one for the averaged version of each.
    for l in losses + [total_loss]:
        # The raw value used to be tagged with the suffix ' (raw)', which is
        # not a legal summary name: TensorFlow logged
        #   "Summary name conv1/weight_loss (raw) is illegal; using
        #    conv1/weight_loss__raw_ instead."
        # and rewrote it. Using the rewritten suffix directly keeps the same
        # tag and avoids that log line. The moving average keeps the loss's
        # original name.
        tf.summary.scalar(l.op.name + '__raw_', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))

    return loss_averages_op


# 8) Launch the graph in a session and start training
# TensorFlow programs are usually organised into a construction phase and an
# execution phase:
# cifar10_train.py
def train():
    # ... (the graph-construction part of train() is elided in this excerpt;
    # `loss` and `train_op` used below come from that part) ...
    # NOTE(review): in the full file this code presumably sits inside the
    # function's `with tf.Graph().as_default():` block -- confirm.

    class _LoggerHook(tf.train.SessionRunHook):
        """Logs loss and runtime."""

        def begin(self):
            # Reset the step counter and the timer when the hook starts.
            self._start_time = time.time()
            self._step = -1

        def before_run(self, run_context):
            # Count the step and request the loss value for this run call.
            self._step += 1
            return tf.train.SessionRunArgs(loss)  # Asks for loss value.

        def after_run(self, run_context, run_values):
            # Report only every FLAGS.log_frequency steps.
            if self._step % FLAGS.log_frequency != 0:
                return

            now = time.time()
            elapsed = now - self._start_time
            self._start_time = now

            loss_value = run_values.results
            examples_per_sec = (FLAGS.log_frequency * FLAGS.batch_size
                                / elapsed)
            sec_per_batch = float(elapsed / FLAGS.log_frequency)

            message = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                       'sec/batch)')
            print(message % (datetime.now(), self._step, loss_value,
                             examples_per_sec, sec_per_batch))

    session_hooks = [
        # Requests a stop once last_step is reached.
        tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
        # Monitors whether the loss becomes NaN.
        tf.train.NanTensorHook(loss),
        _LoggerHook(),
    ]
    session_config = tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement)

    # MonitoredTrainingSession is given the directory used for checkpoints.
    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=FLAGS.train_dir,
            hooks=session_hooks,
            config=session_config) as mon_sess:
        # Keep training until a hook asks the session to stop.
        while not mon_sess.should_stop():
            mon_sess.run(train_op)


# Training loss | Accuracy measured on 10 samples drawn at random from the
# test set:
|
|