1. Download the TensorFlow Object Detection API
2. Compile the Protobuf files
Put protoc.exe somewhere on the PATH (for example C:\Windows). Running protoc against object_detection/protos/*.proto directly fails on Windows with `object_detection/protos/*.proto: No such file or directory`, and compiling the files one at a time also fails because many of them reference each other. After several attempts I found that the following command, run in a Windows PowerShell opened with Shift + right-click inside the models folder, compiles all of them at once:
Get-ChildItem object_detection/protos/*.proto |Resolve-Path -Relative | %{protoc $_ --python_out=.}
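If PowerShell is not available, the same batch compilation can be sketched in Python. This is a hypothetical helper, not part of the API itself, and it assumes `protoc` is on the PATH:

```python
import glob
import os
import subprocess

def build_protoc_commands(research_dir):
    """Build one protoc command per .proto file under object_detection/protos."""
    pattern = os.path.join(research_dir, "object_detection", "protos", "*.proto")
    commands = []
    for proto in sorted(glob.glob(pattern)):
        # Use a path relative to research_dir, matching the PowerShell one-liner.
        rel = os.path.relpath(proto, research_dir)
        commands.append(["protoc", rel, "--python_out=."])
    return commands

if __name__ == "__main__":
    research = os.getcwd()  # run this from models/research
    for cmd in build_protoc_commands(research):
        subprocess.check_call(cmd, cwd=research)
```

Each generated command mirrors one iteration of the `%{protoc $_ --python_out=.}` loop above.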
3. Set the PYTHONPATH environment variable
Under 'This PC' → 'Properties' → 'Advanced system settings' → 'Environment Variables' → 'System variables', create a new variable named PYTHONPATH and add the full paths of both the models/research/ and models/research/slim folders, separated by a semicolon.
Next, test the API. In the models/research/ folder, run:
python object_detection/builders/model_builder_test.py
If it finishes without errors, the setup works.
4. Prepare your own training set
First, label the images with LabelImg, a small annotation tool.
Labelling produces one XML file per image. Split the data into a train part and a test part, convert the XML annotations to CSV, then convert the CSVs to TFRecord, and put everything in object_detection's data directory. The training set is then ready.
Here is the XML-to-CSV script (watch the file paths):
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 16 00:52:02 2018
@author: Xiang Guo
Record the information from every XML file in a folder into one CSV file.
"""
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

os.chdir('D:\\机器学习\\bl\\turtles\\imgs\\test')
path = 'D:\\机器学习\\bl\\turtles\\imgs\\test'

def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            # member[0] is <name>; member[4] is <bndbox>, whose children
            # are xmin, ymin, xmax, ymax in the Pascal VOC layout LabelImg writes.
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df

def main():
    image_path = path
    xml_df = xml_to_csv(image_path)
    xml_df.to_csv('turtle_test.csv', index=None)
    print('Successfully converted xml to csv.')

main()
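To see exactly what those positional indexes pick out, here is a minimal sketch that parses one Pascal VOC annotation of the kind LabelImg writes. The annotation content itself is a made-up example:

```python
import xml.etree.ElementTree as ET

# A made-up Pascal VOC annotation in the layout LabelImg produces.
SAMPLE = """<annotation>
    <folder>imgs</folder>
    <filename>turtle_001.jpg</filename>
    <source><database>Unknown</database></source>
    <size><width>640</width><height>480</height><depth>3</depth></size>
    <segmented>0</segmented>
    <object>
        <name>turtle</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax>
        </bndbox>
    </object>
</annotation>"""

root = ET.fromstring(SAMPLE)
obj = root.findall('object')[0]
# obj[0] is <name>; obj[4] is <bndbox> (children: xmin, ymin, xmax, ymax) --
# the same indexes xml_to_csv relies on.
row = (root.find('filename').text,
       int(root.find('size')[0].text),   # width
       int(root.find('size')[1].text),   # height
       obj[0].text,                      # class name
       int(obj[4][0].text),              # xmin
       int(obj[4][1].text),              # ymin
       int(obj[4][2].text),              # xmax
       int(obj[4][3].text))              # ymax
print(row)
```

This yields one CSV row of the form (filename, width, height, class, xmin, ymin, xmax, ymax); note the indexes break if the tags appear in a different order.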
Here is the CSV-to-TFRecord script:
"""
Usage:
  # From tensorflow/models/
  # Create train data:
  python generate_tfrecord.py --csv_input=data/train_labels.csv --output_path=train.record
  # Create test data:
  python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=test.record
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow as tf

from PIL import Image
from utils import dataset_util
from collections import namedtuple, OrderedDict

flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS

# TO-DO replace this with label map
def class_text_to_int(row_label):
    if row_label == 'wg':
        return 1
    else:
        return None

def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]

def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(os.getcwd(), 'images/test')  # change to 'images/train' when creating train.record
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())

    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))

if __name__ == '__main__':
    tf.app.run()
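Note that create_tf_example stores box corners normalized to [0, 1] by the image size. That arithmetic can be checked in isolation with a small hypothetical helper (not part of the script above):

```python
def normalize_box(width, height, xmin, ymin, xmax, ymax):
    """Scale pixel coordinates to the [0, 1] range the TFRecord features expect,
    in the same (xmin, xmax, ymin, ymax) order create_tf_example appends them."""
    return (xmin / width, xmax / width, ymin / height, ymax / height)

# A 640x480 image with a box at (48, 240)-(195, 371):
print(normalize_box(640, 480, 48, 240, 195, 371))
```

All four values must land inside [0, 1]; values outside that range usually mean the CSV and the images were mismatched.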
After running both scripts, the data folder contains four new files: two CSVs and two .record files. This part is done.
Under object_detection, create an images folder and move the train and test images into it; also create a folder named training.
The structure looks like this:
Object-Detection
-data/
--test_labels.csv
--test.record
--train_labels.csv
--train.record
-images/
--test/
---testingimages.jpg
--train/
---testingimages.jpg
--...yourimages.jpg
-training
Next comes the config file. Open the sample configs page in the Object Detection GitHub repository: https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs.
Taking ssd_mobilenet_v1_coco.config as the example: extract ssd_mobilenet_v1_coco_2017_11_17.tar.gz in the object_detection folder, copy ssd_mobilenet_v1_coco.config into the training folder, open it with a text editor (I use Sublime Text 3), and make the following changes:
1. Search for PATH_TO_BE_CONFIGURED and replace each occurrence with your own paths; take care not to swap test and train.
2. Change num_classes to match your data; in my example it is 2.
3. batch_size is 24 by default. I ran out of GPU memory with that, so to be safe I set it to 1; if even 1 fails the same way, you probably need a better machine.
4. The two lines
fine_tune_checkpoint: "ssd_mobilenet_v1_coco_11_06_2017/model.ckpt"
from_detection_checkpoint: true
select the starting checkpoint. I set them at first but kept hitting out-of-memory errors. My understanding is that they make training start from a pre-trained model, and since that model was trained on a much larger public dataset, loading it locally caused problems. After deleting these two lines, which amounts to training from scratch, training ran normally. So if you intend to train from scratch, I suggest removing them.
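The config's train and eval sections also point at a label map via label_map_path, and the test script later loads training/tv_vehicle_detection.pbtxt, so the training folder needs a label map file. A minimal sketch for a two-class setup looks like the following; the class names here are illustrative guesses, and the ids must match what class_text_to_int in generate_tfrecord.py returns:

```
item {
  id: 1
  name: 'tv'
}
item {
  id: 2
  name: 'vehicle'
}
```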
---
Start training:
In an Anaconda Prompt, cd to the models\research\object_detection folder and run:
python train.py --logtostderr --train_dir=training/ --pipeline_config_path=training/ssd_mobilenet_v1_coco.config
After a repository update the files moved around, and train.py now lives in the legacy folder; I simply copied it back out into object_detection.
Visualize the training process:
From object_detection, run tensorboard --logdir=training, then open the URL it prints in Chrome to follow the whole training run.
Test the model:
Let's check how the current model performs. Close the training command line. In models\research\object_detection you will find export_inference_graph.py; running it requires the config and checkpoint parameters.
In an Anaconda Prompt, cd to models\research\object_detection and run:
python export_inference_graph.py --input_type image_tensor --pipeline_config_path training/ssd_mobilenet_v1_coco.config --trained_checkpoint_prefix training/model.ckpt-31012 --output_directory tv_vehicle_inference_graph
The checkpoint number (the digits after .ckpt-) comes from the files your training run left in the training folder; fill in your own number (if there are several, take the largest). Change --output_directory to whatever name you like.
When it finishes, the output folder (tv_vehicle_inference_graph in my case) contains several files: saved_model, checkpoint, frozen_inference_graph.pb, and so on. The file ending in .pb is the all-important frozen model. Remember the frozen model from the first part? Exactly: that is what we will use next.
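Picking the largest .ckpt- number by hand is error-prone. A small hypothetical helper (not part of the API) can scan the training folder for it:

```python
import glob
import os
import re

def latest_checkpoint_step(train_dir):
    """Return the highest N among model.ckpt-N.* files in train_dir, or None."""
    steps = []
    for path in glob.glob(os.path.join(train_dir, "model.ckpt-*")):
        m = re.search(r"model\.ckpt-(\d+)", os.path.basename(path))
        if m:
            steps.append(int(m.group(1)))
    return max(steps) if steps else None

if __name__ == "__main__":
    step = latest_checkpoint_step("training")
    print("use --trained_checkpoint_prefix training/model.ckpt-%s" % step)
```

Run it from object_detection and paste the reported number into the export command.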
The training part is done; all that remains is the final test part.
Going back to the code walkthrough in the first part: we now have our own trained model, so we only need to adjust a few paths and parameters for our situation. Here is the complete code:
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 11 16:55:43 2018
@author: Xiang Guo
"""
# Imports
import time
start = time.time()
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

if tf.__version__ < '1.4.0':
    raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')

os.chdir('D:\\tensorflow-model\\models\\research\\object_detection')

# Env setup
# This is needed to display the images.
#%matplotlib inline

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")

# Object detection imports
from utils import label_map_util
from utils import visualization_utils as vis_util

# Model preparation
# What model to use: the model we just trained.
MODEL_NAME = 'tv_vehicle_inference_graph'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('training', 'tv_vehicle_detection.pbtxt')

# Change to the number of classes in your own example; 2 here.
NUM_CLASSES = 2

'''
# Download Model
# We use our own model, so there is nothing to download.
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
    file_name = os.path.basename(file.name)
    if 'frozen_inference_graph.pb' in file_name:
        tar_file.extract(file, os.getcwd())
'''

# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Helper code
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)

# Detection
# If you want to test the code with your images, just add the paths to TEST_IMAGE_PATHS.
# Location of the test images.
PATH_TO_TEST_IMAGES_DIR = os.path.join(os.getcwd(), 'test_images2')
os.chdir(PATH_TO_TEST_IMAGES_DIR)
TEST_IMAGE_PATHS = os.listdir(PATH_TO_TEST_IMAGES_DIR)

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

output_path = 'D:\\tensorflow-model\\models\\research\\object_detection\\test_output\\self_trained\\'

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Definite input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        for image_path in TEST_IMAGE_PATHS:
            image = Image.open(image_path)
            # The array based representation of the image will be used later in order to prepare the
            # result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            # Save the output file. Note: image_np is RGB while cv2.imwrite
            # expects BGR, so the saved colors may look channel-swapped.
            cv2.imwrite(output_path + os.path.basename(image_path), image_np)

end = time.time()
print("Execution Time: ", end - start)